mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-06 18:57:21 +08:00
Compare commits
1 Commits
fix/gatewa
...
refactor/w
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bf4e502147 |
294
gateway/run.py
294
gateway/run.py
@@ -93,6 +93,153 @@ def _telegramize_command_mentions(text: str, platform: Any) -> str:
|
||||
_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT = 60 * 60  # one hour, in seconds

# --- Stale-code self-check ------------------------------------------------
# Long-running gateway processes that survive an ``hermes update`` keep the
# old ``hermes_cli.config`` (and friends) cached in ``sys.modules``. When
# the updated tool files on disk then try to ``from hermes_cli.config
# import cfg_get`` (added in PR #17304), the import resolves against the
# already-loaded stale module object and raises ``ImportError`` — see
# Issue #17648. Rather than papering over the import failure site-by-site
# in every tool file, detect the stale state centrally and auto-restart
# so the gateway reloads with fresh code.
#
# The signal we use is ``git rev-parse HEAD`` — the only thing ``hermes
# update`` moves that is NOT moved by agent-driven file edits. Earlier
# revisions of this check compared file mtimes across a sentinel set
# (run_agent.py, gateway/run.py, ...), but that produced false positives
# whenever the agent edited its own source files during a session:
# mtime jumps, stale-check fires, gateway restarts, user must retype.
# See the conversation at PR #<this> for the motivating incident.
#
# The legacy mtime sentinels are kept ONLY as a last-resort fallback for
# non-git installs (pip install from wheel, sparse clones with no .git
# dir). In those environments ``hermes update`` is not a supported path,
# so the check effectively no-ops — which is the safe behavior: better
# to ship one broken import than to restart on every agent-edit.
_STALE_CODE_SENTINELS: tuple[str, ...] = (
    "hermes_cli/config.py",
    "hermes_cli/__init__.py",
    "run_agent.py",
    "gateway/run.py",
    "pyproject.toml",
)

# Cache git HEAD reads across consecutive messages so a chat burst doesn't
# spawn one subprocess per message. 5s is long enough to collapse a burst
# and short enough that the real post-update detection still fires within
# the user's perceived "next message" window.
_GIT_SHA_CACHE_TTL_SECS = 5.0
|
||||
|
||||
|
||||
def _read_git_head_sha(repo_root: Path) -> Optional[str]:
|
||||
"""Return the git HEAD SHA for ``repo_root``, or None if unavailable.
|
||||
|
||||
Reads ``.git/HEAD`` directly (and follows one level of ref) instead
|
||||
of shelling out to ``git`` — cheaper, no subprocess tax, works on
|
||||
gateway hosts that don't have a ``git`` binary on PATH. Returns
|
||||
None for non-git installs (no ``.git`` dir) or any I/O error; callers
|
||||
treat None as "can't tell" and skip the check.
|
||||
|
||||
Supports the three layouts we care about:
|
||||
1. Main checkout: ``<repo>/.git/`` is a directory.
|
||||
2. Git worktree: ``<repo>/.git`` is a file ``gitdir: <path>`` that
|
||||
points at ``<main>/.git/worktrees/<name>/``. The worktree's
|
||||
gitdir has HEAD + index but NOT refs/heads/ — those live in
|
||||
the main checkout, and ``<worktree-gitdir>/commondir`` points
|
||||
at the main ``.git``. We search both locations for refs.
|
||||
3. Packed refs: ``refs/heads/<branch>`` is absent on disk but
|
||||
listed in ``<main-git-dir>/packed-refs``.
|
||||
"""
|
||||
try:
|
||||
git_dir = repo_root / ".git"
|
||||
# Worktrees store ``.git`` as a file pointing at gitdir: <path>
|
||||
if git_dir.is_file():
|
||||
try:
|
||||
content = git_dir.read_text().strip()
|
||||
if content.startswith("gitdir:"):
|
||||
git_dir = Path(content.split(":", 1)[1].strip())
|
||||
if not git_dir.is_absolute():
|
||||
git_dir = (repo_root / git_dir).resolve()
|
||||
except OSError:
|
||||
return None
|
||||
if not git_dir.is_dir():
|
||||
return None
|
||||
|
||||
# Figure out the "common" git dir — the one that owns shared refs.
|
||||
# For a worktree, commondir points at it (relative path, resolve
|
||||
# against git_dir). For a main checkout, common_dir == git_dir.
|
||||
common_dir = git_dir
|
||||
commondir_file = git_dir / "commondir"
|
||||
if commondir_file.is_file():
|
||||
try:
|
||||
rel = commondir_file.read_text().strip()
|
||||
candidate = (git_dir / rel).resolve() if rel else git_dir
|
||||
if candidate.is_dir():
|
||||
common_dir = candidate
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
head_path = git_dir / "HEAD"
|
||||
if not head_path.is_file():
|
||||
return None
|
||||
head_content = head_path.read_text().strip()
|
||||
|
||||
if head_content.startswith("ref:"):
|
||||
# Symbolic ref — follow one level (e.g. ref: refs/heads/main).
|
||||
# Worktree-local refs (bisect, rebase-merge state) live under
|
||||
# git_dir; shared refs (refs/heads/*, refs/tags/*) live under
|
||||
# common_dir. Try git_dir first, then common_dir.
|
||||
ref_rel = head_content.split(":", 1)[1].strip()
|
||||
for base in (git_dir, common_dir) if git_dir != common_dir else (git_dir,):
|
||||
ref_path = base / ref_rel
|
||||
if ref_path.is_file():
|
||||
try:
|
||||
sha = ref_path.read_text().strip()
|
||||
except OSError:
|
||||
continue
|
||||
if sha:
|
||||
return sha
|
||||
# Packed refs fallback — always stored in the common dir.
|
||||
packed = common_dir / "packed-refs"
|
||||
if packed.is_file():
|
||||
try:
|
||||
for line in packed.read_text().splitlines():
|
||||
line = line.strip()
|
||||
if not line or line.startswith("#") or line.startswith("^"):
|
||||
continue
|
||||
parts = line.split(None, 1)
|
||||
if len(parts) == 2 and parts[1] == ref_rel:
|
||||
return parts[0] or None
|
||||
except OSError:
|
||||
return None
|
||||
return None
|
||||
|
||||
# Detached HEAD — content is the SHA directly.
|
||||
return head_content or None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _compute_repo_mtime(repo_root: Path) -> float:
|
||||
"""Return the newest mtime across the stale-code sentinel files.
|
||||
|
||||
Legacy fallback used only for non-git installs (``.git`` missing).
|
||||
Missing files are ignored (they may not exist on older checkouts).
|
||||
Returns 0.0 if no sentinel file is readable — treat that as "can't
|
||||
tell", which downstream callers interpret as "not stale" to avoid
|
||||
false-positive restart loops.
|
||||
"""
|
||||
newest = 0.0
|
||||
for rel in _STALE_CODE_SENTINELS:
|
||||
try:
|
||||
st = (repo_root / rel).stat()
|
||||
except (OSError, FileNotFoundError):
|
||||
continue
|
||||
if st.st_mtime > newest:
|
||||
newest = st.st_mtime
|
||||
return newest
|
||||
|
||||
|
||||
def _coerce_gateway_timestamp(value: Any) -> Optional[float]:
|
||||
"""Best-effort conversion of stored gateway timestamps to epoch seconds.
|
||||
|
||||
@@ -960,6 +1107,13 @@ class GatewayRunner:
|
||||
_stop_task: Optional[asyncio.Task] = None
|
||||
_session_model_overrides: Dict[str, Dict[str, str]] = {}
|
||||
_session_reasoning_overrides: Dict[str, Dict[str, Any]] = {}
|
||||
# Stale-code self-check defaults (see _detect_stale_code()). Class-level
|
||||
# so tests that construct GatewayRunner via ``object.__new__`` without
|
||||
# running __init__ don't crash when _handle_message reads these.
|
||||
_boot_wall_time: float = 0.0
|
||||
_boot_repo_mtime: float = 0.0
|
||||
_boot_git_sha: Optional[str] = None
|
||||
_stale_code_restart_triggered: bool = False
|
||||
|
||||
def __init__(self, config: Optional[GatewayConfig] = None):
|
||||
global _gateway_runner_ref
|
||||
@@ -968,6 +1122,30 @@ class GatewayRunner:
|
||||
self._warn_if_docker_media_delivery_is_risky()
|
||||
_gateway_runner_ref = _weakref.ref(self)
|
||||
|
||||
# Boot-time snapshot used by the stale-code self-check. Captured
|
||||
# before any work happens so post-update file writes are guaranteed
|
||||
# to have newer mtimes. See _detect_stale_code() / Issue #17648.
|
||||
try:
|
||||
self._boot_wall_time: float = time.time()
|
||||
self._repo_root_for_staleness: Path = Path(__file__).resolve().parent.parent
|
||||
self._boot_git_sha: Optional[str] = _read_git_head_sha(
|
||||
self._repo_root_for_staleness,
|
||||
)
|
||||
self._boot_repo_mtime: float = _compute_repo_mtime(
|
||||
self._repo_root_for_staleness,
|
||||
)
|
||||
except Exception:
|
||||
self._boot_wall_time = 0.0
|
||||
self._repo_root_for_staleness = Path(".")
|
||||
self._boot_git_sha = None
|
||||
self._boot_repo_mtime = 0.0
|
||||
self._stale_code_notified: set[str] = set()
|
||||
self._stale_code_restart_triggered: bool = False
|
||||
# Cached current-SHA read, refreshed at most every
|
||||
# _GIT_SHA_CACHE_TTL_SECS so bursty chats don't hammer the filesystem.
|
||||
self._cached_current_sha: Optional[str] = self._boot_git_sha
|
||||
self._cached_current_sha_at: float = self._boot_wall_time
|
||||
|
||||
# Load ephemeral config from config.yaml / env vars.
|
||||
# Both are injected at API-call time only and never persisted.
|
||||
self._prefill_messages = self._load_prefill_messages()
|
||||
@@ -2675,6 +2853,101 @@ class GatewayRunner:
|
||||
task.add_done_callback(self._background_tasks.discard)
|
||||
return True
|
||||
|
||||
def _current_git_sha_cached(self) -> Optional[str]:
    """Return the on-disk HEAD SHA, reusing a short-lived cached value.

    Without the cache, a burst of messages (user mashes "hello?" three
    times) would re-read ``.git/HEAD`` once per message. The TTL
    (``_GIT_SHA_CACHE_TTL_SECS``) collapses such a burst into a single
    read while remaining short enough that a real post-update change is
    noticed within the user's perceived "next message" window.
    """
    now = time.time()
    cache_age = now - self._cached_current_sha_at
    if self._cached_current_sha is not None and cache_age < _GIT_SHA_CACHE_TTL_SECS:
        return self._cached_current_sha
    try:
        fresh = _read_git_head_sha(self._repo_root_for_staleness)
    except Exception:
        fresh = None
    self._cached_current_sha = fresh
    self._cached_current_sha_at = now
    return fresh
|
||||
|
||||
def _detect_stale_code(self) -> bool:
    """Report whether git HEAD has moved since this process booted.

    A gateway that outlives ``hermes update`` (manual SIGTERM never
    escalated, systemd restart race, detached-process respawn failed,
    etc.) still has pre-update modules cached in ``sys.modules``;
    importing names that only exist in the post-update files — e.g.
    ``cfg_get`` from PR #17304 — then raises ImportError against the
    stale module object (Issue #17648).

    Comparing the boot-time HEAD SHA with the SHA currently on disk
    detects exactly that situation: ``hermes update`` always advances
    HEAD via ``git pull``, while agent-driven file edits (the agent
    patching ``run_agent.py`` or ``gateway/run.py`` during a self-dev
    session) never touch HEAD. That frees the SHA signal from the
    false-positive class the old mtime check suffered from.

    Returns False whenever we cannot tell — no boot snapshot, non-git
    install, SHA read failure — or when HEAD is unchanged; erring on
    the side of "not stale" avoids restart loops.
    """
    # No boot snapshot at all → nothing to compare against.
    if not self._boot_wall_time:
        return False
    # Non-git install: ``hermes update`` is git-based, so the
    # stale-modules failure mode can't occur here; disabling the check
    # also guarantees zero false positives. (A future pip-install
    # update path would add a persistent update marker and compare its
    # timestamp to self._boot_wall_time.)
    if not self._boot_git_sha:
        return False
    try:
        on_disk = self._current_git_sha_cached()
    except Exception:
        return False
    # Unreadable current SHA → can't tell → not stale.
    return bool(on_disk) and on_disk != self._boot_git_sha
|
||||
|
||||
def _trigger_stale_code_restart(self) -> None:
    """Request one graceful restart after the stale-code check fires.

    Guarded so it acts at most once per process. The request follows
    the ordinary drain path, letting in-flight agent turns complete
    before the process exits; a supervisor (systemd / launchd /
    detached profile watcher) then respawns the gateway with fresh
    code. Unsupervised ``hermes gateway run`` sessions simply exit —
    the warning log tells the operator a manual restart is needed.
    """
    if self._stale_code_restart_triggered:
        return
    self._stale_code_restart_triggered = True

    head_now = None
    try:
        head_now = self._current_git_sha_cached()
    except Exception:
        pass

    logger.warning(
        "Stale-code self-check: git HEAD moved since gateway boot "
        "(boot=%s, current=%s) — requesting graceful restart. "
        "See Issue #17648.",
        (self._boot_git_sha or "?")[:12],
        (head_now or "?")[:12],
    )
    try:
        self.request_restart(detached=False, via_service=True)
    except Exception as exc:
        logger.error("Stale-code restart request failed: %s", exc)
|
||||
|
||||
async def start(self) -> bool:
|
||||
"""
|
||||
Start the gateway and all configured platform adapters.
|
||||
@@ -4600,6 +4873,27 @@ class GatewayRunner:
|
||||
"""
|
||||
source = event.source
|
||||
|
||||
# Stale-code self-check (Issue #17648). A gateway that survives
|
||||
# ``hermes update`` keeps old modules cached in sys.modules; the
|
||||
# first inbound message is our earliest safe chance to detect
|
||||
# this and restart gracefully before we dispatch to the agent
|
||||
# and hit ImportError on freshly-added names (e.g. cfg_get).
|
||||
# Idempotent — runs the real check at most once per message, and
|
||||
# request_restart() no-ops after the first call.
|
||||
try:
|
||||
if self._detect_stale_code():
|
||||
self._trigger_stale_code_restart()
|
||||
# Acknowledge to the user so they don't see a silent
|
||||
# drop; the gateway will be back up in a moment via the
|
||||
# service manager / profile-watcher respawn.
|
||||
return (
|
||||
"⟳ Gateway code was updated in the background — "
|
||||
"restarting this gateway so your next message runs "
|
||||
"on the new code. Please retry in a moment."
|
||||
)
|
||||
except Exception as _stale_exc:
|
||||
logger.debug("Stale-code self-check failed: %s", _stale_exc)
|
||||
|
||||
# Internal events (e.g. background-process completion notifications)
|
||||
# are system-generated and must skip user authorization.
|
||||
is_internal = bool(getattr(event, "internal", False))
|
||||
|
||||
@@ -544,7 +544,13 @@ DEFAULT_CONFIG = {
|
||||
# via TERMINAL_LOCAL_PERSISTENT env var.
|
||||
"persistent_shell": True,
|
||||
},
|
||||
|
||||
|
||||
"web": {
|
||||
"backend": "", # shared fallback — applies to both search and extract
|
||||
"search_backend": "", # per-capability override for web_search (e.g. "searxng")
|
||||
"extract_backend": "", # per-capability override for web_extract (e.g. "native")
|
||||
},
|
||||
|
||||
"browser": {
|
||||
"inactivity_timeout": 120,
|
||||
"command_timeout": 30, # Timeout for browser commands in seconds (screenshot, navigate, etc.)
|
||||
|
||||
55
nix/lib.nix
55
nix/lib.nix
@@ -163,42 +163,35 @@
|
||||
for entry in "''${ENTRIES[@]}"; do
|
||||
IFS=":" read -r ATTR FOLDER NIX_FILE <<< "$entry"
|
||||
echo "==> .#$ATTR ($FOLDER -> $NIX_FILE)"
|
||||
|
||||
# Compute the actual hash from the lockfile directly using
|
||||
# prefetch-npm-deps. This avoids false "ok" from nix build when
|
||||
# an old derivation is cached in a substituter (cachix/cache.nixos.org).
|
||||
LOCK_FILE="$FOLDER/package-lock.json"
|
||||
NEW_HASH=$(${pkgs.lib.getExe pkgs.prefetch-npm-deps} "$LOCK_FILE" 2>/dev/null)
|
||||
if [ -z "$NEW_HASH" ]; then
|
||||
echo " prefetch-npm-deps failed, falling back to nix build" >&2
|
||||
OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --print-build-logs 2>&1)
|
||||
STATUS=$?
|
||||
if [ "$STATUS" -eq 0 ]; then
|
||||
echo " ok (via nix build)"
|
||||
continue
|
||||
fi
|
||||
NEW_HASH=$(echo "$OUTPUT" | awk '/got:/ {print $2; exit}')
|
||||
if [ -z "$NEW_HASH" ]; then
|
||||
if echo "$OUTPUT" | grep -qE "throttled|HTTP error 418|substituter .* is disabled|some outputs of .* are not valid"; then
|
||||
echo " skipped (transient cache failure — see primary nix build for real status)" >&2
|
||||
echo "$OUTPUT" | tail -8 >&2
|
||||
continue
|
||||
fi
|
||||
echo " build failed with no hash mismatch:" >&2
|
||||
echo "$OUTPUT" | tail -40 >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
OLD_HASH=$(grep -oE 'hash = "sha256-[^"]+"' "$NIX_FILE" | head -1 \
|
||||
| sed -E 's/hash = "(.*)"/\1/')
|
||||
|
||||
if [ "$NEW_HASH" = "$OLD_HASH" ]; then
|
||||
OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --print-build-logs 2>&1)
|
||||
STATUS=$?
|
||||
if [ "$STATUS" -eq 0 ]; then
|
||||
echo " ok"
|
||||
continue
|
||||
fi
|
||||
|
||||
NEW_HASH=$(echo "$OUTPUT" | awk '/got:/ {print $2; exit}')
|
||||
if [ -z "$NEW_HASH" ]; then
|
||||
# Magic-Nix-Cache occasionally returns HTTP 418 / cache-throttled
|
||||
# mid-run; nix then prints "outputs … not valid, so checking is
|
||||
# not possible" without a `got:` line. That's an infrastructure
|
||||
# blip, not a stale lockfile — warn + skip rather than failing
|
||||
# the lint. A real hash mismatch would still surface in the
|
||||
# primary `.#$ATTR` build, which is a separate CI job.
|
||||
if echo "$OUTPUT" | grep -qE "throttled|HTTP error 418|substituter .* is disabled|some outputs of .* are not valid"; then
|
||||
echo " skipped (transient cache failure — see primary nix build for real status)" >&2
|
||||
echo "$OUTPUT" | tail -8 >&2
|
||||
continue
|
||||
fi
|
||||
echo " build failed with no hash mismatch:" >&2
|
||||
echo "$OUTPUT" | tail -40 >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
HASH_LINE=$(grep -n 'hash = "sha256-' "$NIX_FILE" | head -1 | cut -d: -f1)
|
||||
OLD_HASH=$(grep -oE 'hash = "sha256-[^"]+"' "$NIX_FILE" | head -1 \
|
||||
| sed -E 's/hash = "(.*)"/\1/')
|
||||
LOCK_FILE="$FOLDER/package-lock.json"
|
||||
echo " stale: $NIX_FILE:$HASH_LINE $OLD_HASH -> $NEW_HASH"
|
||||
STALE=1
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ let
|
||||
src = ../ui-tui;
|
||||
npmDeps = pkgs.fetchNpmDeps {
|
||||
inherit src;
|
||||
hash = "sha256-MLcLhjTF6dgdvNBtJWzo8Nh19eNh/ZitD2b07nm61Tc=";
|
||||
hash = "sha256-a/HGI9OgVcTnZrMXA7xFMGnFoVxyHe95fulVz+WNYB0=";
|
||||
};
|
||||
|
||||
npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
|
||||
|
||||
412
tests/gateway/test_stale_code_self_check.py
Normal file
412
tests/gateway/test_stale_code_self_check.py
Normal file
@@ -0,0 +1,412 @@
|
||||
"""Tests for the gateway stale-code self-check (Issue #17648).
|
||||
|
||||
A gateway that survives ``hermes update`` keeps pre-update modules cached
|
||||
in ``sys.modules``. Later imports of names added post-update (e.g.
|
||||
``cfg_get`` from PR #17304) raise ImportError against the stale module
|
||||
object.
|
||||
|
||||
The self-check compares the git HEAD SHA at boot to the current SHA on
|
||||
disk. ``hermes update`` always moves HEAD forward via ``git pull``;
|
||||
agent-driven file edits (Hermes editing ``run_agent.py`` / ``gateway/run.py``
|
||||
during a self-dev session) never move HEAD — so the SHA signal is free of
|
||||
the false-positive class that the earlier mtime-based check suffered from.
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.run import (
|
||||
GatewayRunner,
|
||||
_compute_repo_mtime,
|
||||
_read_git_head_sha,
|
||||
_STALE_CODE_SENTINELS,
|
||||
_GIT_SHA_CACHE_TTL_SECS,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_tmp_repo(tmp_path: Path) -> Path:
    """Lay down every stale-code sentinel file under ``tmp_path``."""
    for relative in _STALE_CODE_SENTINELS:
        sentinel = tmp_path / relative
        sentinel.parent.mkdir(parents=True, exist_ok=True)
        sentinel.write_text("# test sentinel\n")
    return tmp_path
|
||||
|
||||
|
||||
def _make_git_repo(tmp_path: Path, sha: str = "a" * 40, branch: str = "main") -> Path:
|
||||
"""Stamp a minimal .git directory so _read_git_head_sha can resolve a SHA.
|
||||
|
||||
We don't run real git — just lay down the files the reader walks
|
||||
(.git/HEAD pointing at refs/heads/<branch>, refs/heads/<branch>
|
||||
containing the SHA).
|
||||
"""
|
||||
git_dir = tmp_path / ".git"
|
||||
git_dir.mkdir(parents=True, exist_ok=True)
|
||||
(git_dir / "HEAD").write_text(f"ref: refs/heads/{branch}\n")
|
||||
refs_dir = git_dir / "refs" / "heads"
|
||||
refs_dir.mkdir(parents=True, exist_ok=True)
|
||||
(refs_dir / branch).write_text(f"{sha}\n")
|
||||
return tmp_path
|
||||
|
||||
|
||||
def _set_head_sha(repo_root: Path, sha: str, branch: str = "main") -> None:
|
||||
"""Rewrite the current branch ref to a new SHA (simulates git pull)."""
|
||||
(repo_root / ".git" / "refs" / "heads" / branch).write_text(f"{sha}\n")
|
||||
|
||||
|
||||
def _make_runner(
    repo_root: Path,
    *,
    boot_sha: str | None,
    boot_wall: float | None = None,
    boot_mtime: float = 0.0,
):
    """Bare GatewayRunner with just the stale-check attributes set.

    Built via ``object.__new__`` so ``GatewayRunner.__init__`` (and any
    I/O it performs) never runs; only the attributes the stale-code
    check reads are assigned.

    Args:
        repo_root: Fake repo directory the staleness check inspects.
        boot_sha: HEAD SHA "captured at boot" (None = non-git install).
        boot_wall: Boot wall-clock time; defaults to now. (Annotation
            fixed: the default is None, so the type is ``float | None``,
            not plain ``float``.)
        boot_mtime: Boot-time sentinel mtime snapshot.
    """
    if boot_wall is None:
        boot_wall = time.time()
    runner = object.__new__(GatewayRunner)
    runner._repo_root_for_staleness = repo_root
    runner._boot_wall_time = boot_wall
    runner._boot_git_sha = boot_sha
    runner._boot_repo_mtime = boot_mtime
    runner._stale_code_notified = set()
    runner._stale_code_restart_triggered = False
    # Seed the SHA cache exactly as GatewayRunner.__init__ does.
    runner._cached_current_sha = boot_sha
    runner._cached_current_sha_at = boot_wall
    return runner
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _read_git_head_sha — raw SHA reader
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_read_git_head_sha_branch_ref(tmp_path):
    """A symbolic HEAD (ref: refs/heads/<branch>) resolves via the loose ref."""
    expected = "b" * 40
    _make_git_repo(tmp_path, sha=expected, branch="main")
    assert _read_git_head_sha(tmp_path) == expected


def test_read_git_head_sha_detached_head(tmp_path):
    """A detached HEAD stores the commit SHA directly in .git/HEAD."""
    expected = "c" * 40
    dot_git = tmp_path / ".git"
    dot_git.mkdir()
    (dot_git / "HEAD").write_text(f"{expected}\n")
    assert _read_git_head_sha(tmp_path) == expected


def test_read_git_head_sha_packed_refs(tmp_path):
    """With no loose ref on disk, the SHA is found via packed-refs."""
    expected = "d" * 40
    dot_git = tmp_path / ".git"
    dot_git.mkdir()
    (dot_git / "HEAD").write_text("ref: refs/heads/main\n")
    # Deliberately no refs/heads/main file — packed-refs only.
    (dot_git / "packed-refs").write_text(
        "# pack-refs with: peeled fully-peeled sorted\n"
        f"{expected} refs/heads/main\n"
    )
    assert _read_git_head_sha(tmp_path) == expected


def test_read_git_head_sha_worktree_gitdir_file(tmp_path):
    """Worktree layout: .git is a pointer file; refs live in the main checkout.

    Real git worktrees keep shared refs (refs/heads/*) under the primary
    checkout's .git/ and leave a ``commondir`` breadcrumb inside the
    worktree gitdir. The reader must chase that breadcrumb to resolve
    the branch ref — this is the layout Hermes dev sessions actually use.
    """
    expected = "e" * 40

    # Primary checkout
    main_git = tmp_path / "main-repo" / ".git"
    (main_git / "refs" / "heads").mkdir(parents=True)
    (main_git / "HEAD").write_text("ref: refs/heads/main\n")
    (main_git / "refs" / "heads" / "main").write_text("0" * 40 + "\n")

    # Worktree gitdir under main-repo/.git/worktrees/<name>/
    wt_gitdir = main_git / "worktrees" / "feature"
    wt_gitdir.mkdir(parents=True)
    (wt_gitdir / "HEAD").write_text("ref: refs/heads/feature\n")
    # commondir points back at the main .git via a relative "../.."
    (wt_gitdir / "commondir").write_text("../..\n")
    # The feature branch's ref lives in the shared refs/heads
    (main_git / "refs" / "heads" / "feature").write_text(f"{expected}\n")

    # The worktree checkout itself: .git is a pointer file
    checkout = tmp_path / "wt"
    checkout.mkdir()
    (checkout / ".git").write_text(f"gitdir: {wt_gitdir}\n")

    assert _read_git_head_sha(checkout) == expected


def test_read_git_head_sha_worktree_packed_refs_in_common(tmp_path):
    """Worktree whose branch exists only in the main checkout's packed-refs."""
    expected = "f" * 40

    main_git = tmp_path / "main-repo" / ".git"
    main_git.mkdir(parents=True)
    (main_git / "HEAD").write_text("ref: refs/heads/main\n")
    # packed-refs lives in the common (main) .git
    (main_git / "packed-refs").write_text(
        "# pack-refs with: peeled fully-peeled sorted\n"
        f"{expected} refs/heads/feature\n"
    )

    wt_gitdir = main_git / "worktrees" / "feature"
    wt_gitdir.mkdir(parents=True)
    (wt_gitdir / "HEAD").write_text("ref: refs/heads/feature\n")
    (wt_gitdir / "commondir").write_text("../..\n")

    checkout = tmp_path / "wt"
    checkout.mkdir()
    (checkout / ".git").write_text(f"gitdir: {wt_gitdir}\n")

    assert _read_git_head_sha(checkout) == expected


def test_read_git_head_sha_no_git_returns_none(tmp_path):
    """No .git dir at all → None (non-git install, safely disables the check)."""
    assert _read_git_head_sha(tmp_path) is None


def test_read_git_head_sha_malformed_head_returns_none(tmp_path):
    """An empty HEAD file yields None rather than looping on a corrupt repo."""
    dot_git = tmp_path / ".git"
    dot_git.mkdir()
    (dot_git / "HEAD").write_text("")
    assert _read_git_head_sha(tmp_path) is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _detect_stale_code — the main regression guard
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_detect_stale_code_false_when_sha_unchanged(tmp_path):
    """Identical boot and on-disk SHAs → not stale (no restart)."""
    head = "a" * 40
    _make_git_repo(tmp_path, sha=head)
    runner = _make_runner(tmp_path, boot_sha=head)
    runner._cached_current_sha_at = 0.0  # expire the cache → fresh read
    assert runner._detect_stale_code() is False


def test_detect_stale_code_true_after_git_pull(tmp_path):
    """HEAD moved on disk (hermes update happened) → stale."""
    head_at_boot = "a" * 40
    _make_git_repo(tmp_path, sha=head_at_boot)
    runner = _make_runner(tmp_path, boot_sha=head_at_boot)
    _set_head_sha(tmp_path, "b" * 40)  # the "git pull"
    runner._cached_current_sha_at = 0.0  # expire the cache
    assert runner._detect_stale_code() is True


def test_detect_stale_code_ignores_agent_file_edits(tmp_path):
    """THE CORE REGRESSION: agent edits to source files do NOT trigger restart.

    This is the motivating incident for the SHA-based check. Under the
    previous mtime-based scheme, any ``patch`` / ``write_file`` call
    against run_agent.py / gateway/run.py / hermes_cli/config.py would
    flip the stale-check to True and force a gateway restart on the
    next message — even though no update actually happened. SHA
    comparison decouples the two: git HEAD only moves on ``git pull``,
    never on file writes.
    """
    head = "a" * 40
    _make_git_repo(tmp_path, sha=head)
    _make_tmp_repo(tmp_path)  # sentinel files on disk as well
    runner = _make_runner(tmp_path, boot_sha=head)

    # Rewrite every sentinel with an mtime far in the future — exactly
    # the scenario that used to false-positive the old mtime check.
    far_future = time.time() + 10_000
    for relative in _STALE_CODE_SENTINELS:
        sentinel = tmp_path / relative
        if sentinel.is_file():
            sentinel.write_text("# agent just edited this\n")
            os.utime(sentinel, (far_future, far_future))

    runner._cached_current_sha_at = 0.0  # expire the cache
    # HEAD never moved, so the check must stay False.
    assert runner._detect_stale_code() is False


def test_detect_stale_code_false_for_non_git_install(tmp_path):
    """Non-git install (no .git dir) → check disabled, never fires."""
    # boot_sha=None short-circuits the check regardless of disk state.
    runner = _make_runner(tmp_path, boot_sha=None)
    assert runner._detect_stale_code() is False


def test_detect_stale_code_false_when_no_boot_wall_time(tmp_path):
    """Missing boot snapshot → "can't tell" → not stale (no restart loop)."""
    runner = _make_runner(tmp_path, boot_sha="a" * 40, boot_wall=0.0)
    assert runner._detect_stale_code() is False


def test_detect_stale_code_handles_disappearing_git_dir(tmp_path):
    """.git removed mid-run → current SHA is None → not stale (don't loop)."""
    head = "a" * 40
    _make_git_repo(tmp_path, sha=head)
    runner = _make_runner(tmp_path, boot_sha=head)
    # Nuke the git dir after "boot"
    import shutil
    shutil.rmtree(tmp_path / ".git")
    runner._cached_current_sha_at = 0.0  # expire the cache
    assert runner._detect_stale_code() is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SHA cache
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_current_sha_cache_collapses_bursts(tmp_path, monkeypatch):
    """Calls inside the TTL window are served from cache (one read total)."""
    head = "a" * 40
    _make_git_repo(tmp_path, sha=head)
    runner = _make_runner(tmp_path, boot_sha=head)

    counter = {"n": 0}
    inner = _read_git_head_sha

    def instrumented(repo_root):
        counter["n"] += 1
        return inner(repo_root)

    from gateway import run as run_mod
    monkeypatch.setattr(run_mod, "_read_git_head_sha", instrumented)

    # Guarantee the first call really reads from disk.
    runner._cached_current_sha_at = 0.0
    runner._current_git_sha_cached()
    baseline = counter["n"]

    # Back-to-back calls must not perform any new read.
    runner._current_git_sha_cached()
    runner._current_git_sha_cached()
    assert counter["n"] == baseline


def test_current_sha_cache_expires_after_ttl(tmp_path, monkeypatch):
    """Once _GIT_SHA_CACHE_TTL_SECS has passed, the next call reads fresh."""
    head = "a" * 40
    _make_git_repo(tmp_path, sha=head)
    runner = _make_runner(tmp_path, boot_sha=head)

    counter = {"n": 0}
    inner = _read_git_head_sha

    def instrumented(repo_root):
        counter["n"] += 1
        return inner(repo_root)

    from gateway import run as run_mod
    monkeypatch.setattr(run_mod, "_read_git_head_sha", instrumented)

    runner._cached_current_sha_at = 0.0
    runner._current_git_sha_cached()
    baseline = counter["n"]

    # Age the cache entry beyond the TTL, then call again.
    runner._cached_current_sha_at = time.time() - (_GIT_SHA_CACHE_TTL_SECS + 1.0)
    runner._current_git_sha_cached()
    assert counter["n"] == baseline + 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _trigger_stale_code_restart — idempotency preserved
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_trigger_stale_code_restart_is_idempotent(tmp_path):
|
||||
"""Calling _trigger_stale_code_restart twice only requests restart once."""
|
||||
sha = "a" * 40
|
||||
_make_git_repo(tmp_path, sha=sha)
|
||||
runner = _make_runner(tmp_path, boot_sha=sha)
|
||||
|
||||
calls = []
|
||||
|
||||
def fake_request_restart(*, detached=False, via_service=False):
|
||||
calls.append((detached, via_service))
|
||||
return True
|
||||
|
||||
runner.request_restart = fake_request_restart
|
||||
|
||||
runner._trigger_stale_code_restart()
|
||||
runner._trigger_stale_code_restart()
|
||||
runner._trigger_stale_code_restart()
|
||||
|
||||
assert len(calls) == 1
|
||||
assert runner._stale_code_restart_triggered is True
|
||||
|
||||
|
||||
def test_trigger_stale_code_restart_survives_request_failure(tmp_path):
|
||||
"""If request_restart raises, we swallow and mark as triggered anyway."""
|
||||
sha = "a" * 40
|
||||
_make_git_repo(tmp_path, sha=sha)
|
||||
runner = _make_runner(tmp_path, boot_sha=sha)
|
||||
|
||||
def boom(*, detached=False, via_service=False):
|
||||
raise RuntimeError("no event loop")
|
||||
|
||||
runner.request_restart = boom
|
||||
|
||||
# Should not raise
|
||||
runner._trigger_stale_code_restart()
|
||||
|
||||
# Marked triggered so we don't retry on every subsequent message
|
||||
assert runner._stale_code_restart_triggered is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Class-level defaults — tests that build bare runners via object.__new__
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_class_level_defaults_prevent_uninitialized_access():
|
||||
"""Partial construction via object.__new__ must not crash _detect_stale_code."""
|
||||
runner = object.__new__(GatewayRunner)
|
||||
# Don't set any instance attrs — class-level defaults should kick in
|
||||
runner._repo_root_for_staleness = Path(".")
|
||||
# _boot_wall_time / _boot_git_sha fall through to class defaults
|
||||
# (0.0 and None respectively)
|
||||
assert runner._detect_stale_code() is False
|
||||
# _stale_code_restart_triggered falls through to class default (False)
|
||||
assert runner._stale_code_restart_triggered is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Legacy mtime reader kept for compatibility — light sanity check only
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_compute_repo_mtime_still_returns_newest(tmp_path):
|
||||
"""_compute_repo_mtime remains available for any legacy callers."""
|
||||
repo = _make_tmp_repo(tmp_path)
|
||||
|
||||
baseline = time.time() - 100
|
||||
for rel in _STALE_CODE_SENTINELS:
|
||||
os.utime(repo / rel, (baseline, baseline))
|
||||
|
||||
newer = time.time()
|
||||
os.utime(repo / "hermes_cli/config.py", (newer, newer))
|
||||
|
||||
result = _compute_repo_mtime(repo)
|
||||
assert abs(result - newer) < 1.0
|
||||
|
||||
|
||||
def test_compute_repo_mtime_missing_files_returns_zero(tmp_path):
|
||||
"""Legacy sanity: missing sentinels → 0.0."""
|
||||
assert _compute_repo_mtime(tmp_path) == 0.0
|
||||
194
tests/tools/test_web_providers.py
Normal file
194
tests/tools/test_web_providers.py
Normal file
@@ -0,0 +1,194 @@
|
||||
"""Tests for the web tools provider architecture.
|
||||
|
||||
Covers:
|
||||
- WebSearchProvider / WebExtractProvider ABC enforcement
|
||||
- Per-capability backend selection (_get_search_backend, _get_extract_backend)
|
||||
- Backward compatibility (web.backend still works as shared fallback)
|
||||
- Config keys merge correctly via DEFAULT_CONFIG
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ABC enforcement
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestWebProviderABCs:
|
||||
"""The ABCs enforce the interface contract."""
|
||||
|
||||
def test_cannot_instantiate_search_provider(self):
|
||||
from tools.web_providers.base import WebSearchProvider
|
||||
|
||||
with pytest.raises(TypeError):
|
||||
WebSearchProvider() # type: ignore[abstract]
|
||||
|
||||
def test_cannot_instantiate_extract_provider(self):
|
||||
from tools.web_providers.base import WebExtractProvider
|
||||
|
||||
with pytest.raises(TypeError):
|
||||
WebExtractProvider() # type: ignore[abstract]
|
||||
|
||||
def test_concrete_search_provider_works(self):
|
||||
from tools.web_providers.base import WebSearchProvider
|
||||
|
||||
class Dummy(WebSearchProvider):
|
||||
def provider_name(self) -> str:
|
||||
return "dummy"
|
||||
def is_configured(self) -> bool:
|
||||
return True
|
||||
def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
|
||||
return {"success": True, "data": {"web": []}}
|
||||
|
||||
d = Dummy()
|
||||
assert d.provider_name() == "dummy"
|
||||
assert d.is_configured() is True
|
||||
assert d.search("test")["success"] is True
|
||||
|
||||
def test_concrete_extract_provider_works(self):
|
||||
from tools.web_providers.base import WebExtractProvider
|
||||
|
||||
class Dummy(WebExtractProvider):
|
||||
def provider_name(self) -> str:
|
||||
return "dummy"
|
||||
def is_configured(self) -> bool:
|
||||
return True
|
||||
def extract(self, urls: List[str], **kwargs) -> Dict[str, Any]:
|
||||
return {"success": True, "data": [{"url": urls[0], "content": "x"}]}
|
||||
|
||||
d = Dummy()
|
||||
assert d.provider_name() == "dummy"
|
||||
assert d.extract(["https://example.com"])["success"] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-capability backend selection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPerCapabilityBackendSelection:
|
||||
"""_get_search_backend and _get_extract_backend read per-capability config."""
|
||||
|
||||
def test_search_backend_overrides_generic(self, monkeypatch):
|
||||
from tools import web_tools
|
||||
|
||||
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
|
||||
"backend": "firecrawl",
|
||||
"search_backend": "tavily",
|
||||
})
|
||||
monkeypatch.setenv("TAVILY_API_KEY", "test-key")
|
||||
assert web_tools._get_search_backend() == "tavily"
|
||||
|
||||
def test_extract_backend_overrides_generic(self, monkeypatch):
|
||||
from tools import web_tools
|
||||
|
||||
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
|
||||
"backend": "tavily",
|
||||
"extract_backend": "exa",
|
||||
})
|
||||
monkeypatch.setenv("EXA_API_KEY", "test-key")
|
||||
assert web_tools._get_extract_backend() == "exa"
|
||||
|
||||
def test_falls_back_to_generic_backend_when_search_backend_empty(self, monkeypatch):
|
||||
from tools import web_tools
|
||||
|
||||
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
|
||||
"backend": "tavily",
|
||||
"search_backend": "",
|
||||
})
|
||||
monkeypatch.setenv("TAVILY_API_KEY", "test-key")
|
||||
assert web_tools._get_search_backend() == "tavily"
|
||||
|
||||
def test_falls_back_to_generic_backend_when_extract_backend_empty(self, monkeypatch):
|
||||
from tools import web_tools
|
||||
|
||||
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
|
||||
"backend": "parallel",
|
||||
"extract_backend": "",
|
||||
})
|
||||
monkeypatch.setenv("PARALLEL_API_KEY", "test-key")
|
||||
assert web_tools._get_extract_backend() == "parallel"
|
||||
|
||||
def test_search_backend_ignored_when_not_available(self, monkeypatch):
|
||||
from tools import web_tools
|
||||
|
||||
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
|
||||
"backend": "firecrawl",
|
||||
"search_backend": "exa", # set but no EXA_API_KEY
|
||||
})
|
||||
monkeypatch.delenv("EXA_API_KEY", raising=False)
|
||||
monkeypatch.setenv("FIRECRAWL_API_KEY", "fc-key")
|
||||
# Should fall back to firecrawl since exa isn't configured
|
||||
assert web_tools._get_search_backend() == "firecrawl"
|
||||
|
||||
def test_fully_backward_compatible_with_web_backend_only(self, monkeypatch):
|
||||
from tools import web_tools
|
||||
|
||||
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
|
||||
"backend": "tavily",
|
||||
})
|
||||
monkeypatch.setenv("TAVILY_API_KEY", "test-key")
|
||||
# No search_backend or extract_backend set — both fall through
|
||||
assert web_tools._get_search_backend() == "tavily"
|
||||
assert web_tools._get_extract_backend() == "tavily"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config key presence in DEFAULT_CONFIG
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDefaultConfig:
|
||||
"""The web section exists in DEFAULT_CONFIG with per-capability keys."""
|
||||
|
||||
def test_web_section_in_default_config(self):
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
|
||||
assert "web" in DEFAULT_CONFIG
|
||||
web = DEFAULT_CONFIG["web"]
|
||||
assert "backend" in web
|
||||
assert "search_backend" in web
|
||||
assert "extract_backend" in web
|
||||
# All empty string by default (no override)
|
||||
assert web["backend"] == ""
|
||||
assert web["search_backend"] == ""
|
||||
assert web["extract_backend"] == ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# web_search_tool uses _get_search_backend
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestWebSearchUsesSearchBackend:
|
||||
"""web_search_tool dispatches through _get_search_backend not _get_backend."""
|
||||
|
||||
def test_search_tool_calls_search_backend(self, monkeypatch):
|
||||
from tools import web_tools
|
||||
|
||||
called_with = []
|
||||
original_get_search = web_tools._get_search_backend
|
||||
|
||||
def tracking_get_search():
|
||||
result = original_get_search()
|
||||
called_with.append(("search", result))
|
||||
return result
|
||||
|
||||
monkeypatch.setattr(web_tools, "_get_search_backend", tracking_get_search)
|
||||
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "firecrawl"})
|
||||
monkeypatch.setenv("FIRECRAWL_API_KEY", "fake")
|
||||
|
||||
# The function will fail at Firecrawl client level but we just
|
||||
# need to verify _get_search_backend was called
|
||||
try:
|
||||
web_tools.web_search_tool("test", 1)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
assert len(called_with) > 0
|
||||
assert called_with[0][0] == "search"
|
||||
73
tools/web_providers/ARCHITECTURE.md
Normal file
73
tools/web_providers/ARCHITECTURE.md
Normal file
@@ -0,0 +1,73 @@
|
||||
# Web Tools Provider Architecture
|
||||
|
||||
## Overview
|
||||
|
||||
Web tools (`web_search`, `web_extract`) use a **per-capability backend selection** system that allows different providers for search and extract independently.
|
||||
|
||||
## Config Keys
|
||||
|
||||
```yaml
|
||||
web:
|
||||
backend: "firecrawl" # Shared fallback — applies to both if specific keys not set
|
||||
search_backend: "" # Per-capability override for web_search
|
||||
extract_backend: "" # Per-capability override for web_extract
|
||||
```
|
||||
|
||||
**Selection priority (per capability):**
|
||||
1. `web.search_backend` / `web.extract_backend` (explicit per-capability)
|
||||
2. `web.backend` (shared fallback)
|
||||
3. Auto-detect from environment variables
|
||||
|
||||
When per-capability keys are empty (default), behavior is identical to the legacy single-backend selection.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
web_search_tool()
|
||||
└─ _get_search_backend()
|
||||
├─ web.search_backend (if set + available)
|
||||
└─ _get_backend() fallback
|
||||
|
||||
web_extract_tool()
|
||||
└─ _get_extract_backend()
|
||||
├─ web.extract_backend (if set + available)
|
||||
└─ _get_backend() fallback
|
||||
```
|
||||
|
||||
## Provider ABCs
|
||||
|
||||
New providers implement these interfaces in `tools/web_providers/`:
|
||||
|
||||
```python
|
||||
from tools.web_providers.base import WebSearchProvider, WebExtractProvider
|
||||
|
||||
class MySearchProvider(WebSearchProvider):
|
||||
def provider_name(self) -> str: ...
|
||||
def is_configured(self) -> bool: ...
|
||||
def search(self, query: str, limit: int = 5) -> Dict[str, Any]: ...
|
||||
|
||||
class MyExtractProvider(WebExtractProvider):
|
||||
def provider_name(self) -> str: ...
|
||||
def is_configured(self) -> bool: ...
|
||||
def extract(self, urls: List[str], **kwargs) -> Dict[str, Any]: ...
|
||||
```
|
||||
|
||||
## Adding a New Search Provider
|
||||
|
||||
1. Create `tools/web_providers/your_provider.py` implementing `WebSearchProvider`
|
||||
2. Add availability check to `_is_backend_available()` in `web_tools.py`
|
||||
3. Add dispatch branch in `web_search_tool()`
|
||||
4. Add provider to `hermes tools` picker in `tools_config.py`
|
||||
5. Add env var to `OPTIONAL_ENV_VARS` in `config.py` (if needed)
|
||||
6. Write tests in `tests/tools/`
|
||||
|
||||
Search-only providers (like SearXNG) don't need to implement `WebExtractProvider`.
|
||||
Extract-only providers don't need to implement `WebSearchProvider`.
|
||||
|
||||
## hermes tools UX
|
||||
|
||||
The provider picker uses **progressive disclosure**:
|
||||
- **Default path** (90% of users): Pick one provider → sets `web.backend` for both. One selection, done.
|
||||
- **Advanced path**: "Configure separately" option at bottom → two-step sub-picker for search + extract independently.
|
||||
|
||||
See `.hermes/plans/2026-05-03-web-tools-provider-architecture.md` for the full UX flow diagram.
|
||||
6
tools/web_providers/__init__.py
Normal file
6
tools/web_providers/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""Web capability providers — search, extract, crawl.
|
||||
|
||||
Each capability has an ABC in ``base.py`` and vendor implementations in
|
||||
sibling modules. Provider registries in ``web_tools.py`` map config names
|
||||
to provider classes.
|
||||
"""
|
||||
89
tools/web_providers/base.py
Normal file
89
tools/web_providers/base.py
Normal file
@@ -0,0 +1,89 @@
|
||||
"""Abstract base classes for web capability providers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
class WebSearchProvider(ABC):
|
||||
"""Interface for web search backends (Firecrawl, Tavily, Exa, etc.).
|
||||
|
||||
Implementations live in sibling modules. The user selects a provider
|
||||
via ``hermes tools``; the choice is persisted as
|
||||
``config["web"]["search_backend"]`` (falling back to
|
||||
``config["web"]["backend"]``).
|
||||
|
||||
Search providers return results in a normalized format::
|
||||
|
||||
{
|
||||
"success": True,
|
||||
"data": {
|
||||
"web": [
|
||||
{"title": str, "url": str, "description": str, "position": int},
|
||||
...
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
On failure::
|
||||
|
||||
{"success": False, "error": str}
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def provider_name(self) -> str:
|
||||
"""Short, human-readable name shown in logs and diagnostics."""
|
||||
|
||||
@abstractmethod
|
||||
def is_configured(self) -> bool:
|
||||
"""Return True when all required env vars / credentials are present.
|
||||
|
||||
Called at tool-registration time to gate availability.
|
||||
Must be cheap — no network calls.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
|
||||
"""Execute a web search and return normalized results."""
|
||||
|
||||
|
||||
class WebExtractProvider(ABC):
|
||||
"""Interface for web content extraction backends.
|
||||
|
||||
Implementations live in sibling modules. The user selects a provider
|
||||
via ``hermes tools``; the choice is persisted as
|
||||
``config["web"]["extract_backend"]`` (falling back to
|
||||
``config["web"]["backend"]``).
|
||||
|
||||
Extract providers return results in a normalized format::
|
||||
|
||||
{
|
||||
"success": True,
|
||||
"data": [
|
||||
{"url": str, "title": str, "content": str,
|
||||
"raw_content": str, "metadata": dict},
|
||||
...
|
||||
]
|
||||
}
|
||||
|
||||
On failure::
|
||||
|
||||
{"success": False, "error": str}
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def provider_name(self) -> str:
|
||||
"""Short, human-readable name shown in logs and diagnostics."""
|
||||
|
||||
@abstractmethod
|
||||
def is_configured(self) -> bool:
|
||||
"""Return True when all required env vars / credentials are present.
|
||||
|
||||
Called at tool-registration time to gate availability.
|
||||
Must be cheap — no network calls.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def extract(self, urls: List[str], **kwargs) -> Dict[str, Any]:
|
||||
"""Extract content from the given URLs and return normalized results."""
|
||||
@@ -119,7 +119,7 @@ def _load_web_config() -> dict:
|
||||
return {}
|
||||
|
||||
def _get_backend() -> str:
|
||||
"""Determine which web backend to use.
|
||||
"""Determine which web backend to use (shared fallback).
|
||||
|
||||
Reads ``web.backend`` from config.yaml (set by ``hermes tools``).
|
||||
Falls back to whichever API key is present for users who configured
|
||||
@@ -145,6 +145,44 @@ def _get_backend() -> str:
|
||||
return "firecrawl" # default (backward compat)
|
||||
|
||||
|
||||
def _get_search_backend() -> str:
|
||||
"""Determine which backend to use for web_search specifically.
|
||||
|
||||
Selection priority:
|
||||
1. ``web.search_backend`` (per-capability override)
|
||||
2. ``web.backend`` (shared fallback — existing behavior)
|
||||
3. Auto-detect from env vars
|
||||
|
||||
This enables using different providers for search vs extract
|
||||
(e.g. SearXNG for search + Firecrawl for extract).
|
||||
"""
|
||||
return _get_capability_backend("search")
|
||||
|
||||
|
||||
def _get_extract_backend() -> str:
|
||||
"""Determine which backend to use for web_extract specifically.
|
||||
|
||||
Selection priority:
|
||||
1. ``web.extract_backend`` (per-capability override)
|
||||
2. ``web.backend`` (shared fallback — existing behavior)
|
||||
3. Auto-detect from env vars
|
||||
"""
|
||||
return _get_capability_backend("extract")
|
||||
|
||||
|
||||
def _get_capability_backend(capability: str) -> str:
|
||||
"""Shared helper for per-capability backend selection.
|
||||
|
||||
Reads ``web.{capability}_backend`` from config; if set and available,
|
||||
uses it. Otherwise falls through to the shared ``_get_backend()``.
|
||||
"""
|
||||
cfg = _load_web_config()
|
||||
specific = (cfg.get(f"{capability}_backend") or "").lower().strip()
|
||||
if specific and _is_backend_available(specific):
|
||||
return specific
|
||||
return _get_backend()
|
||||
|
||||
|
||||
def _is_backend_available(backend: str) -> bool:
|
||||
"""Return True when the selected backend is currently usable."""
|
||||
if backend == "exa":
|
||||
@@ -1127,8 +1165,8 @@ def web_search_tool(query: str, limit: int = 5) -> str:
|
||||
if is_interrupted():
|
||||
return tool_error("Interrupted", success=False)
|
||||
|
||||
# Dispatch to the configured backend
|
||||
backend = _get_backend()
|
||||
# Dispatch to the configured search backend
|
||||
backend = _get_search_backend()
|
||||
if backend == "parallel":
|
||||
response_data = _parallel_search(query, limit)
|
||||
debug_call_data["results_count"] = len(response_data.get("data", {}).get("web", []))
|
||||
@@ -1284,7 +1322,7 @@ async def web_extract_tool(
|
||||
if not safe_urls:
|
||||
results = []
|
||||
else:
|
||||
backend = _get_backend()
|
||||
backend = _get_extract_backend()
|
||||
|
||||
if backend == "parallel":
|
||||
results = await _parallel_extract(safe_urls)
|
||||
|
||||
Reference in New Issue
Block a user