Compare commits


1 Commits

Author SHA1 Message Date
kshitijk4poor
bf4e502147 refactor(web): per-capability backend selection for search/extract split
Introduce the foundation for independently selecting web search and
extract backends — enabling future combinations like SearXNG for
search + Firecrawl for extract.

Architecture:
- tools/web_providers/base.py: WebSearchProvider and WebExtractProvider
  ABCs with normalized result contracts (mirrors CloudBrowserProvider)
- tools/web_tools.py: _get_search_backend() and _get_extract_backend()
  read per-capability config keys, fall through to shared web.backend
- hermes_cli/config.py: web.search_backend and web.extract_backend in
  DEFAULT_CONFIG (empty = inherit from web.backend)

Behavioral change:
- web_search_tool() now dispatches via _get_search_backend()
- web_extract_tool() now dispatches via _get_extract_backend()
- When per-capability keys are empty (default), behavior is identical
  to before — _get_search_backend() falls through to _get_backend()

This is purely structural — no new backends are added. SearXNG and
other search-only/extract-only providers can now be added as simple
drop-in modules in follow-up PRs.

Adds 12 new tests; all 49 existing tests pass with zero regressions.

Ref: #19198
2026-05-06 10:30:15 +05:30
10 changed files with 1142 additions and 37 deletions

View File

@@ -93,6 +93,153 @@ def _telegramize_command_mentions(text: str, platform: Any) -> str:
_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT = 60 * 60
# --- Stale-code self-check ------------------------------------------------
# Long-running gateway processes that survive an ``hermes update`` keep the
# old ``hermes_cli.config`` (and friends) cached in ``sys.modules``. When
# the updated tool files on disk then try to ``from hermes_cli.config
# import cfg_get`` (added in PR #17304), the import resolves against the
# already-loaded stale module object and raises ``ImportError`` — see
# Issue #17648. Rather than papering over the import failure site-by-site
# in every tool file, detect the stale state centrally and auto-restart
# so the gateway reloads with fresh code.
#
# The signal we use is ``git rev-parse HEAD`` — the only thing ``hermes
# update`` moves that is NOT moved by agent-driven file edits. Earlier
# revisions of this check compared file mtimes across a sentinel set
# (run_agent.py, gateway/run.py, ...), but that produced false positives
# whenever the agent edited its own source files during a session:
# mtime jumps, stale-check fires, gateway restarts, user must retype.
# See the conversation at PR #<this> for the motivating incident.
#
# The legacy mtime sentinels are kept ONLY as a last-resort fallback for
# non-git installs (pip install from wheel, sparse clones with no .git
# dir). In those environments ``hermes update`` is not a supported path,
# so the check effectively no-ops — which is the safe behavior: better
# to ship one broken import than to restart on every agent-edit.
_STALE_CODE_SENTINELS: tuple[str, ...] = (
"hermes_cli/config.py",
"hermes_cli/__init__.py",
"run_agent.py",
"gateway/run.py",
"pyproject.toml",
)
# Cache git HEAD reads across consecutive messages so a chat burst doesn't
# spawn one subprocess per message. 5s is long enough to collapse a burst
# and short enough that the real post-update detection still fires within
# the user's perceived "next message" window.
_GIT_SHA_CACHE_TTL_SECS = 5.0
def _read_git_head_sha(repo_root: Path) -> Optional[str]:
"""Return the git HEAD SHA for ``repo_root``, or None if unavailable.
Reads ``.git/HEAD`` directly (and follows one level of ref) instead
of shelling out to ``git`` — cheaper, no subprocess tax, works on
gateway hosts that don't have a ``git`` binary on PATH. Returns
None for non-git installs (no ``.git`` dir) or any I/O error; callers
treat None as "can't tell" and skip the check.
Supports the three layouts we care about:
1. Main checkout: ``<repo>/.git/`` is a directory.
2. Git worktree: ``<repo>/.git`` is a file ``gitdir: <path>`` that
points at ``<main>/.git/worktrees/<name>/``. The worktree's
gitdir has HEAD + index but NOT refs/heads/ — those live in
the main checkout, and ``<worktree-gitdir>/commondir`` points
at the main ``.git``. We search both locations for refs.
3. Packed refs: ``refs/heads/<branch>`` is absent on disk but
listed in ``<main-git-dir>/packed-refs``.
"""
try:
git_dir = repo_root / ".git"
# Worktrees store ``.git`` as a file pointing at gitdir: <path>
if git_dir.is_file():
try:
content = git_dir.read_text().strip()
if content.startswith("gitdir:"):
git_dir = Path(content.split(":", 1)[1].strip())
if not git_dir.is_absolute():
git_dir = (repo_root / git_dir).resolve()
except OSError:
return None
if not git_dir.is_dir():
return None
# Figure out the "common" git dir — the one that owns shared refs.
# For a worktree, commondir points at it (relative path, resolve
# against git_dir). For a main checkout, common_dir == git_dir.
common_dir = git_dir
commondir_file = git_dir / "commondir"
if commondir_file.is_file():
try:
rel = commondir_file.read_text().strip()
candidate = (git_dir / rel).resolve() if rel else git_dir
if candidate.is_dir():
common_dir = candidate
except OSError:
pass
head_path = git_dir / "HEAD"
if not head_path.is_file():
return None
head_content = head_path.read_text().strip()
if head_content.startswith("ref:"):
# Symbolic ref — follow one level (e.g. ref: refs/heads/main).
# Worktree-local refs (bisect, rebase-merge state) live under
# git_dir; shared refs (refs/heads/*, refs/tags/*) live under
# common_dir. Try git_dir first, then common_dir.
ref_rel = head_content.split(":", 1)[1].strip()
for base in (git_dir, common_dir) if git_dir != common_dir else (git_dir,):
ref_path = base / ref_rel
if ref_path.is_file():
try:
sha = ref_path.read_text().strip()
except OSError:
continue
if sha:
return sha
# Packed refs fallback — always stored in the common dir.
packed = common_dir / "packed-refs"
if packed.is_file():
try:
for line in packed.read_text().splitlines():
line = line.strip()
if not line or line.startswith("#") or line.startswith("^"):
continue
parts = line.split(None, 1)
if len(parts) == 2 and parts[1] == ref_rel:
return parts[0] or None
except OSError:
return None
return None
# Detached HEAD — content is the SHA directly.
return head_content or None
except Exception:
return None
def _compute_repo_mtime(repo_root: Path) -> float:
"""Return the newest mtime across the stale-code sentinel files.
Legacy fallback used only for non-git installs (``.git`` missing).
Missing files are ignored (they may not exist on older checkouts).
Returns 0.0 if no sentinel file is readable — treat that as "can't
tell", which downstream callers interpret as "not stale" to avoid
false-positive restart loops.
"""
newest = 0.0
for rel in _STALE_CODE_SENTINELS:
try:
st = (repo_root / rel).stat()
except (OSError, FileNotFoundError):
continue
if st.st_mtime > newest:
newest = st.st_mtime
return newest
def _coerce_gateway_timestamp(value: Any) -> Optional[float]:
"""Best-effort conversion of stored gateway timestamps to epoch seconds.
@@ -960,6 +1107,13 @@ class GatewayRunner:
_stop_task: Optional[asyncio.Task] = None
_session_model_overrides: Dict[str, Dict[str, str]] = {}
_session_reasoning_overrides: Dict[str, Dict[str, Any]] = {}
# Stale-code self-check defaults (see _detect_stale_code()). Class-level
# so tests that construct GatewayRunner via ``object.__new__`` without
# running __init__ don't crash when _handle_message reads these.
_boot_wall_time: float = 0.0
_boot_repo_mtime: float = 0.0
_boot_git_sha: Optional[str] = None
_stale_code_restart_triggered: bool = False
def __init__(self, config: Optional[GatewayConfig] = None):
global _gateway_runner_ref
@@ -968,6 +1122,30 @@ class GatewayRunner:
self._warn_if_docker_media_delivery_is_risky()
_gateway_runner_ref = _weakref.ref(self)
# Boot-time snapshot used by the stale-code self-check. Captured
# before any work happens so post-update file writes are guaranteed
# to have newer mtimes. See _detect_stale_code() / Issue #17648.
try:
self._boot_wall_time: float = time.time()
self._repo_root_for_staleness: Path = Path(__file__).resolve().parent.parent
self._boot_git_sha: Optional[str] = _read_git_head_sha(
self._repo_root_for_staleness,
)
self._boot_repo_mtime: float = _compute_repo_mtime(
self._repo_root_for_staleness,
)
except Exception:
self._boot_wall_time = 0.0
self._repo_root_for_staleness = Path(".")
self._boot_git_sha = None
self._boot_repo_mtime = 0.0
self._stale_code_notified: set[str] = set()
self._stale_code_restart_triggered: bool = False
# Cached current-SHA read, refreshed at most every
# _GIT_SHA_CACHE_TTL_SECS so bursty chats don't hammer the filesystem.
self._cached_current_sha: Optional[str] = self._boot_git_sha
self._cached_current_sha_at: float = self._boot_wall_time
# Load ephemeral config from config.yaml / env vars.
# Both are injected at API-call time only and never persisted.
self._prefill_messages = self._load_prefill_messages()
@@ -2675,6 +2853,101 @@ class GatewayRunner:
task.add_done_callback(self._background_tasks.discard)
return True
def _current_git_sha_cached(self) -> Optional[str]:
"""Return the current HEAD SHA, cached for _GIT_SHA_CACHE_TTL_SECS.
A bursty chat (user mashes "hello?" three times) would otherwise
re-read ``.git/HEAD`` on every message. Caching collapses that
into a single read and still re-checks within the user's
perceived "next message" window.
"""
now = time.time()
if (
self._cached_current_sha is not None
and (now - self._cached_current_sha_at) < _GIT_SHA_CACHE_TTL_SECS
):
return self._cached_current_sha
try:
sha = _read_git_head_sha(self._repo_root_for_staleness)
except Exception:
sha = None
self._cached_current_sha = sha
self._cached_current_sha_at = now
return sha
def _detect_stale_code(self) -> bool:
"""Return True if the git HEAD moved since this process booted.
A gateway that survives ``hermes update`` (manual SIGTERM never
escalated, systemd restart race, detached-process respawn failed,
etc.) keeps pre-update modules cached in ``sys.modules``. Later
imports of names added post-update — e.g. ``cfg_get`` from PR
#17304 — raise ImportError against the stale module object (see
Issue #17648).
We compare the git HEAD SHA at boot to the current SHA on disk.
``hermes update`` always moves HEAD forward via ``git pull``;
agent file edits (the agent patching ``run_agent.py`` or
``gateway/run.py`` during a self-dev session) never move HEAD.
That makes SHA comparison free of the false-positive class that
the old mtime check suffered from — the agent can edit any file
without triggering a phantom restart.
Returns False when:
- the boot SHA is unavailable (non-git install, first call
during partial init, etc.); we can't tell and refuse to loop
- the current SHA matches the boot SHA
- reading the current SHA fails for any reason
"""
if not self._boot_wall_time:
return False
if not self._boot_git_sha:
# Non-git install. ``hermes update`` is git-based, so a
# non-git install can't experience the stale-modules class
# this check exists to catch. Return False — no check, no
# false positives. (If we ever ship a pip-install update
# path, we'd add a persistent update marker here and compare
# its timestamp to self._boot_wall_time.)
return False
try:
current = self._current_git_sha_cached()
except Exception:
return False
if not current:
return False
return current != self._boot_git_sha
def _trigger_stale_code_restart(self) -> None:
"""Idempotently kick off a graceful restart after stale-code detection.
Runs at most once per process. The restart request goes through
the normal drain path so in-flight agent turns finish before the
process exits; the service manager (systemd / launchd / detached
profile watcher) then respawns with fresh code. On manual
``hermes gateway run`` installs without a supervisor, the
process exits and the user must restart by hand — but they get a
user-visible message telling them so.
"""
if self._stale_code_restart_triggered:
return
self._stale_code_restart_triggered = True
current_sha = None
try:
current_sha = self._current_git_sha_cached()
except Exception:
pass
logger.warning(
"Stale-code self-check: git HEAD moved since gateway boot "
"(boot=%s, current=%s) — requesting graceful restart. "
"See Issue #17648.",
(self._boot_git_sha or "?")[:12],
(current_sha or "?")[:12],
)
try:
self.request_restart(detached=False, via_service=True)
except Exception as exc:
logger.error("Stale-code restart request failed: %s", exc)
async def start(self) -> bool:
"""
Start the gateway and all configured platform adapters.
@@ -4600,6 +4873,27 @@ class GatewayRunner:
"""
source = event.source
# Stale-code self-check (Issue #17648). A gateway that survives
# ``hermes update`` keeps old modules cached in sys.modules; the
# first inbound message is our earliest safe chance to detect
# this and restart gracefully before we dispatch to the agent
# and hit ImportError on freshly-added names (e.g. cfg_get).
# Idempotent — runs the real check at most once per message, and
# request_restart() no-ops after the first call.
try:
if self._detect_stale_code():
self._trigger_stale_code_restart()
# Acknowledge to the user so they don't see a silent
# drop; the gateway will be back up in a moment via the
# service manager / profile-watcher respawn.
return (
"⟳ Gateway code was updated in the background — "
"restarting this gateway so your next message runs "
"on the new code. Please retry in a moment."
)
except Exception as _stale_exc:
logger.debug("Stale-code self-check failed: %s", _stale_exc)
# Internal events (e.g. background-process completion notifications)
# are system-generated and must skip user authorization.
is_internal = bool(getattr(event, "internal", False))

View File

@@ -544,7 +544,13 @@ DEFAULT_CONFIG = {
# via TERMINAL_LOCAL_PERSISTENT env var.
"persistent_shell": True,
},
"web": {
"backend": "", # shared fallback — applies to both search and extract
"search_backend": "", # per-capability override for web_search (e.g. "searxng")
"extract_backend": "", # per-capability override for web_extract (e.g. "native")
},
"browser": {
"inactivity_timeout": 120,
"command_timeout": 30, # Timeout for browser commands in seconds (screenshot, navigate, etc.)

View File

@@ -163,42 +163,35 @@
for entry in "''${ENTRIES[@]}"; do
IFS=":" read -r ATTR FOLDER NIX_FILE <<< "$entry"
echo "==> .#$ATTR ($FOLDER -> $NIX_FILE)"
# Compute the actual hash from the lockfile directly using
# prefetch-npm-deps. This avoids false "ok" from nix build when
# an old derivation is cached in a substituter (cachix/cache.nixos.org).
LOCK_FILE="$FOLDER/package-lock.json"
NEW_HASH=$(${pkgs.lib.getExe pkgs.prefetch-npm-deps} "$LOCK_FILE" 2>/dev/null)
if [ -z "$NEW_HASH" ]; then
echo " prefetch-npm-deps failed, falling back to nix build" >&2
OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --print-build-logs 2>&1)
STATUS=$?
if [ "$STATUS" -eq 0 ]; then
echo " ok (via nix build)"
continue
fi
NEW_HASH=$(echo "$OUTPUT" | awk '/got:/ {print $2; exit}')
if [ -z "$NEW_HASH" ]; then
if echo "$OUTPUT" | grep -qE "throttled|HTTP error 418|substituter .* is disabled|some outputs of .* are not valid"; then
echo " skipped (transient cache failure see primary nix build for real status)" >&2
echo "$OUTPUT" | tail -8 >&2
continue
fi
echo " build failed with no hash mismatch:" >&2
echo "$OUTPUT" | tail -40 >&2
exit 1
fi
fi
OLD_HASH=$(grep -oE 'hash = "sha256-[^"]+"' "$NIX_FILE" | head -1 \
| sed -E 's/hash = "(.*)"/\1/')
if [ "$NEW_HASH" = "$OLD_HASH" ]; then
OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --print-build-logs 2>&1)
STATUS=$?
if [ "$STATUS" -eq 0 ]; then
echo " ok"
continue
fi
NEW_HASH=$(echo "$OUTPUT" | awk '/got:/ {print $2; exit}')
if [ -z "$NEW_HASH" ]; then
# Magic-Nix-Cache occasionally returns HTTP 418 / cache-throttled
# mid-run; nix then prints "outputs not valid, so checking is
# not possible" without a `got:` line. That's an infrastructure
# blip, not a stale lockfile; warn and skip rather than failing
# the lint. A real hash mismatch would still surface in the
# primary `.#$ATTR` build, which is a separate CI job.
if echo "$OUTPUT" | grep -qE "throttled|HTTP error 418|substituter .* is disabled|some outputs of .* are not valid"; then
echo " skipped (transient cache failure see primary nix build for real status)" >&2
echo "$OUTPUT" | tail -8 >&2
continue
fi
echo " build failed with no hash mismatch:" >&2
echo "$OUTPUT" | tail -40 >&2
exit 1
fi
HASH_LINE=$(grep -n 'hash = "sha256-' "$NIX_FILE" | head -1 | cut -d: -f1)
OLD_HASH=$(grep -oE 'hash = "sha256-[^"]+"' "$NIX_FILE" | head -1 \
| sed -E 's/hash = "(.*)"/\1/')
LOCK_FILE="$FOLDER/package-lock.json"
echo " stale: $NIX_FILE:$HASH_LINE $OLD_HASH -> $NEW_HASH"
STALE=1

View File

@@ -4,7 +4,7 @@ let
src = ../ui-tui;
npmDeps = pkgs.fetchNpmDeps {
inherit src;
hash = "sha256-MLcLhjTF6dgdvNBtJWzo8Nh19eNh/ZitD2b07nm61Tc=";
hash = "sha256-a/HGI9OgVcTnZrMXA7xFMGnFoVxyHe95fulVz+WNYB0=";
};
npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };

View File

@@ -0,0 +1,412 @@
"""Tests for the gateway stale-code self-check (Issue #17648).
A gateway that survives ``hermes update`` keeps pre-update modules cached
in ``sys.modules``. Later imports of names added post-update (e.g.
``cfg_get`` from PR #17304) raise ImportError against the stale module
object.
The self-check compares the git HEAD SHA at boot to the current SHA on
disk. ``hermes update`` always moves HEAD forward via ``git pull``;
agent-driven file edits (Hermes editing ``run_agent.py`` / ``gateway/run.py``
during a self-dev session) never move HEAD — so the SHA signal is free of
the false-positive class that the earlier mtime-based check suffered from.
"""
import os
import time
from pathlib import Path
import pytest
from gateway.run import (
GatewayRunner,
_compute_repo_mtime,
_read_git_head_sha,
_STALE_CODE_SENTINELS,
_GIT_SHA_CACHE_TTL_SECS,
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_tmp_repo(tmp_path: Path) -> Path:
"""Create a fake repo with all stale-code sentinel files."""
for rel in _STALE_CODE_SENTINELS:
p = tmp_path / rel
p.parent.mkdir(parents=True, exist_ok=True)
p.write_text("# test sentinel\n")
return tmp_path
def _make_git_repo(tmp_path: Path, sha: str = "a" * 40, branch: str = "main") -> Path:
"""Stamp a minimal .git directory so _read_git_head_sha can resolve a SHA.
We don't run real git — just lay down the files the reader walks
(.git/HEAD pointing at refs/heads/<branch>, refs/heads/<branch>
containing the SHA).
"""
git_dir = tmp_path / ".git"
git_dir.mkdir(parents=True, exist_ok=True)
(git_dir / "HEAD").write_text(f"ref: refs/heads/{branch}\n")
refs_dir = git_dir / "refs" / "heads"
refs_dir.mkdir(parents=True, exist_ok=True)
(refs_dir / branch).write_text(f"{sha}\n")
return tmp_path
def _set_head_sha(repo_root: Path, sha: str, branch: str = "main") -> None:
"""Rewrite the current branch ref to a new SHA (simulates git pull)."""
(repo_root / ".git" / "refs" / "heads" / branch).write_text(f"{sha}\n")
def _make_runner(
repo_root: Path,
*,
boot_sha: str | None,
boot_wall: float | None = None,
boot_mtime: float = 0.0,
):
"""Bare GatewayRunner with just the stale-check attributes set."""
if boot_wall is None:
boot_wall = time.time()
runner = object.__new__(GatewayRunner)
runner._repo_root_for_staleness = repo_root
runner._boot_wall_time = boot_wall
runner._boot_git_sha = boot_sha
runner._boot_repo_mtime = boot_mtime
runner._stale_code_notified = set()
runner._stale_code_restart_triggered = False
runner._cached_current_sha = boot_sha
runner._cached_current_sha_at = boot_wall
return runner
# ---------------------------------------------------------------------------
# _read_git_head_sha — raw SHA reader
# ---------------------------------------------------------------------------
def test_read_git_head_sha_branch_ref(tmp_path):
"""Resolves ref: refs/heads/<branch> → SHA from refs/heads/<branch>."""
sha = "b" * 40
_make_git_repo(tmp_path, sha=sha, branch="main")
assert _read_git_head_sha(tmp_path) == sha
def test_read_git_head_sha_detached_head(tmp_path):
"""Detached HEAD: .git/HEAD contains the SHA directly."""
sha = "c" * 40
git_dir = tmp_path / ".git"
git_dir.mkdir()
(git_dir / "HEAD").write_text(f"{sha}\n")
assert _read_git_head_sha(tmp_path) == sha
def test_read_git_head_sha_packed_refs(tmp_path):
"""Falls back to packed-refs when refs/heads/<branch> is missing."""
sha = "d" * 40
git_dir = tmp_path / ".git"
git_dir.mkdir()
(git_dir / "HEAD").write_text("ref: refs/heads/main\n")
# No refs/heads/main file — only packed-refs
(git_dir / "packed-refs").write_text(
f"# pack-refs with: peeled fully-peeled sorted\n"
f"{sha} refs/heads/main\n"
)
assert _read_git_head_sha(tmp_path) == sha
def test_read_git_head_sha_worktree_gitdir_file(tmp_path):
"""Worktree: .git is a file with `gitdir: <path>` pointing to the real git dir.
Real git worktrees store shared refs (refs/heads/*) in the main
checkout's .git/ and write a ``commondir`` pointer into the
worktree-gitdir. The reader must follow commondir to resolve the
branch ref — this is the layout Hermes dev sessions actually use.
"""
sha = "e" * 40
# Main repo layout
main_repo = tmp_path / "main-repo"
main_git = main_repo / ".git"
(main_git / "refs" / "heads").mkdir(parents=True)
(main_git / "HEAD").write_text("ref: refs/heads/main\n")
(main_git / "refs" / "heads" / "main").write_text("0" * 40 + "\n")
# Worktree lives in main-repo/.git/worktrees/<name>/
worktree_git_dir = main_git / "worktrees" / "feature"
worktree_git_dir.mkdir(parents=True)
(worktree_git_dir / "HEAD").write_text("ref: refs/heads/feature\n")
# commondir points back at the main .git (relative path, "../..")
(worktree_git_dir / "commondir").write_text("../..\n")
# Feature branch ref lives in the shared refs/heads
(main_git / "refs" / "heads" / "feature").write_text(f"{sha}\n")
# Worktree checkout with .git file pointing at worktree_git_dir
worktree = tmp_path / "wt"
worktree.mkdir()
(worktree / ".git").write_text(f"gitdir: {worktree_git_dir}\n")
assert _read_git_head_sha(worktree) == sha
def test_read_git_head_sha_worktree_packed_refs_in_common(tmp_path):
"""Worktree + packed-refs in common dir: fallback still resolves."""
sha = "f" * 40
main_repo = tmp_path / "main-repo"
main_git = main_repo / ".git"
main_git.mkdir(parents=True)
(main_git / "HEAD").write_text("ref: refs/heads/main\n")
# packed-refs in the common (main) .git
(main_git / "packed-refs").write_text(
f"# pack-refs with: peeled fully-peeled sorted\n"
f"{sha} refs/heads/feature\n"
)
worktree_git_dir = main_git / "worktrees" / "feature"
worktree_git_dir.mkdir(parents=True)
(worktree_git_dir / "HEAD").write_text("ref: refs/heads/feature\n")
(worktree_git_dir / "commondir").write_text("../..\n")
worktree = tmp_path / "wt"
worktree.mkdir()
(worktree / ".git").write_text(f"gitdir: {worktree_git_dir}\n")
assert _read_git_head_sha(worktree) == sha
def test_read_git_head_sha_no_git_returns_none(tmp_path):
"""No .git dir → None (non-git install, safely disables the check)."""
assert _read_git_head_sha(tmp_path) is None
def test_read_git_head_sha_malformed_head_returns_none(tmp_path):
"""Empty HEAD file → None (don't loop on corrupt repos)."""
git_dir = tmp_path / ".git"
git_dir.mkdir()
(git_dir / "HEAD").write_text("")
assert _read_git_head_sha(tmp_path) is None
# ---------------------------------------------------------------------------
# _detect_stale_code — the main regression guard
# ---------------------------------------------------------------------------
def test_detect_stale_code_false_when_sha_unchanged(tmp_path):
"""Boot SHA == current SHA → not stale (no restart)."""
sha = "a" * 40
_make_git_repo(tmp_path, sha=sha)
runner = _make_runner(tmp_path, boot_sha=sha)
# Force fresh read by expiring the cache
runner._cached_current_sha_at = 0.0
assert runner._detect_stale_code() is False
def test_detect_stale_code_true_after_git_pull(tmp_path):
"""Boot SHA != current SHA → stale (hermes update happened)."""
boot_sha = "a" * 40
_make_git_repo(tmp_path, sha=boot_sha)
runner = _make_runner(tmp_path, boot_sha=boot_sha)
# Simulate git pull moving HEAD forward
_set_head_sha(tmp_path, "b" * 40)
runner._cached_current_sha_at = 0.0 # expire cache
assert runner._detect_stale_code() is True
def test_detect_stale_code_ignores_agent_file_edits(tmp_path):
"""THE CORE REGRESSION: agent edits to source files do NOT trigger restart.
This is the motivating incident for the SHA-based check. Under the
previous mtime-based scheme, any ``patch`` / ``write_file`` call
against run_agent.py / gateway/run.py / hermes_cli/config.py would
flip the stale-check to True and force a gateway restart on the
next message — even though no update actually happened. SHA
comparison decouples the two: git HEAD only moves on ``git pull``,
never on file writes.
"""
sha = "a" * 40
_make_git_repo(tmp_path, sha=sha)
_make_tmp_repo(tmp_path) # lay down sentinel files too
runner = _make_runner(tmp_path, boot_sha=sha)
# Simulate the agent editing run_agent.py and gateway/run.py with
# mtimes far into the future — exactly the scenario that used to
# false-positive the old mtime check.
future = time.time() + 10_000
for rel in _STALE_CODE_SENTINELS:
p = tmp_path / rel
if p.is_file():
p.write_text("# agent just edited this\n")
os.utime(p, (future, future))
# HEAD SHA has NOT moved — check must stay False.
runner._cached_current_sha_at = 0.0 # expire cache
assert runner._detect_stale_code() is False
def test_detect_stale_code_false_for_non_git_install(tmp_path):
"""Non-git install (no .git dir) → check disabled, never fires."""
# No .git dir at all; runner's boot_sha is None
runner = _make_runner(tmp_path, boot_sha=None)
# Even if we pretended the current SHA differed, the check should
# short-circuit on boot_sha=None and return False.
assert runner._detect_stale_code() is False
def test_detect_stale_code_false_when_no_boot_wall_time(tmp_path):
"""No boot snapshot at all → can't tell → not stale (no restart loop)."""
runner = _make_runner(tmp_path, boot_sha="a" * 40, boot_wall=0.0)
assert runner._detect_stale_code() is False
def test_detect_stale_code_handles_disappearing_git_dir(tmp_path):
""".git vanishes mid-run → current_sha = None → not stale (don't loop)."""
sha = "a" * 40
_make_git_repo(tmp_path, sha=sha)
runner = _make_runner(tmp_path, boot_sha=sha)
# Nuke the git dir after boot
import shutil
shutil.rmtree(tmp_path / ".git")
runner._cached_current_sha_at = 0.0 # expire cache
assert runner._detect_stale_code() is False
# ---------------------------------------------------------------------------
# SHA cache
# ---------------------------------------------------------------------------
def test_current_sha_cache_collapses_bursts(tmp_path, monkeypatch):
"""Consecutive calls inside the TTL window reuse the cached SHA."""
sha = "a" * 40
_make_git_repo(tmp_path, sha=sha)
runner = _make_runner(tmp_path, boot_sha=sha)
read_calls = {"n": 0}
real_reader = _read_git_head_sha
def counting_reader(repo_root):
read_calls["n"] += 1
return real_reader(repo_root)
from gateway import run as run_mod
monkeypatch.setattr(run_mod, "_read_git_head_sha", counting_reader)
# Force cache expiry so the first call definitely reads
runner._cached_current_sha_at = 0.0
runner._current_git_sha_cached()
first_count = read_calls["n"]
# Immediate second/third calls should hit cache (no new read)
runner._current_git_sha_cached()
runner._current_git_sha_cached()
assert read_calls["n"] == first_count
def test_current_sha_cache_expires_after_ttl(tmp_path, monkeypatch):
"""After _GIT_SHA_CACHE_TTL_SECS elapses, a fresh read happens."""
sha = "a" * 40
_make_git_repo(tmp_path, sha=sha)
runner = _make_runner(tmp_path, boot_sha=sha)
read_calls = {"n": 0}
real_reader = _read_git_head_sha
def counting_reader(repo_root):
read_calls["n"] += 1
return real_reader(repo_root)
from gateway import run as run_mod
monkeypatch.setattr(run_mod, "_read_git_head_sha", counting_reader)
runner._cached_current_sha_at = 0.0
runner._current_git_sha_cached()
first = read_calls["n"]
# Age the cache past the TTL
runner._cached_current_sha_at = time.time() - (_GIT_SHA_CACHE_TTL_SECS + 1.0)
runner._current_git_sha_cached()
assert read_calls["n"] == first + 1
# ---------------------------------------------------------------------------
# _trigger_stale_code_restart — idempotency preserved
# ---------------------------------------------------------------------------
def test_trigger_stale_code_restart_is_idempotent(tmp_path):
"""Calling _trigger_stale_code_restart twice only requests restart once."""
sha = "a" * 40
_make_git_repo(tmp_path, sha=sha)
runner = _make_runner(tmp_path, boot_sha=sha)
calls = []
def fake_request_restart(*, detached=False, via_service=False):
calls.append((detached, via_service))
return True
runner.request_restart = fake_request_restart
runner._trigger_stale_code_restart()
runner._trigger_stale_code_restart()
runner._trigger_stale_code_restart()
assert len(calls) == 1
assert runner._stale_code_restart_triggered is True
def test_trigger_stale_code_restart_survives_request_failure(tmp_path):
"""If request_restart raises, we swallow and mark as triggered anyway."""
sha = "a" * 40
_make_git_repo(tmp_path, sha=sha)
runner = _make_runner(tmp_path, boot_sha=sha)
def boom(*, detached=False, via_service=False):
raise RuntimeError("no event loop")
runner.request_restart = boom
# Should not raise
runner._trigger_stale_code_restart()
# Marked triggered so we don't retry on every subsequent message
assert runner._stale_code_restart_triggered is True
# ---------------------------------------------------------------------------
# Class-level defaults — tests that build bare runners via object.__new__
# ---------------------------------------------------------------------------
def test_class_level_defaults_prevent_uninitialized_access():
"""Partial construction via object.__new__ must not crash _detect_stale_code."""
runner = object.__new__(GatewayRunner)
# Don't set any instance attrs — class-level defaults should kick in
runner._repo_root_for_staleness = Path(".")
# _boot_wall_time / _boot_git_sha fall through to class defaults
# (0.0 and None respectively)
assert runner._detect_stale_code() is False
# _stale_code_restart_triggered falls through to class default (False)
assert runner._stale_code_restart_triggered is False
# ---------------------------------------------------------------------------
# Legacy mtime reader kept for compatibility — light sanity check only
# ---------------------------------------------------------------------------
def test_compute_repo_mtime_still_returns_newest(tmp_path):
"""_compute_repo_mtime remains available for any legacy callers."""
repo = _make_tmp_repo(tmp_path)
baseline = time.time() - 100
for rel in _STALE_CODE_SENTINELS:
os.utime(repo / rel, (baseline, baseline))
newer = time.time()
os.utime(repo / "hermes_cli/config.py", (newer, newer))
result = _compute_repo_mtime(repo)
assert abs(result - newer) < 1.0
def test_compute_repo_mtime_missing_files_returns_zero(tmp_path):
"""Legacy sanity: missing sentinels → 0.0."""
assert _compute_repo_mtime(tmp_path) == 0.0

View File

@@ -0,0 +1,194 @@
"""Tests for the web tools provider architecture.
Covers:
- WebSearchProvider / WebExtractProvider ABC enforcement
- Per-capability backend selection (_get_search_backend, _get_extract_backend)
- Backward compatibility (web.backend still works as shared fallback)
- Config keys merge correctly via DEFAULT_CONFIG
"""
from __future__ import annotations
import json
from typing import Any, Dict, List
import pytest
# ---------------------------------------------------------------------------
# ABC enforcement
# ---------------------------------------------------------------------------
class TestWebProviderABCs:
"""The ABCs enforce the interface contract."""
def test_cannot_instantiate_search_provider(self):
from tools.web_providers.base import WebSearchProvider
with pytest.raises(TypeError):
WebSearchProvider() # type: ignore[abstract]
def test_cannot_instantiate_extract_provider(self):
from tools.web_providers.base import WebExtractProvider
with pytest.raises(TypeError):
WebExtractProvider() # type: ignore[abstract]
def test_concrete_search_provider_works(self):
from tools.web_providers.base import WebSearchProvider
class Dummy(WebSearchProvider):
def provider_name(self) -> str:
return "dummy"
def is_configured(self) -> bool:
return True
def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
return {"success": True, "data": {"web": []}}
d = Dummy()
assert d.provider_name() == "dummy"
assert d.is_configured() is True
assert d.search("test")["success"] is True
def test_concrete_extract_provider_works(self):
from tools.web_providers.base import WebExtractProvider
class Dummy(WebExtractProvider):
def provider_name(self) -> str:
return "dummy"
def is_configured(self) -> bool:
return True
def extract(self, urls: List[str], **kwargs) -> Dict[str, Any]:
return {"success": True, "data": [{"url": urls[0], "content": "x"}]}
d = Dummy()
assert d.provider_name() == "dummy"
assert d.extract(["https://example.com"])["success"] is True
# ---------------------------------------------------------------------------
# Per-capability backend selection
# ---------------------------------------------------------------------------
class TestPerCapabilityBackendSelection:
"""_get_search_backend and _get_extract_backend read per-capability config."""
def test_search_backend_overrides_generic(self, monkeypatch):
from tools import web_tools
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
"backend": "firecrawl",
"search_backend": "tavily",
})
monkeypatch.setenv("TAVILY_API_KEY", "test-key")
assert web_tools._get_search_backend() == "tavily"
def test_extract_backend_overrides_generic(self, monkeypatch):
from tools import web_tools
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
"backend": "tavily",
"extract_backend": "exa",
})
monkeypatch.setenv("EXA_API_KEY", "test-key")
assert web_tools._get_extract_backend() == "exa"
def test_falls_back_to_generic_backend_when_search_backend_empty(self, monkeypatch):
from tools import web_tools
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
"backend": "tavily",
"search_backend": "",
})
monkeypatch.setenv("TAVILY_API_KEY", "test-key")
assert web_tools._get_search_backend() == "tavily"
def test_falls_back_to_generic_backend_when_extract_backend_empty(self, monkeypatch):
from tools import web_tools
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
"backend": "parallel",
"extract_backend": "",
})
monkeypatch.setenv("PARALLEL_API_KEY", "test-key")
assert web_tools._get_extract_backend() == "parallel"
def test_search_backend_ignored_when_not_available(self, monkeypatch):
from tools import web_tools
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
"backend": "firecrawl",
"search_backend": "exa", # set but no EXA_API_KEY
})
monkeypatch.delenv("EXA_API_KEY", raising=False)
monkeypatch.setenv("FIRECRAWL_API_KEY", "fc-key")
# Should fall back to firecrawl since exa isn't configured
assert web_tools._get_search_backend() == "firecrawl"
def test_fully_backward_compatible_with_web_backend_only(self, monkeypatch):
from tools import web_tools
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
"backend": "tavily",
})
monkeypatch.setenv("TAVILY_API_KEY", "test-key")
# No search_backend or extract_backend set — both fall through
assert web_tools._get_search_backend() == "tavily"
assert web_tools._get_extract_backend() == "tavily"
# ---------------------------------------------------------------------------
# Config key presence in DEFAULT_CONFIG
# ---------------------------------------------------------------------------
class TestDefaultConfig:
"""The web section exists in DEFAULT_CONFIG with per-capability keys."""
def test_web_section_in_default_config(self):
from hermes_cli.config import DEFAULT_CONFIG
assert "web" in DEFAULT_CONFIG
web = DEFAULT_CONFIG["web"]
assert "backend" in web
assert "search_backend" in web
assert "extract_backend" in web
# All empty string by default (no override)
assert web["backend"] == ""
assert web["search_backend"] == ""
assert web["extract_backend"] == ""
# ---------------------------------------------------------------------------
# web_search_tool uses _get_search_backend
# ---------------------------------------------------------------------------
class TestWebSearchUsesSearchBackend:
"""web_search_tool dispatches through _get_search_backend not _get_backend."""
def test_search_tool_calls_search_backend(self, monkeypatch):
from tools import web_tools
called_with = []
original_get_search = web_tools._get_search_backend
def tracking_get_search():
result = original_get_search()
called_with.append(("search", result))
return result
monkeypatch.setattr(web_tools, "_get_search_backend", tracking_get_search)
monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "firecrawl"})
monkeypatch.setenv("FIRECRAWL_API_KEY", "fake")
# The function will fail at Firecrawl client level but we just
# need to verify _get_search_backend was called
try:
web_tools.web_search_tool("test", 1)
except Exception:
pass
assert len(called_with) > 0
assert called_with[0][0] == "search"

View File

@@ -0,0 +1,73 @@
# Web Tools Provider Architecture
## Overview
Web tools (`web_search`, `web_extract`) use a **per-capability backend selection** system: search and extract can each be served by a different provider, chosen independently.
## Config Keys
```yaml
web:
backend: "firecrawl" # Shared fallback — applies to both if specific keys not set
search_backend: "" # Per-capability override for web_search
extract_backend: "" # Per-capability override for web_extract
```
**Selection priority (per capability):**
1. `web.search_backend` / `web.extract_backend` (explicit per-capability)
2. `web.backend` (shared fallback)
3. Auto-detect from environment variables
When per-capability keys are empty (default), behavior is identical to the legacy single-backend selection.
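For concreteness, a minimal sketch of how that priority resolves, assuming the helpers added in this PR and a config where only search carries an override (the `tavily` / `firecrawl` pairing mirrors the new tests; actual results depend on which API keys are present):
```python
# Sketch only: illustrates the selection priority, not new behavior.
from tools import web_tools

# Assume config.yaml contains:
#   web:
#     backend: "firecrawl"      # shared fallback
#     search_backend: "tavily"  # per-capability override (needs TAVILY_API_KEY)
#     extract_backend: ""       # empty -> inherit web.backend
web_tools._get_search_backend()   # -> "tavily" when TAVILY_API_KEY is set,
                                  #    otherwise falls back to "firecrawl"
web_tools._get_extract_backend()  # -> "firecrawl" (empty override falls through)
```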
## Architecture
```
web_search_tool()
└─ _get_search_backend()
├─ web.search_backend (if set + available)
└─ _get_backend() fallback
web_extract_tool()
└─ _get_extract_backend()
├─ web.extract_backend (if set + available)
└─ _get_backend() fallback
```
## Provider ABCs
New providers implement these interfaces in `tools/web_providers/`:
```python
from tools.web_providers.base import WebSearchProvider, WebExtractProvider
class MySearchProvider(WebSearchProvider):
def provider_name(self) -> str: ...
def is_configured(self) -> bool: ...
def search(self, query: str, limit: int = 5) -> Dict[str, Any]: ...
class MyExtractProvider(WebExtractProvider):
def provider_name(self) -> str: ...
def is_configured(self) -> bool: ...
def extract(self, urls: List[str], **kwargs) -> Dict[str, Any]: ...
```
## Adding a New Search Provider
1. Create `tools/web_providers/your_provider.py` implementing `WebSearchProvider`
2. Add availability check to `_is_backend_available()` in `web_tools.py`
3. Add dispatch branch in `web_search_tool()`
4. Add provider to `hermes tools` picker in `tools_config.py`
5. Add env var to `OPTIONAL_ENV_VARS` in `config.py` (if needed)
6. Write tests in `tests/tools/`
Search-only providers (like SearXNG) don't need to implement `WebExtractProvider`.
Extract-only providers don't need to implement `WebSearchProvider`.
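For orientation, a rough sketch of what a search-only provider could look like. Nothing below ships in this PR: the class name, the `SEARXNG_BASE_URL` env var, and the use of `requests` against SearXNG's JSON API are all assumptions; only the `WebSearchProvider` interface and the normalized result shape come from `base.py`.
```python
"""Hypothetical search-only provider sketch (illustrative, not part of this PR)."""
from __future__ import annotations

import os
from typing import Any, Dict

import requests  # assumed HTTP client; a real implementation may choose differently

from tools.web_providers.base import WebSearchProvider


class SearxngSearchProvider(WebSearchProvider):
    """Queries a self-hosted SearXNG instance (assumed SEARXNG_BASE_URL env var)."""

    def provider_name(self) -> str:
        return "searxng"

    def is_configured(self) -> bool:
        # Cheap presence check only; the ABC contract forbids network calls here.
        return bool(os.environ.get("SEARXNG_BASE_URL"))

    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
        base = os.environ["SEARXNG_BASE_URL"].rstrip("/")
        try:
            resp = requests.get(
                f"{base}/search",
                params={"q": query, "format": "json"},
                timeout=15,
            )
            resp.raise_for_status()
            raw = resp.json().get("results", [])[:limit]
        except Exception as exc:
            # Normalize any failure into the {"success": False, "error": ...} contract.
            return {"success": False, "error": str(exc)}
        return {
            "success": True,
            "data": {
                "web": [
                    {
                        "title": r.get("title", ""),
                        "url": r.get("url", ""),
                        "description": r.get("content", ""),
                        "position": i + 1,
                    }
                    for i, r in enumerate(raw)
                ]
            },
        }
```
Wiring such a provider in would then follow steps 2-6 above (availability check, dispatch branch, picker entry, env var, tests).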
## hermes tools UX
The provider picker uses **progressive disclosure**:
- **Default path** (90% of users): Pick one provider → sets `web.backend` for both. One selection, done.
- **Advanced path**: "Configure separately" option at bottom → two-step sub-picker for search + extract independently.
See `.hermes/plans/2026-05-03-web-tools-provider-architecture.md` for the full UX flow diagram.
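As a sketch of the resulting config (assumed values; the picker itself lives in `tools_config.py` and is not part of this PR), the two paths persist roughly:
```python
# Default path: one provider chosen, applied to both capabilities via web.backend.
default_path = {"web": {"backend": "tavily", "search_backend": "", "extract_backend": ""}}

# Advanced path ("Configure separately"): independent per-capability choices.
advanced_path = {"web": {"backend": "", "search_backend": "tavily", "extract_backend": "firecrawl"}}
```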

View File

@@ -0,0 +1,6 @@
"""Web capability providers — search, extract, crawl.
Each capability has an ABC in ``base.py`` and vendor implementations in
sibling modules. Provider registries in ``web_tools.py`` map config names
to provider classes.
"""

View File

@@ -0,0 +1,89 @@
"""Abstract base classes for web capability providers."""
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, Dict, List
class WebSearchProvider(ABC):
"""Interface for web search backends (Firecrawl, Tavily, Exa, etc.).
Implementations live in sibling modules. The user selects a provider
via ``hermes tools``; the choice is persisted as
``config["web"]["search_backend"]`` (falling back to
``config["web"]["backend"]``).
Search providers return results in a normalized format::
{
"success": True,
"data": {
"web": [
{"title": str, "url": str, "description": str, "position": int},
...
]
}
}
On failure::
{"success": False, "error": str}
"""
@abstractmethod
def provider_name(self) -> str:
"""Short, human-readable name shown in logs and diagnostics."""
@abstractmethod
def is_configured(self) -> bool:
"""Return True when all required env vars / credentials are present.
Called at tool-registration time to gate availability.
Must be cheap — no network calls.
"""
@abstractmethod
def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
"""Execute a web search and return normalized results."""
class WebExtractProvider(ABC):
"""Interface for web content extraction backends.
Implementations live in sibling modules. The user selects a provider
via ``hermes tools``; the choice is persisted as
``config["web"]["extract_backend"]`` (falling back to
``config["web"]["backend"]``).
Extract providers return results in a normalized format::
{
"success": True,
"data": [
{"url": str, "title": str, "content": str,
"raw_content": str, "metadata": dict},
...
]
}
On failure::
{"success": False, "error": str}
"""
@abstractmethod
def provider_name(self) -> str:
"""Short, human-readable name shown in logs and diagnostics."""
@abstractmethod
def is_configured(self) -> bool:
"""Return True when all required env vars / credentials are present.
Called at tool-registration time to gate availability.
Must be cheap — no network calls.
"""
@abstractmethod
def extract(self, urls: List[str], **kwargs) -> Dict[str, Any]:
"""Extract content from the given URLs and return normalized results."""

View File

@@ -119,7 +119,7 @@ def _load_web_config() -> dict:
return {}
def _get_backend() -> str:
"""Determine which web backend to use.
"""Determine which web backend to use (shared fallback).
Reads ``web.backend`` from config.yaml (set by ``hermes tools``).
Falls back to whichever API key is present for users who configured
@@ -145,6 +145,44 @@ def _get_backend() -> str:
return "firecrawl" # default (backward compat)
def _get_search_backend() -> str:
"""Determine which backend to use for web_search specifically.
Selection priority:
1. ``web.search_backend`` (per-capability override)
2. ``web.backend`` (shared fallback — existing behavior)
3. Auto-detect from env vars
This enables using different providers for search vs extract
(e.g. SearXNG for search + Firecrawl for extract).
"""
return _get_capability_backend("search")
def _get_extract_backend() -> str:
"""Determine which backend to use for web_extract specifically.
Selection priority:
1. ``web.extract_backend`` (per-capability override)
2. ``web.backend`` (shared fallback — existing behavior)
3. Auto-detect from env vars
"""
return _get_capability_backend("extract")
def _get_capability_backend(capability: str) -> str:
"""Shared helper for per-capability backend selection.
Reads ``web.{capability}_backend`` from config; if set and available,
uses it. Otherwise falls through to the shared ``_get_backend()``.
"""
cfg = _load_web_config()
specific = (cfg.get(f"{capability}_backend") or "").lower().strip()
if specific and _is_backend_available(specific):
return specific
return _get_backend()
def _is_backend_available(backend: str) -> bool:
"""Return True when the selected backend is currently usable."""
if backend == "exa":
@@ -1127,8 +1165,8 @@ def web_search_tool(query: str, limit: int = 5) -> str:
if is_interrupted():
return tool_error("Interrupted", success=False)
# Dispatch to the configured backend
backend = _get_backend()
# Dispatch to the configured search backend
backend = _get_search_backend()
if backend == "parallel":
response_data = _parallel_search(query, limit)
debug_call_data["results_count"] = len(response_data.get("data", {}).get("web", []))
@@ -1284,7 +1322,7 @@ async def web_extract_tool(
if not safe_urls:
results = []
else:
backend = _get_backend()
backend = _get_extract_backend()
if backend == "parallel":
results = await _parallel_extract(safe_urls)