feat(tui): interactive Plugins Hub overlay for enable/disable

The TUI had no way to toggle plugins — `/plugins` only printed a static list, and the classic `hermes plugins` picker is curses-based and can't run inside the Ink UI. Users had to drop to a separate shell and run `hermes plugins enable/disable`.

Add a PluginsHub overlay modeled on the existing SkillsHub:

- New gateway RPC `plugins.manage` (list + toggle) backed by the same disk-discovery + dashboard_set_agent_plugin_enabled primitives the CLI and dashboard already use, so all three surfaces agree on state. The toggle path also wires the plugin's toolset into platform_toolsets.
- `/plugins` with no arg opens the hub; any subcommand still falls through to the text slash worker for CLI parity.
- pluginsHub overlay state threaded through overlayStore / interfaces / useInputHandlers (Esc closes) / appOverlays (renders the FloatBox); preserved across turn teardown like other user-toggled overlays.
- Hub UI: arrow/number select, Enter/Space toggles live, Tab switches user-only vs all (bundled) scope, shows ✓/✗/○ activation glyphs.

plugins.manage added to _LONG_HANDLERS (disk + config I/O).
2026-06-17 07:31:21 +08:00 · 2026-06-09 00:23:50 -04:00
1219 changed files with 19638 additions and 140282 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -63,45 +63,3 @@ data/
 # Compose/profile runtime state (bind-mounted; avoid ownership/secret issues)
 hermes-config/
 runtime/
-
-# ---------- Not needed inside the Docker image ----------
-
-# Desktop app source (Tauri/Electron); never installed in the container
-apps/
-
-# Test suite — not shipped in production images
-tests/
-
-# Documentation site (Docusaurus) and supplementary docs
-website/
-docs/
-
-# Assets only used by the GitHub README
-assets/
-infographic/
-
-# Plugin-level docs (hermes-achievements ships docs/ but the runtime doesn't read them)
-plugins/hermes-achievements/docs/
-
-# Nix / Homebrew / AUR packaging metadata — irrelevant to Docker
-nix/
-flake.nix
-flake.lock
-packaging/
-
-# Design and planning documents
-plans/
-.plans/
-
-# ACP registry manifest (icon + agent.json) — not consumed at runtime
-acp_registry/
-
-# Repo-level dotfiles that are git-only or dev-tooling config
-.env.example
-.envrc
-.gitattributes
-.hadolint.yaml
-.mailmap
-
-# Top-level LICENSE (not matched by *.md); not needed inside the container
-LICENSE
--- a/.github/pr-screenshots/telegram-overflow/topic-final-response-clipped.jpg
+++ b/.github/pr-screenshots/telegram-overflow/topic-final-response-clipped.jpg
--- a/.github/workflows/contributor-check.yml
+++ b/.github/workflows/contributor-check.yml
@@ -1,11 +1,12 @@
 name: Contributor Attribution Check

 on:
+  pull_request:
+    branches: [main]
  # No paths filter — the job must always run so the required check
  # reports a status (path-gated workflows leave checks "pending" forever
  # when no matching files change, which blocks merge).
-  pull_request:
-    branches: [main]
+
 permissions:
  contents: read

--- a/.github/workflows/deploy-site.yml
+++ b/.github/workflows/deploy-site.yml
@@ -11,20 +11,8 @@ on:
      - 'optional-skills/**'
      - '.github/workflows/deploy-site.yml'
  workflow_dispatch:
-    inputs:
-      skills_index_run_id:
-        description: 'Optional Build Skills Index run ID whose skills-index artifact should be deployed'
-        required: false
-        type: string
-      rebuild_skills_index:
-        description: 'Force a fresh multi-source crawl instead of reusing the latest healthy index'
-        required: false
-        default: false
-        type: boolean

 permissions:
-  contents: read
-  actions: read
  pages: write
  id-token: write

@@ -56,7 +44,7 @@ jobs:

      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
-          node-version: 22
+          node-version: 20
          cache: npm
          cache-dependency-path: website/package-lock.json

@@ -67,81 +55,26 @@ jobs:
      - name: Install PyYAML for skill extraction
        run: pip install pyyaml==6.0.2 httpx==0.28.1

-      - name: Prepare skills index (unified multi-source catalog)
+      - name: Build skills index (unified multi-source catalog)
        env:
-          GH_TOKEN: ${{ github.token }}
-          GITHUB_TOKEN: ${{ github.token }}
-          SKILLS_INDEX_RUN_ID: ${{ github.event.inputs.skills_index_run_id || '' }}
-          REBUILD_SKILLS_INDEX: ${{ github.event.inputs.rebuild_skills_index || 'false' }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
-          # The unified external catalog is expensive to crawl and can burn
-          # through the repository installation's GitHub API quota when several
-          # docs deploys land close together.  Normal docs deploys therefore
-          # reuse the latest healthy catalog: first the artifact from a
-          # scheduled skills-index run, then the currently live index.  Only a
-          # manual force rebuild does a fresh crawl here.
+          # Rebuild the unified catalog. The file is gitignored, so a fresh
+          # checkout starts without it and we want the freshest crawl in
+          # every deploy.
          #
-          # If we do crawl, the build remains fatal. build_skills_index.py runs
-          # the health check BEFORE writing and exits non-zero on source
-          # collapse, keeping the last good Pages deployment live instead of
-          # publishing a degenerate catalog.
-          set -euo pipefail
-          INDEX_PATH="website/static/api/skills-index.json"
-          mkdir -p "$(dirname "$INDEX_PATH")"
-
-          validate_index() {
-            python3 - "$INDEX_PATH" <<'PY'
-          import json
-          import sys
-          from pathlib import Path
-
-          path = Path(sys.argv[1])
-          try:
-              data = json.loads(path.read_text(encoding="utf-8"))
-          except Exception as exc:
-              print(f"invalid skills index JSON: {exc}", file=sys.stderr)
-              sys.exit(1)
-          skills = data.get("skills")
-          if not isinstance(skills, list) or len(skills) < 1500:
-              count = len(skills) if isinstance(skills, list) else "missing"
-              print(f"skills index too small: {count}", file=sys.stderr)
-              sys.exit(1)
-          print(f"skills index ready: {len(skills)} skills")
-          PY
-          }
-
-          if [ "$REBUILD_SKILLS_INDEX" = "true" ]; then
-            python3 scripts/build_skills_index.py
-            validate_index
-            exit 0
-          fi
-
-          if [ -n "$SKILLS_INDEX_RUN_ID" ]; then
-            tmpdir="$(mktemp -d)"
-            echo "Downloading skills-index artifact from run $SKILLS_INDEX_RUN_ID"
-            if gh run download "$SKILLS_INDEX_RUN_ID" --name skills-index --dir "$tmpdir"; then
-              candidate="$(find "$tmpdir" -name skills-index.json -type f | head -n 1 || true)"
-              if [ -n "$candidate" ]; then
-                cp "$candidate" "$INDEX_PATH"
-                if validate_index; then
-                  exit 0
-                fi
-              fi
-            fi
-            echo "::warning::Could not use skills-index artifact from run $SKILLS_INDEX_RUN_ID; trying live index"
-          fi
-
-          echo "Downloading currently live skills index"
-          if curl -fsSL --retry 3 --retry-delay 5 \
-            "https://hermes-agent.nousresearch.com/docs/api/skills-index.json" \
-            -o "$INDEX_PATH" && validate_index; then
-            exit 0
-          fi
-
-          echo "::warning::Live skills index unavailable or unhealthy; falling back to a fresh crawl"
-          rm -f "$INDEX_PATH"
+          # This MUST be fatal. build_skills_index.py runs a health check and
+          # exits non-zero WITHOUT writing the output file when a source
+          # collapses (e.g. a GitHub API rate limit zeroes the github /
+          # claude-marketplace / well-known taps all at once). Letting the
+          # deploy continue would either (a) ship a degenerate index missing
+          # whole hubs — the June 2026 regression where OpenAI/Anthropic/
+          # HuggingFace/NVIDIA tabs vanished — or (b) fall through to a
+          # local-only catalog. Failing here keeps the last good deployment
+          # live (GitHub Pages serves the previous build) instead of
+          # publishing a broken catalog. Re-run the workflow once the
+          # transient rate limit clears.
          python3 scripts/build_skills_index.py
-          validate_index

      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py
--- a/.github/workflows/docker-lint.yml
+++ b/.github/workflows/docker-lint.yml
@@ -18,12 +18,13 @@ on:
      - docker/**
      - .hadolint.yaml
      - .github/workflows/docker-lint.yml
-
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
+    paths:
+      - Dockerfile
+      - docker/**
+      - .hadolint.yaml
+      - .github/workflows/docker-lint.yml

 permissions:
  contents: read
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -11,13 +11,16 @@ on:
      - 'docker/**'
      - '.github/workflows/docker-publish.yml'
      - '.github/actions/hermes-smoke-test/**'
-
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
-
+    paths:
+      - '**/*.py'
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - 'Dockerfile'
+      - 'docker/**'
+      - '.github/workflows/docker-publish.yml'
+      - '.github/actions/hermes-smoke-test/**'
  release:
    types: [published]

@@ -87,7 +90,7 @@ jobs:
      # (see `_SKIP_PARTS` in scripts/run_tests_parallel.py) because each
      # shard would otherwise reach the session-scoped ``built_image``
      # fixture in ``tests/docker/conftest.py`` and start a 3-7min
-      # ``docker build`` — guaranteed to
+      # ``docker build`` under a 180s pytest-timeout cap — guaranteed to
      # die in fixture setup.
      #
      # Piggybacking here avoids a second image build: the smoke test
@@ -111,7 +114,7 @@ jobs:
        run: |
          uv venv .venv --python 3.11
          source .venv/bin/activate
-          # ``dev`` extra pulls in pytest, pytest-asyncio —
+          # ``dev`` extra pulls in pytest, pytest-asyncio, pytest-timeout —
          # everything tests/docker/ needs.  We deliberately avoid ``all``
          # here because the docker tests only drive the container via
          # subprocess and don't import hermes_agent's optional deps.
--- a/.github/workflows/docs-site-checks.yml
+++ b/.github/workflows/docs-site-checks.yml
@@ -1,12 +1,10 @@
 name: Docs Site Checks

 on:
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
-    branches: [main]
-
+    paths:
+      - 'website/**'
+      - '.github/workflows/docs-site-checks.yml'
  workflow_dispatch:

 permissions:
@@ -16,11 +14,11 @@ jobs:
  docs-site-checks:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

-      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
+      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
-          node-version: 22
+          node-version: 20
          cache: npm
          cache-dependency-path: website/package-lock.json

@@ -28,9 +26,9 @@ jobs:
        run: npm ci
        working-directory: website

-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
        with:
-          python-version: "3.11"
+          python-version: '3.11'

      - name: Install ascii-guard
        run: python -m pip install ascii-guard==2.3.0 pyyaml==6.0.3
--- a/.github/workflows/history-check.yml
+++ b/.github/workflows/history-check.yml
@@ -14,9 +14,6 @@ name: History Check
 # the PR head and main to be non-empty.

 on:
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]

@@ -27,9 +24,9 @@ jobs:
  check-common-ancestor:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
-          fetch-depth: 0 # full history both sides for merge-base
+          fetch-depth: 0  # full history both sides for merge-base

      - name: Reject PRs with no common ancestor on main
        run: |
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -15,12 +15,12 @@ on:
      - "**/*.md"
      - "docs/**"
      - "website/**"
-
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
+    paths-ignore:
+      - "**/*.md"
+      - "docs/**"
+      - "website/**"

 permissions:
  contents: read
@@ -154,6 +154,7 @@ jobs:
              });
            }

+
  ruff-blocking:
    # Enforce the rules in pyproject.toml [tool.ruff.lint.select]. Currently
    # PLW1514 (unspecified-encoding) — catches bare ``open()`` /
--- a/.github/workflows/nix-lockfile-fix.yml
+++ b/.github/workflows/nix-lockfile-fix.yml
@@ -0,0 +1,255 @@
+name: Nix Lockfile Fix
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'package-lock.json'
+      - 'package.json'
+      - 'ui-tui/package.json'
+      - 'apps/desktop/package.json'
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: 'PR number to fix (leave empty to run on the selected branch)'
+        required: false
+        type: string
+  issue_comment:
+    types: [edited]
+
+permissions:
+  contents: write
+  pull-requests: write
+
+concurrency:
+  group: nix-lockfile-fix-${{ github.event.issue.number || github.event.inputs.pr_number || github.ref }}
+  cancel-in-progress: false
+
+jobs:
+  # ── Auto-fix on main ───────────────────────────────────────────────
+  # Fires when a push to main touches any package manifest or lockfile
+  # listed in the paths filter above. Runs fix-lockfiles and pushes the
+  # hash update commit directly to main so Nix builds never stay broken.
+  #
+  # Safety invariants:
+  #   1. The fix commit only touches nix/lib.nix, which is NOT in
+  #      the paths filter above, so this cannot re-trigger itself.
+  #   2. An explicit file-whitelist check before commit aborts if
+  #      fix-lockfiles ever modifies unexpected files.
+  #   3. Job-level concurrency with cancel-in-progress: true ensures
+  #      back-to-back pushes collapse to the newest; ref: main checkout
+  #      always operates on the latest branch state.
+  #   4. Uses a GitHub App token (not GITHUB_TOKEN) so the fix commit
+  #      triggers downstream nix.yml verification.
+  auto-fix-main:
+    if: github.event_name == 'push'
+    runs-on: ubuntu-latest
+    timeout-minutes: 25
+    concurrency:
+      group: auto-fix-main
+      cancel-in-progress: true
+    steps:
+      - name: Generate GitHub App token
+        id: app-token
+        uses: actions/create-github-app-token@7bfa3a4717ef143a604ee0a99d859b8886a96d00  # v1.9.3
+        with:
+          app-id: ${{ secrets.APP_ID }}
+          private-key: ${{ secrets.APP_PRIVATE_KEY }}
+
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          ref: main
+          token: ${{ steps.app-token.outputs.token }}
+
+      - uses: ./.github/actions/nix-setup
+        with:
+          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
+
+      - name: Apply lockfile hashes
+        id: apply
+        run: nix run .#fix-lockfiles -- --apply
+
+      - name: Commit & push
+        if: steps.apply.outputs.changed == 'true'
+        shell: bash
+        run: |
+          set -euo pipefail
+
+          # Ensure only nix/lib.nix (home of the single npmDepsHash) was
+          # modified — prevents accidental self-triggering if fix-lockfiles
+          # ever touches package files.
+          unexpected="$(git diff --name-only | grep -Ev '^nix/lib\.nix$' || true)"
+          if [ -n "$unexpected" ]; then
+            echo "::error::Unexpected modified files: $unexpected"
+            exit 1
+          fi
+
+          # Record the base SHA before committing — used to detect package
+          # file changes if we need to rebase after a non-fast-forward push.
+          BASE_SHA="$(git rev-parse HEAD)"
+
+          git config user.name 'github-actions[bot]'
+          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
+          git add nix/lib.nix
+          git commit -m "fix(nix): auto-refresh npm lockfile hashes" \
+            -m "Source: $GITHUB_SHA" \
+            -m "Run: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
+
+          # Retry push with rebase in case main advanced with an unrelated
+          # commit during the nix build. Without this, a non-fast-forward
+          # rejection silently loses the fix. If package files changed during
+          # the rebase, abort — a fresh auto-fix run will handle the new state.
+          for attempt in 1 2 3; do
+            if git push origin HEAD:main; then
+              exit 0
+            fi
+            echo "::warning::Push attempt $attempt failed (non-fast-forward?), rebasing…"
+            git fetch origin main
+
+            # If package files changed between our base and the new main,
+            # our computed hashes are stale. Abort and let the next triggered
+            # run recompute from the correct package-lock state.
+            pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
+              'package-lock.json' 'package.json' \
+              'ui-tui/package.json' 'apps/desktop/package.json' || true)"
+            if [ -n "$pkg_changed" ]; then
+              echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
+              exit 0
+            fi
+
+            git rebase origin/main
+          done
+          echo "::error::Failed to push after 3 rebase attempts"
+          exit 1
+
+  # ── PR fix (manual / checkbox) ─────────────────────────────────────
+  # Existing behavior: run on manual dispatch OR when a task-list
+  # checkbox in the sticky lockfile-check comment flips from [ ] to [x].
+  fix:
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      (github.event_name == 'issue_comment'
+       && github.event.issue.pull_request != null
+       && contains(github.event.comment.body, '[x] **Apply lockfile fix**')
+       && !contains(github.event.changes.body.from, '[x] **Apply lockfile fix**'))
+    runs-on: ubuntu-latest
+    timeout-minutes: 25
+    steps:
+      - name: Authorize & resolve PR
+        id: resolve
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea  # v7.0.1
+        with:
+          script: |
+            // 1. Verify the actor has write access — applies to both checkbox
+            //    clicks and manual dispatch.
+            const { data: perm } =
+              await github.rest.repos.getCollaboratorPermissionLevel({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                username: context.actor,
+              });
+            if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
+              core.setFailed(
+                `${context.actor} lacks write access (has: ${perm.permission})`
+              );
+              return;
+            }
+
+            // 2. Resolve which ref to check out.
+            let prNumber = '';
+            if (context.eventName === 'issue_comment') {
+              prNumber = String(context.payload.issue.number);
+            } else if (context.eventName === 'workflow_dispatch') {
+              prNumber = context.payload.inputs.pr_number || '';
+            }
+
+            if (!prNumber) {
+              core.setOutput('ref', context.ref.replace(/^refs\/heads\//, ''));
+              core.setOutput('repo', context.repo.repo);
+              core.setOutput('owner', context.repo.owner);
+              core.setOutput('pr', '');
+              return;
+            }
+
+            const { data: pr } = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: Number(prNumber),
+            });
+            core.setOutput('ref', pr.head.ref);
+            core.setOutput('repo', pr.head.repo.name);
+            core.setOutput('owner', pr.head.repo.owner.login);
+            core.setOutput('pr', String(pr.number));
+
+      # Wipe the sticky lockfile-check comment to a "running" state as soon
+      # as the job is authorized, so the user sees their click was picked up
+      # before the ~minute of nix build work.
+      - name: Mark sticky as running
+        if: steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### 🔄 Applying lockfile fix…
+
+            Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
+          ref: ${{ steps.resolve.outputs.ref }}
+          token: ${{ secrets.GITHUB_TOKEN }}
+          fetch-depth: 0
+
+      - uses: ./.github/actions/nix-setup
+        with:
+          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
+
+      - name: Apply lockfile hashes
+        id: apply
+        run: nix run .#fix-lockfiles
+
+      - name: Commit & push
+        if: steps.apply.outputs.changed == 'true'
+        shell: bash
+        run: |
+          set -euo pipefail
+          git config user.name 'github-actions[bot]'
+          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
+          git add nix/lib.nix
+          git commit -m "fix(nix): refresh npm lockfile hashes"
+          git push
+
+      - name: Update sticky (applied)
+        if: steps.apply.outputs.changed == 'true' && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ✅ Lockfile fix applied
+
+            Pushed a commit refreshing the npm lockfile hashes — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - name: Update sticky (already current)
+        if: steps.apply.outputs.changed == 'false' && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ✅ Lockfile hashes already current
+
+            Nothing to commit — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - name: Update sticky (failed)
+        if: failure() && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ❌ Lockfile fix failed
+
+            See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for logs.
--- a/.github/workflows/nix.yml
+++ b/.github/workflows/nix.yml
@@ -0,0 +1,105 @@
+name: Nix
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+
+permissions:
+  contents: read
+  pull-requests: write
+
+concurrency:
+  group: nix-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  nix:
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+    runs-on: ${{ matrix.os }}
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: ./.github/actions/nix-setup
+        with:
+          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
+
+      - name: Resolve head SHA
+        if: github.event_name == 'pull_request'
+        id: sha
+        shell: bash
+        run: |
+          FULL="${{ github.event.pull_request.head.sha || github.sha }}"
+          echo "full=$FULL" >> "$GITHUB_OUTPUT"
+          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
+
+      - name: Check flake
+        id: flake
+        continue-on-error: true
+        run: nix flake check --print-build-logs
+
+      # When the flake check fails, run a targeted diagnostic to see if
+      # the failure is specifically a stale npm lockfile hash in one of the
+      # known npm subpackages (tui / web).  This avoids surfacing a generic
+      # "build failed" message when the fix is a single known command.
+      - name: Diagnose npm lockfile hashes
+        id: hash_check
+        if: steps.flake.outcome == 'failure' && runner.os == 'Linux'
+        continue-on-error: true
+        env:
+          LINK_SHA: ${{ steps.sha.outputs.full }}
+        run: nix run .#fix-lockfiles -- --check
+
+      # If fix-lockfiles itself crashes (infrastructure blip, cache throttle,
+      # etc.) it won't set stale=true/false.  Treat that as a distinct failure
+      # mode rather than silently ignoring it.
+      - name: Fail if hash check crashed without reporting
+        if: steps.hash_check.outcome == 'failure' && steps.hash_check.outputs.stale != 'true' && steps.hash_check.outputs.stale != 'false'
+        run: |
+          echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
+          exit 1
+
+      - name: Post sticky PR comment (stale hashes)
+        if: steps.hash_check.outputs.stale == 'true' && github.event_name == 'pull_request'
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          message: |
+            ### ⚠️ npm lockfile hash out of date
+
+            Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
+
+            The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
+
+            ${{ steps.hash_check.outputs.report }}
+
+            #### Apply the fix
+
+            - [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
+            - Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
+            - Or locally: `nix run .#fix-lockfiles` and commit the diff
+
+      # Clear the sticky comment when either the flake check passed outright (no
+      # hash check needed) or the hash check explicitly returned stale=false
+      # (check failed for a non-hash reason).
+      - name: Clear sticky PR comment (resolved)
+        if: |
+          github.event_name == 'pull_request' &&
+          (steps.hash_check.outputs.stale == 'false' ||
+           steps.flake.outcome == 'success')
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          delete: true
+
+      - name: Final fail if flake check failed
+        if: steps.flake.outcome == 'failure'
+        run: |
+          if [ "${{ steps.hash_check.outputs.stale }}" == "true" ]; then
+            echo "::error::Nix build failed due to stale npm lockfile hash. Run: nix run .#fix-lockfiles"
+          else
+            echo "::error::Nix flake check failed. See logs above."
+          fi
+          exit 1
--- a/.github/workflows/osv-scanner.yml
+++ b/.github/workflows/osv-scanner.yml
@@ -20,23 +20,29 @@ name: OSV-Scanner
 # vulnerabilities in pinned deps that we may need to patch deliberately.

 on:
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
+    paths:
+      - 'uv.lock'
+      - 'pyproject.toml'
+      - 'package.json'
+      - 'package-lock.json'
+      - 'ui-tui/package.json'
+      - 'website/package.json'
+      - 'website/package-lock.json'
+      - '.github/workflows/osv-scanner.yml'
  push:
    branches: [main]
    paths:
-      - "uv.lock"
-      - "pyproject.toml"
-      - "package.json"
-      - "package-lock.json"
-      - "website/package-lock.json"
+      - 'uv.lock'
+      - 'pyproject.toml'
+      - 'package.json'
+      - 'package-lock.json'
+      - 'website/package-lock.json'
  schedule:
    # Weekly scan against main — catches CVEs published after merge for
    # deps that haven't changed since.
-    - cron: "0 9 * * 1"
+    - cron: '0 9 * * 1'
  workflow_dispatch:

 permissions:
@@ -48,7 +54,7 @@ permissions:
 jobs:
  scan:
    name: Scan lockfiles
-    uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@9a498708959aeaef5ef730655706c5a1df1edbc2 # v2.3.8
+    uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@9a498708959aeaef5ef730655706c5a1df1edbc2  # v2.3.8
    with:
      # Scan explicit lockfiles rather than recursing, so we only look at
      # the three sources of truth and skip vendored / test / worktree dirs.
--- a/.github/workflows/skills-index.yml
+++ b/.github/workflows/skills-index.yml
@@ -53,4 +53,4 @@ jobs:
      - name: Trigger Deploy Site workflow
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: gh workflow run deploy-site.yml --repo ${{ github.repository }} -f skills_index_run_id=${{ github.run_id }}
+        run: gh workflow run deploy-site.yml --repo ${{ github.repository }}
--- a/.github/workflows/supply-chain-audit.yml
+++ b/.github/workflows/supply-chain-audit.yml
@@ -1,11 +1,11 @@
 name: Supply Chain Audit

 on:
+  pull_request:
+    types: [opened, synchronize, reopened]
  # No paths filter — the jobs must always run so required checks
  # report a status (path-gated workflows leave checks "pending" forever
  # when no matching files change, which blocks merge).
-  pull_request:
-    types: [opened, synchronize, reopened]

 permissions:
  pull-requests: write
@@ -29,10 +29,8 @@ jobs:
      scan: ${{ steps.filter.outputs.scan }}
      # True when pyproject.toml changed in this PR
      deps: ${{ steps.filter.outputs.deps }}
-      # True when the curated MCP catalog / bundled MCP manifests changed.
-      mcp_catalog: ${{ steps.filter.outputs.mcp_catalog }}
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          fetch-depth: 0
      - name: Check for relevant file changes
@@ -56,14 +54,6 @@ jobs:
          else
            echo "deps=false" >> "$GITHUB_OUTPUT"
          fi
-          MCP_CATALOG_FILES=$(git diff --name-only "$BASE"..."$HEAD" -- \
-            'optional-mcps/**' \
-            'hermes_cli/mcp_catalog.py' || true)
-          if [ -n "$MCP_CATALOG_FILES" ]; then
-            echo "mcp_catalog=true" >> "$GITHUB_OUTPUT"
-          else
-            echo "mcp_catalog=false" >> "$GITHUB_OUTPUT"
-          fi

  scan:
    name: Scan PR for critical supply chain risks
@@ -72,7 +62,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          fetch-depth: 0

@@ -207,7 +197,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          fetch-depth: 0

@@ -278,50 +268,3 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - run: echo "No pyproject.toml changes, skipping dependency bounds check."
-
-  mcp-catalog-review:
-    name: MCP catalog security review
-    needs: changes
-    if: needs.changes.outputs.mcp_catalog == 'true'
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          fetch-depth: 0
-
-      - name: Require explicit MCP catalog review label
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          set -euo pipefail
-          PR="${{ github.event.pull_request.number }}"
-          LABELS=$(gh pr view "$PR" --json labels --jq '.labels[].name' || true)
-          if echo "$LABELS" | grep -Fxq 'mcp-catalog-reviewed'; then
-            echo "MCP catalog review label present."
-            exit 0
-          fi
-
-          BODY="## ⚠️ MCP catalog security review required
-
-          This PR changes the bundled MCP catalog or MCP catalog installer code. MCP entries can define local commands that users later install into \`mcp_servers\`, so this needs explicit maintainer review before merge.
-
-          A maintainer should verify:
-          - any new/changed \`optional-mcps/**/manifest.yaml\` command and args are expected,
-          - stdio transports do not use shell+egress/exfiltration payloads,
-          - git install refs are pinned and bootstrap commands are minimal,
-          - requested env vars/secrets match the upstream MCP's documented needs.
-
-          After review, add the \`mcp-catalog-reviewed\` label and re-run this check."
-
-          gh pr comment "$PR" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs)"
-          echo "::error::MCP catalog changes require the mcp-catalog-reviewed label."
-          exit 1
-
-  mcp-catalog-review-gate:
-    name: MCP catalog security review
-    needs: changes
-    if: always() && needs.changes.outputs.mcp_catalog != 'true'
-    runs-on: ubuntu-latest
-    steps:
-      - run: echo "No MCP catalog changes, skipping MCP catalog security review."
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -4,13 +4,13 @@ on:
  push:
    branches: [main]
    paths-ignore:
-      - "**/*.md"
-      - "docs/**"
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
+      - '**/*.md'
+      - 'docs/**'
  pull_request:
    branches: [main]
+    paths-ignore:
+      - '**/*.md'
+      - 'docs/**'

 permissions:
  contents: read
@@ -30,17 +30,13 @@ jobs:
        slice: [1, 2, 3, 4, 5, 6]
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      - name: Restore duration cache
-        uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
+        uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae  # v5.0.5
        with:
          path: test_durations.json
-          # main always writes a new suffix, but jobs pick the latest one with the same prefix
-          # quote from https://docs.github.com/en/actions/reference/workflows-and-actions/dependency-caching#cache-hits-and-misses
-          # If you provide restore-keys, the cache action sequentially searches for any caches that match the list of restore-keys.
-          # If there are no exact matches, the action searches for partial matches of the restore keys.
-          # When the action finds a partial match, the most recent cache is restored to the path directory.
+          # Single stable key shared by main and PRs. NOTE(review): cache entries are immutable per key, so a later save may not overwrite it — confirm.
          key: test-durations

      - name: Install ripgrep (prebuilt binary)
@@ -58,32 +54,16 @@ jobs:
          rg --version

      - name: Install uv
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
-        with:
-          # Persist uv's download/wheel cache (~/.cache/uv) across runs.
-          # Keyed on the dependency manifests, so the cache is reused until
-          # pyproject.toml or uv.lock changes. `uv sync` still runs every
-          # time, but resolves from the warm cache instead of re-downloading
-          # and re-building wheels.
-          enable-cache: true
-          cache-dependency-glob: |
-            pyproject.toml
-            uv.lock
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5

      - name: Set up Python 3.11
        run: uv python install 3.11

      - name: Install dependencies
-        # `uv sync --locked` installs the exact pinned set from uv.lock (and
-        # fails if the lock is out of sync with pyproject.toml), giving a
-        # reproducible env. It also creates .venv itself, so no separate
-        # `uv venv` step is needed.
-        run: uv sync --locked --python 3.11 --extra all --extra dev
-
-      - name: Minimize uv cache
-        # Optimized for CI: prunes pre-built wheels that are cheap to
-        # re-download, keeping the persisted cache small and fast to restore.
-        run: uv cache prune --ci
+        run: |
+          uv venv .venv --python 3.11
+          source .venv/bin/activate
+          uv pip install -e ".[all,dev]"

      - name: Run tests (slice ${{ matrix.slice }}/6)
        # Per-file isolation via scripts/run_tests_parallel.py: discovers
@@ -119,7 +99,7 @@ jobs:
          NOUS_API_KEY: ""

      - name: Upload per-slice durations
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
        with:
          name: test-durations-slice-${{ matrix.slice }}
          path: test_durations.json
@@ -129,11 +109,11 @@ jobs:
  # (including PRs) get balanced slicing.
  save-durations:
    needs: test
-    if: needs.test.result == 'success' && github.ref == 'refs/heads/main'
+    if: always() && github.ref == 'refs/heads/main'
    runs-on: ubuntu-latest
    steps:
      - name: Download all slice durations
-        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
        with:
          pattern: test-durations-slice-*
          path: durations
@@ -153,17 +133,17 @@ jobs:
          "

      - name: Save merged duration cache
-        uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
+        uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae  # v5.0.5
        with:
          path: test_durations.json
-          key: test-durations-${{ github.run_id }}
+          key: test-durations

  e2e:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      - name: Install ripgrep (prebuilt binary)
        run: |
@@ -180,32 +160,16 @@ jobs:
          rg --version

      - name: Install uv
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
-        with:
-          # Persist uv's download/wheel cache (~/.cache/uv) across runs.
-          # Keyed on the dependency manifests, so the cache is reused until
-          # pyproject.toml or uv.lock changes. `uv sync` still runs every
-          # time, but resolves from the warm cache instead of re-downloading
-          # and re-building wheels.
-          enable-cache: true
-          cache-dependency-glob: |
-            pyproject.toml
-            uv.lock
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5

      - name: Set up Python 3.11
        run: uv python install 3.11

      - name: Install dependencies
-        # `uv sync --locked` installs the exact pinned set from uv.lock (and
-        # fails if the lock is out of sync with pyproject.toml), giving a
-        # reproducible env. It also creates .venv itself, so no separate
-        # `uv venv` step is needed.
-        run: uv sync --locked --python 3.11 --extra all --extra dev
-
-      - name: Minimize uv cache
-        # Optimized for CI: prunes pre-built wheels that are cheap to
-        # re-download, keeping the persisted cache small and fast to restore.
-        run: uv cache prune --ci
+        run: |
+          uv venv .venv --python 3.11
+          source .venv/bin/activate
+          uv pip install -e ".[all,dev]"

      - name: Packaged-wheel i18n smoke test
        run: |
@@ -219,4 +183,4 @@ jobs:
        env:
          OPENROUTER_API_KEY: ""
          OPENAI_API_KEY: ""
-          NOUS_API_KEY: ""
+          NOUS_API_KEY: ""
--- a/.github/workflows/typecheck.yml
+++ b/.github/workflows/typecheck.yml
@@ -1,45 +0,0 @@
-# .github/workflows/typecheck.yml
-name: Typecheck
-
-on:
-  push:
-    branches: [main]
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
-  pull_request:
-    branches: [main]
-
-jobs:
-  typecheck:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        package:
-          [ui-tui, web, apps/bootstrap-installer, apps/desktop, apps/shared]
-      fail-fast: false # report all failures, not just the first one
-    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
-        with:
-          node-version: 22
-          cache: npm
-      - run: npm ci
-      - run: npm run --prefix ${{ matrix.package }} typecheck
-
-  # Production build of the desktop renderer. `typecheck` runs `tsc` only,
-  # which does NOT exercise Vite/Rolldown module resolution — so an
-  # unresolvable package export (e.g. a transitive @assistant-ui/tap that no
-  # longer exports "./react-shim") slips past typecheck and only explodes when
-  # users build apps/desktop from source on install/update. Run the real
-  # `vite build` here so that class of break fails in CI instead.
-  desktop-build:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
-        with:
-          node-version: 22
-          cache: npm
-      - run: npm ci
-      - run: npm run --prefix apps/desktop build
--- a/.github/workflows/uv-lockfile-check.yml
+++ b/.github/workflows/uv-lockfile-check.yml
@@ -47,15 +47,15 @@ on:
  push:
    branches: [main]
    paths:
-      - "pyproject.toml"
-      - "uv.lock"
-      - ".github/workflows/uv-lockfile-check.yml"
-
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - '.github/workflows/uv-lockfile-check.yml'
  pull_request:
    branches: [main]
+    paths:
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - '.github/workflows/uv-lockfile-check.yml'

 permissions:
  contents: read
@@ -71,10 +71,10 @@ jobs:
    timeout-minutes: 5
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      - name: Install uv
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5

      # `uv lock --check` re-resolves the project from pyproject.toml and
      # compares the result to uv.lock, exiting non-zero if they disagree.
--- a/.gitignore
+++ b/.gitignore
@@ -89,9 +89,6 @@ website/static/api/skills-index.json
 # every build).
 website/static/api/skills.json
 website/static/api/skills-meta.json
-# automation-blueprints-index.json is a build artifact emitted by
-# website/scripts/extract-automation-blueprints.py during prebuild.
-website/static/api/automation-blueprints-index.json
 models-dev-upstream/

 # Local editor / agent tooling (machine-specific; keep in global config, not the repo)
@@ -117,12 +114,6 @@ docs/superpowers/*
 # treat it as a local edit and autostash it on every run (#38529).
 .hermes-bootstrap-complete

-# Interrupted-update breadcrumb + recovery lock written next to the shared venv
-# by `hermes update` / launch-time self-heal. Runtime state, never a code change
-# — ignore so `git status` stays clean and update's autostash skips them.
-.update-incomplete
-.update-incomplete.lock
-
 # Tool Search live-test harness output — non-deterministic model transcripts,
 # regenerated by scripts/tool_search_livetest.py. Never an artifact of the repo.
 scripts/out/
@@ -132,7 +123,3 @@ scripts/out/
 # stores the published notes. They are not a build artifact and must never be
 # committed to the repo root. See the hermes-release skill.
 RELEASE_v*.md
-
-# Desktop demo-run scratch output (hermes writes demo/*.txt during recorded
-# walkthroughs). Throwaway artifacts, never part of the app.
-apps/desktop/demo/
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -4,201 +4,6 @@ Instructions for AI coding assistants and developers working on the hermes-agent

 **Never give up on the right solution.**

-## What Hermes Is
-
-Hermes is a personal AI agent that runs the same agent core across a CLI, a
-messaging gateway (Telegram, Discord, Slack, and ~20 other platforms), a TUI,
-and an Electron desktop app. It learns across sessions (memory + skills),
-delegates to subagents, runs scheduled jobs, and drives a real terminal and
-browser. It is extended primarily through **plugins and skills**, not by
-growing the core.
-
-Two properties shape almost every design decision and are the lens for
-reviewing any change:
-
- **Per-conversation prompt caching is sacred.** A long-lived conversation
-  reuses a cached prefix every turn. Anything that mutates past context,
-  swaps toolsets, or rebuilds the system prompt mid-conversation invalidates
-  that cache and multiplies the user's cost. We do not do it (the one
-  exception is context compression).
- **The core is a narrow waist; capability lives at the edges.** Every model
-  tool we add is sent on every API call, so the bar for a new *core* tool is
-  high. Most new capability should arrive as a CLI command + skill, a
-  service-gated tool, or a plugin — not as core surface.
-
-## Contribution Rubric — What We Want / What We Don't
-
-This is the project's intent layer. Use it two ways:
-
-1. **For humans and for your own work** — what gets merged and what gets
-   rejected, so a contribution aims at the target.
-2. **For automated review (the triage sweeper)** — guidance on when a PR is
-   safe to close on the three allowed reasons (`implemented_on_main`,
-   `cannot_reproduce`, `incoherent`) and, just as important, **when NOT to
-   close** one. Taste-based "we don't want this / out of scope" closes are NOT
-   an automated decision — those stay with a human maintainer. The sweeper's
-   job here is to recognize design intent and *avoid wrongly closing a
-   legitimate contribution*, not to make the won't-implement call itself.
-
-Read the balance right: Hermes ships a **lot** — most merges are bug fixes to
-real reported behavior, and the product surface (platforms, channels,
-providers, models, desktop/TUI features) expands aggressively and on purpose.
-The restraint below is aimed squarely at the **core agent + the model tool
-schema**, the one place where every addition is paid for on every API call.
-"Smallest footprint" governs *how a capability is wired into the core*, NOT
-whether the product is allowed to grow. We are expansive at the edges and
-conservative at the waist.
-
-### What we want
-
- **Fix real bugs, well.** The bulk of what lands is `fix(...)` against an
-  actual reported symptom. A good fix reproduces the symptom on current
-  `main`, points to the exact line where it manifests, and fixes the whole bug
-  class — sibling call paths included — not just the one site the reporter hit.
- **Expand reach at the edges.** New platform adapters, channels, providers,
-  models, and desktop/TUI/dashboard features are welcome and land routinely,
-  including large ones (a new messaging channel, a session-cap feature, a
-  Windows PTY bridge). Breadth in the product is a goal, not a footprint
-  concern — as long as it integrates with the existing setup/config UX
-  (`hermes tools`, `hermes setup`, auto-install) rather than bolting on a raw
-  env var.
- **Refactor god-files into clean modules.** Extracting a multi-thousand-line
-  cluster out of `cli.py` / `run_agent.py` / `gateway/run.py` into a focused
-  mixin or module is wanted work, even when the diff is huge and mechanical
-  (large `+N/-N` refactors merge regularly). The "every line traces to the
-  request" test applies to *feature* PRs; a declared refactor's request IS the
-  extraction.
- **Keep the core narrow.** New *model tools* are the expensive exception —
-  every tool ships on every API call. Prefer, in order: extend existing code →
-  CLI command + skill → service-gated tool (`check_fn`) → plugin → MCP server
-  in the catalog → new core tool (last resort). See "The Footprint Ladder."
- **Extend, don't duplicate.** Before adding a module/manager/hook, check
-  whether existing infrastructure already covers the use case. When several PRs
-  integrate the same *category*, design one shared interface instead of merging
-  them one at a time (see the ABC + orchestrator note under the Footprint
-  Ladder).
- **Behavior contracts over snapshots.** Tests should assert how two pieces of
-  data must relate (invariants), not freeze a current value (model lists,
-  config version literals, enumeration counts). See "Don't write
-  change-detector tests."
- **E2E validation, not just green unit mocks.** For anything touching
-  resolution chains, config propagation, security boundaries, remote
-  backends, or file/network I/O, exercise the real path with real imports
-  against a temp `HERMES_HOME`. Mocks hide integration bugs.
- **Cache-, alternation-, and invariant-safe.** Preserve prompt caching, strict
-  message role alternation (never two same-role messages in a row; never a
-  synthetic user message injected mid-loop), and a system prompt that is
-  byte-stable for the life of a conversation.
- **Contributor credit preserved.** Salvage external work by cherry-picking
-  (rebase-merge) so authorship survives in git history; don't reimplement from
-  scratch when you can build on top.
-
-### What we don't want (rejected even when well-built)
-
- **Speculative infrastructure.** Hooks, callbacks, or extension points with no
-  concrete consumer. Adding a hook is easy; removing one after plugins depend
-  on it is hard. A hook is NOT speculative if a contributor has a real, stated
-  use case — even if the consumer ships separately.
- **New `HERMES_*` env vars for non-secret config.** `.env` is for secrets
-  only (API keys, tokens, passwords). All behavioral settings — timeouts,
-  thresholds, feature flags, display prefs — go in `config.yaml`. Bridge to an
-  internal env var if the mechanism needs one, but user-facing docs point to
-  `config.yaml`. Reject PRs that tell users to "set X in your .env" unless X
-  is a credential.
- **A new core tool when terminal + file already do the job, or when a skill
-  would.** If the only barrier is file visibility on a remote backend, fix the
-  mount, not the toolset.
- **Lazy-reading escape hatches on instructional tools.** No `offset`/`limit`
-  pagination on tools that load content the agent must read fully (skills,
-  prompts, playbooks). Models will read page 1 and skip the rest.
- **"Fixes" that destroy the feature they secure.** A mitigation that kills the
-  feature's purpose is the wrong mitigation. Read the original commit's intent
-  (`git log -p -S`) before restricting behavior; find a fix that preserves the
-  feature.
- **Outbound telemetry / usage attribution without opt-in gating.** No new
-  analytics, third-party identifier tagging, or attribution tags until a
-  generic user-facing opt-in (config gate + setup prompt + `hermes tools`
-  toggle) exists. Park behind a label, do not merge.
- **Change-detector tests, cache-breaking mid-conversation, dead code wired in
-  without E2E proof, and plugins that touch core files.** Plugins live in their
-  own directory and work within the ABCs/hooks we provide; if a plugin needs
-  more, widen the generic plugin surface, don't special-case it in core.
-
-### Before you call it a bug — verify the premise (and when NOT to close)
-
-The most common reason a well-written PR gets closed is not code quality — it
-is that the change is built on a **wrong premise**, or it treats an
-**intentional design as a gap**. These patterns cut both ways: they tell a
-human reviewer what to scrutinize, and they tell the automated sweeper when a
-PR is NOT safe to close as `implemented_on_main` / `cannot_reproduce` (when in
-doubt, leave it open for a human). They are distilled from real closes.
-
- **"Intentional design, not a gap."** A limitation that looks like an
-  oversight is often deliberate. Before "fixing" a missing link or a
-  restriction, ask whether the isolation IS the design. Example: profiles are
-  independent islands on purpose — a PR adding live config inheritance from the
-  default profile was closed because coupling profiles together is exactly what
-  the design prevents (the copy-at-creation `--clone` path already covers the
-  legitimate "start from my default" case). Read the original commit's intent
-  (`git log -p -S "<symbol>"`) before assuming something is unfinished.
- **"The premise doesn't hold against how X actually works."** A PR's
-  justification frequently rests on a wrong mental model of an existing
-  mechanism. Trace the real code/runtime before accepting the rationale. Two
-  real closes: a rate-limit "re-probe during cooldown" PR (the breaker only
-  trips on a *confirmed-empty* account bucket, so re-probing just hammers a
-  bucket we've already proven empty); a usage-accumulation fix whose new branch
-  **never executes at runtime** because an earlier guard already popped the
-  state it depended on. If you can't point to the exact line where the bug
-  manifests AND show the fix changes that line's behavior, you haven't verified
-  the premise.
- **"This fix was wrong — the absence/omission was deliberate."** Adding the
-  obvious-looking missing piece can break things the omission was protecting.
-  Example: restoring "missing" `__init__.py` files made a test tree importable
-  as a dotted package that shadowed the real plugin, deleting its `register()`
-  at import time. The absence was load-bearing.
- **"Overreached / resurrected an approach we'd moved past."** Scope creep that
-  supersedes an agreed-on base, or revives a direction the maintainers
-  deliberately closed, gets rejected even when the code works. Keep the change
-  to the narrow piece that was actually agreed; offer the rest as a focused
-  follow-up.
-
-The throughline: **verify the claim AND the intent against the codebase before
-writing or merging a fix.** A confirmed reproduction on current `main` plus a
-line-level account of where the fix acts beats a plausible-sounding rationale
-every time. When in doubt about intent, it is cheaper to ask than to ship a
-fix that fights the design.
-
-### The Footprint Ladder (new capability decision)
-
-Each rung adds more permanent surface than the one above. Choose the highest
-(least-footprint) rung that correctly solves the problem:
-
-1. **Extend existing code** — the capability is a variation of something that
-   already exists. Zero new surface.
-2. **CLI command + skill** — manages config/state/infra expressible as shell
-   commands. The agent runs `hermes <subcommand>` guided by a skill. Zero
-   model-tool footprint. Default choice for subscriptions, scheduled tasks,
-   service setup. Examples: `hermes webhook`, `hermes cron`, `hermes tools`.
-3. **Service-gated tool (`check_fn`)** — needs structured params/returns AND
-   only appears when a prerequisite is configured. Zero footprint otherwise.
-   Examples: Home Assistant tools (gated on token), memory-provider tools.
-4. **Plugin** — third-party/niche/user-specific capability that doesn't ship in
-   core. Lives in `~/.hermes/plugins/` or a pip package, discovered at runtime.
-5. **MCP server (in the catalog)** — if the capability genuinely needs to be a
-   tool (structured I/O the agent invokes) but isn't core-fundamental, prefer
-   building it as an MCP server and adding it to the MCP catalog over growing
-   the core toolset. The agent connects to it through the built-in MCP client;
-   zero permanent core-schema footprint, and it's reusable by any MCP host.
-6. **New core tool** — only when the capability is fundamental, broadly useful
-   to nearly every user, and unreachable via terminal + file (or an MCP server).
-   Examples of correct core tools: terminal, read_file, web_search,
-   browser_navigate.
-
-When 3+ open PRs try to integrate the same *category* of thing (memory
-backends, providers, notifiers), don't merge them one at a time — design an
-ABC + orchestrator, wrap the existing built-in as the first provider, and turn
-the competing PRs into plugins against that interface.
-
 ## Development Environment

 ```bash
@@ -459,7 +264,7 @@ npm install       # first time
 npm run dev       # watch mode (rebuilds hermes-ink + tsx --watch)
 npm start         # production
 npm run build     # full build (hermes-ink + tsc)
-npm run typecheck # typecheck only (tsc --noEmit)
+npm run type-check # typecheck only (tsc --noEmit)
 npm run lint      # eslint
 npm run fmt       # prettier
 npm test          # vitest
@@ -497,11 +302,9 @@ A **separate** chat surface from both the classic CLI and the dashboard's embedd

 ## Adding New Tools

-Before adding any tool, settle the footprint question first (see "The
-Footprint Ladder" in the Contribution Rubric): most capabilities should NOT
-be core tools. For custom or local-only tools, do **not** edit Hermes core.
-Use the plugin route instead: create `~/.hermes/plugins/<name>/plugin.yaml`
-and `~/.hermes/plugins/<name>/__init__.py`, then register tools with
+For most custom or local-only tools, do **not** edit Hermes core. Use the plugin
+route instead: create `~/.hermes/plugins/<name>/plugin.yaml` and
+`~/.hermes/plugins/<name>/__init__.py`, then register tools with
 `ctx.register_tool(...)`. Plugin toolsets are discovered automatically and can be
 enabled or disabled without touching `tools/` or `toolsets.py`.

--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -78,41 +78,7 @@ This isn't a quality bar — it's a coupling-and-maintenance decision. Memory pr
 | **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
 | **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |

-### Install with the standard installer
-
-For most contributors, the best development bootstrap is the same path users
-take: run the standard installer, then work inside the repository it cloned.
-The installer creates the Hermes venv, wires the `hermes` command, stamps the
-install method for `hermes update`, and clones the full git project into
-`$HERMES_HOME/hermes-agent` (usually `~/.hermes/hermes-agent`). That keeps your
-development environment on the same layout the CLI, updater, lazy dependency
-installer, gateway, and docs assume.
-
-```bash
-curl -fsSL https://hermes-agent.nousresearch.com/install.sh | bash
-cd "${HERMES_HOME:-$HOME/.hermes}/hermes-agent"
-
-# Add dev/test extras on top of the standard install.
-uv pip install -e ".[all,dev]"
-
-# Optional: browser tools / docs site dependencies.
-npm install
-```
-
-After that, create branches and run tests from that checkout:
-
-```bash
-git checkout -b fix/description
-scripts/run_tests.sh
-```
-
-### Manual clone fallback
-
-Use this only if you intentionally do not want Hermes' managed install layout
-(for example, a throwaway clone inside a container or CI job). If you install
-this way, make sure you run the `hermes` entrypoint from this venv; running the
-system `python3 -m hermes_cli.main` can pick up unrelated system Python
-packages.
+### Clone and install

 ```bash
 git clone https://github.com/NousResearch/hermes-agent.git
@@ -143,17 +109,13 @@ echo "OPENROUTER_API_KEY=***" >> ~/.hermes/.env
 ### Run

 ```bash
-# The standard installer already put `hermes` on PATH.
-hermes doctor
-hermes chat -q "Hello"
-```
-
-If you used the manual clone fallback, run `./hermes` from the checkout or
-symlink this clone's venv explicitly:
-
-```bash
+# Symlink for global access
 mkdir -p ~/.local/bin
 ln -sf "$(pwd)/venv/bin/hermes" ~/.local/bin/hermes
+
+# Verify
+hermes doctor
+hermes chat -q "Hello"
 ```

 ### Run tests
--- a/59
+++ b/59
@@ -1,14 +1,12 @@
 FROM ghcr.io/astral-sh/uv:0.11.6-python3.13-trixie@sha256:b3c543b6c4f23a5f2df22866bd7857e5d304b67a564f4feab6ac22044dde719b AS uv_source
-# Node 26 source stage. Debian trixie's bundled nodejs is pinned to 20.x
-# (EOL April 2026), so we copy node + npm + corepack from the upstream node:26
-# image instead.  Node 26 (Current; LTS promotion ~Oct 2026) is REQUIRED by the
-# native OpenTUI TUI engine, which loads its renderer via the experimental
-# `node:ffi` API that only exists on Node 26.3+ (the Ink engine + web build run
-# on it too).  Bookworm-based slim image used so the produced binary links
-# against glibc 2.36, which runs cleanly on our Debian 13 (trixie, glibc 2.41)
-# runtime.  The pinned tag ships v26.3.0.  Bumping Node is a one-line change here.
-# NOTE: verify the full image build + Ink/web/Playwright on Node 26 in CI.
-FROM node:26-bookworm-slim@sha256:79723b41edbedf595f62e943a9f8b0ba9af5b1e61045c5f8f59c2c02c1212a16 AS node_source
+# Node 22 LTS source stage. Debian trixie's bundled nodejs is pinned to 20.x
+# which reached EOL in April 2026 — we copy node + npm + corepack from the
+# upstream node:22 image instead so we can stay on a supported LTS without
+# waiting for Debian 14 (forky, ~mid-2027).  Bookworm-based slim image used
+# so the produced binary links against glibc 2.36, which runs cleanly on
+# our Debian 13 (trixie, glibc 2.41) runtime.  Bumping to a new Node major
+# is a one-line change to the FROM line below; see #4977.
+FROM node:22-bookworm-slim@sha256:7af03b14a13c8cdd38e45058fd957bf00a72bbe17feac43b1c15a689c029c732 AS node_source
 FROM debian:13.4

 # Disable Python stdout buffering to ensure logs are printed immediately
@@ -27,7 +25,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
 # hermes process, the dashboard, and per-profile gateways.
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-    ca-certificates curl iputils-ping python3 python-is-python3 ripgrep ffmpeg gcc g++ make cmake python3-dev python3-venv libffi-dev libolm-dev procps git openssh-client docker-cli xz-utils && \
+    ca-certificates curl iputils-ping python3 python-is-python3 ripgrep ffmpeg gcc python3-dev python3-venv libffi-dev libolm-dev procps git openssh-client docker-cli xz-utils && \
    rm -rf /var/lib/apt/lists/*

 # ---------- s6-overlay install ----------
@@ -92,7 +90,7 @@ RUN useradd -u 10000 -m -d /opt/data hermes

 COPY --chmod=0755 --from=uv_source /usr/local/bin/uv /usr/local/bin/uvx /usr/local/bin/

-# Node 26: copy the node binary plus the bundled npm + corepack JS
+# Node 22 LTS: copy the node binary plus the bundled npm + corepack JS
 # installs from the upstream image.  npm and npx are recreated as symlinks
 # because they're symlinks in the source image (and need to live on PATH).
 # See node_source stage at the top of the file for the version-bump
@@ -121,7 +119,7 @@ COPY ui-tui/packages/hermes-ink/ ui-tui/packages/hermes-ink/

 # `npm_config_install_links=false` forces npm to install `file:` deps as
 # symlinks instead of copies.  This is the default since npm 10+, which is
-# what the image ships now (via the node:26 source stage).  We set it
+# what the image ships now (via the node:22 source stage).  We set it
 # explicitly anyway as defense-in-depth: the previous Debian-bundled npm
 # 9.x defaulted to install-as-copy, which produced a hidden
 # node_modules/.package-lock.json that permanently disagreed with the root
@@ -148,9 +146,9 @@ RUN npm install --prefer-offline --no-audit && \
 #
 # `uv sync --frozen --no-install-project --extra all --extra messaging`
 # installs the deps reachable through the composite `[all]` extra
-# (handpicked set intended for the production image — excludes `[dev]`),
-# plus gateway messaging adapters that should work in the published image
-# without a first-boot lazy install.  We do NOT use `--all-extras`:
+# (handpicked set intended for the production image), plus gateway
+# messaging adapters that should work in the published image without a
+# first-boot lazy install.  We do NOT use `--all-extras`:
 # that would pull in `[rl]` (atroposlib + tinker + torch + wandb from
 # git), `[yc-bench]` (another git dep), and `[termux-all]` (Android
 # redundancy), none of which belong in the published container.
@@ -166,38 +164,19 @@ RUN npm install --prefer-offline --no-audit && \
 # image update and recall/retain then fails with
 # `ModuleNotFoundError: No module named 'hindsight_client'` (#38128).
 #
-# The Matrix gateway's deps ([matrix] extra) are baked in because
-# python-olm (transitive via mautrix[encryption]) builds from source on
-# Python/image combinations without usable wheels.  The Docker image is
-# Linux-only, so keeping the native libolm/build-toolchain packages here
-# avoids the cross-platform failures that kept [matrix] out of [all]
-# while still making Matrix work in the published container. Fixes #30399.
-#
 # The editable link is created after the source copy below.
 COPY pyproject.toml uv.lock ./
 RUN touch ./README.md
-RUN uv sync --frozen --no-install-project --extra all --extra messaging --extra anthropic --extra bedrock --extra azure-identity --extra hindsight --extra matrix
-
-# ---------- Frontend build (cached independently from Python source) ----------
-# Copy only the frontend source trees first so that Python-only changes don't
-# invalidate the (relatively slow) web + ui-tui build layer.
-COPY web/ web/
-COPY ui-tui/ ui-tui/
-COPY ui-opentui/ ui-opentui/
-# ui-opentui is the opt-in native OpenTUI engine (HERMES_TUI_ENGINE=opentui;
-# default stays Ink). .dockerignore strips its node_modules/dist, so install +
-# esbuild-build it here -> dist/main.js, then prune devDeps (esbuild/babel/
-# vitest); the runtime only needs the prod deps (the external @opentui/core +
-# its native blob -- the bundle inlines solid/effect). Build needs Node 26.3
-# (node:ffi floor), which this image ships.
-RUN cd web && npm run build && \
-    cd ../ui-tui && npm run build && \
-    cd ../ui-opentui && npm install --no-audit --no-fund && npm run build && npm prune --omit=dev
+RUN uv sync --frozen --no-install-project --extra all --extra messaging --extra anthropic --extra bedrock --extra azure-identity --extra hindsight

 # ---------- Source code ----------
 # .dockerignore excludes node_modules, so the installs above survive.
 COPY --chown=hermes:hermes . .

+# Build browser dashboard and terminal UI assets.
+RUN cd web && npm run build && \
+    cd ../ui-tui && npm run build
+
 # ---------- Permissions ----------
 # Make install dir world-readable so any HERMES_UID can read it at runtime.
 # The venv needs to be traversable too.
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,6 +1,5 @@
 graft skills
 graft optional-skills
-graft optional-mcps
 graft locales
 # Bundled plugin manifests (plugin.yaml / plugin.yml). Without these the
 # PluginManager scan (hermes_cli/plugins.py) finds zero plugins on installs
--- a/README.md
+++ b/README.md
@@ -3,9 +3,7 @@
 </p>

 # Hermes Agent ☤
-<p align="center">
-  <a href="https://hermes-agent.nousresearch.com/">Hermes Agent</a> | <a href="https://hermes-agent.nousresearch.com/">Hermes Desktop</a>
-</p>
+
 <p align="center">
  <a href="https://hermes-agent.nousresearch.com/docs/"><img src="https://img.shields.io/badge/Docs-hermes--agent.nousresearch.com-FFD700?style=for-the-badge" alt="Documentation"></a>
  <a href="https://discord.gg/NousResearch"><img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord"></a>
@@ -107,8 +105,6 @@ You can still bring your own keys per-tool whenever you want — the gateway is

 Hermes has two entry points: start the terminal UI with `hermes`, or run the gateway and talk to it from Telegram, Discord, Slack, WhatsApp, Signal, or Email. Once you're in a conversation, many slash commands are shared across both interfaces.

-> **TUI engine:** On supported hosts (Linux/macOS with Node 26.3+), the terminal UI defaults to the native **OpenTUI** engine, which the installer provisions for you. The legacy **Ink** engine remains the fallback — it's used automatically on Windows, Termux, or when the native engine can't run, and you can select it explicitly with `HERMES_TUI_ENGINE=ink hermes`. Ink is not going away; it's the kept fallback.
-
 | Action                         | CLI                                           | Messaging platforms                                                              |
 | ------------------------------ | --------------------------------------------- | -------------------------------------------------------------------------------- |
 | Start chatting                 | `hermes`                                      | Run `hermes gateway setup` + `hermes gateway start`, then send the bot a message |
@@ -183,20 +179,16 @@ See `hermes claw migrate --help` for all options, or use the `openclaw-migration

 We welcome contributions! See the [Contributing Guide](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) for development setup, code style, and PR process.

-Quick start for contributors — use the standard installer, then work from the
-full git checkout it creates at `$HERMES_HOME/hermes-agent` (usually
-`~/.hermes/hermes-agent`). This matches the layout used by `hermes update`, the
-managed venv, lazy dependencies, gateway, and docs tooling.
+Quick start for contributors — clone and go with `setup-hermes.sh`:

 ```bash
-curl -fsSL https://hermes-agent.nousresearch.com/install.sh | bash
-cd "${HERMES_HOME:-$HOME/.hermes}/hermes-agent"
-uv pip install -e ".[all,dev]"
-scripts/run_tests.sh
+git clone https://github.com/NousResearch/hermes-agent.git
+cd hermes-agent
+./setup-hermes.sh     # installs uv, creates venv, installs .[all], symlinks ~/.local/bin/hermes
+./hermes              # auto-detects the venv, no need to `source` first
 ```

-Manual clone fallback (for throwaway clones/CI where you intentionally do not
-want the managed install layout):
+Manual path (equivalent to the above):

 ```bash
 curl -LsSf https://astral.sh/uv/install.sh | sh
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -164,18 +164,16 @@ hermes claw migrate --overwrite  # 覆盖已有冲突

 欢迎贡献！请参阅 [贡献指南](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) 了解开发设置、代码风格和 PR 流程。

-贡献者快速开始——使用标准安装器，然后在它创建的完整 git checkout 中开发：
-`$HERMES_HOME/hermes-agent`（通常是 `~/.hermes/hermes-agent`）。这会匹配
-`hermes update`、托管 venv、lazy dependencies、gateway 和 docs tooling 使用的布局。
+贡献者快速开始——克隆并使用 `setup-hermes.sh`：

 ```bash
-curl -fsSL https://hermes-agent.nousresearch.com/install.sh | bash
-cd "${HERMES_HOME:-$HOME/.hermes}/hermes-agent"
-uv pip install -e ".[all,dev]"
-scripts/run_tests.sh
+git clone https://github.com/NousResearch/hermes-agent.git
+cd hermes-agent
+./setup-hermes.sh     # 安装 uv、创建 venv、安装 .[all]、创建符号链接 ~/.local/bin/hermes
+./hermes              # 自动检测 venv，无需先 source
 ```

-手动克隆备用路径（用于一次性 clone / CI，或你明确不想使用 managed install layout 时）：
+手动安装（等效于上述命令）：

 ```bash
 curl -LsSf https://astral.sh/uv/install.sh | sh
--- a/acp_adapter/server.py
+++ b/acp_adapter/server.py
@@ -824,7 +824,6 @@ class HermesACPAgent(acp.Agent):

        try:
            from model_tools import get_tool_definitions
-            from agent.memory_manager import inject_memory_provider_tools

            enabled_toolsets = _expand_acp_enabled_toolsets(
                getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"],
@@ -840,7 +839,6 @@ class HermesACPAgent(acp.Agent):
            state.agent.valid_tool_names = {
                tool["function"]["name"] for tool in state.agent.tools or []
            }
-            inject_memory_provider_tools(state.agent)
            invalidate = getattr(state.agent, "_invalidate_system_prompt", None)
            if callable(invalidate):
                invalidate()
@@ -1781,25 +1779,10 @@ class HermesACPAgent(acp.Agent):
    def _cmd_tools(self, args: str, state: SessionState) -> str:
        try:
            from model_tools import get_tool_definitions
-            from types import SimpleNamespace
-            from agent.memory_manager import inject_memory_provider_tools
-
            toolsets = _expand_acp_enabled_toolsets(
                getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
            )
            tools = get_tool_definitions(enabled_toolsets=toolsets, quiet_mode=True)
-            tool_view = SimpleNamespace(
-                tools=list(tools or []),
-                valid_tool_names={
-                    tool.get("function", {}).get("name")
-                    for tool in tools or []
-                    if isinstance(tool, dict)
-                },
-                enabled_toolsets=toolsets,
-                _memory_manager=getattr(state.agent, "_memory_manager", None),
-            )
-            inject_memory_provider_tools(tool_view)
-            tools = tool_view.tools
            if not tools:
                return "No tools available."
            lines = [f"Available tools ({len(tools)}):"]
--- a/agent/account_usage.py
+++ b/agent/account_usage.py
@@ -145,7 +145,7 @@ def build_nous_credits_snapshot(account_info) -> Optional[AccountUsageSnapshot]:
    account info to show (fail-open: caller just shows nothing).
    """
    try:
-        from hermes_cli.nous_account import nous_portal_topup_url
+        from hermes_cli.nous_account import nous_portal_billing_url

        if account_info is None or not getattr(account_info, "logged_in", False):
            return None
@@ -213,8 +213,7 @@ def build_nous_credits_snapshot(account_info) -> Optional[AccountUsageSnapshot]:
        if not windows and not details:
            return None

-        details.append(f"Top up: {nous_portal_topup_url(account_info)}")
-        details.append("(or run /credits)")
+        details.append(f"Manage / top up: {nous_portal_billing_url(account_info)}")

        plan = getattr(sub, "plan", None) if sub is not None else None
        return AccountUsageSnapshot(
@@ -338,93 +337,6 @@ def _snapshot_from_credits_state(state) -> Optional[AccountUsageSnapshot]:
        return None


-@dataclass(frozen=True)
-class CreditsView:
-    """Surface-agnostic data for the ``/credits`` command.
-
-    One portal fetch, one parse — consumed identically by the CLI panel, the
-    gateway button, and any other money surface. Fail-open: when not logged in
-    or the portal is unreachable, ``logged_in`` is False / ``topup_url`` is None
-    and callers degrade gracefully.
-    """
-
-    logged_in: bool
-    balance_lines: tuple[str, ...] = ()
-    identity_line: Optional[str] = None
-    topup_url: Optional[str] = None
-    depleted: bool = False
-
-
-def build_credits_view(*, markdown: bool = False, timeout: float = 10.0) -> CreditsView:
-    """Build the /credits view: balance block + identity line + top-up URL.
-
-    Reuses the same account fetch + snapshot + URL builder as the /usage credits
-    block, so the numbers always match. The balance block is the rendered
-    snapshot MINUS its trailing top-up/command-hint lines (the /credits surface
-    supplies its own affordance). Fail-open → ``CreditsView(logged_in=False)``.
-    """
-    not_logged_in = CreditsView(logged_in=False)
-    try:
-        from hermes_cli.auth import get_provider_auth_state
-
-        tok = (get_provider_auth_state("nous") or {}).get("access_token")
-        if not (isinstance(tok, str) and tok.strip()):
-            return not_logged_in
-    except Exception:
-        return not_logged_in
-
-    try:
-        import concurrent.futures
-
-        from hermes_cli.nous_account import (
-            get_nous_portal_account_info,
-            nous_portal_topup_url,
-        )
-
-        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
-            account = pool.submit(get_nous_portal_account_info, force_fresh=True).result(
-                timeout=timeout
-            )
-    except Exception:
-        logger.debug("credits ▸ /credits portal fetch failed (fail-open)", exc_info=True)
-        return not_logged_in
-
-    if account is None or not getattr(account, "logged_in", False):
-        return not_logged_in
-
-    snapshot = build_nous_credits_snapshot(account)
-    # Balance lines = the snapshot block minus the two trailing affordance lines
-    # ("Top up: <url>" + "(or run /credits)") that build_nous_credits_snapshot
-    # appends for the /usage surface. /credits renders its own button/panel.
-    balance_lines: list[str] = []
-    if snapshot is not None:
-        rendered = render_account_usage_lines(snapshot, markdown=markdown)
-        balance_lines = [
-            line
-            for line in rendered
-            if not line.lstrip().startswith("Top up:")
-            and not line.lstrip().startswith("(or run")
-        ]
-
-    # Identity line — shown before any open (roadmap §4.4).
-    email = getattr(account, "email", None)
-    org_name = getattr(account, "org_name", None)
-    who: list[str] = []
-    if email:
-        who.append(str(email))
-    if org_name:
-        who.append(f"org {org_name}")
-    identity_line = ("Topping up as " + " / ".join(who)) if who else None
-
-    return CreditsView(
-        logged_in=True,
-        balance_lines=tuple(balance_lines),
-        identity_line=identity_line,
-        topup_url=nous_portal_topup_url(account),
-        depleted=getattr(account, "paid_service_access", None) is False,
-    )
-
-
 def _resolve_codex_usage_url(base_url: str) -> str:
    normalized = (base_url or "").strip().rstrip("/")
    if not normalized:
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@@ -187,7 +187,6 @@ def init_agent(
    thinking_callback: callable = None,
    reasoning_callback: callable = None,
    clarify_callback: callable = None,
-    read_terminal_callback: callable = None,
    step_callback: callable = None,
    stream_delta_callback: callable = None,
    interim_assistant_callback: callable = None,
@@ -299,7 +298,6 @@ def init_agent(
    # would mangle the escape sequences.  None = use builtins.print.
    agent._print_fn = None
    agent.background_review_callback = None  # Optional sync callback for gateway delivery
-    agent.memory_notifications = "on"  # Memory update notifications: "off", "on", "verbose"
    agent.skip_context_files = skip_context_files
    agent.load_soul_identity = load_soul_identity
    agent.pass_session_id = pass_session_id
@@ -419,7 +417,6 @@ def init_agent(
    agent.thinking_callback = thinking_callback
    agent.reasoning_callback = reasoning_callback
    agent.clarify_callback = clarify_callback
-    agent.read_terminal_callback = read_terminal_callback
    agent.step_callback = step_callback
    agent.stream_delta_callback = stream_delta_callback
    agent.interim_assistant_callback = interim_assistant_callback
@@ -901,9 +898,6 @@ def init_agent(
        agent.api_key = client_kwargs.get("api_key", "")
        agent.base_url = client_kwargs.get("base_url", agent.base_url)
        try:
-            from agent.ssl_guard import verify_ca_bundle_with_fallback
-
-            verify_ca_bundle_with_fallback()
            agent.client = agent._create_openai_client(client_kwargs, reason="agent_init", shared=True)
            if not agent.quiet_mode:
                print(f"🤖 AI Agent initialized with model: {agent.model}")
@@ -1197,8 +1191,38 @@ def init_agent(
            _ra().logger.warning("Memory provider plugin init failed: %s", _mpe)
            agent._memory_manager = None

-    from agent.memory_manager import inject_memory_provider_tools as _inject_memory_provider_tools
-    _inject_memory_provider_tools(agent)
+    # Inject memory provider tool schemas into the tool surface.
+    # Skip tools whose names already exist (plugins may register the
+    # same tools via ctx.register_tool(), which lands in agent.tools
+    # through _ra().get_tool_definitions()).  Duplicate function names cause
+    # 400 errors on providers that enforce unique names (e.g. Xiaomi
+    # MiMo via Nous Portal).
+    #
+    # Respect the platform's enabled_toolsets configuration (#5544):
+    #   enabled_toolsets is None        → no filter, inject (backward compat)
+    #   "memory" in enabled_toolsets    → user opted in, inject
+    #   otherwise (incl. [])            → user excluded memory, skip injection
+    #
+    # Without this gate, `platform_toolsets: telegram: []` still leaks memory
+    # provider tools (fact_store, etc.) into the tool surface — a 10x latency
+    # penalty on local models and a frequent trigger of tool-call loops.
+    if agent._memory_manager and agent.tools is not None and (
+        agent.enabled_toolsets is None or "memory" in agent.enabled_toolsets
+    ):
+        _existing_tool_names = {
+            t.get("function", {}).get("name")
+            for t in agent.tools
+            if isinstance(t, dict)
+        }
+        for _schema in agent._memory_manager.get_all_tool_schemas():
+            _tname = _schema.get("name", "")
+            if _tname and _tname in _existing_tool_names:
+                continue  # already registered via plugin path
+            _wrapped = {"type": "function", "function": _schema}
+            agent.tools.append(_wrapped)
+            if _tname:
+                agent.valid_tool_names.add(_tname)
+                _existing_tool_names.add(_tname)

    # Skills config: nudge interval for skill creation reminders
    agent._skill_nudge_interval = 10
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@@ -49,7 +49,7 @@ def _ra():


 AGENT_RUNTIME_POST_HOOK_TOOL_NAMES = frozenset(
-    {"todo", "session_search", "memory", "clarify", "read_terminal", "delegate_task"}
+    {"todo", "session_search", "memory", "clarify", "delegate_task"}
 )


@@ -445,45 +445,6 @@ def repair_message_sequence(agent, messages: List[Dict]) -> int:
    return repairs


-def repair_message_sequence_with_cursor(agent, messages: List[Dict]) -> int:
-    """Run :func:`repair_message_sequence` and keep the SessionDB flush
-    cursor consistent with the compacted list (#44837).
-
-    ``repair_message_sequence`` merges/drops messages in place, shrinking
-    the list. ``_last_flushed_db_idx`` (the DB-write cursor) indexes into
-    that list, so after compaction it can point past the new end — the
-    turn-end flush would then skip the assistant/tool chain entirely — or
-    past unflushed messages shifted to lower indexes.
-
-    Repair preserves object identity for surviving messages, so counting
-    the survivors from the previously-flushed prefix gives the exact new
-    cursor even when messages are dropped/merged at indexes *before* the
-    cursor — a plain ``min()`` clamp would silently skip that many
-    unflushed rows. Falls back to the clamp when no prefix snapshot is
-    available.
-
-    Returns the number of repairs made (same as ``repair_message_sequence``).
-    """
-    pre_repair_flushed_ids = None
-    flush_cursor = getattr(agent, "_last_flushed_db_idx", None)
-    if isinstance(flush_cursor, int) and flush_cursor > 0:
-        pre_repair_flushed_ids = {id(m) for m in messages[:flush_cursor]}
-
-    repairs = repair_message_sequence(agent, messages)
-
-    if repairs > 0 and hasattr(agent, "_last_flushed_db_idx"):
-        if pre_repair_flushed_ids is not None:
-            agent._last_flushed_db_idx = sum(
-                1 for m in messages if id(m) in pre_repair_flushed_ids
-            )
-        else:
-            agent._last_flushed_db_idx = min(
-                agent._last_flushed_db_idx, len(messages)
-            )
-
-    return repairs
-
-

 def strip_think_blocks(agent, content: str) -> str:
    """Remove reasoning/thinking blocks from content, returning only visible text.
@@ -618,33 +579,12 @@ def recover_with_credential_pool(
    current_provider = (getattr(agent, "provider", "") or "").strip().lower()
    pool_provider = (getattr(pool, "provider", "") or "").strip().lower()
    if current_provider and pool_provider and current_provider != pool_provider:
-        # Custom endpoints use two naming conventions for the SAME provider:
-        # the agent carries the generic ``custom`` label while the pool is
-        # keyed ``custom:<name>`` (see CUSTOM_POOL_PREFIX). A literal string
-        # compare treats them as a mismatch and skips recovery for every
-        # custom-provider user — 401s/429s then burn the full retry cycle
-        # with no rotation or refresh. Accept the pair as matching only when
-        # the agent's CURRENT base_url actually resolves to this pool key,
-        # so a fallback provider (or a different custom endpoint) still
-        # triggers the guard.
-        _custom_match = False
-        if current_provider == "custom" and pool_provider.startswith("custom:"):
-            try:
-                from agent.credential_pool import get_custom_provider_pool_key
-                _agent_base = (getattr(agent, "base_url", "") or "").strip()
-                _custom_match = bool(_agent_base) and (
-                    (get_custom_provider_pool_key(_agent_base) or "").strip().lower()
-                    == pool_provider
-                )
-            except Exception:
-                _custom_match = False
-        if not _custom_match:
-            _ra().logger.warning(
-                "Credential pool provider mismatch: pool=%s, agent=%s — "
-                "skipping pool mutation to avoid cross-provider contamination",
-                pool_provider, current_provider,
-            )
-            return False, has_retried_429
+        _ra().logger.warning(
+            "Credential pool provider mismatch: pool=%s, agent=%s — "
+            "skipping pool mutation to avoid cross-provider contamination",
+            pool_provider, current_provider,
+        )
+        return False, has_retried_429

    effective_reason = classified_reason
    if effective_reason is None:
@@ -739,28 +679,15 @@ def recover_with_credential_pool(
        # long-running TUI sessions stuck on stale tokens until the user
        # exited and reopened.
        is_entitlement = agent._is_entitlement_failure(error_context, status_code)
-        _auth_haystack = " ".join(
-            str(error_context.get(k) or "").lower()
-            for k in ("message", "reason", "code", "error")
-            if isinstance(error_context, dict)
-        )
-        if (
-            not is_entitlement
-            and status_code == 403
-            and "oauth authentication is currently not allowed for this organization" in _auth_haystack
-        ):
-            is_entitlement = True
-        if (
-            not is_entitlement
-            and status_code == 403
-            and (agent.provider or "") == "anthropic"
-            and getattr(agent, "api_mode", "") == "anthropic_messages"
-        ):
-            is_entitlement = True
        if not is_entitlement and status_code == 403 and (agent.provider or "") == "xai-oauth":
+            _disambiguator_haystack = " ".join(
+                str(error_context.get(k) or "").lower()
+                for k in ("message", "reason", "code", "error")
+                if isinstance(error_context, dict)
+            )
            _is_xai_auth_failure = (
-                "[wke=unauthenticated:" in _auth_haystack
-                or "oauth2 access token could not be validated" in _auth_haystack
+                "[wke=unauthenticated:" in _disambiguator_haystack
+                or "oauth2 access token could not be validated" in _disambiguator_haystack
            )
            if not _is_xai_auth_failure:
                is_entitlement = True
@@ -881,8 +808,6 @@ def try_recover_primary_transport(

 def drop_thinking_only_and_merge_users(
    messages: List[Dict[str, Any]],
-    *,
-    drop_codex_reasoning_items: bool = True,
 ) -> List[Dict[str, Any]]:
    """Drop thinking-only assistant turns; merge any adjacent user messages left behind.

@@ -904,13 +829,7 @@ def drop_thinking_only_and_merge_users(
        return messages

    # Pass 1: drop thinking-only assistant turns.
-    kept = [
-        m for m in messages
-        if not _ra().AIAgent._is_thinking_only_assistant(
-            m,
-            drop_codex_reasoning_items=drop_codex_reasoning_items,
-        )
-    ]
+    kept = [m for m in messages if not _ra().AIAgent._is_thinking_only_assistant(m)]
    dropped = len(messages) - len(kept)
    if dropped == 0:
        return messages
@@ -1217,23 +1136,12 @@ def dump_api_request_debug(

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        dump_file = agent.logs_dir / f"request_dump_{agent.session_id}_{timestamp}.json"
-
-        # Redact secrets before persisting/printing. This dump captures the
-        # full request body (system prompt, tool defs, context-embedded
-        # values), and this path fires unconditionally on API errors — so it
-        # otherwise lands any context-embedded secret in cleartext on disk.
-        # Run the serialized dump through the same scrubber used for logs/tool
-        # output, then hand the resulting payload back to the shared atomic
-        # JSON writer so request dumps keep the same write semantics as before.
-        from agent.redact import redact_sensitive_text
-        _serialized = json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str)
-        _redacted_payload = json.loads(redact_sensitive_text(_serialized, force=True))
-        atomic_json_write(dump_file, _redacted_payload, default=str)
+        atomic_json_write(dump_file, dump_payload, default=str)

        agent._vprint(f"{agent.log_prefix}🧾 Request debug dump written to: {dump_file}")

        if env_var_enabled("HERMES_DUMP_REQUEST_STDOUT"):
-            print(json.dumps(_redacted_payload, ensure_ascii=False, indent=2, default=str))
+            print(json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str))

        return dump_file
    except Exception as dump_error:
@@ -1876,17 +1784,6 @@ def invoke_tool(agent, function_name: str, function_args: dict, effective_task_i
                ),
                next_args,
            )
-    elif function_name == "read_terminal":
-        def _execute(next_args: dict) -> Any:
-            from tools.read_terminal_tool import read_terminal_tool as _read_terminal_tool
-            return _finish_agent_tool(
-                _read_terminal_tool(
-                    start_line=next_args.get("start_line"),
-                    count=next_args.get("count"),
-                    callback=getattr(agent, "read_terminal_callback", None),
-                ),
-                next_args,
-            )
    elif function_name == "delegate_task":
        def _execute(next_args: dict) -> Any:
            return _finish_agent_tool(agent._dispatch_delegate_task(next_args), next_args)
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -73,50 +73,20 @@ ADAPTIVE_EFFORT_MAP = {
    "minimal": "low",
 }

-# ── Anthropic thinking-mode classification ────────────────────────────
-# Claude 4.6 replaced budget-based extended thinking with *adaptive* thinking,
-# and 4.7 additionally forbids the manual ``thinking`` block entirely and drops
-# temperature/top_p/top_k.  Newer Claude releases (4.8, and named models like
-# claude-fable-5) follow the same modern contract — but they share no common
-# version substring, so an allowlist of version numbers ("4.6", "4.7", …) goes
-# stale the moment a model ships without a recognized number and silently
-# routes it down the legacy manual-thinking path.
-#
-# Instead we DEFAULT unknown Claude models to the modern contract and keep an
-# explicit *legacy* list of the older Claude families that still require manual
-# thinking.  This mirrors _get_anthropic_max_output's "default to newest" design
-# (future models are unlikely to regress to the older contract), so each new
-# Claude release works without a code change.
-#
-# Non-Claude Anthropic-Messages models (minimax, qwen3, GLM, …) are NOT Claude,
-# so they fall through to the legacy path automatically — exactly what those
-# manual-thinking endpoints need.
-
-# Older Claude families that DON'T support adaptive thinking (manual thinking
-# with budget_tokens only). Substring-matched against the model name.
-_LEGACY_MANUAL_THINKING_CLAUDE_SUBSTRINGS = (
-    "claude-3",          # 3, 3.5, 3.7
-    "claude-opus-4-0", "claude-opus-4.0", "claude-opus-4-1", "claude-opus-4.1",
-    "claude-sonnet-4-0", "claude-sonnet-4.0",
-    "claude-opus-4-2025", "claude-sonnet-4-2025",  # date-stamped 4.0 IDs
-    "claude-opus-4-5", "claude-opus-4.5",
-    "claude-sonnet-4-5", "claude-sonnet-4.5",
-    "claude-haiku-4-5", "claude-haiku-4.5",
-)
-
-# Older Claude families that DON'T accept the "xhigh" effort level (4.6 only
-# supports low/medium/high/max). xhigh arrived with Opus 4.7. Adaptive models
-# not in this list (4.7, 4.8, fable, future) accept xhigh.
-_NO_XHIGH_CLAUDE_SUBSTRINGS = (
-    "claude-opus-4-6", "claude-opus-4.6",
-    "claude-sonnet-4-6", "claude-sonnet-4.6",
-)
-
-
-def _is_claude_model(model: str | None) -> bool:
-    return "claude" in (model or "").lower()
+# Models that accept the "xhigh" output_config.effort level.  Opus 4.7 added
+# xhigh as a distinct level between high and max; older adaptive-thinking
+# models (4.6) reject it with a 400.  Keep this substring list in sync with
+# the Anthropic migration guide as new model families ship.
+_XHIGH_EFFORT_SUBSTRINGS = ("4-7", "4.7", "4-8", "4.8")

+# Models where extended thinking is deprecated/removed (4.6+ behavior: adaptive
+# is the only supported mode; 4.7 additionally forbids manual thinking entirely
+# and drops temperature/top_p/top_k).
+_ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7", "4-8", "4.8")

+# Models where temperature/top_p/top_k return 400 if set to non-default values.
+# This is the Opus 4.7 contract; future 4.x+ models are expected to follow it.
+_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7", "4-8", "4.8")
 _FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6")

 # ── Max output token limits per Anthropic model ───────────────────────
@@ -124,8 +94,6 @@ _FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6")
 # max_tokens as a mandatory field.  Previously we hardcoded 16384, which
 # starves thinking-enabled models (thinking tokens count toward the limit).
 _ANTHROPIC_OUTPUT_LIMITS = {
-    # Mythos-class named models (claude-fable-5, …) — 1M context, reasoning
-    "claude-fable":      128_000,
    # Claude 4.8
    "claude-opus-4-8":   128_000,
    # Claude 4.7
@@ -240,17 +208,8 @@ def _resolve_anthropic_messages_max_tokens(


 def _supports_adaptive_thinking(model: str) -> bool:
-    """Return True for Claude models that use adaptive thinking (4.6+).
-
-    Defaults *unknown* Claude models to adaptive (the modern contract) and
-    only returns False for the explicit legacy list of older Claude families
-    that require manual budget-based thinking. Non-Claude Anthropic-Messages
-    models (minimax, qwen3, …) return False so they keep the manual path.
-    """
-    if not _is_claude_model(model):
-        return False
-    m = model.lower()
-    return not any(v in m for v in _LEGACY_MANUAL_THINKING_CLAUDE_SUBSTRINGS)
+    """Return True for Claude 4.6+ models that support adaptive thinking."""
+    return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)


 def _supports_xhigh_effort(model: str) -> bool:
@@ -260,33 +219,18 @@ def _supports_xhigh_effort(model: str) -> bool:
    Pre-4.7 adaptive models (Opus/Sonnet 4.6) only accept low/medium/high/max
    and reject xhigh with an HTTP 400. Callers should downgrade xhigh→max
    when this returns False.
-
-    Defaults unknown adaptive Claude models to accepting xhigh (4.7+ contract);
-    only the 4.6 family and legacy manual-thinking models are excluded.
    """
-    if not _supports_adaptive_thinking(model):
-        return False
-    m = model.lower()
-    return not any(v in m for v in _NO_XHIGH_CLAUDE_SUBSTRINGS)
+    return any(v in model for v in _XHIGH_EFFORT_SUBSTRINGS)


 def _forbids_sampling_params(model: str) -> bool:
    """Return True for models that 400 on any non-default temperature/top_p/top_k.

-    Opus 4.7 introduced this restriction; later Claude releases follow it.
-    Defaults unknown Claude models to forbidding sampling params (the modern
-    contract). The 4.6 family still accepts them, and the legacy manual-thinking
-    families (4.5 and older) accept them too, so both are excluded. Non-Claude
-    models are unaffected. Callers should omit these fields entirely rather than
-    passing zero/default values (the API rejects anything non-null).
+    Opus 4.7 explicitly rejects sampling parameters; later Claude releases are
+    expected to follow suit.  Callers should omit these fields entirely rather
+    than passing zero/default values (the API rejects anything non-null).
    """
-    if not _is_claude_model(model):
-        return False
-    m = model.lower()
-    # 4.6 family is adaptive but still accepts sampling params.
-    if any(v in m for v in _NO_XHIGH_CLAUDE_SUBSTRINGS):
-        return False
-    return not any(v in m for v in _LEGACY_MANUAL_THINKING_CLAUDE_SUBSTRINGS)
+    return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS)


 def _supports_fast_mode(model: str) -> bool:
@@ -751,9 +695,6 @@ def build_anthropic_client(
    from httpx import Timeout

    normalized_base_url = _normalize_base_url_text(base_url)
-    if normalized_base_url:
-        import re as _re
-        normalized_base_url = _re.sub(r"/v1/?$", "", normalized_base_url.rstrip("/"))
    _read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
    kwargs = {
        "timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
@@ -880,7 +821,6 @@ def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
            capture_output=True,
            text=True,
            timeout=5,
-            stdin=subprocess.DEVNULL,
        )
    except (OSError, subprocess.TimeoutExpired):
        logger.debug("Keychain: security command not available or timed out")
@@ -1223,10 +1163,7 @@ def run_oauth_setup_token() -> Optional[str]:
            "Install it with: npm install -g @anthropic-ai/claude-code"
        )

-    # Run interactively — stdin/stdout/stderr inherited so the user can
-    # complete the OAuth login prompt. Must keep inherited stdin; the TUI-EOF
-    # concern does not apply to an interactive login the user explicitly
-    # invokes.  noqa: subprocess-stdin
+    # Run interactively — stdin/stdout/stderr inherited so user can interact
    try:
        subprocess.run([claude_path, "setup-token"])
    except (KeyboardInterrupt, EOFError):
@@ -1574,15 +1511,6 @@ def _convert_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:

    if ptype == "input_text":
        block: Dict[str, Any] = {"type": "text", "text": part.get("text", "")}
-    elif ptype == "text":
-        # A stored Anthropic text block. Rebuild from whitelisted fields only —
-        # SDK response text blocks carry output-only siblings (parsed_output,
-        # citations=None) that the Messages INPUT schema rejects with HTTP 400
-        # "Extra inputs are not permitted". Do NOT dict(part) it verbatim.
-        block = {"type": "text", "text": part.get("text", "")}
-        cits = part.get("citations")
-        if isinstance(cits, list) and cits:
-            block["citations"] = cits
    elif ptype in {"image_url", "input_image"}:
        image_value = part.get("image_url", {})
        url = image_value.get("url", "") if isinstance(image_value, dict) else str(image_value or "")
@@ -1697,58 +1625,6 @@ def _content_parts_to_anthropic_blocks(parts: Any) -> List[Dict[str, Any]]:
    return out


-def _sanitize_replay_block(b: Dict[str, Any]) -> Optional[Dict[str, Any]]:
-    """Strip output-only fields from a stored Anthropic content block so it is
-    valid as REQUEST input on replay.
-
-    The SDK response objects carry output-only attributes that the Messages
-    *input* schema forbids ("Extra inputs are not permitted"): text blocks get
-    ``parsed_output``/``citations`` (when null), tool_use blocks get ``caller``,
-    etc. ``normalize_response`` captured blocks verbatim via ``_to_plain_data``,
-    so these leak back as input on the next turn → HTTP 400.
-
-    Whitelist per type (NOT a blacklist) so future SDK output-only fields can't
-    reintroduce the bug. Returns a clean block, or None to drop it.
-    """
-    if not isinstance(b, dict):
-        return None
-    btype = b.get("type")
-    if btype == "text":
-        out: Dict[str, Any] = {"type": "text", "text": b.get("text", "")}
-        # citations is input-valid ONLY when it's a non-empty list; the SDK
-        # emits citations=None on responses, which the input schema rejects.
-        cits = b.get("citations")
-        if isinstance(cits, list) and cits:
-            out["citations"] = cits
-        if isinstance(b.get("cache_control"), dict):
-            out["cache_control"] = b["cache_control"]
-        return out
-    if btype == "thinking":
-        out = {"type": "thinking", "thinking": b.get("thinking", "")}
-        if b.get("signature"):
-            out["signature"] = b["signature"]
-        return out
-    if btype == "redacted_thinking":
-        # Only valid with its data payload; drop if missing.
-        return {"type": "redacted_thinking", "data": b["data"]} if b.get("data") else None
-    if btype == "tool_use":
-        out = {
-            "type": "tool_use",
-            "id": _sanitize_tool_id(b.get("id", "")),
-            "name": b.get("name", ""),
-            "input": b.get("input", {}),
-        }
-        if isinstance(b.get("cache_control"), dict):
-            out["cache_control"] = b["cache_control"]
-        return out
-    if btype == "image":
-        src = b.get("source")
-        return {"type": "image", "source": src} if isinstance(src, dict) else None
-    # Unknown/unsupported block type on the input path — drop rather than risk
-    # another "Extra inputs are not permitted".
-    return None
-
-
 def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]:
    """Convert an assistant message to Anthropic content blocks.

@@ -1756,55 +1632,6 @@ def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]:
    reasoning_content injection for Kimi/DeepSeek endpoints.
    """
    content = m.get("content", "")
-    # Anthropic interleaved-thinking fast path: when this turn carries a
-    # verbatim, order-preserving block list (set by normalize_response only
-    # for turns that interleave SIGNED thinking with tool_use), replay it.
-    # Each block is run through _sanitize_replay_block to strip output-only
-    # SDK fields (parsed_output, caller, citations=None, …) that the Messages
-    # INPUT schema forbids — replaying them verbatim caused HTTP 400 "Extra
-    # inputs are not permitted" (text.parsed_output). Block ORDER is preserved
-    # (the reason this channel exists); only forbidden sibling fields are
-    # dropped, leaving thinking signatures and tool_use id/name/input intact.
-    ordered_blocks = m.get("anthropic_content_blocks")
-    if isinstance(ordered_blocks, list) and ordered_blocks:
-        # Re-source each tool_use input from the stored tool_calls map rather
-        # than the captured block. The ordered-blocks list captures tool_use
-        # input from the RAW API response (normalize_response), which is NOT
-        # credential-redacted; tool_calls[].function.arguments IS redacted at
-        # storage time (build_assistant_message, #19798). Replaying the raw
-        # block input would resurrect a secret the model inlined into a tool
-        # call (e.g. terminal(command="curl -H 'Authorization: Bearer sk-...'")
-        # onto the wire, even though the same value is redacted everywhere else
-        # in history. Keying by sanitized tool id preserves interleave order
-        # (the reason this channel exists) while swapping in the redacted
-        # input. Adapted from #36071 (replay-time tool-input re-sourcing).
-        redacted_input_by_id: Dict[str, Any] = {}
-        for tc in m.get("tool_calls", []) or []:
-            if not isinstance(tc, dict):
-                continue
-            fn = tc.get("function", {}) or {}
-            raw_args = fn.get("arguments", "{}")
-            try:
-                parsed_args = json.loads(raw_args) if isinstance(raw_args, str) else raw_args
-            except (json.JSONDecodeError, ValueError):
-                parsed_args = {}
-            redacted_input_by_id[_sanitize_tool_id(tc.get("id", ""))] = parsed_args
-        replayed: List[Dict[str, Any]] = []
-        for b in ordered_blocks:
-            clean = _sanitize_replay_block(b)
-            if clean is None:
-                continue
-            if clean.get("type") == "tool_use":
-                # Override raw (un-redacted) input with the redacted copy when
-                # we have one for this id; fall back to the sanitized block
-                # input only if the tool_call is missing (shape mismatch).
-                redacted = redacted_input_by_id.get(clean.get("id", ""))
-                if redacted is not None:
-                    clean["input"] = redacted
-            replayed.append(clean)
-        if replayed:
-            return {"role": "assistant", "content": replayed}
-
    blocks = _extract_preserved_thinking_blocks(m)
    if content:
        if isinstance(content, list):
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -102,7 +102,7 @@ OpenAI = _OpenAIProxy()  # module-level name, resolves lazily on call/isinstance
 from agent.credential_pool import load_pool
 from hermes_cli.config import get_hermes_home
 from hermes_constants import OPENROUTER_BASE_URL
-from utils import base_url_host_matches, base_url_hostname, model_forces_max_completion_tokens, normalize_proxy_env_vars
+from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_vars

 logger = logging.getLogger(__name__)

@@ -1144,8 +1144,7 @@ def _endpoint_speaks_anthropic_messages(base_url: str) -> bool:
    normalized = (base_url or "").strip().lower().rstrip("/")
    if not normalized:
        return False
-    path = urlparse(normalized).path.rstrip("/")
-    if path.endswith("/anthropic") or path.endswith("/anthropic/v1"):
+    if normalized.endswith("/anthropic"):
        return True
    hostname = base_url_hostname(normalized)
    if hostname == "api.anthropic.com":
@@ -3079,20 +3078,23 @@ def _try_configured_fallback_chain(
        if not fb_provider or fb_provider.lower() == skip:
            continue
        fb_model = str(entry.get("model", "")).strip() or None
+        fb_base_url = str(entry.get("base_url", "")).strip() or None
+        fb_api_key = str(entry.get("api_key", "")).strip() or None

        label = f"fallback_chain[{i}]({fb_provider})"

        try:
-            fb_client, resolved_model = _resolve_fallback_entry(entry)
+            fb_client = _resolve_single_provider(
+                fb_provider, fb_model, fb_base_url, fb_api_key)
        except Exception:
-            fb_client, resolved_model = None, None
+            fb_client = None

        if fb_client is not None:
            logger.info(
                "Auxiliary %s: %s on %s — configured fallback to %s (%s)",
-                task, reason, failed_provider, label, resolved_model or fb_model or "default",
+                task, reason, failed_provider, label, fb_model or "default",
            )
-            return fb_client, resolved_model or fb_model, label
+            return fb_client, fb_model, label
        tried.append(label)

    if tried:
@@ -3103,103 +3105,6 @@ def _try_configured_fallback_chain(
    return None, None, ""


-def _fallback_entry_api_key(entry: Dict[str, Any]) -> Optional[str]:
-    """Resolve inline or env-backed API key from a fallback-chain entry."""
-    explicit = str(entry.get("api_key") or "").strip()
-    if explicit:
-        return explicit
-    key_env = str(entry.get("key_env") or entry.get("api_key_env") or "").strip()
-    if key_env:
-        return os.getenv(key_env, "").strip() or None
-    return None
-
-
-def _resolve_fallback_entry(entry: Dict[str, Any]) -> Tuple[Optional[Any], Optional[str]]:
-    """Resolve one fallback entry through the central provider router."""
-    provider = str(entry.get("provider") or "").strip()
-    model = str(entry.get("model") or "").strip() or None
-    if not provider or not model:
-        return None, None
-    base_url = str(entry.get("base_url") or "").strip() or None
-    api_key = _fallback_entry_api_key(entry)
-    api_mode = str(entry.get("api_mode") or entry.get("transport") or "").strip() or None
-    return resolve_provider_client(
-        provider,
-        model=model,
-        explicit_base_url=base_url,
-        explicit_api_key=api_key,
-        api_mode=api_mode,
-    )
-
-
-def _try_main_fallback_chain(
-    task: Optional[str],
-    failed_provider: str = "",
-    reason: str = "error",
-) -> Tuple[Optional[Any], Optional[str], str]:
-    """Try the top-level main-agent fallback chain for an auxiliary call.
-
-    ``provider: auto`` auxiliary tasks should respect the user's declared
-    main fallback policy before dropping into Hermes' built-in discovery
-    chain. The top-level chain is read through ``get_fallback_chain`` so
-    both modern ``fallback_providers`` and legacy ``fallback_model`` entries
-    participate in the same order as the main agent.
-    """
-    try:
-        from hermes_cli.config import load_config
-        from hermes_cli.fallback_config import get_fallback_chain
-
-        chain = get_fallback_chain(load_config())
-    except Exception as exc:
-        logger.debug("Auxiliary %s: could not load main fallback chain: %s", task or "call", exc)
-        return None, None, ""
-
-    if not chain:
-        return None, None, ""
-
-    failed_norm = (failed_provider or "").strip().lower()
-    main_norm = (_read_main_provider() or "").strip().lower()
-    skip = {p for p in (failed_norm, main_norm, "auto") if p}
-    tried: List[str] = []
-
-    for i, entry in enumerate(chain):
-        if not isinstance(entry, dict):
-            continue
-        fb_provider = str(entry.get("provider") or "").strip()
-        fb_model = str(entry.get("model") or "").strip()
-        if not fb_provider or not fb_model:
-            continue
-        fb_norm = fb_provider.lower()
-        label = f"fallback_providers[{i}]({fb_provider})"
-        if fb_norm in skip:
-            tried.append(f"{label} (skipped)")
-            continue
-        if _is_provider_unhealthy(fb_norm):
-            _log_skip_unhealthy(fb_norm, task)
-            tried.append(f"{label} (unhealthy)")
-            continue
-        try:
-            fb_client, resolved_model = _resolve_fallback_entry(entry)
-        except Exception as exc:
-            logger.debug("Auxiliary %s: main fallback %s failed to resolve: %s", task or "call", label, exc)
-            fb_client, resolved_model = None, None
-        if fb_client is not None:
-            logger.info(
-                "Auxiliary %s: %s on %s — main fallback chain to %s (%s)",
-                task or "call", reason, failed_provider or "auto", label,
-                resolved_model or fb_model,
-            )
-            return fb_client, resolved_model or fb_model, fb_provider
-        tried.append(label)
-
-    if tried:
-        logger.debug(
-            "Auxiliary %s: main fallback chain exhausted (tried: %s)",
-            task or "call", ", ".join(tried),
-        )
-    return None, None, ""
-
-
 def _resolve_single_provider(
    provider: str,
    model: Optional[str] = None,
@@ -3210,19 +3115,16 @@ def _resolve_single_provider(

    Uses the existing provider resolution infrastructure where possible.
    """
-    # Reuse resolve_provider_client which handles provider→client mapping.
+    # Reuse resolve_provider_client (provider→client mapping). NOTE(review): verify it takes base_url=/api_key=, not explicit_base_url=/explicit_api_key=.
    client, resolved_model = resolve_provider_client(
        provider=provider,
        model=model,
-        explicit_base_url=base_url,
-        explicit_api_key=api_key,
+        base_url=base_url,
+        api_key=api_key,
    )
    return client

-def _resolve_auto(
-    main_runtime: Optional[Dict[str, Any]] = None,
-    task: Optional[str] = None,
-) -> Tuple[Optional[OpenAI], Optional[str]]:
+def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Optional[OpenAI], Optional[str]]:
    """Full auto-detection chain.

    Priority:
@@ -3288,7 +3190,7 @@ def _resolve_auto(
    if (main_provider and main_model
            and main_provider not in {"auto", ""}):
        resolved_provider = main_provider
-        explicit_base_url = runtime_base_url or None
+        explicit_base_url = None
        explicit_api_key = None
        if runtime_base_url and (main_provider == "custom" or main_provider.startswith("custom:")):
            resolved_provider = "custom"
@@ -3320,22 +3222,7 @@ def _resolve_auto(
                            main_provider, resolved or main_model)
                return client, resolved or main_model

-    # ── Step 2: user-configured fallback policy ─────────────────────────
-    # In auto mode, respect the task-specific fallback chain first, then the
-    # main agent's top-level fallback_providers/fallback_model chain. The
-    # hardcoded provider discovery chain below is only the convenience default
-    # for users who have not declared a fallback policy.
-    if task:
-        fb_client, fb_model, _fb_label = _try_configured_fallback_chain(
-            task, main_provider or "auto", reason="main provider unavailable")
-        if fb_client is not None:
-            return fb_client, fb_model
-    fb_client, fb_model, _fb_label = _try_main_fallback_chain(
-        task, main_provider or "auto", reason="main provider unavailable")
-    if fb_client is not None:
-        return fb_client, fb_model
-
-    # ── Step 3: aggregator / fallback chain ──────────────────────────────
+    # ── Step 2: aggregator / fallback chain ──────────────────────────────
    tried = []
    for label, try_fn in _get_provider_chain():
        if _is_provider_unhealthy(label):
@@ -3456,7 +3343,6 @@ def resolve_provider_client(
    api_mode: str = None,
    main_runtime: Optional[Dict[str, Any]] = None,
    is_vision: bool = False,
-    task: Optional[str] = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Central router: given a provider name and optional model, return a
    configured client with the correct auth, base URL, and API format.
@@ -3577,7 +3463,7 @@ def resolve_provider_client(

    # ── Auto: try all providers in priority order ────────────────────
    if provider == "auto":
-        client, resolved = _resolve_auto(main_runtime=main_runtime, task=task)
+        client, resolved = _resolve_auto(main_runtime=main_runtime)
        if client is None:
            return None, None
        # When auto-detection lands on a non-OpenRouter provider (e.g. a
@@ -4414,15 +4300,13 @@ def get_auxiliary_extra_body() -> dict:
    return _nous_extra_body() if auxiliary_is_nous else {}


-def auxiliary_max_tokens_param(value: int, *, model: Optional[str] = None) -> dict:
+def auxiliary_max_tokens_param(value: int) -> dict:
    """Return the correct max tokens kwarg for the auxiliary client's provider.
-
+    
    OpenRouter and local models use 'max_tokens'. Direct OpenAI with newer
-    models (gpt-4o, gpt-4.1, gpt-5+, o-series) requires 'max_completion_tokens'.
+    models (gpt-4o, o-series, gpt-5+) requires 'max_completion_tokens'.
    The Codex adapter translates max_tokens internally, so we use max_tokens
-    for it as well. Pass ``model`` so third-party OpenAI-compatible endpoints
-    fronting the newer families are also recognised — URL-only detection
-    misses the case where a custom base URL serves e.g. ``gpt-5.4``.
+    for it as well.
    """
    custom_base = _current_custom_base_url()
    or_key = os.getenv("OPENROUTER_API_KEY")
@@ -4432,9 +4316,6 @@ def auxiliary_max_tokens_param(value: int, *, model: Optional[str] = None) -> di
            and _read_nous_auth() is None
            and base_url_hostname(custom_base) in {"api.openai.com", "api.githubcopilot.com"}):
        return {"max_completion_tokens": value}
-    # ...and for any caller serving a newer OpenAI-family model by name.
-    if model_forces_max_completion_tokens(model):
-        return {"max_completion_tokens": value}
    return {"max_tokens": value}


@@ -4470,16 +4351,11 @@ def _client_cache_key(
    api_mode: Optional[str] = None,
    main_runtime: Optional[Dict[str, Any]] = None,
    is_vision: bool = False,
-    task: Optional[str] = None,
 ) -> tuple:
    runtime = _normalize_main_runtime(main_runtime)
    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
-    # `auto` can now resolve through task-specific or main fallback policy,
-    # so the task participates in the cache key. Non-auto providers keep the
-    # old cache shape because the explicit provider/model tuple is sufficient.
-    task_key = (task or "") if provider == "auto" else ""
    pool_hint = _pool_cache_hint(provider, main_runtime=main_runtime)
-    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision, task_key, pool_hint)
+    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision, pool_hint)


 def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
@@ -4672,7 +4548,6 @@ def _get_cached_client(
    api_mode: str = None,
    main_runtime: Optional[Dict[str, Any]] = None,
    is_vision: bool = False,
-    task: Optional[str] = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Get or create a cached client for the given provider.

@@ -4710,7 +4585,6 @@ def _get_cached_client(
        api_mode=api_mode,
        main_runtime=main_runtime,
        is_vision=is_vision,
-        task=task,
    )
    with _client_cache_lock:
        if cache_key in _client_cache:
@@ -4755,7 +4629,6 @@ def _get_cached_client(
        api_mode=api_mode,
        main_runtime=runtime,
        is_vision=is_vision,
-        task=task,
    )
    if client is not None:
        # For async clients, remember which loop they were created on so we
@@ -5126,7 +4999,7 @@ def _build_call_kwargs(

    # Provider-specific extra_body
    merged_extra = dict(extra_body or {})
-    if provider == "nous":
+    if provider == "nous" or auxiliary_is_nous:
        merged_extra.setdefault("tags", []).extend(_nous_portal_tags())
    if merged_extra:
        kwargs["extra_body"] = merged_extra
@@ -5261,7 +5134,7 @@ def call_llm(
            if not resolved_base_url:
                logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
                            task or "call", resolved_provider)
-                client, final_model = _get_cached_client("auto", main_runtime=main_runtime, task=task)
+                client, final_model = _get_cached_client("auto", main_runtime=main_runtime)
        if client is None:
            raise RuntimeError(
                f"No LLM provider configured for task={task} provider={resolved_provider}. "
@@ -5587,19 +5460,14 @@ def call_llm(

            # Fallback order (#26882, #26803):
            #   1. User-configured fallback_chain (per-task) if set
-            #   2. For auto: top-level main fallback_providers/fallback_model
-            #   3. For auto: built-in auxiliary discovery chain
-            #   4. For explicit aux providers: main agent model safety net
+            #   2. Main agent model (last-resort safety net)
+            # For auto users (no explicit aux provider), use the full
+            # auto-detection chain instead — its Step 1 IS the main agent
+            # model, so users on `auto` already get main-model fallback.
            fb_client, fb_model, fb_label = (None, None, "")
            if is_auto:
-                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
-                    task, resolved_provider or "auto", reason=reason)
-                if fb_client is None:
-                    fb_client, fb_model, fb_label = _try_main_fallback_chain(
-                        task, resolved_provider or "auto", reason=reason)
-                if fb_client is None:
-                    fb_client, fb_model, fb_label = _try_payment_fallback(
-                        resolved_provider, task, reason=reason)
+                fb_client, fb_model, fb_label = _try_payment_fallback(
+                    resolved_provider, task, reason=reason)
            else:
                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
                    task, resolved_provider or "auto", reason=reason)
@@ -5762,7 +5630,7 @@ async def async_call_llm(
            if not resolved_base_url:
                logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
                            task or "call", resolved_provider)
-                client, final_model = _get_cached_client("auto", async_mode=True, main_runtime=main_runtime, task=task)
+                client, final_model = _get_cached_client("auto", async_mode=True)
        if client is None:
            raise RuntimeError(
                f"No LLM provider configured for task={task} provider={resolved_provider}. "
@@ -6030,19 +5898,13 @@ async def async_call_llm(

            # Fallback order (#26882, #26803):
            #   1. User-configured fallback_chain (per-task) if set
-            #   2. For auto: top-level main fallback_providers/fallback_model
-            #   3. For auto: built-in auxiliary discovery chain
-            #   4. For explicit aux providers: main agent model safety net
+            #   2. Main agent model (last-resort safety net)
+            # Auto users get the full auto-detection chain instead — its
+            # Step 1 IS the main agent model.
            fb_client, fb_model, fb_label = (None, None, "")
            if is_auto:
-                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
-                    task, resolved_provider or "auto", reason=reason)
-                if fb_client is None:
-                    fb_client, fb_model, fb_label = _try_main_fallback_chain(
-                        task, resolved_provider or "auto", reason=reason)
-                if fb_client is None:
-                    fb_client, fb_model, fb_label = _try_payment_fallback(
-                        resolved_provider, task, reason=reason)
+                fb_client, fb_model, fb_label = _try_payment_fallback(
+                    resolved_provider, task, reason=reason)
            else:
                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
                    task, resolved_provider or "auto", reason=reason)
--- a/agent/background_review.py
+++ b/agent/background_review.py
@@ -237,25 +237,18 @@ _COMBINED_REVIEW_PROMPT = (
 def summarize_background_review_actions(
    review_messages: List[Dict],
    prior_snapshot: List[Dict],
-    notification_mode: str = "on",
 ) -> List[str]:
    """Build the human-facing action summary for a background review pass.

-    Walks the review agent's session messages and collects successful memory
-    and skill-management actions to surface to the user. Tool messages already
-    present in ``prior_snapshot`` are skipped so stale inherited results are
-    not re-surfaced as fresh background work (issue #14944).
+    Walks the review agent's session messages and collects "successful tool
+    action" descriptions to surface to the user (e.g. "Memory updated").
+    Tool messages already present in ``prior_snapshot`` are skipped so we
+    don't re-surface stale results from the prior conversation that the
+    review agent inherited via ``conversation_history`` (issue #14944).

-    ``notification_mode`` controls display detail:
-    - ``off``: return no actions.
-    - ``on``: generic "Memory updated"/tool messages.
-    - ``verbose``: include compact content previews from tool-call arguments.
+    Matching is by ``tool_call_id`` when available, with a content-equality
+    fallback for tool messages that lack one.
    """
-    mode = str(notification_mode or "on").lower()
-    if mode == "off":
-        return []
-    verbose = mode == "verbose"
-
    existing_tool_call_ids = set()
    existing_tool_contents = set()
    for prior in prior_snapshot or []:
@@ -269,42 +262,6 @@ def summarize_background_review_actions(
            if isinstance(content, str):
                existing_tool_contents.add(content)

-    # Map review-agent tool results back to the calls that produced them.  The
-    # result JSON only says "Entry added"; the call arguments contain action,
-    # target, and content previews.  Restricting to notify_tools also prevents
-    # helper tools from surfacing as memory work just because they succeeded.
-    notify_tools = {"memory", "skill_manage"}
-    all_tool_call_ids: set = set()
-    call_details: dict = {}
-    for msg in review_messages or []:
-        if not isinstance(msg, dict) or msg.get("role") != "assistant":
-            continue
-        for tc in msg.get("tool_calls", []) or []:
-            if not isinstance(tc, dict):
-                continue
-            fn = tc.get("function", {}) or {}
-            fn_name = fn.get("name", "")
-            tcid = tc.get("id")
-            if tcid:
-                all_tool_call_ids.add(tcid)
-            if fn_name not in notify_tools:
-                continue
-            try:
-                args = json.loads(fn.get("arguments", "{}"))
-            except (json.JSONDecodeError, TypeError):
-                args = {}
-            if tcid:
-                call_details[tcid] = {
-                    "tool": fn_name,
-                    "action": args.get("action", "?"),
-                    "target": args.get("target", "memory"),
-                    "content": args.get("content", ""),
-                    "old_text": args.get("old_text", ""),
-                    "name": args.get("name", ""),
-                    "old_string": args.get("old_string", ""),
-                    "new_string": args.get("new_string", ""),
-                }
-
    actions: List[str] = []
    for msg in review_messages or []:
        if not isinstance(msg, dict) or msg.get("role") != "tool":
@@ -316,8 +273,6 @@ def summarize_background_review_actions(
            content_str = msg.get("content")
            if isinstance(content_str, str) and content_str in existing_tool_contents:
                continue
-        if tcid and all_tool_call_ids and tcid not in call_details:
-            continue
        try:
            data = json.loads(msg.get("content", "{}"))
        except (json.JSONDecodeError, TypeError):
@@ -325,75 +280,19 @@ def summarize_background_review_actions(
        if not isinstance(data, dict) or not data.get("success"):
            continue
        message = data.get("message", "")
-        detail = call_details.get(tcid, {})
-        target = data.get("target", "") or detail.get("target", "")
-        is_skill = detail.get("tool") == "skill_manage"
-
-        message_lower = message.lower()
-        if not verbose:
-            if "created" in message_lower:
-                actions.append(message)
-                continue
-            if "updated" in message_lower:
-                actions.append(message)
-                continue
-            if is_skill and "patched" in message_lower:
-                actions.append(message)
-                continue
-
-        if is_skill:
-            label = "Skill"
-        elif target:
+        target = data.get("target", "")
+        if "created" in message.lower():
+            actions.append(message)
+        elif "updated" in message.lower():
+            actions.append(message)
+        elif "added" in message.lower() or (target and "add" in message.lower()):
+            label = "Memory" if target == "memory" else "User profile" if target == "user" else target
+            actions.append(f"{label} updated")
+        elif "Entry added" in message:
+            label = "Memory" if target == "memory" else "User profile" if target == "user" else target
+            actions.append(f"{label} updated")
+        elif "removed" in message.lower() or "replaced" in message.lower():
            label = "Memory" if target == "memory" else "User profile" if target == "user" else target
-        else:
-            continue
-
-        if verbose:
-            action = detail.get("action", "")
-            content = detail.get("content", "")
-            old_text = detail.get("old_text", "")
-            skill_name = detail.get("name", "")
-            max_preview = 120
-            if is_skill:
-                change = data.get("_change", {})
-                old_string = change.get("old", "") or detail.get("old_string", "")
-                new_string = change.get("new", "") or detail.get("new_string", "")
-                description = change.get("description", "")
-                if action == "patch" and (old_string or new_string):
-                    old_preview = old_string[:80].replace("\n", " ") + (
-                        "…" if len(old_string) > 80 else ""
-                    )
-                    new_preview = new_string[:80].replace("\n", " ") + (
-                        "…" if len(new_string) > 80 else ""
-                    )
-                    actions.append(
-                        f"📝 Skill '{skill_name}' patched: "
-                        f"\"{old_preview}\" → \"{new_preview}\""
-                    )
-                elif action == "create" and description:
-                    actions.append(f"📝 Skill '{skill_name}' created: {description}")
-                elif action == "edit" and description:
-                    actions.append(f"📝 Skill '{skill_name}' rewritten: {description}")
-                else:
-                    actions.append(f"📝 {message}" if message else f"Skill {action}")
-            elif action == "add" and content:
-                preview = content[:max_preview] + ("…" if len(content) > max_preview else "")
-                actions.append(f"{label} ➕ {preview}")
-            elif action == "replace" and content:
-                preview = content[:max_preview] + ("…" if len(content) > max_preview else "")
-                actions.append(f"{label} ✏️ {preview}")
-            elif action == "remove" and old_text:
-                preview = old_text[:60] + ("…" if len(old_text) > 60 else "")
-                actions.append(f"{label} ➖ {preview}")
-            else:
-                actions.append(f"{label} updated")
-        elif (
-            "added" in message_lower
-            or "replaced" in message_lower
-            or "removed" in message_lower
-            or (target and "add" in message.lower())
-            or "Entry added" in message
-        ):
            actions.append(f"{label} updated")
    return actions

@@ -623,7 +522,6 @@ def _run_review_in_thread(
        actions = summarize_background_review_actions(
            review_messages,
            messages_snapshot,
-            notification_mode=getattr(agent, "memory_notifications", "on"),
        )

        if actions:
--- a/agent/bedrock_adapter.py
+++ b/agent/bedrock_adapter.py
@@ -58,34 +58,17 @@ _bedrock_runtime_client_cache: Dict[str, Any] = {}
 _bedrock_control_client_cache: Dict[str, Any] = {}


-_MIN_BOTO3_VERSION = (1, 34, 59)
-
-
 def _require_boto3():
-    """Import boto3, raising a clear error if not installed or too old."""
+    """Import boto3, raising a clear error if not installed."""
    try:
        import boto3
+        return boto3
    except ImportError:
        raise ImportError(
            "The 'boto3' package is required for the AWS Bedrock provider. "
            "Install it with: pip install boto3\n"
            "Or install Hermes with Bedrock support: pip install -e '.[bedrock]'"
        )
-    # converse() / converse_stream() were added in boto3 1.34.59.
-    # When Hermes is installed editable into system Python, the system boto3
-    # (e.g. Ubuntu 24.04 ships 1.34.46) may take precedence over the venv
-    # version pinned in pyproject.toml.
-    try:
-        version = tuple(int(x) for x in boto3.__version__.split(".")[:3])
-    except (AttributeError, ValueError):
-        return boto3  # can't parse — don't block on version check
-    if version < _MIN_BOTO3_VERSION:
-        raise RuntimeError(
-            f"boto3 {boto3.__version__} does not support converse_stream "
-            f"(minimum 1.34.59 required). Upgrade with: "
-            f"pip install --upgrade boto3"
-        )
-    return boto3


 def _get_bedrock_runtime_client(region: str):
@@ -225,41 +208,6 @@ def is_stale_connection_error(exc: BaseException) -> bool:
    return False


-def is_streaming_access_denied_error(exc: BaseException) -> bool:
-    """Return True when AWS denied the ``bedrock:InvokeModelWithResponseStream`` action.
-
-    IAM policies scoped to ``bedrock:InvokeModel`` only (a common least-privilege
-    setup) reject ``converse_stream()`` with an ``AccessDeniedException`` whose
-    message names the streaming action, e.g.::
-
-        User: arn:aws:iam::123456789012:user/x is not authorized to perform:
-        bedrock:InvokeModelWithResponseStream on resource: ...
-
-    This is permanent for the session — retrying the stream can never succeed —
-    so callers should flip to the non-streaming ``converse()`` path (which maps
-    to ``bedrock:InvokeModel``) instead of burning retries.
-
-    Detection is deliberately message-based: boto3 surfaces this as a
-    ``ClientError`` with ``Error.Code == "AccessDeniedException"``, and the
-    AnthropicBedrock SDK wraps the same AWS response in its own exception
-    types, but both preserve the action name in the message.
-    """
-    msg = str(exc).lower()
-    if "invokemodelwithresponsestream" not in msg:
-        return False
-    # ClientError with an explicit access-denied code is the canonical form.
-    try:
-        from botocore.exceptions import ClientError
-    except ImportError:  # pragma: no cover — botocore always present with boto3
-        ClientError = None  # type: ignore[assignment]
-    if ClientError is not None and isinstance(exc, ClientError):
-        code = (getattr(exc, "response", None) or {}).get("Error", {}).get("Code", "")
-        return code in ("AccessDeniedException", "UnauthorizedException")
-    # Wrapped forms (e.g. AnthropicBedrock SDK PermissionDeniedError) — match
-    # on the authorization-failure phrasing AWS uses.
-    return "not authorized" in msg or "accessdenied" in msg
-
-
 # ---------------------------------------------------------------------------
 # AWS credential detection
 # ---------------------------------------------------------------------------
@@ -952,14 +900,11 @@ def build_converse_kwargs(
    if system_prompt:
        kwargs["system"] = system_prompt

-    from agent.anthropic_adapter import _forbids_sampling_params
+    if temperature is not None:
+        kwargs["inferenceConfig"]["temperature"] = temperature

-    if not _forbids_sampling_params(model):
-        if temperature is not None:
-            kwargs["inferenceConfig"]["temperature"] = temperature
-
-        if top_p is not None:
-            kwargs["inferenceConfig"]["topP"] = top_p
+    if top_p is not None:
+        kwargs["inferenceConfig"]["topP"] = top_p

    if stop_sequences:
        kwargs["inferenceConfig"]["stopSequences"] = stop_sequences
@@ -1058,16 +1003,6 @@ def call_converse_stream(
    try:
        response = client.converse_stream(**kwargs)
    except Exception as exc:
-        if is_streaming_access_denied_error(exc):
-            # IAM allows bedrock:InvokeModel but not
-            # InvokeModelWithResponseStream — permanent for this session.
-            # Fall back to the non-streaming converse() path.
-            logger.info(
-                "bedrock: converse_stream denied by IAM on (region=%s, model=%s) — "
-                "falling back to non-streaming converse().",
-                region, model,
-            )
-            return normalize_converse_response(client.converse(**kwargs))
        if is_stale_connection_error(exc):
            logger.warning(
                "bedrock: stale-connection error on converse_stream(region=%s, "
--- a/agent/chat_completion_helpers.py
+++ b/agent/chat_completion_helpers.py
@@ -952,18 +952,6 @@ def build_assistant_message(agent, assistant_message, finish_reason: str) -> dic
        if preserved:
            msg["reasoning_details"] = preserved

-    # Anthropic interleaved-thinking replay: when a turn interleaves signed
-    # thinking blocks with tool_use, the parallel reasoning_details +
-    # tool_calls fields lose the cross-type ordering, and reconstruction
-    # front-loads thinking — reordering signed blocks and triggering HTTP 400
-    # ("thinking ... blocks in the latest assistant message cannot be
-    # modified"). Carry the verbatim ordered block list so the adapter can
-    # replay the latest assistant message unchanged. See
-    # agent/transports/anthropic.py and agent/anthropic_adapter.py.
-    ordered_blocks = getattr(assistant_message, "anthropic_content_blocks", None)
-    if ordered_blocks:
-        msg["anthropic_content_blocks"] = ordered_blocks
-
    # Codex Responses API: preserve encrypted reasoning items for
    # multi-turn continuity. These get replayed as input on the next turn.
    codex_items = getattr(assistant_message, "codex_reasoning_items", None)
@@ -1615,8 +1603,6 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                    _get_bedrock_runtime_client,
                    invalidate_runtime_client,
                    is_stale_connection_error,
-                    is_streaming_access_denied_error,
-                    normalize_converse_response,
                    stream_converse_with_callbacks,
                )
                region = api_kwargs.pop("__bedrock_region__", "us-east-1")
@@ -1625,29 +1611,6 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                try:
                    raw_response = client.converse_stream(**api_kwargs)
                except Exception as _bedrock_exc:
-                    # IAM policies scoped to bedrock:InvokeModel only (no
-                    # InvokeModelWithResponseStream) reject converse_stream()
-                    # with AccessDeniedException. That denial is permanent for
-                    # the session — fall back to the non-streaming converse()
-                    # inline (it maps to bedrock:InvokeModel) and disable
-                    # streaming for subsequent calls so we don't re-fail every
-                    # turn.
-                    if is_streaming_access_denied_error(_bedrock_exc):
-                        agent._disable_streaming = True
-                        agent._safe_print(
-                            "\n⚠  AWS IAM denied bedrock:InvokeModelWithResponseStream — "
-                            "falling back to non-streaming InvokeModel.\n"
-                            "   Grant that action to restore streaming output.\n"
-                        )
-                        logger.info(
-                            "bedrock: converse_stream denied by IAM (%s) — "
-                            "using non-streaming converse() for this session.",
-                            type(_bedrock_exc).__name__,
-                        )
-                        result["response"] = normalize_converse_response(
-                            client.converse(**api_kwargs)
-                        )
-                        return
                    # Evict the cached client on stale-connection failures
                    # so the outer retry loop builds a fresh client/pool.
                    if is_stale_connection_error(_bedrock_exc):
@@ -1735,14 +1698,6 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
    # poll loop uses this to detect stale connections that keep receiving
    # SSE keep-alive pings but no actual data.
    last_chunk_time = {"t": time.time()}
-    # Stale-stream patience, shared between the httpx socket read timeout
-    # (built in ``_call_chat_completions`` below) and the stale-stream detector
-    # (computed further down, before the worker thread starts).  Initialized
-    # here so the read-timeout builder can floor itself at the stale value and
-    # never fire before the detector.  ``None`` until the detector value is
-    # resolved, so the builder degrades to its plain default if it ever runs
-    # first.
-    _stream_stale_timeout = None

    def _fire_first_delta():
        if not first_delta_fired["done"] and on_first_delta:
@@ -1779,26 +1734,6 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                    "Local provider detected (%s) — stream read timeout raised to %.0fs",
                    agent.base_url, _stream_read_timeout,
                )
-            elif (
-                _stream_read_timeout == 120.0
-                and _stream_stale_timeout is not None
-                and _stream_stale_timeout != float("inf")
-                and _stream_stale_timeout > _stream_read_timeout
-            ):
-                # Cloud reasoning models (e.g. Opus) routinely pause mid-stream
-                # for minutes during extended thinking.  The stale-stream
-                # detector is deliberately scaled up to tolerate this (180–300s,
-                # see the stale-timeout block below), but the raw httpx socket
-                # read timeout defaulted to a flat 120s and fired *first* —
-                # tearing down a healthy reasoning stream before the stale
-                # detector (which owns retry + diagnostics) could act.  Keep the
-                # socket read timeout in step with the detector so it no longer
-                # preempts it.
-                _stream_read_timeout = _stream_stale_timeout
-                logger.debug(
-                    "Cloud reasoning stream — read timeout raised to %.0fs to "
-                    "match stale-stream detector", _stream_read_timeout,
-                )
        # Cap connect/pool at 60s even when provider timeout is higher.
        # connect/pool cover TCP handshake, not model inference.
        _conn_cap = min(_base_timeout, 60.0) if _provider_timeout_cfg is not None else 30.0
@@ -2449,34 +2384,9 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                            "stream" in _err_lower
                            and "not supported" in _err_lower
                        )
-                        # AWS Bedrock (AnthropicBedrock SDK path): IAM policies
-                        # with bedrock:InvokeModel but not
-                        # InvokeModelWithResponseStream reject messages.stream()
-                        # with a permission error naming the streaming action.
-                        # Permanent for the session — flip to non-streaming
-                        # (messages.create() maps to bedrock:InvokeModel).
-                        _is_bedrock_stream_denied = False
-                        if (
-                            not _is_stream_unsupported
-                            and "invokemodelwithresponsestream" in _err_lower
-                        ):
-                            # Cheap message pre-check before importing the
-                            # adapter — bedrock_adapter triggers a lazy boto3
-                            # install at import time, which must not run for
-                            # unrelated providers' stream errors.
-                            from agent.bedrock_adapter import (
-                                is_streaming_access_denied_error,
-                            )
-                            _is_bedrock_stream_denied = (
-                                is_streaming_access_denied_error(e)
-                            )
-                        if _is_stream_unsupported or _is_bedrock_stream_denied:
+                        if _is_stream_unsupported:
                            agent._disable_streaming = True
                            agent._safe_print(
-                                "\n⚠  AWS IAM denied bedrock:InvokeModelWithResponseStream. "
-                                "Switching to non-streaming.\n"
-                                "   Grant that action to restore streaming output.\n"
-                                if _is_bedrock_stream_denied else
                                "\n⚠  Streaming is not supported for this "
                                "model/provider. Switching to non-streaming.\n"
                                "   To avoid this delay, set display.streaming: false "
--- a/agent/codex_responses_adapter.py
+++ b/agent/codex_responses_adapter.py
@@ -127,21 +127,14 @@ def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> Lis
    return converted


-def _summarize_user_message_for_log(content: Any, *, sep: str = " ") -> str:
-    """Flatten message content to a plain-text summary.
+def _summarize_user_message_for_log(content: Any) -> str:
+    """Return a short text summary of a user message for logging/trajectory.

    Multimodal messages arrive as a list of ``{type:"text"|"image_url", ...}``
-    parts from the API server.  Several consumers want a plain string:
-
-    - Logging, spinner previews, and trajectory files (the default ``sep=" "``).
-    - External memory providers, which feed the text to regexes
-      (``sanitize_context``) and text APIs — a raw list crashes the sync with
-      ``expected string or bytes-like object, got 'list'`` (use ``sep="\\n"``).
-
-    Text parts are joined with ``sep``; images become a ``[N image(s)]`` marker
-    so the turn isn't recorded as if the attachment never existed.  Returns an
-    empty string for empty lists and ``str(content)`` for unexpected scalar
-    types.
+    parts from the API server.  Logging, spinner previews, and trajectory
+    files all want a plain string — this helper extracts the first chunk of
+    text and notes any attached images.  Returns an empty string for empty
+    lists and ``str(content)`` for unexpected scalar types.
    """
    if content is None:
        return ""
@@ -164,7 +157,7 @@ def _summarize_user_message_for_log(content: Any, *, sep: str = " ") -> str:
                    text_bits.append(text)
            elif ptype in {"image_url", "input_image"}:
                image_count += 1
-        summary = sep.join(text_bits).strip()
+        summary = " ".join(text_bits).strip()
        if image_count:
            note = f"[{image_count} image{'s' if image_count != 1 else ''}]"
            summary = f"{note} {summary}" if summary else note
@@ -1081,7 +1074,6 @@ def _normalize_codex_response(
    message_items_raw: List[Dict[str, Any]] = []
    tool_calls: List[Any] = []
    has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
-    saw_streaming_or_item_incomplete = response_status in {"queued", "in_progress"}
    saw_commentary_phase = False
    saw_final_answer_phase = False
    saw_reasoning_item = False
@@ -1096,7 +1088,6 @@ def _normalize_codex_response(

        if item_status in {"queued", "in_progress", "incomplete"}:
            has_incomplete_items = True
-            saw_streaming_or_item_incomplete = True

        if item_type == "message":
            item_phase = getattr(item, "phase", None)
@@ -1254,9 +1245,7 @@ def _normalize_codex_response(
        finish_reason = "tool_calls"
    elif leaked_tool_call_text:
        finish_reason = "incomplete"
-    elif saw_streaming_or_item_incomplete:
-        finish_reason = "incomplete"
-    elif (has_incomplete_items or saw_commentary_phase) and not saw_final_answer_phase:
+    elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
        finish_reason = "incomplete"
    elif (reasoning_items_raw or reasoning_parts or saw_reasoning_item) and not final_text:
        # Response contains only reasoning (encrypted thinking state and/or
--- a/agent/codex_runtime.py
+++ b/agent/codex_runtime.py
@@ -25,154 +25,6 @@ from typing import Any, Dict, List
 logger = logging.getLogger(__name__)


-def _coerce_usage_int(value: Any) -> int:
-    if isinstance(value, bool):
-        return 0
-    if isinstance(value, int):
-        return max(value, 0)
-    if isinstance(value, float):
-        return max(int(value), 0)
-    if isinstance(value, str):
-        try:
-            return max(int(value), 0)
-        except ValueError:
-            return 0
-    return 0
-
-
-def _record_codex_app_server_usage(agent, turn) -> dict[str, Any]:
-    """Translate Codex app-server token usage into Hermes accounting.
-
-    Codex app-server reports usage via thread/tokenUsage/updated as:
-    inputTokens, cachedInputTokens, outputTokens, reasoningOutputTokens,
-    totalTokens.
-
-    Hermes' canonical prompt bucket includes uncached input + cached input.
-    The Codex app-server protocol does not currently expose cache-write tokens,
-    so that bucket remains zero on this runtime.
-
-    Even when Codex omits usage for a turn, Hermes should still count that turn
-    as one API call for session/status accounting.
-    """
-    agent.session_api_calls += 1
-
-    usage = getattr(turn, "token_usage_last", None)
-    if not isinstance(usage, dict) or not usage:
-        if agent._session_db and agent.session_id:
-            try:
-                if not agent._session_db_created:
-                    agent._ensure_db_session()
-                agent._session_db.update_token_counts(
-                    agent.session_id,
-                    model=agent.model,
-                    api_call_count=1,
-                )
-            except Exception as exc:
-                logger.debug(
-                    "Codex app-server api-call persistence failed (session=%s): %s",
-                    agent.session_id, exc,
-                )
-        return {}
-
-    from agent.usage_pricing import CanonicalUsage, estimate_usage_cost
-
-    input_tokens = _coerce_usage_int(usage.get("inputTokens"))
-    cache_read_tokens = _coerce_usage_int(usage.get("cachedInputTokens"))
-    output_tokens = _coerce_usage_int(usage.get("outputTokens"))
-    reasoning_tokens = _coerce_usage_int(usage.get("reasoningOutputTokens"))
-    reported_total = _coerce_usage_int(usage.get("totalTokens"))
-
-    canonical_usage = CanonicalUsage(
-        input_tokens=input_tokens,
-        output_tokens=output_tokens,
-        cache_read_tokens=cache_read_tokens,
-        cache_write_tokens=0,
-        reasoning_tokens=reasoning_tokens,
-        raw_usage=usage,
-    )
-    prompt_tokens = canonical_usage.prompt_tokens
-    completion_tokens = canonical_usage.output_tokens
-    total_tokens = reported_total or canonical_usage.total_tokens
-    usage_dict = {
-        "prompt_tokens": prompt_tokens,
-        "completion_tokens": completion_tokens,
-        "total_tokens": total_tokens,
-        "input_tokens": canonical_usage.input_tokens,
-        "output_tokens": canonical_usage.output_tokens,
-        "cache_read_tokens": canonical_usage.cache_read_tokens,
-        "cache_write_tokens": canonical_usage.cache_write_tokens,
-        "reasoning_tokens": canonical_usage.reasoning_tokens,
-    }
-
-    compressor = getattr(agent, "context_compressor", None)
-    if compressor is not None:
-        try:
-            compressor.update_from_response(usage_dict)
-            context_window = getattr(turn, "model_context_window", None)
-            if isinstance(context_window, int) and context_window > 0:
-                compressor.context_length = context_window
-        except Exception:
-            logger.debug("codex app-server usage update failed", exc_info=True)
-
-    agent.session_prompt_tokens += prompt_tokens
-    agent.session_completion_tokens += completion_tokens
-    agent.session_total_tokens += total_tokens
-    agent.session_input_tokens += canonical_usage.input_tokens
-    agent.session_output_tokens += canonical_usage.output_tokens
-    agent.session_cache_read_tokens += canonical_usage.cache_read_tokens
-    agent.session_cache_write_tokens += canonical_usage.cache_write_tokens
-    agent.session_reasoning_tokens += canonical_usage.reasoning_tokens
-
-    cost_result = estimate_usage_cost(
-        agent.model,
-        canonical_usage,
-        provider=agent.provider,
-        base_url=agent.base_url,
-        api_key=getattr(agent, "api_key", ""),
-    )
-    if cost_result.amount_usd is not None:
-        agent.session_estimated_cost_usd += float(cost_result.amount_usd)
-    agent.session_cost_status = cost_result.status
-    agent.session_cost_source = cost_result.source
-
-    if agent._session_db and agent.session_id:
-        try:
-            if not agent._session_db_created:
-                agent._ensure_db_session()
-            agent._session_db.update_token_counts(
-                agent.session_id,
-                input_tokens=canonical_usage.input_tokens,
-                output_tokens=canonical_usage.output_tokens,
-                cache_read_tokens=canonical_usage.cache_read_tokens,
-                cache_write_tokens=canonical_usage.cache_write_tokens,
-                reasoning_tokens=canonical_usage.reasoning_tokens,
-                estimated_cost_usd=float(cost_result.amount_usd)
-                if cost_result.amount_usd is not None else None,
-                cost_status=cost_result.status,
-                cost_source=cost_result.source,
-                billing_provider=agent.provider,
-                billing_base_url=agent.base_url,
-                billing_mode="subscription_included"
-                if cost_result.status == "included" else None,
-                model=agent.model,
-                api_call_count=1,
-            )
-        except Exception as exc:
-            logger.debug(
-                "Codex app-server token persistence failed (session=%s, tokens=%d): %s",
-                agent.session_id, total_tokens, exc,
-            )
-
-    return {
-        **usage_dict,
-        "last_prompt_tokens": prompt_tokens,
-        "estimated_cost_usd": float(cost_result.amount_usd)
-        if cost_result.amount_usd is not None else None,
-        "cost_status": cost_result.status,
-        "cost_source": cost_result.source,
-    }
-
-
 def run_codex_app_server_turn(
    agent,
    *,
@@ -268,8 +120,6 @@ def run_codex_app_server_turn(
    agent._iters_since_skill = (
        getattr(agent, "_iters_since_skill", 0) + turn.tool_iterations
    )
-    usage_result = _record_codex_app_server_usage(agent, turn)
-    api_calls = 1

    # Now check the skill nudge AFTER iters were incremented — same
    # pattern the chat_completions path uses (line ~15432).
@@ -314,13 +164,12 @@ def run_codex_app_server_turn(
    return {
        "final_response": turn.final_text,
        "messages": messages,
-        "api_calls": api_calls,
+        "api_calls": 1,  # one app-server "turn" maps to one logical API call
        "completed": not turn.interrupted and turn.error is None,
        "partial": turn.interrupted or turn.error is not None,
        "error": turn.error,
        "codex_thread_id": turn.thread_id,
        "codex_turn_id": turn.turn_id,
-        **usage_result,
    }


--- a/agent/coding_context.py
+++ b/agent/coding_context.py
@@ -1,738 +0,0 @@
-"""Coding-context awareness — base Hermes, every interactive surface.
-
-When the user runs Hermes inside a code workspace (CLI, TUI, desktop app, or an
-editor over ACP), Hermes shifts into a **coding posture**. This module is the
-single place that decides whether we're in that posture and what it implies,
-so the rest of the codebase never re-derives "are we coding?" on its own.
-
-Architecture — one seam, many consumers
----------------------------------------
-The posture is modelled as a frozen :class:`RuntimeMode` selected from a small
-:class:`ContextProfile` registry (today: ``coding`` and ``general``). A profile
-is *data* — it declares the toolset to collapse to, the operating brief to
-inject, and hints for other domains (model routing, memory, subagents). Every
-domain reads the same resolved object instead of probing git/config itself:
-
-  * **System prompt** — ``RuntimeMode.system_blocks()`` → the operating brief +
-    a live git/workspace snapshot (``agent/system_prompt.py``).
-  * **Toolset** — ``RuntimeMode.toolset_selection()`` → the ``coding`` toolset
-    plus the user's enabled MCP servers (``cli.py`` / ``tui_gateway``). Only
-    under the opt-in ``focus`` mode: the default posture is prompt-only and
-    never touches the user's configured toolsets (toolsets like messaging /
-    smart-home / music are off-by-default anyway, and someone who explicitly
-    enabled image-gen or Spotify shouldn't lose it for being in a git repo).
-  * **Delegation** — subagents inherit the parent's toolset and run through the
-    same prompt builder, so the coding posture propagates to children for free.
-  * **Model / memory / compression** — declared on the profile
-    (``model_hint``, ``memory_policy``) as the extension seam; consumers read
-    ``mode.profile`` rather than re-deciding.
-
-Cache safety
------------
-The mode is resolved **once** and is immutable. The workspace snapshot is built
-once at prompt-build time and baked into the *stable* system-prompt tier — never
-re-probed per turn (that would shatter the prompt cache). Branch and dirty state
-drift mid-session, so the brief tells the model to re-check with ``git`` before
-acting on the snapshot. A ``/coding`` flip therefore only takes effect next
-session (deferred), the same contract as ``/skills install`` vs ``--now``.
-
-Activation (config ``agent.coding_context``):
-
-  * ``auto`` (default) — posture (brief + snapshot) on an interactive coding
-    surface sitting in a code workspace (git repo or recognised project root).
-    Prompt-only; toolsets and the skill index untouched.
-  * ``focus`` — like ``auto``, but additionally collapses the toolset to the
-    ``coding`` set + enabled MCP servers and demotes non-coding skill
-    categories to names-only in the prompt's skill index (no skill is ever
-    hidden). Explicit opt-in for a lean schema.
-  * ``on`` — force the posture anywhere (incl. non-workspaces). Prompt-only.
-  * ``off`` — disable entirely.
-"""
-
-from __future__ import annotations
-
-import json
-import logging
-import os
-import re
-import subprocess
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Any, Optional
-
-logger = logging.getLogger("hermes.coding_context")
-
-CODING_TOOLSET = "coding"
-
-# Surfaces where a coding posture makes sense under ``auto``. Messaging
-# platforms (telegram, discord, slack, …) are intentionally absent — a chat bot
-# in a group is not pair-programming.
-INTERACTIVE_CODING_PLATFORMS = {"cli", "tui", "acp", "desktop", ""}
-
-# Project-root signals that mark a directory as a code workspace even when it
-# isn't (yet) a git repo. Cheap filename checks — no parsing.
-_PROJECT_MARKERS = (
-    "pyproject.toml", "setup.py", "setup.cfg", "requirements.txt",
-    "package.json", "tsconfig.json", "deno.json",
-    "Cargo.toml", "go.mod", "pom.xml", "build.gradle", "build.gradle.kts",
-    "Gemfile", "composer.json", "mix.exs", "pubspec.yaml",
-    "CMakeLists.txt", "Makefile", "Dockerfile",
-    "AGENTS.md", "CLAUDE.md", ".cursorrules",
-)
-
-# Agent-instruction files surfaced separately from manifests in the snapshot.
-_CONTEXT_FILES = ("AGENTS.md", "CLAUDE.md", ".cursorrules")
-
-# Lockfile → package manager, checked in priority order.
-_PY_LOCKFILES = (("uv.lock", "uv"), ("poetry.lock", "poetry"), ("Pipfile.lock", "pipenv"))
-_JS_LOCKFILES = (
-    ("pnpm-lock.yaml", "pnpm"), ("bun.lockb", "bun"), ("bun.lock", "bun"),
-    ("yarn.lock", "yarn"), ("package-lock.json", "npm"),
-)
-
-# package.json scripts / Makefile targets worth surfacing as verify commands.
-_VERIFY_TARGETS = ("test", "tests", "lint", "typecheck", "check", "build", "fmt", "format")
-_MAX_VERIFY_COMMANDS = 8
-_MAX_FACT_FILE_BYTES = 256 * 1024
-
-_GIT_TIMEOUT = 2.5
-
-
-# Per-model edit-format steering. Matching the edit tool format to how a model
-# was trained reduces mistakes and wasted reasoning (OpenAI/Codex handle
-# patch-style diffs best; Anthropic models — and most open-weight coding
-# models, whose RL scaffolds use str_replace-style editors — do best with
-# string-replacement). Our `patch` tool exposes both: mode="patch" (V4A
-# multi-file) and mode="replace" (find-and-swap). We nudge each family toward
-# its native format. Unknown families get nothing (the brief's neutral wording
-# stands). Substrings match the model id; aligned with TOOL_USE_ENFORCEMENT_MODELS.
-#
-# GPT/Codex get V4A for ALL edits, single-file included: in codex-rs,
-# apply_patch (V4A — apply_patch.lark) is the ONLY file editor, no
-# str_replace-style tool exists, and the shipped model prompts say to use
-# apply_patch even "for single file edits" — so a replace-mode nudge would
-# steer those models toward a format their first-party harness never taught
-# them.
-_EDIT_FORMAT_GUIDANCE: dict[str, tuple[tuple[str, ...], str]] = {
-    "patch": (
-        ("gpt", "codex"),
-        "- Edit format: author new files with `write_file`; for edits to "
-        "existing code use `patch` with `mode='patch'` (V4A diff) — including "
-        "single-file edits. It's the edit format you handle most reliably.",
-    ),
-    "replace": (
-        ("claude", "sonnet", "opus", "haiku",
-         "gemini", "gemma", "deepseek", "qwen", "kimi", "glm", "grok",
-         "hermes", "llama", "mistral", "devstral", "minimax"),
-        "- Edit format: author new files with `write_file`; for edits to "
-        "existing code prefer `patch` in `mode='replace'` — match a unique "
-        "snippet and swap it. Reach for `mode='patch'` (V4A) only when an edit "
-        "genuinely spans several files at once.",
-    ),
-}
-
-
-def _model_family(model: Optional[str]) -> Optional[str]:
-    """Classify a model id into an edit-format family key, or ``None``.
-
-    Used to steer the coding posture toward the edit tool format a model was
-    trained on. Family-agnostic by design: an unrecognised model gets ``None``
-    and the operating brief's neutral edit wording applies.
-    """
-    if not model:
-        return None
-    lowered = model.lower()
-    for family, (needles, _line) in _EDIT_FORMAT_GUIDANCE.items():
-        if any(n in lowered for n in needles):
-            return family
-    return None
-
-
-def _edit_format_line(model: Optional[str]) -> str:
-    """The edit-format guidance line for this model's family (``""`` if none)."""
-    family = _model_family(model)
-    if family is None:
-        return ""
-    return _EDIT_FORMAT_GUIDANCE[family][1]
-
-
-# Operating brief for the coding posture. Tool names referenced here (read_file,
-# search_files, patch, write_file, terminal, todo) are in the coding toolset and
-# in _HERMES_CORE_TOOLS, so they're present on every surface this fires on.
-CODING_AGENT_GUIDANCE = (
-    "You are a coding agent pairing with the user inside their codebase. "
-    "Operate like a careful senior engineer.\n"
-    "\n"
-    "Gather context first:\n"
-    "- Read the relevant files with `read_file` and locate code with "
-    "`search_files` before changing anything. Trace a symbol to its definition "
-    "and usages rather than guessing its shape.\n"
-    "- Batch independent lookups: when several reads/searches don't depend on "
-    "each other, issue them together in one turn instead of one at a time.\n"
-    "- Never invent files, symbols, APIs, or imports. If you haven't seen it in "
-    "the repo, go look. Don't assume a library is available — check the project "
-    "manifest (pyproject.toml / package.json / Cargo.toml / go.mod) and how "
-    "neighbouring files import it.\n"
-    "\n"
-    "Make changes through the tools, not the chat:\n"
-    "- Edit with `patch`/`write_file`. Do NOT print code blocks to the user as "
-    "a substitute for editing — apply the change, then summarise it. Only show "
-    "code when the user explicitly asks to see it.\n"
-    "- Match the project's existing style and conventions; AGENTS.md / "
-    "CLAUDE.md / .cursorrules already in context win over your defaults. Touch "
-    "only what the task needs — no drive-by refactors, renames, or reformatting "
-    "— and add any imports/dependencies your code requires.\n"
-    "- If an edit fails to apply, re-read the file to get the current exact "
-    "contents before retrying — don't repeat a stale patch. If the same region "
-    "fails twice, rewrite the enclosing function or file with `write_file` "
-    "instead of attempting a third patch.\n"
-    "\n"
-    "Verify, and know when to stop:\n"
-    "- Use `terminal` for git, builds, tests, and inspection. Run the relevant "
-    "tests/linter/build and confirm they pass before claiming the work is done.\n"
-    "- Terminal state persists across calls: current directory and exported "
-    "environment variables carry forward. Activate a virtualenv or export setup "
-    "vars once, then reuse that state instead of re-sourcing it before every "
-    "test command.\n"
-    "- Fix root causes, not symptoms: when you find a bug, check sibling call "
-    "paths for the same flaw and fix the class, not just the reported site.\n"
-    "- When fixing linter/type errors on a file, stop after about three "
-    "attempts on the same file and ask the user rather than looping.\n"
-    "- Track multi-step work with `todo`. Reference code as `path:line` instead "
-    "of pasting whole files.\n"
-    "\n"
-    "Respect the user's repo: don't commit, push, or rewrite history unless "
-    "asked, and never read, print, or commit secrets — leave `.env` and "
-    "credential files alone unless the user explicitly asks. The Workspace "
-    "block below is a snapshot from session start — re-run `git status`/"
-    "`git branch` before relying on it. Be concise: lead with the change or "
-    "answer, not a preamble."
-)
-
-
-# ── Context profiles (declarative posture definitions) ──────────────────────
-
-
-@dataclass(frozen=True)
-class ContextProfile:
-    """A named operating posture. Pure data — consumers read these fields.
-
-    ``toolset``      — collapse to this toolset (+ enabled MCP) when no explicit
-                       selection is pinned; ``None`` keeps the platform default.
-    ``guidance``     — operating brief injected into the stable system prompt;
-                       ``""`` injects nothing.
-    ``model_hint``   — routing preference key for smart model routing
-                       (extension seam; not yet consumed by the router).
-    ``memory_policy``— memory namespace/weighting hint (extension seam).
-    ``compact_skill_categories`` — skill categories DEMOTED to names-only in
-                       the system-prompt skill index under the opt-in ``focus``
-                       mode. Never hidden: every skill name stays visible
-                       (so memory-anchored recall keeps working) — only the
-                       descriptions are dropped to cut index noise. Deny-list
-                       semantics so unknown/custom categories keep full
-                       entries.
-    """
-
-    name: str
-    toolset: Optional[str] = None
-    guidance: str = ""
-    model_hint: Optional[str] = None
-    memory_policy: str = "default"
-    compact_skill_categories: tuple[str, ...] = ()
-
-
-# Skill categories that are clearly not part of a coding workflow. Demoted to
-# names-only in the prompt's skill index under the opt-in ``focus`` mode only
-# (deny-list — anything not listed here, incl. custom user categories, keeps
-# full entries). Coding-adjacent categories (devops, github, mcp,
-# data-science, diagramming, research, security, …) are intentionally absent.
-_NON_CODING_SKILL_CATEGORIES = (
-    "apple", "communication", "cooking", "creative", "email", "finance",
-    "gaming", "gifs", "health", "media", "music", "note-taking",
-    "productivity", "shopping", "smart-home", "social-media", "travel",
-    "yuanbao",
-)
-
-
-GENERAL_PROFILE = ContextProfile(name="general")
-CODING_PROFILE = ContextProfile(
-    name="coding",
-    toolset=CODING_TOOLSET,
-    guidance=CODING_AGENT_GUIDANCE,
-    model_hint="coding",
-    memory_policy="project",
-    compact_skill_categories=_NON_CODING_SKILL_CATEGORIES,
-)
-
-_PROFILES: dict[str, ContextProfile] = {
-    GENERAL_PROFILE.name: GENERAL_PROFILE,
-    CODING_PROFILE.name: CODING_PROFILE,
-}
-
-
-def get_profile(name: str) -> ContextProfile:
-    """Return a registered profile, falling back to ``general``."""
-    return _PROFILES.get(name, GENERAL_PROFILE)
-
-
-# ── Helpers ─────────────────────────────────────────────────────────────────
-
-
-def _coding_mode(config: Optional[dict[str, Any]]) -> str:
-    """Return the normalized ``agent.coding_context`` mode (auto/focus/on/off)."""
-    if config is None:
-        try:
-            from hermes_cli.config import load_config
-
-            config = load_config()
-        except Exception:
-            config = {}
-    raw = ((config or {}).get("agent", {}) or {}).get("coding_context", "auto")
-    mode = str(raw).strip().lower()
-    if mode in {"focus", "strict", "lean"}:
-        return "focus"
-    if mode in {"on", "true", "yes", "1", "always"}:
-        return "on"
-    if mode in {"off", "false", "no", "0", "never"}:
-        return "off"
-    return "auto"
-
-
-def _resolve_cwd(cwd: Optional[str | Path]) -> Path:
-    if cwd:
-        return Path(cwd).expanduser()
-    try:
-        from agent.runtime_cwd import resolve_agent_cwd
-
-        return resolve_agent_cwd()
-    except Exception:
-        return Path(os.getcwd())
-
-
-def _git_root(cwd: Path) -> Optional[Path]:
-    current = cwd.resolve()
-    for parent in [current, *current.parents]:
-        if (parent / ".git").exists():
-            return parent
-    return None
-
-
-def _home() -> Optional[Path]:
-    try:
-        return Path.home().resolve()
-    except (OSError, RuntimeError):
-        return None
-
-
-def _marker_root(cwd: Path) -> Optional[Path]:
-    """Nearest ancestor that looks like a project root, or ``None``.
-
-    Walks up at most a few levels so a manifest in the workspace root counts
-    even when the user is in a subdirectory. ``$HOME`` itself is skipped — a
-    Makefile or AGENTS.md sitting in the home directory is global user config,
-    not a project-root signal.
-    """
-    current = cwd.resolve()
-    home = _home()
-    for depth, parent in enumerate([current, *current.parents]):
-        if depth > 6:
-            break
-        if parent == home:
-            continue
-        for marker in _PROJECT_MARKERS:
-            if (parent / marker).exists():
-                return parent
-    return None
-
-
-def _detect_profile_name(mode: str, platform: str, cwd_str: str) -> str:
-    """Resolve which profile applies.
-
-    ``auto``/``focus``: coding when the surface is interactive AND the cwd is a
-    code workspace (a git repo or a recognised project root). ``on``: always
-    coding. ``off``: always general.
-
-    A git repo rooted at ``$HOME`` (the dotfiles pattern) is NOT a workspace
-    signal — without the guard, every session anywhere under a dotfiles-managed
-    home directory would silently flip to the coding posture.
-
-    Detection is intentionally not memoized: it's a handful of ``stat`` calls,
-    and callers resolve the mode once per session anyway. Caching here would
-    risk a stale posture if a long-lived process (gateway/TUI) serves sessions
-    from different working directories.
-    """
-    if mode == "off":
-        return GENERAL_PROFILE.name
-    if mode == "on":
-        return CODING_PROFILE.name
-    if platform and platform.strip().lower() not in INTERACTIVE_CODING_PLATFORMS:
-        return GENERAL_PROFILE.name
-    cwd = Path(cwd_str)
-    git_root = _git_root(cwd)
-    if git_root is not None and git_root == _home():
-        git_root = None  # dotfiles repo at $HOME — not a code workspace
-    if git_root is not None or _marker_root(cwd) is not None:
-        return CODING_PROFILE.name
-    return GENERAL_PROFILE.name
-
-
-# ── RuntimeMode (the seam) ──────────────────────────────────────────────────
-
-
-@dataclass(frozen=True)
-class RuntimeMode:
-    """The resolved operating posture for a session. Immutable by construction.
-
-    Built once via :func:`resolve_runtime_mode` and consumed by every domain
-    that cares about the coding/general distinction. Never mutate or re-resolve
-    mid-session — that would break the prompt cache.
-    """
-
-    profile: ContextProfile
-    surface: str
-    cwd: Path
-    # The normalized ``agent.coding_context`` mode this posture was resolved
-    # under (auto/focus/on/off). Toolset collapse is gated on ``focus``.
-    config_mode: str = "auto"
-    # The model id this session runs (e.g. "anthropic/claude-opus-4.8"). Used
-    # only to steer edit-format guidance toward the model's family — see
-    # ``_edit_format_line``. Fixed for the session, so cache-safe.
-    model: Optional[str] = None
-
-    @property
-    def kind(self) -> str:
-        return self.profile.name
-
-    @property
-    def is_coding(self) -> bool:
-        return self.profile.name == CODING_PROFILE.name
-
-    def toolset_selection(self, config: Optional[dict[str, Any]] = None) -> Optional[list[str]]:
-        """Toolset list for this posture, or ``None`` to keep the platform default.
-
-        Non-``None`` only under the opt-in ``focus`` mode. The default posture
-        is prompt-only: most strippable toolsets are off-by-default anyway, and
-        a user who explicitly enabled one (image-gen for frontend/game assets,
-        messaging for build notifications, …) keeps it while coding.
-
-        Callers apply this only when the user hasn't pinned an explicit
-        selection (``--toolsets``, ``HERMES_TUI_TOOLSETS``, …); they never
-        override a pin. Returns the profile's toolset plus enabled MCP servers.
-        """
-        if self.config_mode != "focus":
-            return None
-        if self.profile.toolset is None:
-            return None
-        return [self.profile.toolset, *_enabled_mcp_servers(config)]
-
-    def system_blocks(self) -> list[str]:
-        """Stable system-prompt blocks for this posture (brief + workspace).
-
-        The operating brief carries a model-family edit-format nudge appended
-        to it (one cached string, not a separate block) so the model is steered
-        toward the `patch` mode it handles best — see ``_edit_format_line``.
-        """
-        if not self.is_coding:
-            return []
-        blocks: list[str] = []
-        if self.profile.guidance:
-            brief = self.profile.guidance
-            edit_line = _edit_format_line(self.model)
-            if edit_line:
-                brief = f"{brief}\n{edit_line}"
-            blocks.append(brief)
-        workspace = build_coding_workspace_block(self.cwd)
-        if workspace:
-            blocks.append(workspace)
-        return blocks
-
-    def compact_skill_categories(self) -> frozenset[str]:
-        """Skill categories to demote to names-only in the prompt's skill index.
-
-        Gated on the opt-in ``focus`` mode, like the toolset collapse: the
-        default posture leaves the skill index untouched. Users who didn't ask
-        for a lean prompt keep full entries for every category — index changes
-        under ``auto`` proved too surprising in practice, even names-only ones
-        (a demoted description is information the model no longer weighs when
-        deciding what to load).
-
-        Demoted — never hidden — even under ``focus``. An earlier revision
-        fully pruned these categories from the index, which caused silent
-        capability loss in a real workflow: agent-created skills are the
-        model's accumulated project memory (server-ops runbooks, learned
-        pitfalls, …), and models do not reliably reach for ``skills_list`` to
-        rediscover what the index stopped showing them. Names-only keeps every
-        skill loadable on recall while still cutting the description noise.
-        """
-        if not self.is_coding or self.config_mode != "focus":
-            return frozenset()
-        return frozenset(self.profile.compact_skill_categories)
-
-
-def resolve_runtime_mode(
-    *,
-    platform: Optional[str] = None,
-    cwd: Optional[str | Path] = None,
-    config: Optional[dict[str, Any]] = None,
-    model: Optional[str] = None,
-) -> RuntimeMode:
-    """Resolve the operating posture once. Cheap — a handful of ``stat`` calls.
-
-    This is the single entry point every domain should call. The returned
-    object is immutable and safe to cache for the session. Detection itself is
-    intentionally *not* memoized (see ``_detect_profile_name``) so a long-lived
-    process can't pin a stale posture; callers resolve once per session and
-    hold the result. ``model`` is recorded only to steer edit-format guidance;
-    it never affects detection.
-    """
-    resolved_cwd = _resolve_cwd(cwd)
-    mode = _coding_mode(config)
-    name = _detect_profile_name(
-        mode, (platform or "").strip().lower(), str(resolved_cwd)
-    )
-    return RuntimeMode(
-        profile=get_profile(name),
-        surface=platform or "",
-        cwd=resolved_cwd,
-        config_mode=mode,
-        model=model,
-    )
-
-
-# ── Back-compat surface (thin wrappers over RuntimeMode) ────────────────────
-
-
-def is_coding_context(
-    *,
-    platform: Optional[str] = None,
-    cwd: Optional[str | Path] = None,
-    config: Optional[dict[str, Any]] = None,
-) -> bool:
-    """Whether Hermes should operate in its coding posture right now."""
-    return resolve_runtime_mode(platform=platform, cwd=cwd, config=config).is_coding
-
-
-def coding_selection(
-    *,
-    platform: Optional[str] = None,
-    cwd: Optional[str | Path] = None,
-    config: Optional[dict[str, Any]] = None,
-) -> Optional[list[str]]:
-    """Toolset selection for the coding posture.
-
-    ``None`` unless the user opted into ``focus`` mode AND the posture is
-    active — the default coding posture never overrides configured toolsets.
-    """
-    return resolve_runtime_mode(
-        platform=platform, cwd=cwd, config=config
-    ).toolset_selection(config)
-
-
-def coding_system_blocks(
-    *,
-    platform: Optional[str] = None,
-    cwd: Optional[str | Path] = None,
-    config: Optional[dict[str, Any]] = None,
-    model: Optional[str] = None,
-) -> list[str]:
-    """Stable system-prompt blocks for the current posture (empty when general).
-
-    ``model`` steers the brief's edit-format nudge toward the model's family.
-    """
-    return resolve_runtime_mode(
-        platform=platform, cwd=cwd, config=config, model=model
-    ).system_blocks()
-
-
-def coding_compact_skill_categories(
-    *,
-    platform: Optional[str] = None,
-    cwd: Optional[str | Path] = None,
-    config: Optional[dict[str, Any]] = None,
-) -> frozenset[str]:
-    """Skill categories the active posture demotes to names-only in the index.
-
-    Empty outside the coding posture and outside the opt-in ``focus`` mode —
-    the default posture never touches the skill index. Under ``focus``,
-    demoted — never hidden: every skill name stays in the index and remains
-    loadable via ``skill_view`` / ``skills_list``; only descriptions are
-    dropped.
-    """
-    return resolve_runtime_mode(
-        platform=platform, cwd=cwd, config=config
-    ).compact_skill_categories()
-
-
-def _enabled_mcp_servers(config: Optional[dict[str, Any]]) -> list[str]:
-    """Names of MCP servers the user has enabled — kept in the coding posture.
-
-    MCP servers (figma, browser, tophat, …) are explicitly configured and part
-    of the coding workflow, not noise to strip.
-    """
-    try:
-        from hermes_cli.config import read_raw_config
-        from hermes_cli.tools_config import _parse_enabled_flag
-
-        servers = read_raw_config().get("mcp_servers") or {}
-        return [
-            str(name)
-            for name, cfg in servers.items()
-            if isinstance(cfg, dict)
-            and _parse_enabled_flag(cfg.get("enabled", True), default=True)
-        ]
-    except Exception:
-        return []
-
-
-# ── git/workspace probe ─────────────────────────────────────────────────────
-
-
-def _git(cwd: Path, *args: str) -> str:
-    try:
-        out = subprocess.run(
-            ["git", "-C", str(cwd), *args],
-            capture_output=True,
-            text=True,
-            timeout=_GIT_TIMEOUT,
-        )
-    except (OSError, subprocess.SubprocessError):
-        return ""
-    return out.stdout.strip() if out.returncode == 0 else ""
-
-
-def _parse_status(porcelain: str) -> tuple[dict[str, str], dict[str, int]]:
-    """Parse ``git status --porcelain=2 --branch`` into branch + counts."""
-    branch: dict[str, str] = {}
-    counts = {"staged": 0, "modified": 0, "untracked": 0, "conflicts": 0}
-    for line in porcelain.splitlines():
-        if line.startswith("# branch.head"):
-            branch["head"] = line.split(maxsplit=2)[-1]
-        elif line.startswith("# branch.upstream"):
-            branch["upstream"] = line.split(maxsplit=2)[-1]
-        elif line.startswith("# branch.ab"):
-            parts = line.split()
-            branch["ahead"], branch["behind"] = parts[2].lstrip("+"), parts[3].lstrip("-")
-        elif line.startswith(("1 ", "2 ")):
-            xy = line.split(maxsplit=2)[1]
-            if xy[0] != ".":
-                counts["staged"] += 1
-            if xy[1] != ".":
-                counts["modified"] += 1
-        elif line.startswith("u "):
-            counts["conflicts"] += 1
-        elif line.startswith("? "):
-            counts["untracked"] += 1
-    return branch, counts
-
-
-def _read_small(path: Path) -> str:
-    """Read a small text file, or ``""`` — never raises, never reads huge files."""
-    try:
-        if not path.is_file() or path.stat().st_size > _MAX_FACT_FILE_BYTES:
-            return ""
-        return path.read_text(encoding="utf-8", errors="replace")
-    except OSError:
-        return ""
-
-
-def _project_facts(root: Path) -> list[str]:
-    """Detected project facts for the workspace snapshot.
-
-    The point is to hand the model its *verify loop* up front — which manifest,
-    which package manager, and the exact test/lint/build commands — instead of
-    making it rediscover them every session. Cheap: stat calls plus reads of a
-    couple of small files; built once at prompt-build time (cache-safe).
-    """
-    facts: list[str] = []
-
-    manifests = [m for m in _PROJECT_MARKERS if m not in _CONTEXT_FILES and (root / m).is_file()]
-    package_managers = [
-        pm for lock, pm in (*_PY_LOCKFILES, *_JS_LOCKFILES) if (root / lock).is_file()
-    ]
-    if manifests:
-        line = f"- Project: {', '.join(manifests[:6])}"
-        if package_managers:
-            line += f" ({'/'.join(dict.fromkeys(package_managers))})"
-        facts.append(line)
-
-    verify: list[str] = []
-    if (root / "scripts" / "run_tests.sh").is_file():
-        verify.append("scripts/run_tests.sh")
-    if (root / "package.json").is_file():
-        try:
-            scripts = json.loads(_read_small(root / "package.json") or "{}").get("scripts") or {}
-        except (json.JSONDecodeError, AttributeError):
-            scripts = {}
-        js_pm = next((pm for lock, pm in _JS_LOCKFILES if (root / lock).is_file()), "npm")
-        verify.extend(f"{js_pm} run {name}" for name in _VERIFY_TARGETS if name in scripts)
-    if (root / "pytest.ini").is_file() or "[tool.pytest" in _read_small(root / "pyproject.toml"):
-        verify.append("pytest")
-    makefile = _read_small(root / "Makefile")
-    if makefile:
-        verify.extend(
-            f"make {name}" for name in _VERIFY_TARGETS
-            if re.search(rf"^{re.escape(name)}\s*:", makefile, re.MULTILINE)
-        )
-    if verify:
-        deduped = list(dict.fromkeys(verify))[:_MAX_VERIFY_COMMANDS]
-        facts.append(f"- Verify: {'; '.join(deduped)}")
-
-    context_files = [c for c in _CONTEXT_FILES if (root / c).is_file()]
-    if context_files:
-        facts.append(f"- Context files: {', '.join(context_files)}")
-
-    return facts
-
-
-def build_coding_workspace_block(cwd: Optional[str | Path] = None) -> str:
-    """Workspace snapshot for the system prompt (empty outside a workspace).
-
-    Git state (branch/status/commits) when the cwd is in a repo, plus detected
-    project facts (manifest, package manager, verify commands, context files)
-    — so marker-only (non-git) projects still get a snapshot.
-    """
-    resolved = _resolve_cwd(cwd)
-    git_root = _git_root(resolved)
-    root = git_root or _marker_root(resolved)
-    if root is None:
-        return ""
-
-    lines = ["Workspace (snapshot at session start — re-check with `git` before acting on it):"]
-    lines.append(f"- Root: {root}")
-
-    if git_root is not None:
-        branch, counts = _parse_status(_git(root, "status", "--porcelain=2", "--branch"))
-        head = branch.get("head", "")
-        if head and head != "(detached)":
-            line = f"- Branch: {head}"
-            if branch.get("upstream"):
-                line += f" \u2192 {branch['upstream']}"
-                ahead, behind = branch.get("ahead", "0"), branch.get("behind", "0")
-                if ahead != "0" or behind != "0":
-                    line += f" (ahead {ahead}, behind {behind})"
-            lines.append(line)
-        elif head == "(detached)":
-            lines.append("- Branch: (detached HEAD)")
-
-        # Linked worktree: the per-worktree git dir differs from the shared common dir.
-        # We surface the fact that it's a worktree (so the model knows branches/stashes
-        # are shared state) but deliberately do NOT expose the primary tree path —
-        # giving the model a second absolute path causes it to sometimes run commands
-        # in the wrong directory.
-        git_dir, common_dir = _git(root, "rev-parse", "--git-dir"), _git(root, "rev-parse", "--git-common-dir")
-        if git_dir and common_dir and Path(git_dir).resolve() != Path(common_dir).resolve():
-            lines.append("- Worktree: linked (git state shared with primary tree)")
-
-        dirty = [f"{n} {label}" for label, n in (
-            ("staged", counts["staged"]), ("modified", counts["modified"]),
-            ("untracked", counts["untracked"]), ("conflicts", counts["conflicts"]),
-        ) if n]
-        lines.append(f"- Status: {', '.join(dirty) if dirty else 'clean'}")
-
-        recent = _git(root, "log", "-3", "--pretty=%h %s")
-        if recent:
-            lines.append("- Recent commits:")
-            lines.extend(f"    {c}" for c in recent.splitlines())
-
-    lines.extend(_project_facts(root))
-    return "\n".join(lines)
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -7,7 +7,7 @@ protecting head and tail context.
 Improvements over v2:
  - Structured summary template with Resolved/Pending question tracking
  - Filter-safe summarizer preamble that treats prior turns as source material
-  - Historical (reference-only) section headings replace "Next Steps"/"Remaining Work" to avoid reading as active instructions
+  - "Remaining Work" replaces "Next Steps" to avoid reading as active instructions
  - Clear separator when summary merges into tail message
  - Iterative summary updates (preserves info across multiple compactions)
  - Token-budget tail protection instead of fixed message count
@@ -34,75 +34,7 @@ from agent.redact import redact_sensitive_text

 logger = logging.getLogger(__name__)

-HISTORICAL_TASK_HEADING = "## Historical Task Snapshot"
-HISTORICAL_IN_PROGRESS_HEADING = "## Historical In-Progress State"
-HISTORICAL_PENDING_ASKS_HEADING = "## Historical Pending User Asks"
-HISTORICAL_REMAINING_WORK_HEADING = "## Historical Remaining Work"
-
-
 SUMMARY_PREFIX = (
-    "[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted "
-    "into the summary below. This is a handoff from a previous context "
-    "window — treat it as background reference, NOT as active instructions. "
-    "Do NOT answer questions or fulfill requests mentioned in this summary; "
-    "they were already addressed. "
-    "Respond ONLY to the latest user message that appears AFTER this "
-    "summary — that message is the single source of truth for what to do "
-    "right now. "
-    "Topic overlap with the summary does NOT mean you should resume its "
-    "task: even on similar topics, the latest user message WINS. Treat ONLY "
-    "the latest message as the active task and discard stale items from "
-    f"'{HISTORICAL_TASK_HEADING}' / '{HISTORICAL_IN_PROGRESS_HEADING}' / "
-    f"'{HISTORICAL_PENDING_ASKS_HEADING}' / "
-    f"'{HISTORICAL_REMAINING_WORK_HEADING}' entirely — do not 'wrap up' or "
-    "'finish' work described there unless the latest message explicitly "
-    "asks for it. "
-    "Reverse signals in the latest message (e.g. 'stop', 'undo', 'roll "
-    "back', 'just verify', 'don't do that anymore', 'never mind', a new "
-    "topic) must immediately end any in-flight work described in the "
-    "summary; do not re-surface it in later turns. "
-    "IMPORTANT: Your persistent memory (MEMORY.md, USER.md) in the system "
-    "prompt is ALWAYS authoritative and active — never ignore or deprioritize "
-    "memory content due to this compaction note. "
-    "The current session state (files, config, etc.) may reflect work "
-    "described here — avoid repeating it:"
-)
-LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"
-
-# Metadata key added to context compression summary messages so that frontends
-# (CLI, Desktop, gateway, TUI) can distinguish them from real assistant/user
-# messages and filter or render them appropriately without content-prefix
-# heuristics. See https://github.com/NousResearch/hermes-agent/issues/38389
-#
-# Underscore-prefixed ON PURPOSE: the wire sanitizers
-# (agent/transports/chat_completions.py convert_messages and the summary-path
-# mirror in agent/chat_completion_helpers.py) strip every top-level message
-# key starting with "_" before the request leaves the process. Strict
-# OpenAI-compatible gateways (Fireworks, Mistral, Moonshot/Kimi, opencode-go)
-# reject payloads carrying unknown keys with "Extra inputs are not permitted",
-# poisoning every subsequent request in the session — a bare key like
-# "is_compressed_summary" would reach the wire and trip exactly that.
-COMPRESSED_SUMMARY_METADATA_KEY = "_compressed_summary"
-
-# Appended to every standalone summary message (and to the merged-into-tail
-# prefix) so the model has an unambiguous "summary ends here" boundary.
-# Without it, weak models read the verbatim "## Active Task" quote as fresh
-# user input (#11475, #14521) or regurgitate an assistant-role summary as
-# their own output (#33256).
-_SUMMARY_END_MARKER = (
-    "--- END OF CONTEXT SUMMARY — "
-    "respond to the message below, not the summary above ---"
-)
-
-# Handoff prefixes that shipped in earlier releases. A summary persisted under
-# one of these can be inherited into a resumed lineage (#35344); when it is
-# re-normalized on re-compaction we must strip the OLD prefix too, otherwise the
-# stale directive it carried (e.g. "resume exactly from Active Task") survives
-# embedded in the body and keeps hijacking replies. Keep newest-first; entries
-# are matched literally. Add a frozen copy here whenever SUMMARY_PREFIX changes.
-_HISTORICAL_SUMMARY_PREFIXES = (
-    # Carveout era (#41607/#38364/#42812): "consistent → use as background"
-    # licensed stale-task resumption on topic overlap.
    "[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted "
    "into the summary below. This is a handoff from a previous context "
    "window — treat it as background reference, NOT as active instructions. "
@@ -125,7 +57,17 @@ _HISTORICAL_SUMMARY_PREFIXES = (
    "prompt is ALWAYS authoritative and active — never ignore or deprioritize "
    "memory content due to this compaction note. "
    "The current session state (files, config, etc.) may reflect work "
-    "described here — avoid repeating it:",
+    "described here — avoid repeating it:"
+)
+LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"
+
+# Handoff prefixes that shipped in earlier releases. A summary persisted under
+# one of these can be inherited into a resumed lineage (#35344); when it is
+# re-normalized on re-compaction we must strip the OLD prefix too, otherwise the
+# stale directive it carried (e.g. "resume exactly from Active Task") survives
+# embedded in the body and keeps hijacking replies. Keep newest-first; entries
+# are matched literally. Add a frozen copy here whenever SUMMARY_PREFIX changes.
+_HISTORICAL_SUMMARY_PREFIXES = (
    # Pre-#35344: contained the self-contradicting "resume exactly" directive.
    "[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted "
    "into the summary below. This is a handoff from a previous context "
@@ -168,23 +110,10 @@ _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
 # become another unbounded transcript copy after the LLM summarizer failed.
 _FALLBACK_SUMMARY_MAX_CHARS = 8_000
 _FALLBACK_TURN_MAX_CHARS = 700
-_AUTO_FOCUS_MAX_TURNS = 3
-_AUTO_FOCUS_TURN_MAX_CHARS = 260
-_AUTO_FOCUS_MAX_CHARS = 700
-# Keep a short run of recent messages verbatim even when the token budget is
-# already exhausted.  The public ``protect_last_n`` default is intentionally
-# high for small/light tails, but using all 20 as a hard floor here would bring
-# back the old large-tool-output case where nothing can be compacted.
-_MAX_TAIL_MESSAGE_FLOOR = 8


 _PATH_MENTION_RE = re.compile(r"(?:/|~/?|[A-Za-z]:\\)[^\s`'\")\]}<>]+")

-# MEDIA delivery directives must not reach the summarizer — if one leaks into
-# the summary, the downstream model may re-emit it as an active directive on
-# the next turn, triggering bogus attachment sends (#14665).
-_MEDIA_DIRECTIVE_RE = re.compile(r"MEDIA:\S+")
-

 def _dedupe_append(items: list[str], value: str, *, limit: int) -> None:
    value = value.strip()
@@ -1045,7 +974,6 @@ class ContextCompressor(ContextEngine):
        for msg in turns:
            role = msg.get("role", "unknown")
            content = redact_sensitive_text(msg.get("content") or "")
-            content = _MEDIA_DIRECTIVE_RE.sub("[media attachment]", content)

            # Tool results: keep enough content for the summarizer
            if role == "tool":
@@ -1227,7 +1155,7 @@ class ContextCompressor(ContextEngine):
            )

        reason_text = f" Summary failure reason: {reason}." if reason else ""
-        body = f"""{HISTORICAL_TASK_HEADING}
+        body = f"""## Active Task
 {active_task}

 ## Goal
@@ -1244,7 +1172,7 @@ Recovered from a deterministic fallback because the LLM context summarizer was u
 ## Active State
 Unknown from deterministic fallback. Inspect current repository/session state if needed.

-{HISTORICAL_IN_PROGRESS_HEADING}
+## In Progress
 {active_task}

 ## Blocked
@@ -1256,13 +1184,13 @@ None recoverable from deterministic fallback.
 ## Resolved Questions
 None recoverable from deterministic fallback.

-{HISTORICAL_PENDING_ASKS_HEADING}
+## Pending User Asks
 {active_task}

 ## Relevant Files
 {_bullets(relevant_files, limit=12)}

-{HISTORICAL_REMAINING_WORK_HEADING}
+## Remaining Work
 Continue from the most recent unfulfilled user ask and protected tail messages. Verify state with tools before making claims.

 ## Last Dropped Turns
@@ -1384,7 +1312,7 @@ Summary generation was unavailable, so this is a best-effort deterministic fallb
            _temporal_anchoring_rule = ""

        # Shared structured template (used by both paths).
-        _template_sections = f"""{HISTORICAL_TASK_HEADING}
+        _template_sections = f"""## Active Task
 [THE SINGLE MOST IMPORTANT FIELD. Capture the user's most recent unfulfilled
 input verbatim — the exact words they used. This includes:
 - Explicit task assignments ("refactor the auth module")
@@ -1431,7 +1359,7 @@ Be specific with file paths, commands, line numbers, and results.]
 - Any running processes or servers
 - Environment details that matter]

-{HISTORICAL_IN_PROGRESS_HEADING}
+## In Progress
 [Work currently underway — what was being done when compaction fired]

 ## Blocked
@@ -1443,14 +1371,14 @@ Be specific with file paths, commands, line numbers, and results.]
 ## Resolved Questions
 [Questions the user asked that were ALREADY answered — include the answer so it is not repeated]

-{HISTORICAL_PENDING_ASKS_HEADING}
-[Questions or requests from the user that have NOT yet been answered or fulfilled. These are STALE — they were from the compacted turns. Write them here for reference only. The agent must NOT act on them unless the latest user message explicitly requests it. If none, write "None."]
+## Pending User Asks
+[Questions or requests from the user that have NOT yet been answered or fulfilled. If none, write "None."]

 ## Relevant Files
 [Files read, modified, or created — with brief note on each]

-{HISTORICAL_REMAINING_WORK_HEADING}
-[What remains to be done — framed as STALE context for reference only. The agent must NOT resume this work unless the latest user message explicitly asks for it.]
+## Remaining Work
+[What remains to be done — framed as context, not instructions]

 ## Critical Context
 [Any specific values, error messages, configuration details, or data that would be lost without explicit preservation. NEVER include API keys, tokens, passwords, or credentials — write [REDACTED] instead.]
@@ -1493,7 +1421,7 @@ Use this exact structure:
            prompt += f"""

 FOCUS TOPIC: "{focus_topic}"
-This compaction should PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget. Even for the focus topic, NEVER preserve API keys, tokens, passwords, or credentials — use [REDACTED]."""
+The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget. Even for the focus topic, NEVER preserve API keys, tokens, passwords, or credentials — use [REDACTED]."""

        try:
            call_kwargs = {
@@ -1646,13 +1574,7 @@ This compaction should PRIORITISE preserving all information related to the focu
        text = (summary or "").strip()
        for prefix in (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX, *_HISTORICAL_SUMMARY_PREFIXES):
            if text.startswith(prefix):
-                text = text[len(prefix):].lstrip()
-                break
-        # Strip the trailing end marker too — a rehydrated handoff body that
-        # keeps it would leak the boundary directive into the iterative-update
-        # summarizer prompt (and the marker is re-appended on insertion anyway).
-        if text.endswith(_SUMMARY_END_MARKER):
-            text = text[: -len(_SUMMARY_END_MARKER)].rstrip()
+                return text[len(prefix):].lstrip()
        return text

    @classmethod
@@ -1668,52 +1590,6 @@ This compaction should PRIORITISE preserving all information related to the focu
            return True
        return any(text.startswith(p) for p in _HISTORICAL_SUMMARY_PREFIXES)

-    @staticmethod
-    def _has_compressed_summary_metadata(message: Any) -> bool:
-        """Return True if *message* carries the compressed-summary flag.
-
-        Callers (frontends, CLI, gateway) can use this to distinguish context
-        compaction summaries from real assistant or user messages without
-        relying on content-prefix heuristics.  The flag is in-process only —
-        the wire sanitizers strip underscore-prefixed keys before API calls.
-        """
-        if not isinstance(message, dict):
-            return False
-        return bool(message.get(COMPRESSED_SUMMARY_METADATA_KEY))
-
-    @classmethod
-    def _derive_auto_focus_topic(
-        cls,
-        messages: List[Dict[str, Any]],
-    ) -> Optional[str]:
-        """Infer a compact focus hint from the most recent real user turns."""
-        candidates: list[str] = []
-        for idx in range(len(messages) - 1, -1, -1):
-            msg = messages[idx]
-            if msg.get("role") != "user":
-                continue
-            content = msg.get("content")
-            if cls._is_context_summary_content(content):
-                continue
-            text = redact_sensitive_text(_content_text_for_contains(content).strip())
-            if not text:
-                continue
-            text = " ".join(text.split())
-            if len(text) > _AUTO_FOCUS_TURN_MAX_CHARS:
-                text = text[: _AUTO_FOCUS_TURN_MAX_CHARS - 1].rstrip() + "…"
-            candidates.append(text)
-            if len(candidates) >= _AUTO_FOCUS_MAX_TURNS:
-                break
-
-        if not candidates:
-            return None
-
-        candidates.reverse()
-        focus = "Recent user focus:\n" + "\n".join(f"- {item}" for item in candidates)
-        if len(focus) > _AUTO_FOCUS_MAX_CHARS:
-            focus = focus[: _AUTO_FOCUS_MAX_CHARS - 1].rstrip() + "…"
-        return focus
-
    @classmethod
    def _find_latest_context_summary(
        cls,
@@ -1866,105 +1742,6 @@ This compaction should PRIORITISE preserving all information related to the focu
                return i
        return -1

-    def _find_last_assistant_message_idx(
-        self, messages: List[Dict[str, Any]], head_end: int
-    ) -> int:
-        """Return the index of the last user-visible assistant reply at or
-        after *head_end*, or -1.
-
-        A "user-visible reply" is an assistant message with non-empty
-        textual content — i.e. one that the WebUI / TUI / SessionsPage
-        rendered as a bubble the operator could read. We deliberately
-        skip assistant messages that contain only ``tool_calls`` (and
-        no text), because those render as small "calling tool X"
-        indicators and aren't what the reporter means by "the output
-        of the last message you sent" (#29824).
-
-        Falling back to the most recent assistant message of ANY kind
-        only kicks in when no content-bearing assistant message exists
-        in the compressible region — typically a fresh session that
-        just started a multi-step tool sequence with no prior reply
-        to anchor. In that case the agent fix is a no-op and the
-        existing user-message anchor carries the load.
-        """
-        last_any = -1
-        for i in range(len(messages) - 1, head_end - 1, -1):
-            msg = messages[i]
-            if msg.get("role") != "assistant":
-                continue
-            if last_any < 0:
-                last_any = i
-            content = msg.get("content")
-            if isinstance(content, str) and content.strip():
-                return i
-            if isinstance(content, list):
-                # Multimodal / Anthropic-style content: look for any
-                # text block with non-empty text.
-                for part in content:
-                    if isinstance(part, dict):
-                        text = part.get("text") or part.get("content")
-                        if isinstance(text, str) and text.strip():
-                            return i
-        return last_any
-
-    def _ensure_last_assistant_message_in_tail(
-        self,
-        messages: List[Dict[str, Any]],
-        cut_idx: int,
-        head_end: int,
-    ) -> int:
-        """Guarantee the most recent assistant message is in the protected tail.
-
-        WebUI / TUI / SessionsPage bug (#29824). Without this anchor,
-        ``_find_tail_cut_by_tokens`` can leave the user's most recent
-        visible assistant response inside the compressed middle region —
-        especially when the conversation has a single oversized tool
-        result or a long stretch of tool-call/result pairs after the
-        last assistant reply. The summariser then rolls that reply up
-        into the single ``[CONTEXT COMPACTION — REFERENCE ONLY]`` block
-        persisted as ``role="user"`` or ``role="assistant"``. From the
-        operator's perspective the WebUI session viewer
-        (``web/src/pages/SessionsPage.tsx``) and the TUI chat panel
-        both suddenly show the opaque "Context compaction" block in the
-        slot where they were just reading the assistant's actual reply:
-
-            User:       "i cant see the output of the last message you
-                         sent, i did see it previously, however now see
-                         'context compaction'"
-
-        Mirror of ``_ensure_last_user_message_in_tail`` but anchors on
-        the last assistant-role message. Re-runs the tool-group
-        alignment so we don't split a ``tool_call`` / ``tool_result``
-        group that immediately precedes the anchored message — orphaned
-        tool messages would otherwise be removed by
-        ``_sanitize_tool_pairs`` and trigger the same data-loss symptom
-        we're trying to prevent.
-        """
-        last_asst_idx = self._find_last_assistant_message_idx(messages, head_end)
-        if last_asst_idx < 0:
-            # No assistant message in the compressible region — nothing
-            # to anchor (single-turn pre-reply state, etc.).
-            return cut_idx
-        if last_asst_idx >= cut_idx:
-            # Already in the tail — the token-budget walk did the right
-            # thing on its own.
-            return cut_idx
-        # Pull cut_idx back to the assistant message, then re-align so
-        # we don't split a tool group that immediately precedes it
-        # (e.g. an ``assistant(tool_calls)`` → ``tool(result)`` →
-        # ``assistant(final reply)`` sequence would otherwise leave the
-        # ``tool`` orphan when cut lands at the final reply).
-        new_cut = self._align_boundary_backward(messages, last_asst_idx)
-        if not self.quiet_mode:
-            logger.debug(
-                "Anchoring tail cut to last assistant message at index %d "
-                "(was %d, aligned to %d) to keep the previously-visible "
-                "reply out of the compaction summary (#29824)",
-                last_asst_idx, cut_idx, new_cut,
-            )
-        # Safety: never go back into the head region.
-        return max(new_cut, head_end + 1)
-
    def _ensure_last_user_message_in_tail(
        self,
        messages: List[Dict[str, Any]],
@@ -1976,7 +1753,7 @@ This compaction should PRIORITISE preserving all information related to the focu
        Context compressor bug (#10896): ``_align_boundary_backward`` can pull
        ``cut_idx`` past a user message when it tries to keep tool_call/result
        groups together.  If the last user message ends up in the *compressed*
-        middle region the LLM summariser writes it into "Historical Pending User Asks",
+        middle region the LLM summariser writes it into "Pending User Asks",
        but ``SUMMARY_PREFIX`` tells the next model to respond only to user
        messages *after* the summary — so the task effectively disappears from
        the active context, causing the agent to stall, repeat completed work,
@@ -2023,12 +1800,11 @@ This compaction should PRIORITISE preserving all information related to the focu
        derived from ``summary_target_ratio * context_length``, so it
        scales automatically with the model's context window.

-        Token budget is the primary criterion.  A bounded message-count floor
-        keeps a short run of recent turns verbatim even when the budget is
-        exhausted, but the budget is allowed to exceed by up to 1.5x to avoid
-        cutting inside an oversized message (tool output, file read, etc.). If
-        even that floor exceeds 1.5x the budget, the cut is placed right after
-        the head so compression still runs.
+        Token budget is the primary criterion.  A hard minimum of 3 messages
+        is always protected, but the budget is allowed to exceed by up to
+        1.5x to avoid cutting inside an oversized message (tool output, file
+        read, etc.).  If even the minimum 3 messages exceed 1.5x the budget
+        the cut is placed right after the head so compression still runs.

        Never cuts inside a tool_call/result group.  Always ensures the most
        recent user message is in the tail (see ``_ensure_last_user_message_in_tail``).
@@ -2036,19 +1812,8 @@ This compaction should PRIORITISE preserving all information related to the focu
        if token_budget is None:
            token_budget = self.tail_token_budget
        n = len(messages)
-        # Hard minimum: always keep a bounded recent-message floor in the tail.
-        # ``protect_last_n`` remains a minimum up to the cap; the cap avoids
-        # preserving a whole run of bulky tool outputs on every compaction.
-        available_tail = max(0, n - head_end - 1)
-        min_tail_floor = max(3, min(self.protect_last_n, _MAX_TAIL_MESSAGE_FLOOR))
-        # Leave at least two non-head messages available to summarize on short
-        # transcripts; otherwise compression can replace a tiny middle with a
-        # summary and save no messages at all.
-        compressible_tail_cap = max(3, available_tail - 2)
-        min_tail = (
-            min(min_tail_floor, compressible_tail_cap, available_tail)
-            if available_tail > 1 else 0
-        )
+        # Hard minimum: always keep at least 3 messages in the tail
+        min_tail = min(3, n - head_end - 1) if n - head_end > 1 else 0
        soft_ceiling = int(token_budget * 1.5)
        accumulated = 0
        cut_idx = n  # start from beyond the end
@@ -2120,13 +1885,6 @@ This compaction should PRIORITISE preserving all information related to the focu
        # active task is never lost to compression (fixes #10896).
        cut_idx = self._ensure_last_user_message_in_tail(messages, cut_idx, head_end)

-        # Ensure the most recent assistant message is always in the tail
-        # so the previously-visible reply isn't silently rolled into the
-        # ``[CONTEXT COMPACTION — REFERENCE ONLY]`` block (fixes #29824).
-        # Each anchor only walks ``cut_idx`` backward, so chaining them is
-        # monotonic — the tail can only grow, never shrink.
-        cut_idx = self._ensure_last_assistant_message_in_tail(messages, cut_idx, head_end)
-
        return max(cut_idx, head_end + 1)

    # ------------------------------------------------------------------
@@ -2279,8 +2037,7 @@ This compaction should PRIORITISE preserving all information related to the focu
            )

        # Phase 3: Generate structured summary
-        summary_focus_topic = focus_topic or self._derive_auto_focus_topic(messages)
-        summary = self._generate_summary(turns_to_summarize, focus_topic=summary_focus_topic)
+        summary = self._generate_summary(turns_to_summarize, focus_topic=focus_topic)

        # If summary generation failed, behavior splits on
        # ``abort_on_summary_failure`` (config: compression.abort_on_summary_failure):
@@ -2360,33 +2117,32 @@ This compaction should PRIORITISE preserving all information related to the focu

        # When the summary lands as a standalone role="user" message,
        # weak models read the verbatim "## Active Task" quote of a past
-        # user request as fresh input (#11475, #14521).
-        # When it lands as role="assistant", models may regurgitate the
-        # summary text as their own output (#33256). In both cases, append
-        # the explicit end marker so the model has a clear "summary ends
-        # here, respond to the message below" signal.
-        if not _merge_summary_into_tail:
-            summary = summary + "\n\n" + _SUMMARY_END_MARKER
+        # user request as fresh input (#11475, #14521). Append the explicit
+        # end marker — the same one used in the merge-into-tail path — so
+        # the model has a clear "summary above, not new input" signal.
+        if not _merge_summary_into_tail and summary_role == "user":
+            summary = (
+                summary
+                + "\n\n--- END OF CONTEXT SUMMARY — "
+                "respond to the message below, not the summary above ---"
+            )

        if not _merge_summary_into_tail:
-            compressed.append({
-                "role": summary_role,
-                "content": summary,
-                COMPRESSED_SUMMARY_METADATA_KEY: True,
-            })
+            compressed.append({"role": summary_role, "content": summary})

        for i in range(compress_end, n_messages):
            msg = messages[i].copy()
            if _merge_summary_into_tail and i == compress_end:
-                merged_prefix = summary + "\n\n" + _SUMMARY_END_MARKER + "\n\n"
+                merged_prefix = (
+                    summary
+                    + "\n\n--- END OF CONTEXT SUMMARY — "
+                    "respond to the message below, not the summary above ---\n\n"
+                )
                msg["content"] = _append_text_to_content(
                    msg.get("content"),
                    merged_prefix,
                    prepend=True,
                )
-                # Mark the merged message so frontends can identify it as
-                # containing a compression summary prefix.
-                msg[COMPRESSED_SUMMARY_METADATA_KEY] = True
                _merge_summary_into_tail = False
            compressed.append(msg)

--- a/agent/context_references.py
+++ b/agent/context_references.py
@@ -246,14 +246,7 @@ def _expand_file_reference(
    if not path.is_file():
        return f"{ref.raw}: path is not a file", None
    if _is_binary_file(path):
-        # A binary file can't be inlined as text, but it IS on disk (the agent's
-        # tools run where this resolves — the local cwd, or the staged copy in a
-        # remote session workspace). Returning a bare "not supported" warning
-        # with no content was a dead end: the model saw a failure and gave up
-        # (told the user the file type wasn't supported). Instead, hand it an
-        # actionable block — the path, type, size, and a nudge to use its tools —
-        # so it can read/convert/view the file itself.
-        return None, _binary_reference_block(ref, path)
+        return f"{ref.raw}: binary files are not supported", None

    text = path.read_text(encoding="utf-8")
    if ref.line_start is not None:
@@ -297,7 +290,6 @@ def _expand_git_reference(
            capture_output=True,
            text=True,
            timeout=30,
-            stdin=subprocess.DEVNULL,
        )
    except subprocess.TimeoutExpired:
        return f"{ref.raw}: git command timed out (30s)", None
@@ -490,7 +482,6 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
            capture_output=True,
            text=True,
            timeout=10,
-            stdin=subprocess.DEVNULL,
        )
    except (FileNotFoundError, OSError, subprocess.TimeoutExpired):
        return None
@@ -500,30 +491,6 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
    return files[:limit]


-def _human_bytes(n: int) -> str:
-    size = float(n)
-    for unit in ("B", "KB", "MB", "GB"):
-        if size < 1024 or unit == "GB":
-            return f"{int(size)} {unit}" if unit == "B" else f"{size:.1f} {unit}"
-        size /= 1024
-    return f"{size:.1f} GB"
-
-
-def _binary_reference_block(ref: ContextReference, path: Path) -> str:
-    mime, _ = mimetypes.guess_type(path.name)
-    mime = mime or "application/octet-stream"
-    try:
-        size = _human_bytes(path.stat().st_size)
-    except OSError:
-        size = "unknown size"
-    return (
-        f"📎 {ref.raw} ({mime}, {size}) — binary file, not inlined as text. "
-        f"It is available on disk at `{path}`. Use your tools to work with it "
-        f"(read or convert it, extract its text, or view/render it as needed); "
-        f"do not tell the user the file type is unsupported."
-    )
-
-
 def _file_metadata(path: Path) -> str:
    if _is_binary_file(path):
        return f"{path.stat().st_size} bytes"
--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@@ -40,16 +40,6 @@ from agent.model_metadata import estimate_request_tokens_rough

 logger = logging.getLogger(__name__)

-# Stable marker the gateway matches on to re-tag the auto-compaction lifecycle
-# status as ``kind="compacting"`` (tui_gateway/server.py::_status_update), so
-# drivers like the desktop app can show an explicit "Summarizing…" indicator
-# instead of the transcript appearing to silently reset. Keep the marker phrase
-# intact if you reword COMPACTION_STATUS.
-COMPACTION_STATUS_MARKER = "Compacting context"
-COMPACTION_STATUS = (
-    f"🗜️ {COMPACTION_STATUS_MARKER} — summarizing earlier conversation so I can continue..."
-)
-

 def _compression_lock_holder(agent: Any) -> str:
    """Build a unique holder id for the lock: pid:tid:agent-instance:uuid.
@@ -334,7 +324,9 @@ def compress_context(
        f"{approx_tokens:,}" if approx_tokens else "unknown", agent.model,
        focus_topic,
    )
-    agent._emit_status(COMPACTION_STATUS)
+    agent._emit_status(
+        "🗜️ Compacting context — summarizing earlier conversation so I can continue..."
+    )

    # ── Compression lock ────────────────────────────────────────────────
    # Atomic, state.db-backed lock per session_id.  Without this, two
@@ -639,11 +631,7 @@ def compress_context(
    return compressed, new_system_prompt


-def try_shrink_image_parts_in_messages(
-    api_messages: list,
-    *,
-    max_dimension: int = 8000,
-) -> bool:
+def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
    """Re-encode all native image parts at a smaller size to recover from
    image-too-large errors (Anthropic 5 MB, unknown other providers).

@@ -654,8 +642,7 @@ def try_shrink_image_parts_in_messages(
    Strategy: look for ``image_url`` / ``input_image`` parts carrying a
    ``data:image/...;base64,...`` payload.  For each one whose encoded
    size exceeds 4 MB (a safe target that slides under Anthropic's 5 MB
-    ceiling with header overhead) or whose longest side exceeds
-    ``max_dimension``, write the base64 to a tempfile, call
+    ceiling with header overhead), write the base64 to a tempfile, call
    ``vision_tools._resize_image_for_vision`` to produce a smaller data
    URL, and substitute it in place.

@@ -677,9 +664,10 @@ def try_shrink_image_parts_in_messages(
    # after a confirmed provider rejection, so the alternative is failure.
    target_bytes = 4 * 1024 * 1024
    # Anthropic enforces an 8000px per-side dimension cap independently of
-    # the 5 MB byte cap.  In many-image requests, the provider can report a
-    # lower cap (observed: 2000px).  The caller passes that parsed ceiling
-    # when the rejection includes it.
+    # the 5 MB byte cap.  A tall screenshot can be well under 5 MB yet far
+    # over 8000px (e.g. 1200×12000 at 0.06 MB).  We check pixel dimensions
+    # even when the byte budget is fine.
+    max_dimension = 8000
    changed_count = 0
    # Track parts that are over the target but could NOT be shrunk under it.
    # If any survive, retrying is pointless — the same oversized payload will
@@ -696,9 +684,9 @@ def try_shrink_image_parts_in_messages(
        # Check both byte size AND pixel dimensions.
        needs_shrink = len(url) > target_bytes  # over byte budget
        if not needs_shrink:
-            # Even if bytes are fine, check pixel dimensions against the
-            # provider's reported per-side cap.  A screenshot can be tiny in
-            # bytes yet too large in pixels.
+            # Even if bytes are fine, check pixel dimensions against
+            # Anthropic's 8000px cap.  A tall image can be tiny in bytes
+            # yet huge in pixels.
            try:
                import base64 as _b64_dim
                header_d, _, data_d = url.partition(",")
@@ -807,8 +795,6 @@ def try_shrink_image_parts_in_messages(


 __all__ = [
-    "COMPACTION_STATUS",
-    "COMPACTION_STATUS_MARKER",
    "check_compression_model_feasibility",
    "replay_compression_warning",
    "compress_context",
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -71,35 +71,6 @@ logger = logging.getLogger(__name__)
 INTERRUPT_WAITING_FOR_MODEL_PREFIX = "Operation interrupted: waiting for model response ("


-def _image_error_max_dimension(error: Exception) -> Optional[int]:
-    """Extract a provider-reported image dimension ceiling, if present."""
-    parts = []
-    for value in (
-        error,
-        getattr(error, "message", None),
-        getattr(error, "body", None),
-    ):
-        if value:
-            try:
-                parts.append(str(value))
-            except Exception:
-                pass
-    text = " ".join(parts).lower()
-    if "image" not in text or "dimension" not in text or "max allowed size" not in text:
-        return None
-
-    match = re.search(r"max allowed size(?:\s+for [^:]+)?:\s*(\d{3,5})\s*pixels?", text)
-    if not match:
-        return None
-    try:
-        max_dimension = int(match.group(1))
-    except ValueError:
-        return None
-    if 512 <= max_dimension <= 8000:
-        return max_dimension
-    return None
-
-
 def _ollama_context_limit_error(agent: Any, request_tokens: int) -> Optional[str]:
    """Return a user-facing error when Ollama is loaded with too little context."""
    if not getattr(agent, "tools", None):
@@ -397,42 +368,6 @@ def _get_continuation_prompt(is_partial_stub: bool, dropped_tools: Optional[List
        )


-# Shared recovery hint appended to every content-policy refusal message. Both
-# the HTTP-200 refusal path (``finish_reason=content_filter``) and the
-# exception path (a provider moderation error classified as
-# ``content_policy_blocked``) end with the same actionable next steps, so they
-# share one trailer to keep the guidance from drifting between the two sites.
-_CONTENT_POLICY_RECOVERY_HINT = (
-    "Try rephrasing the request, narrowing the context, or "
-    "adding a fallback provider with `hermes fallback add`."
-)
-
-
-def _content_policy_blocked_result(
-    messages: List[Dict],
-    api_call_count: int,
-    *,
-    final_response: str,
-    error_detail: str,
-) -> Dict[str, Any]:
-    """Build the terminal turn result for a content-policy block.
-
-    A content-policy refusal is deterministic for the unchanged prompt, so the
-    turn ends here (no retry). Both the HTTP-200 refusal handler and the
-    exception-path handler return the identical shape — a failed, non-completed
-    turn carrying the user-facing message and a ``content_policy_blocked:``
-    prefixed error — so they funnel through this one builder.
-    """
-    return {
-        "final_response": final_response,
-        "messages": messages,
-        "api_calls": api_call_count,
-        "completed": False,
-        "failed": True,
-        "error": f"content_policy_blocked: {error_detail}",
-    }
-
-
 def run_conversation(
    agent,
    user_message: str,
@@ -660,11 +595,7 @@ def run_conversation(
        # landed after an orphan tool result). Most providers return
        # empty content on malformed sequences, which would otherwise
        # retrigger the empty-retry loop indefinitely.
-        # repair_message_sequence_with_cursor also recomputes the SessionDB
-        # flush cursor (_last_flushed_db_idx) when repair compacts the list,
-        # so the turn-end flush doesn't skip the assistant/tool chain (#44837).
-        from agent.agent_runtime_helpers import repair_message_sequence_with_cursor
-        repaired_seq = repair_message_sequence_with_cursor(agent, messages)
+        repaired_seq = agent._repair_message_sequence(messages)
        if repaired_seq > 0:
            request_logger.info(
                "Repaired %s message-alternation violations before request (session=%s)",
@@ -772,10 +703,7 @@ def run_conversation(
        # a thinking-only turn. Runs on the per-call copy only — the
        # stored conversation history keeps the reasoning block for the
        # UI transcript and session persistence.
-        api_messages = agent._drop_thinking_only_and_merge_users(
-            api_messages,
-            drop_codex_reasoning_items=agent.api_mode != "codex_responses",
-        )
+        api_messages = agent._drop_thinking_only_and_merge_users(api_messages)

        # Normalize message whitespace and tool-call JSON for consistent
        # prefix matching.  Ensures bit-perfect prefixes across turns,
@@ -1384,106 +1312,6 @@ def run_conversation(
                        )
                        finish_reason = "length"

-                # ── Content-policy refusal (HTTP 200) ──────────────────
-                # The model — or the provider's safety system — returned a
-                # *successful* response whose stop/finish reason is a refusal:
-                # Anthropic ``stop_reason="refusal"`` → ``content_filter``;
-                # OpenAI / portal ``finish_reason="content_filter"`` or a
-                # populated ``message.refusal`` (mapped in the chat_completions
-                # transport); Bedrock ``guardrail_intervened``. The content is
-                # typically empty, so without this branch the response falls
-                # through to the empty-response / invalid-response retry loops
-                # and is mis-surfaced as "rate limited" / "no content after
-                # retries" — burning paid attempts reproducing a deterministic
-                # refusal. Surface it clearly and stop. Mirrors the
-                # exception-based ``content_policy_blocked`` recovery: try a
-                # configured fallback once, otherwise return the refusal.
-                if finish_reason == "content_filter":
-                    _refusal_transport = agent._get_transport()
-                    if agent.api_mode == "anthropic_messages":
-                        _refusal_result = _refusal_transport.normalize_response(
-                            response, strip_tool_prefix=agent._is_anthropic_oauth
-                        )
-                    else:
-                        _refusal_result = _refusal_transport.normalize_response(response)
-                    _refusal_text = (getattr(_refusal_result, "content", None) or "").strip()
-                    # Some refusals carry the explanation only in the reasoning
-                    # channel; fall back to it so the user sees *something*.
-                    if not _refusal_text:
-                        _refusal_text = (agent._extract_reasoning(_refusal_result) or "").strip()
-
-                    agent._invoke_api_request_error_hook(
-                        task_id=effective_task_id,
-                        turn_id=turn_id,
-                        api_request_id=api_request_id,
-                        api_call_count=api_call_count,
-                        api_start_time=api_start_time,
-                        api_kwargs=api_kwargs,
-                        error_type="ContentPolicyBlocked",
-                        error_message=_refusal_text or "model declined to respond (content_filter)",
-                        status_code=None,
-                        retry_count=retry_count,
-                        max_retries=max_retries,
-                        retryable=False,
-                        reason=FailoverReason.content_policy_blocked.value,
-                    )
-
-                    if thinking_spinner:
-                        thinking_spinner.stop("")
-                        thinking_spinner = None
-                    if agent.thinking_callback:
-                        agent.thinking_callback("")
-
-                    # Deterministic for the unchanged prompt — never retry.
-                    # Try a configured fallback once (a different model may not
-                    # refuse); otherwise surface the refusal terminally.
-                    if agent._has_pending_fallback():
-                        agent._buffer_status(
-                            "⚠️ Model declined to respond (safety refusal) — trying fallback..."
-                        )
-                    if agent._try_activate_fallback():
-                        retry_count = 0
-                        compression_attempts = 0
-                        _retry.primary_recovery_attempted = False
-                        continue
-
-                    agent._flush_status_buffer()
-                    _refusal_log = (
-                        _refusal_text[:500] + "..."
-                        if len(_refusal_text) > 500
-                        else _refusal_text
-                    )
-                    logger.warning(
-                        "%sModel declined to respond (finish_reason=content_filter). "
-                        "model=%s provider=%s refusal=%s",
-                        agent.log_prefix, agent.model, agent.provider,
-                        _refusal_log or "(no text)",
-                    )
-                    agent._emit_status(
-                        "⚠️ The model declined to respond to this request (safety refusal)."
-                    )
-
-                    _refusal_detail = (
-                        f"Model's explanation: {_refusal_text}"
-                        if _refusal_text
-                        else "The model returned no explanation."
-                    )
-                    _refusal_response = (
-                        "⚠️  The model declined to respond to this request "
-                        "(safety refusal — not a Hermes/gateway failure).\n\n"
-                        f"{_refusal_detail}\n\n"
-                        f"{_CONTENT_POLICY_RECOVERY_HINT}"
-                    )
-
-                    agent._cleanup_task_resources(effective_task_id)
-                    agent._persist_session(messages, conversation_history)
-                    return _content_policy_blocked_result(
-                        messages,
-                        api_call_count,
-                        final_response=_refusal_response,
-                        error_detail=_refusal_text or "model declined (content_filter)",
-                    )
-
                if finish_reason == "length":
                    if getattr(response, "id", "") == PARTIAL_STREAM_STUB_ID:
                        agent._vprint(
@@ -2235,11 +2063,7 @@ def run_conversation(
                    and not _retry.image_shrink_retry_attempted
                ):
                    _retry.image_shrink_retry_attempted = True
-                    image_max_dimension = _image_error_max_dimension(api_error) or 8000
-                    if agent._try_shrink_image_parts_in_messages(
-                        api_messages,
-                        max_dimension=image_max_dimension,
-                    ):
+                    if agent._try_shrink_image_parts_in_messages(api_messages):
                        agent._vprint(
                            f"{agent.log_prefix}📐 Image(s) exceeded provider size limit — "
                            f"shrank and retrying...",
@@ -2397,54 +2221,30 @@ def run_conversation(
                    print(f"{agent.log_prefix}     • Legacy cleanup: hermes config set ANTHROPIC_TOKEN \"\"")
                    print(f"{agent.log_prefix}     • Clear stale keys: hermes config set ANTHROPIC_API_KEY \"\"")

-                # Thinking block signature recovery.
-                #
+                # ── Thinking block signature recovery ─────────────────
                # Anthropic signs thinking blocks against the full turn
-                # content. Any upstream mutation (context compression,
+                # content.  Any upstream mutation (context compression,
                # session truncation, message merging) invalidates the
-                # signature and the API replies HTTP 400 ("invalid
-                # signature" or "cannot be modified"). Recovery strips
-                # ``reasoning_details`` so the retry sends no thinking
-                # blocks at all. One-shot per outer loop.
-                #
-                # The strip targets ``api_messages``, which is the
-                # API-call-time list that ``_build_api_kwargs`` consumes
-                # on every retry. ``api_messages`` was populated once at
-                # the start of the turn from shallow copies of
-                # ``messages``, so mutating it does not touch the
-                # canonical store. The previous implementation popped
-                # ``reasoning_details`` from ``messages`` instead, which
-                # had two problems: ``api_messages`` carried its own
-                # reference to the field through the shallow copy, so the
-                # retry's wire payload still included thinking blocks and
-                # the recovery never reached the API; and the mutation
-                # persisted into ``state.db`` through any subsequent
-                # ``_persist_session`` call, permanently corrupting the
-                # conversation. Future turns would replay the stripped
-                # state, hit the same 400, and the agent would terminate
-                # with ``max_retries_exhausted``, often spawning
-                # cascading compaction-ended sessions chained off the
-                # corrupted parent.
+                # signature → HTTP 400.  Recovery: strip reasoning_details
+                # from all messages so the next retry sends no thinking
+                # blocks at all.  One-shot — don't retry infinitely.
                if (
                    classified.reason == FailoverReason.thinking_signature
                    and not _retry.thinking_sig_retry_attempted
                ):
                    _retry.thinking_sig_retry_attempted = True
-                    _api_stripped = 0
-                    for _m in api_messages:
-                        if isinstance(_m, dict) and "reasoning_details" in _m:
+                    for _m in messages:
+                        if isinstance(_m, dict):
                            _m.pop("reasoning_details", None)
-                            _api_stripped += 1
                    agent._vprint(
-                        f"{agent.log_prefix}⚠️  Thinking block signature invalid, "
-                        f"stripped reasoning_details from api_messages for retry...",
+                        f"{agent.log_prefix}⚠️  Thinking block signature invalid — "
+                        f"stripped all thinking blocks, retrying...",
                        force=True,
                    )
                    logger.warning(
                        "%sThinking block signature recovery: stripped "
-                        "reasoning_details from %d api_messages "
-                        "(canonical messages unchanged)",
-                        agent.log_prefix, _api_stripped,
+                        "reasoning_details from %d messages",
+                        agent.log_prefix, len(messages),
                    )
                    continue

@@ -2807,13 +2607,10 @@ def run_conversation(
                    except Exception:
                        pass
                    if _genuine_nous_rate_limit:
-                        # Re-enter the loop exactly once so the
-                        # top-of-loop Nous guard handles fallback or
-                        # bails cleanly. (Setting retry_count to
-                        # max_retries would make the while condition
-                        # false immediately and the guard would never
-                        # run -- no fallback, generic exhaustion error.)
-                        retry_count = max(0, max_retries - 1)
+                        # Skip straight to max_retries -- the
+                        # top-of-loop guard will handle fallback or
+                        # bail cleanly.
+                        retry_count = max_retries
                        continue
                    # Upstream capacity 429: fall through to normal
                    # retry logic.  A different model (or the same
@@ -3255,17 +3052,20 @@ def run_conversation(
                    if classified.reason == FailoverReason.content_policy_blocked:
                        _summary = agent._summarize_api_error(api_error)
                        _policy_response = (
-                            "⚠️  The model provider's safety filter blocked this request "
-                            "(not a Hermes/gateway failure).\n\n"
+                            f"⚠️  The model provider's safety filter blocked this request "
+                            f"(not a Hermes/gateway failure).\n\n"
                            f"Provider message: {_summary}\n\n"
-                            f"{_CONTENT_POLICY_RECOVERY_HINT}"
-                        )
-                        return _content_policy_blocked_result(
-                            messages,
-                            api_call_count,
-                            final_response=_policy_response,
-                            error_detail=_summary,
+                            f"Try rephrasing the request, narrowing the context, or "
+                            f"adding a fallback provider with `hermes fallback add`."
                        )
+                        return {
+                            "final_response": _policy_response,
+                            "messages": messages,
+                            "api_calls": api_call_count,
+                            "completed": False,
+                            "failed": True,
+                            "error": f"content_policy_blocked: {_summary}",
+                        }
                    return {
                        "final_response": None,
                        "messages": messages,
--- a/agent/copilot_acp_client.py
+++ b/agent/copilot_acp_client.py
@@ -70,6 +70,16 @@ def _resolve_args() -> list[str]:

 def _resolve_home_dir() -> str:
    """Return a stable HOME for child ACP processes."""
+
+    try:
+        from hermes_constants import get_subprocess_home
+
+        profile_home = get_subprocess_home()
+        if profile_home:
+            return profile_home
+    except Exception:
+        pass
+
    home = os.environ.get("HOME", "").strip()
    if home:
        return home
@@ -95,10 +105,7 @@ def _resolve_home_dir() -> str:

 def _build_subprocess_env() -> dict[str, str]:
    env = os.environ.copy()
-    home = _resolve_home_dir()
-    env["HOME"] = home
-    from hermes_constants import apply_subprocess_home_env
-    apply_subprocess_home_env(env)
+    env["HOME"] = _resolve_home_dir()
    return env


--- a/agent/credits_tracker.py
+++ b/agent/credits_tracker.py
@@ -194,71 +194,17 @@ class AgentNotice:
    id: Optional[str] = None


-# ── is_free_tier_model (local-data-only free-model check) ────────────────────
-
-
-def is_free_tier_model(model: str, base_url: str = "") -> bool:
-    """Return True when *model* is a Nous free-tier model, using ONLY local data.
-
-    Two signals, both zero-network:
-
-    1. The ``:free`` suffix — the canonical Nous free SKU marker (e.g.
-       ``nvidia/nemotron-3-ultra:free``). Free by construction on the API side
-       (spend is forced to 0 for ``:free`` ids).
-    2. A peek into the in-process pricing cache in ``hermes_cli.models``
-       (populated when the model picker fetched ``/v1/models`` pricing for
-       *base_url*). PEEK ONLY — a cache miss never triggers a fetch. This is
-       CLI/TUI-session best-effort: gateway sessions never run the picker's
-       pricing fetch, so suppression there rests entirely on the ``:free``
-       suffix (which all Nous free SKUs carry).
-
-    Fail-open to False (the depleted notice still shows) on any error: wrongly
-    showing the warning is recoverable noise; wrongly hiding it on a paid model
-    would mask a real billing block.
-    """
-    if not model:
-        return False
-    if model.endswith(":free"):
-        return True
-    if not base_url:
-        return False
-    try:
-        from hermes_cli.models import _is_model_free, _pricing_cache
-
-        # Mirror get_pricing_for_provider's key normalization: the agent's
-        # Nous base_url is /v1-suffixed (https://inference-api.nousresearch.com/v1)
-        # but the picker keys _pricing_cache on the pre-/v1 root.
-        key = base_url.rstrip("/")
-        if key.endswith("/v1"):
-            key = key[:-3].rstrip("/")
-        pricing = _pricing_cache.get(key)
-        if not pricing:
-            return False
-        return _is_model_free(model, pricing)
-    except Exception:
-        return False
-
-
 # ── evaluate_credits_notices (pure reconciliation function) ──────────────────


 def evaluate_credits_notices(
    state: CreditsState,
    latch: dict,
-    *,
-    model_is_free: bool = False,
 ) -> tuple[list[AgentNotice], list[str]]:
    """Reconcile credits notices against the latch. Mutates ``latch`` IN PLACE.

    latch = {"active": set[str], "seen_below_90": bool, "usage_band": Optional[int]}.

-    ``model_is_free``: True when the session's active model is a Nous free-tier
-    model (see :func:`is_free_tier_model`). Suppresses the ``credits.depleted``
-    notice — a depleted account on a free model can keep inferencing, so the
-    error banner is noise (and confuses free-tier users who never had credits).
-    Suppression does NOT emit the "restored" success notice; that fires only on
-    a genuine ``paid_access`` flip back to True.
-
    Returns ``(to_show: list[AgentNotice], to_clear: list[str])``.
    Caller emits to_clear FIRST, then to_show.

@@ -286,16 +232,6 @@ def evaluate_credits_notices(
        for band in CREDITS_USAGE_BANDS:  # ascending → last match wins = highest
            if uf >= band[0]:
                current_band = band
-    # Top-up suppression: when the account holds purchased (top-up) credits,
-    # the subscription-cap gauge is the wrong denominator — warning "90% used"
-    # at a user sitting on $50 of top-up is noise (and it previously stuck
-    # PERMANENTLY alongside grant_spent at >=100%). Suppress the usage band
-    # entirely; the cap-reached case is covered by the grant_spent info notice
-    # below, which already names the remaining top-up balance. A top-up landing
-    # mid-session flips current_band → None and the clear path below removes
-    # any showing band line.
-    if state.purchased_micros > 0:
-        current_band = None
    grant_cond = (
        state.denominator_kind == "subscription_cap"
        and uf is not None
@@ -348,14 +284,10 @@ def evaluate_credits_notices(
        active.discard("credits.grant_spent")

    # ── depleted ─────────────────────────────────────────────────────────────
-    # Suppressed while the active model is free: inference still works there,
-    # so the error banner would just alarm users (free-tier users especially,
-    # who never had paid credits to "lose").
-    show_depleted = depleted_cond and not model_is_free
-    if show_depleted and "credits.depleted" not in active:
+    if depleted_cond and "credits.depleted" not in active:
        to_show.append(
            AgentNotice(
-                text="✕ Credit access paused · run /credits to top up",
+                text="✕ Credit access paused · run /usage for balance",
                level="error",
                kind=CREDITS_NOTICE_KIND,
                key="credits.depleted",
@@ -363,23 +295,20 @@ def evaluate_credits_notices(
            )
        )
        active.add("credits.depleted")
-    elif "credits.depleted" in active and not show_depleted:
+    elif "credits.depleted" in active and not depleted_cond:
        to_clear.append("credits.depleted")
        active.discard("credits.depleted")
-        if not depleted_cond:
-            # Genuine recovery (paid_access flipped back True): also emit the
-            # success notice. A clear caused by switching to a free model while
-            # still depleted must NOT claim access was restored.
-            to_show.append(
-                AgentNotice(
-                    text="✓ Credit access restored",
-                    level="success",
-                    kind="ttl",
-                    ttl_ms=CREDITS_RESTORED_TTL_MS,
-                    key="credits.restored",
-                    id="credits.restored",
-                )
+        # Recovery: also emit the success notice
+        to_show.append(
+            AgentNotice(
+                text="✓ Credit access restored",
+                level="success",
+                kind="ttl",
+                ttl_ms=CREDITS_RESTORED_TTL_MS,
+                key="credits.restored",
+                id="credits.restored",
            )
+        )

    return (to_show, to_clear)

--- a/agent/curator.py
+++ b/agent/curator.py
@@ -25,6 +25,7 @@ import json
 import logging
 import os
 import re
+import tempfile
 import threading
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
@@ -32,7 +33,6 @@ from typing import Any, Callable, Dict, List, NamedTuple, Optional, Set

 from hermes_constants import get_hermes_home
 from tools import skill_usage
-from utils import atomic_json_write

 logger = logging.getLogger(__name__)

@@ -97,7 +97,20 @@ def load_state() -> Dict[str, Any]:
 def save_state(data: Dict[str, Any]) -> None:
    path = _state_file()
    try:
-        atomic_json_write(path, data, indent=2, sort_keys=True)
+        path.parent.mkdir(parents=True, exist_ok=True)
+        fd, tmp = tempfile.mkstemp(dir=str(path.parent), prefix=".curator_state_", suffix=".tmp")
+        try:
+            with os.fdopen(fd, "w", encoding="utf-8") as f:
+                json.dump(data, f, indent=2, sort_keys=True, ensure_ascii=False)
+                f.flush()
+                os.fsync(f.fileno())
+            os.replace(tmp, path)
+        except BaseException:
+            try:
+                os.unlink(tmp)
+            except OSError:
+                pass
+            raise
    except Exception as e:
        logger.debug("Failed to save curator state: %s", e, exc_info=True)

--- a/agent/curator_backup.py
+++ b/agent/curator_backup.py
@@ -454,16 +454,16 @@ def _restore_cron_skill_links(snapshot_dir: Path) -> Dict[str, Any]:
        report["attempted"] = True  # we tried but there was nothing to do
        return report

-    # Load and rewrite the live jobs under the scheduler's cross-process lock.
+    # Load and rewrite the live jobs under the scheduler's lock.
    try:
-        from cron.jobs import load_jobs, save_jobs, _jobs_lock
+        from cron.jobs import load_jobs, save_jobs, _jobs_file_lock
    except ImportError as e:
        report["error"] = f"cron module unavailable: {e}"
        return report

    report["attempted"] = True
    try:
-        with _jobs_lock():
+        with _jobs_file_lock:
            live_jobs = load_jobs()
            changed = False

--- a/agent/display.py
+++ b/agent/display.py
@@ -12,7 +12,6 @@ import time
 from dataclasses import dataclass, field
 from difflib import unified_diff
 from pathlib import Path
-from typing import Any

 from utils import safe_json_loads
 from agent.tool_result_classification import file_mutation_result_landed
@@ -169,27 +168,6 @@ def _oneline(text: str) -> str:
    return " ".join(text.split())


-def _truncate_preview(text: str, max_len: int | None) -> str:
-    if max_len and max_len > 0 and len(text) > max_len:
-        if max_len <= 3:
-            return "." * max_len
-        return text[:max_len - 3] + "..."
-    return text
-
-
-def _delegate_task_goal_parts(tasks: Any, *, per_goal_len: int) -> tuple[int, list[str]]:
-    if not isinstance(tasks, list):
-        return 0, []
-    goals: list[str] = []
-    for task in tasks:
-        if not isinstance(task, dict):
-            continue
-        raw_goal = task.get("goal")
-        goal = "?" if raw_goal is None else _oneline(str(raw_goal))
-        goals.append(_truncate_preview(goal or "?", per_goal_len))
-    return len(goals), goals
-
-
 def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -> str | None:
    """Build a short preview of a tool call's primary argument for display.

@@ -213,22 +191,6 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
        "clarify": "question", "skill_manage": "name",
    }

-    # delegate_task: show goal (single) or individual task goals (batch)
-    if tool_name == "delegate_task":
-        tasks = args.get("tasks")
-        if tasks and isinstance(tasks, list):
-            task_count, goals = _delegate_task_goal_parts(tasks, per_goal_len=40)
-            preview = (
-                f"{task_count} tasks: " + " | ".join(goals)
-                if goals else f"{len(tasks)} parallel tasks"
-            )
-            return _truncate_preview(preview, max_len)
-        goal = args.get("goal", "")
-        if goal is None:
-            return None
-        preview = _oneline(str(goal))
-        return _truncate_preview(preview, max_len) if preview else None
-
    if tool_name == "process":
        action = args.get("action", "")
        sid = args.get("session_id", "")
@@ -1057,10 +1019,7 @@ def get_cute_tool_message(
    if tool_name == "delegate_task":
        tasks = args.get("tasks")
        if tasks and isinstance(tasks, list):
-            task_count, goals = _delegate_task_goal_parts(tasks, per_goal_len=30)
-            detail = " | ".join(goals) if goals else "parallel"
-            count_label = task_count or len(tasks)
-            return _wrap(f"┊ 🔀 delegate  {count_label}x: {_trunc(detail, 35)}  {dur}")
+            return _wrap(f"┊ 🔀 delegate  {len(tasks)} parallel tasks  {dur}")
        return _wrap(f"┊ 🔀 delegate  {_trunc(args.get('goal', ''), 35)}  {dur}")

    preview = build_tool_preview(tool_name, args) or ""
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@@ -549,32 +549,14 @@ def classify_api_error(
            should_fallback=True,
        )

-    # Anthropic thinking block recovery (400).  Two distinct failure modes,
-    # same recovery (strip all reasoning_details and retry without thinking
-    # blocks — see the thinking_signature handler in conversation_loop.py):
-    #   1. Signature mismatch: a thinking block is signed against the full
-    #      turn content; any upstream mutation (context compression, session
-    #      truncation, message merging) invalidates the signature.
-    #      Pattern: "signature" + "thinking".
-    #   2. Frozen-block mutation: Anthropic rejects any change to the
-    #      thinking/redacted_thinking blocks in the *latest* assistant
-    #      message — "`thinking` or `redacted_thinking` blocks in the latest
-    #      assistant message cannot be modified. These blocks must remain as
-    #      they were in the original response."  This carries no "signature"
-    #      token, so the original pattern missed it and the turn hard-aborted
-    #      as a non-retryable client error instead of self-healing.
-    #      Pattern: "thinking" + ("cannot be modified" | "must remain as they were").
+    # Anthropic thinking block signature invalid (400).
    # Don't gate on provider — OpenRouter proxies Anthropic errors, so the
    # provider may be "openrouter" even though the error is Anthropic-specific.
-    # The combined patterns are unique enough.
+    # The message pattern ("signature" + "thinking") is unique enough.
    if (
        status_code == 400
+        and "signature" in error_msg
        and "thinking" in error_msg
-        and (
-            "signature" in error_msg
-            or "cannot be modified" in error_msg
-            or "must remain as they were" in error_msg
-        )
    ):
        return _result(
            FailoverReason.thinking_signature,
@@ -984,34 +966,6 @@ def _classify_400(
            should_fallback=False,
        )

-    # Request-validation errors (unsupported / unknown parameter) MUST be
-    # checked BEFORE context_overflow.  A GPT-5 model rejecting max_tokens
-    # returns:
-    #   "Unsupported parameter: 'max_tokens' is not supported with this model.
-    #    Use 'max_completion_tokens' instead."
-    # That string contains the literal substring "max_tokens", which is one of
-    # the _CONTEXT_OVERFLOW_PATTERNS — so without this guard the 400 is
-    # misclassified as context_overflow, routed into the compression loop,
-    # re-sent with the same bad parameter, and ends in "Cannot compress
-    # further".  These errors are deterministic (every retry gets the identical
-    # rejection), so classify as a non-retryable format_error and fall back.
-    #
-    # NOTE: we deliberately do NOT key off the generic ``invalid_request_error``
-    # code here — OpenAI stamps that same code on genuine context-overflow 400s,
-    # so matching it would mis-route real overflows away from compression. The
-    # unambiguous signals are the explicit "unsupported/unknown parameter"
-    # message text and the specific parameter-level error codes.
-    if (
-        any(p in error_msg for p in _REQUEST_VALIDATION_PATTERNS
-            if p != "invalid_request_error")
-        or error_code_lower in {"unknown_parameter", "unsupported_parameter"}
-    ):
-        return result_fn(
-            FailoverReason.format_error,
-            retryable=False,
-            should_fallback=True,
-        )
-
    # Context overflow from 400
    if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
        return result_fn(
--- a/agent/errors.py
+++ b/agent/errors.py
@@ -1,3 +0,0 @@
-class SSLConfigurationError(Exception):
-    """Raised when SSL/TLS certificate bundle configuration fails."""
-    pass
--- a/agent/file_safety.py
+++ b/agent/file_safety.py
@@ -46,6 +46,11 @@ def build_write_denied_paths(home: str) -> set[str]:
            # Top-level Anthropic PKCE credential store remains sensitive even
            # when a profile is active; default/non-profile sessions still read it.
            str(hermes_root / ".anthropic_oauth.json"),
+            os.path.join(home, ".bashrc"),
+            os.path.join(home, ".zshrc"),
+            os.path.join(home, ".profile"),
+            os.path.join(home, ".bash_profile"),
+            os.path.join(home, ".zprofile"),
            os.path.join(home, ".netrc"),
            os.path.join(home, ".pgpass"),
            os.path.join(home, ".npmrc"),
@@ -99,6 +104,12 @@ def is_write_denied(path: str) -> bool:
        if resolved.startswith(prefix):
            return True

+    # Hermes control-plane files: block both the ACTIVE profile's view
+    # (hermes_home) AND the global root view. Without the root pass, a
+    # profile-mode session leaves <root>/auth.json + <root>/config.yaml
+    # writable — letting a prompt-injected write_file overwrite the global
+    # files that every profile inherits from (same shape as #15981).
+    control_file_names = ("auth.json", "config.yaml", "webhook_subscriptions.json")
    mcp_tokens_dir_name = "mcp-tokens"

    hermes_dirs = []
@@ -111,6 +122,12 @@ def is_write_denied(path: str) -> bool:
            continue

    for base_real in hermes_dirs:
+        for name in control_file_names:
+            try:
+                if resolved == os.path.realpath(os.path.join(base_real, name)):
+                    return True
+            except Exception:
+                continue
        try:
            mcp_real = os.path.realpath(os.path.join(base_real, mcp_tokens_dir_name))
            if resolved == mcp_real or resolved.startswith(mcp_real + os.sep):
--- a/agent/gemini_native_adapter.py
+++ b/agent/gemini_native_adapter.py
@@ -41,16 +41,6 @@ DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
 GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65535


-def bare_gemini_model_id(model: str) -> str:
-    """Strip Gemini's own provider prefix from an aggregator-style model id."""
-    name = (model or "").strip()
-    lowered = name.lower()
-    for prefix in ("google/", "gemini/"):
-        if lowered.startswith(prefix):
-            return name[len(prefix):].strip() or name
-    return name
-
-
 def is_native_gemini_base_url(base_url: str) -> bool:
    """Return True when the endpoint speaks Gemini's native REST API."""
    normalized = str(base_url or "").strip().rstrip("/").lower()
@@ -340,7 +330,7 @@ def _build_gemini_contents(messages: List[Dict[str, Any]]) -> tuple[List[Dict[st
    system_instruction = None
    joined_system = "\n".join(part for part in system_text_parts if part).strip()
    if joined_system:
-        system_instruction = {"role": "system", "parts": [{"text": joined_system}]}
+        system_instruction = {"parts": [{"text": joined_system}]}
    return contents, system_instruction


@@ -924,7 +914,6 @@ class GeminiNativeClient:
            thinking_config=thinking_config,
        )

-        model = bare_gemini_model_id(model)
        if stream:
            return self._stream_completion(model=model, request=request, timeout=timeout)

--- a/agent/lsp/install.py
+++ b/agent/lsp/install.py
@@ -262,7 +262,6 @@ def _install_npm(
            capture_output=True,
            text=True,
            timeout=300,
-            stdin=subprocess.DEVNULL,
        )
        if proc.returncode != 0:
            logger.warning(
@@ -311,7 +310,6 @@ def _install_go(pkg: str, bin_name: str) -> Optional[str]:
            text=True,
            timeout=600,
            env=env,
-            stdin=subprocess.DEVNULL,
        )
        if proc.returncode != 0:
            logger.warning(
@@ -349,7 +347,6 @@ def _install_pip(pkg: str, bin_name: str) -> Optional[str]:
            capture_output=True,
            text=True,
            timeout=300,
-            stdin=subprocess.DEVNULL,
        )
        if proc.returncode != 0:
            logger.warning(
--- a/agent/memory_manager.py
+++ b/agent/memory_manager.py
@@ -44,66 +44,6 @@ logger = logging.getLogger(__name__)
 _SYNC_DRAIN_TIMEOUT_S = 5.0


-def memory_provider_tools_enabled(enabled_toolsets: Optional[List[str]]) -> bool:
-    """Return whether external memory-provider tools should be exposed."""
-    if enabled_toolsets is None:
-        return True
-    if not enabled_toolsets:
-        return False
-    if "memory" in enabled_toolsets:
-        return True
-
-    try:
-        from toolsets import resolve_toolset
-
-        return any("memory" in resolve_toolset(name) for name in enabled_toolsets)
-    except Exception:
-        logger.debug("Failed to resolve enabled toolsets for memory-provider tools", exc_info=True)
-        return False
-
-
-def inject_memory_provider_tools(agent: Any) -> int:
-    """Append external memory-provider tool schemas to an agent tool surface."""
-    memory_manager = getattr(agent, "_memory_manager", None)
-    tools = getattr(agent, "tools", None)
-    if not memory_manager or tools is None:
-        return 0
-
-    existing_tool_names = {
-        tool.get("function", {}).get("name")
-        for tool in tools
-        if isinstance(tool, dict)
-    }
-    if (
-        "memory" not in existing_tool_names
-        and not memory_provider_tools_enabled(getattr(agent, "enabled_toolsets", None))
-    ):
-        return 0
-
-    get_schemas = getattr(memory_manager, "get_all_tool_schemas", None)
-    if not callable(get_schemas):
-        return 0
-
-    valid_tool_names = getattr(agent, "valid_tool_names", None)
-    if valid_tool_names is None:
-        valid_tool_names = set()
-        agent.valid_tool_names = valid_tool_names
-
-    added = 0
-    for schema in get_schemas():
-        if not isinstance(schema, dict):
-            continue
-        tool_name = schema.get("name", "")
-        if not tool_name or tool_name in existing_tool_names:
-            continue
-        tools.append({"type": "function", "function": schema})
-        valid_tool_names.add(tool_name)
-        existing_tool_names.add(tool_name)
-        added += 1
-
-    return added
-
-
 # ---------------------------------------------------------------------------
 # Context fencing helpers
 # ---------------------------------------------------------------------------
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -5,7 +5,6 @@ and run_agent.py for pre-flight context checks.
 """

 import ipaddress
-import json
 import logging
 import os
 import re
@@ -17,7 +16,7 @@ from urllib.parse import urlparse
 import requests
 import yaml

-from utils import atomic_json_write, base_url_host_matches, base_url_hostname
+from utils import base_url_host_matches, base_url_hostname

 from hermes_constants import OPENROUTER_MODELS_URL

@@ -112,57 +111,6 @@ _endpoint_model_metadata_cache: Dict[str, Dict[str, Dict[str, Any]]] = {}
 _endpoint_model_metadata_cache_time: Dict[str, float] = {}
 _ENDPOINT_MODEL_CACHE_TTL = 300

-
-def _get_model_metadata_cache_path() -> Path:
-    """Return path to the OpenRouter model metadata disk cache."""
-    from hermes_constants import get_hermes_home
-    return get_hermes_home() / "cache" / "openrouter_model_metadata.json"
-
-
-def _model_metadata_disk_cache_age_seconds() -> Optional[float]:
-    """Return disk-cache age in seconds, or None if freshness is unknown."""
-    try:
-        cache_path = _get_model_metadata_cache_path()
-        if not cache_path.exists():
-            return None
-        age = time.time() - cache_path.stat().st_mtime
-        if age < 0:
-            return None
-        return age
-    except Exception:
-        return None
-
-
-def _load_model_metadata_disk_cache() -> Dict[str, Dict[str, Any]]:
-    """Load processed OpenRouter metadata cache from disk."""
-    try:
-        cache_path = _get_model_metadata_cache_path()
-        with cache_path.open("r", encoding="utf-8") as f:
-            data = json.load(f)
-        if not isinstance(data, dict):
-            return {}
-        return {
-            str(key): value
-            for key, value in data.items()
-            if isinstance(value, dict)
-        }
-    except Exception as e:
-        logger.debug("Failed to load OpenRouter model metadata disk cache: %s", e)
-        return {}
-
-
-def _save_model_metadata_disk_cache(data: Dict[str, Dict[str, Any]]) -> None:
-    """Save processed OpenRouter metadata cache to disk atomically."""
-    try:
-        atomic_json_write(
-            _get_model_metadata_cache_path(),
-            data,
-            indent=0,
-            separators=(",", ":"),
-        )
-    except Exception as e:
-        logger.debug("Failed to save OpenRouter model metadata disk cache: %s", e)
-
 # Descending tiers for context length probing when the model is unknown.
 # We start at 256K (covers GPT-5.x, many current large-context models) and
 # step down on context-length errors until one works.  Tier[0] is also the
@@ -193,8 +141,6 @@ DEFAULT_CONTEXT_LENGTHS = {
    # fuzzy-match collisions (e.g. "anthropic/claude-sonnet-4" is a
    # substring of "anthropic/claude-sonnet-4.6").
    # OpenRouter-prefixed models resolve via OpenRouter live API or models.dev.
-    "claude-fable-5": 1000000,
-    "claude-fable": 1000000,
    "claude-opus-4-8": 1000000,
    "claude-opus-4.8": 1000000,
    "claude-opus-4-7": 1000000,
@@ -261,13 +207,7 @@ DEFAULT_CONTEXT_LENGTHS = {
    # https://platform.minimax.io/docs/api-reference/text-chat-openai
    "minimax-m3": 1000000,
    "minimax": 204800,
-    # GLM — GLM-5.2 ships with a 1M context window (verified empirically:
-    # needle-in-a-haystack retrieval at 789K prompt tokens succeeded with
-    # zero errors on api.z.ai/api/coding/paas/v4).  Older GLM models
-    # (5, 5.1, 5-turbo) are ~202K.  Longest-key-first substring matching
-    # ensures "glm-5.2" resolves to 1M while older variants still hit the
-    # generic 202K fallback.
-    "glm-5.2": 1_048_576,
+    # GLM
    "glm": 202752,
    # xAI Grok — xAI /v1/models does not return context_length metadata,
    # so these hardcoded fallbacks prevent Hermes from probing-down to
@@ -685,15 +625,6 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
    if not force_refresh and _model_metadata_cache and (time.time() - _model_metadata_cache_time) < _MODEL_CACHE_TTL:
        return _model_metadata_cache

-    if not force_refresh:
-        disk_age = _model_metadata_disk_cache_age_seconds()
-        if disk_age is not None and disk_age < _MODEL_CACHE_TTL:
-            disk_cache = _load_model_metadata_disk_cache()
-            if disk_cache:
-                _model_metadata_cache = disk_cache
-                _model_metadata_cache_time = time.time() - disk_age
-                return _model_metadata_cache
-
    try:
        response = requests.get(OPENROUTER_MODELS_URL, timeout=10, verify=_resolve_requests_verify())
        response.raise_for_status()
@@ -715,24 +646,12 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any

        _model_metadata_cache = cache
        _model_metadata_cache_time = time.time()
-        _save_model_metadata_disk_cache(cache)
        logger.debug("Fetched metadata for %s models from OpenRouter", len(cache))
        return cache

    except Exception as e:
        logger.warning(f"Failed to fetch model metadata from OpenRouter: {e}")
-        if _model_metadata_cache:
-            return _model_metadata_cache
-        disk_cache = _load_model_metadata_disk_cache()
-        if disk_cache:
-            _model_metadata_cache = disk_cache
-            disk_age = _model_metadata_disk_cache_age_seconds()
-            if disk_age is not None:
-                _model_metadata_cache_time = time.time() - min(disk_age, _MODEL_CACHE_TTL)
-            else:
-                _model_metadata_cache_time = time.time() - _MODEL_CACHE_TTL + 1
-            return _model_metadata_cache
-        return {}
+        return _model_metadata_cache or {}


 def fetch_endpoint_model_metadata(
@@ -1049,16 +968,6 @@ def parse_available_output_tokens_from_error(error_msg: str) -> Optional[int]:
        # OpenRouter/Nous phrasing of the same condition.
        "in the output" in error_lower
        and "maximum context length" in error_lower
-    ) or (
-        # LM Studio / llama.cpp / some OpenAI-compatible servers:
-        #   "This model's maximum context length is 65536 tokens. However, you
-        #    requested 65536 output tokens and your prompt contains 77409
-        #    characters ..."
-        # The "requested N output tokens" phrasing means the OUTPUT cap is the
-        # problem (the input itself fits) — reduce max_tokens, don't compress.
-        "maximum context length" in error_lower
-        and "requested" in error_lower
-        and "output tokens" in error_lower
    )
    if not is_output_cap_error:
        return None
@@ -1090,22 +999,6 @@ def parse_available_output_tokens_from_error(error_msg: str) -> Optional[int]:
        if _available >= 1:
            return _available

-    # LM Studio / llama.cpp style: context window is reported in tokens but the
-    # prompt size is reported in CHARACTERS, e.g.
-    #   "maximum context length is 65536 tokens ... your prompt contains 77409
-    #    characters ...".
-    # Estimate the input tokens conservatively (~3 chars/token, which
-    # over-reserves the input so the retried output cap stays safely inside the
-    # window) and leave the remainder of the window for output.
-    _m_ctx_tok = re.search(r'maximum context length is (\d+)\s*token', error_lower)
-    _m_chars = re.search(r'prompt contains (\d+)\s*character', error_lower)
-    if _m_ctx_tok and _m_chars:
-        _ctx = int(_m_ctx_tok.group(1))
-        _est_input = (int(_m_chars.group(1)) + 2) // 3
-        _available = _ctx - _est_input
-        if _available >= 1:
-            return _available
-
    return None


@@ -1891,43 +1784,10 @@ def get_model_context_length(
        if ctx is not None:
            save_context_length(model, base_url, ctx)
            return ctx
-    # 5f. OpenRouter live /models metadata — authoritative for OpenRouter-routed
-    # models. OpenRouter's catalog carries per-model context_length (e.g.
-    # anthropic/claude-fable-5 -> 1M) and refreshes as new slugs ship, so it
-    # must win over both models.dev (step 5g) and the hardcoded family catch-all
-    # (step 8). Before this branch, an OpenRouter selection set
-    # effective_provider="openrouter", which (a) made the models.dev lookup miss
-    # brand-new slugs and (b) skipped the step-6 OR fallback (gated on `not
-    # effective_provider`), so a fresh slug like claude-fable-5 fell through to
-    # the generic "claude": 200K entry and under-reported a 1M window. Mirrors
-    # the dedicated Nous/Copilot/GMI branches above.
-    if effective_provider == "openrouter":
-        metadata = fetch_model_metadata()
-        entry = metadata.get(model)
-        if entry:
-            or_ctx = entry.get("context_length")
-            # Guard against the known OpenRouter Kimi-family 32k underreport
-            # (same class the hardcoded overrides exist to mitigate).
-            if isinstance(or_ctx, int) and or_ctx > 0 and not (
-                or_ctx == 32768 and _model_name_suggests_kimi(model)
-            ):
-                return or_ctx
-
    if effective_provider:
        from agent.models_dev import lookup_models_dev_context
        ctx = lookup_models_dev_context(effective_provider, model)
        if ctx:
-            # MiniMax M3: models.dev reports 512K but actual context is 1M.
-            # Prefer hardcoded catalog over stale probe value.
-            if _model_name_suggests_minimax_m3(model):
-                catalog = DEFAULT_CONTEXT_LENGTHS.get("minimax-m3")
-                if catalog and ctx < catalog:
-                    logger.info(
-                        "Rejecting models.dev context=%s for %r "
-                        "(MiniMax-M3 underreport); using hardcoded default %s",
-                        ctx, model, f"{catalog:,}",
-                    )
-                    ctx = catalog
            return ctx

    # 6. OpenRouter live API metadata — provider-unaware fallback.
--- a/agent/moonshot_schema.py
+++ b/agent/moonshot_schema.py
@@ -135,14 +135,7 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:

 def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
    """Infer a reasonable ``type`` if this schema node has none."""
-    node_type = node.get("type")
-    if isinstance(node_type, list):
-        concrete = next(
-            (t for t in node_type if isinstance(t, str) and t not in {"", "null"}),
-            "string",
-        )
-        return {**node, "type": concrete}
-    if "type" in node and node_type not in {None, ""}:
+    if "type" in node and node["type"] not in {None, ""}:
        return node

    # Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum``
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -489,41 +489,15 @@ PLATFORM_HINTS = {
        "files arrive as downloadable documents. You can also include image "
        "URLs in markdown format ![alt](url) and they will be sent as photos."
    ),
-    "whatsapp_cloud": (
-        "You are on a text messaging communication platform, WhatsApp "
-        "(via Meta's official Business Cloud API). Standard markdown "
-        "(**bold**, ~~strike~~, # headers, [links](url)) is auto-converted "
-        "to WhatsApp's native syntax (*bold*, ~strike~, etc.) — feel free "
-        "to write in markdown. Tables are NOT supported — prefer bullet "
-        "lists or labeled key:value pairs. "
-        "You can send media files natively: include MEDIA:/absolute/path/to/file "
-        "in your response. Images (.jpg, .png) become photo attachments, "
-        "videos (.mp4) play inline, audio (.mp3, .ogg) sends as voice/audio "
-        "messages, other files arrive as documents. Image URLs in markdown "
-        "format ![alt](url) also work. "
-        "IMPORTANT: this platform has a 24-hour conversation window — if the "
-        "user hasn't messaged in 24h, free-form replies are refused by Meta "
-        "(error 131047). This rarely matters for live chat, but is worth "
-        "knowing if you're scheduling a delayed message."
-    ),
    "telegram": (
        "You are on a text messaging communication platform, Telegram. "
-        "Standard Markdown is automatically converted to Telegram formatting. "
+        "Standard markdown is automatically converted to Telegram format. "
        "Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
        "`inline code`, ```code blocks```, [links](url), and ## headers. "
-        "Telegram now supports rich Markdown, so lean into it: whenever it "
-        "makes the answer clearer or easier to scan, actively reach for real "
-        "Markdown tables (pipe `| col | col |` syntax), bullet and numbered "
-        "lists, task lists (`- [ ]` / `- [x]`), headings, nested blockquotes, "
-        "collapsible details, footnotes/references, math/formulas (`$...$`, "
-        "`$$...$$`), underline, subscript/superscript, marked (highlighted) "
-        "text, and anchors. Default to structured formatting over dense "
-        "paragraphs for any comparison, set of steps, key/value summary, or "
-        "tabular data. Prefer real Markdown tables and task lists over "
-        "hand-built bullet substitutes when presenting structured data; these "
-        "degrade gracefully (tables become readable bullet groups) when rich "
-        "rendering is unavailable, but advanced constructs like math and "
-        "collapsible details may render as plain source text in that case. "
+        "Telegram has NO table syntax — prefer bullet lists or labeled "
+        "key: value pairs over pipe tables (any tables you do emit are "
+        "auto-rewritten into row-group bullets, which you can produce "
+        "directly for cleaner output). "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. Images "
        "(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
@@ -911,22 +885,6 @@ def build_environment_hints() -> str:
                f"`uname -a && whoami && pwd`."
            )

-    # Hermes desktop GUI — any agent running under the desktop app should know
-    # it. HERMES_DESKTOP marks the backend powering the chat; HERMES_DESKTOP_TERMINAL
-    # marks a hermes launched in the embedded terminal pane. Both set by main.cjs.
-    _truthy = ("1", "true", "yes")
-    _in_desktop = (os.getenv("HERMES_DESKTOP") or "").strip().lower() in _truthy
-    _in_desktop_term = (os.getenv("HERMES_DESKTOP_TERMINAL") or "").strip().lower() in _truthy
-    if _in_desktop or _in_desktop_term:
-        _desktop_hint = "Runtime surface: you're running inside the Hermes desktop GUI app."
-        if _in_desktop_term:
-            _desktop_hint += (
-                " You're in its embedded terminal pane, beside the GUI chat — the user can "
-                "select your output (⌥-drag on macOS, Shift-drag elsewhere) and press "
-                "⌘/Ctrl+L to send it to the chat composer."
-            )
-        hints.append(_desktop_hint)
-
    if is_wsl():
        hints.append(WSL_ENVIRONMENT_HINT)

@@ -1127,12 +1085,11 @@ def _skill_should_show(
 def build_skills_system_prompt(
    available_tools: "set[str] | None" = None,
    available_toolsets: "set[str] | None" = None,
-    compact_categories: "frozenset[str] | None" = None,
 ) -> str:
    """Build a compact skill index for the system prompt.

    Two-layer cache:
-      1. In-process LRU dict keyed by (skills_dir, tools, toolsets, hidden)
+      1. In-process LRU dict keyed by (skills_dir, tools, toolsets)
      2. Disk snapshot (``.skills_prompt_snapshot.json``) validated by
         mtime/size manifest — survives process restarts

@@ -1142,12 +1099,6 @@ def build_skills_system_prompt(
    scanned alongside the local ``~/.hermes/skills/`` directory.  External dirs
    are read-only — they appear in the index but new skills are always created
    in the local dir.  Local skills take precedence when names collide.
-
-    ``compact_categories`` (e.g. from the coding posture — see
-    agent/coding_context.py) demotes whole categories to a names-only line in
-    the rendered index. Nothing is ever hidden: every skill name stays
-    visible and loadable via ``skill_view`` / ``skills_list``; only the
-    descriptions are dropped, and a footer note explains the demotion.
    """
    skills_dir = get_skills_dir()
    external_dirs = get_all_skills_dirs()[1:]  # skip local (index 0)
@@ -1164,7 +1115,7 @@ def build_skills_system_prompt(
        or get_session_env("HERMES_SESSION_PLATFORM")
        or ""
    )
-    disabled = get_disabled_skill_names(_platform_hint or None)
+    disabled = get_disabled_skill_names()
    cache_key = (
        str(skills_dir.resolve()),
        tuple(str(d) for d in external_dirs),
@@ -1172,7 +1123,6 @@ def build_skills_system_prompt(
        tuple(sorted(str(ts) for ts in (available_toolsets or set()))),
        _platform_hint,
        tuple(sorted(disabled)),
-        tuple(sorted(compact_categories or ())),
    )
    with _SKILLS_PROMPT_CACHE_LOCK:
        cached = _SKILLS_PROMPT_CACHE.get(cache_key)
@@ -1306,44 +1256,18 @@ def build_skills_system_prompt(
            except Exception as e:
                logger.debug("Could not read external skill description %s: %s", desc_file, e)

-    # Posture-driven category demotion (e.g. non-coding skills while pairing
-    # on code). Demoted categories stay in the index as a single names-only
-    # line — descriptions are dropped to cut noise, but every skill name
-    # remains visible so memory-anchored recall ("load <name>") keeps working.
-    # NEVER remove entries entirely: agent-created skills are the model's
-    # project memory, and models don't reach for skills_list to rediscover
-    # what the index stops showing them. Match on the top-level category
-    # segment so nested categories ("social-media/twitter") are demoted with
-    # their parent.
-    demoted = frozenset(
-        cat for cat in skills_by_category
-        if cat.split("/", 1)[0] in (compact_categories or frozenset())
-    )
-
-    hidden_note = ""
-    if demoted:
-        hidden_note = (
-            "\n(Categories marked [names only] are outside the current coding "
-            "context, so their descriptions are omitted — the skills work "
-            "normally and load with skill_view(name) as usual.)"
-        )
-
    if not skills_by_category:
        result = ""
    else:
        index_lines = []
        for category in sorted(skills_by_category.keys()):
-            # Deduplicate and sort skills within each category
-            seen = set()
-            if category in demoted:
-                names = sorted({name for name, _ in skills_by_category[category]})
-                index_lines.append(f"  {category} [names only]: {', '.join(names)}")
-                continue
            cat_desc = category_descriptions.get(category, "")
            if cat_desc:
                index_lines.append(f"  {category}: {cat_desc}")
            else:
                index_lines.append(f"  {category}:")
+            # Deduplicate and sort skills within each category
+            seen = set()
            for name, desc in sorted(skills_by_category[category], key=lambda x: x[0]):
                if name in seen:
                    continue
@@ -1380,7 +1304,6 @@ def build_skills_system_prompt(
            "</available_skills>\n"
            "\n"
            "Only proceed without loading a skill if genuinely none are relevant to the task."
-            + hidden_note
        )

    # ── Store in LRU cache ────────────────────────────────────────────
@@ -1444,13 +1367,13 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -

    lines = [
        "# Nous Subscription",
-        "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, OpenAI Whisper STT, and browser automation (Browser Use) by default. Modal execution is optional.",
+        "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browser Use) by default. Modal execution is optional.",
        "Current capability status:",
    ]
    lines.extend(_status_line(feature) for feature in features.items())
    lines.extend(
        [
-            "When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, OpenAI Whisper, or Browser-Use API keys.",
+            "When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys.",
            "If the user is not subscribed and asks for a capability that Nous subscription would unlock or simplify, suggest Nous subscription as one option alongside direct setup or local alternatives.",
            "Do not mention subscription unless the user asks about it or it directly solves the current missing capability.",
            "Useful commands: hermes setup, hermes setup tools, hermes setup terminal, hermes status.",
--- a/agent/redact.py
+++ b/agent/redact.py
@@ -104,7 +104,6 @@ _PREFIX_PATTERNS = [
    r"mem0_[A-Za-z0-9]{10,}",           # Mem0 Platform API key
    r"brv_[A-Za-z0-9]{10,}",            # ByteRover API key
    r"xai-[A-Za-z0-9]{30,}",            # xAI (Grok) API key
-    r"ntn_[A-Za-z0-9]{10,}",            # Notion internal integration token
 ]

 # ENV assignment patterns: KEY=value where KEY contains a secret-like name
--- a/agent/secret_sources/bitwarden.py
+++ b/agent/secret_sources/bitwarden.py
@@ -274,7 +274,6 @@ def _platform_asset_name() -> str:
                capture_output=True,
                text=True,
                timeout=2,
-                stdin=subprocess.DEVNULL,
            )
            if "musl" in (res.stdout + res.stderr).lower():
                libc = "musl"
@@ -526,7 +525,6 @@ def _run_bws_list(
            capture_output=True,
            text=True,
            timeout=_BWS_RUN_TIMEOUT,
-            stdin=subprocess.DEVNULL,
        )
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError(
--- a/agent/skill_preprocessing.py
+++ b/agent/skill_preprocessing.py
@@ -74,7 +74,6 @@ def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
            text=True,
            timeout=max(1, int(timeout)),
            check=False,
-            stdin=subprocess.DEVNULL,
        )
    except subprocess.TimeoutExpired:
        return f"[inline-shell timeout after {timeout}s: {command}]"
--- a/agent/skill_utils.py
+++ b/agent/skill_utils.py
@@ -272,65 +272,27 @@ def skill_matches_environment(frontmatter: Dict[str, Any]) -> bool:
 # ── Disabled skills ───────────────────────────────────────────────────────


-_RAW_CONFIG_CACHE: Dict[Tuple[str, int, int], Dict[str, Any]] = {}
-
-
-def _raw_config_cache_clear() -> None:
-    """Test hook — drop the shared raw config cache."""
-    _RAW_CONFIG_CACHE.clear()
-
-
-def _load_raw_config() -> Dict[str, Any]:
-    """Read config.yaml with a shared mtime+size keyed cache.
-
-    This module intentionally avoids importing ``hermes_cli.config`` on the
-    skill prompt/build path. A tiny local cache gives the same repeated-read
-    win without pulling the heavier CLI config stack into startup.
-    """
-    config_path = get_config_path()
-    if not config_path.exists():
-        return {}
-    try:
-        stat = config_path.stat()
-        cache_key = (str(config_path), stat.st_mtime_ns, stat.st_size)
-    except OSError:
-        cache_key = None
-
-    if cache_key is not None:
-        cached = _RAW_CONFIG_CACHE.get(cache_key)
-        if cached is not None:
-            return cached
-
-    try:
-        parsed = yaml_load(config_path.read_text(encoding="utf-8"))
-    except Exception as e:
-        logger.debug("Could not read skill config %s: %s", config_path, e)
-        return {}
-    if not isinstance(parsed, dict):
-        return {}
-
-    if cache_key is not None:
-        _RAW_CONFIG_CACHE.clear()
-        _RAW_CONFIG_CACHE[cache_key] = parsed
-    return parsed
-
-
 def get_disabled_skill_names(platform: str | None = None) -> Set[str]:
    """Read disabled skill names from config.yaml.

    Args:
        platform: Explicit platform name (e.g. ``"telegram"``).  When
            *None*, resolves from ``HERMES_PLATFORM`` or
-            ``HERMES_SESSION_PLATFORM`` env vars.  Returns the global
-            disabled list, unioned with the platform-specific list when a
-            platform is resolved (a globally-disabled skill stays disabled
-            on every platform).
+            ``HERMES_SESSION_PLATFORM`` env vars.  Falls back to the
+            global disabled list when no platform is determined.

    Reads the config file directly (no CLI config imports) to stay
    lightweight.
    """
-    parsed = _load_raw_config()
-    if not parsed:
+    config_path = get_config_path()
+    if not config_path.exists():
+        return set()
+    try:
+        parsed = yaml_load(config_path.read_text(encoding="utf-8"))
+    except Exception as e:
+        logger.debug("Could not read skill config %s: %s", config_path, e)
+        return set()
+    if not isinstance(parsed, dict):
        return set()

    skills_cfg = parsed.get("skills")
@@ -343,14 +305,13 @@ def get_disabled_skill_names(platform: str | None = None) -> Set[str]:
        or os.getenv("HERMES_PLATFORM")
        or get_session_env("HERMES_SESSION_PLATFORM")
    )
-    global_disabled = _normalize_string_set(skills_cfg.get("disabled"))
    if resolved_platform:
        platform_disabled = (skills_cfg.get("platform_disabled") or {}).get(
            resolved_platform
        )
        if platform_disabled is not None:
-            return global_disabled | _normalize_string_set(platform_disabled)
-    return global_disabled
+            return _normalize_string_set(platform_disabled)
+    return _normalize_string_set(skills_cfg.get("disabled"))


 def _normalize_string_set(values) -> Set[str]:
@@ -375,7 +336,6 @@ _EXTERNAL_DIRS_CACHE: Dict[Tuple[str, int], List[Path]] = {}
 def _external_dirs_cache_clear() -> None:
    """Test hook — drop the in-process cache."""
    _EXTERNAL_DIRS_CACHE.clear()
-    _raw_config_cache_clear()


 def get_external_skills_dirs() -> List[Path]:
@@ -408,8 +368,11 @@ def get_external_skills_dirs() -> List[Path]:
            # Return a copy so callers can't mutate the cached list.
            return list(cached)

-    parsed = _load_raw_config()
-    if not parsed:
+    try:
+        parsed = yaml_load(config_path.read_text(encoding="utf-8"))
+    except Exception:
+        return []
+    if not isinstance(parsed, dict):
        return []

    skills_cfg = parsed.get("skills")
@@ -621,7 +584,15 @@ def resolve_skill_config_values(
    current values (or the declared default if the key isn't set).
    Path values are expanded via ``os.path.expanduser``.
    """
-    config = _load_raw_config()
+    config_path = get_config_path()
+    config: Dict[str, Any] = {}
+    if config_path.exists():
+        try:
+            parsed = yaml_load(config_path.read_text(encoding="utf-8"))
+            if isinstance(parsed, dict):
+                config = parsed
+        except Exception:
+            pass

    resolved: Dict[str, Any] = {}
    for var in config_vars:
--- a/agent/ssl_guard.py
+++ b/agent/ssl_guard.py
@@ -1,94 +0,0 @@
-"""Preventive SSL CA certificate checks for Hermes Agent.
-
-This module catches broken CA bundle paths before OpenAI/httpx turns them into
-opaque ``FileNotFoundError: [Errno 2] No such file or directory`` failures.
-"""
-
-from __future__ import annotations
-
-import logging
-import os
-import ssl
-from pathlib import Path
-
-from agent.errors import SSLConfigurationError
-
-logger = logging.getLogger(__name__)
-
-_CA_BUNDLE_ENV_VARS = (
-    "HERMES_CA_BUNDLE",
-    "SSL_CERT_FILE",
-    "REQUESTS_CA_BUNDLE",
-    "CURL_CA_BUNDLE",
-)
-
-_SKIP_VALUES = {"1", "true", "yes", "on"}
-
-
-def _skip_ssl_guard_enabled() -> bool:
-    return os.getenv("HERMES_SKIP_SSL_GUARD", "").strip().lower() in _SKIP_VALUES
-
-
-def _repair_hint() -> str:
-    return (
-        "Repair: python -m pip install --force-reinstall certifi openai httpx\n"
-        "If you configured a custom corporate CA bundle, fix or unset the "
-        "broken CA bundle environment variable."
-    )
-
-
-def _ssl_err(message: str) -> SSLConfigurationError:
-    """Create a consistent, user-actionable SSL configuration error."""
-    return SSLConfigurationError(f"{message}\n{_repair_hint()}")
-
-
-def _validate_bundle_path(label: str, value: str, *, require_substantial: bool = False) -> None:
-    path = Path(value).expanduser()
-    if not path.exists():
-        raise _ssl_err(f"{label} points to a missing CA bundle: {value}")
-    if not path.is_file():
-        raise _ssl_err(f"{label} does not point to a CA bundle file: {value}")
-    if require_substantial and path.stat().st_size < 1024:
-        raise _ssl_err(f"{label} at {value} appears corrupted (too small)")
-    try:
-        ctx = ssl.create_default_context(cafile=str(path))
-    except Exception as exc:
-        raise _ssl_err(f"{label} CA bundle at {value} cannot be loaded: {exc}") from exc
-    if not ctx.get_ca_certs():
-        raise _ssl_err(f"{label} CA bundle at {value} did not load any certificates")
-
-
-def verify_ca_bundle() -> None:
-    """Verify configured and bundled CA certificates are present and loadable.
-
-    Raises:
-        SSLConfigurationError: If an explicit CA-bundle environment variable
-            points at a bad path, or if certifi's bundled ``cacert.pem`` is
-            missing/corrupt.
-    """
-    if _skip_ssl_guard_enabled():
-        logger.debug("SSL CA bundle guard skipped via HERMES_SKIP_SSL_GUARD")
-        return
-
-    for env_var in _CA_BUNDLE_ENV_VARS:
-        value = os.getenv(env_var)
-        if value:
-            _validate_bundle_path(env_var, value)
-
-    try:
-        import certifi
-    except Exception as exc:
-        raise _ssl_err(f"certifi is not importable: {exc}") from exc
-
-    ca_bundle = str(certifi.where())
-    _validate_bundle_path("certifi", ca_bundle, require_substantial=True)
-
-
-def verify_ca_bundle_with_fallback() -> None:
-    """Backward-compatible wrapper for older call sites.
-
-    The old PR name mentioned a platform fallback, but allowing startup with a
-    broken certifi bundle still leaves httpx/OpenAI and requests call sites
-    failing later. Keep the wrapper name but enforce the same check.
-    """
-    verify_ca_bundle()
--- a/agent/system_prompt.py
+++ b/agent/system_prompt.py
@@ -191,23 +191,9 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
            )
            if toolset
        }
-        # Focus mode (opt-in) demotes non-coding skill categories to
-        # names-only in the index (never hidden — skill_view/skills_list
-        # reach everything, and every name stays visible for recall). The
-        # default coding posture leaves the index untouched.
-        _compact_cats = frozenset()
-        try:
-            from agent.coding_context import coding_compact_skill_categories
-
-            _compact_cats = coding_compact_skill_categories(
-                platform=agent.platform, cwd=resolve_context_cwd()
-            )
-        except Exception:
-            _compact_cats = frozenset()
        skills_prompt = _r.build_skills_system_prompt(
            available_tools=agent.valid_tool_names,
            available_toolsets=avail_toolsets,
-            compact_categories=_compact_cats or None,
        )
    else:
        skills_prompt = ""
@@ -235,26 +221,6 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
    if _env_hints:
        stable_parts.append(_env_hints)

-    # Coding posture (base Hermes, any interactive coding surface in a code
-    # workspace — see agent/coding_context.py). The operating brief + the live
-    # git/workspace snapshot are built once here and cached for the session;
-    # the snapshot is never re-probed per turn (that would break the prompt
-    # cache), so the brief tells the model to re-check git before relying on it.
-    if agent.valid_tool_names:
-        try:
-            from agent.coding_context import coding_system_blocks
-
-            stable_parts.extend(
-                coding_system_blocks(
-                    platform=agent.platform,
-                    cwd=resolve_context_cwd(),
-                    model=agent.model,
-                )
-            )
-        except Exception:
-            # Coding-context probing must never block prompt build.
-            pass
-
    # Local Python toolchain probe — names python/pip/uv/PEP-668 state when
    # something is non-default so the model can pick the right install
    # strategy without discovering by failure.  Emits a single line; emits
--- a/agent/tool_executor.py
+++ b/agent/tool_executor.py
@@ -417,7 +417,7 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe

    # ── Logging / callbacks ──────────────────────────────────────────
    tool_names_str = ", ".join(name for _, name, _, _, _, _ in parsed_calls)
-    if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off":
+    if not agent.quiet_mode:
        print(f"  ⚡ Concurrent: {num_tools} tool calls — {tool_names_str}")
        for i, (tc, name, args, middleware_trace, block_result, blocked_by_guardrail) in enumerate(parsed_calls, 1):
            args_str = json.dumps(args, ensure_ascii=False)
@@ -702,7 +702,7 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
        if agent._should_emit_quiet_tool_messages():
            cute_msg = _get_cute_tool_message_impl(name, args, tool_duration, result=function_result)
            agent._safe_print(f"  {cute_msg}")
-        elif not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off":
+        elif getattr(agent, "tool_progress_mode", "all") != "off":
            _preview_str = _multimodal_text_summary(function_result)
            if agent.verbose_logging:
                print(f"  ✅ Tool {i+1} completed in {tool_duration:.2f}s")
@@ -866,7 +866,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
        elif function_name == "skill_manage":
            agent._iters_since_skill = 0

-        if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off":
+        if not agent.quiet_mode:
            args_str = json.dumps(function_args, ensure_ascii=False)
            if agent.verbose_logging:
                print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())})")
@@ -1065,25 +1065,6 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
            tool_duration = time.time() - tool_start_time
            if agent._should_emit_quiet_tool_messages():
                agent._vprint(f"  {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
-        elif function_name == "read_terminal":
-            def _execute(next_args: dict) -> Any:
-                from tools.read_terminal_tool import read_terminal_tool as _read_terminal_tool
-                return _read_terminal_tool(
-                    start_line=next_args.get("start_line"),
-                    count=next_args.get("count"),
-                    callback=getattr(agent, "read_terminal_callback", None),
-                )
-            function_result, function_args = _run_agent_tool_execution_middleware(
-                agent,
-                function_name=function_name,
-                function_args=function_args,
-                effective_task_id=effective_task_id,
-                tool_call_id=getattr(tool_call, "id", "") or "",
-                execute=_execute,
-            )
-            tool_duration = time.time() - tool_start_time
-            if agent._should_emit_quiet_tool_messages():
-                agent._vprint(f"  {_get_cute_tool_message_impl('read_terminal', function_args, tool_duration, result=function_result)}")
        elif function_name == "delegate_task":
            tasks_arg = function_args.get("tasks")
            if tasks_arg and isinstance(tasks_arg, list):
@@ -1384,7 +1365,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
        # entire batch.  The model sees it on the next API iteration.
        agent._apply_pending_steer_to_tool_results(messages, 1)

-        if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off":
+        if not agent.quiet_mode:
            if agent.verbose_logging:
                print(f"  ✅ Tool {i} completed in {tool_duration:.2f}s")
                print(agent._wrap_verbose("Result: ", function_result))
--- a/agent/transports/anthropic.py
+++ b/agent/transports/anthropic.py
@@ -84,7 +84,7 @@ class AnthropicTransport(ProviderTransport):
        to OpenAI finish_reason, and collects reasoning_details in provider_data.
        """
        import json
-        from agent.anthropic_adapter import _to_plain_data, _sanitize_replay_block
+        from agent.anthropic_adapter import _to_plain_data
        from agent.transports.types import ToolCall

        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
@@ -94,40 +94,14 @@ class AnthropicTransport(ProviderTransport):
        reasoning_parts = []
        reasoning_details = []
        tool_calls = []
-        # Verbatim, order-preserving copy of every content block in the turn.
-        # Anthropic signs each thinking block against the turn content that
-        # PRECEDES it at its position; when a turn interleaves thinking and
-        # tool_use (adaptive/interleaved thinking, Claude 4.6+), the parallel
-        # reasoning_details + tool_calls lists below lose that cross-type
-        # ordering. Replaying the latest assistant message in the wrong order
-        # invalidates the signatures -> HTTP 400 "thinking ... blocks in the
-        # latest assistant message cannot be modified". Preserve the exact
-        # block sequence here so the adapter can replay it unchanged. See
-        # tests/agent/test_anthropic_thinking_block_order.py.
-        ordered_blocks = []

        for block in response.content:
-            block_dict = _to_plain_data(block)
-            clean_block = None
-            if isinstance(block_dict, dict):
-                # Sanitize at capture so output-only SDK fields (parsed_output,
-                # caller, citations=None, …) never persist to state.db and leak
-                # back as request input on replay → HTTP 400 "Extra inputs are
-                # not permitted". Defence-in-depth with the replay-side sanitize.
-                clean_block = _sanitize_replay_block(block_dict)
-                if clean_block is not None:
-                    ordered_blocks.append(clean_block)
            if block.type == "text":
                text_parts.append(block.text)
-            elif block.type in ("thinking", "redacted_thinking"):
-                if block.type == "thinking":
-                    reasoning_parts.append(block.thinking)
-                # Use the sanitized block (clean_block) for reasoning_details too,
-                # since _extract_preserved_thinking_blocks replays these on the
-                # non-ordered path. Falls back to raw only if sanitize dropped it.
-                if isinstance(clean_block, dict):
-                    reasoning_details.append(clean_block)
-                elif isinstance(block_dict, dict):
+            elif block.type == "thinking":
+                reasoning_parts.append(block.thinking)
+                block_dict = _to_plain_data(block)
+                if isinstance(block_dict, dict):
                    reasoning_details.append(block_dict)
            elif block.type == "tool_use":
                name = block.name
@@ -156,23 +130,6 @@ class AnthropicTransport(ProviderTransport):
        provider_data = {}
        if reasoning_details:
            provider_data["reasoning_details"] = reasoning_details
-        # Only worth carrying the ordered-blocks channel when the turn
-        # actually interleaves signed thinking with tool_use — that's the
-        # only shape the parallel lists reconstruct incorrectly. A turn that
-        # is purely text, or thinking-then-tools with a single leading
-        # thinking block, replays correctly without it.
-        _has_signed_thinking = any(
-            isinstance(b, dict)
-            and b.get("type") in ("thinking", "redacted_thinking")
-            and (b.get("signature") or b.get("data"))
-            for b in ordered_blocks
-        )
-        _has_tool_use = any(
-            isinstance(b, dict) and b.get("type") == "tool_use"
-            for b in ordered_blocks
-        )
-        if _has_signed_thinking and _has_tool_use:
-            provider_data["anthropic_content_blocks"] = ordered_blocks

        return NormalizedResponse(
            content="\n".join(text_parts) if text_parts else None,
@@ -186,21 +143,10 @@ class AnthropicTransport(ProviderTransport):
    def validate_response(self, response: Any) -> bool:
        """Check Anthropic response structure is valid.

-        An empty content list is legitimate for terminal stop reasons that
-        carry no text payload:
-
-        - ``end_turn`` — the model's canonical "nothing more to add" after a
-          tool turn that already delivered the user-facing text.
-        - ``refusal`` — the model declined to respond (Claude 4.5+). The
-          Messages API returns an empty ``content`` list with this stop
-          reason. Treating it as invalid sends a deterministic refusal into
-          the invalid-response retry loop, which reproduces the refusal on
-          every attempt and surfaces a misleading "rate limited / invalid
-          response" error instead of the refusal. ``normalize_response`` maps
-          ``refusal`` → ``content_filter`` so the agent loop's refusal handler
-          can surface it.
-
-        Treating either as invalid falsely retries a completed response.
+        An empty content list is legitimate when ``stop_reason == "end_turn"``
+        — the model's canonical way of signalling "nothing more to add" after
+        a tool turn that already delivered the user-facing text. Treating it
+        as invalid falsely retries a completed response.
        """
        if response is None:
            return False
@@ -208,7 +154,7 @@ class AnthropicTransport(ProviderTransport):
        if not isinstance(content_blocks, list):
            return False
        if not content_blocks:
-            return getattr(response, "stop_reason", None) in {"end_turn", "refusal"}
+            return getattr(response, "stop_reason", None) == "end_turn"
        return True

    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@@ -531,7 +531,6 @@ class ChatCompletionsTransport(ProviderTransport):
                supports_reasoning=params.get("supports_reasoning", False),
                qwen_session_metadata=params.get("qwen_session_metadata"),
                model=model,
-                base_url=params.get("base_url"),
                ollama_num_ctx=params.get("ollama_num_ctx"),
                session_id=params.get("session_id"),
            )
@@ -665,42 +664,8 @@ class ChatCompletionsTransport(ProviderTransport):
        if rd:
            provider_data["reasoning_details"] = rd

-        # OpenAI structured-refusal field. When a model declines, the SDK
-        # populates ``message.refusal`` with the explanation and leaves
-        # ``content`` empty. OpenAI-compatible proxies that front Anthropic /
-        # Bedrock (e.g. Nous Portal) surface a Claude refusal this way — or via
-        # ``finish_reason="content_filter"`` — instead of the native
-        # ``stop_reason="refusal"``. Without capturing it the refusal looks
-        # like an empty response, so the agent loop retries a deterministic
-        # refusal three times and gives up with "no content after retries".
-        # Promote it to content + a ``content_filter`` finish reason so the
-        # loop's refusal handler surfaces it clearly and stops. ``refusal`` is
-        # ``None`` for normal responses, so this is a no-op in the common case.
-        content = msg.content
-        refusal = getattr(msg, "refusal", None)
-        if refusal is None and hasattr(msg, "model_extra"):
-            _msg_extra = getattr(msg, "model_extra", None) or {}
-            if isinstance(_msg_extra, dict):
-                refusal = _msg_extra.get("refusal")
-        if isinstance(refusal, str) and refusal.strip():
-            # Record the refusal explanation regardless — it's useful provider
-            # metadata even when the model also returned a usable payload.
-            provider_data["refusal"] = refusal
-            _has_text = isinstance(content, str) and content.strip()
-            _has_tool_calls = bool(tool_calls)
-            # Only promote to a terminal ``content_filter`` when the refusal is
-            # the *sole* payload — no visible text and no tool calls. A response
-            # that carries real content (or tool calls) alongside a refusal note
-            # is a normal, usable turn: surfacing it as a failed safety refusal
-            # would discard the model's actual work. In the empty-payload case,
-            # adopt the refusal as content so the loop has something to show.
-            if not _has_text and not _has_tool_calls:
-                content = refusal
-                if finish_reason in (None, "stop"):
-                    finish_reason = "content_filter"
-
        return NormalizedResponse(
-            content=content,
+            content=msg.content,
            tool_calls=tool_calls,
            finish_reason=finish_reason,
            reasoning=reasoning,
--- a/agent/transports/codex.py
+++ b/agent/transports/codex.py
@@ -218,10 +218,22 @@ class ResponsesApiTransport(ProviderTransport):
            kwargs.pop("timeout", None)

        if is_codex_backend:
-            # chatgpt.com/backend-api/codex rejects body-level
-            # ``extra_headers`` with HTTP 400. Correlation/cache routing for
-            # this backend must not be sent through the Responses payload.
-            kwargs.pop("extra_headers", None)
+            prompt_cache_key = kwargs.get("prompt_cache_key")
+            cache_scope_id = str(prompt_cache_key or session_id or "").strip()
+            if cache_scope_id:
+                existing_extra_headers = kwargs.get("extra_headers")
+                merged_extra_headers: Dict[str, str] = {}
+                if isinstance(existing_extra_headers, dict):
+                    merged_extra_headers.update(
+                        {
+                            str(key): str(value)
+                            for key, value in existing_extra_headers.items()
+                            if key and value is not None
+                        }
+                    )
+                merged_extra_headers["session_id"] = cache_scope_id
+                merged_extra_headers["x-client-request-id"] = cache_scope_id
+                kwargs["extra_headers"] = merged_extra_headers

        max_tokens = params.get("max_tokens")
        if max_tokens is not None and not is_codex_backend:
--- a/agent/transports/codex_app_server.py
+++ b/agent/transports/codex_app_server.py
@@ -378,7 +378,6 @@ def check_codex_binary(
            capture_output=True,
            text=True,
            timeout=10,
-            stdin=subprocess.DEVNULL,
        )
    except FileNotFoundError:
        return False, (
--- a/agent/transports/codex_app_server_session.py
+++ b/agent/transports/codex_app_server_session.py
@@ -72,9 +72,6 @@ class TurnResult:
    error: Optional[str] = None  # Set if turn ended in a non-recoverable error
    turn_id: Optional[str] = None
    thread_id: Optional[str] = None
-    token_usage_last: Optional[dict[str, Any]] = None
-    token_usage_total: Optional[dict[str, Any]] = None
-    model_context_window: Optional[int] = None
    # Hint to the caller that the underlying codex subprocess is likely
    # wedged (turn-level timeout fired, post-tool watchdog tripped, or
    # token-refresh failure killed the child). The caller should retire
@@ -504,7 +501,6 @@ class CodexAppServerSession:
                    pending = self._client.take_notification(timeout=0)
                    if pending is None:
                        break
-                    _apply_token_usage_notification(result, pending)
                    self._track_pending_file_change(pending)
                    proj = projector.project(pending)
                    if proj.messages:
@@ -540,8 +536,6 @@ class CodexAppServerSession:
                except Exception:  # pragma: no cover - display callback
                    logger.debug("on_event callback raised", exc_info=True)

-            _apply_token_usage_notification(result, note)
-
            # Track in-progress fileChange items so the approval bridge
            # can surface a real change summary when codex requests
            # approval (the approval params themselves don't carry the
@@ -808,30 +802,6 @@ class CodexAppServerSession:
        return cached


-def _apply_token_usage_notification(result: TurnResult, note: dict) -> None:
-    """Capture Codex app-server token usage updates for caller accounting.
-
-    Codex does not put token usage on turn/completed. It emits a separate
-    thread/tokenUsage/updated notification containing cumulative totals and
-    the latest turn breakdown.
-    """
-    if not isinstance(note, dict) or note.get("method") != "thread/tokenUsage/updated":
-        return
-    params = note.get("params") or {}
-    token_usage = params.get("tokenUsage") or {}
-    if not isinstance(token_usage, dict):
-        return
-    last = token_usage.get("last")
-    total = token_usage.get("total")
-    if isinstance(last, dict):
-        result.token_usage_last = dict(last)
-    if isinstance(total, dict):
-        result.token_usage_total = dict(total)
-    window = token_usage.get("modelContextWindow")
-    if isinstance(window, int) and window > 0:
-        result.model_context_window = window
-
-
 def _approval_choice_to_codex_decision(choice: str) -> str:
    """Map Hermes approval choices onto codex's CommandExecutionApprovalDecision
    / FileChangeApprovalDecision wire values.
--- a/agent/transports/types.py
+++ b/agent/transports/types.py
@@ -121,18 +121,6 @@ class NormalizedResponse:
        pd = self.provider_data or {}
        return pd.get("reasoning_details")

-    @property
-    def anthropic_content_blocks(self):
-        """Verbatim, order-preserving Anthropic content blocks for a turn.
-
-        Present only when an Anthropic turn interleaves signed thinking with
-        tool_use — the one shape the parallel reasoning_details + tool_calls
-        lists reconstruct in the wrong order, invalidating thinking-block
-        signatures on replay. See agent/transports/anthropic.py.
-        """
-        pd = self.provider_data or {}
-        return pd.get("anthropic_content_blocks")
-
    @property
    def codex_reasoning_items(self):
        pd = self.provider_data or {}
--- a/agent/usage_pricing.py
+++ b/agent/usage_pricing.py
@@ -13,7 +13,6 @@ DEFAULT_PRICING = {"input": 0.0, "output": 0.0}

 _ZERO = Decimal("0")
 _ONE_MILLION = Decimal("1000000")
-_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"

 CostStatus = Literal["actual", "estimated", "included", "unknown"]
 CostSource = Literal[
@@ -571,8 +570,6 @@ def resolve_billing_route(
        return BillingRoute(provider="openai-codex", model=model, base_url=base_url or "", billing_mode="subscription_included")
    if provider_name == "openrouter" or base_url_host_matches(base_url or "", "openrouter.ai"):
        return BillingRoute(provider="openrouter", model=model, base_url=base_url or "", billing_mode="official_models_api")
-    if provider_name == "nous" or base_url_host_matches(base_url or "", "inference-api.nousresearch.com"):
-        return BillingRoute(provider="nous", model=model, base_url=base_url or _NOUS_DEFAULT_BASE_URL, billing_mode="official_models_api")
    if provider_name == "anthropic":
        return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
    if provider_name == "openai":
--- a/apps/bootstrap-installer/package.json
+++ b/apps/bootstrap-installer/package.json
@@ -11,12 +11,11 @@
    "tauri": "tauri",
    "tauri:dev": "tauri dev",
    "tauri:build": "tauri build",
-    "tauri:build:debug": "tauri build --debug",
-    "typecheck": "tsc -p . --noEmit"
+    "tauri:build:debug": "tauri build --debug"
  },
  "dependencies": {
    "@nous-research/ui": "0.16.0",
-    "@tailwindcss/vite": "^4.2.4",
+    "@tailwindcss/vite": "^4.2.1",
    "@tailwindcss/typography": "^0.5.19",
    "@tauri-apps/api": "^2.0.0",
    "@tauri-apps/plugin-dialog": "^2.0.0",
@@ -40,8 +39,8 @@
    "@tauri-apps/cli": "^2.0.0",
    "@types/react": "^19.2.14",
    "@types/react-dom": "^19.2.3",
-    "@vitejs/plugin-react": "^6.0.2",
-    "typescript": "^6.0.3",
-    "vite": "^8.0.16"
+    "@vitejs/plugin-react": "^5.2.0",
+    "typescript": "~5.9.3",
+    "vite": "^7.3.1"
  }
 }
--- a/apps/bootstrap-installer/src-tauri/src/update.rs
+++ b/apps/bootstrap-installer/src-tauri/src/update.rs
@@ -3,9 +3,8 @@
 //! Driven when the installer is launched as `Hermes-Setup.exe --update` (see
 //! `AppMode` in lib.rs). The desktop app hands off to us — it exits, then we:
 //!
-//!   1. wait for the old Hermes desktop process to fully exit (so both the
-//!      venv shim and packaged app.asar are free; otherwise `hermes update`
-//!      or repair bootstrap can race locked files),
+//!   1. wait for the old Hermes desktop process to fully exit (so the venv
+//!      shim is free; otherwise `hermes update` aborts with exit code 2),
 //!   2. run `hermes update --yes --gateway` (Python/repo update; this does NOT
 //!      rebuild apps/desktop by design — see cmd_update in hermes_cli/main.py),
 //!   3. run `hermes desktop --build-only` (the rebuild step update skips),
@@ -39,8 +38,8 @@ use crate::events::{BootstrapEvent, LogStream, StageInfo, StageState};
 /// hermes_cli/main.py (sys.exit(2)). We surface a targeted message for this.
 const UPDATE_EXIT_CONCURRENT: i32 = 2;

-/// How long to wait for the old desktop process to release files under the
-/// install tree before giving up and letting `hermes update`'s own guard decide.
+/// How long to wait for the old desktop process to release the venv shim
+/// before giving up and letting `hermes update`'s own guard decide.
 const DESKTOP_EXIT_WAIT: Duration = Duration::from_secs(20);
 const DESKTOP_EXIT_POLL: Duration = Duration::from_millis(500);

@@ -151,10 +150,8 @@ async fn run_update(app: AppHandle) -> Result<()> {
    // ---- pre-step: wait for the old desktop to die -----------------------
    // The desktop exec'd us then called app.exit(), but process teardown is
    // async on Windows. If it still holds the venv shim, `hermes update`
-    // aborts with exit 2. If it still holds the packaged app.asar,
-    // install.ps1's repair/re-clone path cannot move/remove the install tree.
-    // Give both handles a bounded window to clear.
-    wait_for_install_locks_free(&install_root, &app, "update").await;
+    // aborts with exit 2. Give it a bounded window to clear.
+    wait_for_venv_free(&install_root, &app).await;

    // ---- stage 1: hermes update -----------------------------------------
    // Pass --branch so `hermes update` targets the branch this installer was
@@ -176,8 +173,8 @@ async fn run_update(app: AppHandle) -> Result<()> {
        vec!["update".into(), "--yes".into(), "--gateway".into()];
    // --force skips `hermes update`'s Windows running-exe guard (which would
    // `sys.exit(2)` and dead-end the handoff). By contract the desktop has
-    // already exited and waited for the install locks to clear before launching
-    // us, and wait_for_install_locks_free below force-kills any straggler — so by the
+    // already exited and waited for the venv shim to unlock before launching
+    // us, and wait_for_venv_free below force-kills any straggler — so by the
    // time `hermes update` runs there is no legitimate hermes.exe to protect,
    // and the guard would only produce a false "Hermes is still running" stop.
    update_args.push("--force".into());
@@ -394,57 +391,48 @@ async fn run_update(app: AppHandle) -> Result<()> {
    Ok(())
 }

-/// Poll until the venv shim AND packaged desktop app bundle are no longer locked
-/// (Windows) or a bounded timeout elapses. On non-Windows this is a short fixed
-/// grace since file locking isn't the failure mode there.
-pub(crate) async fn wait_for_install_locks_free(install_root: &Path, app: &AppHandle, stage: &str) {
-    let lock_targets = install_lock_probe_paths(install_root);
+/// Poll until the venv shim is no longer locked (Windows) or a bounded timeout
+/// elapses. On non-Windows this is a short fixed grace since file locking
+/// isn't the failure mode there.
+async fn wait_for_venv_free(install_root: &Path, app: &AppHandle) {
+    let shim = venv_hermes(install_root);
    let deadline = Instant::now() + DESKTOP_EXIT_WAIT;

-    emit_log(app, Some(stage), LogStream::Stdout, "[handoff] waiting for Hermes to exit…");
+    emit_log(app, Some("update"), LogStream::Stdout, "[update] waiting for Hermes to exit…");

    loop {
-        let locked = locked_paths(&lock_targets);
-        if locked.is_empty() {
+        if !is_locked(&shim) {
            return;
        }
        if Instant::now() >= deadline {
-            // Last resort: a backend hermes.exe (or the desktop Hermes.exe
-            // itself) is still holding one of the update-sensitive files. The
-            // desktop should have reaped its tree before handing off, but
-            // SIGTERM races / detached grandchildren / AV handles can leave a
-            // straggler. Rather than "proceed anyway" straight into uv's
-            // "Access is denied" or install.ps1's locked app.asar failure,
-            // force-kill every Hermes.exe except ourselves, then give the OS a
-            // beat to unload the image.
+            // Last resort: a backend hermes.exe (or a grandchild it spawned)
+            // is still holding the shim. The desktop should have reaped its
+            // tree before handing off, but SIGTERM races / detached
+            // grandchildren / AV handles can leave a straggler. Rather than
+            // "proceed anyway" straight into uv's "Access is denied", force-kill
+            // every hermes.exe except ourselves, then give the OS a beat to
+            // unload the image.
            emit_log(
                app,
-                Some(stage),
+                Some("update"),
                LogStream::Stdout,
-                &format!(
-                    "[handoff] Hermes still holding install files ({}); force-killing stragglers…",
-                    format_locked_paths(&locked)
-                ),
+                "[update] Hermes still holding the venv shim; force-killing stragglers…",
            );
            force_kill_other_hermes();
            tokio::time::sleep(Duration::from_millis(800)).await;
-            let locked_after_kill = locked_paths(&lock_targets);
-            if locked_after_kill.is_empty() {
+            if !is_locked(&shim) {
                emit_log(
                    app,
-                    Some(stage),
+                    Some("update"),
                    LogStream::Stdout,
-                    "[handoff] install files freed after force-kill",
+                    "[update] venv shim freed after force-kill",
                );
            } else {
                emit_log(
                    app,
-                    Some(stage),
+                    Some("update"),
                    LogStream::Stdout,
-                    &format!(
-                        "[handoff] install files still locked ({}); proceeding (--force + quarantine will handle it)",
-                        format_locked_paths(&locked_after_kill)
-                    ),
+                    "[update] venv shim still locked; proceeding (--force + quarantine will handle it)",
                );
            }
            return;
@@ -453,44 +441,13 @@ pub(crate) async fn wait_for_install_locks_free(install_root: &Path, app: &AppHa
    }
 }

-fn install_lock_probe_paths(install_root: &Path) -> Vec<PathBuf> {
-    let mut paths = vec![venv_hermes(install_root)];
-    paths.extend(desktop_app_payload_paths(install_root));
-    paths
-}
-
-fn desktop_app_payload_paths(install_root: &Path) -> Vec<PathBuf> {
-    let release = install_root.join("apps").join("desktop").join("release");
-    if cfg!(target_os = "windows") {
-        vec![
-            release.join("win-unpacked").join("resources").join("app.asar"),
-            release.join("win-arm64-unpacked").join("resources").join("app.asar"),
-        ]
-    } else if cfg!(target_os = "macos") {
-        vec![
-            release.join("mac").join("Hermes.app").join("Contents").join("Resources").join("app.asar"),
-            release.join("mac-arm64").join("Hermes.app").join("Contents").join("Resources").join("app.asar"),
-        ]
-    } else {
-        vec![release.join("linux-unpacked").join("resources").join("app.asar")]
-    }
-}
-
-fn locked_paths(paths: &[PathBuf]) -> Vec<PathBuf> {
-    paths.iter().filter(|p| is_locked(p)).cloned().collect()
-}
-
-fn format_locked_paths(paths: &[PathBuf]) -> String {
-    paths.iter().map(|p| p.display().to_string()).collect::<Vec<_>>().join(", ")
-}
-
 /// Force-kill any `hermes.exe` other than this process. Windows-only; a no-op
 /// elsewhere (POSIX has no mandatory-lock contention). We can't selectively
 /// target "the backend" by PID here — the desktop already exited and we never
 /// knew its children — so we kill the whole `hermes.exe` image tree via
 /// taskkill, excluding our own PID.
 ///
-/// Safe w.r.t. our own update child: this runs inside the install-lock wait,
+/// Safe w.r.t. our own update child: this runs inside `wait_for_venv_free`,
 /// which completes BEFORE we spawn `venv\Scripts\hermes.exe update`. At this
 /// point no update-driven hermes.exe exists yet, so the only hermes.exe images
 /// are stragglers from the old desktop — exactly what we want gone. (`/FI PID
@@ -934,29 +891,6 @@ mod tests {
        assert!(!is_locked(Path::new("/nonexistent/does/not/exist/xyz")));
    }

-    #[test]
-    fn lock_probe_paths_include_desktop_app_payload() {
-        let root = Path::new("/x/hermes-agent");
-        let probes = install_lock_probe_paths(root);
-
-        assert!(
-            probes.iter().any(|p| p == &venv_hermes(root)),
-            "venv shim remains part of the update lock probe"
-        );
-        assert!(
-            probes.iter().any(|p| p.ends_with(Path::new("resources/app.asar"))),
-            "packaged app.asar must be probed so repair/re-clone waits for the old desktop to exit"
-        );
-    }
-
-    #[test]
-    fn locked_paths_ignores_missing_payloads() {
-        let root = Path::new("/nonexistent/hermes-agent");
-        let probes = install_lock_probe_paths(root);
-
-        assert!(locked_paths(&probes).is_empty());
-    }
-
    #[test]
    fn parses_update_branch_from_space_or_equals_args() {
        assert_eq!(
--- a/apps/bootstrap-installer/tsconfig.json
+++ b/apps/bootstrap-installer/tsconfig.json
@@ -1,8 +1,8 @@
 {
  "compilerOptions": {
-    "target": "ES2023",
+    "target": "ES2022",
    "useDefineForClassFields": true,
-    "lib": ["ES2023", "DOM", "DOM.Iterable"],
+    "lib": ["ES2022", "DOM", "DOM.Iterable"],
    "module": "ESNext",
    "skipLibCheck": true,
    "moduleResolution": "bundler",
@@ -16,8 +16,9 @@
    "noUnusedParameters": true,
    "esModuleInterop": true,
    "noFallthroughCasesInSwitch": true,
+    "baseUrl": ".",
    "paths": {
-      "@/*": ["./src/*"]
+      "@/*": ["src/*"]
    }
  },
  "include": ["src"],
--- a/apps/desktop/README.md
+++ b/apps/desktop/README.md
@@ -34,7 +34,7 @@ It builds and launches the GUI against your existing install — same config, ke

 ### Prebuilt installers

-Prebuilt installers are built and distributed via [the Hermes Desktop website.](https://hermes-agent.nousresearch.com/).
+Prebuilt installers are built and distributed via [the Hermes Desktop website.](https://hermes-agent.nousresearch.com/desktop).

 ---

@@ -93,7 +93,7 @@ Run before opening a PR (lint may surface pre-existing warnings but must exit cl

 ```bash
 npm run fix
-npm run typecheck
+npm run type-check
 npm run lint
 npm run test:desktop:all
 ```
--- a/apps/desktop/assets/icon.icns
+++ b/apps/desktop/assets/icon.icns
--- a/apps/desktop/assets/icon.ico
+++ b/apps/desktop/assets/icon.ico
--- a/apps/desktop/assets/icon.png
+++ b/apps/desktop/assets/icon.png
--- a/apps/desktop/electron/backend-env.cjs
+++ b/apps/desktop/electron/backend-env.cjs
@@ -1,112 +0,0 @@
-const path = require('node:path')
-
-// Match the POSIX fallback surface used by the Python terminal environment.
-// macOS apps launched from Finder/Dock often inherit only /usr/bin:/bin:/usr/sbin:/sbin,
-// which misses Apple Silicon Homebrew and user-installed CLI tools such as codex.
-const POSIX_SANE_PATH_ENTRIES = Object.freeze([
-  '/opt/homebrew/bin',
-  '/opt/homebrew/sbin',
-  '/usr/local/sbin',
-  '/usr/local/bin',
-  '/usr/sbin',
-  '/usr/bin',
-  '/sbin',
-  '/bin'
-])
-
-function delimiterForPlatform(platform = process.platform) {
-  return platform === 'win32' ? ';' : ':'
-}
-
-function pathModuleForPlatform(platform = process.platform) {
-  return platform === 'win32' ? path.win32 : path.posix
-}
-
-function pathEnvKey(env = process.env, platform = process.platform) {
-  if (platform !== 'win32') return 'PATH'
-  return Object.keys(env || {}).find(key => key.toUpperCase() === 'PATH') || 'PATH'
-}
-
-function currentPathValue(env = process.env, platform = process.platform) {
-  const key = pathEnvKey(env, platform)
-  return env?.[key] || ''
-}
-
-function appendUniquePathEntries(entries, { delimiter = path.delimiter } = {}) {
-  const seen = new Set()
-  const ordered = []
-
-  for (const entry of entries) {
-    if (!entry) continue
-    const parts = Array.isArray(entry) ? entry : String(entry).split(delimiter)
-    for (const part of parts) {
-      if (!part || seen.has(part)) continue
-      seen.add(part)
-      ordered.push(part)
-    }
-  }
-
-  return ordered.join(delimiter)
-}
-
-function buildDesktopBackendPath({
-  hermesHome,
-  venvRoot,
-  currentPath = '',
-  platform = process.platform,
-  pathModule = pathModuleForPlatform(platform)
-} = {}) {
-  const delimiter = delimiterForPlatform(platform)
-  const hermesNodeBin = hermesHome ? pathModule.join(hermesHome, 'node', 'bin') : null
-  const venvBin = venvRoot ? pathModule.join(venvRoot, platform === 'win32' ? 'Scripts' : 'bin') : null
-  const saneEntries = platform === 'win32' ? [] : POSIX_SANE_PATH_ENTRIES
-
-  return appendUniquePathEntries(
-    [hermesNodeBin, venvBin, currentPath, saneEntries],
-    { delimiter }
-  )
-}
-
-function normalizeHermesHomeRoot(hermesHome, { pathModule = pathModuleForPlatform(process.platform) } = {}) {
-  if (!hermesHome) return hermesHome
-  const resolved = pathModule.resolve(String(hermesHome))
-  const parent = pathModule.dirname(resolved)
-  if (pathModule.basename(parent).toLowerCase() === 'profiles') {
-    return pathModule.dirname(parent)
-  }
-  return resolved
-}
-
-function buildDesktopBackendEnv({
-  hermesHome,
-  pythonPathEntries = [],
-  venvRoot,
-  currentEnv = process.env,
-  platform = process.platform,
-  pathModule = pathModuleForPlatform(platform)
-} = {}) {
-  const delimiter = delimiterForPlatform(platform)
-  const currentPythonPath = currentEnv?.PYTHONPATH || ''
-  const key = pathEnvKey(currentEnv, platform)
-
-  return {
-    PYTHONPATH: appendUniquePathEntries([...pythonPathEntries, currentPythonPath], { delimiter }),
-    [key]: buildDesktopBackendPath({
-      hermesHome,
-      venvRoot,
-      currentPath: currentPathValue(currentEnv, platform),
-      platform,
-      pathModule
-    })
-  }
-}
-
-module.exports = {
-  POSIX_SANE_PATH_ENTRIES,
-  appendUniquePathEntries,
-  buildDesktopBackendEnv,
-  buildDesktopBackendPath,
-  delimiterForPlatform,
-  normalizeHermesHomeRoot,
-  pathEnvKey
-}
--- a/apps/desktop/electron/backend-env.test.cjs
+++ b/apps/desktop/electron/backend-env.test.cjs
@@ -1,111 +0,0 @@
-const test = require('node:test')
-const assert = require('node:assert/strict')
-const path = require('node:path')
-
-const {
-  POSIX_SANE_PATH_ENTRIES,
-  appendUniquePathEntries,
-  buildDesktopBackendEnv,
-  buildDesktopBackendPath,
-  normalizeHermesHomeRoot,
-  pathEnvKey
-} = require('./backend-env.cjs')
-
-test('desktop backend PATH adds Hermes-managed bins and missing POSIX sane entries', () => {
-  const result = buildDesktopBackendPath({
-    hermesHome: '/Users/test/.hermes',
-    venvRoot: '/Users/test/.hermes/hermes-agent/venv',
-    currentPath: '/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin',
-    platform: 'darwin',
-    pathModule: path.posix
-  })
-
-  const entries = result.split(':')
-  assert.equal(entries[0], '/Users/test/.hermes/node/bin')
-  assert.equal(entries[1], '/Users/test/.hermes/hermes-agent/venv/bin')
-  assert.ok(entries.includes('/opt/homebrew/bin'), 'Apple Silicon Homebrew bin is added')
-  assert.ok(entries.includes('/opt/homebrew/sbin'), 'Apple Silicon Homebrew sbin is added')
-  assert.ok(entries.includes('/usr/local/sbin'), 'missing standard sbin is added')
-
-  for (const expected of POSIX_SANE_PATH_ENTRIES) {
-    assert.ok(entries.includes(expected), `${expected} should be present`)
-  }
-})
-
-test('desktop backend PATH preserves first occurrence and avoids duplicates', () => {
-  const result = buildDesktopBackendPath({
-    hermesHome: '/Users/test/.hermes',
-    venvRoot: '/Users/test/.hermes/hermes-agent/venv',
-    currentPath: '/opt/homebrew/bin:/usr/bin:/opt/homebrew/bin:/bin',
-    platform: 'darwin',
-    pathModule: path.posix
-  })
-
-  const entries = result.split(':')
-  assert.equal(entries.filter(entry => entry === '/opt/homebrew/bin').length, 1)
-  assert.ok(
-    entries.indexOf('/opt/homebrew/bin') < entries.indexOf('/opt/homebrew/sbin'),
-    'existing Homebrew bin keeps its precedence over appended missing sane entries'
-  )
-})
-
-test('buildDesktopBackendEnv extends PYTHONPATH and backend PATH together', () => {
-  const env = buildDesktopBackendEnv({
-    hermesHome: '/Users/test/.hermes',
-    pythonPathEntries: ['/repo/hermes-agent'],
-    venvRoot: '/Users/test/.hermes/hermes-agent/venv',
-    currentEnv: {
-      PATH: '/usr/bin:/bin',
-      PYTHONPATH: '/existing/pythonpath'
-    },
-    platform: 'darwin',
-    pathModule: path.posix
-  })
-
-  assert.equal(env.PYTHONPATH, '/repo/hermes-agent:/existing/pythonpath')
-  assert.ok(env.PATH.startsWith('/Users/test/.hermes/node/bin:/Users/test/.hermes/hermes-agent/venv/bin:'))
-  assert.ok(env.PATH.includes('/opt/homebrew/bin'))
-})
-
-test('normalizeHermesHomeRoot maps profile homes back to the global Hermes root', () => {
-  assert.equal(
-    normalizeHermesHomeRoot('/Users/test/.hermes/profiles/oracle', { pathModule: path.posix }),
-    '/Users/test/.hermes'
-  )
-  assert.equal(
-    normalizeHermesHomeRoot('C:\\Users\\test\\AppData\\Local\\hermes\\profiles\\oracle', { pathModule: path.win32 }),
-    'C:\\Users\\test\\AppData\\Local\\hermes'
-  )
-  assert.equal(
-    normalizeHermesHomeRoot('/Users/test/.hermes', { pathModule: path.posix }),
-    '/Users/test/.hermes'
-  )
-})
-
-test('Windows PATH casing and delimiter are preserved without POSIX sane entries', () => {
-  const env = buildDesktopBackendEnv({
-    hermesHome: 'C:\\Users\\test\\AppData\\Local\\hermes',
-    pythonPathEntries: ['C:\\repo\\hermes-agent'],
-    venvRoot: 'C:\\Users\\test\\AppData\\Local\\hermes\\hermes-agent\\venv',
-    currentEnv: {
-      Path: 'C:\\Windows\\System32;C:\\Windows',
-      PYTHONPATH: 'C:\\existing\\pythonpath'
-    },
-    platform: 'win32',
-    pathModule: path.win32
-  })
-
-  assert.equal(pathEnvKey({ Path: 'x' }, 'win32'), 'Path')
-  assert.equal(env.PATH, undefined)
-  assert.ok(env.Path.startsWith('C:\\Users\\test\\AppData\\Local\\hermes\\node\\bin;'))
-  assert.ok(env.Path.includes('\\venv\\Scripts;'))
-  assert.ok(env.Path.includes(';C:\\Windows\\System32;C:\\Windows'))
-  assert.equal(env.Path.includes('/opt/homebrew/bin'), false)
-})
-
-test('appendUniquePathEntries drops empty entries and keeps first occurrence', () => {
-  assert.equal(
-    appendUniquePathEntries([':/a::/b', ['/a', '/c']], { delimiter: ':' }),
-    '/a:/b:/c'
-  )
-})
--- a/apps/desktop/electron/backend-ready.cjs
+++ b/apps/desktop/electron/backend-ready.cjs
@@ -1,66 +0,0 @@
-const _READY_RE = /^HERMES_DASHBOARD_READY port=(\d+)/m
-
-/**
- * Watch a child process's stdout for the `HERMES_DASHBOARD_READY port=<N>`
- * line that web_server.py prints after uvicorn binds its socket.
- *
- * Returns the parsed port. Rejects if:
- *   - the child exits before emitting the line
- *   - the child emits an `error` event
- *   - no line arrives within the timeout
- *
- * A single `cleanup()` tears down every listener (data/exit/error/timeout)
- * on every terminal path — resolve, reject, or timeout — so repeated
- * backend spawns don't leak listener slots on the child.
- */
-function waitForDashboardPort(child, timeoutMs = 45_000) {
-  return new Promise((resolve, reject) => {
-    let buf = ''
-    let done = false
-
-    function cleanup() {
-      if (done) return
-      done = true
-      clearTimeout(timer)
-      child.stdout.off('data', onData)
-      child.off('exit', onExit)
-      child.off('error', onError)
-    }
-
-    function onData(chunk) {
-      buf += chunk.toString()
-      let nl
-      while ((nl = buf.indexOf('\n')) !== -1) {
-        const line = buf.slice(0, nl)
-        buf = buf.slice(nl + 1)
-        const m = line.match(_READY_RE)
-        if (m) {
-          cleanup()
-          resolve(parseInt(m[1], 10))
-          return
-        }
-      }
-    }
-
-    function onExit(code, signal) {
-      cleanup()
-      reject(new Error(`Hermes backend: exited before port announcement (${signal || code})`))
-    }
-
-    function onError(err) {
-      cleanup()
-      reject(err)
-    }
-
-    const timer = setTimeout(() => {
-      cleanup()
-      reject(new Error(`Timed out waiting for Hermes backend port announcement (${timeoutMs}ms)`))
-    }, timeoutMs)
-
-    child.stdout.on('data', onData)
-    child.on('exit', onExit)
-    child.on('error', onError)
-  })
-}
-
-module.exports = { waitForDashboardPort }
--- a/apps/desktop/electron/bootstrap-runner.cjs
+++ b/apps/desktop/electron/bootstrap-runner.cjs
@@ -40,15 +40,6 @@ const path = require('node:path')
 const https = require('node:https')
 const { spawn } = require('node:child_process')

-const IS_WINDOWS = process.platform === 'win32'
-
-function hiddenWindowsChildOptions(options = {}) {
-  if (!IS_WINDOWS || Object.prototype.hasOwnProperty.call(options, 'windowsHide')) {
-    return options
-  }
-  return { ...options, windowsHide: true }
-}
-
 const STAMP_COMMIT_RE = /^[0-9a-f]{7,40}$/i

 // Stages flagged needs_user_input=true in the manifest are skipped by the
@@ -293,7 +284,7 @@ function spawnPowerShell(scriptPath, args, { emit, stageName, abortSignal, herme
    const ps = process.platform === 'win32' ? resolveWindowsPowerShell() : 'pwsh'
    const fullArgs = ['-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', scriptPath, ...args]

-    const child = spawn(ps, fullArgs, hiddenWindowsChildOptions({
+    const child = spawn(ps, fullArgs, {
      stdio: ['ignore', 'pipe', 'pipe'],
      env: {
        ...process.env,
@@ -301,7 +292,7 @@ function spawnPowerShell(scriptPath, args, { emit, stageName, abortSignal, herme
        // choice rather than re-computing the default.
        HERMES_HOME: hermesHome || process.env.HERMES_HOME || ''
      }
-    }))
+    })

    let stdout = ''
    let stderr = ''
--- a/apps/desktop/electron/connection-config.cjs
+++ b/apps/desktop/electron/connection-config.cjs
@@ -166,39 +166,6 @@ function profileRemoteOverride(config, profile) {
  return { url, authMode: normAuthMode(entry.authMode), token: entry.token }
 }

-/**
- * In global-remote mode one backend serves every Desktop profile, so REST calls
- * that are scoped by renderer-side `request.profile` must carry that scope as a
- * query parameter. Local pooled backends and per-profile remote overrides do not
- * need this: they already run against a backend scoped to the target profile.
- */
-function pathWithGlobalRemoteProfile(path, profile, opts = {}) {
-  const scopedProfile = connectionScopeKey(profile)
-  if (!scopedProfile || !opts.globalRemote || opts.profileRemoteOverride) {
-    return path
-  }
-
-  const rawPath = String(path || '')
-  if (!rawPath) {
-    return path
-  }
-
-  let parsed
-  try {
-    parsed = new URL(rawPath, 'http://hermes.local')
-  } catch {
-    return path
-  }
-
-  if (parsed.searchParams.has('profile')) {
-    return path
-  }
-
-  parsed.searchParams.set('profile', scopedProfile)
-
-  return `${parsed.pathname}${parsed.search}${parsed.hash}`
-}
-
 function tokenPreview(value) {
  const raw = String(value || '')

@@ -280,7 +247,6 @@ module.exports = {
  cookiesHaveLiveSession,
  normAuthMode,
  normalizeRemoteBaseUrl,
-  pathWithGlobalRemoteProfile,
  profileRemoteOverride,
  resolveAuthMode,
  resolveTestWsUrl,
--- a/apps/desktop/electron/connection-config.test.cjs
+++ b/apps/desktop/electron/connection-config.test.cjs
@@ -24,7 +24,6 @@ const {
  cookiesHaveLiveSession,
  normAuthMode,
  normalizeRemoteBaseUrl,
-  pathWithGlobalRemoteProfile,
  profileRemoteOverride,
  resolveAuthMode,
  resolveTestWsUrl,
@@ -91,72 +90,6 @@ test('profileRemoteOverride tolerates a missing/!object profiles map', () => {
  assert.equal(profileRemoteOverride(null, 'coder'), null)
 })

-// --- pathWithGlobalRemoteProfile ---
-
-test('pathWithGlobalRemoteProfile appends profile in global remote mode', () => {
-  assert.equal(
-    pathWithGlobalRemoteProfile('/api/model/info', 'iris', {
-      globalRemote: true,
-      profileRemoteOverride: false
-    }),
-    '/api/model/info?profile=iris'
-  )
-})
-
-test('pathWithGlobalRemoteProfile preserves existing query params', () => {
-  assert.equal(
-    pathWithGlobalRemoteProfile('/api/model/options?force=1', 'iris', {
-      globalRemote: true,
-      profileRemoteOverride: false
-    }),
-    '/api/model/options?force=1&profile=iris'
-  )
-})
-
-test('pathWithGlobalRemoteProfile does not replace an explicit profile query', () => {
-  assert.equal(
-    pathWithGlobalRemoteProfile('/api/model/info?profile=default', 'iris', {
-      globalRemote: true,
-      profileRemoteOverride: false
-    }),
-    '/api/model/info?profile=default'
-  )
-})
-
-test('pathWithGlobalRemoteProfile skips local and per-profile remote override paths', () => {
-  assert.equal(
-    pathWithGlobalRemoteProfile('/api/model/info', 'iris', {
-      globalRemote: false,
-      profileRemoteOverride: false
-    }),
-    '/api/model/info'
-  )
-  assert.equal(
-    pathWithGlobalRemoteProfile('/api/model/info', 'iris', {
-      globalRemote: true,
-      profileRemoteOverride: true
-    }),
-    '/api/model/info'
-  )
-})
-
-test('pathWithGlobalRemoteProfile skips empty profile/path safely', () => {
-  assert.equal(
-    pathWithGlobalRemoteProfile('/api/model/info', '', {
-      globalRemote: true,
-      profileRemoteOverride: false
-    }),
-    '/api/model/info'
-  )
-  assert.equal(
-    pathWithGlobalRemoteProfile('', 'iris', {
-      globalRemote: true,
-      profileRemoteOverride: false
-    }),
-    ''
-  )
-})
-
 // --- normalizeRemoteBaseUrl ---

 test('normalizeRemoteBaseUrl strips trailing slashes, hash, and query', () => {
--- a/apps/desktop/electron/dashboard-token.cjs
+++ b/apps/desktop/electron/dashboard-token.cjs
@@ -1,99 +0,0 @@
-/**
- * Helpers for local dashboard session-token discovery.
- *
- * The desktop main process can pass HERMES_DASHBOARD_SESSION_TOKEN when it
- * spawns the local dashboard, but the dashboard is the source of truth for the
- * token it actually serves to the renderer. If those drift, HTTP readiness
- * probes still pass while /api/ws rejects the renderer's token.
- */
-
-const DEFAULT_TOKEN_FETCH_TIMEOUT_MS = 3_000
-
-async function fetchPublicText(url, options = {}) {
-  const { protocol } = new URL(url)
-  if (protocol !== 'http:' && protocol !== 'https:') {
-    throw new Error(`Unsupported Hermes backend URL protocol: ${protocol}`)
-  }
-
-  const timeoutMs = options.timeoutMs ?? DEFAULT_TOKEN_FETCH_TIMEOUT_MS
-  const res = await fetch(url, { signal: AbortSignal.timeout(timeoutMs) }).catch(error => {
-    if (error.name === 'TimeoutError') {
-      throw new Error(`Timed out connecting to Hermes backend after ${timeoutMs}ms`)
-    }
-    throw error
-  })
-  const text = await res.text()
-
-  if (!res.ok) throw new Error(`${res.status}: ${text || res.statusText}`)
-
-  return text
-}
-
-function extractInjectedDashboardToken(html) {
-  const match = /window\.__HERMES_SESSION_TOKEN__\s*=\s*("(?:\\.|[^"\\])*")/.exec(String(html || ''))
-  if (!match) return null
-  try {
-    return JSON.parse(match[1])
-  } catch {
-    return null
-  }
-}
-
-function dashboardIndexUrl(baseUrl) {
-  return `${String(baseUrl || '').replace(/\/+$/, '')}/`
-}
-
-async function resolveServedDashboardToken(baseUrl, fallbackToken, options = {}) {
-  const fetchText = options.fetchText || fetchPublicText
-  const html = await fetchText(dashboardIndexUrl(baseUrl), {
-    timeoutMs: options.timeoutMs ?? DEFAULT_TOKEN_FETCH_TIMEOUT_MS
-  })
-  const servedToken = extractInjectedDashboardToken(html)
-
-  if (servedToken && servedToken !== fallbackToken && typeof options.rememberLog === 'function') {
-    options.rememberLog('[boot] dashboard served a different session token; using served token for WebSocket auth')
-  }
-
-  return servedToken || fallbackToken
-}
-
-/**
- * A served token that differs from our spawn token while our child is DEAD
- * came from a process we did not spawn (orphan/port squatter that satisfied
- * the public /api/status readiness probe). With a live child the mismatch is
- * benign: our own backend regenerated the token because the env pin did not
- * survive the spawn.
- */
-function isForeignBackendToken({ servedToken, spawnToken, childAlive }) {
-  return Boolean(servedToken) && servedToken !== spawnToken && !childAlive
-}
-
-/**
- * Resolve the token the backend actually serves, adopting benign drift and
- * failing loudly on a foreign backend. `childAlive` is a thunk so liveness is
- * sampled after the fetch, not before.
- */
-async function adoptServedDashboardToken(baseUrl, spawnToken, { childAlive, label = 'Hermes backend', ...options }) {
-  const servedToken = await resolveServedDashboardToken(baseUrl, spawnToken, options).catch(error => {
-    options.rememberLog?.(`[boot] could not read served dashboard token (${label}): ${error.message}`)
-    return spawnToken
-  })
-
-  if (isForeignBackendToken({ servedToken, spawnToken, childAlive: childAlive() })) {
-    throw new Error(
-      `${label} exited and ${dashboardIndexUrl(baseUrl)} is served by a process we did not spawn; refusing its session token.`
-    )
-  }
-
-  return servedToken
-}
-
-module.exports = {
-  DEFAULT_TOKEN_FETCH_TIMEOUT_MS,
-  adoptServedDashboardToken,
-  dashboardIndexUrl,
-  extractInjectedDashboardToken,
-  fetchPublicText,
-  isForeignBackendToken,
-  resolveServedDashboardToken
-}
--- a/apps/desktop/electron/dashboard-token.test.cjs
+++ b/apps/desktop/electron/dashboard-token.test.cjs
@@ -1,142 +0,0 @@
-/**
- * Tests for electron/dashboard-token.cjs.
- *
- * Run with: node --test electron/dashboard-token.test.cjs
- * (Wired into npm test:desktop:platforms in package.json.)
- */
-
-const test = require('node:test')
-const assert = require('node:assert/strict')
-
-const {
-  adoptServedDashboardToken,
-  dashboardIndexUrl,
-  extractInjectedDashboardToken,
-  fetchPublicText,
-  isForeignBackendToken,
-  resolveServedDashboardToken
-} = require('./dashboard-token.cjs')
-
-test('extractInjectedDashboardToken reads the JSON-encoded dashboard token', () => {
-  const html = '<script>window.__HERMES_SESSION_TOKEN__="served-token";window.__HERMES_BASE_PATH__=""</script>'
-  assert.equal(extractInjectedDashboardToken(html), 'served-token')
-})
-
-test('extractInjectedDashboardToken handles escaped token strings', () => {
-  const html = '<script>window.__HERMES_SESSION_TOKEN__="served\\\\token\\"quoted";</script>'
-  assert.equal(extractInjectedDashboardToken(html), 'served\\token"quoted')
-})
-
-test('extractInjectedDashboardToken returns null for missing or malformed values', () => {
-  assert.equal(extractInjectedDashboardToken('<html></html>'), null)
-  assert.equal(extractInjectedDashboardToken('<script>window.__HERMES_SESSION_TOKEN__={bad}</script>'), null)
-})
-
-test('dashboardIndexUrl preserves dashboard path prefixes', () => {
-  assert.equal(dashboardIndexUrl('http://127.0.0.1:9120'), 'http://127.0.0.1:9120/')
-  assert.equal(dashboardIndexUrl('https://host.example/hermes/'), 'https://host.example/hermes/')
-})
-
-test('resolveServedDashboardToken uses the served token and logs when it differs', async () => {
-  const logs = []
-  const token = await resolveServedDashboardToken('http://127.0.0.1:9120', 'spawn-token', {
-    fetchText: async url => {
-      assert.equal(url, 'http://127.0.0.1:9120/')
-      return '<script>window.__HERMES_SESSION_TOKEN__="served-token";</script>'
-    },
-    rememberLog: line => logs.push(line)
-  })
-
-  assert.equal(token, 'served-token')
-  assert.equal(logs.length, 1)
-  assert.match(logs[0], /served a different session token/)
-})
-
-test('resolveServedDashboardToken falls back when the served HTML has no token', async () => {
-  const token = await resolveServedDashboardToken('http://127.0.0.1:9120', 'spawn-token', {
-    fetchText: async () => '<html></html>',
-    rememberLog: () => {
-      throw new Error('should not log when no served token is present')
-    }
-  })
-
-  assert.equal(token, 'spawn-token')
-})
-
-test('resolveServedDashboardToken does not log when served token matches fallback', async () => {
-  const token = await resolveServedDashboardToken('http://127.0.0.1:9120', 'same-token', {
-    fetchText: async () => '<script>window.__HERMES_SESSION_TOKEN__="same-token";</script>',
-    rememberLog: () => {
-      throw new Error('should not log when token already matches')
-    }
-  })
-
-  assert.equal(token, 'same-token')
-})
-
-test('resolveServedDashboardToken propagates fetch errors so callers can fall back explicitly', async () => {
-  await assert.rejects(
-    () =>
-      resolveServedDashboardToken('http://127.0.0.1:9120', 'spawn-token', {
-        fetchText: async () => {
-          throw new Error('boom')
-        }
-      }),
-    /boom/
-  )
-})
-
-test('fetchPublicText rejects unsupported protocols', async () => {
-  await assert.rejects(() => fetchPublicText('file:///tmp/index.html'), /Unsupported Hermes backend URL protocol/)
-})
-
-test('isForeignBackendToken only flags a mismatched token from a dead child', () => {
-  const cases = [
-    [{ servedToken: 'other', spawnToken: 'mine', childAlive: false }, true],
-    // Live child + drift = our backend regenerated the token (env pin lost).
-    [{ servedToken: 'other', spawnToken: 'mine', childAlive: true }, false],
-    [{ servedToken: 'mine', spawnToken: 'mine', childAlive: false }, false],
-    [{ servedToken: 'mine', spawnToken: 'mine', childAlive: true }, false],
-    [{ servedToken: null, spawnToken: 'mine', childAlive: false }, false],
-    [{ servedToken: '', spawnToken: 'mine', childAlive: false }, false]
-  ]
-  for (const [input, expected] of cases) {
-    assert.equal(isForeignBackendToken(input), expected, JSON.stringify(input))
-  }
-})
-
-test('adoptServedDashboardToken adopts drift from a live child', async () => {
-  const token = await adoptServedDashboardToken('http://127.0.0.1:9120', 'spawn-token', {
-    childAlive: () => true,
-    fetchText: async () => '<script>window.__HERMES_SESSION_TOKEN__="served-token";</script>'
-  })
-
-  assert.equal(token, 'served-token')
-})
-
-test('adoptServedDashboardToken refuses a foreign token when our child is dead', async () => {
-  await assert.rejects(
-    () =>
-      adoptServedDashboardToken('http://127.0.0.1:9120', 'spawn-token', {
-        childAlive: () => false,
-        fetchText: async () => '<script>window.__HERMES_SESSION_TOKEN__="squatter-token";</script>',
-        label: 'Hermes backend for profile "work"'
-      }),
-    /profile "work".*process we did not spawn/
-  )
-})
-
-test('adoptServedDashboardToken falls back to the spawn token when the fetch fails', async () => {
-  const logs = []
-  const token = await adoptServedDashboardToken('http://127.0.0.1:9120', 'spawn-token', {
-    childAlive: () => true,
-    fetchText: async () => {
-      throw new Error('boom')
-    },
-    rememberLog: line => logs.push(line)
-  })
-
-  assert.equal(token, 'spawn-token')
-  assert.equal(logs.length, 1)
-  assert.match(logs[0], /could not read served dashboard token \(Hermes backend\): boom/)
-})
--- a/apps/desktop/electron/fs-read-dir.cjs
+++ b/apps/desktop/electron/fs-read-dir.cjs
@@ -1,109 +0,0 @@
-'use strict'
-
-const fs = require('node:fs')
-const path = require('node:path')
-const { resolveDirectoryForIpc } = require('./hardening.cjs')
-
-const FS_READDIR_STAT_CONCURRENCY = 16
-
-// Always-hidden noise (covers non-git projects too; gitignore catches many of
-// these, but the project tree should keep the same hygiene without one).
-const FS_READDIR_HIDDEN = new Set([
-  '.git',
-  '.hg',
-  '.svn',
-  '.cache',
-  '.next',
-  '.turbo',
-  '.venv',
-  '__pycache__',
-  'build',
-  'dist',
-  'node_modules',
-  'target',
-  'venv'
-])
-
-function direntIsDirectory(dirent) {
-  return typeof dirent.isDirectory === 'function' && dirent.isDirectory()
-}
-
-function direntIsFile(dirent) {
-  return typeof dirent.isFile === 'function' && dirent.isFile()
-}
-
-function direntIsSymbolicLink(dirent) {
-  return typeof dirent.isSymbolicLink === 'function' && dirent.isSymbolicLink()
-}
-
-function shouldStatDirent(dirent) {
-  if (direntIsDirectory(dirent)) return false
-
-  return direntIsSymbolicLink(dirent) || !direntIsFile(dirent)
-}
-
-async function entryForDirent(dirent, resolved, fsImpl) {
-  const fullPath = path.join(resolved, dirent.name)
-  let isDirectory = direntIsDirectory(dirent)
-
-  if (!isDirectory && shouldStatDirent(dirent)) {
-    try {
-      isDirectory = (await fsImpl.promises.stat(fullPath)).isDirectory()
-    } catch {
-      isDirectory = false
-    }
-  }
-
-  return { name: dirent.name, path: fullPath, isDirectory }
-}
-
-async function mapWithStatConcurrency(items, mapper) {
-  const results = new Array(items.length)
-  let nextIndex = 0
-
-  async function runWorker() {
-    while (nextIndex < items.length) {
-      const index = nextIndex
-      nextIndex += 1
-      results[index] = await mapper(items[index])
-    }
-  }
-
-  const workerCount = Math.min(FS_READDIR_STAT_CONCURRENCY, items.length)
-  const workers = Array.from({ length: workerCount }, () => runWorker())
-  await Promise.all(workers)
-
-  return results
-}
-
-async function readDirForIpc(dirPath, options = {}) {
-  const fsImpl = options.fs || fs
-  let resolved
-
-  try {
-    ;({ resolvedPath: resolved } = await resolveDirectoryForIpc(dirPath, {
-      fs: fsImpl,
-      purpose: 'Directory read'
-    }))
-  } catch (error) {
-    return { entries: [], error: error?.code || 'read-error' }
-  }
-
-  try {
-    const dirents = await fsImpl.promises.readdir(resolved, { withFileTypes: true })
-    const visibleDirents = dirents.filter(dirent => !FS_READDIR_HIDDEN.has(dirent.name))
-    const entries = await mapWithStatConcurrency(visibleDirents, dirent =>
-      entryForDirent(dirent, resolved, fsImpl)
-    )
-
-    entries.sort((a, b) => Number(b.isDirectory) - Number(a.isDirectory) || a.name.localeCompare(b.name))
-
-    return { entries }
-  } catch (error) {
-    return { entries: [], error: error?.code || 'read-error' }
-  }
-}
-
-module.exports = {
-  readDirForIpc
-}
--- a/apps/desktop/electron/fs-read-dir.test.cjs
+++ b/apps/desktop/electron/fs-read-dir.test.cjs
@@ -1,364 +0,0 @@
-'use strict'
-
-const assert = require('node:assert/strict')
-const fs = require('node:fs')
-const os = require('node:os')
-const path = require('node:path')
-const test = require('node:test')
-const { pathToFileURL } = require('node:url')
-
-const { readDirForIpc } = require('./fs-read-dir.cjs')
-
-function mkTmpDir() {
-  return fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-fs-read-dir-'))
-}
-
-function fakeDirent(name, flags = {}) {
-  return {
-    name,
-    isDirectory: () => Boolean(flags.directory),
-    isFile: () => Boolean(flags.file),
-    isSymbolicLink: () => Boolean(flags.symlink)
-  }
-}
-
-test('readDirForIpc hides noisy directories and files from the project tree', async () => {
-  const root = mkTmpDir()
-
-  try {
-    fs.mkdirSync(path.join(root, 'node_modules'))
-    fs.mkdirSync(path.join(root, 'src'))
-    fs.writeFileSync(path.join(root, 'target'), 'hidden file')
-    fs.writeFileSync(path.join(root, 'README.md'), 'visible file')
-
-    const result = await readDirForIpc(root)
-
-    assert.equal(result.error, undefined)
-    assert.deepEqual(
-      result.entries.map(entry => entry.name),
-      ['src', 'README.md']
-    )
-  } finally {
-    fs.rmSync(root, { recursive: true, force: true })
-  }
-})
-
-test('readDirForIpc filters a hidden basename whether it is a file or directory', async () => {
-  const dirRoot = mkTmpDir()
-  const fileRoot = mkTmpDir()
-
-  try {
-    fs.mkdirSync(path.join(dirRoot, 'node_modules'))
-    fs.writeFileSync(path.join(dirRoot, 'visible.txt'), 'visible')
-    fs.writeFileSync(path.join(fileRoot, 'node_modules'), 'hidden file')
-    fs.writeFileSync(path.join(fileRoot, 'visible.txt'), 'visible')
-
-    assert.deepEqual(
-      (await readDirForIpc(dirRoot)).entries.map(entry => entry.name),
-      ['visible.txt']
-    )
-    assert.deepEqual(
-      (await readDirForIpc(fileRoot)).entries.map(entry => entry.name),
-      ['visible.txt']
-    )
-  } finally {
-    fs.rmSync(dirRoot, { recursive: true, force: true })
-    fs.rmSync(fileRoot, { recursive: true, force: true })
-  }
-})
-
-test('readDirForIpc returns directories before files and sorts by name within groups', async () => {
-  const root = mkTmpDir()
-
-  try {
-    fs.writeFileSync(path.join(root, 'z.txt'), 'z')
-    fs.mkdirSync(path.join(root, 'src'))
-    fs.writeFileSync(path.join(root, 'a.txt'), 'a')
-    fs.mkdirSync(path.join(root, 'lib'))
-
-    const result = await readDirForIpc(root)
-
-    assert.equal(result.error, undefined)
-    assert.deepEqual(
-      result.entries.map(entry => entry.name),
-      ['lib', 'src', 'a.txt', 'z.txt']
-    )
-  } finally {
-    fs.rmSync(root, { recursive: true, force: true })
-  }
-})
-
-test('readDirForIpc accepts file URLs for directories', async () => {
-  const root = mkTmpDir()
-
-  try {
-    fs.mkdirSync(path.join(root, 'src'))
-    fs.writeFileSync(path.join(root, 'README.md'), 'visible file')
-
-    const result = await readDirForIpc(pathToFileURL(root).toString())
-
-    assert.equal(result.error, undefined)
-    assert.deepEqual(
-      result.entries.map(entry => entry.name),
-      ['src', 'README.md']
-    )
-  } finally {
-    fs.rmSync(root, { recursive: true, force: true })
-  }
-})
-
-test('readDirForIpc returns invalid-path for blank or non-string input', async () => {
-  let readdirCalls = 0
-  const fsImpl = {
-    promises: {
-      readdir: async () => {
-        readdirCalls += 1
-        return []
-      }
-    }
-  }
-
-  assert.deepEqual(await readDirForIpc('', { fs: fsImpl }), { entries: [], error: 'invalid-path' })
-  assert.deepEqual(await readDirForIpc('   ', { fs: fsImpl }), { entries: [], error: 'invalid-path' })
-  assert.deepEqual(await readDirForIpc(null, { fs: fsImpl }), { entries: [], error: 'invalid-path' })
-  assert.equal(readdirCalls, 0)
-})
-
-test('readDirForIpc rejects Windows device paths before readdir', async () => {
-  let readdirCalls = 0
-  const fsImpl = {
-    promises: {
-      readdir: async () => {
-        readdirCalls += 1
-        return []
-      }
-    }
-  }
-
-  assert.deepEqual(await readDirForIpc('\\\\?\\C:\\secret', { fs: fsImpl }), {
-    entries: [],
-    error: 'device-path'
-  })
-  assert.equal(readdirCalls, 0)
-})
-
-test('readDirForIpc returns filesystem error codes instead of throwing', async () => {
-  const root = mkTmpDir()
-
-  try {
-    const result = await readDirForIpc(path.join(root, 'missing'))
-
-    assert.deepEqual(result, { entries: [], error: 'ENOENT' })
-  } finally {
-    fs.rmSync(root, { recursive: true, force: true })
-  }
-})
-
-test('readDirForIpc marks a symlink to a directory as a directory', async t => {
-  const root = mkTmpDir()
-
-  try {
-    fs.mkdirSync(path.join(root, 'actual-dir'))
-
-    try {
-      fs.symlinkSync(path.join(root, 'actual-dir'), path.join(root, 'linked-dir'), 'dir')
-    } catch (error) {
-      if (error?.code === 'EPERM' || error?.code === 'EACCES') {
-        t.skip(`symlink creation is not permitted on this platform (${error.code})`)
-
-        return
-      }
-
-      throw error
-    }
-
-    const result = await readDirForIpc(root)
-    const linked = result.entries.find(entry => entry.name === 'linked-dir')
-
-    assert.equal(result.error, undefined)
-    assert.equal(linked?.isDirectory, true)
-  } finally {
-    fs.rmSync(root, { recursive: true, force: true })
-  }
-})
-
-test('readDirForIpc marks a Windows junction to a directory as a directory', async t => {
-  if (process.platform !== 'win32') {
-    t.skip('junctions are a Windows-specific symlink type')
-
-    return
-  }
-
-  const root = mkTmpDir()
-
-  try {
-    fs.mkdirSync(path.join(root, 'actual-dir'))
-
-    try {
-      fs.symlinkSync(path.join(root, 'actual-dir'), path.join(root, 'junction-dir'), 'junction')
-    } catch (error) {
-      if (error?.code === 'EPERM' || error?.code === 'EACCES') {
-        t.skip(`junction creation is not permitted on this platform (${error.code})`)
-
-        return
-      }
-
-      throw error
-    }
-
-    const result = await readDirForIpc(root)
-    const junction = result.entries.find(entry => entry.name === 'junction-dir')
-
-    assert.equal(result.error, undefined)
-    assert.equal(junction?.isDirectory, true)
-  } finally {
-    fs.rmSync(root, { recursive: true, force: true })
-  }
-})
-
-test('readDirForIpc allows expanding symlink or junction directories outside the project root', async t => {
-  const root = mkTmpDir()
-  const outside = mkTmpDir()
-
-  try {
-    fs.writeFileSync(path.join(outside, 'outside.txt'), 'ok')
-
-    const linkPath = path.join(root, 'outside-link')
-    try {
-      fs.symlinkSync(outside, linkPath, process.platform === 'win32' ? 'junction' : 'dir')
-    } catch (error) {
-      if (error?.code === 'EPERM' || error?.code === 'EACCES') {
-        t.skip(`directory symlink creation is not permitted on this platform (${error.code})`)
-
-        return
-      }
-
-      throw error
-    }
-
-    const result = await readDirForIpc(linkPath)
-
-    assert.equal(result.error, undefined)
-    assert.deepEqual(result.entries, [
-      { name: 'outside.txt', path: path.join(linkPath, 'outside.txt'), isDirectory: false }
-    ])
-  } finally {
-    fs.rmSync(root, { recursive: true, force: true })
-    fs.rmSync(outside, { recursive: true, force: true })
-  }
-})
-
-test('readDirForIpc stats symbolic links and unknown entries without dropping the whole listing', async () => {
-  const input = path.join('virtual-root')
-  const resolved = path.resolve(input)
-  const statCalls = []
-  const fsImpl = {
-    promises: {
-      readdir: async () => [
-        fakeDirent('unknown-entry'),
-        fakeDirent('linked-dir', { symlink: true }),
-        fakeDirent('broken-link', { symlink: true }),
-        fakeDirent('plain.txt', { file: true })
-      ],
-      stat: async fullPath => {
-        if (fullPath === resolved) {
-          return { isDirectory: () => true }
-        }
-
-        statCalls.push(fullPath)
-        if (fullPath.endsWith(`${path.sep}linked-dir`)) {
-          return { isDirectory: () => true }
-        }
-        throw Object.assign(new Error('gone'), { code: 'ENOENT' })
-      }
-    }
-  }
-
-  const result = await readDirForIpc(input, { fs: fsImpl })
-
-  assert.equal(result.error, undefined)
-  assert.deepEqual(
-    statCalls.sort(),
-    [path.join(resolved, 'broken-link'), path.join(resolved, 'linked-dir'), path.join(resolved, 'unknown-entry')].sort()
-  )
-  assert.deepEqual(result.entries, [
-    { name: 'linked-dir', path: path.join(resolved, 'linked-dir'), isDirectory: true },
-    { name: 'broken-link', path: path.join(resolved, 'broken-link'), isDirectory: false },
-    { name: 'plain.txt', path: path.join(resolved, 'plain.txt'), isDirectory: false },
-    { name: 'unknown-entry', path: path.join(resolved, 'unknown-entry'), isDirectory: false }
-  ])
-})
-
-test('readDirForIpc bounds concurrent stats while preserving complete sorted output', async () => {
-  const input = path.join('virtual-root')
-  const resolved = path.resolve(input)
-  const names = Array.from({ length: 105 }, (_, index) => `entry-${String(104 - index).padStart(3, '0')}`)
-  const failedName = 'entry-100'
-  const directoryNames = new Set(names.filter((_, index) => index % 10 === 4))
-  const successfulDirectoryNames = new Set([...directoryNames].filter(name => name !== failedName))
-  const statCalls = []
-  let active = 0
-  let peak = 0
-  let releaseStats
-  let markFirstStatStarted
-  const statsReleased = new Promise(resolve => {
-    releaseStats = resolve
-  })
-  const firstStatStarted = new Promise(resolve => {
-    markFirstStatStarted = resolve
-  })
-  const fsImpl = {
-    promises: {
-      readdir: async () => [
-        fakeDirent('node_modules', { symlink: true }),
-        ...names.map((name, index) => fakeDirent(name, { symlink: index % 2 === 0 }))
-      ],
-      stat: async fullPath => {
-        if (fullPath === resolved) {
-          return { isDirectory: () => true }
-        }
-
-        statCalls.push(fullPath)
-        active += 1
-        peak = Math.max(peak, active)
-        markFirstStatStarted()
-        await statsReleased
-        active -= 1
-
-        const name = path.basename(fullPath)
-        if (name === failedName) {
-          throw Object.assign(new Error('gone'), { code: 'ENOENT' })
-        }
-
-        return { isDirectory: () => successfulDirectoryNames.has(name) }
-      }
-    }
-  }
-
-  const resultPromise = readDirForIpc(input, { fs: fsImpl })
-  await firstStatStarted
-  await new Promise(resolve => setImmediate(resolve))
-  releaseStats()
-  const result = await resultPromise
-
-  const expectedNames = [
-    ...names.filter(name => successfulDirectoryNames.has(name)).sort(),
-    ...names.filter(name => !successfulDirectoryNames.has(name)).sort()
-  ]
-
-  assert.equal(result.error, undefined)
-  assert.equal(result.entries.length, names.length)
-  assert.equal(statCalls.length, names.length)
-  assert.equal(statCalls.some(fullPath => fullPath.endsWith(`${path.sep}node_modules`)), false)
-  assert.ok(peak > 1, `expected concurrent stats, observed peak ${peak}`)
-  assert.ok(peak <= 16, `expected at most 16 concurrent stats, observed peak ${peak}`)
-  assert.deepEqual(
-    result.entries.map(entry => entry.name),
-    expectedNames
-  )
-  assert.equal(result.entries.find(entry => entry.name === failedName)?.isDirectory, false)
-  assert.equal(
-    result.entries.filter(entry => entry.isDirectory).length,
-    successfulDirectoryNames.size
-  )
-})
--- a/apps/desktop/electron/git-root.cjs
+++ b/apps/desktop/electron/git-root.cjs
@@ -1,54 +0,0 @@
-'use strict'
-
-const fs = require('node:fs')
-const path = require('node:path')
-const { resolveRequestedPathForIpc } = require('./hardening.cjs')
-
-function findGitRoot(start, fsImpl = fs) {
-  let dir = start
-
-  for (let i = 0; i < 50; i += 1) {
-    try {
-      if (fsImpl.existsSync(path.join(dir, '.git'))) {
-        return dir
-      }
-    } catch {
-      return null
-    }
-
-    const parent = path.dirname(dir)
-
-    if (parent === dir) {
-      return null
-    }
-
-    dir = parent
-  }
-
-  return null
-}
-
-async function gitRootForIpc(startPath, options = {}) {
-  const fsImpl = options.fs || fs
-  let resolved
-
-  try {
-    resolved = resolveRequestedPathForIpc(startPath, { purpose: 'Git root' })
-  } catch {
-    return null
-  }
-
-  try {
-    const stat = await fsImpl.promises.stat(resolved)
-    const start = stat.isDirectory() ? resolved : path.dirname(resolved)
-
-    return findGitRoot(start, fsImpl)
-  } catch {
-    return findGitRoot(resolved, fsImpl)
-  }
-}
-
-module.exports = {
-  findGitRoot,
-  gitRootForIpc
-}
--- a/apps/desktop/electron/git-root.test.cjs
+++ b/apps/desktop/electron/git-root.test.cjs
@@ -1,40 +0,0 @@
-'use strict'
-
-const assert = require('node:assert/strict')
-const fs = require('node:fs')
-const os = require('node:os')
-const path = require('node:path')
-const test = require('node:test')
-const { pathToFileURL } = require('node:url')
-
-const { gitRootForIpc } = require('./git-root.cjs')
-
-function mkTmpDir() {
-  return fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-git-root-'))
-}
-
-test('gitRootForIpc returns null for invalid and device paths', async () => {
-  assert.equal(await gitRootForIpc(''), null)
-  assert.equal(await gitRootForIpc('   '), null)
-  assert.equal(await gitRootForIpc(null), null)
-  assert.equal(await gitRootForIpc('\\\\?\\C:\\secret'), null)
-  assert.equal(await gitRootForIpc('file:///%E0%A4%A'), null)
-})
-
-test('gitRootForIpc resolves directories files missing descendants and file URLs', async t => {
-  const root = mkTmpDir()
-  t.after(() => fs.rmSync(root, { recursive: true, force: true }))
-
-  const gitDir = path.join(root, '.git')
-  const srcDir = path.join(root, 'src')
-  const filePath = path.join(srcDir, 'index.ts')
-  fs.mkdirSync(gitDir)
-  fs.mkdirSync(srcDir)
-  fs.writeFileSync(filePath, 'export {}\n', 'utf8')
-
-  assert.equal(await gitRootForIpc(root), root)
-  assert.equal(await gitRootForIpc(srcDir), root)
-  assert.equal(await gitRootForIpc(filePath), root)
-  assert.equal(await gitRootForIpc(pathToFileURL(filePath).toString()), root)
-  assert.equal(await gitRootForIpc(path.join(srcDir, 'missing.ts')), root)
-})
--- a/apps/desktop/electron/git-worktrees.cjs
+++ b/apps/desktop/electron/git-worktrees.cjs
@@ -1,174 +0,0 @@
-'use strict'
-
-// Resolve git-worktree relationships for a set of session cwds, reading git's
-// on-disk metadata directly (no `git` spawn per path):
-//
-//   - A normal checkout has a `.git` DIRECTORY at its root → it's the main
-//     worktree; its repo root IS that directory's parent.
-//   - A linked worktree has a `.git` FILE: `gitdir: <repo>/.git/worktrees/<name>`.
-//     That admin dir's `commondir` points back at the shared `<repo>/.git`, whose
-//     parent is the main repo root.
-//
-// Grouping by repoRoot therefore clusters a repo's main checkout with all of its
-// linked worktrees, regardless of how the worktree directories are named. The
-// branch (read from the worktree's own HEAD) gives each worktree a meaningful
-// label.
-
-const fs = require('node:fs')
-const path = require('node:path')
-const { resolveRequestedPathForIpc } = require('./hardening.cjs')
-
-// Walk up from `start` to the nearest ancestor that carries a `.git` entry
-// (file for a linked worktree, dir for the main checkout). Capped so a stray
-// path can't loop forever.
-function findGitHost(start, fsImpl) {
-  let dir = start
-
-  for (let i = 0; i < 64; i += 1) {
-    const dotgit = path.join(dir, '.git')
-
-    try {
-      if (fsImpl.existsSync(dotgit)) {
-        return dir
-      }
-    } catch {
-      return null
-    }
-
-    const parent = path.dirname(dir)
-
-    if (parent === dir) {
-      return null
-    }
-
-    dir = parent
-  }
-
-  return null
-}
-
-function readBranch(gitDir, fsImpl) {
-  try {
-    const head = fsImpl.readFileSync(path.join(gitDir, 'HEAD'), 'utf8').trim()
-    const ref = head.match(/^ref:\s*refs\/heads\/(.+)$/)
-
-    if (ref) {
-      return ref[1]
-    }
-
-    // Detached HEAD: surface a short sha so the worktree still gets a label.
-    return /^[0-9a-f]{7,40}$/i.test(head) ? head.slice(0, 8) : null
-  } catch {
-    return null
-  }
-}
-
-// Given the directory that owns the `.git` entry, resolve its worktree identity.
-function resolveFromHost(host, fsImpl) {
-  const dotgit = path.join(host, '.git')
-  let stat
-
-  try {
-    stat = fsImpl.statSync(dotgit)
-  } catch {
-    return null
-  }
-
-  if (stat.isDirectory()) {
-    return {
-      repoRoot: host,
-      worktreeRoot: host,
-      isMainWorktree: true,
-      branch: readBranch(dotgit, fsImpl)
-    }
-  }
-
-  // Linked worktree: `.git` is a file pointing at the admin dir.
-  let contents
-
-  try {
-    contents = fsImpl.readFileSync(dotgit, 'utf8').trim()
-  } catch {
-    return null
-  }
-
-  const match = contents.match(/^gitdir:\s*(.+)$/m)
-
-  if (!match) {
-    return null
-  }
-
-  const adminDir = path.resolve(host, match[1].trim())
-
-  // `commondir` resolves to the shared `<repo>/.git`; fall back to walking two
-  // levels up from `<repo>/.git/worktrees/<name>` if it's missing.
-  let commonDir
-
-  try {
-    const rel = fsImpl.readFileSync(path.join(adminDir, 'commondir'), 'utf8').trim()
-    commonDir = path.resolve(adminDir, rel)
-  } catch {
-    commonDir = path.dirname(path.dirname(adminDir))
-  }
-
-  return {
-    repoRoot: path.dirname(commonDir),
-    worktreeRoot: host,
-    isMainWorktree: false,
-    branch: readBranch(adminDir, fsImpl)
-  }
-}
-
-function resolveWorktree(startPath, fsImpl = fs) {
-  let resolved
-
-  try {
-    resolved = resolveRequestedPathForIpc(startPath, { purpose: 'Worktree lookup' })
-  } catch {
-    return null
-  }
-
-  let start = resolved
-
-  try {
-    const stat = fsImpl.statSync(resolved)
-
-    if (!stat.isDirectory()) {
-      start = path.dirname(resolved)
-    }
-  } catch {
-    return null
-  }
-
-  const host = findGitHost(start, fsImpl)
-
-  if (!host) {
-    return null
-  }
-
-  return resolveFromHost(host, fsImpl)
-}
-
-// Batch entry point for the renderer: maps each requested cwd to its worktree
-// info (or null when it isn't inside a git checkout / can't be read). Dedupes so
-// many sessions sharing a cwd cost one lookup.
-async function worktreesForIpc(cwds, options = {}) {
-  const fsImpl = options.fs || fs
-  const list = Array.isArray(cwds) ? cwds : []
-  const out = {}
-
-  for (const cwd of list) {
-    if (typeof cwd !== 'string' || !cwd.trim() || cwd in out) {
-      continue
-    }
-
-    out[cwd] = resolveWorktree(cwd, fsImpl)
-  }
-
-  return out
-}
-
-module.exports = {
-  resolveWorktree,
-  worktreesForIpc
-}
--- a/apps/desktop/electron/hardening.cjs
+++ b/apps/desktop/electron/hardening.cjs
@@ -1,5 +1,4 @@
 const fs = require('node:fs')
-const os = require('node:os')
 const path = require('node:path')
 const { fileURLToPath } = require('node:url')

@@ -107,162 +106,71 @@ function sensitiveFileBlockReason(filePath) {
  return null
 }

-function ipcPathError(code, message) {
-  const error = new Error(message)
-  error.code = code
-  return error
-}
-
-function rejectUnsafePathSyntax(filePath, purpose = 'File read') {
-  if (typeof filePath !== 'string') {
-    throw ipcPathError('invalid-path', `${purpose} failed: file path is required.`)
-  }
-
-  const raw = filePath.trim()
+function resolveRequestedFilePath(filePath, baseDir = process.cwd(), purpose = 'File read') {
+  const raw = String(filePath || '').trim()

  if (!raw) {
-    throw ipcPathError('invalid-path', `${purpose} failed: file path is required.`)
+    throw new Error(`${purpose} failed: file path is required.`)
  }

  if (raw.includes('\0')) {
-    throw ipcPathError('invalid-path', `${purpose} failed: file path is invalid.`)
-  }
-
-  const normalized = raw.replace(/\\/g, '/').toLowerCase()
-  if (
-    normalized.startsWith('//?/') ||
-    normalized.startsWith('//./') ||
-    normalized.startsWith('globalroot/device/') ||
-    normalized.includes('/globalroot/device/')
-  ) {
-    throw ipcPathError('device-path', `${purpose} blocked: Windows device paths are not allowed.`)
-  }
-
-  return raw
-}
-
-function resolveRequestedPathForIpc(filePath, options = {}) {
-  const purpose = String(options.purpose || 'File read')
-  let raw = rejectUnsafePathSyntax(filePath, purpose)
-
-  // Gateway-reported cwds (config `terminal.cwd`, remote sessions) routinely
-  // arrive as `~/...`. Node's fs has no shell — without expansion the path
-  // resolves under process.cwd() and every read "ENOENT"s forever.
-  if (raw === '~' || raw.startsWith('~/') || raw.startsWith('~\\')) {
-    raw = path.join(os.homedir(), raw.slice(1))
+    throw new Error(`${purpose} failed: file path is invalid.`)
  }

  if (/^file:/i.test(raw)) {
-    let resolvedPath
    try {
-      const parsed = new URL(raw)
-      if (parsed.protocol !== 'file:') {
-        throw new Error('not a file URL')
-      }
-      resolvedPath = fileURLToPath(parsed)
+      return fileURLToPath(raw)
    } catch {
-      throw ipcPathError('invalid-path', `${purpose} failed: file URL is invalid.`)
+      throw new Error(`${purpose} failed: file URL is invalid.`)
    }
-
-    rejectUnsafePathSyntax(resolvedPath, purpose)
-    return path.resolve(resolvedPath)
  }

-  const baseInput = typeof options.baseDir === 'string' && options.baseDir.trim() ? options.baseDir : process.cwd()
-  const safeBaseInput = rejectUnsafePathSyntax(baseInput, purpose)
-  const resolvedBase = path.resolve(safeBaseInput)
-  rejectUnsafePathSyntax(resolvedBase, purpose)
-  const resolvedPath = path.resolve(resolvedBase, raw)
-  rejectUnsafePathSyntax(resolvedPath, purpose)
-
-  return resolvedPath
-}
-
-async function statForIpc(fsImpl, resolvedPath, purpose, typeLabel) {
-  try {
-    return await fsImpl.promises.stat(resolvedPath)
-  } catch (error) {
-    const code = error && typeof error === 'object' ? error.code : ''
-    if (code === 'ENOENT' || code === 'ENOTDIR') {
-      throw ipcPathError(code || 'ENOENT', `${purpose} failed: ${typeLabel} does not exist.`)
-    }
-    throw ipcPathError(code || 'read-error', `${purpose} failed: ${error instanceof Error ? error.message : String(error)}`)
-  }
-}
-
-async function realpathForIpc(fsImpl, resolvedPath, purpose) {
-  if (typeof fsImpl.promises.realpath !== 'function') {
-    return resolvedPath
-  }
-
-  try {
-    const realPath = await fsImpl.promises.realpath(resolvedPath)
-    rejectUnsafePathSyntax(realPath, purpose)
-    return realPath
-  } catch (error) {
-    const code = error && typeof error === 'object' ? error.code : ''
-    throw ipcPathError(code || 'read-error', `${purpose} failed: ${error instanceof Error ? error.message : String(error)}`)
-  }
-}
-
-function rejectSensitiveFilePath(filePath, purpose) {
-  const blockReason = sensitiveFileBlockReason(filePath)
-  if (blockReason) {
-    throw ipcPathError('sensitive-file', `${purpose} blocked for sensitive file: ${blockReason}`)
-  }
-}
-
-async function resolveDirectoryForIpc(dirPath, options = {}) {
-  const purpose = String(options.purpose || 'Directory read')
-  const fsImpl = options.fs || fs
-  const resolvedPath = resolveRequestedPathForIpc(dirPath, { baseDir: options.baseDir, purpose })
-  const stat = await statForIpc(fsImpl, resolvedPath, purpose, 'directory')
-
-  if (!stat.isDirectory()) {
-    throw ipcPathError('ENOTDIR', `${purpose} failed: path is not a directory.`)
-  }
-
-  const realPath = await realpathForIpc(fsImpl, resolvedPath, purpose)
-
-  return { realPath, resolvedPath, stat }
+  const resolvedBase = path.resolve(String(baseDir || process.cwd()))
+  return path.resolve(resolvedBase, raw)
 }

 async function resolveReadableFileForIpc(filePath, options = {}) {
  const purpose = String(options.purpose || 'File read')
-  const fsImpl = options.fs || fs
-  const resolvedPath = resolveRequestedPathForIpc(filePath, { baseDir: options.baseDir, purpose })
+  const resolvedPath = resolveRequestedFilePath(filePath, options.baseDir, purpose)

  if (options.blockSensitive !== false) {
-    rejectSensitiveFilePath(resolvedPath, purpose)
+    const blockReason = sensitiveFileBlockReason(resolvedPath)
+    if (blockReason) {
+      throw new Error(`${purpose} blocked for sensitive file: ${blockReason}`)
+    }
  }

-  const stat = await statForIpc(fsImpl, resolvedPath, purpose, 'file')
+  let stat
+  try {
+    stat = await fs.promises.stat(resolvedPath)
+  } catch (error) {
+    const code = error && typeof error === 'object' ? error.code : ''
+    if (code === 'ENOENT' || code === 'ENOTDIR') {
+      throw new Error(`${purpose} failed: file does not exist.`)
+    }
+    throw new Error(`${purpose} failed: ${error instanceof Error ? error.message : String(error)}`)
+  }

  if (stat.isDirectory()) {
-    throw ipcPathError('EISDIR', `${purpose} failed: path points to a directory.`)
+    throw new Error(`${purpose} failed: path points to a directory.`)
  }

  if (!stat.isFile()) {
-    throw ipcPathError('EINVAL', `${purpose} failed: only regular files can be read.`)
-  }
-
-  const realPath = await realpathForIpc(fsImpl, resolvedPath, purpose)
-  if (options.blockSensitive !== false) {
-    rejectSensitiveFilePath(realPath, purpose)
+    throw new Error(`${purpose} failed: only regular files can be read.`)
  }

  const maxBytes = Number.isFinite(options.maxBytes) && Number(options.maxBytes) > 0 ? Number(options.maxBytes) : null
  if (maxBytes && stat.size > maxBytes) {
-    throw ipcPathError('EFBIG', `${purpose} failed: file is too large (${stat.size} bytes; limit ${maxBytes} bytes).`)
+    throw new Error(`${purpose} failed: file is too large (${stat.size} bytes; limit ${maxBytes} bytes).`)
  }

  try {
-    await fsImpl.promises.access(resolvedPath, fs.constants.R_OK)
+    await fs.promises.access(resolvedPath, fs.constants.R_OK)
  } catch {
-    throw ipcPathError('EACCES', `${purpose} failed: file is not readable.`)
+    throw new Error(`${purpose} failed: file is not readable.`)
  }

-  return { realPath, resolvedPath, stat }
+  return { resolvedPath, stat }
 }

 module.exports = {
@@ -270,10 +178,7 @@ module.exports = {
  DEFAULT_FETCH_TIMEOUT_MS,
  TEXT_PREVIEW_SOURCE_MAX_BYTES,
  encryptDesktopSecret,
-  rejectUnsafePathSyntax,
-  resolveDirectoryForIpc,
  resolveReadableFileForIpc,
-  resolveRequestedPathForIpc,
  resolveTimeoutMs,
  sensitiveFileBlockReason
 }
--- a/apps/desktop/electron/hardening.test.cjs
+++ b/apps/desktop/electron/hardening.test.cjs
@@ -8,20 +8,11 @@ const { pathToFileURL } = require('node:url')
 const {
  DEFAULT_FETCH_TIMEOUT_MS,
  encryptDesktopSecret,
-  resolveDirectoryForIpc,
  resolveReadableFileForIpc,
-  resolveRequestedPathForIpc,
  resolveTimeoutMs,
  sensitiveFileBlockReason
 } = require('./hardening.cjs')

-async function rejectsWithCode(promise, code) {
-  await assert.rejects(promise, error => {
-    assert.equal(error?.code, code)
-    return true
-  })
-}
-
 test('resolveTimeoutMs falls back to defaults and accepts overrides', () => {
  assert.equal(resolveTimeoutMs(undefined), DEFAULT_FETCH_TIMEOUT_MS)
  assert.equal(resolveTimeoutMs(0), DEFAULT_FETCH_TIMEOUT_MS)
@@ -60,65 +51,6 @@ test('sensitiveFileBlockReason blocks obvious secret file patterns', () => {
  assert.match(String(sensitiveFileBlockReason('/tmp/server-cert.pem')), /\.pem/)
 })

-test('path helpers reject blank non-string NUL and Windows device syntax', async () => {
-  await rejectsWithCode(resolveReadableFileForIpc('', { purpose: 'File preview' }), 'invalid-path')
-  await rejectsWithCode(resolveReadableFileForIpc('   ', { purpose: 'File preview' }), 'invalid-path')
-  await rejectsWithCode(resolveReadableFileForIpc(null, { purpose: 'File preview' }), 'invalid-path')
-  await rejectsWithCode(resolveReadableFileForIpc(`safe${String.fromCharCode(0)}name.txt`), 'invalid-path')
-
-  const devicePaths = [
-    '\\\\?\\C:\\secret.txt',
-    '\\\\.\\C:\\secret.txt',
-    '\\\\?\\UNC\\server\\share\\secret.txt',
-    'GLOBALROOT/Device/HarddiskVolumeShadowCopy1/secret.txt'
-  ]
-
-  for (const devicePath of devicePaths) {
-    assert.throws(
-      () => resolveRequestedPathForIpc(devicePath, { purpose: 'File preview' }),
-      error => {
-        assert.equal(error?.code, 'device-path')
-        return true
-      }
-    )
-    await rejectsWithCode(resolveReadableFileForIpc(devicePath, { purpose: 'File preview' }), 'device-path')
-  }
-
-  assert.throws(
-    () => resolveRequestedPathForIpc('file:///%E0%A4%A', { purpose: 'File preview' }),
-    error => {
-      assert.equal(error?.code, 'invalid-path')
-      return true
-    }
-  )
-  await rejectsWithCode(resolveReadableFileForIpc('file:///%E0%A4%A', { purpose: 'File preview' }), 'invalid-path')
-})
-
-test('resolveRequestedPathForIpc resolves relative paths from the trimmed base directory', () => {
-  const baseDir = path.join(os.tmpdir(), 'hermes-desktop-base')
-
-  assert.equal(
-    resolveRequestedPathForIpc('notes.txt', {
-      baseDir: `  ${baseDir}  `,
-      purpose: 'File preview'
-    }),
-    path.resolve(baseDir, 'notes.txt')
-  )
-})
-
-test('resolveRequestedPathForIpc expands ~ to the home directory', () => {
-  assert.equal(resolveRequestedPathForIpc('~', { purpose: 'Directory read' }), path.resolve(os.homedir()))
-  assert.equal(
-    resolveRequestedPathForIpc('~/www/project', { purpose: 'Directory read' }),
-    path.resolve(os.homedir(), 'www/project')
-  )
-  // `~user` shorthand is NOT expanded — only the caller's own home.
-  assert.equal(
-    resolveRequestedPathForIpc('~other/secret', { baseDir: os.tmpdir(), purpose: 'Directory read' }),
-    path.resolve(os.tmpdir(), '~other/secret')
-  )
-})
-
 test('resolveReadableFileForIpc validates existence type size and sensitivity', async t => {
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-desktop-hardening-'))
  t.after(() => fs.rmSync(tempDir, { recursive: true, force: true }))
@@ -139,13 +71,6 @@ test('resolveReadableFileForIpc validates existence type size and sensitivity',
  })
  assert.equal(fromFileUrl.resolvedPath, textPath)

-  const spacedPath = path.join(tempDir, 'notes with spaces.txt')
-  fs.writeFileSync(spacedPath, 'space ok', 'utf8')
-  const fromSpacedFileUrl = await resolveReadableFileForIpc(pathToFileURL(spacedPath).toString(), {
-    purpose: 'File preview'
-  })
-  assert.equal(fromSpacedFileUrl.resolvedPath, spacedPath)
-
  await assert.rejects(
    resolveReadableFileForIpc('missing.txt', {
      baseDir: tempDir,
@@ -189,91 +114,3 @@ test('resolveReadableFileForIpc validates existence type size and sensitivity',
  })
  assert.equal(envTemplate.resolvedPath, envTemplatePath)
 })
-
-test('resolveReadableFileForIpc blocks common sensitive files', async t => {
-  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-desktop-sensitive-'))
-  t.after(() => fs.rmSync(tempDir, { recursive: true, force: true }))
-
-  const sshDir = path.join(tempDir, '.ssh')
-  fs.mkdirSync(sshDir)
-
-  const blockedFiles = [
-    path.join(tempDir, '.env'),
-    path.join(tempDir, '.npmrc'),
-    path.join(sshDir, 'id_ed25519'),
-    path.join(tempDir, 'cert.pem'),
-    path.join(tempDir, 'cert.p12'),
-    path.join(tempDir, 'cert.pfx')
-  ]
-
-  for (const filePath of blockedFiles) {
-    fs.writeFileSync(filePath, 'secret', 'utf8')
-    await rejectsWithCode(resolveReadableFileForIpc(filePath, { purpose: 'File preview' }), 'sensitive-file')
-  }
-
-  const allowed = path.join(tempDir, '.env.example')
-  fs.writeFileSync(allowed, 'EXAMPLE_TOKEN=value', 'utf8')
-  assert.equal((await resolveReadableFileForIpc(allowed, { purpose: 'File preview' })).resolvedPath, allowed)
-})
-
-test('resolveReadableFileForIpc blocks symlinks whose realpath is sensitive', async t => {
-  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-desktop-realpath-'))
-  t.after(() => fs.rmSync(tempDir, { recursive: true, force: true }))
-
-  const envPath = path.join(tempDir, '.env')
-  const linkPath = path.join(tempDir, 'safe-name.txt')
-  fs.writeFileSync(envPath, 'SECRET_TOKEN=123', 'utf8')
-
-  try {
-    fs.symlinkSync(envPath, linkPath, 'file')
-  } catch (error) {
-    if (error?.code === 'EPERM' || error?.code === 'EACCES') {
-      t.skip(`symlink creation is not permitted on this platform (${error.code})`)
-      return
-    }
-    throw error
-  }
-
-  await rejectsWithCode(resolveReadableFileForIpc(linkPath, { purpose: 'File preview' }), 'sensitive-file')
-})
-
-test('resolveDirectoryForIpc accepts directories and rejects invalid directory targets', async t => {
-  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-desktop-dir-'))
-  t.after(() => fs.rmSync(tempDir, { recursive: true, force: true }))
-
-  const directory = path.join(tempDir, 'project')
-  const filePath = path.join(tempDir, 'file.txt')
-  fs.mkdirSync(directory)
-  fs.writeFileSync(filePath, 'not a directory', 'utf8')
-
-  const resolved = await resolveDirectoryForIpc(directory)
-  assert.equal(resolved.resolvedPath, directory)
-  assert.equal(resolved.stat.isDirectory(), true)
-
-  await rejectsWithCode(resolveDirectoryForIpc(filePath), 'ENOTDIR')
-  await rejectsWithCode(resolveDirectoryForIpc(path.join(tempDir, 'missing')), 'ENOENT')
-  await rejectsWithCode(resolveDirectoryForIpc('\\\\?\\C:\\secret'), 'device-path')
-})
-
-test('resolveDirectoryForIpc accepts directory symlinks or junctions', async t => {
-  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-desktop-dir-link-'))
-  t.after(() => fs.rmSync(tempDir, { recursive: true, force: true }))
-
-  const directory = path.join(tempDir, 'actual-project')
-  const linkPath = path.join(tempDir, 'linked-project')
-  fs.mkdirSync(directory)
-
-  try {
-    fs.symlinkSync(directory, linkPath, process.platform === 'win32' ? 'junction' : 'dir')
-  } catch (error) {
-    if (error?.code === 'EPERM' || error?.code === 'EACCES') {
-      t.skip(`directory symlink creation is not permitted on this platform (${error.code})`)
-      return
-    }
-    throw error
-  }
-
-  const resolved = await resolveDirectoryForIpc(linkPath)
-  assert.equal(resolved.resolvedPath, linkPath)
-  assert.equal(resolved.stat.isDirectory(), true)
-})
--- a/apps/desktop/electron/main.cjs
+++ b/apps/desktop/electron/main.cjs
--- a/apps/desktop/electron/preload.cjs
+++ b/apps/desktop/electron/preload.cjs
@@ -5,8 +5,6 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
  revalidateConnection: () => ipcRenderer.invoke('hermes:connection:revalidate'),
  touchBackend: profile => ipcRenderer.invoke('hermes:backend:touch', profile),
  getGatewayWsUrl: profile => ipcRenderer.invoke('hermes:gateway:ws-url', profile),
-  openSessionWindow: (sessionId, opts) => ipcRenderer.invoke('hermes:window:openSession', sessionId, opts),
-  openNewSessionWindow: () => ipcRenderer.invoke('hermes:window:openNewSession'),
  getBootProgress: () => ipcRenderer.invoke('hermes:boot-progress:get'),
  getConnectionConfig: profile => ipcRenderer.invoke('hermes:connection-config:get', profile),
  saveConnectionConfig: payload => ipcRenderer.invoke('hermes:connection-config:save', payload),
@@ -40,12 +38,9 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
  watchPreviewFile: url => ipcRenderer.invoke('hermes:watchPreviewFile', url),
  stopPreviewFileWatch: id => ipcRenderer.invoke('hermes:stopPreviewFileWatch', id),
  setTitleBarTheme: payload => ipcRenderer.send('hermes:titlebar-theme', payload),
-  setNativeTheme: mode => ipcRenderer.send('hermes:native-theme', mode),
-  setTranslucency: payload => ipcRenderer.send('hermes:translucency', payload),
  setPreviewShortcutActive: active => ipcRenderer.send('hermes:previewShortcutActive', Boolean(active)),
  openExternal: url => ipcRenderer.invoke('hermes:openExternal', url),
  fetchLinkTitle: url => ipcRenderer.invoke('hermes:fetchLinkTitle', url),
-  sanitizeWorkspaceCwd: cwd => ipcRenderer.invoke('hermes:workspace:sanitize', cwd),
  settings: {
    getDefaultProjectDir: () => ipcRenderer.invoke('hermes:setting:defaultProjectDir:get'),
    setDefaultProjectDir: dir => ipcRenderer.invoke('hermes:setting:defaultProjectDir:set', dir),
@@ -55,7 +50,6 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
  getRecentLogs: () => ipcRenderer.invoke('hermes:logs:recent'),
  readDir: dirPath => ipcRenderer.invoke('hermes:fs:readDir', dirPath),
  gitRoot: startPath => ipcRenderer.invoke('hermes:fs:gitRoot', startPath),
-  worktrees: cwds => ipcRenderer.invoke('hermes:fs:worktrees', cwds),
  terminal: {
    dispose: id => ipcRenderer.invoke('hermes:terminal:dispose', id),
    resize: (id, size) => ipcRenderer.invoke('hermes:terminal:resize', id, size),
@@ -84,27 +78,11 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
    ipcRenderer.on('hermes:open-updates', listener)
    return () => ipcRenderer.removeListener('hermes:open-updates', listener)
  },
-  onDeepLink: callback => {
-    const listener = (_event, payload) => callback(payload)
-    ipcRenderer.on('hermes:deep-link', listener)
-    return () => ipcRenderer.removeListener('hermes:deep-link', listener)
-  },
-  signalDeepLinkReady: () => ipcRenderer.invoke('hermes:deep-link-ready'),
  onWindowStateChanged: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:window-state-changed', listener)
    return () => ipcRenderer.removeListener('hermes:window-state-changed', listener)
  },
-  onFocusSession: callback => {
-    const listener = (_event, sessionId) => callback(sessionId)
-    ipcRenderer.on('hermes:focus-session', listener)
-    return () => ipcRenderer.removeListener('hermes:focus-session', listener)
-  },
-  onNotificationAction: callback => {
-    const listener = (_event, payload) => callback(payload)
-    ipcRenderer.on('hermes:notification-action', listener)
-    return () => ipcRenderer.removeListener('hermes:notification-action', listener)
-  },
  onPreviewFileChanged: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:preview-file-changed', listener)
@@ -154,9 +132,5 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
      ipcRenderer.on('hermes:updates:progress', listener)
      return () => ipcRenderer.removeListener('hermes:updates:progress', listener)
    }
-  },
-  themes: {
-    fetchMarketplace: id => ipcRenderer.invoke('hermes:vscode-theme:fetch', id),
-    searchMarketplace: query => ipcRenderer.invoke('hermes:vscode-theme:search', query)
  }
 })
--- a/apps/desktop/electron/session-windows.cjs
+++ b/apps/desktop/electron/session-windows.cjs
@@ -1,100 +0,0 @@
-// Secondary "session windows" — one extra OS window per chat so a user can
-// work with multiple chats side by side. The pure, Electron-free pieces live
-// here so they can be unit-tested with node --test (mirroring how the rest of
-// electron/*.cjs splits testable logic out of the main.cjs monolith).
-
-const { pathToFileURL } = require('node:url')
-
-// Secondary windows open at the minimum usable size — a compact side panel for
-// subagent watch / cmd-click session pop-out, not a second full desktop.
-const SESSION_WINDOW_MIN_WIDTH = 420
-const SESSION_WINDOW_MIN_HEIGHT = 620
-
-// Build the renderer URL for a secondary window. The renderer uses a
-// HashRouter, so the session route lives after the '#'. The `?win=secondary`
-// flag MUST sit in the query string BEFORE the '#': anything after the '#' is
-// treated as the route by HashRouter and would break routeSessionId(). The
-// renderer reads the flag from window.location.search to suppress the install /
-// onboarding overlays and the global session sidebar. `new=1` marks the compact
-// scratch window; `watch=1` marks a spectator window (e.g. a running subagent's
-// session): the renderer resumes it lazily so the gateway never builds an agent
-// just to stream into it.
-function buildSessionWindowUrl(sessionId, { devServer, rendererIndexPath, watch, newSession } = {}) {
-  const query = `?win=secondary${newSession ? '&new=1' : ''}${watch ? '&watch=1' : ''}`
-  const route = newSession ? '#/' : `#/${encodeURIComponent(sessionId)}`
-
-  if (devServer) {
-    const base = devServer.endsWith('/') ? devServer.slice(0, -1) : devServer
-
-    return `${base}/${query}${route}`
-  }
-
-  return `${pathToFileURL(rendererIndexPath).toString()}${query}${route}`
-}
-
-// A small registry keyed by sessionId that guarantees one window per chat:
-// opening a session that already has a live window focuses it instead of
-// spawning a duplicate, and a window removes itself from the registry when it
-// closes. The actual BrowserWindow construction is injected (the `factory`) so
-// this module stays free of Electron and is unit-testable.
-function createSessionWindowRegistry() {
-  const windows = new Map()
-
-  function openOrFocus(sessionId, factory) {
-    const key = typeof sessionId === 'string' ? sessionId.trim() : ''
-
-    if (!key) {
-      return null
-    }
-
-    const existing = windows.get(key)
-
-    if (existing && !existing.isDestroyed()) {
-      // Focus-or-create: never duplicate a window for the same chat.
-      if (typeof existing.isMinimized === 'function' && existing.isMinimized()) {
-        existing.restore?.()
-      }
-
-      if (typeof existing.isVisible === 'function' && !existing.isVisible()) {
-        existing.show?.()
-      }
-
-      existing.focus?.()
-
-      return existing
-    }
-
-    const win = factory(key)
-
-    if (!win) {
-      return null
-    }
-
-    windows.set(key, win)
-
-    // Self-cleanup on close so the registry never holds a destroyed window.
-    win.on?.('closed', () => {
-      if (windows.get(key) === win) {
-        windows.delete(key)
-      }
-    })
-
-    return win
-  }
-
-  return {
-    openOrFocus,
-    get: key => windows.get(key),
-    has: key => windows.has(key),
-    get size() {
-      return windows.size
-    }
-  }
-}
-
-module.exports = {
-  buildSessionWindowUrl,
-  createSessionWindowRegistry,
-  SESSION_WINDOW_MIN_HEIGHT,
-  SESSION_WINDOW_MIN_WIDTH
-}
--- a/apps/desktop/electron/session-windows.test.cjs
+++ b/apps/desktop/electron/session-windows.test.cjs
@@ -1,177 +0,0 @@
-const assert = require('node:assert/strict')
-const test = require('node:test')
-
-const { buildSessionWindowUrl, createSessionWindowRegistry } = require('./session-windows.cjs')
-
-// A minimal fake BrowserWindow: tracks listeners + destroyed state and lets a
-// test fire the 'closed' event, mirroring the slice of the Electron API the
-// registry actually touches.
-function makeFakeWindow() {
-  const listeners = {}
-  const calls = { focus: 0, show: 0, restore: 0 }
-  let destroyed = false
-  let minimized = false
-  let visible = true
-
-  return {
-    on(event, handler) {
-      listeners[event] = handler
-
-      return this
-    },
-    emit(event) {
-      listeners[event]?.()
-    },
-    isDestroyed: () => destroyed,
-    destroy() {
-      destroyed = true
-    },
-    isMinimized: () => minimized,
-    setMinimized(value) {
-      minimized = value
-    },
-    isVisible: () => visible,
-    setVisible(value) {
-      visible = value
-    },
-    restore() {
-      calls.restore += 1
-      minimized = false
-    },
-    show() {
-      calls.show += 1
-      visible = true
-    },
-    focus() {
-      calls.focus += 1
-    },
-    calls
-  }
-}
-
-test('buildSessionWindowUrl puts the secondary flag before the hash route (dev server)', () => {
-  const url = buildSessionWindowUrl('abc123', { devServer: 'http://localhost:5173' })
-
-  assert.equal(url, 'http://localhost:5173/?win=secondary#/abc123')
-})
-
-test('buildSessionWindowUrl avoids a double slash when the dev server has a trailing slash', () => {
-  const url = buildSessionWindowUrl('abc123', { devServer: 'http://localhost:5173/' })
-
-  assert.equal(url, 'http://localhost:5173/?win=secondary#/abc123')
-})
-
-test('buildSessionWindowUrl encodes the session id in the hash route', () => {
-  const url = buildSessionWindowUrl('a b/c', { devServer: 'http://localhost:5173' })
-
-  // The query flag must precede the '#' or HashRouter would swallow it as the
-  // route; the id is URL-encoded so slashes/spaces survive routeSessionId().
-  assert.equal(url, 'http://localhost:5173/?win=secondary#/a%20b%2Fc')
-  assert.ok(url.indexOf('?win=secondary') < url.indexOf('#'))
-})
-
-test('buildSessionWindowUrl builds a packaged file URL with the flag before the hash', () => {
-  const url = buildSessionWindowUrl('abc', { rendererIndexPath: '/opt/app/index.html' })
-
-  assert.match(url, /^file:\/\/.*index\.html\?win=secondary#\/abc$/)
-})
-
-test('buildSessionWindowUrl adds the watch flag for spectator windows, before the hash', () => {
-  const url = buildSessionWindowUrl('abc', { devServer: 'http://localhost:5173', watch: true })
-
-  assert.equal(url, 'http://localhost:5173/?win=secondary&watch=1#/abc')
-})
-
-test('buildSessionWindowUrl routes new-session windows to the draft (#/)', () => {
-  const url = buildSessionWindowUrl(null, { devServer: 'http://localhost:5173', newSession: true })
-
-  assert.equal(url, 'http://localhost:5173/?win=secondary&new=1#/')
-})
-
-test('registry opens one window per session and focuses on re-open', () => {
-  const registry = createSessionWindowRegistry()
-  let built = 0
-  const win = makeFakeWindow()
-  const factory = () => {
-    built += 1
-
-    return win
-  }
-
-  const first = registry.openOrFocus('s1', factory)
-  const second = registry.openOrFocus('s1', factory)
-
-  assert.equal(built, 1, 'factory runs once for the same session')
-  assert.equal(first, second)
-  assert.equal(registry.size, 1)
-  assert.equal(win.calls.focus, 1, 'second open focuses the existing window')
-})
-
-test('registry restores + shows a minimized/hidden window on re-open', () => {
-  const registry = createSessionWindowRegistry()
-  const win = makeFakeWindow()
-  registry.openOrFocus('s1', () => win)
-
-  win.setMinimized(true)
-  win.setVisible(false)
-  registry.openOrFocus('s1', () => win)
-
-  assert.equal(win.calls.restore, 1)
-  assert.equal(win.calls.show, 1)
-  assert.equal(win.calls.focus, 1)
-})
-
-test('registry drops the entry when the window closes', () => {
-  const registry = createSessionWindowRegistry()
-  const win = makeFakeWindow()
-  registry.openOrFocus('s1', () => win)
-  assert.equal(registry.size, 1)
-
-  win.emit('closed')
-
-  assert.equal(registry.size, 0)
-  assert.equal(registry.has('s1'), false)
-})
-
-test('registry rebuilds a fresh window after the previous one was destroyed', () => {
-  const registry = createSessionWindowRegistry()
-  const first = makeFakeWindow()
-  registry.openOrFocus('s1', () => first)
-  first.destroy()
-
-  let built = 0
-  const second = makeFakeWindow()
-  const result = registry.openOrFocus('s1', () => {
-    built += 1
-
-    return second
-  })
-
-  assert.equal(built, 1, 'a destroyed window is replaced, not focused')
-  assert.equal(result, second)
-})
-
-test('registry ignores empty / non-string session ids', () => {
-  const registry = createSessionWindowRegistry()
-  let built = 0
-  const factory = () => {
-    built += 1
-
-    return makeFakeWindow()
-  }
-
-  assert.equal(registry.openOrFocus('', factory), null)
-  assert.equal(registry.openOrFocus('   ', factory), null)
-  assert.equal(registry.openOrFocus(null, factory), null)
-  assert.equal(registry.openOrFocus(42, factory), null)
-  assert.equal(built, 0)
-  assert.equal(registry.size, 0)
-})
-
-test('registry trims the session id before keying', () => {
-  const registry = createSessionWindowRegistry()
-  const win = makeFakeWindow()
-  registry.openOrFocus('  s1  ', () => win)
-
-  assert.equal(registry.has('s1'), true)
-})
--- a/apps/desktop/electron/update-remote.cjs
+++ b/apps/desktop/electron/update-remote.cjs
@@ -1,56 +0,0 @@
-/**
- * Pure helpers for choosing a remote URL during passive update checks.
- *
- * A public install can end up with `origin=git@github.com:NousResearch/hermes-agent.git`.
- * If the user's GitHub SSH key is FIDO2/passkey-backed, a background `git fetch
- * origin` triggers an unexplained hardware-touch prompt. For passive checks
- * against the official repo we substitute the public HTTPS `ls-remote` path,
- * which needs no auth and cannot prompt. Active update/apply flows are left
- * unchanged.
- *
- * Extracted from main.cjs so the security-critical remote detection is unit
- * testable without booting Electron (main.cjs requires('electron') at load).
- */
-
-const OFFICIAL_REPO_HTTPS_URL = 'https://github.com/NousResearch/hermes-agent.git'
-const OFFICIAL_REPO_CANONICAL = 'github.com/nousresearch/hermes-agent'
-
-// Normalize common GitHub remote URL forms to `host/owner/repo` (lowercased,
-// no trailing slash, no .git suffix) so SSH and HTTPS forms of the same repo
-// compare equal.
-function canonicalGitHubRemote(url) {
-  if (!url) return ''
-  let value = String(url).trim()
-  if (value.startsWith('git@github.com:')) {
-    value = `github.com/${value.slice('git@github.com:'.length)}`
-  } else if (value.startsWith('ssh://git@github.com/')) {
-    value = `github.com/${value.slice('ssh://git@github.com/'.length)}`
-  } else {
-    try {
-      const parsed = new URL(value)
-      if (parsed.hostname && parsed.pathname) value = `${parsed.hostname}${parsed.pathname}`
-    } catch {
-      // Leave non-URL forms unchanged.
-    }
-  }
-  value = value.trim().replace(/\/+$/, '')
-  if (value.endsWith('.git')) value = value.slice(0, -4)
-  return value.toLowerCase()
-}
-
-function isSshRemote(url) {
-  const value = String(url || '').trim().toLowerCase()
-  return value.startsWith('git@') || value.startsWith('ssh://')
-}
-
-function isOfficialSshRemote(url) {
-  return isSshRemote(url) && canonicalGitHubRemote(url) === OFFICIAL_REPO_CANONICAL
-}
-
-module.exports = {
-  OFFICIAL_REPO_HTTPS_URL,
-  OFFICIAL_REPO_CANONICAL,
-  canonicalGitHubRemote,
-  isSshRemote,
-  isOfficialSshRemote
-}
--- a/apps/desktop/electron/update-remote.test.cjs
+++ b/apps/desktop/electron/update-remote.test.cjs
@@ -1,78 +0,0 @@
-/**
- * Tests for electron/update-remote.cjs — the remote-detection helpers that
- * keep passive update checks off the SSH origin for official installs.
- *
- * Run with: node --test electron/update-remote.test.cjs
- * (Wired into npm test:desktop:platforms in package.json.)
- *
- * Why this matters: a public install can carry
- * origin=git@github.com:NousResearch/hermes-agent.git. A background
- * `git fetch origin` then authenticates over SSH and, with a FIDO2/passkey
- * key, triggers an unexplained hardware-touch prompt. isOfficialSshRemote
- * must reliably recognize the official SSH remote (in every URL form,
- * case-insensitively) so the caller can swap in the anonymous HTTPS path —
- * while NOT misclassifying forks, other hosts, or the HTTPS remote (which
- * never prompts and should keep the normal fetch path).
- */
-
-const test = require('node:test')
-const assert = require('node:assert/strict')
-
-const {
-  OFFICIAL_REPO_HTTPS_URL,
-  OFFICIAL_REPO_CANONICAL,
-  canonicalGitHubRemote,
-  isSshRemote,
-  isOfficialSshRemote
-} = require('./update-remote.cjs')
-
-test('canonicalGitHubRemote normalizes SSH and HTTPS forms to the same value', () => {
-  assert.equal(canonicalGitHubRemote('git@github.com:NousResearch/hermes-agent.git'), OFFICIAL_REPO_CANONICAL)
-  assert.equal(canonicalGitHubRemote('git@github.com:NousResearch/hermes-agent'), OFFICIAL_REPO_CANONICAL)
-  assert.equal(canonicalGitHubRemote('ssh://git@github.com/NousResearch/hermes-agent.git'), OFFICIAL_REPO_CANONICAL)
-  assert.equal(canonicalGitHubRemote('https://github.com/NousResearch/hermes-agent.git'), OFFICIAL_REPO_CANONICAL)
-  // Case-insensitive: an uppercased owner still canonicalizes to the same repo.
-  assert.equal(canonicalGitHubRemote('git@github.com:nousresearch/hermes-agent.git'), OFFICIAL_REPO_CANONICAL)
-  // Trailing slashes are stripped.
-  assert.equal(canonicalGitHubRemote('https://github.com/NousResearch/hermes-agent/'), OFFICIAL_REPO_CANONICAL)
-})
-
-test('canonicalGitHubRemote is empty for falsy input', () => {
-  assert.equal(canonicalGitHubRemote(''), '')
-  assert.equal(canonicalGitHubRemote(null), '')
-  assert.equal(canonicalGitHubRemote(undefined), '')
-})
-
-test('isSshRemote detects scp-like and ssh:// forms only', () => {
-  assert.equal(isSshRemote('git@github.com:NousResearch/hermes-agent.git'), true)
-  assert.equal(isSshRemote('ssh://git@github.com/NousResearch/hermes-agent.git'), true)
-  assert.equal(isSshRemote('https://github.com/NousResearch/hermes-agent.git'), false)
-  assert.equal(isSshRemote(''), false)
-  assert.equal(isSshRemote(null), false)
-})
-
-test('isOfficialSshRemote is true only for the official repo over SSH', () => {
-  assert.equal(isOfficialSshRemote('git@github.com:NousResearch/hermes-agent.git'), true)
-  assert.equal(isOfficialSshRemote('git@github.com:NousResearch/hermes-agent'), true)
-  assert.equal(isOfficialSshRemote('ssh://git@github.com/NousResearch/hermes-agent.git'), true)
-  // Case-insensitive owner/repo match.
-  assert.equal(isOfficialSshRemote('git@github.com:nousresearch/hermes-agent.git'), true)
-})
-
-test('isOfficialSshRemote does NOT match forks, other hosts, or HTTPS', () => {
-  // A fork over SSH belongs to the user — fetching it is their own remote,
-  // not the official upstream, so the SSH-avoidance swap must not apply.
-  assert.equal(isOfficialSshRemote('git@github.com:someuser/hermes-agent.git'), false)
-  // Same repo name on a different host is not the official repo.
-  assert.equal(isOfficialSshRemote('git@gitlab.com:NousResearch/hermes-agent.git'), false)
-  // HTTPS to the official repo never prompts for SSH/FIDO2, so it keeps the
-  // normal fetch path — must not be flagged as an official SSH remote.
-  assert.equal(isOfficialSshRemote('https://github.com/NousResearch/hermes-agent.git'), false)
-  assert.equal(isOfficialSshRemote(''), false)
-  assert.equal(isOfficialSshRemote(null), false)
-})
-
-test('OFFICIAL_REPO_HTTPS_URL canonicalizes to OFFICIAL_REPO_CANONICAL', () => {
-  // Invariant: the URL we substitute in must be the same repo we detect.
-  assert.equal(canonicalGitHubRemote(OFFICIAL_REPO_HTTPS_URL), OFFICIAL_REPO_CANONICAL)
-})
--- a/apps/desktop/electron/vscode-marketplace.cjs
+++ b/apps/desktop/electron/vscode-marketplace.cjs
@@ -1,331 +0,0 @@
-'use strict'
-
-/**
- * VS Code Marketplace color-theme fetcher (main process).
- *
- * Resolves an extension's latest version via the (undocumented but stable)
- * gallery ExtensionQuery API, downloads the `.vsix` (a zip), and extracts the
- * color-theme JSON files it contributes. No theme code is ever executed — we
- * only read `package.json` + the referenced `*.json` theme files out of the
- * archive and hand their text back to the renderer to convert.
- *
- * Dependency-free on purpose: a `.vsix` is a plain zip, so we parse the central
- * directory and inflate just the entries we need with `zlib`. Avoids pulling a
- * zip library into the desktop bundle for a feature this small.
- */
-
-const https = require('node:https')
-const zlib = require('node:zlib')
-
-const GALLERY_QUERY_URL = 'https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery'
-const VSIX_ASSET_TYPE = 'Microsoft.VisualStudio.Services.VSIXPackage'
-const MAX_VSIX_BYTES = 40 * 1024 * 1024 // 40 MB — themes are tiny; this is paranoia.
-const MAX_REDIRECTS = 5
-const REQUEST_TIMEOUT_MS = 20_000
-
-const ID_RE = /^[\w-]+\.[\w-]+$/
-
-/** Minimal HTTPS helper with redirect-following, timeout, and a size cap. */
-function request(url, { method = 'GET', headers = {}, body = null, maxBytes = MAX_VSIX_BYTES } = {}, redirectsLeft = MAX_REDIRECTS) {
-  return new Promise((resolve, reject) => {
-    const req = https.request(url, { method, headers }, res => {
-      const status = res.statusCode ?? 0
-
-      if (status >= 300 && status < 400 && res.headers.location) {
-        if (redirectsLeft <= 0) {
-          res.resume()
-          reject(new Error('Too many redirects.'))
-
-          return
-        }
-
-        const next = new URL(res.headers.location, url).toString()
-        res.resume()
-        // Redirects to the CDN are plain GETs (drop the POST body).
-        resolve(request(next, { method: 'GET', headers: { 'User-Agent': headers['User-Agent'] }, maxBytes }, redirectsLeft - 1))
-
-        return
-      }
-
-      if (status < 200 || status >= 300) {
-        res.resume()
-        reject(new Error(`Request failed (${status}) for ${url}`))
-
-        return
-      }
-
-      const chunks = []
-      let total = 0
-
-      res.on('data', chunk => {
-        total += chunk.length
-
-        if (total > maxBytes) {
-          req.destroy()
-          reject(new Error('Response exceeded the size limit.'))
-
-          return
-        }
-
-        chunks.push(chunk)
-      })
-      res.on('end', () => resolve(Buffer.concat(chunks)))
-    })
-
-    req.on('error', reject)
-    req.setTimeout(REQUEST_TIMEOUT_MS, () => req.destroy(new Error('Request timed out.')))
-
-    if (body) {
-      req.write(body)
-    }
-
-    req.end()
-  })
-}
-
-/** Resolve `{ displayName, vsixUrl }` for the latest version of `id`. */
-async function resolveExtension(id) {
-  const json = await queryGallery({
-    // FilterType 7 = ExtensionName (the full publisher.extension id).
-    filters: [{ criteria: [{ filterType: 7, value: id }], pageNumber: 1, pageSize: 1 }],
-    // Flags: IncludeFiles | IncludeVersionProperties | IncludeAssetUri |
-    // IncludeCategoryAndTags | IncludeLatestVersionOnly = 914.
-    flags: 914
-  })
-  const extension = json?.results?.[0]?.extensions?.[0]
-
-  if (!extension) {
-    throw new Error(`Extension "${id}" was not found on the Marketplace.`)
-  }
-
-  const version = extension.versions?.[0]
-
-  if (!version) {
-    throw new Error(`Extension "${id}" has no published versions.`)
-  }
-
-  const asset = (version.files ?? []).find(file => file.assetType === VSIX_ASSET_TYPE)
-  const vsixUrl = asset?.source
-
-  if (!vsixUrl) {
-    throw new Error(`Could not find a downloadable package for "${id}".`)
-  }
-
-  return { displayName: extension.displayName || id, vsixUrl }
-}
-
-/** POST an ExtensionQuery payload and return the parsed gallery response. */
-async function queryGallery(payload, { maxBytes = 4 * 1024 * 1024 } = {}) {
-  const body = JSON.stringify(payload)
-  const raw = await request(GALLERY_QUERY_URL, {
-    method: 'POST',
-    headers: {
-      Accept: 'application/json;api-version=3.0-preview.1',
-      'Content-Type': 'application/json',
-      'Content-Length': Buffer.byteLength(body),
-      'User-Agent': 'Hermes-Desktop'
-    },
-    body,
-    maxBytes
-  })
-
-  return JSON.parse(raw.toString('utf8'))
-}
-
-/**
- * Search the Marketplace for color-theme extensions. With an empty query this
- * returns the most-installed themes; with a query it's a full-text search
- * scoped to the Themes category. Returns lightweight cards (no download).
- */
-/**
- * The "Themes" category also contains file-icon and product-icon themes (the
- * gallery has no color-only category). We can't see an extension's actual
- * contributions without downloading it, so filter the obvious icon packs out by
- * tag + name/description. Color themes that also ship icons are rare; worst case
- * a user installs them by exact id from settings.
- */
-function looksLikeIconTheme(extension) {
-  const tags = (extension.tags ?? []).map(tag => String(tag).toLowerCase())
-
-  if (tags.includes('icon-theme') || tags.includes('product-icon-theme')) {
-    return true
-  }
-
-  const text = `${extension.displayName ?? ''} ${extension.shortDescription ?? ''}`.toLowerCase()
-
-  return /\b(icon theme|file icons?|product icons?|icon pack|fileicons)\b/.test(text)
-}
-
-async function searchMarketplaceThemes(query, limit = 20) {
-  const text = String(query || '').trim()
-  const pageSize = Math.min(Math.max(Number(limit) || 20, 1), 50)
-
-  // FilterType: 8=Target, 5=Category, 10=SearchText, 12=ExcludeWithFlags.
-  const criteria = [
-    { filterType: 8, value: 'Microsoft.VisualStudio.Code' },
-    { filterType: 5, value: 'Themes' },
-    { filterType: 12, value: '4096' } // Exclude unpublished (Unpublished = 0x1000).
-  ]
-
-  if (text) {
-    criteria.push({ filterType: 10, value: text })
-  }
-
-  const json = await queryGallery({
-    // Over-fetch so the icon-theme filter below still leaves a full page.
-    filters: [{ criteria, pageNumber: 1, pageSize: Math.min(pageSize * 2, 50), sortBy: 4, sortOrder: 0 }],
-    // IncludeStatistics (0x100) | IncludeLatestVersionOnly (0x200) | IncludeCategoryAndTags (0x4).
-    flags: 772
-  })
-
-  const extensions = json?.results?.[0]?.extensions ?? []
-
-  return extensions
-    .filter(extension => !looksLikeIconTheme(extension))
-    .slice(0, pageSize)
-    .map(extension => {
-      const publisherName = extension.publisher?.publisherName ?? ''
-      const installStat = (extension.statistics ?? []).find(stat => stat.statisticName === 'install')
-
-      return {
-        extensionId: `${publisherName}.${extension.extensionName}`,
-        displayName: extension.displayName || extension.extensionName,
-        publisher: extension.publisher?.displayName || publisherName,
-        description: extension.shortDescription || '',
-        installs: Math.round(installStat?.value ?? 0)
-      }
-    })
-}
-
-// ─── Minimal zip reader ─────────────────────────────────────────────────────
-
-function findEndOfCentralDirectory(buf) {
-  // EOCD signature 0x06054b50, scanning back from the end (comment is rare).
-  for (let i = buf.length - 22; i >= 0; i--) {
-    if (buf.readUInt32LE(i) === 0x06054b50) {
-      return i
-    }
-  }
-
-  throw new Error('Not a valid zip archive (no end-of-central-directory).')
-}
-
-/** Parse the central directory into a name → record map. */
-function readCentralDirectory(buf) {
-  const eocd = findEndOfCentralDirectory(buf)
-  const count = buf.readUInt16LE(eocd + 10)
-  let offset = buf.readUInt32LE(eocd + 16)
-  const records = new Map()
-
-  for (let i = 0; i < count; i++) {
-    if (buf.readUInt32LE(offset) !== 0x02014b50) {
-      break
-    }
-
-    const method = buf.readUInt16LE(offset + 10)
-    const compressedSize = buf.readUInt32LE(offset + 20)
-    const nameLen = buf.readUInt16LE(offset + 28)
-    const extraLen = buf.readUInt16LE(offset + 30)
-    const commentLen = buf.readUInt16LE(offset + 32)
-    const localOffset = buf.readUInt32LE(offset + 42)
-    const name = buf.toString('utf8', offset + 46, offset + 46 + nameLen)
-
-    records.set(name, { method, compressedSize, localOffset })
-    offset += 46 + nameLen + extraLen + commentLen
-  }
-
-  return records
-}
-
-/** Inflate a single entry to a string. */
-function extractEntry(buf, record) {
-  // The local header's name/extra lengths can differ from the central record,
-  // so re-read them here to locate the compressed payload.
-  if (buf.readUInt32LE(record.localOffset) !== 0x04034b50) {
-    throw new Error('Corrupt zip: bad local file header.')
-  }
-
-  const nameLen = buf.readUInt16LE(record.localOffset + 26)
-  const extraLen = buf.readUInt16LE(record.localOffset + 28)
-  const dataStart = record.localOffset + 30 + nameLen + extraLen
-  const data = buf.subarray(dataStart, dataStart + record.compressedSize)
-
-  // 0 = stored, 8 = deflate. Theme files are one or the other.
-  return record.method === 0 ? data.toString('utf8') : zlib.inflateRawSync(data).toString('utf8')
-}
-
-/** Normalize a package.json theme path to its zip entry name. */
-function themeEntryName(themePath) {
-  const clean = String(themePath).replace(/^\.\//, '').replace(/^\//, '')
-
-  return `extension/${clean}`
-}
-
-/** Extract every contributed color theme from a `.vsix` buffer. */
-function extractThemes(vsixBuffer) {
-  const records = readCentralDirectory(vsixBuffer)
-  const pkgRecord = records.get('extension/package.json')
-
-  if (!pkgRecord) {
-    throw new Error('Package manifest missing from the extension.')
-  }
-
-  const pkg = JSON.parse(extractEntry(vsixBuffer, pkgRecord))
-  const contributed = pkg?.contributes?.themes
-
-  if (!Array.isArray(contributed) || contributed.length === 0) {
-    return []
-  }
-
-  const themes = []
-
-  for (const entry of contributed) {
-    if (!entry?.path) {
-      continue
-    }
-
-    const record = records.get(themeEntryName(entry.path))
-
-    if (!record) {
-      continue
-    }
-
-    try {
-      themes.push({
-        label: entry.label || entry.id || pkg.displayName || pkg.name || 'VS Code Theme',
-        uiTheme: entry.uiTheme,
-        contents: extractEntry(vsixBuffer, record)
-      })
-    } catch {
-      // Skip an entry we can't inflate rather than failing the whole install.
-    }
-  }
-
-  return themes
-}
-
-/**
- * Public entry: resolve, download, and extract color themes for `id`
- * (`publisher.extension`). Returns `{ extensionId, displayName, themes }`.
- */
-async function fetchMarketplaceThemes(id) {
-  const trimmed = String(id || '').trim()
-
-  if (!ID_RE.test(trimmed)) {
-    throw new Error('Expected a Marketplace id like "publisher.extension".')
-  }
-
-  const { displayName, vsixUrl } = await resolveExtension(trimmed)
-  const vsix = await request(vsixUrl, { headers: { 'User-Agent': 'Hermes-Desktop' } })
-  const themes = extractThemes(vsix)
-
-  return { extensionId: trimmed, displayName, themes }
-}
-
-module.exports = {
-  fetchMarketplaceThemes,
-  searchMarketplaceThemes,
-  extractThemes,
-  readCentralDirectory,
-  __testing: { themeEntryName, looksLikeIconTheme }
-}
--- a/apps/desktop/electron/vscode-marketplace.test.cjs
+++ b/apps/desktop/electron/vscode-marketplace.test.cjs
@@ -1,113 +0,0 @@
-'use strict'
-
-const assert = require('node:assert')
-const test = require('node:test')
-
-const { __testing, extractThemes, readCentralDirectory } = require('./vscode-marketplace.cjs')
-
-// Build a minimal zip with stored (uncompressed) entries so the test controls
-// the bytes exactly — exercises the central-directory reader + theme extraction
-// without a deflate dependency.
-function makeZip(entries) {
-  const locals = []
-  const centrals = []
-  let offset = 0
-
-  for (const { name, data } of entries) {
-    const nameBuf = Buffer.from(name, 'utf8')
-    const body = Buffer.from(data, 'utf8')
-
-    const local = Buffer.alloc(30 + nameBuf.length)
-    local.writeUInt32LE(0x04034b50, 0)
-    local.writeUInt16LE(0, 8) // method: stored
-    local.writeUInt32LE(body.length, 18) // compressed size
-    local.writeUInt32LE(body.length, 22) // uncompressed size
-    local.writeUInt16LE(nameBuf.length, 26)
-    nameBuf.copy(local, 30)
-
-    locals.push(local, body)
-
-    const central = Buffer.alloc(46 + nameBuf.length)
-    central.writeUInt32LE(0x02014b50, 0)
-    central.writeUInt16LE(0, 10) // method: stored
-    central.writeUInt32LE(body.length, 20)
-    central.writeUInt32LE(body.length, 24)
-    central.writeUInt16LE(nameBuf.length, 28)
-    central.writeUInt32LE(offset, 42) // local header offset
-    nameBuf.copy(central, 46)
-
-    centrals.push(central)
-    offset += local.length + body.length
-  }
-
-  const centralStart = offset
-  const centralBuf = Buffer.concat(centrals)
-
-  const eocd = Buffer.alloc(22)
-  eocd.writeUInt32LE(0x06054b50, 0)
-  eocd.writeUInt16LE(entries.length, 8)
-  eocd.writeUInt16LE(entries.length, 10)
-  eocd.writeUInt32LE(centralBuf.length, 12)
-  eocd.writeUInt32LE(centralStart, 16)
-
-  return Buffer.concat([...locals, centralBuf, eocd])
-}
-
-test('readCentralDirectory finds every entry', () => {
-  const zip = makeZip([
-    { name: 'extension/package.json', data: '{}' },
-    { name: 'extension/themes/x.json', data: '{}' }
-  ])
-
-  const records = readCentralDirectory(zip)
-  assert.ok(records.has('extension/package.json'))
-  assert.ok(records.has('extension/themes/x.json'))
-})
-
-test('extractThemes reads contributed color themes (resolving ./ paths)', () => {
-  const pkg = JSON.stringify({
-    name: 'theme-dracula',
-    displayName: 'Dracula',
-    contributes: {
-      themes: [{ label: 'Dracula', uiTheme: 'vs-dark', path: './themes/dracula.json' }]
-    }
-  })
-  const themeJson = JSON.stringify({ name: 'Dracula', type: 'dark', colors: { 'editor.background': '#282a36' } })
-
-  const zip = makeZip([
-    { name: 'extension/package.json', data: pkg },
-    { name: 'extension/themes/dracula.json', data: themeJson }
-  ])
-
-  const themes = extractThemes(zip)
-  assert.strictEqual(themes.length, 1)
-  assert.strictEqual(themes[0].label, 'Dracula')
-  assert.strictEqual(themes[0].uiTheme, 'vs-dark')
-  assert.match(themes[0].contents, /editor\.background/)
-})
-
-test('extractThemes returns empty when the extension contributes no themes', () => {
-  const zip = makeZip([{ name: 'extension/package.json', data: JSON.stringify({ name: 'x', contributes: {} }) }])
-  assert.deepStrictEqual(extractThemes(zip), [])
-})
-
-test('extractThemes throws when the manifest is missing', () => {
-  const zip = makeZip([{ name: 'extension/other.txt', data: 'hi' }])
-  assert.throws(() => extractThemes(zip), /manifest missing/i)
-})
-
-test('looksLikeIconTheme filters icon/product-icon packs out of theme search', () => {
-  const { looksLikeIconTheme } = __testing
-
-  // Tagged contribution points are the strongest signal.
-  assert.strictEqual(looksLikeIconTheme({ tags: ['theme', 'icon-theme'] }), true)
-  assert.strictEqual(looksLikeIconTheme({ tags: ['product-icon-theme'] }), true)
-
-  // Name/description fallback for packs that don't tag themselves.
-  assert.strictEqual(looksLikeIconTheme({ displayName: 'Material Icon Theme' }), true)
-  assert.strictEqual(looksLikeIconTheme({ shortDescription: 'A pack of file icons.' }), true)
-
-  // Real color themes survive.
-  assert.strictEqual(looksLikeIconTheme({ displayName: 'Dracula Official', tags: ['theme', 'color-theme'] }), false)
-  assert.strictEqual(looksLikeIconTheme({ displayName: 'One Dark Pro' }), false)
-})
--- a/apps/desktop/electron/windows-child-process.test.cjs
+++ b/apps/desktop/electron/windows-child-process.test.cjs
@@ -1,57 +0,0 @@
-'use strict'
-
-const test = require('node:test')
-const assert = require('node:assert/strict')
-const fs = require('node:fs')
-const path = require('node:path')
-
-const ELECTRON_DIR = __dirname
-
-function readElectronFile(name) {
-  return fs.readFileSync(path.join(ELECTRON_DIR, name), 'utf8').replace(/\r\n/g, '\n')
-}
-
-function requireHiddenChildOptions(source, needle) {
-  const index = source.indexOf(needle)
-  assert.notEqual(index, -1, `missing call site: ${needle}`)
-  const snippet = source.slice(index, index + 700)
-  assert.match(
-    snippet,
-    /hiddenWindowsChildOptions\(/,
-    `expected ${needle} to wrap child-process options with hiddenWindowsChildOptions`
-  )
-}
-
-test('desktop background child processes opt into hidden Windows consoles', () => {
-  const source = readElectronFile('main.cjs')
-
-  assert.match(source, /function hiddenWindowsChildOptions\(options = \{\}\)/)
-
-  requireHiddenChildOptions(source, "execFileSync(\n          'reg'")
-  requireHiddenChildOptions(source, 'execFileSync(pyExe')
-  requireHiddenChildOptions(source, 'spawn(resolveGitBinary()')
-  requireHiddenChildOptions(source, "execFileSync('taskkill'")
-  requireHiddenChildOptions(source, 'spawn(command, args')
-  requireHiddenChildOptions(source, "spawn('curl'")
-  requireHiddenChildOptions(source, 'spawn(backend.command, backend.args')
-  requireHiddenChildOptions(source, 'hermesProcess = spawn(backend.command, backend.args')
-  requireHiddenChildOptions(source, "spawn(py, ['-m', 'hermes_cli.main', 'uninstall', '--gui-summary']")
-})
-
-test('intentional or interactive desktop child processes stay documented', () => {
-  const source = readElectronFile('main.cjs')
-
-  assert.match(source, /windowsHide: false/)
-  assert.match(source, /handOffWindowsBootstrapRecovery/)
-  assert.match(source, /'--repair', '--branch'/)
-  assert.match(source, /'--update', '--branch'/)
-  assert.match(source, /nodePty\.spawn\(command, args/)
-  assert.match(source, /spawn\('cmd\.exe', \['\/c', 'start'/)
-})
-
-test('bootstrap PowerShell runner hides Windows console children', () => {
-  const source = readElectronFile('bootstrap-runner.cjs')
-
-  assert.match(source, /function hiddenWindowsChildOptions\(options = \{\}\)/)
-  requireHiddenChildOptions(source, 'spawn(ps, fullArgs')
-})
--- a/apps/desktop/electron/windows-user-env.cjs
+++ b/apps/desktop/electron/windows-user-env.cjs
@@ -1,76 +0,0 @@
-// windows-user-env.cjs
-//
-// Read a User-scoped environment variable straight from the Windows registry
-// (HKCU\Environment).
-//
-// A GUI app launched from Explorer inherits the environment block captured at
-// login, so a variable set via `setx` AFTER login is invisible in process.env
-// even though a fresh shell — and the Hermes CLI — sees it immediately. The
-// desktop's HERMES_HOME resolution relies on process.env, so that stale-snapshot
-// gap silently sends the backend to the default %LOCALAPPDATA%\hermes. Reading
-// the live registry value closes the gap. See #45471.
-
-const { execFileSync } = require('node:child_process')
-
-// Parse the output of `reg query HKCU\Environment /v <name>`, which looks like:
-//
-//   HKEY_CURRENT_USER\Environment
-//       HERMES_HOME    REG_SZ    F:\Hermes\data
-//
-// Returns the raw value string (spaces inside the value preserved), or null when
-// the requested value line isn't present.
-function parseRegQueryValue(stdout, name) {
-  if (!stdout || !name) return null
-  const typePattern =
-    /^(\S+)\s+(?:REG_SZ|REG_EXPAND_SZ|REG_MULTI_SZ|REG_DWORD|REG_QWORD|REG_BINARY|REG_NONE)\s+(.*)$/
-  for (const rawLine of String(stdout).split(/\r?\n/)) {
-    const line = rawLine.trim()
-    const match = line.match(typePattern)
-    if (match && match[1].toLowerCase() === name.toLowerCase()) {
-      return match[2]
-    }
-  }
-  return null
-}
-
-// Expand %VAR% references against an env map. REG_EXPAND_SZ values store
-// unexpanded references; plain REG_SZ paths have none, so this is a no-op for
-// the common F:\... case. Unknown references are left verbatim.
-function expandWindowsEnvRefs(value, env = process.env) {
-  if (!value) return value
-  return value.replace(/%([^%]+)%/g, (whole, name) => {
-    const key = Object.keys(env).find(k => k.toUpperCase() === String(name).toUpperCase())
-    return key != null && env[key] != null ? env[key] : whole
-  })
-}
-
-// Read a User-scoped env var from HKCU\Environment. Windows-only: returns null
-// off-Windows (without spawning), on any spawn error, when `reg` exits non-zero
-// (the value doesn't exist), or when the value is empty.
-function readWindowsUserEnvVar(
-  name,
-  { platform = process.platform, env = process.env, exec = execFileSync } = {}
-) {
-  if (platform !== 'win32' || !name) return null
-  let stdout
-  try {
-    stdout = exec('reg', ['query', 'HKCU\\Environment', '/v', name], {
-      encoding: 'utf8',
-      windowsHide: true,
-      timeout: 5000
-    })
-  } catch {
-    // `reg` missing, or value absent (reg exits 1) — caller falls back.
-    return null
-  }
-  const raw = parseRegQueryValue(stdout, name)
-  if (raw == null) return null
-  const expanded = expandWindowsEnvRefs(raw, env).trim()
-  return expanded || null
-}
-
-module.exports = {
-  expandWindowsEnvRefs,
-  parseRegQueryValue,
-  readWindowsUserEnvVar
-}
--- a/Show More
+++ b/Show More