change(tooling): update node to 26 everywhere, keep node version managed

2026-06-21 17:41:08 +08:00 · 2026-06-12 13:44:29 -04:00
634 changed files with 14633 additions and 42105 deletions
--- a/.github/workflows/build-windows-installer.yml
+++ b/.github/workflows/build-windows-installer.yml
@@ -48,7 +48,7 @@ jobs:
      - name: Setup Node.js
        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
-          node-version: 22
+          node-version: 26
          cache: npm

      - name: Install npm dependencies
--- a/.github/workflows/contributor-check.yml
+++ b/.github/workflows/contributor-check.yml
@@ -1,11 +1,12 @@
 name: Contributor Attribution Check

 on:
+  pull_request:
+    branches: [main]
  # No paths filter — the job must always run so the required check
  # reports a status (path-gated workflows leave checks "pending" forever
  # when no matching files change, which blocks merge).
-  pull_request:
-    branches: [main]
+
 permissions:
  contents: read

--- a/.github/workflows/deploy-site.yml
+++ b/.github/workflows/deploy-site.yml
@@ -11,20 +11,8 @@ on:
      - 'optional-skills/**'
      - '.github/workflows/deploy-site.yml'
  workflow_dispatch:
-    inputs:
-      skills_index_run_id:
-        description: 'Optional Build Skills Index run ID whose skills-index artifact should be deployed'
-        required: false
-        type: string
-      rebuild_skills_index:
-        description: 'Force a fresh multi-source crawl instead of reusing the latest healthy index'
-        required: false
-        default: false
-        type: boolean

 permissions:
-  contents: read
-  actions: read
  pages: write
  id-token: write

@@ -56,7 +44,7 @@ jobs:

      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
-          node-version: 22
+          node-version: 26
          cache: npm
          cache-dependency-path: website/package-lock.json

@@ -67,81 +55,26 @@ jobs:
      - name: Install PyYAML for skill extraction
        run: pip install pyyaml==6.0.2 httpx==0.28.1

-      - name: Prepare skills index (unified multi-source catalog)
+      - name: Build skills index (unified multi-source catalog)
        env:
-          GH_TOKEN: ${{ github.token }}
-          GITHUB_TOKEN: ${{ github.token }}
-          SKILLS_INDEX_RUN_ID: ${{ github.event.inputs.skills_index_run_id || '' }}
-          REBUILD_SKILLS_INDEX: ${{ github.event.inputs.rebuild_skills_index || 'false' }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
-          # The unified external catalog is expensive to crawl and can burn
-          # through the repository installation's GitHub API quota when several
-          # docs deploys land close together.  Normal docs deploys therefore
-          # reuse the latest healthy catalog: first the artifact from a
-          # scheduled skills-index run, then the currently live index.  Only a
-          # manual force rebuild does a fresh crawl here.
+          # Rebuild the unified catalog. The file is gitignored, so a fresh
+          # checkout starts without it and we want the freshest crawl in
+          # every deploy.
          #
-          # If we do crawl, the build remains fatal. build_skills_index.py runs
-          # the health check BEFORE writing and exits non-zero on source
-          # collapse, keeping the last good Pages deployment live instead of
-          # publishing a degenerate catalog.
-          set -euo pipefail
-          INDEX_PATH="website/static/api/skills-index.json"
-          mkdir -p "$(dirname "$INDEX_PATH")"
-
-          validate_index() {
-            python3 - "$INDEX_PATH" <<'PY'
-          import json
-          import sys
-          from pathlib import Path
-
-          path = Path(sys.argv[1])
-          try:
-              data = json.loads(path.read_text(encoding="utf-8"))
-          except Exception as exc:
-              print(f"invalid skills index JSON: {exc}", file=sys.stderr)
-              sys.exit(1)
-          skills = data.get("skills")
-          if not isinstance(skills, list) or len(skills) < 1500:
-              count = len(skills) if isinstance(skills, list) else "missing"
-              print(f"skills index too small: {count}", file=sys.stderr)
-              sys.exit(1)
-          print(f"skills index ready: {len(skills)} skills")
-          PY
-          }
-
-          if [ "$REBUILD_SKILLS_INDEX" = "true" ]; then
-            python3 scripts/build_skills_index.py
-            validate_index
-            exit 0
-          fi
-
-          if [ -n "$SKILLS_INDEX_RUN_ID" ]; then
-            tmpdir="$(mktemp -d)"
-            echo "Downloading skills-index artifact from run $SKILLS_INDEX_RUN_ID"
-            if gh run download "$SKILLS_INDEX_RUN_ID" --name skills-index --dir "$tmpdir"; then
-              candidate="$(find "$tmpdir" -name skills-index.json -type f | head -n 1 || true)"
-              if [ -n "$candidate" ]; then
-                cp "$candidate" "$INDEX_PATH"
-                if validate_index; then
-                  exit 0
-                fi
-              fi
-            fi
-            echo "::warning::Could not use skills-index artifact from run $SKILLS_INDEX_RUN_ID; trying live index"
-          fi
-
-          echo "Downloading currently live skills index"
-          if curl -fsSL --retry 3 --retry-delay 5 \
-            "https://hermes-agent.nousresearch.com/docs/api/skills-index.json" \
-            -o "$INDEX_PATH" && validate_index; then
-            exit 0
-          fi
-
-          echo "::warning::Live skills index unavailable or unhealthy; falling back to a fresh crawl"
-          rm -f "$INDEX_PATH"
+          # This MUST be fatal. build_skills_index.py runs a health check and
+          # exits non-zero WITHOUT writing the output file when a source
+          # collapses (e.g. a GitHub API rate limit zeroes the github /
+          # claude-marketplace / well-known taps all at once). Letting the
+          # deploy continue would either (a) ship a degenerate index missing
+          # whole hubs — the June 2026 regression where OpenAI/Anthropic/
+          # HuggingFace/NVIDIA tabs vanished — or (b) fall through to a
+          # local-only catalog. Failing here keeps the last good deployment
+          # live (GitHub Pages serves the previous build) instead of
+          # publishing a broken catalog. Re-run the workflow once the
+          # transient rate limit clears.
          python3 scripts/build_skills_index.py
-          validate_index

      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py
--- a/.github/workflows/docker-lint.yml
+++ b/.github/workflows/docker-lint.yml
@@ -18,12 +18,13 @@ on:
      - docker/**
      - .hadolint.yaml
      - .github/workflows/docker-lint.yml
-
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
+    paths:
+      - Dockerfile
+      - docker/**
+      - .hadolint.yaml
+      - .github/workflows/docker-lint.yml

 permissions:
  contents: read
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -11,13 +11,16 @@ on:
      - 'docker/**'
      - '.github/workflows/docker-publish.yml'
      - '.github/actions/hermes-smoke-test/**'
-
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
-
+    paths:
+      - '**/*.py'
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - 'Dockerfile'
+      - 'docker/**'
+      - '.github/workflows/docker-publish.yml'
+      - '.github/actions/hermes-smoke-test/**'
  release:
    types: [published]

@@ -87,7 +90,7 @@ jobs:
      # (see `_SKIP_PARTS` in scripts/run_tests_parallel.py) because each
      # shard would otherwise reach the session-scoped ``built_image``
      # fixture in ``tests/docker/conftest.py`` and start a 3-7min
-      # ``docker build`` — guaranteed to
+      # ``docker build`` under a 180s pytest-timeout cap — guaranteed to
      # die in fixture setup.
      #
      # Piggybacking here avoids a second image build: the smoke test
@@ -111,7 +114,7 @@ jobs:
        run: |
          uv venv .venv --python 3.11
          source .venv/bin/activate
-          # ``dev`` extra pulls in pytest, pytest-asyncio —
+          # ``dev`` extra pulls in pytest, pytest-asyncio, pytest-timeout —
          # everything tests/docker/ needs.  We deliberately avoid ``all``
          # here because the docker tests only drive the container via
          # subprocess and don't import hermes_agent's optional deps.
--- a/.github/workflows/docs-site-checks.yml
+++ b/.github/workflows/docs-site-checks.yml
@@ -1,12 +1,10 @@
 name: Docs Site Checks

 on:
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
-    branches: [main]
-
+    paths:
+      - 'website/**'
+      - '.github/workflows/docs-site-checks.yml'
  workflow_dispatch:

 permissions:
@@ -16,11 +14,11 @@ jobs:
  docs-site-checks:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

-      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
+      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
-          node-version: 22
+          node-version: 26
          cache: npm
          cache-dependency-path: website/package-lock.json

@@ -28,9 +26,9 @@ jobs:
        run: npm ci
        working-directory: website

-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
        with:
-          python-version: "3.11"
+          python-version: '3.11'

      - name: Install ascii-guard
        run: python -m pip install ascii-guard==2.3.0 pyyaml==6.0.3
--- a/.github/workflows/history-check.yml
+++ b/.github/workflows/history-check.yml
@@ -14,9 +14,6 @@ name: History Check
 # the PR head and main to be non-empty.

 on:
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]

@@ -27,9 +24,9 @@ jobs:
  check-common-ancestor:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
-          fetch-depth: 0 # full history both sides for merge-base
+          fetch-depth: 0  # full history both sides for merge-base

      - name: Reject PRs with no common ancestor on main
        run: |
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -15,12 +15,12 @@ on:
      - "**/*.md"
      - "docs/**"
      - "website/**"
-
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
+    paths-ignore:
+      - "**/*.md"
+      - "docs/**"
+      - "website/**"

 permissions:
  contents: read
@@ -154,6 +154,7 @@ jobs:
              });
            }

+
  ruff-blocking:
    # Enforce the rules in pyproject.toml [tool.ruff.lint.select]. Currently
    # PLW1514 (unspecified-encoding) — catches bare ``open()`` /
--- a/.github/workflows/nix-lockfile-fix.yml
+++ b/.github/workflows/nix-lockfile-fix.yml
@@ -0,0 +1,255 @@
+name: Nix Lockfile Fix
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'package-lock.json'
+      - 'package.json'
+      - 'ui-tui/package.json'
+      - 'apps/desktop/package.json'
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: 'PR number to fix (leave empty to run on the selected branch)'
+        required: false
+        type: string
+  issue_comment:
+    types: [edited]
+
+permissions:
+  contents: write
+  pull-requests: write
+
+concurrency:
+  group: nix-lockfile-fix-${{ github.event.issue.number || github.event.inputs.pr_number || github.ref }}
+  cancel-in-progress: false
+
+jobs:
+  # ── Auto-fix on main ───────────────────────────────────────────────
+  # Fires when a push to main touches any manifest in the paths filter above.
+  # Runs fix-lockfiles and pushes the hash update commit directly to main
+  # so Nix builds never stay broken.
+  #
+  # Safety invariants:
+  #   1. The fix commit only touches nix/lib.nix, which is NOT in the
+  #      paths filter above, so this cannot re-trigger itself.
+  #   2. An explicit file-whitelist check before commit aborts if
+  #      fix-lockfiles ever modifies unexpected files.
+  #   3. Job-level concurrency with cancel-in-progress: true ensures
+  #      back-to-back pushes collapse to the newest; ref: main checkout
+  #      always operates on the latest branch state.
+  #   4. Uses a GitHub App token (not GITHUB_TOKEN) so the fix commit
+  #      triggers downstream nix.yml verification.
+  auto-fix-main:
+    if: github.event_name == 'push'
+    runs-on: ubuntu-latest
+    timeout-minutes: 25
+    concurrency:
+      group: auto-fix-main
+      cancel-in-progress: true
+    steps:
+      - name: Generate GitHub App token
+        id: app-token
+        uses: actions/create-github-app-token@7bfa3a4717ef143a604ee0a99d859b8886a96d00  # v1.9.3
+        with:
+          app-id: ${{ secrets.APP_ID }}
+          private-key: ${{ secrets.APP_PRIVATE_KEY }}
+
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          ref: main
+          token: ${{ steps.app-token.outputs.token }}
+
+      - uses: ./.github/actions/nix-setup
+        with:
+          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
+
+      - name: Apply lockfile hashes
+        id: apply
+        run: nix run .#fix-lockfiles -- --apply
+
+      - name: Commit & push
+        if: steps.apply.outputs.changed == 'true'
+        shell: bash
+        run: |
+          set -euo pipefail
+
+          # Ensure only nix/lib.nix (home of the single npmDepsHash) was
+          # modified — prevents accidental self-triggering if fix-lockfiles
+          # ever touches package files.
+          unexpected="$(git diff --name-only | grep -Ev '^nix/lib\.nix$' || true)"
+          if [ -n "$unexpected" ]; then
+            echo "::error::Unexpected modified files: $unexpected"
+            exit 1
+          fi
+
+          # Record the base SHA before committing — used to detect package
+          # file changes if we need to rebase after a non-fast-forward push.
+          BASE_SHA="$(git rev-parse HEAD)"
+
+          git config user.name 'github-actions[bot]'
+          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
+          git add nix/lib.nix
+          git commit -m "fix(nix): auto-refresh npm lockfile hashes" \
+            -m "Source: $GITHUB_SHA" \
+            -m "Run: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
+
+          # Retry push with rebase in case main advanced with an unrelated
+          # commit during the nix build. Without this, a non-fast-forward
+          # rejection silently loses the fix. If package files changed during
+          # the rebase, abort — a fresh auto-fix run will handle the new state.
+          for attempt in 1 2 3; do
+            if git push origin HEAD:main; then
+              exit 0
+            fi
+            echo "::warning::Push attempt $attempt failed (non-fast-forward?), rebasing…"
+            git fetch origin main
+
+            # If package files changed between our base and the new main,
+            # our computed hashes are stale. Abort and let the next triggered
+            # run recompute from the correct package-lock state.
+            pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
+              'package-lock.json' 'package.json' \
+              'ui-tui/package.json' 'apps/desktop/package.json' || true)"
+            if [ -n "$pkg_changed" ]; then
+              echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
+              exit 0
+            fi
+
+            git rebase origin/main
+          done
+          echo "::error::Failed to push after 3 rebase attempts"
+          exit 1
+
+  # ── PR fix (manual / checkbox) ─────────────────────────────────────
+  # Existing behavior: run on manual dispatch OR when a task-list
+  # checkbox in the sticky lockfile-check comment flips from [ ] to [x].
+  fix:
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      (github.event_name == 'issue_comment'
+       && github.event.issue.pull_request != null
+       && contains(github.event.comment.body, '[x] **Apply lockfile fix**')
+       && !contains(github.event.changes.body.from, '[x] **Apply lockfile fix**'))
+    runs-on: ubuntu-latest
+    timeout-minutes: 25
+    steps:
+      - name: Authorize & resolve PR
+        id: resolve
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea  # v7.0.1
+        with:
+          script: |
+            // 1. Verify the actor has write access — applies to both checkbox
+            //    clicks and manual dispatch.
+            const { data: perm } =
+              await github.rest.repos.getCollaboratorPermissionLevel({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                username: context.actor,
+              });
+            if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
+              core.setFailed(
+                `${context.actor} lacks write access (has: ${perm.permission})`
+              );
+              return;
+            }
+
+            // 2. Resolve which ref to check out.
+            let prNumber = '';
+            if (context.eventName === 'issue_comment') {
+              prNumber = String(context.payload.issue.number);
+            } else if (context.eventName === 'workflow_dispatch') {
+              prNumber = context.payload.inputs.pr_number || '';
+            }
+
+            if (!prNumber) {
+              core.setOutput('ref', context.ref.replace(/^refs\/heads\//, ''));
+              core.setOutput('repo', context.repo.repo);
+              core.setOutput('owner', context.repo.owner);
+              core.setOutput('pr', '');
+              return;
+            }
+
+            const { data: pr } = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: Number(prNumber),
+            });
+            core.setOutput('ref', pr.head.ref);
+            core.setOutput('repo', pr.head.repo.name);
+            core.setOutput('owner', pr.head.repo.owner.login);
+            core.setOutput('pr', String(pr.number));
+
+      # Switch the sticky lockfile-check comment to a "running" state as soon
+      # as the job is authorized, so the user sees their click was picked up
+      # before the roughly minute-long nix build starts.
+      - name: Mark sticky as running
+        if: steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### 🔄 Applying lockfile fix…
+
+            Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
+          ref: ${{ steps.resolve.outputs.ref }}
+          token: ${{ secrets.GITHUB_TOKEN }}
+          fetch-depth: 0
+
+      - uses: ./.github/actions/nix-setup
+        with:
+          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
+
+      - name: Apply lockfile hashes
+        id: apply
+        run: nix run .#fix-lockfiles
+
+      - name: Commit & push
+        if: steps.apply.outputs.changed == 'true'
+        shell: bash
+        run: |
+          set -euo pipefail
+          git config user.name 'github-actions[bot]'
+          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
+          git add nix/lib.nix
+          git commit -m "fix(nix): refresh npm lockfile hashes"
+          git push
+
+      - name: Update sticky (applied)
+        if: steps.apply.outputs.changed == 'true' && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ✅ Lockfile fix applied
+
+            Pushed a commit refreshing the npm lockfile hashes — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - name: Update sticky (already current)
+        if: steps.apply.outputs.changed == 'false' && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ✅ Lockfile hashes already current
+
+            Nothing to commit — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - name: Update sticky (failed)
+        if: failure() && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ❌ Lockfile fix failed
+
+            See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for logs.
--- a/.github/workflows/nix.yml
+++ b/.github/workflows/nix.yml
@@ -0,0 +1,105 @@
+name: Nix
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+
+permissions:
+  contents: read
+  pull-requests: write
+
+concurrency:
+  group: nix-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  nix:
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+    runs-on: ${{ matrix.os }}
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: ./.github/actions/nix-setup
+        with:
+          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
+
+      - name: Resolve head SHA
+        if: github.event_name == 'pull_request'
+        id: sha
+        shell: bash
+        run: |
+          FULL="${{ github.event.pull_request.head.sha || github.sha }}"
+          echo "full=$FULL" >> "$GITHUB_OUTPUT"
+          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
+
+      - name: Check flake
+        id: flake
+        continue-on-error: true
+        run: nix flake check --print-build-logs
+
+      # When the flake check fails, run a targeted diagnostic to see if
+      # the failure is specifically a stale npm lockfile hash in one of the
+      # known npm subpackages (tui / web).  This avoids surfacing a generic
+      # "build failed" message when the fix is a single known command.
+      - name: Diagnose npm lockfile hashes
+        id: hash_check
+        if: steps.flake.outcome == 'failure' && runner.os == 'Linux'
+        continue-on-error: true
+        env:
+          LINK_SHA: ${{ steps.sha.outputs.full }}
+        run: nix run .#fix-lockfiles -- --check
+
+      # If fix-lockfiles itself crashes (infrastructure blip, cache throttle,
+      # etc.) it won't set stale=true/false.  Treat that as a distinct failure
+      # mode rather than silently ignoring it.
+      - name: Fail if hash check crashed without reporting
+        if: steps.hash_check.outcome == 'failure' && steps.hash_check.outputs.stale != 'true' && steps.hash_check.outputs.stale != 'false'
+        run: |
+          echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
+          exit 1
+
+      - name: Post sticky PR comment (stale hashes)
+        if: steps.hash_check.outputs.stale == 'true' && github.event_name == 'pull_request'
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          message: |
+            ### ⚠️ npm lockfile hash out of date
+
+            Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
+
+            The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
+
+            ${{ steps.hash_check.outputs.report }}
+
+            #### Apply the fix
+
+            - [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
+            - Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
+            - Or locally: `nix run .#fix-lockfiles` and commit the diff
+
+      # Clear the sticky comment when either the flake check passed outright (no
+      # hash check needed) or the hash check explicitly returned stale=false
+      # (check failed for a non-hash reason).
+      - name: Clear sticky PR comment (resolved)
+        if: |
+          github.event_name == 'pull_request' &&
+          (steps.hash_check.outputs.stale == 'false' ||
+           steps.flake.outcome == 'success')
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          delete: true
+
+      - name: Final fail if flake check failed
+        if: steps.flake.outcome == 'failure'
+        run: |
+          if [ "${{ steps.hash_check.outputs.stale }}" == "true" ]; then
+            echo "::error::Nix build failed due to stale npm lockfile hash. Run: nix run .#fix-lockfiles"
+          else
+            echo "::error::Nix flake check failed. See logs above."
+          fi
+          exit 1
--- a/.github/workflows/osv-scanner.yml
+++ b/.github/workflows/osv-scanner.yml
@@ -20,23 +20,29 @@ name: OSV-Scanner
 # vulnerabilities in pinned deps that we may need to patch deliberately.

 on:
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
+    paths:
+      - 'uv.lock'
+      - 'pyproject.toml'
+      - 'package.json'
+      - 'package-lock.json'
+      - 'ui-tui/package.json'
+      - 'website/package.json'
+      - 'website/package-lock.json'
+      - '.github/workflows/osv-scanner.yml'
  push:
    branches: [main]
    paths:
-      - "uv.lock"
-      - "pyproject.toml"
-      - "package.json"
-      - "package-lock.json"
-      - "website/package-lock.json"
+      - 'uv.lock'
+      - 'pyproject.toml'
+      - 'package.json'
+      - 'package-lock.json'
+      - 'website/package-lock.json'
  schedule:
    # Weekly scan against main — catches CVEs published after merge for
    # deps that haven't changed since.
-    - cron: "0 9 * * 1"
+    - cron: '0 9 * * 1'
  workflow_dispatch:

 permissions:
@@ -48,7 +54,7 @@ permissions:
 jobs:
  scan:
    name: Scan lockfiles
-    uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@9a498708959aeaef5ef730655706c5a1df1edbc2 # v2.3.8
+    uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@9a498708959aeaef5ef730655706c5a1df1edbc2  # v2.3.8
    with:
      # Scan explicit lockfiles rather than recursing, so we only look at
      # the three sources of truth and skip vendored / test / worktree dirs.
--- a/.github/workflows/skills-index.yml
+++ b/.github/workflows/skills-index.yml
@@ -53,4 +53,4 @@ jobs:
      - name: Trigger Deploy Site workflow
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: gh workflow run deploy-site.yml --repo ${{ github.repository }} -f skills_index_run_id=${{ github.run_id }}
+        run: gh workflow run deploy-site.yml --repo ${{ github.repository }}
--- a/.github/workflows/supply-chain-audit.yml
+++ b/.github/workflows/supply-chain-audit.yml
@@ -1,11 +1,11 @@
 name: Supply Chain Audit

 on:
+  pull_request:
+    types: [opened, synchronize, reopened]
  # No paths filter — the jobs must always run so required checks
  # report a status (path-gated workflows leave checks "pending" forever
  # when no matching files change, which blocks merge).
-  pull_request:
-    types: [opened, synchronize, reopened]

 permissions:
  pull-requests: write
@@ -29,10 +29,8 @@ jobs:
      scan: ${{ steps.filter.outputs.scan }}
      # True when pyproject.toml changed in this PR
      deps: ${{ steps.filter.outputs.deps }}
-      # True when the curated MCP catalog / bundled MCP manifests changed.
-      mcp_catalog: ${{ steps.filter.outputs.mcp_catalog }}
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          fetch-depth: 0
      - name: Check for relevant file changes
@@ -56,14 +54,6 @@ jobs:
          else
            echo "deps=false" >> "$GITHUB_OUTPUT"
          fi
-          MCP_CATALOG_FILES=$(git diff --name-only "$BASE"..."$HEAD" -- \
-            'optional-mcps/**' \
-            'hermes_cli/mcp_catalog.py' || true)
-          if [ -n "$MCP_CATALOG_FILES" ]; then
-            echo "mcp_catalog=true" >> "$GITHUB_OUTPUT"
-          else
-            echo "mcp_catalog=false" >> "$GITHUB_OUTPUT"
-          fi

  scan:
    name: Scan PR for critical supply chain risks
@@ -72,7 +62,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          fetch-depth: 0

@@ -207,7 +197,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          fetch-depth: 0

@@ -278,50 +268,3 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - run: echo "No pyproject.toml changes, skipping dependency bounds check."
-
-  mcp-catalog-review:
-    name: MCP catalog security review
-    needs: changes
-    if: needs.changes.outputs.mcp_catalog == 'true'
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          fetch-depth: 0
-
-      - name: Require explicit MCP catalog review label
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          set -euo pipefail
-          PR="${{ github.event.pull_request.number }}"
-          LABELS=$(gh pr view "$PR" --json labels --jq '.labels[].name' || true)
-          if echo "$LABELS" | grep -Fxq 'mcp-catalog-reviewed'; then
-            echo "MCP catalog review label present."
-            exit 0
-          fi
-
-          BODY="## ⚠️ MCP catalog security review required
-
-          This PR changes the bundled MCP catalog or MCP catalog installer code. MCP entries can define local commands that users later install into \`mcp_servers\`, so this needs explicit maintainer review before merge.
-
-          A maintainer should verify:
-          - any new/changed \`optional-mcps/**/manifest.yaml\` command and args are expected,
-          - stdio transports do not use shell+egress/exfiltration payloads,
-          - git install refs are pinned and bootstrap commands are minimal,
-          - requested env vars/secrets match the upstream MCP's documented needs.
-
-          After review, add the \`mcp-catalog-reviewed\` label and re-run this check."
-
-          gh pr comment "$PR" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs)"
-          echo "::error::MCP catalog changes require the mcp-catalog-reviewed label."
-          exit 1
-
-  mcp-catalog-review-gate:
-    name: MCP catalog security review
-    needs: changes
-    if: always() && needs.changes.outputs.mcp_catalog != 'true'
-    runs-on: ubuntu-latest
-    steps:
-      - run: echo "No MCP catalog changes, skipping MCP catalog security review."
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -4,13 +4,13 @@ on:
  push:
    branches: [main]
    paths-ignore:
-      - "**/*.md"
-      - "docs/**"
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
+      - '**/*.md'
+      - 'docs/**'
  pull_request:
    branches: [main]
+    paths-ignore:
+      - '**/*.md'
+      - 'docs/**'

 permissions:
  contents: read
@@ -30,17 +30,13 @@ jobs:
        slice: [1, 2, 3, 4, 5, 6]
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      - name: Restore duration cache
-        uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
+        uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae  # v5.0.5
        with:
          path: test_durations.json
-          # main always writes a new suffix, but jobs pick the latest one with the same prefix
-          # quote from https://docs.github.com/en/actions/reference/workflows-and-actions/dependency-caching#cache-hits-and-misses
-          # If you provide restore-keys, the cache action sequentially searches for any caches that match the list of restore-keys.
-          # If there are no exact matches, the action searches for partial matches of the restore keys.
-          # When the action finds a partial match, the most recent cache is restored to the path directory.
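+          # Single stable key shared by main and PRs. NOTE(review): GitHub caches are immutable per key, so a save under an existing key does not overwrite it — confirm main actually refreshes this entry.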
          key: test-durations

      - name: Install ripgrep (prebuilt binary)
@@ -58,7 +54,7 @@ jobs:
          rg --version

      - name: Install uv
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5
        with:
          # Persist uv's download/wheel cache (~/.cache/uv) across runs.
          # Keyed on the dependency manifests, so the cache is reused until
@@ -119,7 +115,7 @@ jobs:
          NOUS_API_KEY: ""

      - name: Upload per-slice durations
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
        with:
          name: test-durations-slice-${{ matrix.slice }}
          path: test_durations.json
@@ -129,11 +125,11 @@ jobs:
  # (including PRs) get balanced slicing.
  save-durations:
    needs: test
-    if: needs.test.result == 'success' && github.ref == 'refs/heads/main'
+    if: always() && github.ref == 'refs/heads/main'
    runs-on: ubuntu-latest
    steps:
      - name: Download all slice durations
-        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
        with:
          pattern: test-durations-slice-*
          path: durations
@@ -153,17 +149,17 @@ jobs:
          "

      - name: Save merged duration cache
-        uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
+        uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae  # v5.0.5
        with:
          path: test_durations.json
-          key: test-durations-${{ github.run_id }}
+          key: test-durations

  e2e:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      - name: Install ripgrep (prebuilt binary)
        run: |
@@ -180,7 +176,7 @@ jobs:
          rg --version

      - name: Install uv
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5
        with:
          # Persist uv's download/wheel cache (~/.cache/uv) across runs.
          # Keyed on the dependency manifests, so the cache is reused until
@@ -219,4 +215,4 @@ jobs:
        env:
          OPENROUTER_API_KEY: ""
          OPENAI_API_KEY: ""
-          NOUS_API_KEY: ""
+          NOUS_API_KEY: ""
--- a/.github/workflows/typecheck.yml
+++ b/.github/workflows/typecheck.yml
@@ -4,42 +4,21 @@ name: Typecheck
 on:
  push:
    branches: [main]
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]

 jobs:
  typecheck:
    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        package:
-          [ui-tui, web, apps/bootstrap-installer, apps/desktop, apps/shared]
-      fail-fast: false # report all failures, not just the first one
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
        with:
-          node-version: 22
+          node-version: 26
          cache: npm
      - run: npm ci
-      - run: npm run --prefix ${{ matrix.package }} typecheck
-
-  # Production build of the desktop renderer. `typecheck` runs `tsc` only,
-  # which does NOT exercise Vite/Rolldown module resolution — so an
-  # unresolvable package export (e.g. a transitive @assistant-ui/tap that no
-  # longer exports "./react-shim") slips past typecheck and only explodes when
-  # users build apps/desktop from source on install/update. Run the real
-  # `vite build` here so that class of break fails in CI instead.
-  desktop-build:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
-        with:
-          node-version: 22
-          cache: npm
-      - run: npm ci
-      - run: npm run --prefix apps/desktop build
+      - run: npm run --prefix ui-tui typecheck
+      - run: npm run --prefix web typecheck
+      - run: npm run --prefix apps/bootstrap-installer typecheck
+      - run: npm run --prefix apps/desktop typecheck
+      - run: npm run --prefix apps/shared typecheck
--- a/.github/workflows/upload_to_pypi.yml
+++ b/.github/workflows/upload_to_pypi.yml
@@ -53,7 +53,7 @@ jobs:
      - name: Set up Node.js
        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
-          node-version: '22'
+          node-version: '26'

      - name: Build web dashboard
        run: cd web && npm ci && npm run build
--- a/.github/workflows/uv-lockfile-check.yml
+++ b/.github/workflows/uv-lockfile-check.yml
@@ -47,15 +47,15 @@ on:
  push:
    branches: [main]
    paths:
-      - "pyproject.toml"
-      - "uv.lock"
-      - ".github/workflows/uv-lockfile-check.yml"
-
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - '.github/workflows/uv-lockfile-check.yml'
  pull_request:
    branches: [main]
+    paths:
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - '.github/workflows/uv-lockfile-check.yml'

 permissions:
  contents: read
@@ -71,10 +71,10 @@ jobs:
    timeout-minutes: 5
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      - name: Install uv
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5

      # `uv lock --check` re-resolves the project from pyproject.toml and
      # compares the result to uv.lock, exiting non-zero if they disagree.
--- a/.nvmrc
+++ b/.nvmrc
@@ -0,0 +1 @@
+26.3.0
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -78,41 +78,7 @@ This isn't a quality bar — it's a coupling-and-maintenance decision. Memory pr
 | **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
 | **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |

-### Install with the standard installer
-
-For most contributors, the best development bootstrap is the same path users
-take: run the standard installer, then work inside the repository it cloned.
-The installer creates the Hermes venv, wires the `hermes` command, stamps the
-install method for `hermes update`, and clones the full git project into
-`$HERMES_HOME/hermes-agent` (usually `~/.hermes/hermes-agent`). That keeps your
-development environment on the same layout the CLI, updater, lazy dependency
-installer, gateway, and docs assume.
-
-```bash
-curl -fsSL https://hermes-agent.nousresearch.com/install.sh | bash
-cd "${HERMES_HOME:-$HOME/.hermes}/hermes-agent"
-
-# Add dev/test extras on top of the standard install.
-uv pip install -e ".[all,dev]"
-
-# Optional: browser tools / docs site dependencies.
-npm install
-```
-
-After that, create branches and run tests from that checkout:
-
-```bash
-git checkout -b fix/description
-scripts/run_tests.sh
-```
-
-### Manual clone fallback
-
-Use this only if you intentionally do not want Hermes' managed install layout
-(for example, a throwaway clone inside a container or CI job). If you install
-this way, make sure you run the `hermes` entrypoint from this venv; running the
-system `python3 -m hermes_cli.main` can pick up unrelated system Python
-packages.
+### Clone and install

 ```bash
 git clone https://github.com/NousResearch/hermes-agent.git
@@ -143,17 +109,13 @@ echo "OPENROUTER_API_KEY=***" >> ~/.hermes/.env
 ### Run

 ```bash
-# The standard installer already put `hermes` on PATH.
-hermes doctor
-hermes chat -q "Hello"
-```
-
-If you used the manual clone fallback, run `./hermes` from the checkout or
-symlink this clone's venv explicitly:
-
-```bash
+# Symlink for global access
 mkdir -p ~/.local/bin
 ln -sf "$(pwd)/venv/bin/hermes" ~/.local/bin/hermes
+
+# Verify
+hermes doctor
+hermes chat -q "Hello"
 ```

 ### Run tests
--- a/18
+++ b/18
@@ -1,12 +1,12 @@
 FROM ghcr.io/astral-sh/uv:0.11.6-python3.13-trixie@sha256:b3c543b6c4f23a5f2df22866bd7857e5d304b67a564f4feab6ac22044dde719b AS uv_source
-# Node 22 LTS source stage. Debian trixie's bundled nodejs is pinned to 20.x
+# Node 26 source stage. Debian trixie's bundled nodejs is pinned to 20.x
 # which reached EOL in April 2026 — we copy node + npm + corepack from the
-# upstream node:22 image instead so we can stay on a supported LTS without
-# waiting for Debian 14 (forky, ~mid-2027).  Bookworm-based slim image used
+# upstream node:26 image instead so we can stay on a supported Node release
+# without waiting for Debian 15+. Bookworm-based slim image used
 # so the produced binary links against glibc 2.36, which runs cleanly on
 # our Debian 13 (trixie, glibc 2.41) runtime.  Bumping to a new Node major
 # is a one-line ARG change; see #4977.
-FROM node:22-bookworm-slim@sha256:7af03b14a13c8cdd38e45058fd957bf00a72bbe17feac43b1c15a689c029c732 AS node_source
+FROM node:26-bookworm-slim@sha256:3fe807a03a4436e7bc76b7e84e6861899cd75c9028ae99bc00581940141ae150 AS node_source
 FROM debian:13.4

 # Disable Python stdout buffering to ensure logs are printed immediately
@@ -90,17 +90,15 @@ RUN useradd -u 10000 -m -d /opt/data hermes

 COPY --chmod=0755 --from=uv_source /usr/local/bin/uv /usr/local/bin/uvx /usr/local/bin/

-# Node 22 LTS: copy the node binary plus the bundled npm + corepack JS
-# installs from the upstream image.  npm and npx are recreated as symlinks
+# Node 26: copy the node binary plus the bundled npm JS
+# install from the upstream image. npm and npx are recreated as symlinks
 # because they're symlinks in the source image (and need to live on PATH).
 # See node_source stage at the top of the file for the version-bump
 # rationale (#4977).
 COPY --chmod=0755 --from=node_source /usr/local/bin/node /usr/local/bin/
 COPY --from=node_source /usr/local/lib/node_modules/npm /usr/local/lib/node_modules/npm
-COPY --from=node_source /usr/local/lib/node_modules/corepack /usr/local/lib/node_modules/corepack
 RUN ln -sf /usr/local/lib/node_modules/npm/bin/npm-cli.js /usr/local/bin/npm && \
-    ln -sf /usr/local/lib/node_modules/npm/bin/npx-cli.js /usr/local/bin/npx && \
-    ln -sf /usr/local/lib/node_modules/corepack/dist/corepack.js /usr/local/bin/corepack
+    ln -sf /usr/local/lib/node_modules/npm/bin/npx-cli.js /usr/local/bin/npx

 WORKDIR /opt/hermes

@@ -119,7 +117,7 @@ COPY ui-tui/packages/hermes-ink/ ui-tui/packages/hermes-ink/

 # `npm_config_install_links=false` forces npm to install `file:` deps as
 # symlinks instead of copies.  This is the default since npm 10+, which is
-# what the image ships now (via the node:22 source stage).  We set it
+# what the image ships now (via the node:26 source stage).  We set it
 # explicitly anyway as defense-in-depth: the previous Debian-bundled npm
 # 9.x defaulted to install-as-copy, which produced a hidden
 # node_modules/.package-lock.json that permanently disagreed with the root
--- a/README.md
+++ b/README.md
@@ -181,20 +181,16 @@ See `hermes claw migrate --help` for all options, or use the `openclaw-migration

 We welcome contributions! See the [Contributing Guide](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) for development setup, code style, and PR process.

-Quick start for contributors — use the standard installer, then work from the
-full git checkout it creates at `$HERMES_HOME/hermes-agent` (usually
-`~/.hermes/hermes-agent`). This matches the layout used by `hermes update`, the
-managed venv, lazy dependencies, gateway, and docs tooling.
+Quick start for contributors — clone and go with `setup-hermes.sh`:

 ```bash
-curl -fsSL https://hermes-agent.nousresearch.com/install.sh | bash
-cd "${HERMES_HOME:-$HOME/.hermes}/hermes-agent"
-uv pip install -e ".[all,dev]"
-scripts/run_tests.sh
+git clone https://github.com/NousResearch/hermes-agent.git
+cd hermes-agent
+./setup-hermes.sh     # installs uv, creates venv, installs .[all], symlinks ~/.local/bin/hermes
+./hermes              # auto-detects the venv, no need to `source` first
 ```

-Manual clone fallback (for throwaway clones/CI where you intentionally do not
-want the managed install layout):
+Manual path (equivalent to the above):

 ```bash
 curl -LsSf https://astral.sh/uv/install.sh | sh
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -164,18 +164,16 @@ hermes claw migrate --overwrite  # 覆盖已有冲突

 欢迎贡献！请参阅 [贡献指南](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) 了解开发设置、代码风格和 PR 流程。

-贡献者快速开始——使用标准安装器，然后在它创建的完整 git checkout 中开发：
-`$HERMES_HOME/hermes-agent`（通常是 `~/.hermes/hermes-agent`）。这会匹配
-`hermes update`、托管 venv、lazy dependencies、gateway 和 docs tooling 使用的布局。
+贡献者快速开始——克隆并使用 `setup-hermes.sh`：

 ```bash
-curl -fsSL https://hermes-agent.nousresearch.com/install.sh | bash
-cd "${HERMES_HOME:-$HOME/.hermes}/hermes-agent"
-uv pip install -e ".[all,dev]"
-scripts/run_tests.sh
+git clone https://github.com/NousResearch/hermes-agent.git
+cd hermes-agent
+./setup-hermes.sh     # 安装 uv、创建 venv、安装 .[all]、创建符号链接 ~/.local/bin/hermes
+./hermes              # 自动检测 venv，无需先 source
 ```

-手动克隆备用路径（用于一次性 clone / CI，或你明确不想使用 managed install layout 时）：
+手动安装（等效于上述命令）：

 ```bash
 curl -LsSf https://astral.sh/uv/install.sh | sh
--- a/acp_adapter/server.py
+++ b/acp_adapter/server.py
@@ -824,7 +824,6 @@ class HermesACPAgent(acp.Agent):

        try:
            from model_tools import get_tool_definitions
-            from agent.memory_manager import inject_memory_provider_tools

            enabled_toolsets = _expand_acp_enabled_toolsets(
                getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"],
@@ -840,7 +839,6 @@ class HermesACPAgent(acp.Agent):
            state.agent.valid_tool_names = {
                tool["function"]["name"] for tool in state.agent.tools or []
            }
-            inject_memory_provider_tools(state.agent)
            invalidate = getattr(state.agent, "_invalidate_system_prompt", None)
            if callable(invalidate):
                invalidate()
@@ -1781,25 +1779,10 @@ class HermesACPAgent(acp.Agent):
    def _cmd_tools(self, args: str, state: SessionState) -> str:
        try:
            from model_tools import get_tool_definitions
-            from types import SimpleNamespace
-            from agent.memory_manager import inject_memory_provider_tools
-
            toolsets = _expand_acp_enabled_toolsets(
                getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
            )
            tools = get_tool_definitions(enabled_toolsets=toolsets, quiet_mode=True)
-            tool_view = SimpleNamespace(
-                tools=list(tools or []),
-                valid_tool_names={
-                    tool.get("function", {}).get("name")
-                    for tool in tools or []
-                    if isinstance(tool, dict)
-                },
-                enabled_toolsets=toolsets,
-                _memory_manager=getattr(state.agent, "_memory_manager", None),
-            )
-            inject_memory_provider_tools(tool_view)
-            tools = tool_view.tools
            if not tools:
                return "No tools available."
            lines = [f"Available tools ({len(tools)}):"]
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@@ -27,7 +27,7 @@ import threading
 import time
 import uuid
 from datetime import datetime
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Dict, List, Optional
 from urllib.parse import urlparse, parse_qs, urlunparse

 from agent.context_compressor import ContextCompressor
@@ -195,7 +195,6 @@ def init_agent(
    status_callback: callable = None,
    notice_callback: callable = None,
    notice_clear_callback: callable = None,
-    event_callback: Optional[Callable[[str, dict], None]] = None,
    max_tokens: int = None,
    reasoning_config: Dict[str, Any] = None,
    service_tier: str = None,
@@ -300,7 +299,6 @@ def init_agent(
    # would mangle the escape sequences.  None = use builtins.print.
    agent._print_fn = None
    agent.background_review_callback = None  # Optional sync callback for gateway delivery
-    agent.memory_notifications = "on"  # Memory update notifications: "off", "on", "verbose"
    agent.skip_context_files = skip_context_files
    agent.load_soul_identity = load_soul_identity
    agent.pass_session_id = pass_session_id
@@ -427,7 +425,6 @@ def init_agent(
    agent.status_callback = status_callback
    agent.notice_callback = notice_callback
    agent.notice_clear_callback = notice_clear_callback
-    agent.event_callback = event_callback
    agent.tool_gen_callback = tool_gen_callback

    
@@ -599,7 +596,6 @@ def init_agent(
    # (e.g. CLI voice mode adds a temporary prefix for the live call only).
    agent._persist_user_message_idx = None
    agent._persist_user_message_override = None
-    agent._persist_user_message_timestamp = None

    # Cache anthropic image-to-text fallbacks per image payload/URL so a
    # single tool loop does not repeatedly re-run auxiliary vision on the
@@ -904,9 +900,6 @@ def init_agent(
        agent.api_key = client_kwargs.get("api_key", "")
        agent.base_url = client_kwargs.get("base_url", agent.base_url)
        try:
-            from agent.ssl_guard import verify_ca_bundle_with_fallback
-
-            verify_ca_bundle_with_fallback()
            agent.client = agent._create_openai_client(client_kwargs, reason="agent_init", shared=True)
            if not agent.quiet_mode:
                print(f"🤖 AI Agent initialized with model: {agent.model}")
@@ -1200,8 +1193,38 @@ def init_agent(
            _ra().logger.warning("Memory provider plugin init failed: %s", _mpe)
            agent._memory_manager = None

-    from agent.memory_manager import inject_memory_provider_tools as _inject_memory_provider_tools
-    _inject_memory_provider_tools(agent)
+    # Inject memory provider tool schemas into the tool surface.
+    # Skip tools whose names already exist (plugins may register the
+    # same tools via ctx.register_tool(), which lands in agent.tools
+    # through _ra().get_tool_definitions()).  Duplicate function names cause
+    # 400 errors on providers that enforce unique names (e.g. Xiaomi
+    # MiMo via Nous Portal).
+    #
+    # Respect the platform's enabled_toolsets configuration (#5544):
+    #   enabled_toolsets is None        → no filter, inject (backward compat)
+    #   "memory" in enabled_toolsets    → user opted in, inject
+    #   otherwise (incl. [])            → user excluded memory, skip injection
+    #
+    # Without this gate, `platform_toolsets: telegram: []` still leaks memory
+    # provider tools (fact_store, etc.) into the tool surface — a 10x latency
+    # penalty on local models and a frequent trigger of tool-call loops.
+    if agent._memory_manager and agent.tools is not None and (
+        agent.enabled_toolsets is None or "memory" in agent.enabled_toolsets
+    ):
+        _existing_tool_names = {
+            t.get("function", {}).get("name")
+            for t in agent.tools
+            if isinstance(t, dict)
+        }
+        for _schema in agent._memory_manager.get_all_tool_schemas():
+            _tname = _schema.get("name", "")
+            if _tname and _tname in _existing_tool_names:
+                continue  # already registered via plugin path
+            _wrapped = {"type": "function", "function": _schema}
+            agent.tools.append(_wrapped)
+            if _tname:
+                agent.valid_tool_names.add(_tname)
+                _existing_tool_names.add(_tname)

    # Skills config: nudge interval for skill creation reminders
    agent._skill_nudge_interval = 10
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@@ -445,45 +445,6 @@ def repair_message_sequence(agent, messages: List[Dict]) -> int:
    return repairs


-def repair_message_sequence_with_cursor(agent, messages: List[Dict]) -> int:
-    """Run :func:`repair_message_sequence` and keep the SessionDB flush
-    cursor consistent with the compacted list (#44837).
-
-    ``repair_message_sequence`` merges/drops messages in place, shrinking
-    the list. ``_last_flushed_db_idx`` (the DB-write cursor) indexes into
-    that list, so after compaction it can point past the new end — the
-    turn-end flush would then skip the assistant/tool chain entirely — or
-    past unflushed messages shifted to lower indexes.
-
-    Repair preserves object identity for surviving messages, so counting
-    the survivors from the previously-flushed prefix gives the exact new
-    cursor even when messages are dropped/merged at indexes *before* the
-    cursor — a plain ``min()`` clamp would silently skip that many
-    unflushed rows. Falls back to the clamp when no prefix snapshot is
-    available.
-
-    Returns the number of repairs made (same as ``repair_message_sequence``).
-    """
-    pre_repair_flushed_ids = None
-    flush_cursor = getattr(agent, "_last_flushed_db_idx", None)
-    if isinstance(flush_cursor, int) and flush_cursor > 0:
-        pre_repair_flushed_ids = {id(m) for m in messages[:flush_cursor]}
-
-    repairs = repair_message_sequence(agent, messages)
-
-    if repairs > 0 and hasattr(agent, "_last_flushed_db_idx"):
-        if pre_repair_flushed_ids is not None:
-            agent._last_flushed_db_idx = sum(
-                1 for m in messages if id(m) in pre_repair_flushed_ids
-            )
-        else:
-            agent._last_flushed_db_idx = min(
-                agent._last_flushed_db_idx, len(messages)
-            )
-
-    return repairs
-
-

 def strip_think_blocks(agent, content: str) -> str:
    """Remove reasoning/thinking blocks from content, returning only visible text.
@@ -618,33 +579,12 @@ def recover_with_credential_pool(
    current_provider = (getattr(agent, "provider", "") or "").strip().lower()
    pool_provider = (getattr(pool, "provider", "") or "").strip().lower()
    if current_provider and pool_provider and current_provider != pool_provider:
-        # Custom endpoints use two naming conventions for the SAME provider:
-        # the agent carries the generic ``custom`` label while the pool is
-        # keyed ``custom:<name>`` (see CUSTOM_POOL_PREFIX). A literal string
-        # compare treats them as a mismatch and skips recovery for every
-        # custom-provider user — 401s/429s then burn the full retry cycle
-        # with no rotation or refresh. Accept the pair as matching only when
-        # the agent's CURRENT base_url actually resolves to this pool key,
-        # so a fallback provider (or a different custom endpoint) still
-        # triggers the guard.
-        _custom_match = False
-        if current_provider == "custom" and pool_provider.startswith("custom:"):
-            try:
-                from agent.credential_pool import get_custom_provider_pool_key
-                _agent_base = (getattr(agent, "base_url", "") or "").strip()
-                _custom_match = bool(_agent_base) and (
-                    (get_custom_provider_pool_key(_agent_base) or "").strip().lower()
-                    == pool_provider
-                )
-            except Exception:
-                _custom_match = False
-        if not _custom_match:
-            _ra().logger.warning(
-                "Credential pool provider mismatch: pool=%s, agent=%s — "
-                "skipping pool mutation to avoid cross-provider contamination",
-                pool_provider, current_provider,
-            )
-            return False, has_retried_429
+        _ra().logger.warning(
+            "Credential pool provider mismatch: pool=%s, agent=%s — "
+            "skipping pool mutation to avoid cross-provider contamination",
+            pool_provider, current_provider,
+        )
+        return False, has_retried_429

    effective_reason = classified_reason
    if effective_reason is None:
@@ -881,8 +821,6 @@ def try_recover_primary_transport(

 def drop_thinking_only_and_merge_users(
    messages: List[Dict[str, Any]],
-    *,
-    drop_codex_reasoning_items: bool = True,
 ) -> List[Dict[str, Any]]:
    """Drop thinking-only assistant turns; merge any adjacent user messages left behind.

@@ -904,13 +842,7 @@ def drop_thinking_only_and_merge_users(
        return messages

    # Pass 1: drop thinking-only assistant turns.
-    kept = [
-        m for m in messages
-        if not _ra().AIAgent._is_thinking_only_assistant(
-            m,
-            drop_codex_reasoning_items=drop_codex_reasoning_items,
-        )
-    ]
+    kept = [m for m in messages if not _ra().AIAgent._is_thinking_only_assistant(m)]
    dropped = len(messages) - len(kept)
    if dropped == 0:
        return messages
@@ -1217,23 +1149,12 @@ def dump_api_request_debug(

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        dump_file = agent.logs_dir / f"request_dump_{agent.session_id}_{timestamp}.json"
-
-        # Redact secrets before persisting/printing. This dump captures the
-        # full request body (system prompt, tool defs, context-embedded
-        # values), and this path fires unconditionally on API errors — so it
-        # otherwise lands any context-embedded secret in cleartext on disk.
-        # Run the serialized dump through the same scrubber used for logs/tool
-        # output, then hand the resulting payload back to the shared atomic
-        # JSON writer so request dumps keep the same write semantics as before.
-        from agent.redact import redact_sensitive_text
-        _serialized = json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str)
-        _redacted_payload = json.loads(redact_sensitive_text(_serialized, force=True))
-        atomic_json_write(dump_file, _redacted_payload, default=str)
+        atomic_json_write(dump_file, dump_payload, default=str)

        agent._vprint(f"{agent.log_prefix}🧾 Request debug dump written to: {dump_file}")

        if env_var_enabled("HERMES_DUMP_REQUEST_STDOUT"):
-            print(json.dumps(_redacted_payload, ensure_ascii=False, indent=2, default=str))
+            print(json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str))

        return dump_file
    except Exception as dump_error:
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -751,9 +751,6 @@ def build_anthropic_client(
    from httpx import Timeout

    normalized_base_url = _normalize_base_url_text(base_url)
-    if normalized_base_url:
-        import re as _re
-        normalized_base_url = _re.sub(r"/v1/?$", "", normalized_base_url.rstrip("/"))
    _read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
    kwargs = {
        "timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -1144,8 +1144,7 @@ def _endpoint_speaks_anthropic_messages(base_url: str) -> bool:
    normalized = (base_url or "").strip().lower().rstrip("/")
    if not normalized:
        return False
-    path = urlparse(normalized).path.rstrip("/")
-    if path.endswith("/anthropic") or path.endswith("/anthropic/v1"):
+    if normalized.endswith("/anthropic"):
        return True
    hostname = base_url_hostname(normalized)
    if hostname == "api.anthropic.com":
@@ -3079,20 +3078,23 @@ def _try_configured_fallback_chain(
        if not fb_provider or fb_provider.lower() == skip:
            continue
        fb_model = str(entry.get("model", "")).strip() or None
+        fb_base_url = str(entry.get("base_url", "")).strip() or None
+        fb_api_key = str(entry.get("api_key", "")).strip() or None

        label = f"fallback_chain[{i}]({fb_provider})"

        try:
-            fb_client, resolved_model = _resolve_fallback_entry(entry)
+            fb_client = _resolve_single_provider(
+                fb_provider, fb_model, fb_base_url, fb_api_key)
        except Exception:
-            fb_client, resolved_model = None, None
+            fb_client = None

        if fb_client is not None:
            logger.info(
                "Auxiliary %s: %s on %s — configured fallback to %s (%s)",
-                task, reason, failed_provider, label, resolved_model or fb_model or "default",
+                task, reason, failed_provider, label, fb_model or "default",
            )
-            return fb_client, resolved_model or fb_model, label
+            return fb_client, fb_model, label
        tried.append(label)

    if tried:
@@ -3103,103 +3105,6 @@ def _try_configured_fallback_chain(
    return None, None, ""


-def _fallback_entry_api_key(entry: Dict[str, Any]) -> Optional[str]:
-    """Resolve inline or env-backed API key from a fallback-chain entry."""
-    explicit = str(entry.get("api_key") or "").strip()
-    if explicit:
-        return explicit
-    key_env = str(entry.get("key_env") or entry.get("api_key_env") or "").strip()
-    if key_env:
-        return os.getenv(key_env, "").strip() or None
-    return None
-
-
-def _resolve_fallback_entry(entry: Dict[str, Any]) -> Tuple[Optional[Any], Optional[str]]:
-    """Resolve one fallback entry through the central provider router."""
-    provider = str(entry.get("provider") or "").strip()
-    model = str(entry.get("model") or "").strip() or None
-    if not provider or not model:
-        return None, None
-    base_url = str(entry.get("base_url") or "").strip() or None
-    api_key = _fallback_entry_api_key(entry)
-    api_mode = str(entry.get("api_mode") or entry.get("transport") or "").strip() or None
-    return resolve_provider_client(
-        provider,
-        model=model,
-        explicit_base_url=base_url,
-        explicit_api_key=api_key,
-        api_mode=api_mode,
-    )
-
-
-def _try_main_fallback_chain(
-    task: Optional[str],
-    failed_provider: str = "",
-    reason: str = "error",
-) -> Tuple[Optional[Any], Optional[str], str]:
-    """Try the top-level main-agent fallback chain for an auxiliary call.
-
-    ``provider: auto`` auxiliary tasks should respect the user's declared
-    main fallback policy before dropping into Hermes' built-in discovery
-    chain. The top-level chain is read through ``get_fallback_chain`` so
-    both modern ``fallback_providers`` and legacy ``fallback_model`` entries
-    participate in the same order as the main agent.
-    """
-    try:
-        from hermes_cli.config import load_config
-        from hermes_cli.fallback_config import get_fallback_chain
-
-        chain = get_fallback_chain(load_config())
-    except Exception as exc:
-        logger.debug("Auxiliary %s: could not load main fallback chain: %s", task or "call", exc)
-        return None, None, ""
-
-    if not chain:
-        return None, None, ""
-
-    failed_norm = (failed_provider or "").strip().lower()
-    main_norm = (_read_main_provider() or "").strip().lower()
-    skip = {p for p in (failed_norm, main_norm, "auto") if p}
-    tried: List[str] = []
-
-    for i, entry in enumerate(chain):
-        if not isinstance(entry, dict):
-            continue
-        fb_provider = str(entry.get("provider") or "").strip()
-        fb_model = str(entry.get("model") or "").strip()
-        if not fb_provider or not fb_model:
-            continue
-        fb_norm = fb_provider.lower()
-        label = f"fallback_providers[{i}]({fb_provider})"
-        if fb_norm in skip:
-            tried.append(f"{label} (skipped)")
-            continue
-        if _is_provider_unhealthy(fb_norm):
-            _log_skip_unhealthy(fb_norm, task)
-            tried.append(f"{label} (unhealthy)")
-            continue
-        try:
-            fb_client, resolved_model = _resolve_fallback_entry(entry)
-        except Exception as exc:
-            logger.debug("Auxiliary %s: main fallback %s failed to resolve: %s", task or "call", label, exc)
-            fb_client, resolved_model = None, None
-        if fb_client is not None:
-            logger.info(
-                "Auxiliary %s: %s on %s — main fallback chain to %s (%s)",
-                task or "call", reason, failed_provider or "auto", label,
-                resolved_model or fb_model,
-            )
-            return fb_client, resolved_model or fb_model, fb_provider
-        tried.append(label)
-
-    if tried:
-        logger.debug(
-            "Auxiliary %s: main fallback chain exhausted (tried: %s)",
-            task or "call", ", ".join(tried),
-        )
-    return None, None, ""
-
-
 def _resolve_single_provider(
    provider: str,
    model: Optional[str] = None,
@@ -3210,19 +3115,16 @@ def _resolve_single_provider(

    Uses the existing provider resolution infrastructure where possible.
    """
-    # Reuse resolve_provider_client which handles provider→client mapping.
+    # Reuse resolve_provider_client which handles provider→client mapping
    client, resolved_model = resolve_provider_client(
        provider=provider,
        model=model,
-        explicit_base_url=base_url,
-        explicit_api_key=api_key,
+        base_url=base_url,
+        api_key=api_key,
    )
    return client

-def _resolve_auto(
-    main_runtime: Optional[Dict[str, Any]] = None,
-    task: Optional[str] = None,
-) -> Tuple[Optional[OpenAI], Optional[str]]:
+def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Optional[OpenAI], Optional[str]]:
    """Full auto-detection chain.

    Priority:
@@ -3288,7 +3190,7 @@ def _resolve_auto(
    if (main_provider and main_model
            and main_provider not in {"auto", ""}):
        resolved_provider = main_provider
-        explicit_base_url = runtime_base_url or None
+        explicit_base_url = None
        explicit_api_key = None
        if runtime_base_url and (main_provider == "custom" or main_provider.startswith("custom:")):
            resolved_provider = "custom"
@@ -3320,22 +3222,7 @@ def _resolve_auto(
                            main_provider, resolved or main_model)
                return client, resolved or main_model

-    # ── Step 2: user-configured fallback policy ─────────────────────────
-    # In auto mode, respect the task-specific fallback chain first, then the
-    # main agent's top-level fallback_providers/fallback_model chain. The
-    # hardcoded provider discovery chain below is only the convenience default
-    # for users who have not declared a fallback policy.
-    if task:
-        fb_client, fb_model, _fb_label = _try_configured_fallback_chain(
-            task, main_provider or "auto", reason="main provider unavailable")
-        if fb_client is not None:
-            return fb_client, fb_model
-    fb_client, fb_model, _fb_label = _try_main_fallback_chain(
-        task, main_provider or "auto", reason="main provider unavailable")
-    if fb_client is not None:
-        return fb_client, fb_model
-
-    # ── Step 3: aggregator / fallback chain ──────────────────────────────
+    # ── Step 2: aggregator / fallback chain ──────────────────────────────
    tried = []
    for label, try_fn in _get_provider_chain():
        if _is_provider_unhealthy(label):
@@ -3456,7 +3343,6 @@ def resolve_provider_client(
    api_mode: str = None,
    main_runtime: Optional[Dict[str, Any]] = None,
    is_vision: bool = False,
-    task: Optional[str] = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Central router: given a provider name and optional model, return a
    configured client with the correct auth, base URL, and API format.
@@ -3577,7 +3463,7 @@ def resolve_provider_client(

    # ── Auto: try all providers in priority order ────────────────────
    if provider == "auto":
-        client, resolved = _resolve_auto(main_runtime=main_runtime, task=task)
+        client, resolved = _resolve_auto(main_runtime=main_runtime)
        if client is None:
            return None, None
        # When auto-detection lands on a non-OpenRouter provider (e.g. a
@@ -4470,16 +4356,11 @@ def _client_cache_key(
    api_mode: Optional[str] = None,
    main_runtime: Optional[Dict[str, Any]] = None,
    is_vision: bool = False,
-    task: Optional[str] = None,
 ) -> tuple:
    runtime = _normalize_main_runtime(main_runtime)
    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
-    # `auto` can now resolve through task-specific or main fallback policy,
-    # so the task participates in the cache key. Non-auto providers keep the
-    # old cache shape because the explicit provider/model tuple is sufficient.
-    task_key = (task or "") if provider == "auto" else ""
    pool_hint = _pool_cache_hint(provider, main_runtime=main_runtime)
-    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision, task_key, pool_hint)
+    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision, pool_hint)


 def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
@@ -4672,7 +4553,6 @@ def _get_cached_client(
    api_mode: str = None,
    main_runtime: Optional[Dict[str, Any]] = None,
    is_vision: bool = False,
-    task: Optional[str] = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Get or create a cached client for the given provider.

@@ -4710,7 +4590,6 @@ def _get_cached_client(
        api_mode=api_mode,
        main_runtime=main_runtime,
        is_vision=is_vision,
-        task=task,
    )
    with _client_cache_lock:
        if cache_key in _client_cache:
@@ -4755,7 +4634,6 @@ def _get_cached_client(
        api_mode=api_mode,
        main_runtime=runtime,
        is_vision=is_vision,
-        task=task,
    )
    if client is not None:
        # For async clients, remember which loop they were created on so we
@@ -5126,7 +5004,7 @@ def _build_call_kwargs(

    # Provider-specific extra_body
    merged_extra = dict(extra_body or {})
-    if provider == "nous":
+    if provider == "nous" or auxiliary_is_nous:
        merged_extra.setdefault("tags", []).extend(_nous_portal_tags())
    if merged_extra:
        kwargs["extra_body"] = merged_extra
@@ -5261,7 +5139,7 @@ def call_llm(
            if not resolved_base_url:
                logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
                            task or "call", resolved_provider)
-                client, final_model = _get_cached_client("auto", main_runtime=main_runtime, task=task)
+                client, final_model = _get_cached_client("auto", main_runtime=main_runtime)
        if client is None:
            raise RuntimeError(
                f"No LLM provider configured for task={task} provider={resolved_provider}. "
@@ -5587,19 +5465,14 @@ def call_llm(

            # Fallback order (#26882, #26803):
            #   1. User-configured fallback_chain (per-task) if set
-            #   2. For auto: top-level main fallback_providers/fallback_model
-            #   3. For auto: built-in auxiliary discovery chain
-            #   4. For explicit aux providers: main agent model safety net
+            #   2. Main agent model (last-resort safety net)
+            # For auto users (no explicit aux provider), use the full
+            # auto-detection chain instead — its Step 1 IS the main agent
+            # model, so users on `auto` already get main-model fallback.
            fb_client, fb_model, fb_label = (None, None, "")
            if is_auto:
-                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
-                    task, resolved_provider or "auto", reason=reason)
-                if fb_client is None:
-                    fb_client, fb_model, fb_label = _try_main_fallback_chain(
-                        task, resolved_provider or "auto", reason=reason)
-                if fb_client is None:
-                    fb_client, fb_model, fb_label = _try_payment_fallback(
-                        resolved_provider, task, reason=reason)
+                fb_client, fb_model, fb_label = _try_payment_fallback(
+                    resolved_provider, task, reason=reason)
            else:
                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
                    task, resolved_provider or "auto", reason=reason)
@@ -5762,7 +5635,7 @@ async def async_call_llm(
            if not resolved_base_url:
                logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
                            task or "call", resolved_provider)
-                client, final_model = _get_cached_client("auto", async_mode=True, main_runtime=main_runtime, task=task)
+                client, final_model = _get_cached_client("auto", async_mode=True)
        if client is None:
            raise RuntimeError(
                f"No LLM provider configured for task={task} provider={resolved_provider}. "
@@ -6030,19 +5903,13 @@ async def async_call_llm(

            # Fallback order (#26882, #26803):
            #   1. User-configured fallback_chain (per-task) if set
-            #   2. For auto: top-level main fallback_providers/fallback_model
-            #   3. For auto: built-in auxiliary discovery chain
-            #   4. For explicit aux providers: main agent model safety net
+            #   2. Main agent model (last-resort safety net)
+            # Auto users get the full auto-detection chain instead — its
+            # Step 1 IS the main agent model.
            fb_client, fb_model, fb_label = (None, None, "")
            if is_auto:
-                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
-                    task, resolved_provider or "auto", reason=reason)
-                if fb_client is None:
-                    fb_client, fb_model, fb_label = _try_main_fallback_chain(
-                        task, resolved_provider or "auto", reason=reason)
-                if fb_client is None:
-                    fb_client, fb_model, fb_label = _try_payment_fallback(
-                        resolved_provider, task, reason=reason)
+                fb_client, fb_model, fb_label = _try_payment_fallback(
+                    resolved_provider, task, reason=reason)
            else:
                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
                    task, resolved_provider or "auto", reason=reason)
--- a/agent/background_review.py
+++ b/agent/background_review.py
@@ -237,25 +237,18 @@ _COMBINED_REVIEW_PROMPT = (
 def summarize_background_review_actions(
    review_messages: List[Dict],
    prior_snapshot: List[Dict],
-    notification_mode: str = "on",
 ) -> List[str]:
    """Build the human-facing action summary for a background review pass.

-    Walks the review agent's session messages and collects successful memory
-    and skill-management actions to surface to the user. Tool messages already
-    present in ``prior_snapshot`` are skipped so stale inherited results are
-    not re-surfaced as fresh background work (issue #14944).
+    Walks the review agent's session messages and collects "successful tool
+    action" descriptions to surface to the user (e.g. "Memory updated").
+    Tool messages already present in ``prior_snapshot`` are skipped so we
+    don't re-surface stale results from the prior conversation that the
+    review agent inherited via ``conversation_history`` (issue #14944).

-    ``notification_mode`` controls display detail:
-    - ``off``: return no actions.
-    - ``on``: generic "Memory updated"/tool messages.
-    - ``verbose``: include compact content previews from tool-call arguments.
+    Matching is by ``tool_call_id`` when available, with a content-equality
+    fallback for tool messages that lack one.
    """
-    mode = str(notification_mode or "on").lower()
-    if mode == "off":
-        return []
-    verbose = mode == "verbose"
-
    existing_tool_call_ids = set()
    existing_tool_contents = set()
    for prior in prior_snapshot or []:
@@ -269,42 +262,6 @@ def summarize_background_review_actions(
            if isinstance(content, str):
                existing_tool_contents.add(content)

-    # Map review-agent tool results back to the calls that produced them.  The
-    # result JSON only says "Entry added"; the call arguments contain action,
-    # target, and content previews.  Restricting to notify_tools also prevents
-    # helper tools from surfacing as memory work just because they succeeded.
-    notify_tools = {"memory", "skill_manage"}
-    all_tool_call_ids: set = set()
-    call_details: dict = {}
-    for msg in review_messages or []:
-        if not isinstance(msg, dict) or msg.get("role") != "assistant":
-            continue
-        for tc in msg.get("tool_calls", []) or []:
-            if not isinstance(tc, dict):
-                continue
-            fn = tc.get("function", {}) or {}
-            fn_name = fn.get("name", "")
-            tcid = tc.get("id")
-            if tcid:
-                all_tool_call_ids.add(tcid)
-            if fn_name not in notify_tools:
-                continue
-            try:
-                args = json.loads(fn.get("arguments", "{}"))
-            except (json.JSONDecodeError, TypeError):
-                args = {}
-            if tcid:
-                call_details[tcid] = {
-                    "tool": fn_name,
-                    "action": args.get("action", "?"),
-                    "target": args.get("target", "memory"),
-                    "content": args.get("content", ""),
-                    "old_text": args.get("old_text", ""),
-                    "name": args.get("name", ""),
-                    "old_string": args.get("old_string", ""),
-                    "new_string": args.get("new_string", ""),
-                }
-
    actions: List[str] = []
    for msg in review_messages or []:
        if not isinstance(msg, dict) or msg.get("role") != "tool":
@@ -316,8 +273,6 @@ def summarize_background_review_actions(
            content_str = msg.get("content")
            if isinstance(content_str, str) and content_str in existing_tool_contents:
                continue
-        if tcid and all_tool_call_ids and tcid not in call_details:
-            continue
        try:
            data = json.loads(msg.get("content", "{}"))
        except (json.JSONDecodeError, TypeError):
@@ -325,75 +280,19 @@ def summarize_background_review_actions(
        if not isinstance(data, dict) or not data.get("success"):
            continue
        message = data.get("message", "")
-        detail = call_details.get(tcid, {})
-        target = data.get("target", "") or detail.get("target", "")
-        is_skill = detail.get("tool") == "skill_manage"
-
-        message_lower = message.lower()
-        if not verbose:
-            if "created" in message_lower:
-                actions.append(message)
-                continue
-            if "updated" in message_lower:
-                actions.append(message)
-                continue
-            if is_skill and "patched" in message_lower:
-                actions.append(message)
-                continue
-
-        if is_skill:
-            label = "Skill"
-        elif target:
+        target = data.get("target", "")
+        if "created" in message.lower():
+            actions.append(message)
+        elif "updated" in message.lower():
+            actions.append(message)
+        elif "added" in message.lower() or (target and "add" in message.lower()):
+            label = "Memory" if target == "memory" else "User profile" if target == "user" else target
+            actions.append(f"{label} updated")
+        elif "Entry added" in message:
+            label = "Memory" if target == "memory" else "User profile" if target == "user" else target
+            actions.append(f"{label} updated")
+        elif "removed" in message.lower() or "replaced" in message.lower():
            label = "Memory" if target == "memory" else "User profile" if target == "user" else target
-        else:
-            continue
-
-        if verbose:
-            action = detail.get("action", "")
-            content = detail.get("content", "")
-            old_text = detail.get("old_text", "")
-            skill_name = detail.get("name", "")
-            max_preview = 120
-            if is_skill:
-                change = data.get("_change", {})
-                old_string = change.get("old", "") or detail.get("old_string", "")
-                new_string = change.get("new", "") or detail.get("new_string", "")
-                description = change.get("description", "")
-                if action == "patch" and (old_string or new_string):
-                    old_preview = old_string[:80].replace("\n", " ") + (
-                        "…" if len(old_string) > 80 else ""
-                    )
-                    new_preview = new_string[:80].replace("\n", " ") + (
-                        "…" if len(new_string) > 80 else ""
-                    )
-                    actions.append(
-                        f"📝 Skill '{skill_name}' patched: "
-                        f"\"{old_preview}\" → \"{new_preview}\""
-                    )
-                elif action == "create" and description:
-                    actions.append(f"📝 Skill '{skill_name}' created: {description}")
-                elif action == "edit" and description:
-                    actions.append(f"📝 Skill '{skill_name}' rewritten: {description}")
-                else:
-                    actions.append(f"📝 {message}" if message else f"Skill {action}")
-            elif action == "add" and content:
-                preview = content[:max_preview] + ("…" if len(content) > max_preview else "")
-                actions.append(f"{label} ➕ {preview}")
-            elif action == "replace" and content:
-                preview = content[:max_preview] + ("…" if len(content) > max_preview else "")
-                actions.append(f"{label} ✏️ {preview}")
-            elif action == "remove" and old_text:
-                preview = old_text[:60] + ("…" if len(old_text) > 60 else "")
-                actions.append(f"{label} ➖ {preview}")
-            else:
-                actions.append(f"{label} updated")
-        elif (
-            "added" in message_lower
-            or "replaced" in message_lower
-            or "removed" in message_lower
-            or (target and "add" in message.lower())
-            or "Entry added" in message
-        ):
            actions.append(f"{label} updated")
    return actions

@@ -623,7 +522,6 @@ def _run_review_in_thread(
        actions = summarize_background_review_actions(
            review_messages,
            messages_snapshot,
-            notification_mode=getattr(agent, "memory_notifications", "on"),
        )

        if actions:
--- a/agent/bedrock_adapter.py
+++ b/agent/bedrock_adapter.py
@@ -58,34 +58,17 @@ _bedrock_runtime_client_cache: Dict[str, Any] = {}
 _bedrock_control_client_cache: Dict[str, Any] = {}


-_MIN_BOTO3_VERSION = (1, 34, 59)
-
-
 def _require_boto3():
-    """Import boto3, raising a clear error if not installed or too old."""
+    """Import boto3, raising a clear error if not installed."""
    try:
        import boto3
+        return boto3
    except ImportError:
        raise ImportError(
            "The 'boto3' package is required for the AWS Bedrock provider. "
            "Install it with: pip install boto3\n"
            "Or install Hermes with Bedrock support: pip install -e '.[bedrock]'"
        )
-    # converse() / converse_stream() were added in boto3 1.34.59.
-    # When Hermes is installed editable into system Python, the system boto3
-    # (e.g. Ubuntu 24.04 ships 1.34.46) may take precedence over the venv
-    # version pinned in pyproject.toml.
-    try:
-        version = tuple(int(x) for x in boto3.__version__.split(".")[:3])
-    except (AttributeError, ValueError):
-        return boto3  # can't parse — don't block on version check
-    if version < _MIN_BOTO3_VERSION:
-        raise RuntimeError(
-            f"boto3 {boto3.__version__} does not support converse_stream "
-            f"(minimum 1.34.59 required). Upgrade with: "
-            f"pip install --upgrade boto3"
-        )
-    return boto3


 def _get_bedrock_runtime_client(region: str):
@@ -952,14 +935,11 @@ def build_converse_kwargs(
    if system_prompt:
        kwargs["system"] = system_prompt

-    from agent.anthropic_adapter import _forbids_sampling_params
+    if temperature is not None:
+        kwargs["inferenceConfig"]["temperature"] = temperature

-    if not _forbids_sampling_params(model):
-        if temperature is not None:
-            kwargs["inferenceConfig"]["temperature"] = temperature
-
-        if top_p is not None:
-            kwargs["inferenceConfig"]["topP"] = top_p
+    if top_p is not None:
+        kwargs["inferenceConfig"]["topP"] = top_p

    if stop_sequences:
        kwargs["inferenceConfig"]["stopSequences"] = stop_sequences
--- a/agent/codex_responses_adapter.py
+++ b/agent/codex_responses_adapter.py
@@ -1081,7 +1081,6 @@ def _normalize_codex_response(
    message_items_raw: List[Dict[str, Any]] = []
    tool_calls: List[Any] = []
    has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
-    saw_streaming_or_item_incomplete = response_status in {"queued", "in_progress"}
    saw_commentary_phase = False
    saw_final_answer_phase = False
    saw_reasoning_item = False
@@ -1096,7 +1095,6 @@ def _normalize_codex_response(

        if item_status in {"queued", "in_progress", "incomplete"}:
            has_incomplete_items = True
-            saw_streaming_or_item_incomplete = True

        if item_type == "message":
            item_phase = getattr(item, "phase", None)
@@ -1254,9 +1252,7 @@ def _normalize_codex_response(
        finish_reason = "tool_calls"
    elif leaked_tool_call_text:
        finish_reason = "incomplete"
-    elif saw_streaming_or_item_incomplete:
-        finish_reason = "incomplete"
-    elif (has_incomplete_items or saw_commentary_phase) and not saw_final_answer_phase:
+    elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
        finish_reason = "incomplete"
    elif (reasoning_items_raw or reasoning_parts or saw_reasoning_item) and not final_text:
        # Response contains only reasoning (encrypted thinking state and/or
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -69,31 +69,6 @@ SUMMARY_PREFIX = (
 )
 LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"

-# Metadata key added to context compression summary messages so that frontends
-# (CLI, Desktop, gateway, TUI) can distinguish them from real assistant/user
-# messages and filter or render them appropriately without content-prefix
-# heuristics. See https://github.com/NousResearch/hermes-agent/issues/38389
-#
-# Underscore-prefixed ON PURPOSE: the wire sanitizers
-# (agent/transports/chat_completions.py convert_messages and the summary-path
-# mirror in agent/chat_completion_helpers.py) strip every top-level message
-# key starting with "_" before the request leaves the process. Strict
-# OpenAI-compatible gateways (Fireworks, Mistral, Moonshot/Kimi, opencode-go)
-# reject payloads carrying unknown keys with "Extra inputs are not permitted",
-# poisoning every subsequent request in the session — a bare key like
-# "is_compressed_summary" would reach the wire and trip exactly that.
-COMPRESSED_SUMMARY_METADATA_KEY = "_compressed_summary"
-
-# Appended to every standalone summary message (and to the merged-into-tail
-# prefix) so the model has an unambiguous "summary ends here" boundary.
-# Without it, weak models read the verbatim "## Active Task" quote as fresh
-# user input (#11475, #14521) or regurgitate an assistant-role summary as
-# their own output (#33256).
-_SUMMARY_END_MARKER = (
-    "--- END OF CONTEXT SUMMARY — "
-    "respond to the message below, not the summary above ---"
-)
-
 # Handoff prefixes that shipped in earlier releases. A summary persisted under
 # one of these can be inherited into a resumed lineage (#35344); when it is
 # re-normalized on re-compaction we must strip the OLD prefix too, otherwise the
@@ -171,11 +146,6 @@ _FALLBACK_TURN_MAX_CHARS = 700
 _AUTO_FOCUS_MAX_TURNS = 3
 _AUTO_FOCUS_TURN_MAX_CHARS = 260
 _AUTO_FOCUS_MAX_CHARS = 700
-# Keep a short run of recent messages verbatim even when the token budget is
-# already exhausted.  The public ``protect_last_n`` default is intentionally
-# high for small/light tails, but using all 20 as a hard floor here would bring
-# back the old large-tool-output case where nothing can be compacted.
-_MAX_TAIL_MESSAGE_FLOOR = 8


 _PATH_MENTION_RE = re.compile(r"(?:/|~/?|[A-Za-z]:\\)[^\s`'\")\]}<>]+")
@@ -1646,13 +1616,7 @@ This compaction should PRIORITISE preserving all information related to the focu
        text = (summary or "").strip()
        for prefix in (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX, *_HISTORICAL_SUMMARY_PREFIXES):
            if text.startswith(prefix):
-                text = text[len(prefix):].lstrip()
-                break
-        # Strip the trailing end marker too — a rehydrated handoff body that
-        # keeps it would leak the boundary directive into the iterative-update
-        # summarizer prompt (and the marker is re-appended on insertion anyway).
-        if text.endswith(_SUMMARY_END_MARKER):
-            text = text[: -len(_SUMMARY_END_MARKER)].rstrip()
+                return text[len(prefix):].lstrip()
        return text

    @classmethod
@@ -1668,19 +1632,6 @@ This compaction should PRIORITISE preserving all information related to the focu
            return True
        return any(text.startswith(p) for p in _HISTORICAL_SUMMARY_PREFIXES)

-    @staticmethod
-    def _has_compressed_summary_metadata(message: Any) -> bool:
-        """Return True if *message* carries the compressed-summary flag.
-
-        Callers (frontends, CLI, gateway) can use this to distinguish context
-        compaction summaries from real assistant or user messages without
-        relying on content-prefix heuristics.  The flag is in-process only —
-        the wire sanitizers strip underscore-prefixed keys before API calls.
-        """
-        if not isinstance(message, dict):
-            return False
-        return bool(message.get(COMPRESSED_SUMMARY_METADATA_KEY))
-
    @classmethod
    def _derive_auto_focus_topic(
        cls,
@@ -1866,105 +1817,6 @@ This compaction should PRIORITISE preserving all information related to the focu
                return i
        return -1

-    def _find_last_assistant_message_idx(
-        self, messages: List[Dict[str, Any]], head_end: int
-    ) -> int:
-        """Return the index of the last user-visible assistant reply at or
-        after *head_end*, or -1.
-
-        A "user-visible reply" is an assistant message with non-empty
-        textual content — i.e. one that the WebUI / TUI / SessionsPage
-        rendered as a bubble the operator could read. We deliberately
-        skip assistant messages that contain only ``tool_calls`` (and
-        no text), because those render as small "calling tool X"
-        indicators and aren't what the reporter means by "the output
-        of the last message you sent" (#29824).
-
-        Falling back to the most recent assistant message of ANY kind
-        only kicks in when no content-bearing assistant message exists
-        in the compressible region — typically a fresh session that
-        just started a multi-step tool sequence with no prior reply
-        to anchor. In that case the agent fix is a no-op and the
-        existing user-message anchor carries the load.
-        """
-        last_any = -1
-        for i in range(len(messages) - 1, head_end - 1, -1):
-            msg = messages[i]
-            if msg.get("role") != "assistant":
-                continue
-            if last_any < 0:
-                last_any = i
-            content = msg.get("content")
-            if isinstance(content, str) and content.strip():
-                return i
-            if isinstance(content, list):
-                # Multimodal / Anthropic-style content: look for any
-                # text block with non-empty text.
-                for part in content:
-                    if isinstance(part, dict):
-                        text = part.get("text") or part.get("content")
-                        if isinstance(text, str) and text.strip():
-                            return i
-        return last_any
-
-    def _ensure_last_assistant_message_in_tail(
-        self,
-        messages: List[Dict[str, Any]],
-        cut_idx: int,
-        head_end: int,
-    ) -> int:
-        """Guarantee the most recent assistant message is in the protected tail.
-
-        WebUI / TUI / SessionsPage bug (#29824). Without this anchor,
-        ``_find_tail_cut_by_tokens`` can leave the user's most recent
-        visible assistant response inside the compressed middle region —
-        especially when the conversation has a single oversized tool
-        result or a long stretch of tool-call/result pairs after the
-        last assistant reply. The summariser then rolls that reply up
-        into the single ``[CONTEXT COMPACTION — REFERENCE ONLY]`` block
-        persisted as ``role="user"`` or ``role="assistant"``. From the
-        operator's perspective the WebUI session viewer
-        (``web/src/pages/SessionsPage.tsx``) and the TUI chat panel
-        both suddenly show the opaque "Context compaction" block in the
-        slot where they were just reading the assistant's actual reply:
-
-            User:       "i cant see the output of the last message you
-                         sent, i did see it previously, however now see
-                         'context compaction'"
-
-        Mirror of ``_ensure_last_user_message_in_tail`` but anchors on
-        the last assistant-role message. Re-runs the tool-group
-        alignment so we don't split a ``tool_call`` / ``tool_result``
-        group that immediately precedes the anchored message — orphaned
-        tool messages would otherwise be removed by
-        ``_sanitize_tool_pairs`` and trigger the same data-loss symptom
-        we're trying to prevent.
-        """
-        last_asst_idx = self._find_last_assistant_message_idx(messages, head_end)
-        if last_asst_idx < 0:
-            # No assistant message in the compressible region — nothing
-            # to anchor (single-turn pre-reply state, etc.).
-            return cut_idx
-        if last_asst_idx >= cut_idx:
-            # Already in the tail — the token-budget walk did the right
-            # thing on its own.
-            return cut_idx
-        # Pull cut_idx back to the assistant message, then re-align so
-        # we don't split a tool group that immediately precedes it
-        # (e.g. an ``assistant(tool_calls)`` → ``tool(result)`` →
-        # ``assistant(final reply)`` sequence would otherwise leave the
-        # ``tool`` orphan when cut lands at the final reply).
-        new_cut = self._align_boundary_backward(messages, last_asst_idx)
-        if not self.quiet_mode:
-            logger.debug(
-                "Anchoring tail cut to last assistant message at index %d "
-                "(was %d, aligned to %d) to keep the previously-visible "
-                "reply out of the compaction summary (#29824)",
-                last_asst_idx, cut_idx, new_cut,
-            )
-        # Safety: never go back into the head region.
-        return max(new_cut, head_end + 1)
-
    def _ensure_last_user_message_in_tail(
        self,
        messages: List[Dict[str, Any]],
@@ -2023,12 +1875,11 @@ This compaction should PRIORITISE preserving all information related to the focu
        derived from ``summary_target_ratio * context_length``, so it
        scales automatically with the model's context window.

-        Token budget is the primary criterion.  A bounded message-count floor
-        keeps a short run of recent turns verbatim even when the budget is
-        exhausted, but the budget is allowed to exceed by up to 1.5x to avoid
-        cutting inside an oversized message (tool output, file read, etc.). If
-        even that floor exceeds 1.5x the budget, the cut is placed right after
-        the head so compression still runs.
+        Token budget is the primary criterion.  A hard minimum of 3 messages
+        is always protected, and the budget may be exceeded by up to 1.5x
+        to avoid cutting inside an oversized message (tool output, file
+        read, etc.).  If even the minimum 3 messages exceed 1.5x the budget,
+        the cut is placed right after the head so compression still runs.

        Never cuts inside a tool_call/result group.  Always ensures the most
        recent user message is in the tail (see ``_ensure_last_user_message_in_tail``).
@@ -2036,19 +1887,8 @@ This compaction should PRIORITISE preserving all information related to the focu
        if token_budget is None:
            token_budget = self.tail_token_budget
        n = len(messages)
-        # Hard minimum: always keep a bounded recent-message floor in the tail.
-        # ``protect_last_n`` remains a minimum up to the cap; the cap avoids
-        # preserving a whole run of bulky tool outputs on every compaction.
-        available_tail = max(0, n - head_end - 1)
-        min_tail_floor = max(3, min(self.protect_last_n, _MAX_TAIL_MESSAGE_FLOOR))
-        # Leave at least two non-head messages available to summarize on short
-        # transcripts; otherwise compression can replace a tiny middle with a
-        # summary and save no messages at all.
-        compressible_tail_cap = max(3, available_tail - 2)
-        min_tail = (
-            min(min_tail_floor, compressible_tail_cap, available_tail)
-            if available_tail > 1 else 0
-        )
+        # Hard minimum: keep 3 tail messages (fewer if the transcript is short)
+        min_tail = min(3, n - head_end - 1) if n - head_end > 1 else 0
        soft_ceiling = int(token_budget * 1.5)
        accumulated = 0
        cut_idx = n  # start from beyond the end
@@ -2120,13 +1960,6 @@ This compaction should PRIORITISE preserving all information related to the focu
        # active task is never lost to compression (fixes #10896).
        cut_idx = self._ensure_last_user_message_in_tail(messages, cut_idx, head_end)

-        # Ensure the most recent assistant message is always in the tail
-        # so the previously-visible reply isn't silently rolled into the
-        # ``[CONTEXT COMPACTION — REFERENCE ONLY]`` block (fixes #29824).
-        # Each anchor only walks ``cut_idx`` backward, so chaining them is
-        # monotonic — the tail can only grow, never shrink.
-        cut_idx = self._ensure_last_assistant_message_in_tail(messages, cut_idx, head_end)
-
        return max(cut_idx, head_end + 1)

    # ------------------------------------------------------------------
@@ -2360,33 +2193,32 @@ This compaction should PRIORITISE preserving all information related to the focu

        # When the summary lands as a standalone role="user" message,
        # weak models read the verbatim "## Active Task" quote of a past
-        # user request as fresh input (#11475, #14521).
-        # When it lands as role="assistant", models may regurgitate the
-        # summary text as their own output (#33256). In both cases, append
-        # the explicit end marker so the model has a clear "summary ends
-        # here, respond to the message below" signal.
-        if not _merge_summary_into_tail:
-            summary = summary + "\n\n" + _SUMMARY_END_MARKER
+        # user request as fresh input (#11475, #14521). Append the explicit
+        # end marker — the same one used in the merge-into-tail path — so
+        # the model has a clear "summary above, not new input" signal.
+        if not _merge_summary_into_tail and summary_role == "user":
+            summary = (
+                summary
+                + "\n\n--- END OF CONTEXT SUMMARY — "
+                "respond to the message below, not the summary above ---"
+            )

        if not _merge_summary_into_tail:
-            compressed.append({
-                "role": summary_role,
-                "content": summary,
-                COMPRESSED_SUMMARY_METADATA_KEY: True,
-            })
+            compressed.append({"role": summary_role, "content": summary})

        for i in range(compress_end, n_messages):
            msg = messages[i].copy()
            if _merge_summary_into_tail and i == compress_end:
-                merged_prefix = summary + "\n\n" + _SUMMARY_END_MARKER + "\n\n"
+                merged_prefix = (
+                    summary
+                    + "\n\n--- END OF CONTEXT SUMMARY — "
+                    "respond to the message below, not the summary above ---\n\n"
+                )
                msg["content"] = _append_text_to_content(
                    msg.get("content"),
                    merged_prefix,
                    prepend=True,
                )
-                # Mark the merged message so frontends can identify it as
-                # containing a compression summary prefix.
-                msg[COMPRESSED_SUMMARY_METADATA_KEY] = True
                _merge_summary_into_tail = False
            compressed.append(msg)

--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@@ -40,16 +40,6 @@ from agent.model_metadata import estimate_request_tokens_rough

 logger = logging.getLogger(__name__)

-# Stable marker the gateway matches on to re-tag the auto-compaction lifecycle
-# status as ``kind="compacting"`` (tui_gateway/server.py::_status_update), so
-# drivers like the desktop app can show an explicit "Summarizing…" indicator
-# instead of the transcript appearing to silently reset. Keep the marker phrase
-# intact if you reword COMPACTION_STATUS.
-COMPACTION_STATUS_MARKER = "Compacting context"
-COMPACTION_STATUS = (
-    f"🗜️ {COMPACTION_STATUS_MARKER} — summarizing earlier conversation so I can continue..."
-)
-

 def _compression_lock_holder(agent: Any) -> str:
    """Build a unique holder id for the lock: pid:tid:agent-instance:uuid.
@@ -334,7 +324,9 @@ def compress_context(
        f"{approx_tokens:,}" if approx_tokens else "unknown", agent.model,
        focus_topic,
    )
-    agent._emit_status(COMPACTION_STATUS)
+    agent._emit_status(
+        "🗜️ Compacting context — summarizing earlier conversation so I can continue..."
+    )

    # ── Compression lock ────────────────────────────────────────────────
    # Atomic, state.db-backed lock per session_id.  Without this, two
@@ -603,20 +595,6 @@ def compress_context(
            force=True,
        )

-    # Emit session:compress event so hooks (e.g. MemPalace sync) can ingest
-    # the completed old session before its details are lost.
-    _old_sid_for_event = locals().get("old_session_id")
-    if getattr(agent, "event_callback", None):
-        try:
-            agent.event_callback("session:compress", {
-                "platform": agent.platform or "",
-                "session_id": agent.session_id,
-                "old_session_id": _old_sid_for_event or "",
-                "compression_count": agent.context_compressor.compression_count,
-            })
-        except Exception as e:
-            logger.debug("event_callback error on session:compress: %s", e)
-
    # Keep the post-compression rough estimate for diagnostics, but do not
    # treat it as provider-reported prompt usage. Schema-heavy rough estimates
    # can remain above threshold even after the next real API request fits.
@@ -653,11 +631,7 @@ def compress_context(
    return compressed, new_system_prompt


-def try_shrink_image_parts_in_messages(
-    api_messages: list,
-    *,
-    max_dimension: int = 8000,
-) -> bool:
+def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
    """Re-encode all native image parts at a smaller size to recover from
    image-too-large errors (Anthropic 5 MB, unknown other providers).

@@ -668,8 +642,7 @@ def try_shrink_image_parts_in_messages(
    Strategy: look for ``image_url`` / ``input_image`` parts carrying a
    ``data:image/...;base64,...`` payload.  For each one whose encoded
    size exceeds 4 MB (a safe target that slides under Anthropic's 5 MB
-    ceiling with header overhead) or whose longest side exceeds
-    ``max_dimension``, write the base64 to a tempfile, call
+    ceiling with header overhead), write the base64 to a tempfile, call
    ``vision_tools._resize_image_for_vision`` to produce a smaller data
    URL, and substitute it in place.

@@ -691,9 +664,10 @@ def try_shrink_image_parts_in_messages(
    # after a confirmed provider rejection, so the alternative is failure.
    target_bytes = 4 * 1024 * 1024
    # Anthropic enforces an 8000px per-side dimension cap independently of
-    # the 5 MB byte cap.  In many-image requests, the provider can report a
-    # lower cap (observed: 2000px).  The caller passes that parsed ceiling
-    # when the rejection includes it.
+    # the 5 MB byte cap.  A tall screenshot can be well under 5 MB yet far
+    # over 8000px (e.g. 1200×12000 at 0.06 MB).  We check pixel dimensions
+    # even when the byte budget is fine.
+    max_dimension = 8000
    changed_count = 0
    # Track parts that are over the target but could NOT be shrunk under it.
    # If any survive, retrying is pointless — the same oversized payload will
@@ -710,9 +684,9 @@ def try_shrink_image_parts_in_messages(
        # Check both byte size AND pixel dimensions.
        needs_shrink = len(url) > target_bytes  # over byte budget
        if not needs_shrink:
-            # Even if bytes are fine, check pixel dimensions against the
-            # provider's reported per-side cap.  A screenshot can be tiny in
-            # bytes yet too large in pixels.
+            # Even if bytes are fine, check pixel dimensions against
+            # Anthropic's 8000px cap.  A tall image can be tiny in bytes
+            # yet huge in pixels.
            try:
                import base64 as _b64_dim
                header_d, _, data_d = url.partition(",")
@@ -821,8 +795,6 @@ def try_shrink_image_parts_in_messages(


 __all__ = [
-    "COMPACTION_STATUS",
-    "COMPACTION_STATUS_MARKER",
    "check_compression_model_feasibility",
    "replay_compression_warning",
    "compress_context",
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -71,35 +71,6 @@ logger = logging.getLogger(__name__)
 INTERRUPT_WAITING_FOR_MODEL_PREFIX = "Operation interrupted: waiting for model response ("


-def _image_error_max_dimension(error: Exception) -> Optional[int]:
-    """Extract a provider-reported image dimension ceiling, if present."""
-    parts = []
-    for value in (
-        error,
-        getattr(error, "message", None),
-        getattr(error, "body", None),
-    ):
-        if value:
-            try:
-                parts.append(str(value))
-            except Exception:
-                pass
-    text = " ".join(parts).lower()
-    if "image" not in text or "dimension" not in text or "max allowed size" not in text:
-        return None
-
-    match = re.search(r"max allowed size(?:\s+for [^:]+)?:\s*(\d{3,5})\s*pixels?", text)
-    if not match:
-        return None
-    try:
-        max_dimension = int(match.group(1))
-    except ValueError:
-        return None
-    if 512 <= max_dimension <= 8000:
-        return max_dimension
-    return None
-
-
 def _ollama_context_limit_error(agent: Any, request_tokens: int) -> Optional[str]:
    """Return a user-facing error when Ollama is loaded with too little context."""
    if not getattr(agent, "tools", None):
@@ -300,20 +271,11 @@ def _restore_or_build_system_prompt(agent, system_message, conversation_history)
                agent.session_id, exc,
            )

-    if stored_prompt and _stored_prompt_matches_runtime(agent, stored_prompt):
+    if stored_prompt:
        # Continuing session — reuse the exact system prompt from the
        # previous turn so the Anthropic cache prefix matches.
        agent._cached_system_prompt = stored_prompt
        return
-    if stored_prompt:
-        stored_state = "stale_runtime"
-        logger.info(
-            "Stored system prompt for session %s has stale runtime identity; "
-            "rebuilding for model=%s provider=%s.",
-            agent.session_id,
-            getattr(agent, "model", "") or "",
-            getattr(agent, "provider", "") or "",
-        )

    if conversation_history and stored_state in ("null", "empty"):
        # Continuing session whose stored prompt is unusable.  The
@@ -375,30 +337,6 @@ def _restore_or_build_system_prompt(agent, system_message, conversation_history)
            )


-def _stored_prompt_matches_runtime(agent, prompt: str) -> bool:
-    """Return False when the persisted Model/Provider lines are stale."""
-
-    def line_value(label: str) -> str:
-        prefix = f"{label}:"
-        value = ""
-        for line in prompt.splitlines():
-            if line.startswith(prefix):
-                value = line[len(prefix):].strip()
-        return value
-
-    stored_model = line_value("Model")
-    current_model = str(getattr(agent, "model", "") or "").strip()
-    if stored_model and current_model and stored_model != current_model:
-        return False
-
-    stored_provider = line_value("Provider")
-    current_provider = str(getattr(agent, "provider", "") or "").strip()
-    if stored_provider and current_provider and stored_provider != current_provider:
-        return False
-
-    return True
-
-
 def _get_continuation_prompt(is_partial_stub: bool, dropped_tools: Optional[List[str]] = None) -> str:
    if is_partial_stub and dropped_tools:
        tool_list = ", ".join(dropped_tools[:3])
@@ -430,42 +368,6 @@ def _get_continuation_prompt(is_partial_stub: bool, dropped_tools: Optional[List
        )


-# Shared recovery hint appended to every content-policy refusal message. Both
-# the HTTP-200 refusal path (``finish_reason=content_filter``) and the
-# exception path (a provider moderation error classified as
-# ``content_policy_blocked``) end with the same actionable next steps, so they
-# share one trailer to keep the guidance from drifting between the two sites.
-_CONTENT_POLICY_RECOVERY_HINT = (
-    "Try rephrasing the request, narrowing the context, or "
-    "adding a fallback provider with `hermes fallback add`."
-)
-
-
-def _content_policy_blocked_result(
-    messages: List[Dict],
-    api_call_count: int,
-    *,
-    final_response: str,
-    error_detail: str,
-) -> Dict[str, Any]:
-    """Build the terminal turn result for a content-policy block.
-
-    A content-policy refusal is deterministic for the unchanged prompt, so the
-    turn ends here (no retry). Both the HTTP-200 refusal handler and the
-    exception-path handler return the identical shape — a failed, non-completed
-    turn carrying the user-facing message and a ``content_policy_blocked:``
-    prefixed error — so they funnel through this one builder.
-    """
-    return {
-        "final_response": final_response,
-        "messages": messages,
-        "api_calls": api_call_count,
-        "completed": False,
-        "failed": True,
-        "error": f"content_policy_blocked: {error_detail}",
-    }
-
-
 def run_conversation(
    agent,
    user_message: str,
@@ -474,7 +376,6 @@ def run_conversation(
    task_id: str = None,
    stream_callback: Optional[callable] = None,
    persist_user_message: Optional[str] = None,
-    persist_user_timestamp: Optional[float] = None,
 ) -> Dict[str, Any]:
    """
    Run a complete conversation with tool calling until completion.
@@ -490,8 +391,6 @@ def run_conversation(
        persist_user_message: Optional clean user message to store in
            transcripts/history when user_message contains API-only
            synthetic prefixes.
-        persist_user_timestamp: Optional platform event timestamp to store
-            as metadata on that persisted user message.
                or queuing follow-up prefetch work.

    Returns:
@@ -513,7 +412,6 @@ def run_conversation(
        task_id,
        stream_callback,
        persist_user_message,
-        persist_user_timestamp,
        restore_or_build_system_prompt=_restore_or_build_system_prompt,
        install_safe_stdio=_install_safe_stdio,
        sanitize_surrogates=_sanitize_surrogates,
@@ -697,11 +595,7 @@ def run_conversation(
        # landed after an orphan tool result). Most providers return
        # empty content on malformed sequences, which would otherwise
        # retrigger the empty-retry loop indefinitely.
-        # repair_message_sequence_with_cursor also recomputes the SessionDB
-        # flush cursor (_last_flushed_db_idx) when repair compacts the list,
-        # so the turn-end flush doesn't skip the assistant/tool chain (#44837).
-        from agent.agent_runtime_helpers import repair_message_sequence_with_cursor
-        repaired_seq = repair_message_sequence_with_cursor(agent, messages)
+        repaired_seq = agent._repair_message_sequence(messages)
        if repaired_seq > 0:
            request_logger.info(
                "Repaired %s message-alternation violations before request (session=%s)",
@@ -809,10 +703,7 @@ def run_conversation(
        # a thinking-only turn. Runs on the per-call copy only — the
        # stored conversation history keeps the reasoning block for the
        # UI transcript and session persistence.
-        api_messages = agent._drop_thinking_only_and_merge_users(
-            api_messages,
-            drop_codex_reasoning_items=agent.api_mode != "codex_responses",
-        )
+        api_messages = agent._drop_thinking_only_and_merge_users(api_messages)

        # Normalize message whitespace and tool-call JSON for consistent
        # prefix matching.  Ensures bit-perfect prefixes across turns,
@@ -1421,106 +1312,6 @@ def run_conversation(
                        )
                        finish_reason = "length"

-                # ── Content-policy refusal (HTTP 200) ──────────────────
-                # The model — or the provider's safety system — returned a
-                # *successful* response whose stop/finish reason is a refusal:
-                # Anthropic ``stop_reason="refusal"`` → ``content_filter``;
-                # OpenAI / portal ``finish_reason="content_filter"`` or a
-                # populated ``message.refusal`` (mapped in the chat_completions
-                # transport); Bedrock ``guardrail_intervened``. The content is
-                # typically empty, so without this branch the response falls
-                # through to the empty-response / invalid-response retry loops
-                # and is mis-surfaced as "rate limited" / "no content after
-                # retries" — burning paid attempts reproducing a deterministic
-                # refusal. Surface it clearly and stop. Mirrors the
-                # exception-based ``content_policy_blocked`` recovery: try a
-                # configured fallback once, otherwise return the refusal.
-                if finish_reason == "content_filter":
-                    _refusal_transport = agent._get_transport()
-                    if agent.api_mode == "anthropic_messages":
-                        _refusal_result = _refusal_transport.normalize_response(
-                            response, strip_tool_prefix=agent._is_anthropic_oauth
-                        )
-                    else:
-                        _refusal_result = _refusal_transport.normalize_response(response)
-                    _refusal_text = (getattr(_refusal_result, "content", None) or "").strip()
-                    # Some refusals carry the explanation only in the reasoning
-                    # channel; fall back to it so the user sees *something*.
-                    if not _refusal_text:
-                        _refusal_text = (agent._extract_reasoning(_refusal_result) or "").strip()
-
-                    agent._invoke_api_request_error_hook(
-                        task_id=effective_task_id,
-                        turn_id=turn_id,
-                        api_request_id=api_request_id,
-                        api_call_count=api_call_count,
-                        api_start_time=api_start_time,
-                        api_kwargs=api_kwargs,
-                        error_type="ContentPolicyBlocked",
-                        error_message=_refusal_text or "model declined to respond (content_filter)",
-                        status_code=None,
-                        retry_count=retry_count,
-                        max_retries=max_retries,
-                        retryable=False,
-                        reason=FailoverReason.content_policy_blocked.value,
-                    )
-
-                    if thinking_spinner:
-                        thinking_spinner.stop("")
-                        thinking_spinner = None
-                    if agent.thinking_callback:
-                        agent.thinking_callback("")
-
-                    # Deterministic for the unchanged prompt — never retry.
-                    # Try a configured fallback once (a different model may not
-                    # refuse); otherwise surface the refusal terminally.
-                    if agent._has_pending_fallback():
-                        agent._buffer_status(
-                            "⚠️ Model declined to respond (safety refusal) — trying fallback..."
-                        )
-                    if agent._try_activate_fallback():
-                        retry_count = 0
-                        compression_attempts = 0
-                        _retry.primary_recovery_attempted = False
-                        continue
-
-                    agent._flush_status_buffer()
-                    _refusal_log = (
-                        _refusal_text[:500] + "..."
-                        if len(_refusal_text) > 500
-                        else _refusal_text
-                    )
-                    logger.warning(
-                        "%sModel declined to respond (finish_reason=content_filter). "
-                        "model=%s provider=%s refusal=%s",
-                        agent.log_prefix, agent.model, agent.provider,
-                        _refusal_log or "(no text)",
-                    )
-                    agent._emit_status(
-                        "⚠️ The model declined to respond to this request (safety refusal)."
-                    )
-
-                    _refusal_detail = (
-                        f"Model's explanation: {_refusal_text}"
-                        if _refusal_text
-                        else "The model returned no explanation."
-                    )
-                    _refusal_response = (
-                        "⚠️  The model declined to respond to this request "
-                        "(safety refusal — not a Hermes/gateway failure).\n\n"
-                        f"{_refusal_detail}\n\n"
-                        f"{_CONTENT_POLICY_RECOVERY_HINT}"
-                    )
-
-                    agent._cleanup_task_resources(effective_task_id)
-                    agent._persist_session(messages, conversation_history)
-                    return _content_policy_blocked_result(
-                        messages,
-                        api_call_count,
-                        final_response=_refusal_response,
-                        error_detail=_refusal_text or "model declined (content_filter)",
-                    )
-
                if finish_reason == "length":
                    if getattr(response, "id", "") == PARTIAL_STREAM_STUB_ID:
                        agent._vprint(
@@ -2272,11 +2063,7 @@ def run_conversation(
                    and not _retry.image_shrink_retry_attempted
                ):
                    _retry.image_shrink_retry_attempted = True
-                    image_max_dimension = _image_error_max_dimension(api_error) or 8000
-                    if agent._try_shrink_image_parts_in_messages(
-                        api_messages,
-                        max_dimension=image_max_dimension,
-                    ):
+                    if agent._try_shrink_image_parts_in_messages(api_messages):
                        agent._vprint(
                            f"{agent.log_prefix}📐 Image(s) exceeded provider size limit — "
                            f"shrank and retrying...",
@@ -2844,13 +2631,10 @@ def run_conversation(
                    except Exception:
                        pass
                    if _genuine_nous_rate_limit:
-                        # Re-enter the loop exactly once so the
-                        # top-of-loop Nous guard handles fallback or
-                        # bails cleanly. (Setting retry_count to
-                        # max_retries would make the while condition
-                        # false immediately and the guard would never
-                        # run -- no fallback, generic exhaustion error.)
-                        retry_count = max(0, max_retries - 1)
+                        # Skip straight to max_retries so the top-of-loop
+                        # guard can handle fallback or bail cleanly.
+                        # NOTE(review): verify the loop still re-enters here.
+                        retry_count = max_retries
                        continue
                    # Upstream capacity 429: fall through to normal
                    # retry logic.  A different model (or the same
@@ -3292,17 +3076,20 @@ def run_conversation(
                    if classified.reason == FailoverReason.content_policy_blocked:
                        _summary = agent._summarize_api_error(api_error)
                        _policy_response = (
-                            "⚠️  The model provider's safety filter blocked this request "
-                            "(not a Hermes/gateway failure).\n\n"
+                            f"⚠️  The model provider's safety filter blocked this request "
+                            f"(not a Hermes/gateway failure).\n\n"
                            f"Provider message: {_summary}\n\n"
-                            f"{_CONTENT_POLICY_RECOVERY_HINT}"
-                        )
-                        return _content_policy_blocked_result(
-                            messages,
-                            api_call_count,
-                            final_response=_policy_response,
-                            error_detail=_summary,
+                            f"Try rephrasing the request, narrowing the context, or "
+                            f"adding a fallback provider with `hermes fallback add`."
                        )
+                        return {
+                            "final_response": _policy_response,
+                            "messages": messages,
+                            "api_calls": api_call_count,
+                            "completed": False,
+                            "failed": True,
+                            "error": f"content_policy_blocked: {_summary}",
+                        }
                    return {
                        "final_response": None,
                        "messages": messages,
--- a/agent/copilot_acp_client.py
+++ b/agent/copilot_acp_client.py
@@ -70,6 +70,16 @@ def _resolve_args() -> list[str]:

 def _resolve_home_dir() -> str:
    """Return a stable HOME for child ACP processes."""
+
+    try:
+        from hermes_constants import get_subprocess_home
+
+        profile_home = get_subprocess_home()
+        if profile_home:
+            return profile_home
+    except Exception:
+        pass
+
    home = os.environ.get("HOME", "").strip()
    if home:
        return home
@@ -95,10 +105,7 @@ def _resolve_home_dir() -> str:

 def _build_subprocess_env() -> dict[str, str]:
    env = os.environ.copy()
-    home = _resolve_home_dir()
-    env["HOME"] = home
-    from hermes_constants import apply_subprocess_home_env
-    apply_subprocess_home_env(env)
+    env["HOME"] = _resolve_home_dir()
    return env


--- a/agent/curator_backup.py
+++ b/agent/curator_backup.py
@@ -454,16 +454,16 @@ def _restore_cron_skill_links(snapshot_dir: Path) -> Dict[str, Any]:
        report["attempted"] = True  # we tried but there was nothing to do
        return report

-    # Load and rewrite the live jobs under the scheduler's cross-process lock.
+    # Load and rewrite the live jobs under the scheduler's lock.
    try:
-        from cron.jobs import load_jobs, save_jobs, _jobs_lock
+        from cron.jobs import load_jobs, save_jobs, _jobs_file_lock
    except ImportError as e:
        report["error"] = f"cron module unavailable: {e}"
        return report

    report["attempted"] = True
    try:
-        with _jobs_lock():
+        with _jobs_file_lock:
            live_jobs = load_jobs()
            changed = False

--- a/agent/display.py
+++ b/agent/display.py
@@ -12,7 +12,6 @@ import time
 from dataclasses import dataclass, field
 from difflib import unified_diff
 from pathlib import Path
-from typing import Any

 from utils import safe_json_loads
 from agent.tool_result_classification import file_mutation_result_landed
@@ -169,27 +168,6 @@ def _oneline(text: str) -> str:
    return " ".join(text.split())


-def _truncate_preview(text: str, max_len: int | None) -> str:
-    if max_len and max_len > 0 and len(text) > max_len:
-        if max_len <= 3:
-            return "." * max_len
-        return text[:max_len - 3] + "..."
-    return text
-
-
-def _delegate_task_goal_parts(tasks: Any, *, per_goal_len: int) -> tuple[int, list[str]]:
-    if not isinstance(tasks, list):
-        return 0, []
-    goals: list[str] = []
-    for task in tasks:
-        if not isinstance(task, dict):
-            continue
-        raw_goal = task.get("goal")
-        goal = "?" if raw_goal is None else _oneline(str(raw_goal))
-        goals.append(_truncate_preview(goal or "?", per_goal_len))
-    return len(goals), goals
-
-
 def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -> str | None:
    """Build a short preview of a tool call's primary argument for display.

@@ -213,22 +191,6 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
        "clarify": "question", "skill_manage": "name",
    }

-    # delegate_task: show goal (single) or individual task goals (batch)
-    if tool_name == "delegate_task":
-        tasks = args.get("tasks")
-        if tasks and isinstance(tasks, list):
-            task_count, goals = _delegate_task_goal_parts(tasks, per_goal_len=40)
-            preview = (
-                f"{task_count} tasks: " + " | ".join(goals)
-                if goals else f"{len(tasks)} parallel tasks"
-            )
-            return _truncate_preview(preview, max_len)
-        goal = args.get("goal", "")
-        if goal is None:
-            return None
-        preview = _oneline(str(goal))
-        return _truncate_preview(preview, max_len) if preview else None
-
    if tool_name == "process":
        action = args.get("action", "")
        sid = args.get("session_id", "")
@@ -896,6 +858,20 @@ def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]
    return False, ""


+def _used_free_parallel(result: str | None) -> bool:
+    """Report whether a web tool result was served by Parallel's free Search MCP.
+
+    The keyless Parallel path is the only one that stamps its JSON payload with
+    ``provider="parallel"`` (the paid REST path and other providers leave it
+    out), so that tag decides whether the tool line reads "Parallel search" /
+    "Parallel fetch".
+    """
+    if isinstance(result, str) and '"provider"' in result:
+        payload = safe_json_loads(result)
+        return isinstance(payload, dict) and str(payload.get("provider", "")).lower() == "parallel"
+    return False
+
+
 def get_cute_tool_message(
    tool_name: str, args: dict, duration: float, result: str | None = None,
 ) -> str:
@@ -933,15 +909,17 @@ def get_cute_tool_message(
        return f"{line}{failure_suffix}"

    if tool_name == "web_search":
-        return _wrap(f"┊ 🔍 search    {_trunc(args.get('query', ''), 42)}  {dur}")
+        verb = "Parallel search" if _used_free_parallel(result) else "search"
+        return _wrap(f"┊ 🔍 {verb:<9} {_trunc(args.get('query', ''), 42)}  {dur}")
    if tool_name == "web_extract":
+        verb = "Parallel fetch" if _used_free_parallel(result) else "fetch"
        urls = args.get("urls", [])
        if urls:
            url = urls[0] if isinstance(urls, list) else str(urls)
            domain = url.replace("https://", "").replace("http://", "").split("/")[0]
            extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
-            return _wrap(f"┊ 📄 fetch     {_trunc(domain, 35)}{extra}  {dur}")
-        return _wrap(f"┊ 📄 fetch     pages  {dur}")
+            return _wrap(f"┊ 📄 {verb:<9} {_trunc(domain, 35)}{extra}  {dur}")
+        return _wrap(f"┊ 📄 {verb:<9} pages  {dur}")
    if tool_name == "terminal":
        return _wrap(f"┊ 💻 $         {_trunc(args.get('command', ''), 42)}  {dur}")
    if tool_name == "process":
@@ -1057,10 +1035,7 @@ def get_cute_tool_message(
    if tool_name == "delegate_task":
        tasks = args.get("tasks")
        if tasks and isinstance(tasks, list):
-            task_count, goals = _delegate_task_goal_parts(tasks, per_goal_len=30)
-            detail = " | ".join(goals) if goals else "parallel"
-            count_label = task_count or len(tasks)
-            return _wrap(f"┊ 🔀 delegate  {count_label}x: {_trunc(detail, 35)}  {dur}")
+            return _wrap(f"┊ 🔀 delegate  {len(tasks)} parallel tasks  {dur}")
        return _wrap(f"┊ 🔀 delegate  {_trunc(args.get('goal', ''), 35)}  {dur}")

    preview = build_tool_preview(tool_name, args) or ""
--- a/agent/errors.py
+++ b/agent/errors.py
@@ -1,3 +0,0 @@
-class SSLConfigurationError(Exception):
-    """Raised when SSL/TLS certificate bundle configuration fails."""
-    pass
--- a/agent/file_safety.py
+++ b/agent/file_safety.py
@@ -46,6 +46,11 @@ def build_write_denied_paths(home: str) -> set[str]:
            # Top-level Anthropic PKCE credential store remains sensitive even
            # when a profile is active; default/non-profile sessions still read it.
            str(hermes_root / ".anthropic_oauth.json"),
+            os.path.join(home, ".bashrc"),
+            os.path.join(home, ".zshrc"),
+            os.path.join(home, ".profile"),
+            os.path.join(home, ".bash_profile"),
+            os.path.join(home, ".zprofile"),
            os.path.join(home, ".netrc"),
            os.path.join(home, ".pgpass"),
            os.path.join(home, ".npmrc"),
@@ -99,6 +104,12 @@ def is_write_denied(path: str) -> bool:
        if resolved.startswith(prefix):
            return True

+    # Hermes control-plane files: block both the ACTIVE profile's view
+    # (hermes_home) AND the global root view. Without the root pass, a
+    # profile-mode session leaves <root>/auth.json + <root>/config.yaml
+    # writable — letting a prompt-injected write_file overwrite the global
+    # files that every profile inherits from (same shape as #15981).
+    control_file_names = ("auth.json", "config.yaml", "webhook_subscriptions.json")
    mcp_tokens_dir_name = "mcp-tokens"

    hermes_dirs = []
@@ -111,6 +122,12 @@ def is_write_denied(path: str) -> bool:
            continue

    for base_real in hermes_dirs:
+        for name in control_file_names:
+            try:
+                if resolved == os.path.realpath(os.path.join(base_real, name)):
+                    return True
+            except Exception:
+                continue
        try:
            mcp_real = os.path.realpath(os.path.join(base_real, mcp_tokens_dir_name))
            if resolved == mcp_real or resolved.startswith(mcp_real + os.sep):
--- a/agent/gemini_native_adapter.py
+++ b/agent/gemini_native_adapter.py
@@ -41,16 +41,6 @@ DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
 GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65535


-def bare_gemini_model_id(model: str) -> str:
-    """Strip Gemini's own provider prefix from an aggregator-style model id."""
-    name = (model or "").strip()
-    lowered = name.lower()
-    for prefix in ("google/", "gemini/"):
-        if lowered.startswith(prefix):
-            return name[len(prefix):].strip() or name
-    return name
-
-
 def is_native_gemini_base_url(base_url: str) -> bool:
    """Return True when the endpoint speaks Gemini's native REST API."""
    normalized = str(base_url or "").strip().rstrip("/").lower()
@@ -340,7 +330,7 @@ def _build_gemini_contents(messages: List[Dict[str, Any]]) -> tuple[List[Dict[st
    system_instruction = None
    joined_system = "\n".join(part for part in system_text_parts if part).strip()
    if joined_system:
-        system_instruction = {"role": "system", "parts": [{"text": joined_system}]}
+        system_instruction = {"parts": [{"text": joined_system}]}
    return contents, system_instruction


@@ -924,7 +914,6 @@ class GeminiNativeClient:
            thinking_config=thinking_config,
        )

-        model = bare_gemini_model_id(model)
        if stream:
            return self._stream_completion(model=model, request=request, timeout=timeout)

--- a/agent/memory_manager.py
+++ b/agent/memory_manager.py
@@ -33,7 +33,6 @@ from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Dict, List, Optional

 from agent.memory_provider import MemoryProvider
-from agent.skill_commands import extract_user_instruction_from_skill_message
 from tools.registry import tool_error

 logger = logging.getLogger(__name__)
@@ -45,66 +44,6 @@ logger = logging.getLogger(__name__)
 _SYNC_DRAIN_TIMEOUT_S = 5.0


-def memory_provider_tools_enabled(enabled_toolsets: Optional[List[str]]) -> bool:
-    """Return whether external memory-provider tools should be exposed."""
-    if enabled_toolsets is None:
-        return True
-    if not enabled_toolsets:
-        return False
-    if "memory" in enabled_toolsets:
-        return True
-
-    try:
-        from toolsets import resolve_toolset
-
-        return any("memory" in resolve_toolset(name) for name in enabled_toolsets)
-    except Exception:
-        logger.debug("Failed to resolve enabled toolsets for memory-provider tools", exc_info=True)
-        return False
-
-
-def inject_memory_provider_tools(agent: Any) -> int:
-    """Append external memory-provider tool schemas to an agent tool surface."""
-    memory_manager = getattr(agent, "_memory_manager", None)
-    tools = getattr(agent, "tools", None)
-    if not memory_manager or tools is None:
-        return 0
-
-    existing_tool_names = {
-        tool.get("function", {}).get("name")
-        for tool in tools
-        if isinstance(tool, dict)
-    }
-    if (
-        "memory" not in existing_tool_names
-        and not memory_provider_tools_enabled(getattr(agent, "enabled_toolsets", None))
-    ):
-        return 0
-
-    get_schemas = getattr(memory_manager, "get_all_tool_schemas", None)
-    if not callable(get_schemas):
-        return 0
-
-    valid_tool_names = getattr(agent, "valid_tool_names", None)
-    if valid_tool_names is None:
-        valid_tool_names = set()
-        agent.valid_tool_names = valid_tool_names
-
-    added = 0
-    for schema in get_schemas():
-        if not isinstance(schema, dict):
-            continue
-        tool_name = schema.get("name", "")
-        if not tool_name or tool_name in existing_tool_names:
-            continue
-        tools.append({"type": "function", "function": schema})
-        valid_tool_names.add(tool_name)
-        existing_tool_names.add(tool_name)
-        added += 1
-
-    return added
-
-
 # ---------------------------------------------------------------------------
 # Context fencing helpers
 # ---------------------------------------------------------------------------
@@ -431,37 +370,16 @@ class MemoryManager:

    # -- Prefetch / recall ---------------------------------------------------

-    @staticmethod
-    def _strip_skill_scaffolding(text: str) -> Optional[str]:
-        """Return memory-worthy user text, or None to skip the turn.
-
-        When a user invokes a /skill or /bundle, Hermes expands the turn into
-        a model-facing message that embeds the entire skill body. Feeding that
-        verbatim to memory providers pollutes their stores/embeddings with
-        prompt scaffolding instead of what the user actually asked. We recover
-        just the user's instruction here, once, for every provider — so this
-        is fixed for the whole provider fan-out, not per backend.
-
-        - Non-skill messages pass through unchanged.
-        - Skill turns with a user instruction return that instruction.
-        - Bare skill invocations (no instruction) return None → callers skip
-          the turn, since there is no user content worth remembering.
-        """
-        return extract_user_instruction_from_skill_message(text)
-
    def prefetch_all(self, query: str, *, session_id: str = "") -> str:
        """Collect prefetch context from all providers.

        Returns merged context text labeled by provider. Empty providers
        are skipped. Failures in one provider don't block others.
        """
-        clean_query = self._strip_skill_scaffolding(query)
-        if not clean_query:
-            return ""
        parts = []
        for provider in self._providers:
            try:
-                result = provider.prefetch(clean_query, session_id=session_id)
+                result = provider.prefetch(query, session_id=session_id)
                if result and result.strip():
                    parts.append(result)
            except Exception as e:
@@ -482,14 +400,10 @@ class MemoryManager:
        if not providers:
            return

-        clean_query = self._strip_skill_scaffolding(query)
-        if not clean_query:
-            return
-
        def _run() -> None:
            for provider in providers:
                try:
-                    provider.queue_prefetch(clean_query, session_id=session_id)
+                    provider.queue_prefetch(query, session_id=session_id)
                except Exception as e:
                    logger.debug(
                        "Memory provider '%s' queue_prefetch failed (non-fatal): %s",
@@ -541,11 +455,6 @@ class MemoryManager:
        if not providers:
            return

-        clean_user_content = self._strip_skill_scaffolding(user_content)
-        if not clean_user_content:
-            return
-        user_content = clean_user_content
-
        def _run() -> None:
            for provider in providers:
                try:
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -5,7 +5,6 @@ and run_agent.py for pre-flight context checks.
 """

 import ipaddress
-import json
 import logging
 import os
 import re
@@ -17,7 +16,7 @@ from urllib.parse import urlparse
 import requests
 import yaml

-from utils import atomic_json_write, base_url_host_matches, base_url_hostname
+from utils import base_url_host_matches, base_url_hostname

 from hermes_constants import OPENROUTER_MODELS_URL

@@ -112,57 +111,6 @@ _endpoint_model_metadata_cache: Dict[str, Dict[str, Dict[str, Any]]] = {}
 _endpoint_model_metadata_cache_time: Dict[str, float] = {}
 _ENDPOINT_MODEL_CACHE_TTL = 300

-
-def _get_model_metadata_cache_path() -> Path:
-    """Return path to the OpenRouter model metadata disk cache."""
-    from hermes_constants import get_hermes_home
-    return get_hermes_home() / "cache" / "openrouter_model_metadata.json"
-
-
-def _model_metadata_disk_cache_age_seconds() -> Optional[float]:
-    """Return disk-cache age in seconds, or None if freshness is unknown."""
-    try:
-        cache_path = _get_model_metadata_cache_path()
-        if not cache_path.exists():
-            return None
-        age = time.time() - cache_path.stat().st_mtime
-        if age < 0:
-            return None
-        return age
-    except Exception:
-        return None
-
-
-def _load_model_metadata_disk_cache() -> Dict[str, Dict[str, Any]]:
-    """Load processed OpenRouter metadata cache from disk."""
-    try:
-        cache_path = _get_model_metadata_cache_path()
-        with cache_path.open("r", encoding="utf-8") as f:
-            data = json.load(f)
-        if not isinstance(data, dict):
-            return {}
-        return {
-            str(key): value
-            for key, value in data.items()
-            if isinstance(value, dict)
-        }
-    except Exception as e:
-        logger.debug("Failed to load OpenRouter model metadata disk cache: %s", e)
-        return {}
-
-
-def _save_model_metadata_disk_cache(data: Dict[str, Dict[str, Any]]) -> None:
-    """Save processed OpenRouter metadata cache to disk atomically."""
-    try:
-        atomic_json_write(
-            _get_model_metadata_cache_path(),
-            data,
-            indent=0,
-            separators=(",", ":"),
-        )
-    except Exception as e:
-        logger.debug("Failed to save OpenRouter model metadata disk cache: %s", e)
-
 # Descending tiers for context length probing when the model is unknown.
 # We start at 256K (covers GPT-5.x, many current large-context models) and
 # step down on context-length errors until one works.  Tier[0] is also the
@@ -261,13 +209,7 @@ DEFAULT_CONTEXT_LENGTHS = {
    # https://platform.minimax.io/docs/api-reference/text-chat-openai
    "minimax-m3": 1000000,
    "minimax": 204800,
-    # GLM — GLM-5.2 ships with a 1M context window (verified empirically:
-    # needle-in-a-haystack retrieval at 789K prompt tokens succeeded with
-    # zero errors on api.z.ai/api/coding/paas/v4).  Older GLM models
-    # (5, 5.1, 5-turbo) are ~202K.  Longest-key-first substring matching
-    # ensures "glm-5.2" resolves to 1M while older variants still hit the
-    # generic 202K fallback.
-    "glm-5.2": 1_048_576,
+    # GLM
    "glm": 202752,
    # xAI Grok — xAI /v1/models does not return context_length metadata,
    # so these hardcoded fallbacks prevent Hermes from probing-down to
@@ -685,15 +627,6 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
    if not force_refresh and _model_metadata_cache and (time.time() - _model_metadata_cache_time) < _MODEL_CACHE_TTL:
        return _model_metadata_cache

-    if not force_refresh:
-        disk_age = _model_metadata_disk_cache_age_seconds()
-        if disk_age is not None and disk_age < _MODEL_CACHE_TTL:
-            disk_cache = _load_model_metadata_disk_cache()
-            if disk_cache:
-                _model_metadata_cache = disk_cache
-                _model_metadata_cache_time = time.time() - disk_age
-                return _model_metadata_cache
-
    try:
        response = requests.get(OPENROUTER_MODELS_URL, timeout=10, verify=_resolve_requests_verify())
        response.raise_for_status()
@@ -715,24 +648,12 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any

        _model_metadata_cache = cache
        _model_metadata_cache_time = time.time()
-        _save_model_metadata_disk_cache(cache)
        logger.debug("Fetched metadata for %s models from OpenRouter", len(cache))
        return cache

    except Exception as e:
        logger.warning(f"Failed to fetch model metadata from OpenRouter: {e}")
-        if _model_metadata_cache:
-            return _model_metadata_cache
-        disk_cache = _load_model_metadata_disk_cache()
-        if disk_cache:
-            _model_metadata_cache = disk_cache
-            disk_age = _model_metadata_disk_cache_age_seconds()
-            if disk_age is not None:
-                _model_metadata_cache_time = time.time() - min(disk_age, _MODEL_CACHE_TTL)
-            else:
-                _model_metadata_cache_time = time.time() - _MODEL_CACHE_TTL + 1
-            return _model_metadata_cache
-        return {}
+        return _model_metadata_cache or {}


 def fetch_endpoint_model_metadata(
--- a/agent/moonshot_schema.py
+++ b/agent/moonshot_schema.py
@@ -135,14 +135,7 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:

 def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
    """Infer a reasonable ``type`` if this schema node has none."""
-    node_type = node.get("type")
-    if isinstance(node_type, list):
-        concrete = next(
-            (t for t in node_type if isinstance(t, str) and t not in {"", "null"}),
-            "string",
-        )
-        return {**node, "type": concrete}
-    if "type" in node and node_type not in {None, ""}:
+    if "type" in node and node["type"] not in {None, ""}:
        return node

    # Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum``
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -8,7 +8,6 @@ import json
 import logging
 import os
 import threading
-import contextvars
 from collections import OrderedDict
 from pathlib import Path

@@ -509,22 +508,13 @@ PLATFORM_HINTS = {
    ),
    "telegram": (
        "You are on a text messaging communication platform, Telegram. "
-        "Standard Markdown is automatically converted to Telegram formatting. "
+        "Standard markdown is automatically converted to Telegram format. "
        "Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
        "`inline code`, ```code blocks```, [links](url), and ## headers. "
-        "Telegram now supports rich Markdown, so lean into it: whenever it "
-        "makes the answer clearer or easier to scan, actively reach for real "
-        "Markdown tables (pipe `| col | col |` syntax), bullet and numbered "
-        "lists, task lists (`- [ ]` / `- [x]`), headings, nested blockquotes, "
-        "collapsible details, footnotes/references, math/formulas (`$...$`, "
-        "`$$...$$`), underline, subscript/superscript, marked (highlighted) "
-        "text, and anchors. Default to structured formatting over dense "
-        "paragraphs for any comparison, set of steps, key/value summary, or "
-        "tabular data. Prefer real Markdown tables and task lists over "
-        "hand-built bullet substitutes when presenting structured data; these "
-        "degrade gracefully (tables become readable bullet groups) when rich "
-        "rendering is unavailable, but advanced constructs like math and "
-        "collapsible details may render as plain source text in that case. "
+        "Telegram has NO table syntax — prefer bullet lists or labeled "
+        "key: value pairs over pipe tables (any tables you do emit are "
+        "auto-rewritten into row-group bullets, which you can produce "
+        "directly for cleaner output). "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. Images "
        "(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
@@ -959,52 +949,6 @@ CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
 CONTEXT_TRUNCATE_TAIL_RATIO = 0.2


-def _get_context_file_max_chars() -> int:
-    """Return the configured context-file truncation limit.
-
-    ``CONTEXT_FILE_MAX_CHARS`` remains the upstream-compatible default and
-    fallback. Users with larger context windows can raise
-    ``context_file_max_chars`` in config.yaml without patching Hermes.
-    """
-    try:
-        from hermes_cli.config import load_config
-
-        val = load_config().get("context_file_max_chars")
-        if isinstance(val, (int, float)) and val > 0:
-            return int(val)
-    except Exception as e:
-        logger.debug("Could not read context_file_max_chars from config: %s", e)
-    return CONTEXT_FILE_MAX_CHARS
-
-# Collect truncation warnings so the caller (run_agent) can surface them.
-# A ContextVar (not a module-global list) isolates accumulation per thread /
-# per async task, so concurrent gateway-session prompt builds can't drain or
-# clear each other's pending warnings (cross-session leak). Each build runs in
-# its own context, collects its own warnings, and drains them synchronously.
-_truncation_warnings: "contextvars.ContextVar[Optional[list]]" = contextvars.ContextVar(
-    "context_file_truncation_warnings", default=None
-)
-
-
-def _record_truncation_warning(msg: str) -> None:
-    """Append a truncation warning to the current context's accumulator."""
-    warnings = _truncation_warnings.get()
-    if warnings is None:
-        warnings = []
-        _truncation_warnings.set(warnings)
-    warnings.append(msg)
-
-
-def drain_truncation_warnings() -> list:
-    """Return and clear any truncation warnings accumulated in this context."""
-    warnings = _truncation_warnings.get()
-    if not warnings:
-        return []
-    drained = list(warnings)
-    warnings.clear()
-    return drained
-
-
 # =========================================================================
 # Skills prompt cache
 # =========================================================================
@@ -1211,7 +1155,7 @@ def build_skills_system_prompt(
        or get_session_env("HERMES_SESSION_PLATFORM")
        or ""
    )
-    disabled = get_disabled_skill_names(_platform_hint or None)
+    disabled = get_disabled_skill_names()
    cache_key = (
        str(skills_dir.resolve()),
        tuple(str(d) for d in external_dirs),
@@ -1510,19 +1454,10 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
 # Context files (SOUL.md, AGENTS.md, .cursorrules)
 # =========================================================================

-def _truncate_content(content: str, filename: str, max_chars: Optional[int] = None) -> str:
+def _truncate_content(content: str, filename: str, max_chars: int = CONTEXT_FILE_MAX_CHARS) -> str:
    """Head/tail truncation with a marker in the middle."""
-    if max_chars is None:
-        max_chars = _get_context_file_max_chars()
    if len(content) <= max_chars:
        return content
-    msg = (
-        f"⚠️  Context file {filename} TRUNCATED: "
-        f"{len(content)} chars exceeds limit of {max_chars} — "
-        f"increase context_file_max_chars or trim the file!"
-    )
-    logger.warning(msg)
-    _record_truncation_warning(msg)
    head_chars = int(max_chars * CONTEXT_TRUNCATE_HEAD_RATIO)
    tail_chars = int(max_chars * CONTEXT_TRUNCATE_TAIL_RATIO)
    head = content[:head_chars]
--- a/agent/redact.py
+++ b/agent/redact.py
@@ -104,7 +104,6 @@ _PREFIX_PATTERNS = [
    r"mem0_[A-Za-z0-9]{10,}",           # Mem0 Platform API key
    r"brv_[A-Za-z0-9]{10,}",            # ByteRover API key
    r"xai-[A-Za-z0-9]{30,}",            # xAI (Grok) API key
-    r"ntn_[A-Za-z0-9]{10,}",            # Notion internal integration token
 ]

 # ENV assignment patterns: KEY=value where KEY contains a secret-like name
--- a/agent/skill_commands.py
+++ b/agent/skill_commands.py
@@ -26,91 +26,6 @@ _skill_commands_platform: Optional[str] = None
 _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
 _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")

-# ---------------------------------------------------------------------------
-# Skill-scaffolding markers and the canonical extractor.
-#
-# When a user invokes a /skill (or /bundle), Hermes expands the turn into a
-# model-facing message that embeds the full skill body plus scaffolding. That
-# expanded text is what flows into the agent loop — and into memory providers
-# via MemoryManager. Providers that store or embed the raw user turn (mem0,
-# openviking, hindsight, retaindb, byterover, honcho, supermemory) would
-# otherwise capture the entire skill body instead of what the user actually
-# asked. ``extract_user_instruction_from_skill_message`` recovers just the
-# user's instruction so memory stays clean.
-#
-# These markers MUST stay byte-identical to the builders below
-# (``_build_skill_message`` here, ``build_bundle_invocation_message`` in
-# agent/skill_bundles.py). They are co-located with the single-skill builder
-# on purpose, and the bundle markers are asserted against the bundle builder in
-# tests/openviking_plugin/test_openviking.py::test_skill_markers_match_hermes_scaffolding.
-# ---------------------------------------------------------------------------
-_SKILL_INVOCATION_PREFIX = "[IMPORTANT: The user has invoked the "
-_SINGLE_SKILL_MARKER = "The full skill content is loaded below.]"
-_SINGLE_SKILL_INSTRUCTION = (
-    "The user has provided the following instruction alongside the skill invocation: "
-)
-_RUNTIME_NOTE = "\n\n[Runtime note:"
-_BUNDLE_MARKER = " skill bundle,"
-_BUNDLE_USER_INSTRUCTION = "\nUser instruction: "
-_BUNDLE_FIRST_SKILL_BLOCK = "\n\n[Loaded as part of the "
-
-
-def extract_user_instruction_from_skill_message(content: Any) -> Optional[str]:
-    """Recover the user's instruction from a slash-skill-expanded turn.
-
-    Returns:
-        - The original string unchanged when it is NOT skill scaffolding
-          (a normal user message passes straight through).
-        - The extracted user instruction when the scaffolding carried one.
-        - ``None`` when the content is skill scaffolding with no user
-          instruction (i.e. a bare ``/skill`` invocation). Callers that feed
-          memory providers should skip the turn in that case — there is no
-          user content worth storing.
-    """
-    if not isinstance(content, str):
-        return None
-
-    if not content.startswith(_SKILL_INVOCATION_PREFIX):
-        return content
-
-    if _BUNDLE_MARKER in content:
-        return _extract_bundle_user_instruction(content)
-
-    if _SINGLE_SKILL_MARKER in content:
-        return _extract_single_skill_user_instruction(content)
-
-    return None
-
-
-def _extract_single_skill_user_instruction(message: str) -> Optional[str]:
-    # Single-skill format appends the user instruction after the skill body, so
-    # the last occurrence is the user-provided one; the body may quote this text.
-    marker_idx = message.rfind(_SINGLE_SKILL_INSTRUCTION)
-    if marker_idx < 0:
-        return None
-
-    instruction = message[marker_idx + len(_SINGLE_SKILL_INSTRUCTION):]
-    runtime_idx = instruction.find(_RUNTIME_NOTE)
-    if runtime_idx >= 0:
-        instruction = instruction[:runtime_idx]
-    instruction = instruction.strip()
-    return instruction or None
-
-
-def _extract_bundle_user_instruction(message: str) -> Optional[str]:
-    # Bundle format puts the user instruction before the loaded skills, so the
-    # first occurrence is the user-provided one.
-    marker_idx = message.find(_BUNDLE_USER_INSTRUCTION)
-    if marker_idx < 0:
-        return None
-
-    instruction = message[marker_idx + len(_BUNDLE_USER_INSTRUCTION):]
-    first_skill_idx = instruction.find(_BUNDLE_FIRST_SKILL_BLOCK)
-    if first_skill_idx >= 0:
-        instruction = instruction[:first_skill_idx]
-    instruction = instruction.strip()
-    return instruction or None
-

 def _resolve_skill_commands_platform() -> Optional[str]:
    """Return the current platform scope used for disabled-skill filtering.
--- a/agent/skill_utils.py
+++ b/agent/skill_utils.py
@@ -43,20 +43,14 @@ EXCLUDED_SKILL_DIRS = frozenset(
    )
 )

-# Supporting files live inside a skill package and are loaded explicitly via
-# skill_view(skill, file_path=...). They are not standalone skills and must not
-# be scanned for active SKILL.md/DESCRIPTION.md entries, even if a Curator or
-# archive workflow preserves a complete old skill package under references/.
-SKILL_SUPPORT_DIRS = frozenset(("references", "templates", "assets", "scripts"))
-

 def is_excluded_skill_path(path) -> bool:
-    """True if *path* should be skipped by active skill scanners.
+    """True if any component of *path* is in EXCLUDED_SKILL_DIRS.

-    Use this on every ``SKILL.md`` path produced by direct ``rglob`` scans to
-    prune dependency, virtualenv, VCS, cache, and progressive-disclosure
-    support-package paths. Centralising the check here keeps every
-    skill-scanning site in sync with the shared exclusion set.
+    Use this on every SKILL.md path produced by ``rglob`` to prune
+    dependency, virtualenv, VCS, and cache directories. Centralising the
+    check here keeps every skill-scanning site in sync with the shared
+    exclusion set.

    Accepts a Path or string.
    """
@@ -65,36 +59,7 @@ def is_excluded_skill_path(path) -> bool:
    except AttributeError:
        from pathlib import PurePath
        parts = PurePath(str(path)).parts
-    return any(part in EXCLUDED_SKILL_DIRS for part in parts) or is_skill_support_path(
-        path
-    )
-
-
-def is_skill_support_path(path) -> bool:
-    """True if *path* is under a support dir of an actual skill root.
-
-    ``references/``, ``templates/``, ``assets/``, and ``scripts/`` are
-    progressive-disclosure support areas when they sit directly inside a skill
-    directory containing ``SKILL.md``. They are not active discovery roots for
-    standalone skills. A preserved package such as
-    ``some-skill/references/old-skill-package/SKILL.md`` is documentation data
-    unless the caller explicitly loads it via ``file_path``.
-
-    Legitimate categories or skill names such as ``skills/scripts/foo`` remain
-    discoverable because their ``scripts`` component is not directly under a
-    directory that contains ``SKILL.md``.
-    """
-    path_obj = path if isinstance(path, Path) else Path(str(path))
-    parts = path_obj.parts
-    # Last component may be a file or candidate skill directory name. Only
-    # components before the leaf can be containing support directories.
-    for idx, part in enumerate(parts[:-1]):
-        if part not in SKILL_SUPPORT_DIRS or idx == 0:
-            continue
-        skill_root = Path(*parts[:idx])
-        if (skill_root / "SKILL.md").exists():
-            return True
-    return False
+    return any(part in EXCLUDED_SKILL_DIRS for part in parts)


 # ── Lazy YAML loader ─────────────────────────────────────────────────────
@@ -307,65 +272,27 @@ def skill_matches_environment(frontmatter: Dict[str, Any]) -> bool:
 # ── Disabled skills ───────────────────────────────────────────────────────


-_RAW_CONFIG_CACHE: Dict[Tuple[str, int, int], Dict[str, Any]] = {}
-
-
-def _raw_config_cache_clear() -> None:
-    """Test hook — drop the shared raw config cache."""
-    _RAW_CONFIG_CACHE.clear()
-
-
-def _load_raw_config() -> Dict[str, Any]:
-    """Read config.yaml with a shared mtime+size keyed cache.
-
-    This module intentionally avoids importing ``hermes_cli.config`` on the
-    skill prompt/build path. A tiny local cache gives the same repeated-read
-    win without pulling the heavier CLI config stack into startup.
-    """
-    config_path = get_config_path()
-    if not config_path.exists():
-        return {}
-    try:
-        stat = config_path.stat()
-        cache_key = (str(config_path), stat.st_mtime_ns, stat.st_size)
-    except OSError:
-        cache_key = None
-
-    if cache_key is not None:
-        cached = _RAW_CONFIG_CACHE.get(cache_key)
-        if cached is not None:
-            return cached
-
-    try:
-        parsed = yaml_load(config_path.read_text(encoding="utf-8"))
-    except Exception as e:
-        logger.debug("Could not read skill config %s: %s", config_path, e)
-        return {}
-    if not isinstance(parsed, dict):
-        return {}
-
-    if cache_key is not None:
-        _RAW_CONFIG_CACHE.clear()
-        _RAW_CONFIG_CACHE[cache_key] = parsed
-    return parsed
-
-
 def get_disabled_skill_names(platform: str | None = None) -> Set[str]:
    """Read disabled skill names from config.yaml.

    Args:
        platform: Explicit platform name (e.g. ``"telegram"``).  When
            *None*, resolves from ``HERMES_PLATFORM`` or
-            ``HERMES_SESSION_PLATFORM`` env vars.  Returns the global
-            disabled list, unioned with the platform-specific list when a
-            platform is resolved (a globally-disabled skill stays disabled
-            on every platform).
+            ``HERMES_SESSION_PLATFORM`` env vars.  Falls back to the
+            global disabled list when no platform is determined.

    Reads the config file directly (no CLI config imports) to stay
    lightweight.
    """
-    parsed = _load_raw_config()
-    if not parsed:
+    config_path = get_config_path()
+    if not config_path.exists():
+        return set()
+    try:
+        parsed = yaml_load(config_path.read_text(encoding="utf-8"))
+    except Exception as e:
+        logger.debug("Could not read skill config %s: %s", config_path, e)
+        return set()
+    if not isinstance(parsed, dict):
        return set()

    skills_cfg = parsed.get("skills")
@@ -378,14 +305,13 @@ def get_disabled_skill_names(platform: str | None = None) -> Set[str]:
        or os.getenv("HERMES_PLATFORM")
        or get_session_env("HERMES_SESSION_PLATFORM")
    )
-    global_disabled = _normalize_string_set(skills_cfg.get("disabled"))
    if resolved_platform:
        platform_disabled = (skills_cfg.get("platform_disabled") or {}).get(
            resolved_platform
        )
        if platform_disabled is not None:
-            return global_disabled | _normalize_string_set(platform_disabled)
-    return global_disabled
+            return _normalize_string_set(platform_disabled)
+    return _normalize_string_set(skills_cfg.get("disabled"))


 def _normalize_string_set(values) -> Set[str]:
@@ -410,7 +336,6 @@ _EXTERNAL_DIRS_CACHE: Dict[Tuple[str, int], List[Path]] = {}
 def _external_dirs_cache_clear() -> None:
    """Test hook — drop the in-process cache."""
    _EXTERNAL_DIRS_CACHE.clear()
-    _raw_config_cache_clear()


 def get_external_skills_dirs() -> List[Path]:
@@ -443,8 +368,11 @@ def get_external_skills_dirs() -> List[Path]:
            # Return a copy so callers can't mutate the cached list.
            return list(cached)

-    parsed = _load_raw_config()
-    if not parsed:
+    try:
+        parsed = yaml_load(config_path.read_text(encoding="utf-8"))
+    except Exception:
+        return []
+    if not isinstance(parsed, dict):
        return []

    skills_cfg = parsed.get("skills")
@@ -656,7 +584,15 @@ def resolve_skill_config_values(
    current values (or the declared default if the key isn't set).
    Path values are expanded via ``os.path.expanduser``.
    """
-    config = _load_raw_config()
+    config_path = get_config_path()
+    config: Dict[str, Any] = {}
+    if config_path.exists():
+        try:
+            parsed = yaml_load(config_path.read_text(encoding="utf-8"))
+            if isinstance(parsed, dict):
+                config = parsed
+        except Exception:
+            pass

    resolved: Dict[str, Any] = {}
    for var in config_vars:
@@ -696,21 +632,12 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
 def iter_skill_index_files(skills_dir: Path, filename: str):
    """Walk skills_dir yielding sorted paths matching *filename*.

-    Excludes Hermes metadata, VCS, virtualenv/dependency, cache, and skill
-    support directories. Support directories (references/templates/assets/
-    scripts) can contain arbitrary markdown and even archived package
-    ``SKILL.md`` files, but they are progressive-disclosure data loaded through
-    ``skill_view(..., file_path=...)`` rather than active skill roots.
+    Excludes Hermes metadata, VCS, virtualenv/dependency, and cache
+    directories so dependencies cannot register nested skills.
    """
    matches = []
    for root, dirs, files in os.walk(skills_dir, followlinks=True):
-        has_skill_md = "SKILL.md" in files
-        dirs[:] = [
-            d
-            for d in dirs
-            if d not in EXCLUDED_SKILL_DIRS
-            and not (has_skill_md and d in SKILL_SUPPORT_DIRS)
-        ]
+        dirs[:] = [d for d in dirs if d not in EXCLUDED_SKILL_DIRS]
        if filename in files:
            matches.append(Path(root) / filename)
    for path in sorted(matches, key=lambda p: str(p.relative_to(skills_dir))):
--- a/agent/ssl_guard.py
+++ b/agent/ssl_guard.py
@@ -1,94 +0,0 @@
-"""Preventive SSL CA certificate checks for Hermes Agent.
-
-This module catches broken CA bundle paths before OpenAI/httpx turns them into
-opaque ``FileNotFoundError: [Errno 2] No such file or directory`` failures.
-"""
-
-from __future__ import annotations
-
-import logging
-import os
-import ssl
-from pathlib import Path
-
-from agent.errors import SSLConfigurationError
-
-logger = logging.getLogger(__name__)
-
-_CA_BUNDLE_ENV_VARS = (
-    "HERMES_CA_BUNDLE",
-    "SSL_CERT_FILE",
-    "REQUESTS_CA_BUNDLE",
-    "CURL_CA_BUNDLE",
-)
-
-_SKIP_VALUES = {"1", "true", "yes", "on"}
-
-
-def _skip_ssl_guard_enabled() -> bool:
-    return os.getenv("HERMES_SKIP_SSL_GUARD", "").strip().lower() in _SKIP_VALUES
-
-
-def _repair_hint() -> str:
-    return (
-        "Repair: python -m pip install --force-reinstall certifi openai httpx\n"
-        "If you configured a custom corporate CA bundle, fix or unset the "
-        "broken CA bundle environment variable."
-    )
-
-
-def _ssl_err(message: str) -> SSLConfigurationError:
-    """Create a consistent, user-actionable SSL configuration error."""
-    return SSLConfigurationError(f"{message}\n{_repair_hint()}")
-
-
-def _validate_bundle_path(label: str, value: str, *, require_substantial: bool = False) -> None:
-    path = Path(value).expanduser()
-    if not path.exists():
-        raise _ssl_err(f"{label} points to a missing CA bundle: {value}")
-    if not path.is_file():
-        raise _ssl_err(f"{label} does not point to a CA bundle file: {value}")
-    if require_substantial and path.stat().st_size < 1024:
-        raise _ssl_err(f"{label} at {value} appears corrupted (too small)")
-    try:
-        ctx = ssl.create_default_context(cafile=str(path))
-    except Exception as exc:
-        raise _ssl_err(f"{label} CA bundle at {value} cannot be loaded: {exc}") from exc
-    if not ctx.get_ca_certs():
-        raise _ssl_err(f"{label} CA bundle at {value} did not load any certificates")
-
-
-def verify_ca_bundle() -> None:
-    """Verify configured and bundled CA certificates are present and loadable.
-
-    Raises:
-        SSLConfigurationError: If an explicit CA-bundle environment variable
-            points at a bad path, or if certifi's bundled ``cacert.pem`` is
-            missing/corrupt.
-    """
-    if _skip_ssl_guard_enabled():
-        logger.debug("SSL CA bundle guard skipped via HERMES_SKIP_SSL_GUARD")
-        return
-
-    for env_var in _CA_BUNDLE_ENV_VARS:
-        value = os.getenv(env_var)
-        if value:
-            _validate_bundle_path(env_var, value)
-
-    try:
-        import certifi
-    except Exception as exc:
-        raise _ssl_err(f"certifi is not importable: {exc}") from exc
-
-    ca_bundle = str(certifi.where())
-    _validate_bundle_path("certifi", ca_bundle, require_substantial=True)
-
-
-def verify_ca_bundle_with_fallback() -> None:
-    """Backward-compatible wrapper for older call sites.
-
-    The old PR name mentioned a platform fallback, but allowing startup with a
-    broken certifi bundle still leaves httpx/OpenAI and requests call sites
-    failing later. Keep the wrapper name but enforce the same check.
-    """
-    verify_ca_bundle()
--- a/agent/system_prompt.py
+++ b/agent/system_prompt.py
@@ -40,7 +40,6 @@ from agent.prompt_builder import (
    TASK_COMPLETION_GUIDANCE,
    TOOL_USE_ENFORCEMENT_GUIDANCE,
    TOOL_USE_ENFORCEMENT_MODELS,
-    drain_truncation_warnings,
 )
 from agent.runtime_cwd import resolve_context_cwd

@@ -401,14 +400,7 @@ def build_system_prompt(agent: Any, system_message: Optional[str] = None) -> str
    warm across turns.
    """
    parts = build_system_prompt_parts(agent, system_message=system_message)
-    joined = "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
-
-    # Surface context-file truncation warnings through the normal agent status
-    # channel so gateway/CLI users see them in chat instead of only in logs.
-    for warning in drain_truncation_warnings():
-        agent._emit_status(warning)
-
-    return joined
+    return "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)


 def invalidate_system_prompt(agent: Any) -> None:
--- a/agent/transports/anthropic.py
+++ b/agent/transports/anthropic.py
@@ -186,21 +186,10 @@ class AnthropicTransport(ProviderTransport):
    def validate_response(self, response: Any) -> bool:
        """Check Anthropic response structure is valid.

-        An empty content list is legitimate for terminal stop reasons that
-        carry no text payload:
-
-        - ``end_turn`` — the model's canonical "nothing more to add" after a
-          tool turn that already delivered the user-facing text.
-        - ``refusal`` — the model declined to respond (Claude 4.5+). The
-          Messages API returns an empty ``content`` list with this stop
-          reason. Treating it as invalid sends a deterministic refusal into
-          the invalid-response retry loop, which reproduces the refusal on
-          every attempt and surfaces a misleading "rate limited / invalid
-          response" error instead of the refusal. ``normalize_response`` maps
-          ``refusal`` → ``content_filter`` so the agent loop's refusal handler
-          can surface it.
-
-        Treating either as invalid falsely retries a completed response.
+        An empty content list is legitimate when ``stop_reason == "end_turn"``
+        — the model's canonical way of signalling "nothing more to add" after
+        a tool turn that already delivered the user-facing text. Treating it
+        as invalid falsely retries a completed response.
        """
        if response is None:
            return False
@@ -208,7 +197,7 @@ class AnthropicTransport(ProviderTransport):
        if not isinstance(content_blocks, list):
            return False
        if not content_blocks:
-            return getattr(response, "stop_reason", None) in {"end_turn", "refusal"}
+            return getattr(response, "stop_reason", None) == "end_turn"
        return True

    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@@ -531,7 +531,6 @@ class ChatCompletionsTransport(ProviderTransport):
                supports_reasoning=params.get("supports_reasoning", False),
                qwen_session_metadata=params.get("qwen_session_metadata"),
                model=model,
-                base_url=params.get("base_url"),
                ollama_num_ctx=params.get("ollama_num_ctx"),
                session_id=params.get("session_id"),
            )
@@ -665,42 +664,8 @@ class ChatCompletionsTransport(ProviderTransport):
        if rd:
            provider_data["reasoning_details"] = rd

-        # OpenAI structured-refusal field. When a model declines, the SDK
-        # populates ``message.refusal`` with the explanation and leaves
-        # ``content`` empty. OpenAI-compatible proxies that front Anthropic /
-        # Bedrock (e.g. Nous Portal) surface a Claude refusal this way — or via
-        # ``finish_reason="content_filter"`` — instead of the native
-        # ``stop_reason="refusal"``. Without capturing it the refusal looks
-        # like an empty response, so the agent loop retries a deterministic
-        # refusal three times and gives up with "no content after retries".
-        # Promote it to content + a ``content_filter`` finish reason so the
-        # loop's refusal handler surfaces it clearly and stops. ``refusal`` is
-        # ``None`` for normal responses, so this is a no-op in the common case.
-        content = msg.content
-        refusal = getattr(msg, "refusal", None)
-        if refusal is None and hasattr(msg, "model_extra"):
-            _msg_extra = getattr(msg, "model_extra", None) or {}
-            if isinstance(_msg_extra, dict):
-                refusal = _msg_extra.get("refusal")
-        if isinstance(refusal, str) and refusal.strip():
-            # Record the refusal explanation regardless — it's useful provider
-            # metadata even when the model also returned a usable payload.
-            provider_data["refusal"] = refusal
-            _has_text = isinstance(content, str) and content.strip()
-            _has_tool_calls = bool(tool_calls)
-            # Only promote to a terminal ``content_filter`` when the refusal is
-            # the *sole* payload — no visible text and no tool calls. A response
-            # that carries real content (or tool calls) alongside a refusal note
-            # is a normal, usable turn: surfacing it as a failed safety refusal
-            # would discard the model's actual work. In the empty-payload case,
-            # adopt the refusal as content so the loop has something to show.
-            if not _has_text and not _has_tool_calls:
-                content = refusal
-                if finish_reason in (None, "stop"):
-                    finish_reason = "content_filter"
-
        return NormalizedResponse(
-            content=content,
+            content=msg.content,
            tool_calls=tool_calls,
            finish_reason=finish_reason,
            reasoning=reasoning,
--- a/agent/transports/codex.py
+++ b/agent/transports/codex.py
@@ -218,10 +218,22 @@ class ResponsesApiTransport(ProviderTransport):
            kwargs.pop("timeout", None)

        if is_codex_backend:
-            # chatgpt.com/backend-api/codex rejects body-level
-            # ``extra_headers`` with HTTP 400. Correlation/cache routing for
-            # this backend must not be sent through the Responses payload.
-            kwargs.pop("extra_headers", None)
+            prompt_cache_key = kwargs.get("prompt_cache_key")
+            cache_scope_id = str(prompt_cache_key or session_id or "").strip()
+            if cache_scope_id:
+                existing_extra_headers = kwargs.get("extra_headers")
+                merged_extra_headers: Dict[str, str] = {}
+                if isinstance(existing_extra_headers, dict):
+                    merged_extra_headers.update(
+                        {
+                            str(key): str(value)
+                            for key, value in existing_extra_headers.items()
+                            if key and value is not None
+                        }
+                    )
+                merged_extra_headers["session_id"] = cache_scope_id
+                merged_extra_headers["x-client-request-id"] = cache_scope_id
+                kwargs["extra_headers"] = merged_extra_headers

        max_tokens = params.get("max_tokens")
        if max_tokens is not None and not is_codex_backend:
--- a/agent/turn_context.py
+++ b/agent/turn_context.py
@@ -69,7 +69,6 @@ def build_turn_context(
    task_id: Optional[str],
    stream_callback,
    persist_user_message: Optional[str],
-    persist_user_timestamp: Optional[float] = None,
    *,
    restore_or_build_system_prompt,
    install_safe_stdio,
@@ -122,7 +121,6 @@ def build_turn_context(
    agent._stream_callback = stream_callback
    agent._persist_user_message_idx = None
    agent._persist_user_message_override = persist_user_message
-    agent._persist_user_message_timestamp = persist_user_timestamp
    # Generate unique task_id if not provided to isolate VMs between tasks.
    effective_task_id = task_id or str(uuid.uuid4())
    agent._current_task_id = effective_task_id
--- a/apps/bootstrap-installer/package.json
+++ b/apps/bootstrap-installer/package.json
@@ -16,7 +16,7 @@
  },
  "dependencies": {
    "@nous-research/ui": "0.16.0",
-    "@tailwindcss/vite": "^4.2.4",
+    "@tailwindcss/vite": "^4.2.1",
    "@tailwindcss/typography": "^0.5.19",
    "@tauri-apps/api": "^2.0.0",
    "@tauri-apps/plugin-dialog": "^2.0.0",
@@ -40,8 +40,8 @@
    "@tauri-apps/cli": "^2.0.0",
    "@types/react": "^19.2.14",
    "@types/react-dom": "^19.2.3",
-    "@vitejs/plugin-react": "^6.0.2",
+    "@vitejs/plugin-react": "^5.2.0",
    "typescript": "^6.0.3",
-    "vite": "^8.0.16"
+    "vite": "^7.3.1"
  }
 }
--- a/apps/bootstrap-installer/src-tauri/src/update.rs
+++ b/apps/bootstrap-installer/src-tauri/src/update.rs
@@ -3,9 +3,8 @@
 //! Driven when the installer is launched as `Hermes-Setup.exe --update` (see
 //! `AppMode` in lib.rs). The desktop app hands off to us — it exits, then we:
 //!
-//!   1. wait for the old Hermes desktop process to fully exit (so both the
-//!      venv shim and packaged app.asar are free; otherwise `hermes update`
-//!      or repair bootstrap can race locked files),
+//!   1. wait for the old Hermes desktop process to fully exit (so the venv
+//!      shim is free; otherwise `hermes update` aborts with exit code 2),
 //!   2. run `hermes update --yes --gateway` (Python/repo update; this does NOT
 //!      rebuild apps/desktop by design — see cmd_update in hermes_cli/main.py),
 //!   3. run `hermes desktop --build-only` (the rebuild step update skips),
@@ -39,8 +38,8 @@ use crate::events::{BootstrapEvent, LogStream, StageInfo, StageState};
 /// hermes_cli/main.py (sys.exit(2)). We surface a targeted message for this.
 const UPDATE_EXIT_CONCURRENT: i32 = 2;

-/// How long to wait for the old desktop process to release files under the
-/// install tree before giving up and letting `hermes update`'s own guard decide.
+/// How long to wait for the old desktop process to release the venv shim
+/// before giving up and letting `hermes update`'s own guard decide.
 const DESKTOP_EXIT_WAIT: Duration = Duration::from_secs(20);
 const DESKTOP_EXIT_POLL: Duration = Duration::from_millis(500);

@@ -151,10 +150,8 @@ async fn run_update(app: AppHandle) -> Result<()> {
    // ---- pre-step: wait for the old desktop to die -----------------------
    // The desktop exec'd us then called app.exit(), but process teardown is
    // async on Windows. If it still holds the venv shim, `hermes update`
-    // aborts with exit 2. If it still holds the packaged app.asar,
-    // install.ps1's repair/re-clone path cannot move/remove the install tree.
-    // Give both handles a bounded window to clear.
-    wait_for_install_locks_free(&install_root, &app, "update").await;
+    // aborts with exit 2. Give it a bounded window to clear.
+    wait_for_venv_free(&install_root, &app).await;

    // ---- stage 1: hermes update -----------------------------------------
    // Pass --branch so `hermes update` targets the branch this installer was
@@ -176,8 +173,8 @@ async fn run_update(app: AppHandle) -> Result<()> {
        vec!["update".into(), "--yes".into(), "--gateway".into()];
    // --force skips `hermes update`'s Windows running-exe guard (which would
    // `sys.exit(2)` and dead-end the handoff). By contract the desktop has
-    // already exited and waited for the install locks to clear before launching
-    // us, and wait_for_install_locks_free below force-kills any straggler — so by the
+    // already exited and waited for the venv shim to unlock before launching
+    // us, and wait_for_venv_free below force-kills any straggler — so by the
    // time `hermes update` runs there is no legitimate hermes.exe to protect,
    // and the guard would only produce a false "Hermes is still running" stop.
    update_args.push("--force".into());
@@ -394,57 +391,48 @@ async fn run_update(app: AppHandle) -> Result<()> {
    Ok(())
 }

-/// Poll until the venv shim AND packaged desktop app bundle are no longer locked
-/// (Windows) or a bounded timeout elapses. On non-Windows this is a short fixed
-/// grace since file locking isn't the failure mode there.
-pub(crate) async fn wait_for_install_locks_free(install_root: &Path, app: &AppHandle, stage: &str) {
-    let lock_targets = install_lock_probe_paths(install_root);
+/// Poll until the venv shim is no longer locked (Windows) or a bounded timeout
+/// elapses. On non-Windows this is a short fixed grace since file locking
+/// isn't the failure mode there.
+async fn wait_for_venv_free(install_root: &Path, app: &AppHandle) {
+    let shim = venv_hermes(install_root);
    let deadline = Instant::now() + DESKTOP_EXIT_WAIT;

-    emit_log(app, Some(stage), LogStream::Stdout, "[handoff] waiting for Hermes to exit…");
+    emit_log(app, Some("update"), LogStream::Stdout, "[update] waiting for Hermes to exit…");

    loop {
-        let locked = locked_paths(&lock_targets);
-        if locked.is_empty() {
+        if !is_locked(&shim) {
            return;
        }
        if Instant::now() >= deadline {
-            // Last resort: a backend hermes.exe (or the desktop Hermes.exe
-            // itself) is still holding one of the update-sensitive files. The
-            // desktop should have reaped its tree before handing off, but
-            // SIGTERM races / detached grandchildren / AV handles can leave a
-            // straggler. Rather than "proceed anyway" straight into uv's
-            // "Access is denied" or install.ps1's locked app.asar failure,
-            // force-kill every Hermes.exe except ourselves, then give the OS a
-            // beat to unload the image.
+            // Last resort: a backend hermes.exe (or a grandchild it spawned)
+            // is still holding the shim. The desktop should have reaped its
+            // tree before handing off, but SIGTERM races / detached
+            // grandchildren / AV handles can leave a straggler. Rather than
+            // "proceed anyway" straight into uv's "Access is denied", force-kill
+            // every hermes.exe except ourselves, then give the OS a beat to
+            // unload the image.
            emit_log(
                app,
-                Some(stage),
+                Some("update"),
                LogStream::Stdout,
-                &format!(
-                    "[handoff] Hermes still holding install files ({}); force-killing stragglers…",
-                    format_locked_paths(&locked)
-                ),
+                "[update] Hermes still holding the venv shim; force-killing stragglers…",
            );
            force_kill_other_hermes();
            tokio::time::sleep(Duration::from_millis(800)).await;
-            let locked_after_kill = locked_paths(&lock_targets);
-            if locked_after_kill.is_empty() {
+            if !is_locked(&shim) {
                emit_log(
                    app,
-                    Some(stage),
+                    Some("update"),
                    LogStream::Stdout,
-                    "[handoff] install files freed after force-kill",
+                    "[update] venv shim freed after force-kill",
                );
            } else {
                emit_log(
                    app,
-                    Some(stage),
+                    Some("update"),
                    LogStream::Stdout,
-                    &format!(
-                        "[handoff] install files still locked ({}); proceeding (--force + quarantine will handle it)",
-                        format_locked_paths(&locked_after_kill)
-                    ),
+                    "[update] venv shim still locked; proceeding (--force + quarantine will handle it)",
                );
            }
            return;
@@ -453,44 +441,13 @@ pub(crate) async fn wait_for_install_locks_free(install_root: &Path, app: &AppHa
    }
 }

-fn install_lock_probe_paths(install_root: &Path) -> Vec<PathBuf> {
-    let mut paths = vec![venv_hermes(install_root)];
-    paths.extend(desktop_app_payload_paths(install_root));
-    paths
-}
-
-fn desktop_app_payload_paths(install_root: &Path) -> Vec<PathBuf> {
-    let release = install_root.join("apps").join("desktop").join("release");
-    if cfg!(target_os = "windows") {
-        vec![
-            release.join("win-unpacked").join("resources").join("app.asar"),
-            release.join("win-arm64-unpacked").join("resources").join("app.asar"),
-        ]
-    } else if cfg!(target_os = "macos") {
-        vec![
-            release.join("mac").join("Hermes.app").join("Contents").join("Resources").join("app.asar"),
-            release.join("mac-arm64").join("Hermes.app").join("Contents").join("Resources").join("app.asar"),
-        ]
-    } else {
-        vec![release.join("linux-unpacked").join("resources").join("app.asar")]
-    }
-}
-
-fn locked_paths(paths: &[PathBuf]) -> Vec<PathBuf> {
-    paths.iter().filter(|p| is_locked(p)).cloned().collect()
-}
-
-fn format_locked_paths(paths: &[PathBuf]) -> String {
-    paths.iter().map(|p| p.display().to_string()).collect::<Vec<_>>().join(", ")
-}
-
 /// Force-kill any `hermes.exe` other than this process. Windows-only; a no-op
 /// elsewhere (POSIX has no mandatory-lock contention). We can't selectively
 /// target "the backend" by PID here — the desktop already exited and we never
 /// knew its children — so we kill the whole `hermes.exe` image tree via
 /// taskkill, excluding our own PID.
 ///
-/// Safe w.r.t. our own update child: this runs inside the install-lock wait,
+/// Safe w.r.t. our own update child: this runs inside `wait_for_venv_free`,
 /// which completes BEFORE we spawn `venv\Scripts\hermes.exe update`. At this
 /// point no update-driven hermes.exe exists yet, so the only hermes.exe images
 /// are stragglers from the old desktop — exactly what we want gone. (`/FI PID
@@ -934,29 +891,6 @@ mod tests {
        assert!(!is_locked(Path::new("/nonexistent/does/not/exist/xyz")));
    }

-    #[test]
-    fn lock_probe_paths_include_desktop_app_payload() {
-        let root = Path::new("/x/hermes-agent");
-        let probes = install_lock_probe_paths(root);
-
-        assert!(
-            probes.iter().any(|p| p == &venv_hermes(root)),
-            "venv shim remains part of the update lock probe"
-        );
-        assert!(
-            probes.iter().any(|p| p.ends_with(Path::new("resources/app.asar"))),
-            "packaged app.asar must be probed so repair/re-clone waits for the old desktop to exit"
-        );
-    }
-
-    #[test]
-    fn locked_paths_ignores_missing_payloads() {
-        let root = Path::new("/nonexistent/hermes-agent");
-        let probes = install_lock_probe_paths(root);
-
-        assert!(locked_paths(&probes).is_empty());
-    }
-
    #[test]
    fn parses_update_branch_from_space_or_equals_args() {
        assert_eq!(
--- a/apps/bootstrap-installer/tsconfig.json
+++ b/apps/bootstrap-installer/tsconfig.json
@@ -1,8 +1,8 @@
 {
  "compilerOptions": {
-    "target": "ES2023",
+    "target": "ES2022",
    "useDefineForClassFields": true,
-    "lib": ["ES2023", "DOM", "DOM.Iterable"],
+    "lib": ["ES2022", "DOM", "DOM.Iterable"],
    "module": "ESNext",
    "skipLibCheck": true,
    "moduleResolution": "bundler",
--- a/apps/desktop/README.md
+++ b/apps/desktop/README.md
@@ -34,7 +34,7 @@ It builds and launches the GUI against your existing install — same config, ke

 ### Prebuilt installers

-Prebuilt installers are built and distributed via [the Hermes Desktop website.](https://hermes-agent.nousresearch.com/).
+Prebuilt installers are built and distributed via [the Hermes Desktop website.](https://hermes-agent.nousresearch.com/desktop).

 ---

--- a/apps/desktop/electron/backend-env.cjs
+++ b/apps/desktop/electron/backend-env.cjs
@@ -67,16 +67,6 @@ function buildDesktopBackendPath({
  )
 }

-function normalizeHermesHomeRoot(hermesHome, { pathModule = pathModuleForPlatform(process.platform) } = {}) {
-  if (!hermesHome) return hermesHome
-  const resolved = pathModule.resolve(String(hermesHome))
-  const parent = pathModule.dirname(resolved)
-  if (pathModule.basename(parent).toLowerCase() === 'profiles') {
-    return pathModule.dirname(parent)
-  }
-  return resolved
-}
-
 function buildDesktopBackendEnv({
  hermesHome,
  pythonPathEntries = [],
@@ -107,6 +97,5 @@ module.exports = {
  buildDesktopBackendEnv,
  buildDesktopBackendPath,
  delimiterForPlatform,
-  normalizeHermesHomeRoot,
  pathEnvKey
 }
--- a/apps/desktop/electron/backend-env.test.cjs
+++ b/apps/desktop/electron/backend-env.test.cjs
@@ -7,7 +7,6 @@ const {
  appendUniquePathEntries,
  buildDesktopBackendEnv,
  buildDesktopBackendPath,
-  normalizeHermesHomeRoot,
  pathEnvKey
 } = require('./backend-env.cjs')

@@ -67,21 +66,6 @@ test('buildDesktopBackendEnv extends PYTHONPATH and backend PATH together', () =
  assert.ok(env.PATH.includes('/opt/homebrew/bin'))
 })

-test('normalizeHermesHomeRoot maps profile homes back to the global Hermes root', () => {
-  assert.equal(
-    normalizeHermesHomeRoot('/Users/test/.hermes/profiles/oracle', { pathModule: path.posix }),
-    '/Users/test/.hermes'
-  )
-  assert.equal(
-    normalizeHermesHomeRoot('C:\\Users\\test\\AppData\\Local\\hermes\\profiles\\oracle', { pathModule: path.win32 }),
-    'C:\\Users\\test\\AppData\\Local\\hermes'
-  )
-  assert.equal(
-    normalizeHermesHomeRoot('/Users/test/.hermes', { pathModule: path.posix }),
-    '/Users/test/.hermes'
-  )
-})
-
 test('Windows PATH casing and delimiter are preserved without POSIX sane entries', () => {
  const env = buildDesktopBackendEnv({
    hermesHome: 'C:\\Users\\test\\AppData\\Local\\hermes',
--- a/apps/desktop/electron/backend-ready.cjs
+++ b/apps/desktop/electron/backend-ready.cjs
@@ -1,66 +0,0 @@
-const _READY_RE = /^HERMES_DASHBOARD_READY port=(\d+)/m
-
-/**
- * Watch a child process's stdout for the `HERMES_DASHBOARD_READY port=<N>`
- * line that web_server.py prints after uvicorn binds its socket.
- *
- * Returns the parsed port. Rejects if:
- *   - the child exits before emitting the line
- *   - the child emits an `error` event
- *   - no line arrives within the timeout
- *
- * A single `cleanup()` tears down every listener (data/exit/error/timeout)
- * on every terminal path — resolve, reject, or timeout — so repeated
- * backend spawns don't leak listener slots on the child.
- */
-function waitForDashboardPort(child, timeoutMs = 45_000) {
-  return new Promise((resolve, reject) => {
-    let buf = ''
-    let done = false
-
-    function cleanup() {
-      if (done) return
-      done = true
-      clearTimeout(timer)
-      child.stdout.off('data', onData)
-      child.off('exit', onExit)
-      child.off('error', onError)
-    }
-
-    function onData(chunk) {
-      buf += chunk.toString()
-      let nl
-      while ((nl = buf.indexOf('\n')) !== -1) {
-        const line = buf.slice(0, nl)
-        buf = buf.slice(nl + 1)
-        const m = line.match(_READY_RE)
-        if (m) {
-          cleanup()
-          resolve(parseInt(m[1], 10))
-          return
-        }
-      }
-    }
-
-    function onExit(code, signal) {
-      cleanup()
-      reject(new Error(`Hermes backend: exited before port announcement (${signal || code})`))
-    }
-
-    function onError(err) {
-      cleanup()
-      reject(err)
-    }
-
-    const timer = setTimeout(() => {
-      cleanup()
-      reject(new Error(`Timed out waiting for Hermes backend port announcement (${timeoutMs}ms)`))
-    }, timeoutMs)
-
-    child.stdout.on('data', onData)
-    child.on('exit', onExit)
-    child.on('error', onError)
-  })
-}
-
-module.exports = { waitForDashboardPort }
--- a/apps/desktop/electron/connection-config.cjs
+++ b/apps/desktop/electron/connection-config.cjs
@@ -166,39 +166,6 @@ function profileRemoteOverride(config, profile) {
  return { url, authMode: normAuthMode(entry.authMode), token: entry.token }
 }

-/**
- * In global-remote mode one backend serves every Desktop profile, so REST calls
- * that are scoped by renderer-side `request.profile` must carry that scope as a
- * query parameter. Local pooled backends and per-profile remote overrides do not
- * need this: they already run against a backend scoped to the target profile.
- */
-function pathWithGlobalRemoteProfile(path, profile, opts = {}) {
-  const scopedProfile = connectionScopeKey(profile)
-  if (!scopedProfile || !opts.globalRemote || opts.profileRemoteOverride) {
-    return path
-  }
-
-  const rawPath = String(path || '')
-  if (!rawPath) {
-    return path
-  }
-
-  let parsed
-  try {
-    parsed = new URL(rawPath, 'http://hermes.local')
-  } catch {
-    return path
-  }
-
-  if (parsed.searchParams.has('profile')) {
-    return path
-  }
-
-  parsed.searchParams.set('profile', scopedProfile)
-
-  return `${parsed.pathname}${parsed.search}${parsed.hash}`
-}
-
 function tokenPreview(value) {
  const raw = String(value || '')

@@ -280,7 +247,6 @@ module.exports = {
  cookiesHaveLiveSession,
  normAuthMode,
  normalizeRemoteBaseUrl,
-  pathWithGlobalRemoteProfile,
  profileRemoteOverride,
  resolveAuthMode,
  resolveTestWsUrl,
--- a/apps/desktop/electron/connection-config.test.cjs
+++ b/apps/desktop/electron/connection-config.test.cjs
@@ -24,7 +24,6 @@ const {
  cookiesHaveLiveSession,
  normAuthMode,
  normalizeRemoteBaseUrl,
-  pathWithGlobalRemoteProfile,
  profileRemoteOverride,
  resolveAuthMode,
  resolveTestWsUrl,
@@ -91,72 +90,6 @@ test('profileRemoteOverride tolerates a missing/!object profiles map', () => {
  assert.equal(profileRemoteOverride(null, 'coder'), null)
 })

-// --- pathWithGlobalRemoteProfile ---
-
-test('pathWithGlobalRemoteProfile appends profile in global remote mode', () => {
-  assert.equal(
-    pathWithGlobalRemoteProfile('/api/model/info', 'iris', {
-      globalRemote: true,
-      profileRemoteOverride: false
-    }),
-    '/api/model/info?profile=iris'
-  )
-})
-
-test('pathWithGlobalRemoteProfile preserves existing query params', () => {
-  assert.equal(
-    pathWithGlobalRemoteProfile('/api/model/options?force=1', 'iris', {
-      globalRemote: true,
-      profileRemoteOverride: false
-    }),
-    '/api/model/options?force=1&profile=iris'
-  )
-})
-
-test('pathWithGlobalRemoteProfile does not replace an explicit profile query', () => {
-  assert.equal(
-    pathWithGlobalRemoteProfile('/api/model/info?profile=default', 'iris', {
-      globalRemote: true,
-      profileRemoteOverride: false
-    }),
-    '/api/model/info?profile=default'
-  )
-})
-
-test('pathWithGlobalRemoteProfile skips local and per-profile remote override paths', () => {
-  assert.equal(
-    pathWithGlobalRemoteProfile('/api/model/info', 'iris', {
-      globalRemote: false,
-      profileRemoteOverride: false
-    }),
-    '/api/model/info'
-  )
-  assert.equal(
-    pathWithGlobalRemoteProfile('/api/model/info', 'iris', {
-      globalRemote: true,
-      profileRemoteOverride: true
-    }),
-    '/api/model/info'
-  )
-})
-
-test('pathWithGlobalRemoteProfile skips empty profile/path safely', () => {
-  assert.equal(
-    pathWithGlobalRemoteProfile('/api/model/info', '', {
-      globalRemote: true,
-      profileRemoteOverride: false
-    }),
-    '/api/model/info'
-  )
-  assert.equal(
-    pathWithGlobalRemoteProfile('', 'iris', {
-      globalRemote: true,
-      profileRemoteOverride: false
-    }),
-    ''
-  )
-})
-
 // --- normalizeRemoteBaseUrl ---

 test('normalizeRemoteBaseUrl strips trailing slashes, hash, and query', () => {
--- a/apps/desktop/electron/git-worktrees.cjs
+++ b/apps/desktop/electron/git-worktrees.cjs
@@ -1,174 +0,0 @@
-'use strict'
-
-// Resolve git-worktree relationships for a set of session cwds, reading git's
-// on-disk metadata directly (no `git` spawn per path):
-//
-//   - A normal checkout has a `.git` DIRECTORY at its root → it's the main
-//     worktree; its repo root IS that directory's parent.
-//   - A linked worktree has a `.git` FILE: `gitdir: <repo>/.git/worktrees/<name>`.
-//     That admin dir's `commondir` points back at the shared `<repo>/.git`, whose
-//     parent is the main repo root.
-//
-// Grouping by repoRoot therefore clusters a repo's main checkout with all of its
-// linked worktrees, regardless of how the worktree directories are named. The
-// branch (read from the worktree's own HEAD) gives each worktree a meaningful
-// label.
-
-const fs = require('node:fs')
-const path = require('node:path')
-const { resolveRequestedPathForIpc } = require('./hardening.cjs')
-
-// Walk up from `start` to the nearest ancestor that carries a `.git` entry
-// (file for a linked worktree, dir for the main checkout). Capped so a stray
-// path can't loop forever.
-function findGitHost(start, fsImpl) {
-  let dir = start
-
-  for (let i = 0; i < 64; i += 1) {
-    const dotgit = path.join(dir, '.git')
-
-    try {
-      if (fsImpl.existsSync(dotgit)) {
-        return dir
-      }
-    } catch {
-      return null
-    }
-
-    const parent = path.dirname(dir)
-
-    if (parent === dir) {
-      return null
-    }
-
-    dir = parent
-  }
-
-  return null
-}
-
-function readBranch(gitDir, fsImpl) {
-  try {
-    const head = fsImpl.readFileSync(path.join(gitDir, 'HEAD'), 'utf8').trim()
-    const ref = head.match(/^ref:\s*refs\/heads\/(.+)$/)
-
-    if (ref) {
-      return ref[1]
-    }
-
-    // Detached HEAD: surface a short sha so the worktree still gets a label.
-    return /^[0-9a-f]{7,40}$/i.test(head) ? head.slice(0, 8) : null
-  } catch {
-    return null
-  }
-}
-
-// Given the directory that owns the `.git` entry, resolve its worktree identity.
-function resolveFromHost(host, fsImpl) {
-  const dotgit = path.join(host, '.git')
-  let stat
-
-  try {
-    stat = fsImpl.statSync(dotgit)
-  } catch {
-    return null
-  }
-
-  if (stat.isDirectory()) {
-    return {
-      repoRoot: host,
-      worktreeRoot: host,
-      isMainWorktree: true,
-      branch: readBranch(dotgit, fsImpl)
-    }
-  }
-
-  // Linked worktree: `.git` is a file pointing at the admin dir.
-  let contents
-
-  try {
-    contents = fsImpl.readFileSync(dotgit, 'utf8').trim()
-  } catch {
-    return null
-  }
-
-  const match = contents.match(/^gitdir:\s*(.+)$/m)
-
-  if (!match) {
-    return null
-  }
-
-  const adminDir = path.resolve(host, match[1].trim())
-
-  // `commondir` resolves to the shared `<repo>/.git`; fall back to walking two
-  // levels up from `<repo>/.git/worktrees/<name>` if it's missing.
-  let commonDir
-
-  try {
-    const rel = fsImpl.readFileSync(path.join(adminDir, 'commondir'), 'utf8').trim()
-    commonDir = path.resolve(adminDir, rel)
-  } catch {
-    commonDir = path.dirname(path.dirname(adminDir))
-  }
-
-  return {
-    repoRoot: path.dirname(commonDir),
-    worktreeRoot: host,
-    isMainWorktree: false,
-    branch: readBranch(adminDir, fsImpl)
-  }
-}
-
-function resolveWorktree(startPath, fsImpl = fs) {
-  let resolved
-
-  try {
-    resolved = resolveRequestedPathForIpc(startPath, { purpose: 'Worktree lookup' })
-  } catch {
-    return null
-  }
-
-  let start = resolved
-
-  try {
-    const stat = fsImpl.statSync(resolved)
-
-    if (!stat.isDirectory()) {
-      start = path.dirname(resolved)
-    }
-  } catch {
-    return null
-  }
-
-  const host = findGitHost(start, fsImpl)
-
-  if (!host) {
-    return null
-  }
-
-  return resolveFromHost(host, fsImpl)
-}
-
-// Batch entry point for the renderer: maps each requested cwd to its worktree
-// info (or null when it isn't inside a git checkout / can't be read). Dedupes so
-// many sessions sharing a cwd cost one lookup.
-async function worktreesForIpc(cwds, options = {}) {
-  const fsImpl = options.fs || fs
-  const list = Array.isArray(cwds) ? cwds : []
-  const out = {}
-
-  for (const cwd of list) {
-    if (typeof cwd !== 'string' || !cwd.trim() || cwd in out) {
-      continue
-    }
-
-    out[cwd] = resolveWorktree(cwd, fsImpl)
-  }
-
-  return out
-}
-
-module.exports = {
-  resolveWorktree,
-  worktreesForIpc
-}
--- a/apps/desktop/electron/main.cjs
+++ b/apps/desktop/electron/main.cjs
@@ -35,14 +35,12 @@ const {
 const { canImportHermesCli, verifyHermesCli } = require('./backend-probes.cjs')
 const { probeGatewayWebSocket } = require('./gateway-ws-probe.cjs')
 const { adoptServedDashboardToken } = require('./dashboard-token.cjs')
-const { waitForDashboardPort } = require('./backend-ready.cjs')
+const { PortPool } = require('./port-pool.cjs')
 const { serializeJsonBody, setJsonRequestHeaders } = require('./oauth-net-request.cjs')
 const { fetchMarketplaceThemes, searchMarketplaceThemes } = require('./vscode-marketplace.cjs')
-const { buildDesktopBackendEnv, normalizeHermesHomeRoot } = require('./backend-env.cjs')
-const { readWindowsUserEnvVar } = require('./windows-user-env.cjs')
+const { buildDesktopBackendEnv } = require('./backend-env.cjs')
 const { readDirForIpc } = require('./fs-read-dir.cjs')
 const { gitRootForIpc } = require('./git-root.cjs')
-const { worktreesForIpc } = require('./git-worktrees.cjs')
 const { OFFICIAL_REPO_HTTPS_URL, isOfficialSshRemote } = require('./update-remote.cjs')
 const {
  buildPosixCleanupScript,
@@ -63,7 +61,6 @@ const {
  cookiesHaveLiveSession,
  normAuthMode,
  normalizeRemoteBaseUrl,
-  pathWithGlobalRemoteProfile,
  profileRemoteOverride,
  resolveAuthMode,
  resolveTestWsUrl,
@@ -114,6 +111,12 @@ if (USER_DATA_OVERRIDE) {
  app.setPath('userData', resolvedUserData)
 }

+const PORT_FLOOR = 9120
+const PORT_CEILING = 9199
+// In-process port reservations that close the pickPort() TOCTOU window where
+// two concurrent backend spawns could be handed the same port. See
+// port-pool.cjs for the full rationale.
+const portPool = new PortPool(PORT_FLOOR, PORT_CEILING)
 const DEV_SERVER = process.env.HERMES_DESKTOP_DEV_SERVER
 const IS_PACKAGED = app.isPackaged
 const IS_MAC = process.platform === 'darwin'
@@ -242,18 +245,8 @@ if (INSTALL_STAMP) {
 // HERMES_HOME beneath the throwaway userData dir so a fresh-install run never
 // touches the user's real ~/.hermes / %LOCALAPPDATA%\hermes.
 function resolveHermesHome() {
-  if (process.env.HERMES_HOME) return normalizeHermesHomeRoot(process.env.HERMES_HOME)
+  if (process.env.HERMES_HOME) return path.resolve(process.env.HERMES_HOME)
  if (USER_DATA_OVERRIDE) return path.join(path.resolve(USER_DATA_OVERRIDE), 'hermes-home')
-  if (IS_WINDOWS) {
-    // A GUI app launched from Explorer inherits the environment block captured
-    // at login, so a HERMES_HOME set via `setx` AFTER login is invisible in
-    // process.env even though the CLI (a fresh shell) sees it. Without this the
-    // backend silently falls back to %LOCALAPPDATA%\hermes and reports "No
-    // inference provider configured" despite a valid configured home (#45471).
-    // Consult the live User-scoped registry value before the default below.
-    const fromRegistry = readWindowsUserEnvVar('HERMES_HOME')
-    if (fromRegistry) return normalizeHermesHomeRoot(fromRegistry)
-  }
  if (IS_WINDOWS && process.env.LOCALAPPDATA) {
    const localappdata = path.join(process.env.LOCALAPPDATA, 'hermes')
    const legacy = path.join(app.getPath('home'), '.hermes')
@@ -1847,44 +1840,6 @@ async function applyUpdates(opts = {}) {
  }
 }

-async function handOffWindowsBootstrapRecovery(reason) {
-  if (!IS_WINDOWS || !IS_PACKAGED) return false
-
-  const updater = resolveUpdaterBinary()
-  if (!updater) return false
-
-  const updateRoot = resolveUpdateRoot()
-  const { branch: configuredBranch } = readDesktopUpdateConfig()
-  const branch = directoryExists(path.join(updateRoot, '.git'))
-    ? await resolveHealedBranch(updateRoot, configuredBranch || DEFAULT_UPDATE_BRANCH)
-    : configuredBranch || DEFAULT_UPDATE_BRANCH
-  const venvBin = path.join(updateRoot, 'venv', IS_WINDOWS ? 'Scripts' : 'bin')
-  const venvHermes = path.join(venvBin, IS_WINDOWS ? 'hermes.exe' : 'hermes')
-  const updaterArgs = fileExists(venvHermes) ? ['--update', '--branch', branch] : ['--repair', '--branch', branch]
-
-  await releaseBackendLockForUpdate(updateRoot)
-
-  const child = spawn(updater, updaterArgs, {
-    cwd: HERMES_HOME,
-    env: {
-      ...process.env,
-      HERMES_HOME,
-      PATH: [path.join(HERMES_HOME, 'node', 'bin'), venvBin, process.env.PATH].filter(Boolean).join(path.delimiter)
-    },
-    detached: true,
-    stdio: 'ignore',
-    windowsHide: false
-  })
-  child.unref()
-
-  rememberLog(`[bootstrap] handed off ${reason} recovery to updater: ${updater} ${updaterArgs.join(' ')}; exiting desktop to release app.asar`)
-  setTimeout(() => {
-    app.quit()
-  }, 600)
-
-  return true
-}
-
 // Resolve the hermes CLI to drive an in-app update: prefer the venv shim in
 // the install we're updating, fall back to `hermes` on PATH.
 function resolveHermesCliBinary(updateRoot) {
@@ -2482,14 +2437,6 @@ async function ensureRuntime(backend) {
  if (backend.kind === 'bootstrap-needed') {
    rememberLog('[bootstrap] no Hermes install found; starting first-launch bootstrap')

-    if (await handOffWindowsBootstrapRecovery('bootstrap-needed')) {
-      const handoffError = new Error('Hermes recovery was handed off to Hermes Setup. The desktop will restart when recovery completes.')
-      handoffError.isBootstrapFailure = true
-      handoffError.bootstrapHandedOff = true
-      bootstrapFailure = handoffError
-      throw handoffError
-    }
-
    // Eagerly flip the bootstrap UI state to 'active' so the renderer
    // shows the install overlay BEFORE the runner finishes fetching the
    // manifest (which on slow networks can take tens of seconds and would
@@ -2619,6 +2566,24 @@ async function ensureRuntime(backend) {
  return backend
 }

+function isPortAvailable(port) {
+  return new Promise(resolve => {
+    const server = net.createServer()
+    server.once('error', () => resolve(false))
+    server.once('listening', () => {
+      server.close(() => resolve(true))
+    })
+    server.listen(port, '127.0.0.1')
+  })
+}
+
+async function pickPort() {
+  const port = await portPool.reserve(isPortAvailable)
+  if (port === null) {
+    throw new Error(`No free localhost port in ${PORT_FLOOR}-${PORT_CEILING}`)
+  }
+  return port
+}

 function fetchJson(url, token, options = {}) {
  return new Promise((resolve, reject) => {
@@ -4696,14 +4661,25 @@ async function spawnPoolBackend(profile, entry) {
    }
  }

+  const port = await pickPort()
  const token = crypto.randomBytes(32).toString('base64url')
  // --profile wins over the inherited HERMES_HOME env (see _apply_profile_override
  // step 3 in hermes_cli/main.py), so the child re-homes to this profile.
-  // --port 0: the OS assigns an ephemeral port; the child announces it on stdout.
-  const dashboardArgs = ['--profile', profile, 'dashboard', '--no-open', '--host', '127.0.0.1', '--port', '0']
-  const backend = await ensureRuntime(resolveHermesBackend(dashboardArgs))
-  const hermesCwd = resolveHermesCwd()
-  const webDist = resolveWebDist()
+  const dashboardArgs = ['--profile', profile, 'dashboard', '--no-open', '--host', '127.0.0.1', '--port', String(port)]
+  let backend
+  let hermesCwd
+  let webDist
+  try {
+    backend = await ensureRuntime(resolveHermesBackend(dashboardArgs))
+    hermesCwd = resolveHermesCwd()
+    webDist = resolveWebDist()
+  } catch (error) {
+    // These run before the child exists / its exit handler is attached, so a
+    // throw here would otherwise leak the reservation and slowly exhaust the
+    // 9120-9199 range across switch cycles in one app session.
+    portPool.release(port)
+    throw error
+  }

  rememberLog(`Starting Hermes backend for profile "${profile}" via ${backend.label}`)

@@ -4731,6 +4707,7 @@ async function spawnPoolBackend(profile, entry) {
    })
  )
  entry.process = child
+  entry.port = port
  entry.token = token

  child.stdout.on('data', rememberLog)
@@ -4744,11 +4721,13 @@ async function spawnPoolBackend(profile, entry) {
  child.once('error', error => {
    rememberLog(`Hermes backend for profile "${profile}" failed to start: ${error.message}`)
    backendPool.delete(profile)
+    portPool.release(port)
    rejectStart?.(error)
  })
  child.once('exit', (code, signal) => {
    rememberLog(`Hermes backend for profile "${profile}" exited (${signal || code})`)
    backendPool.delete(profile)
+    portPool.release(port)
    if (!ready) {
      rejectStart?.(
        new Error(`Hermes backend for profile "${profile}" exited before it became ready (${signal || code}).`)
@@ -4756,10 +4735,6 @@ async function spawnPoolBackend(profile, entry) {
    }
  })

-  // Discover the ephemeral port the child bound to
-  const port = await Promise.race([waitForDashboardPort(child), startFailed])
-  entry.port = port
-
  const baseUrl = `http://127.0.0.1:${port}`
  await Promise.race([waitForHermes(baseUrl, token), startFailed])
  ready = true
@@ -4787,6 +4762,7 @@ function stopPoolBackend(profile) {
  const entry = backendPool.get(profile)
  if (!entry) return
  backendPool.delete(profile)
+  if (entry.port) portPool.release(entry.port)
  if (entry.process && !entry.process.killed) {
    try {
      entry.process.kill('SIGTERM')
@@ -4872,6 +4848,11 @@ async function startHermes() {
  }
  if (connectionPromise) return connectionPromise

+  // Hoisted so the outer .catch can release a port reserved by pickPort() when
+  // a throw (e.g. ensureRuntime failing) happens before the child's exit
+  // handler is attached. Stays null on the remote path (no port picked).
+  let reservedPort = null
+
  connectionPromise = (async () => {
    await advanceBootProgress('backend.resolve', 'Resolving Hermes backend', 8)
    // Resolve for the desktop's primary profile so a per-profile remote
@@ -4899,9 +4880,11 @@ async function startHermes() {
      }
    }

+    await advanceBootProgress('backend.port', 'Finding an open local port', 16)
+    const port = await pickPort()
+    reservedPort = port
    const token = crypto.randomBytes(32).toString('base64url')
-    // --port 0: the OS assigns an ephemeral port; the child announces it on stdout.
-    const dashboardArgs = ['dashboard', '--no-open', '--host', '127.0.0.1', '--port', '0']
+    const dashboardArgs = ['dashboard', '--no-open', '--host', '127.0.0.1', '--port', String(port)]
    // Pin the desktop's chosen profile via the global --profile flag. This is
    // deterministic (it wins over the sticky ~/.hermes/active_profile file) and
    // resolves HERMES_HOME the same way `hermes -p <name>` does on the CLI. An
@@ -4968,6 +4951,7 @@ async function startHermes() {
      )
      hermesProcess = null
      connectionPromise = null
+      portPool.release(port)
      sendBackendExit({ code: null, signal: null, error: error.message })
      rejectBackendStart?.(error)
    })
@@ -4975,6 +4959,7 @@ async function startHermes() {
      rememberLog(`Hermes backend exited (${signal || code})`)
      hermesProcess = null
      connectionPromise = null
+      portPool.release(port)
      sendBackendExit({ code, signal })
      if (!backendReady) {
        const message = `Hermes backend exited before it became ready (${signal || code}).`
@@ -4995,10 +4980,6 @@ async function startHermes() {
      }
    })

-    await advanceBootProgress('backend.port', 'Waiting for Hermes backend to launch', 86)
-    // Discover the ephemeral port the child bound to
-    const port = await Promise.race([waitForDashboardPort(hermesProcess), backendStartFailed])
-
    const baseUrl = `http://127.0.0.1:${port}`
    await advanceBootProgress('backend.wait', 'Waiting for Hermes backend to become ready', 90)
    await Promise.race([waitForHermes(baseUrl, token), backendStartFailed])
@@ -5038,6 +5019,7 @@ async function startHermes() {
      { allowDecrease: true }
    )
    connectionPromise = null
+    portPool.release(reservedPort)
    throw error
  })

@@ -5084,75 +5066,65 @@ function focusWindow(win) {
  win.focus()
 }

-function spawnSecondaryWindow({ sessionId, watch, newSession } = {}) {
-  const icon = getAppIconPath()
-  const win = new BrowserWindow({
-    width: SESSION_WINDOW_MIN_WIDTH,
-    height: SESSION_WINDOW_MIN_HEIGHT,
-    minWidth: SESSION_WINDOW_MIN_WIDTH,
-    minHeight: SESSION_WINDOW_MIN_HEIGHT,
-    title: 'Hermes',
-    titleBarStyle: 'hidden',
-    titleBarOverlay: getTitleBarOverlayOptions(),
-    trafficLightPosition: IS_MAC ? WINDOW_BUTTON_POSITION : undefined,
-    vibrancy: IS_MAC ? 'sidebar' : undefined,
-    opacity: windowOpacity(),
-    icon,
-    // Don't show until the renderer's first themed paint is ready. macOS
-    // `vibrancy` ignores `backgroundColor` and paints a translucent OS
-    // material (which follows the OS appearance, not the app theme), so a
-    // dark-themed app on a light-mode Mac flashes white until the renderer
-    // covers it. ready-to-show fires after the boot-time paint in
-    // themes/context.tsx, so the window appears already themed.
-    show: false,
-    backgroundColor: getWindowBackgroundColor(),
-    webPreferences: {
-      preload: path.join(__dirname, 'preload.cjs'),
-      contextIsolation: true,
-      webviewTag: true,
-      sandbox: true,
-      nodeIntegration: false,
-      devTools: true
-    }
-  })
-
-  if (IS_MAC) {
-    win.setWindowButtonPosition?.(WINDOW_BUTTON_POSITION)
-  }
-
-  win.once('ready-to-show', () => {
-    if (!win.isDestroyed()) win.show()
-  })
-
-  win.on('will-enter-full-screen', () => sendWindowStateChanged(true))
-  win.on('enter-full-screen', () => sendWindowStateChanged(true))
-  win.on('will-leave-full-screen', () => sendWindowStateChanged(false))
-  win.on('leave-full-screen', () => sendWindowStateChanged(false))
-
-  wireCommonWindowHandlers(win)
-
-  win.loadURL(
-    buildSessionWindowUrl(sessionId, {
-      devServer: DEV_SERVER,
-      rendererIndexPath: DEV_SERVER ? undefined : resolveRendererIndex(),
-      watch,
-      newSession
-    })
-  )
-
-  return win
-}
-
 // Open (or focus) a standalone window for a single chat session.
 function createSessionWindow(sessionId, { watch = false } = {}) {
-  return sessionWindows.openOrFocus(sessionId, () => spawnSecondaryWindow({ sessionId, watch }))
-}
+  return sessionWindows.openOrFocus(sessionId, () => {
+    const icon = getAppIconPath()
+    const win = new BrowserWindow({
+      width: SESSION_WINDOW_MIN_WIDTH,
+      height: SESSION_WINDOW_MIN_HEIGHT,
+      minWidth: SESSION_WINDOW_MIN_WIDTH,
+      minHeight: SESSION_WINDOW_MIN_HEIGHT,
+      title: 'Hermes',
+      titleBarStyle: 'hidden',
+      titleBarOverlay: getTitleBarOverlayOptions(),
+      trafficLightPosition: IS_MAC ? WINDOW_BUTTON_POSITION : undefined,
+      vibrancy: IS_MAC ? 'sidebar' : undefined,
+      opacity: windowOpacity(),
+      icon,
+      // Don't show until the renderer's first themed paint is ready. macOS
+      // `vibrancy` ignores `backgroundColor` and paints a translucent OS
+      // material (which follows the OS appearance, not the app theme), so a
+      // dark-themed app on a light-mode Mac flashes white until the renderer
+      // covers it. ready-to-show fires after the boot-time paint in
+      // themes/context.tsx, so the window appears already themed.
+      show: false,
+      backgroundColor: getWindowBackgroundColor(),
+      webPreferences: {
+        preload: path.join(__dirname, 'preload.cjs'),
+        contextIsolation: true,
+        webviewTag: true,
+        sandbox: true,
+        nodeIntegration: false,
+        devTools: true
+      }
+    })

-// Open a fresh compact window on the new-session draft (#/). Not registry-keyed:
-// like ⌘N in a browser, every press opens a new window — and a draft window that
-// later converts to a real session must not get refocused as if it were blank.
-function createNewSessionWindow() {
-  return spawnSecondaryWindow({ newSession: true })
+    if (IS_MAC) {
+      win.setWindowButtonPosition?.(WINDOW_BUTTON_POSITION)
+    }
+
+    win.once('ready-to-show', () => {
+      if (!win.isDestroyed()) win.show()
+    })
+
+    win.on('will-enter-full-screen', () => sendWindowStateChanged(true))
+    win.on('enter-full-screen', () => sendWindowStateChanged(true))
+    win.on('will-leave-full-screen', () => sendWindowStateChanged(false))
+    win.on('leave-full-screen', () => sendWindowStateChanged(false))
+
+    wireCommonWindowHandlers(win)
+
+    win.loadURL(
+      buildSessionWindowUrl(sessionId, {
+        devServer: DEV_SERVER,
+        rendererIndexPath: DEV_SERVER ? undefined : resolveRendererIndex(),
+        watch
+      })
+    )
+
+    return win
+  })
 }

 function createWindow() {
@@ -5339,11 +5311,6 @@ ipcMain.handle('hermes:window:openSession', async (_event, sessionId, opts) => {

  return { ok: true }
 })
-ipcMain.handle('hermes:window:openNewSession', async () => {
-  createNewSessionWindow()
-
-  return { ok: true }
-})
 ipcMain.handle('hermes:bootstrap:reset', async () => {
  // Renderer's "Reload and retry" path. Clear the latched failure and
  // reset connection state so the next startHermes() call restarts the
@@ -5613,14 +5580,9 @@ ipcMain.handle('hermes:api', async (_event, request) => {

  await prepareProfileDeleteRequest(request)

-  const profile = request?.profile
-  const connection = await ensureBackend(profile)
+  const connection = await ensureBackend(request?.profile)
  const timeoutMs = resolveTimeoutMs(request?.timeoutMs, DEFAULT_FETCH_TIMEOUT_MS)
-  const requestPath = pathWithGlobalRemoteProfile(request.path, profile, {
-    globalRemote: globalRemoteActive(),
-    profileRemoteOverride: profileHasRemoteOverride(profile)
-  })
-  const url = `${connection.baseUrl}${requestPath}`
+  const url = `${connection.baseUrl}${request.path}`
  // OAuth gateways authenticate REST via the HttpOnly session cookie held in
  // the OAuth partition — route through Electron's net stack bound to that
  // session so the cookie attaches automatically. Token/local modes keep using
@@ -5641,30 +5603,11 @@ ipcMain.handle('hermes:api', async (_event, request) => {

 ipcMain.handle('hermes:notify', (_event, payload) => {
  if (!Notification.isSupported()) return false
-  // Action buttons render only on signed macOS builds; elsewhere they're dropped
-  // and the body click still works.
-  const actions = Array.isArray(payload?.actions) ? payload.actions : []
-  const notification = new Notification({
+  new Notification({
    title: payload?.title || 'Hermes',
    body: payload?.body || '',
-    silent: Boolean(payload?.silent),
-    actions: actions.map(action => ({ type: 'button', text: String(action?.text || '') }))
-  })
-  notification.on('click', () => {
-    if (!mainWindow || mainWindow.isDestroyed()) return
-    focusWindow(mainWindow)
-    if (payload?.sessionId) {
-      mainWindow.webContents.send('hermes:focus-session', payload.sessionId)
-    }
-  })
-  notification.on('action', (_actionEvent, index) => {
-    if (!mainWindow || mainWindow.isDestroyed()) return
-    const action = actions[index]
-    if (action?.id) {
-      mainWindow.webContents.send('hermes:notification-action', { sessionId: payload?.sessionId, actionId: action.id })
-    }
-  })
-  notification.show()
+    silent: Boolean(payload?.silent)
+  }).show()
  return true
 })

@@ -6052,8 +5995,6 @@ ipcMain.handle('hermes:fs:readDir', async (_event, dirPath) => readDirForIpc(dir

 ipcMain.handle('hermes:fs:gitRoot', async (_event, startPath) => gitRootForIpc(startPath))

-ipcMain.handle('hermes:fs:worktrees', async (_event, cwds) => worktreesForIpc(cwds))
-
 ipcMain.handle('hermes:terminal:start', async (event, payload = {}) => {
  if (!nodePty) {
    throw new Error('PTY support is unavailable. Reinstall desktop dependencies and restart Hermes.')
--- a/apps/desktop/electron/port-pool.cjs
+++ b/apps/desktop/electron/port-pool.cjs
@@ -0,0 +1,84 @@
+'use strict'
+
+/**
+ * In-process port reservation pool for the desktop backend launcher.
+ *
+ * pickPort() probes a localhost port with a throwaway server and closes it
+ * before the real bind happens in a separate Python child. Between that probe
+ * and the child's bind there is a TOCTOU window: a second concurrent spawn
+ * (the primary backend racing a pool backend) can be handed the SAME port, and
+ * one then dies with EADDRINUSE ("address already in use" -> "Object has been
+ * destroyed" boot loop). Reserving the chosen port in THIS process until the
+ * child exits closes that window.
+ *
+ * The OS bind remains the source of truth; this only deconflicts racers inside
+ * this process — it can't stop a foreign squatter, which the probe + the
+ * EADDRINUSE self-heal still cover.
+ *
+ * The pool is dependency-injected (the availability probe is passed in) and
+ * free of Electron/Node socket I/O, so it is unit-tested without real sockets
+ * (see port-pool.test.cjs).
+ */
+class PortPool {
+  /**
+   * @param {number} floor   inclusive lowest port to hand out
+   * @param {number} ceiling inclusive highest port to hand out
+   */
+  constructor(floor, ceiling) {
+    this.floor = floor
+    this.ceiling = ceiling
+    this._reserved = new Set()
+  }
+
+  /** @returns {boolean} whether `port` is currently reserved in-process. */
+  has(port) {
+    return this._reserved.has(port)
+  }
+
+  /** Release a previously reserved port. No-op if it was not reserved. */
+  release(port) {
+    this._reserved.delete(port)
+  }
+
+  /** Drop all reservations. */
+  clear() {
+    this._reserved.clear()
+  }
+
+  /** @returns {number} count of currently reserved ports. */
+  get size() {
+    return this._reserved.size
+  }
+
+  /**
+   * Reserve and return the lowest port in [floor, ceiling] that is neither
+   * already reserved in-process nor rejected by `isAvailable(port)`, or null
+   * if every port is taken. `isAvailable` may be sync (boolean) or async
+   * (Promise<boolean>); it is awaited either way.
+   *
+   * The candidate is claimed in `_reserved` BEFORE the probe is awaited, so a
+   * concurrent reserve() that interleaves at the await skips it instead of
+   * probing and returning the same port. The claim is dropped again if the
+   * probe rejects the port or throws (the error still propagates).
+   *
+   * @param {(port: number) => boolean | Promise<boolean>} isAvailable
+   * @returns {Promise<number|null>}
+   */
+  async reserve(isAvailable) {
+    for (let port = this.floor; port <= this.ceiling; port += 1) {
+      if (this._reserved.has(port)) continue
+      // Claim first: there must be no await between the has() check and add().
+      this._reserved.add(port)
+      let available = false
+      try {
+        available = await isAvailable(port)
+      } finally {
+        if (!available) this._reserved.delete(port)
+      }
+      if (available) return port
+    }
+    return null
+  }
+}
+
+module.exports = { PortPool }
--- a/apps/desktop/electron/port-pool.test.cjs
+++ b/apps/desktop/electron/port-pool.test.cjs
@@ -0,0 +1,77 @@
+/**
+ * Tests for electron/port-pool.cjs.
+ *
+ * Run with: node --test electron/port-pool.test.cjs
+ *
+ * PortPool is the in-process reservation that closes the pickPort() TOCTOU
+ * window. These cover selection order, skipping reserved/unavailable ports,
+ * release/reuse, exhaustion, and async probes — without real sockets.
+ */
+
+const test = require('node:test')
+const assert = require('node:assert/strict')
+
+const { PortPool } = require('./port-pool.cjs')
+
+const allFree = () => true
+
+test('reserve returns the lowest free port and reserves it', async () => {
+  const pool = new PortPool(9120, 9199)
+  const port = await pool.reserve(allFree)
+  assert.equal(port, 9120)
+  assert.ok(pool.has(9120))
+  assert.equal(pool.size, 1)
+})
+
+test('reserve skips ports already reserved in-process', async () => {
+  const pool = new PortPool(9120, 9199)
+  const first = await pool.reserve(allFree)
+  const second = await pool.reserve(allFree)
+  assert.equal(first, 9120)
+  assert.equal(second, 9121)
+})
+
+test('reserve skips ports the probe rejects', async () => {
+  const pool = new PortPool(9120, 9199)
+  const busy = new Set([9120, 9121])
+  const port = await pool.reserve(p => !busy.has(p))
+  assert.equal(port, 9122)
+})
+
+test('reserve returns null when every port is taken', async () => {
+  const pool = new PortPool(9120, 9121)
+  await pool.reserve(allFree)
+  await pool.reserve(allFree)
+  assert.equal(await pool.reserve(allFree), null)
+})
+
+test('release frees a reserved port for reuse', async () => {
+  const pool = new PortPool(9120, 9120)
+  assert.equal(await pool.reserve(allFree), 9120)
+  assert.equal(await pool.reserve(allFree), null) // exhausted
+  pool.release(9120)
+  assert.ok(!pool.has(9120))
+  assert.equal(await pool.reserve(allFree), 9120) // reusable
+})
+
+test('release is a no-op for an unreserved port', () => {
+  const pool = new PortPool(9120, 9199)
+  pool.release(9120)
+  assert.equal(pool.size, 0)
+})
+
+test('reserve awaits an async probe', async () => {
+  const pool = new PortPool(9120, 9199)
+  const busy = new Set([9120])
+  const port = await pool.reserve(p => Promise.resolve(!busy.has(p)))
+  assert.equal(port, 9121)
+})
+
+test('clear drops all reservations', async () => {
+  const pool = new PortPool(9120, 9199)
+  await pool.reserve(allFree)
+  await pool.reserve(allFree)
+  assert.equal(pool.size, 2)
+  pool.clear()
+  assert.equal(pool.size, 0)
+})
--- a/apps/desktop/electron/preload.cjs
+++ b/apps/desktop/electron/preload.cjs
@@ -6,7 +6,6 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
  touchBackend: profile => ipcRenderer.invoke('hermes:backend:touch', profile),
  getGatewayWsUrl: profile => ipcRenderer.invoke('hermes:gateway:ws-url', profile),
  openSessionWindow: (sessionId, opts) => ipcRenderer.invoke('hermes:window:openSession', sessionId, opts),
-  openNewSessionWindow: () => ipcRenderer.invoke('hermes:window:openNewSession'),
  getBootProgress: () => ipcRenderer.invoke('hermes:boot-progress:get'),
  getConnectionConfig: profile => ipcRenderer.invoke('hermes:connection-config:get', profile),
  saveConnectionConfig: payload => ipcRenderer.invoke('hermes:connection-config:save', payload),
@@ -55,7 +54,6 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
  getRecentLogs: () => ipcRenderer.invoke('hermes:logs:recent'),
  readDir: dirPath => ipcRenderer.invoke('hermes:fs:readDir', dirPath),
  gitRoot: startPath => ipcRenderer.invoke('hermes:fs:gitRoot', startPath),
-  worktrees: cwds => ipcRenderer.invoke('hermes:fs:worktrees', cwds),
  terminal: {
    dispose: id => ipcRenderer.invoke('hermes:terminal:dispose', id),
    resize: (id, size) => ipcRenderer.invoke('hermes:terminal:resize', id, size),
@@ -95,16 +93,6 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
    ipcRenderer.on('hermes:window-state-changed', listener)
    return () => ipcRenderer.removeListener('hermes:window-state-changed', listener)
  },
-  onFocusSession: callback => {
-    const listener = (_event, sessionId) => callback(sessionId)
-    ipcRenderer.on('hermes:focus-session', listener)
-    return () => ipcRenderer.removeListener('hermes:focus-session', listener)
-  },
-  onNotificationAction: callback => {
-    const listener = (_event, payload) => callback(payload)
-    ipcRenderer.on('hermes:notification-action', listener)
-    return () => ipcRenderer.removeListener('hermes:notification-action', listener)
-  },
  onPreviewFileChanged: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:preview-file-changed', listener)
--- a/apps/desktop/electron/session-windows.cjs
+++ b/apps/desktop/electron/session-windows.cjs
@@ -15,13 +15,12 @@ const SESSION_WINDOW_MIN_HEIGHT = 620
 // flag MUST sit in the query string BEFORE the '#': anything after the '#' is
 // treated as the route by HashRouter and would break routeSessionId(). The
 // renderer reads the flag from window.location.search to suppress the install /
-// onboarding overlays and the global session sidebar. `new=1` marks the compact
-// scratch window; `watch=1` marks a spectator window (e.g. a running subagent's
-// session): the renderer resumes it lazily so the gateway never builds an agent
-// just to stream into it.
-function buildSessionWindowUrl(sessionId, { devServer, rendererIndexPath, watch, newSession } = {}) {
-  const query = `?win=secondary${newSession ? '&new=1' : ''}${watch ? '&watch=1' : ''}`
-  const route = newSession ? '#/' : `#/${encodeURIComponent(sessionId)}`
+// onboarding overlays and the global session sidebar. `watch=1` marks a
+// spectator window (e.g. a running subagent's session): the renderer resumes
+// it lazily so the gateway never builds an agent just to stream into it.
+function buildSessionWindowUrl(sessionId, { devServer, rendererIndexPath, watch } = {}) {
+  const query = `?win=secondary${watch ? '&watch=1' : ''}`
+  const route = `#/${encodeURIComponent(sessionId)}`

  if (devServer) {
    const base = devServer.endsWith('/') ? devServer.slice(0, -1) : devServer
--- a/apps/desktop/electron/session-windows.test.cjs
+++ b/apps/desktop/electron/session-windows.test.cjs
@@ -82,12 +82,6 @@ test('buildSessionWindowUrl adds the watch flag for spectator windows, before th
  assert.equal(url, 'http://localhost:5173/?win=secondary&watch=1#/abc')
 })

-test('buildSessionWindowUrl routes new-session windows to the draft (#/)', () => {
-  const url = buildSessionWindowUrl(null, { devServer: 'http://localhost:5173', newSession: true })
-
-  assert.equal(url, 'http://localhost:5173/?win=secondary&new=1#/')
-})
-
 test('registry opens one window per session and focuses on re-open', () => {
  const registry = createSessionWindowRegistry()
  let built = 0
--- a/apps/desktop/electron/windows-child-process.test.cjs
+++ b/apps/desktop/electron/windows-child-process.test.cjs
@@ -42,9 +42,6 @@ test('intentional or interactive desktop child processes stay documented', () =>
  const source = readElectronFile('main.cjs')

  assert.match(source, /windowsHide: false/)
-  assert.match(source, /handOffWindowsBootstrapRecovery/)
-  assert.match(source, /'--repair', '--branch'/)
-  assert.match(source, /'--update', '--branch'/)
  assert.match(source, /nodePty\.spawn\(command, args/)
  assert.match(source, /spawn\('cmd\.exe', \['\/c', 'start'/)
 })
--- a/apps/desktop/electron/windows-user-env.cjs
+++ b/apps/desktop/electron/windows-user-env.cjs
@@ -1,76 +0,0 @@
-// windows-user-env.cjs
-//
-// Read a User-scoped environment variable straight from the Windows registry
-// (HKCU\Environment).
-//
-// A GUI app launched from Explorer inherits the environment block captured at
-// login, so a variable set via `setx` AFTER login is invisible in process.env
-// even though a fresh shell — and the Hermes CLI — sees it immediately. The
-// desktop's HERMES_HOME resolution relies on process.env, so that stale-snapshot
-// gap silently sends the backend to the default %LOCALAPPDATA%\hermes. Reading
-// the live registry value closes the gap. See #45471.
-
-const { execFileSync } = require('node:child_process')
-
-// Parse the output of `reg query HKCU\Environment /v <name>`, which looks like:
-//
-//   HKEY_CURRENT_USER\Environment
-//       HERMES_HOME    REG_SZ    F:\Hermes\data
-//
-// Returns the raw value string (spaces inside the value preserved), or null when
-// the requested value line isn't present.
-function parseRegQueryValue(stdout, name) {
-  if (!stdout || !name) return null
-  const typePattern =
-    /^(\S+)\s+(?:REG_SZ|REG_EXPAND_SZ|REG_MULTI_SZ|REG_DWORD|REG_QWORD|REG_BINARY|REG_NONE)\s+(.*)$/
-  for (const rawLine of String(stdout).split(/\r?\n/)) {
-    const line = rawLine.trim()
-    const match = line.match(typePattern)
-    if (match && match[1].toLowerCase() === name.toLowerCase()) {
-      return match[2]
-    }
-  }
-  return null
-}
-
-// Expand %VAR% references against an env map. REG_EXPAND_SZ values store
-// unexpanded references; plain REG_SZ paths have none, so this is a no-op for
-// the common F:\... case. Unknown references are left verbatim.
-function expandWindowsEnvRefs(value, env = process.env) {
-  if (!value) return value
-  return value.replace(/%([^%]+)%/g, (whole, name) => {
-    const key = Object.keys(env).find(k => k.toUpperCase() === String(name).toUpperCase())
-    return key != null && env[key] != null ? env[key] : whole
-  })
-}
-
-// Read a User-scoped env var from HKCU\Environment. Windows-only: returns null
-// off-Windows (without spawning), on any spawn error, when `reg` exits non-zero
-// (the value doesn't exist), or when the value is empty.
-function readWindowsUserEnvVar(
-  name,
-  { platform = process.platform, env = process.env, exec = execFileSync } = {}
-) {
-  if (platform !== 'win32' || !name) return null
-  let stdout
-  try {
-    stdout = exec('reg', ['query', 'HKCU\\Environment', '/v', name], {
-      encoding: 'utf8',
-      windowsHide: true,
-      timeout: 5000
-    })
-  } catch {
-    // `reg` missing, or value absent (reg exits 1) — caller falls back.
-    return null
-  }
-  const raw = parseRegQueryValue(stdout, name)
-  if (raw == null) return null
-  const expanded = expandWindowsEnvRefs(raw, env).trim()
-  return expanded || null
-}
-
-module.exports = {
-  expandWindowsEnvRefs,
-  parseRegQueryValue,
-  readWindowsUserEnvVar
-}
--- a/apps/desktop/electron/windows-user-env.test.cjs
+++ b/apps/desktop/electron/windows-user-env.test.cjs
@@ -1,90 +0,0 @@
-const assert = require('node:assert/strict')
-const { test } = require('node:test')
-
-const {
-  expandWindowsEnvRefs,
-  parseRegQueryValue,
-  readWindowsUserEnvVar
-} = require('./windows-user-env.cjs')
-
-// ── parseRegQueryValue ─────────────────────────────────────────────────────
-
-test('parseRegQueryValue extracts a REG_SZ value', () => {
-  const out = [
-    '',
-    'HKEY_CURRENT_USER\\Environment',
-    '    HERMES_HOME    REG_SZ    F:\\Hermes\\data',
-    ''
-  ].join('\r\n')
-  assert.equal(parseRegQueryValue(out, 'HERMES_HOME'), 'F:\\Hermes\\data')
-})
-
-test('parseRegQueryValue matches the name case-insensitively', () => {
-  const out = 'HKEY_CURRENT_USER\\Environment\r\n    Hermes_Home    REG_EXPAND_SZ    %USERPROFILE%\\h\r\n'
-  assert.equal(parseRegQueryValue(out, 'HERMES_HOME'), '%USERPROFILE%\\h')
-})
-
-test('parseRegQueryValue preserves spaces inside the value', () => {
-  const out = '    HERMES_HOME    REG_SZ    C:\\Program Files\\Hermes\r\n'
-  assert.equal(parseRegQueryValue(out, 'HERMES_HOME'), 'C:\\Program Files\\Hermes')
-})
-
-test('parseRegQueryValue returns null when the value line is absent', () => {
-  const out = 'HKEY_CURRENT_USER\\Environment\r\n    Path    REG_SZ    C:\\x\r\n'
-  assert.equal(parseRegQueryValue(out, 'HERMES_HOME'), null)
-  assert.equal(parseRegQueryValue('', 'HERMES_HOME'), null)
-  assert.equal(parseRegQueryValue('garbage', 'HERMES_HOME'), null)
-})
-
-// ── expandWindowsEnvRefs ───────────────────────────────────────────────────
-
-test('expandWindowsEnvRefs expands %VAR% case-insensitively', () => {
-  assert.equal(
-    expandWindowsEnvRefs('%UserProfile%\\h', { USERPROFILE: 'C:\\Users\\jeff' }),
-    'C:\\Users\\jeff\\h'
-  )
-})
-
-test('expandWindowsEnvRefs leaves literal paths and unknown refs intact', () => {
-  assert.equal(expandWindowsEnvRefs('F:\\Hermes\\data', {}), 'F:\\Hermes\\data')
-  assert.equal(expandWindowsEnvRefs('%NOPE%\\x', {}), '%NOPE%\\x')
-})
-
-// ── readWindowsUserEnvVar ──────────────────────────────────────────────────
-
-test('readWindowsUserEnvVar returns null off Windows without spawning', () => {
-  let spawned = false
-  const exec = () => {
-    spawned = true
-    return ''
-  }
-  assert.equal(readWindowsUserEnvVar('HERMES_HOME', { platform: 'linux', exec }), null)
-  assert.equal(spawned, false)
-})
-
-test('readWindowsUserEnvVar queries HKCU\\Environment and expands the value', () => {
-  const calls = []
-  const exec = (cmd, args) => {
-    calls.push([cmd, args])
-    return 'HKEY_CURRENT_USER\\Environment\r\n    HERMES_HOME    REG_EXPAND_SZ    %DRIVE%\\Hermes\r\n'
-  }
-  const value = readWindowsUserEnvVar('HERMES_HOME', {
-    platform: 'win32',
-    env: { DRIVE: 'F:' },
-    exec
-  })
-  assert.equal(value, 'F:\\Hermes')
-  assert.deepEqual(calls, [['reg', ['query', 'HKCU\\Environment', '/v', 'HERMES_HOME']]])
-})
-
-test('readWindowsUserEnvVar returns null when reg exits non-zero (value missing)', () => {
-  const exec = () => {
-    throw new Error('reg exited 1')
-  }
-  assert.equal(readWindowsUserEnvVar('HERMES_HOME', { platform: 'win32', exec }), null)
-})
-
-test('readWindowsUserEnvVar returns null for an empty value', () => {
-  const exec = () => '    HERMES_HOME    REG_SZ    \r\n'
-  assert.equal(readWindowsUserEnvVar('HERMES_HOME', { platform: 'win32', exec }), null)
-})
--- a/apps/desktop/package.json
+++ b/apps/desktop/package.json
@@ -8,7 +8,7 @@
  "type": "module",
  "main": "electron/main.cjs",
  "engines": {
-    "node": "^20.19.0 || >=22.12.0"
+    "node": ">=26.0.0"
  },
  "scripts": {
    "dev": "concurrently -k \"npm:dev:renderer\" \"npm:dev:electron\"",
@@ -20,7 +20,6 @@
    "start": "npm run build && electron .",
    "build": "node scripts/assert-root-install.cjs && node scripts/write-build-stamp.cjs && node scripts/stage-native-deps.cjs && tsc -b && vite build && npm run postbuild",
    "postbuild": "node scripts/assert-dist-built.cjs",
-    "prebuilder": "node scripts/patch-electron-builder-mac-binary.cjs",
    "builder": "cross-env NODE_OPTIONS=--max-old-space-size=16384 electron-builder",
    "pack": "npm run build && npm run builder -- --dir",
    "dist": "npm run build && npm run builder",
@@ -37,7 +36,7 @@
    "test:desktop:nsis": "node scripts/test-desktop.mjs nsis",
    "test:desktop:existing": "node scripts/test-desktop.mjs existing",
    "test:desktop:fresh": "node scripts/test-desktop.mjs fresh",
-    "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs electron/windows-user-env.test.cjs",
+    "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/port-pool.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs",
    "typecheck": "tsc -p . --noEmit",
    "lint": "eslint src/ electron/",
    "lint:fix": "eslint src/ electron/ --fix",
@@ -91,7 +90,6 @@
    "react-router-dom": "^7.17.0",
    "react-shiki": "^0.9.3",
    "remark-math": "^6.0.0",
-    "remend": "^1.3.0",
    "shiki": "^4.0.2",
    "streamdown": "^2.5.0",
    "tailwind-merge": "^3.5.0",
@@ -100,7 +98,6 @@
    "unicode-animations": "^1.0.3",
    "unified": "^11.0.5",
    "unist-util-visit-parents": "^6.0.2",
-    "use-stick-to-bottom": "^1.1.6",
    "vfile": "^6.0.3",
    "web-haptics": "^0.0.6"
  },
@@ -135,7 +132,6 @@
  },
  "build": {
    "electronVersion": "40.9.3",
-    "electronDist": "../../node_modules/electron/dist",
    "appId": "com.nousresearch.hermes",
    "productName": "Hermes",
    "executableName": "Hermes",
--- a/apps/desktop/scripts/patch-electron-builder-mac-binary.cjs
+++ b/apps/desktop/scripts/patch-electron-builder-mac-binary.cjs
@@ -1,59 +0,0 @@
-const fs = require('node:fs')
-const path = require('node:path')
-
-if (process.platform !== 'darwin') {
-  process.exit(0)
-}
-
-const desktopRoot = path.resolve(__dirname, '..')
-const repoRoot = path.resolve(desktopRoot, '..', '..')
-const electronMacPath = path.join(repoRoot, 'node_modules', 'app-builder-lib', 'out', 'electron', 'electronMac.js')
-
-const marker = 'hermes-macos-electron-binary-fallback'
-const needle = `    await Promise.all([
-        doRename(path.join(contentsPath, "MacOS"), electronBranding.productName, appPlist.CFBundleExecutable),
-        (0, builder_util_1.unlinkIfExists)(path.join(appOutDir, "LICENSE")),
-        (0, builder_util_1.unlinkIfExists)(path.join(appOutDir, "LICENSES.chromium.html")),
-    ]);`
-const replacement = `    // ${marker}: electron-builder 26.8.x can sometimes copy
-    // Electron.app without its main MacOS/Electron binary before this rename.
-    // Restore it from the installed Electron runtime so local desktop installs
-    // do not fail with ENOENT during macOS arm64 packaging.
-    const macosDir = path.join(contentsPath, "MacOS");
-    const bundledElectronBinary = path.join(macosDir, electronBranding.productName);
-    if (!fs.existsSync(bundledElectronBinary)) {
-        const candidates = [
-            path.join(packager.info.framework.distMacOsAppName, "Contents", "MacOS", electronBranding.productName),
-            path.join(process.cwd(), "..", "..", "node_modules", "electron", "dist", "Electron.app", "Contents", "MacOS", electronBranding.productName),
-        ];
-        const sourceBinary = candidates.find(candidate => fs.existsSync(candidate));
-        if (sourceBinary == null) {
-            throw new Error("Electron binary missing from packaged app and Electron runtime: " + bundledElectronBinary);
-        }
-        await (0, promises_1.copyFile)(sourceBinary, bundledElectronBinary);
-        await (0, promises_1.chmod)(bundledElectronBinary, 0o755);
-    }
-    await Promise.all([
-        doRename(macosDir, electronBranding.productName, appPlist.CFBundleExecutable),
-        (0, builder_util_1.unlinkIfExists)(path.join(appOutDir, "LICENSE")),
-        (0, builder_util_1.unlinkIfExists)(path.join(appOutDir, "LICENSES.chromium.html")),
-    ]);`
-
-if (!fs.existsSync(electronMacPath)) {
-  console.warn(`[patch-electron-builder] skipped: ${electronMacPath} not found`)
-  process.exit(0)
-}
-
-const source = fs.readFileSync(electronMacPath, 'utf8')
-if (source.includes(marker)) {
-  console.log('[patch-electron-builder] macOS Electron binary fallback already applied')
-  process.exit(0)
-}
-
-if (!source.includes(needle)) {
-  console.warn('[patch-electron-builder] skipped: expected electronMac.js shape not found')
-  process.exit(0)
-}
-
-fs.writeFileSync(electronMacPath, source.replace(needle, replacement))
-console.log('[patch-electron-builder] applied macOS Electron binary fallback')
--- a/apps/desktop/src/app/artifacts/index.tsx
+++ b/apps/desktop/src/app/artifacts/index.tsx
@@ -23,7 +23,6 @@ import { type Translations, useI18n } from '@/i18n'
 import { sessionTitle } from '@/lib/chat-runtime'
 import { ExternalLink, ExternalLinkIcon, hostPathLabel, urlSlugTitleLabel, useLinkTitle } from '@/lib/external-link'
 import { FileImage, FileText, FolderOpen, Link2 } from '@/lib/icons'
-import { mediaExternalUrl } from '@/lib/media'
 import { cn } from '@/lib/utils'
 import { notifyError } from '@/store/notifications'
 import type { SessionInfo, SessionMessage } from '@/types/hermes'
@@ -125,12 +124,19 @@ function artifactKind(value: string): ArtifactKind {
 }
 
 function artifactHref(value: string): string {
-  if (value.startsWith('http://') || value.startsWith('https://') || value.startsWith('data:')) {
+  if (
+    value.startsWith('http://') ||
+    value.startsWith('https://') ||
+    value.startsWith('file://') ||
+    value.startsWith('data:')
+  ) {
     return value
   }
 
-  if (value.startsWith('file://') || value.startsWith('/')) {
-    return mediaExternalUrl(value)
+  if (value.startsWith('/')) {
+    // encodeURI leaves '?' and '#' intact, so a path containing either would
+    // be cut into a query/fragment; escape them so the whole path survives.
+    return `file://${encodeURI(value).replace(/[?#]/g, ch => encodeURIComponent(ch))}`
   }
 
   return value
--- a/apps/desktop/src/app/chat/composer/controls.tsx
+++ b/apps/desktop/src/app/chat/composer/controls.tsx
@@ -9,7 +9,6 @@ import { formatCombo } from '@/lib/keybinds/combo'
 import { cn } from '@/lib/utils'

 import type { ConversationStatus } from './hooks/use-voice-conversation'
-import { ModelPill } from './model-pill'
 import type { ChatBarState, VoiceStatus } from './types'

 export const ICON_BTN = 'size-(--composer-control-size) shrink-0 rounded-md'
@@ -67,7 +66,6 @@ export function ComposerControls({
  const c = t.composer
  const steerCombo = formatCombo('mod+enter')
  const steerLabel = `${c.steer} (${steerCombo})`
-
  const steerTip = (
    <span className="inline-flex items-center gap-1.5">
      {c.steer}
@@ -83,10 +81,8 @@ export function ComposerControls({

  return (
    <div className="ml-auto flex shrink-0 items-center gap-(--composer-control-gap)">
-      <ModelPill disabled={disabled} model={state.model} />
-      {/* While the agent runs and the user is typing, steer takes over the mic's
-          slot rather than crowding the row with an extra button. */}
-      {canSteer ? (
+      <DictationButton disabled={disabled} onToggle={onDictate} state={state.voice} status={voiceStatus} />
+      {canSteer && (
        <Tip label={steerTip}>
          <Button
            aria-label={steerLabel}
@@ -100,8 +96,6 @@ export function ComposerControls({
            <SteeringWheel size={16} />
          </Button>
        </Tip>
-      ) : (
-        <DictationButton disabled={disabled} onToggle={onDictate} state={state.voice} status={voiceStatus} />
      )}
      {showVoicePrimary ? (
        <Tip label={c.startVoice}>
--- a/apps/desktop/src/app/chat/composer/enter-submit-dom-race.test.tsx
+++ b/apps/desktop/src/app/chat/composer/enter-submit-dom-race.test.tsx
@@ -24,7 +24,6 @@ afterEach(cleanup)
 // state stays stale while the DOM already holds the text.
 function Harness({
  busy = false,
-  disabled = false,
  queued = [],
  onSubmit,
  onQueue,
@@ -32,7 +31,6 @@ function Harness({
  onDrain
 }: {
  busy?: boolean
-  disabled?: boolean
  queued?: readonly string[]
  onSubmit: (text: string) => void
  onQueue: (text: string) => void
@@ -54,10 +52,6 @@ function Harness({
  }

  const submitDraft = () => {
-    if (disabled) {
-      return
-    }
-
    const editor = editorRef.current
    if (editor) {
      const domText = composerPlainText(editor)
@@ -90,10 +84,6 @@ function Harness({
      const editorText = editorRef.current ? composerPlainText(editorRef.current) : draftRef.current
      const hasLivePayload = editorText.trim().length > 0 || attachments.length > 0

-      if (disabled) {
-        return
-      }
-
      if (!busy && !hasLivePayload && queued.length > 0) {
        onDrain()

@@ -196,23 +186,4 @@ describe('composer Enter submit — live DOM vs stale composer state (#39630)',
    expect(onDrain).toHaveBeenCalledTimes(1)
    expect(onSubmit).not.toHaveBeenCalled()
  })
-
-  it('keeps reconnect drafts editable but blocks Enter submit until the gateway returns', async () => {
-    const onSubmit = vi.fn()
-    const onDrain = vi.fn()
-    const { getByTestId } = render(
-      <Harness disabled onCancel={vi.fn()} onDrain={onDrain} onQueue={vi.fn()} onSubmit={onSubmit} queued={['queued-1']} />
-    )
-    const editor = getByTestId('editor')
-
-    await act(async () => {
-      editor.textContent = 'draft while reconnecting'
-      fireEvent.input(editor)
-      fireEvent.keyDown(editor, { key: 'Enter' })
-    })
-
-    expect(editor.textContent).toBe('draft while reconnecting')
-    expect(onDrain).not.toHaveBeenCalled()
-    expect(onSubmit).not.toHaveBeenCalled()
-  })
 })
--- a/apps/desktop/src/app/chat/composer/index.tsx
+++ b/apps/desktop/src/app/chat/composer/index.tsx
@@ -43,16 +43,13 @@ import {
 import {
  $queuedPromptsBySession,
  enqueueQueuedPrompt,
-  MAX_AUTO_DRAIN_ATTEMPTS,
-  migrateQueuedPrompts,
  promoteQueuedPrompt,
  type QueuedPromptEntry,
  removeQueuedPrompt,
-  shouldAutoDrain,
+  shouldAutoDrainOnSettle,
  updateQueuedPrompt
 } from '@/store/composer-queue'
 import { $statusItemsBySession } from '@/store/composer-status'
-import { notify } from '@/store/notifications'
 import { $gatewayState, $messages, setSessionPickerOpen } from '@/store/session'
 import { $threadScrolledUp } from '@/store/thread-scroll'
 import { useTheme } from '@/themes'
@@ -85,8 +82,6 @@ import {
 import { QueuePanel } from './queue-panel'
 import {
  composerPlainText,
-  deleteSelectionInEditor,
-  insertPlainTextAtCaret,
  normalizeComposerEditorDom,
  placeCaretEnd,
  refChipElement,
@@ -137,12 +132,6 @@ function slashChipKindForItem(item: Unstable_TriggerItem): SlashChipKind {
  return 'command'
 }

-/** A `/` query is at its arg stage once it's past the command name. */
-const slashArgStage = (query: string) => query.includes(' ')
-
-/** The `/command` token of a slash query (`personality x` → `/personality`). */
-const slashCommandToken = (query: string) => `/${query.split(/\s+/, 1)[0]?.toLowerCase() ?? ''}`
-
 interface QueueEditState {
  attachments: ComposerAttachment[]
  draft: string
@@ -185,6 +174,7 @@ export function ChatBar({
  const queuedPromptsBySession = useStore($queuedPromptsBySession)
  const statusItemsBySession = useStore($statusItemsBySession)
  const scrolledUp = useStore($threadScrolledUp)
+  const sessionMessages = useStore($messages)
  const activeQueueSessionKey = queueSessionKey || sessionId || null

  const queuedPrompts = useMemo(
@@ -207,14 +197,11 @@ export function ChatBar({
  const composerSurfaceRef = useRef<HTMLDivElement | null>(null)
  const editorRef = useRef<HTMLDivElement | null>(null)
  const draftRef = useRef(draft)
+  const previousBusyRef = useRef(busy)
  const pendingDraftPersistRef = useRef<{ scope: string | null; text: string } | null>(null)
  const activeQueueSessionKeyRef = useRef(activeQueueSessionKey)
  activeQueueSessionKeyRef.current = activeQueueSessionKey
-  const prevQueueKeyRef = useRef(activeQueueSessionKey)
  const drainingQueueRef = useRef(false)
-  // Per-entry auto-drain failure counts; bounds retries so a persistent 404
-  // can't spin-loop. Cleared on success; reset naturally on remount/reconnect.
-  const drainFailuresRef = useRef(new Map<string, number>())
  const urlInputRef = useRef<HTMLInputElement | null>(null)

  const [urlOpen, setUrlOpen] = useState(false)
@@ -255,8 +242,6 @@ export function ChatBar({
  const gatewayState = useStore($gatewayState)
  const newSessionPlaceholders = t.composer.newSessionPlaceholders
  const followUpPlaceholders = t.composer.followUpPlaceholders
-  const reconnecting = gatewayState === 'closed' || gatewayState === 'error'
-  const inputDisabled = disabled && !reconnecting

  // Resting placeholder: a starter for brand-new sessions, a continuation for
  // existing ones. Picked once and only re-rolled when we genuinely move to a
@@ -287,13 +272,11 @@ export function ChatBar({
    setRestingPlaceholder(pickPlaceholder(sessionId ? followUpPlaceholders : newSessionPlaceholders))
  }, [followUpPlaceholders, newSessionPlaceholders, sessionId])

-  // When the transport is disabled it's because the gateway isn't open.
-  // Distinguish a cold start ("Starting Hermes...") from a dropped connection
-  // we're trying to restore. During reconnect, keep the textbox editable so a
-  // flaky network doesn't block drafting; only submit/backend actions stay
-  // disabled until the gateway is open again.
+  // When the bar is disabled it's because the gateway isn't open. Distinguish a
+  // cold start ("Starting Hermes...") from a dropped connection we're trying to
+  // restore (e.g. after the Mac slept) so the stuck state reads as recoverable.
  const placeholder = disabled
-    ? reconnecting
+    ? gatewayState === 'closed' || gatewayState === 'error'
      ? t.composer.placeholderReconnecting
      : t.composer.placeholderStarting
    : restingPlaceholder
@@ -335,13 +318,13 @@ export function ChatBar({
  )

  useEffect(() => {
-    if (!inputDisabled) {
+    if (!disabled) {
      focusInput()
    }
-  }, [focusInput, focusKey, focusRequestId, inputDisabled])
+  }, [disabled, focusInput, focusKey, focusRequestId])

  useEffect(() => {
-    if (inputDisabled) {
+    if (disabled) {
      return undefined
    }

@@ -361,7 +344,7 @@ export function ChatBar({
      offFocus()
      offInsert()
    }
-  }, [appendExternalText, inputDisabled])
+  }, [appendExternalText, disabled])

  // Keep draftRef in sync with the assistant-ui composer state for callers
  // that read the latest text outside the React render cycle. We don't push
@@ -540,6 +523,48 @@ export function ChatBar({
    })
  }, [])

+  const handlePaste = (event: ClipboardEvent<HTMLDivElement>) => {
+    const imageBlobs = extractClipboardImageBlobs(event.clipboardData)
+
+    if (imageBlobs.length > 0) {
+      event.preventDefault()
+
+      if (onAttachImageBlob) {
+        triggerHaptic('selection')
+
+        for (const blob of imageBlobs) {
+          void onAttachImageBlob(blob)
+        }
+      }
+
+      return
+    }
+
+    // Trim surrounding whitespace so a copy that dragged along leading/trailing
+    // blank lines (common when selecting from terminals, code blocks, web pages)
+    // doesn't dump multiline padding into the composer. Internal newlines are
+    // preserved — only the edges are cleaned up.
+    const pastedText = event.clipboardData.getData('text').trim()
+
+    if (!pastedText) {
+      event.preventDefault()
+
+      return
+    }
+
+    if (DATA_IMAGE_URL_RE.test(pastedText)) {
+      event.preventDefault()
+
+      return
+    }
+
+    event.preventDefault()
+    document.execCommand('insertText', false, pastedText)
+    const nextDraft = composerPlainText(event.currentTarget)
+    draftRef.current = nextDraft
+    aui.composer().setText(nextDraft)
+  }
+
  const [trigger, setTrigger] = useState<TriggerState | null>(null)
  const [triggerActive, setTriggerActive] = useState(0)
  const [triggerItems, setTriggerItems] = useState<readonly Unstable_TriggerItem[]>([])
@@ -576,15 +601,7 @@ export function ChatBar({
    }

    const before = textBeforeCaret(editor)
-    const found = detectTrigger(before ?? composerPlainText(editor))
-
-    // The arg-stage popover is only useful for commands with an options screen.
-    // For a no-arg command it would dead-end on "No matches", so drop it — the
-    // directive is already complete.
-    const detected =
-      found?.kind === '/' && slashArgStage(found.query) && !desktopSlashCommandTakesArgs(slashCommandToken(found.query))
-        ? null
-        : found
+    const detected = detectTrigger(before ?? composerPlainText(editor))

    setTrigger(detected)

@@ -624,46 +641,6 @@ export function ChatBar({
    flushEditorToDraft(event.currentTarget)
  }

-  const handlePaste = (event: ClipboardEvent<HTMLDivElement>) => {
-    const imageBlobs = extractClipboardImageBlobs(event.clipboardData)
-
-    if (imageBlobs.length > 0) {
-      event.preventDefault()
-
-      if (onAttachImageBlob) {
-        triggerHaptic('selection')
-
-        for (const blob of imageBlobs) {
-          void onAttachImageBlob(blob)
-        }
-      }
-
-      return
-    }
-
-    // Trim surrounding whitespace so a copy that dragged along leading/trailing
-    // blank lines (common when selecting from terminals, code blocks, web pages)
-    // doesn't dump multiline padding into the composer. Internal newlines are
-    // preserved — only the edges are cleaned up.
-    const pastedText = event.clipboardData.getData('text').trim()
-
-    if (!pastedText) {
-      event.preventDefault()
-
-      return
-    }
-
-    if (DATA_IMAGE_URL_RE.test(pastedText)) {
-      event.preventDefault()
-
-      return
-    }
-
-    event.preventDefault()
-    insertPlainTextAtCaret(event.currentTarget, pastedText)
-    flushEditorToDraft(event.currentTarget)
-  }
-
  const triggerAdapter: Unstable_TriggerAdapter | null =
    trigger?.kind === '@' ? at.adapter : trigger?.kind === '/' ? slash.adapter : null

@@ -679,12 +656,6 @@ export function ChatBar({

  const triggerLoading = trigger?.kind === '@' ? at.loading : trigger?.kind === '/' ? slash.loading : false

-  // Suppress the "No matches" empty state once a slash command is past its name:
-  // a no-arg command has nothing to offer, and a fully-typed arg commits on
-  // Space/Tab — neither should dead-end on a popover.
-  const argStageEmpty =
-    trigger?.kind === '/' && slashArgStage(trigger.query) && !triggerLoading && !triggerItems.length
-
  const closeTrigger = () => {
    setTrigger(null)
    setTriggerItems([])
@@ -695,25 +666,6 @@ export function ChatBar({
    setTriggerActive(idx => Math.min(idx, Math.max(0, triggerItems.length - 1)))
  }, [triggerItems.length])

-  // Commit the literally-typed `/command arg` as a directive chip — used when
-  // the completion list is empty because the arg is already fully typed (the
-  // backend completer drops exact matches). Reuses the chip path via a
-  // synthetic item whose serialized form is the verbatim text.
-  const commitTypedSlashDirective = () => {
-    if (trigger?.kind !== '/') {
-      return
-    }
-
-    const text = `/${trigger.query.trimEnd()}`
-
-    replaceTriggerWithChip({
-      id: text,
-      type: 'slash',
-      label: text.slice(1),
-      metadata: { command: slashCommandToken(trigger.query), display: text, meta: '', group: '', action: '', rawText: text }
-    })
-  }
-
  const replaceTriggerWithChip = (item: Unstable_TriggerItem) => {
    const editor = editorRef.current

@@ -832,18 +784,6 @@ export function ChatBar({
      return
    }

-    // Non-collapsed Backspace/Delete: native selection-delete is ~O(n²) on large
-    // drafts (Ctrl+A → Delete froze ~1.3s). Collapsed carets fall through.
-    if (
-      (event.key === 'Backspace' || event.key === 'Delete') &&
-      deleteSelectionInEditor(event.currentTarget)
-    ) {
-      event.preventDefault()
-      flushEditorToDraft(event.currentTarget)
-
-      return
-    }
-
    // Cmd/Ctrl+Shift+K drains the next queued message. Plain Cmd/Ctrl+K is
    // reserved for the global command palette.
    if ((event.metaKey || event.ctrlKey) && !event.altKey && event.shiftKey && event.key.toLowerCase() === 'k') {
@@ -873,15 +813,7 @@ export function ChatBar({
        return
      }

-      // Enter / Tab / Space all accept the highlighted item: a no-arg command
-      // commits its directive chip, an arg-taking command expands to its
-      // options step, and an arg option commits the full `/cmd arg` chip. Space
-      // is slash-only (an `@` mention takes a literal space) and gated to a
-      // non-empty query so a bare `/ ` still types a space.
-      const acceptOnSpace = event.key === ' ' && trigger.kind === '/' && Boolean(trigger.query.trim())
-      const accept = event.key === 'Enter' || event.key === 'Tab' || acceptOnSpace
-
-      if (accept) {
+      if (event.key === 'Enter' || event.key === 'Tab') {
        event.preventDefault()
        triggerKeyConsumedRef.current = true
        const item = triggerItems[triggerActive]
@@ -902,24 +834,6 @@ export function ChatBar({
      }
    }

-    // Arg stage with nothing left to suggest — a fully-typed arg the backend
-    // completer no longer echoes (it drops the exact match), e.g.
-    // `/personality creative`. Space/Tab still commit what's typed as a single
-    // directive chip; Enter falls through to submit (send it as-is).
-    if (
-      trigger?.kind === '/' &&
-      !triggerItems.length &&
-      (event.key === ' ' || event.key === 'Tab') &&
-      slashArgStage(trigger.query) &&
-      trigger.query.trim()
-    ) {
-      event.preventDefault()
-      triggerKeyConsumedRef.current = true
-      commitTypedSlashDirective()
-
-      return
-    }
-
    // ArrowUp/ArrowDown navigate, in priority order: the queue (edit entries in
    // place) then sent-message history. The history ring is derived from live
    // session messages each press — single source of truth, no mirror.
@@ -952,9 +866,7 @@ export function ChatBar({
      event.preventDefault()
      triggerKeyConsumedRef.current = true

-      // $messages is read imperatively (not subscribed) so the composer
-      // doesn't re-render on every streaming delta flush.
-      const history = deriveUserHistory($messages.get(), chatMessageText)
+      const history = deriveUserHistory(sessionMessages, chatMessageText)
      const entry = browseBackward(sessionId, currentDraft, history)

      if (entry !== null) {
@@ -979,7 +891,7 @@ export function ChatBar({
        event.preventDefault()
        triggerKeyConsumedRef.current = true

-        const history = deriveUserHistory($messages.get(), chatMessageText)
+        const history = deriveUserHistory(sessionMessages, chatMessageText)
        const result = browseForward(sessionId, history)

        if (result !== null) {
@@ -1015,10 +927,6 @@ export function ChatBar({
      const editorText = editorRef.current ? composerPlainText(editorRef.current) : draftRef.current
      const hasLivePayload = editorText.trim().length > 0 || attachments.length > 0

-      if (disabled) {
-        return
-      }
-
      if (!busy && !hasLivePayload && queuedPrompts.length > 0) {
        void drainNextQueued()

@@ -1417,7 +1325,6 @@ export function ChatBar({
          return false
        }

-        drainFailuresRef.current.delete(entry.id)
        removeQueuedPrompt(activeQueueSessionKey, entry.id)
        resetBrowseState(sessionId)

@@ -1429,17 +1336,16 @@ export function ChatBar({
    [activeQueueSessionKey, onSubmit, queuedPrompts, sessionId]
  )

-  const pickDrainHead = useCallback(
-    (entries: QueuedPromptEntry[]) => {
-      const skip = queueEditRef.current?.entryId
+  const drainNextQueued = useCallback(
+    () =>
+      runDrain(entries => {
+        const skip = queueEdit?.entryId

-      return skip ? entries.find(e => e.id !== skip) : entries[0]
-    },
-    [] // reads the edit id off a ref so the lock-holder always sees the latest
+        return skip ? entries.find(e => e.id !== skip) : entries[0]
+      }),
+    [queueEdit, runDrain]
  )

-  const drainNextQueued = useCallback(() => runDrain(pickDrainHead), [pickDrainHead, runDrain])
-
  const sendQueuedNow = useCallback(
    (id: string) => {
      if (!activeQueueSessionKey || id === queueEdit?.entryId) {
@@ -1457,76 +1363,30 @@ export function ChatBar({
        return true
      }

-      // A manual send clears the auto-drain backoff so a stuck entry the user
-      // taps gets a fresh attempt (and re-enables auto-retry on success).
-      drainFailuresRef.current.delete(id)
-
      return runDrain(entries => entries.find(e => e.id === id))
    },
    [activeQueueSessionKey, busy, onCancel, queueEdit, runDrain]
  )

-  // Edge-independent auto-drain: send the head whenever the session is idle and
-  // the queue is non-empty, bounding retries so a thrown/rejected onSubmit (e.g.
-  // a stale-session 404) can't strand the entry permanently nor spin-loop. The
-  // drain lock serializes sends; a remount/reconnect resets the failure counts.
-  const autoDrainNext = useCallback(() => {
-    if (busy || drainingQueueRef.current || !activeQueueSessionKey) {
-      return
-    }
+  // Auto-drain on busy → false (turn settled). Queued turns always flow once
+  // the session is idle again — whether the turn finished naturally or the
+  // user interrupted it. Interrupting to reach a queued message is the whole
+  // point of the queue, so we never suppress the drain. To cancel queued
+  // turns, the user deletes them from the panel.
+  useEffect(() => {
+    const wasBusy = previousBusyRef.current
+    previousBusyRef.current = busy

-    const entry = pickDrainHead(queuedPrompts)
-
-    if (!entry || (drainFailuresRef.current.get(entry.id) ?? 0) >= MAX_AUTO_DRAIN_ATTEMPTS) {
-      return
-    }
-
-    const onFail = () => {
-      const fails = (drainFailuresRef.current.get(entry.id) ?? 0) + 1
-      drainFailuresRef.current.set(entry.id, fails)
-
-      if (fails >= MAX_AUTO_DRAIN_ATTEMPTS) {
-        notify({
-          id: 'composer-queue-stuck',
-          kind: 'error',
-          title: t.composer.queueStuckTitle,
-          message: t.composer.queueStuckBody
-        })
-      }
-    }
-
-    void runDrain(() => entry)
-      .then(sent => {
-        if (!sent) {
-          onFail()
-        }
+    if (
+      shouldAutoDrainOnSettle({
+        isBusy: busy,
+        queueLength: queuedPrompts.length,
+        wasBusy
      })
-      .catch(onFail)
-  }, [activeQueueSessionKey, busy, pickDrainHead, queuedPrompts, runDrain, t])
-
-  // Re-key on a runtime session-id change. A stable stored id (queueSessionKey)
-  // never churns, so a change there is a real session switch and must NOT
-  // migrate; only the runtime-derived key (queueSessionKey falsy → key is
-  // sessionId) churns on a backend bounce/resume of the same conversation.
-  useEffect(() => {
-    const prev = prevQueueKeyRef.current
-    prevQueueKeyRef.current = activeQueueSessionKey
-
-    if (queueSessionKey || !prev || !activeQueueSessionKey || prev === activeQueueSessionKey) {
-      return
+    ) {
+      void drainNextQueued()
    }
-
-    migrateQueuedPrompts(prev, activeQueueSessionKey)
-  }, [activeQueueSessionKey, queueSessionKey])
-
-  // Queued turns flow whenever the session is idle — on the busy→false settle
-  // edge, on mount/reconnect, and after a re-key — so a swallowed edge can't
-  // strand them. To cancel queued turns, the user deletes them from the panel.
-  useEffect(() => {
-    if (shouldAutoDrain({ isBusy: busy, queueLength: queuedPrompts.length })) {
-      autoDrainNext()
-    }
-  }, [autoDrainNext, busy, queuedPrompts.length])
+  }, [busy, drainNextQueued, queuedPrompts.length])

  // Queue-edit cleanup: on session swap the scope effect already stashed the
  // edit snapshot; only restore into the composer when still on the same scope.
@@ -1561,10 +1421,6 @@ export function ChatBar({
  }

  const submitDraft = () => {
-    if (disabled) {
-      return
-    }
-
    // Source the text from the DOM editor, not React state. The AUI composer
    // state (`draft`) and the derived `hasComposerPayload` lag the DOM by a
    // render, so on fast typing or IME composition the final keystroke(s) may
@@ -1745,7 +1601,6 @@ export function ChatBar({
  const input = (
    <div className={cn('relative', stacked ? 'w-full' : 'min-w-(--composer-input-inline-min-width) flex-1')}>
      <div
-        aria-disabled={inputDisabled ? true : undefined}
        aria-label={t.composer.message}
        autoCapitalize="off"
        autoCorrect="off"
@@ -1756,7 +1611,7 @@ export function ChatBar({
          stacked && 'pl-3',
          stacked ? 'w-full' : 'min-w-(--composer-input-inline-min-width) flex-1'
        )}
-        contentEditable={!inputDisabled}
+        contentEditable={!disabled}
        data-placeholder={placeholder}
        data-slot={RICH_INPUT_SLOT}
        onBlur={() => window.setTimeout(closeTrigger, 80)}
@@ -1842,7 +1697,7 @@ export function ChatBar({
          ref={composerRef}
        >
          {showHelpHint && <HelpHint />}
-          {trigger && !argStageEmpty && (
+          {trigger && (
            <ComposerTriggerPopover
              activeIndex={triggerActive}
              items={triggerItems}
@@ -1886,6 +1741,7 @@ export function ChatBar({
                'group/composer-surface relative z-4 isolate rounded-[inherit] border border-[color-mix(in_srgb,var(--dt-composer-ring)_calc(18%*var(--composer-ring-strength)),var(--dt-input))] transition-[border-color] duration-200 ease-out focus-within:border-[color-mix(in_srgb,var(--dt-composer-ring)_calc(45%*var(--composer-ring-strength)),transparent)]',
                COMPOSER_DROP_FADE_CLASS,
                'group-has-data-[state=open]/composer:border-t-transparent',
+                'group-data-[status-stack]/composer:border-t-transparent',
                dragActive && COMPOSER_DROP_ACTIVE_CLASS
              )}
              data-slot="composer-surface"
--- a/apps/desktop/src/app/chat/composer/model-pill.tsx
+++ b/apps/desktop/src/app/chat/composer/model-pill.tsx
@@ -1,86 +0,0 @@
-import { useStore } from '@nanostores/react'
-import { useState } from 'react'
-
-import { ModelMenuCloseContext } from '@/app/shell/model-menu-panel'
-import { Button } from '@/components/ui/button'
-import { DropdownMenu, DropdownMenuContent, DropdownMenuTrigger } from '@/components/ui/dropdown-menu'
-import { GlyphSpinner } from '@/components/ui/glyph-spinner'
-import { useI18n } from '@/i18n'
-import { ChevronDown } from '@/lib/icons'
-import { formatModelStatusLabel } from '@/lib/model-status-label'
-import { cn } from '@/lib/utils'
-import {
-  $currentFastMode,
-  $currentModel,
-  $currentProvider,
-  $currentReasoningEffort,
-  setModelPickerOpen
-} from '@/store/session'
-
-import type { ChatBarState } from './types'
-
-const PILL = cn(
-  'h-(--composer-control-size) max-w-40 shrink-0 gap-1 rounded-md px-2 text-xs font-normal',
-  'text-(--ui-text-tertiary) hover:bg-(--chrome-action-hover) hover:text-foreground'
-)
-
-/**
- * Composer model selector — the relocated status-bar pill. Reuses the live
- * `model.options` dropdown (`modelMenuContent`) verbatim; falls back to the
- * full picker when the gateway is closed and no live menu exists.
- */
-export function ModelPill({ disabled, model }: { disabled: boolean; model: ChatBarState['model'] }) {
-  const copy = useI18n().t.shell.statusbar
-  const currentModel = useStore($currentModel)
-  const currentProvider = useStore($currentProvider)
-  const fastMode = useStore($currentFastMode)
-  const reasoningEffort = useStore($currentReasoningEffort)
-  const [open, setOpen] = useState(false)
-
-  // The model resolves a beat after the gateway/session comes up. Rather than
-  // flash a literal "No model", show a quiet loader (inherits the pill text
-  // color at half opacity) until a model lands.
-  const label = (
-    <>
-      {currentModel.trim() ? (
-        <span className="truncate">{formatModelStatusLabel(currentModel, { fastMode, reasoningEffort })}</span>
-      ) : (
-        <GlyphSpinner className="opacity-50" spinner="braille" />
-      )}
-      <ChevronDown className="size-2.5 shrink-0 opacity-50" />
-    </>
-  )
-
-  const title = currentProvider ? copy.modelTitle(currentProvider, currentModel || copy.modelNone) : copy.switchModel
-
-  if (!model.modelMenuContent) {
-    return (
-      <Button
-        aria-label={copy.openModelPicker}
-        className={PILL}
-        disabled={disabled}
-        onClick={() => setModelPickerOpen(true)}
-        title={copy.openModelPicker}
-        type="button"
-        variant="ghost"
-      >
-        {label}
-      </Button>
-    )
-  }
-
-  return (
-    <DropdownMenu onOpenChange={setOpen} open={open}>
-      <DropdownMenuTrigger asChild>
-        <Button aria-label={title} className={PILL} disabled={disabled} title={title} type="button" variant="ghost">
-          {label}
-        </Button>
-      </DropdownMenuTrigger>
-      <DropdownMenuContent align="end" className="w-64 p-0" side="top" sideOffset={8}>
-        <ModelMenuCloseContext.Provider value={() => setOpen(false)}>
-          {model.modelMenuContent}
-        </ModelMenuCloseContext.Provider>
-      </DropdownMenuContent>
-    </DropdownMenu>
-  )
-}
--- a/apps/desktop/src/app/chat/composer/queue-panel.tsx
+++ b/apps/desktop/src/app/chat/composer/queue-panel.tsx
@@ -1,9 +1,7 @@
 import { StatusRow } from '@/components/chat/status-row'
 import { StatusSection } from '@/components/chat/status-section'
 import { Button } from '@/components/ui/button'
-import { Tip } from '@/components/ui/tooltip'
 import { type Translations, useI18n } from '@/i18n'
-import { ArrowUp, Pencil, Trash2 } from '@/lib/icons'
 import { cn } from '@/lib/utils'
 import type { QueuedPromptEntry } from '@/store/composer-queue'

@@ -40,46 +38,32 @@ export function QueuePanel({ busy, editingId, entries, onDelete, onEdit, onSendN
              isEditing && 'border-[color-mix(in_srgb,var(--dt-composer-ring)_40%,transparent)] bg-accent/25'
            )}
            key={entry.id}
+            leading={
+              <span aria-hidden className="size-3.5 shrink-0 rounded-full border border-foreground/35 bg-transparent" />
+            }
            trailing={
              <>
-                <Tip label={c.queueEdit}>
-                  <Button
-                    aria-label={c.queueEdit}
-                    className="size-5 rounded-md"
-                    disabled={Boolean(editingId) && !isEditing}
-                    onClick={() => onEdit(entry)}
-                    size="icon-xs"
-                    type="button"
-                    variant="ghost"
-                  >
-                    <Pencil size={11} />
-                  </Button>
-                </Tip>
-                <Tip label={busy ? c.queueSendNext : c.queueSend}>
-                  <Button
-                    aria-label={busy ? c.queueSendNext : c.queueSend}
-                    className="size-5 rounded-md"
-                    disabled={isEditing}
-                    onClick={() => onSendNow(entry.id)}
-                    size="icon-xs"
-                    type="button"
-                    variant="ghost"
-                  >
-                    <ArrowUp size={11} />
-                  </Button>
-                </Tip>
-                <Tip label={c.queueDelete}>
-                  <Button
-                    aria-label={c.queueDelete}
-                    className="size-5 rounded-md"
-                    onClick={() => onDelete(entry.id)}
-                    size="icon-xs"
-                    type="button"
-                    variant="ghost"
-                  >
-                    <Trash2 size={11} />
-                  </Button>
-                </Tip>
+                <Button
+                  disabled={Boolean(editingId) && !isEditing}
+                  onClick={() => onEdit(entry)}
+                  size="micro"
+                  type="button"
+                  variant="text"
+                >
+                  {c.queueEdit}
+                </Button>
+                <Button
+                  disabled={isEditing}
+                  onClick={() => onSendNow(entry.id)}
+                  size="micro"
+                  type="button"
+                  variant="secondary"
+                >
+                  {busy ? c.queueSendNext : c.queueSend}
+                </Button>
+                <Button onClick={() => onDelete(entry.id)} size="micro" type="button" variant="text">
+                  {c.queueDelete}
+                </Button>
              </>
            }
            trailingVisible={isEditing}
--- a/apps/desktop/src/app/chat/composer/rich-editor.test.ts
+++ b/apps/desktop/src/app/chat/composer/rich-editor.test.ts
@@ -3,24 +3,12 @@ import { describe, expect, it } from 'vitest'
 import { insertInlineRefsIntoEditor } from './inline-refs'
 import {
  composerPlainText,
-  deleteSelectionInEditor,
-  insertPlainTextAtCaret,
  normalizeComposerEditorDom,
  refChipElement,
  renderComposerContents,
  RICH_INPUT_SLOT
 } from './rich-editor'

-const caretIn = (editor: HTMLElement) => {
-  const range = document.createRange()
-  const selection = window.getSelection()!
-
-  range.selectNodeContents(editor)
-  range.collapse(false)
-  selection.removeAllRanges()
-  selection.addRange(range)
-}
-
 describe('renderComposerContents', () => {
  it('renders refs and raw text without interpreting user text as HTML', () => {
    const editor = document.createElement('div')
@@ -71,64 +59,3 @@ describe('insertInlineRefsIntoEditor', () => {
    expect(composerPlainText(editor)).toBe('@file:`src/foo.ts` ')
  })
 })
-
-describe('insertPlainTextAtCaret', () => {
-  it('inserts multiline text as text nodes + br', () => {
-    const editor = document.createElement('div')
-    editor.dataset.slot = RICH_INPUT_SLOT
-    document.body.append(editor)
-    caretIn(editor)
-
-    insertPlainTextAtCaret(editor, 'one\ntwo\nthree')
-
-    expect(editor.querySelectorAll('br').length).toBe(2)
-    expect(composerPlainText(editor)).toBe('one\ntwo\nthree')
-
-    editor.remove()
-  })
-
-  it('replaces the selected span', () => {
-    const editor = document.createElement('div')
-    editor.dataset.slot = RICH_INPUT_SLOT
-    editor.textContent = 'abXYef'
-    document.body.append(editor)
-
-    const text = editor.firstChild!
-    const selection = window.getSelection()!
-    const range = document.createRange()
-
-    range.setStart(text, 2)
-    range.setEnd(text, 4)
-    selection.removeAllRanges()
-    selection.addRange(range)
-
-    insertPlainTextAtCaret(editor, 'cd')
-
-    expect(composerPlainText(editor)).toBe('abcdef')
-
-    editor.remove()
-  })
-})
-
-describe('deleteSelectionInEditor', () => {
-  it('clears a non-collapsed range and leaves a collapsed caret', () => {
-    const editor = document.createElement('div')
-    editor.dataset.slot = RICH_INPUT_SLOT
-    editor.textContent = 'hello world'
-    document.body.append(editor)
-
-    const selection = window.getSelection()!
-    const range = document.createRange()
-
-    range.selectNodeContents(editor)
-    selection.removeAllRanges()
-    selection.addRange(range)
-
-    expect(deleteSelectionInEditor(editor)).toBe(true)
-    expect(composerPlainText(editor)).toBe('')
-    expect(selection.getRangeAt(0).collapsed).toBe(true)
-    expect(deleteSelectionInEditor(editor)).toBe(false)
-
-    editor.remove()
-  })
-})
--- a/apps/desktop/src/app/chat/composer/rich-editor.ts
+++ b/apps/desktop/src/app/chat/composer/rich-editor.ts
@@ -132,63 +132,6 @@ export function renderComposerContents(target: HTMLElement, text: string) {
  appendComposerContents(target, text)
 }

-/** Caret range when the selection lives inside `editor`; else null. */
-function composerSelectionRange(editor: HTMLElement) {
-  const selection = window.getSelection()
-  const range = selection?.rangeCount ? selection.getRangeAt(0) : null
-
-  if (!selection || !range || !editor.contains(range.commonAncestorContainer)) {
-    return null
-  }
-
-  return { range, selection }
-}
-
-/** Insert plain text at the caret (replacing any selection). Pastes use this
- *  instead of `execCommand('insertText')` — Chromium's editing pipeline is
- *  ~O(n²) on large multiline blobs. */
-export function insertPlainTextAtCaret(editor: HTMLElement, text: string) {
-  const hit = composerSelectionRange(editor)
-  const fragment = document.createDocumentFragment()
-
-  appendTextWithBreaks(fragment, text)
-
-  const tail = fragment.lastChild
-
-  if (hit) {
-    hit.range.deleteContents()
-    hit.range.insertNode(fragment)
-  } else {
-    editor.append(fragment)
-  }
-
-  if (tail) {
-    const caret = document.createRange()
-    caret.setStartAfter(tail)
-    caret.collapse(true)
-    const selection = hit?.selection ?? window.getSelection()
-    selection?.removeAllRanges()
-    selection?.addRange(caret)
-  }
-}
-
-/** Remove a non-collapsed selection in-editor. Skips collapsed carets so word/
- *  line delete (Opt/Cmd+Backspace) stays native. Returns whether anything ran. */
-export function deleteSelectionInEditor(editor: HTMLElement) {
-  const hit = composerSelectionRange(editor)
-
-  if (!hit || hit.range.collapsed) {
-    return false
-  }
-
-  hit.range.deleteContents()
-  hit.range.collapse(true)
-  hit.selection.removeAllRanges()
-  hit.selection.addRange(hit.range)
-
-  return true
-}
-
 /** Serialize a draft string into chip-HTML for the contenteditable surface. */
 export function composerHtml(text: string) {
  let cursor = 0
--- a/apps/desktop/src/app/chat/composer/status-stack/index.tsx
+++ b/apps/desktop/src/app/chat/composer/status-stack/index.tsx
@@ -170,22 +170,14 @@ export function ComposerStatusStack({ queue, sessionId }: ComposerStatusStackPro

  return (
    <div
-      // Sits above the composer (bottom-full), nudged down by the shell's 0.5rem
-      // top pad (pt-2 on composer-root) plus 1px so its bottom edge overlaps the
-      // composer surface's top border. z BELOW the surface (z-4) so the surface's
-      // top border paints over our transparent bottom border — one seam, no
-      // double line.
-      className="absolute inset-x-0 bottom-full z-3 max-h-[40vh] translate-y-[calc(0.5rem+1px)] overflow-y-auto"
+      className="absolute inset-x-0 bottom-full z-6 -mb-[9px] max-h-[40vh] overflow-y-auto"
      onPointerDownCapture={() => blurComposerInput()}
      ref={stackRef}
    >
      {/* The card paints the shared --composer-fill (rest / scrolled / focused
          all match the composer surface by construction); on scroll we only
-          ghost the CONTENT — element opacity on the card would kill the blur.
-          Rounded top, square bottom; the bottom border is TRANSPARENT — the
-          composer surface's visible top border (which sits at a higher z) is the
-          single shared seam, so the two read as one fused capsule. */}
-      <div className={cn(composerDockCard('top'), 'mx-2 rounded-b-none border-b border-b-transparent pt-0.5 pb-1')}>
+          ghost the CONTENT — element opacity on the card would kill the blur. */}
+      <div className={cn(composerDockCard('top'), 'mx-1 pt-0.5 pb-1')}>
        <div
          className={cn(
            'transition-opacity duration-200 ease-out',
--- a/apps/desktop/src/app/chat/composer/types.ts
+++ b/apps/desktop/src/app/chat/composer/types.ts
@@ -1,5 +1,3 @@
-import type { ReactNode } from 'react'
-
 import type { HermesGateway } from '@/hermes'
 import type { ComposerAttachment } from '@/store/composer'

@@ -24,8 +22,6 @@ export interface ChatBarState {
    canSwitch: boolean
    loading?: boolean
    quickModels?: QuickModelOption[]
-    /** Reused status-bar dropdown (built with gateway + selectModel upstream). */
-    modelMenuContent?: ReactNode
  }
  tools: { enabled: boolean; label: string; suggestions?: ContextSuggestion[] }
  voice: { enabled: boolean; active: boolean }
--- a/apps/desktop/src/app/chat/index.tsx
+++ b/apps/desktop/src/app/chat/index.tsx
@@ -35,14 +35,11 @@ import {
  $gatewayState,
  $introPersonality,
  $introSeed,
-  $lastVisibleMessageIsUser,
  $messages,
-  $messagesEmpty,
  $selectedStoredSessionId,
  $sessions,
  sessionPinId
 } from '@/store/session'
-import { isSecondaryWindow } from '@/store/windows'
 import type { ModelOptionsResponse } from '@/types/hermes'

 import { routeSessionId } from '../routes'
@@ -56,13 +53,11 @@ import { droppedFileInlineRefs, type SessionDragPayload, sessionInlineRef } from
 import type { ChatBarState } from './composer/types'
 import { type DroppedFile, partitionDroppedFiles } from './hooks/use-composer-actions'
 import { useFileDropZone } from './hooks/use-file-drop-zone'
-import { ScrollToBottomButton } from './scroll-to-bottom-button'
 import { SessionActionsMenu } from './sidebar/session-actions-menu'
-import { threadLoadingState } from './thread-loading'
+import { lastVisibleMessageIsUser, threadLoadingState } from './thread-loading'

 interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
  gateway: HermesGateway | null
-  modelMenuContent?: React.ReactNode
  onToggleSelectedPin: () => void
  onDeleteSelectedSession: () => void
  onCancel: () => Promise<void> | void
@@ -121,22 +116,16 @@ function ChatHeader({
      ? pinnedSessionIds.includes(selectedSessionId)
      : false

-  // Secondary windows (new-session scratch, subagent watch, cmd-click pop-out)
-  // are compact side panels — they drop the session-actions header + border
-  // entirely. A brand-new draft has nothing to pin/delete/rename either.
-  if (isSecondaryWindow() || (!selectedSessionId && !activeSessionId && !isRoutedSessionView)) {
+  // A brand-new session has no session to pin/delete/rename, so the header is
+  // just a dead "New session" label + chevron. Drop it (and its border)
+  // entirely until there's a real session to act on.
+  if (!selectedSessionId && !activeSessionId && !isRoutedSessionView) {
    return null
  }

  return (
    <header className={cn(titlebarHeaderBaseClass, isRoutedSessionView && titlebarHeaderShadowClass)}>
-      <div
-        className={titlebarHeaderTitleClass}
-        style={{
-          maxWidth:
-            'calc(100vw - var(--titlebar-content-inset,0px) - var(--titlebar-tools-right) - var(--titlebar-tools-width) - 1.5rem)'
-        }}
-      >
+      <div className={titlebarHeaderTitleClass}>
        <SessionActionsMenu
          align="start"
          onDelete={selectedSessionId ? onDeleteSelectedSession : undefined}
@@ -147,7 +136,7 @@ function ChatHeader({
          title={title}
        >
          <Button
-            className="pointer-events-auto flex h-6 min-w-0 max-w-full gap-1 overflow-hidden border border-transparent bg-transparent px-2 py-0 text-(--ui-text-secondary) hover:border-(--ui-stroke-tertiary) hover:bg-(--ui-control-hover-background) hover:text-foreground data-[state=open]:border-(--ui-stroke-tertiary) data-[state=open]:bg-(--ui-control-active-background) [-webkit-app-region:no-drag]"
+            className="pointer-events-auto flex h-6 w-full min-w-0 max-w-full gap-1 overflow-hidden border border-transparent bg-transparent px-2 py-0 text-(--ui-text-secondary) hover:border-(--ui-stroke-tertiary) hover:bg-(--ui-control-hover-background) hover:text-foreground data-[state=open]:border-(--ui-stroke-tertiary) data-[state=open]:bg-(--ui-control-active-background) [-webkit-app-region:no-drag]"
            type="button"
            variant="ghost"
          >
@@ -160,42 +149,105 @@ function ChatHeader({
  )
 }

-interface ChatRuntimeBoundaryProps {
-  busy: boolean
-  children: React.ReactNode
-  onCancel: () => Promise<void> | void
-  onEdit: (message: AppendMessage) => Promise<void>
-  onReload: (parentId: string | null) => Promise<void>
-  onThreadMessagesChange: (messages: readonly ThreadMessage[]) => void
-  /** Route points at an unloaded session — render empty until resume swaps in
-   *  the new transcript, so the previous session's messages don't linger. */
-  suppressMessages: boolean
-}
-
-const NO_MESSAGES: ChatMessage[] = []
-
-/**
- * Owns the $messages subscription and the assistant-ui external-store runtime.
- *
- * Isolated from ChatView so the per-token delta flush (which replaces the
- * $messages atom ~30×/s during streaming) only re-renders this component and
- * the runtime provider. The children (Thread, ChatBar) are created by
- * ChatView, whose render output is stable across flushes — so React bails out
- * of re-rendering them by element identity and the stream's render cost stays
- * confined to the streaming message's own subtree.
- */
-function ChatRuntimeBoundary({
-  busy,
-  children,
+export function ChatView({
+  className,
+  gateway,
+  onToggleSelectedPin,
+  onDeleteSelectedSession,
  onCancel,
+  onAddContextRef,
+  onAddUrl,
+  onAttachImageBlob,
+  onAttachDroppedItems,
+  onBranchInNewChat,
+  maxVoiceRecordingSeconds,
+  onPasteClipboardImage,
+  onPickFiles,
+  onPickFolders,
+  onPickImages,
+  onRemoveAttachment,
+  onSteer,
+  onSubmit,
+  onThreadMessagesChange,
  onEdit,
  onReload,
-  onThreadMessagesChange,
-  suppressMessages
-}: ChatRuntimeBoundaryProps) {
-  const storeMessages = useStore($messages)
-  const messages = suppressMessages ? NO_MESSAGES : storeMessages
+  onRestoreToMessage,
+  onTranscribeAudio
+}: ChatViewProps) {
+  const location = useLocation()
+  const activeSessionId = useStore($activeSessionId)
+  const awaitingResponse = useStore($awaitingResponse)
+  const busy = useStore($busy)
+  const contextSuggestions = useStore($contextSuggestions)
+  const currentCwd = useStore($currentCwd)
+  const currentModel = useStore($currentModel)
+  const currentProvider = useStore($currentProvider)
+  const freshDraftReady = useStore($freshDraftReady)
+  const gatewayState = useStore($gatewayState)
+  const gatewaySwapTarget = useStore($gatewaySwapTarget)
+  const gatewayOpen = gatewayState === 'open'
+  const introPersonality = useStore($introPersonality)
+  const introSeed = useStore($introSeed)
+  const messages = useStore($messages)
+  const selectedSessionId = useStore($selectedStoredSessionId)
  const runtimeMessageCacheRef = useRef(new WeakMap<ChatMessage, ThreadMessage>())
+  const isRoutedSessionView = Boolean(routeSessionId(location.pathname))
+
+  const showIntro =
+    freshDraftReady && !isRoutedSessionView && !selectedSessionId && !activeSessionId && messages.length === 0
+
+  // Session is still loading if the route references a session we haven't
+  // resumed yet. Once `activeSessionId` is set (runtime has resumed), the
+  // session exists — even if it has zero messages (a brand-new routed
+  // session). The flicker where `busy` flips true briefly during hydrate
+  // is handled by `threadLoadingState`'s last-visible-user gate.
+  const loadingSession = isRoutedSessionView && messages.length === 0 && !activeSessionId
+  const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse, lastVisibleMessageIsUser(messages))
+  const showChatBar = !loadingSession
+  const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new')
+
+  const modelOptionsQuery = useQuery<ModelOptionsResponse>({
+    queryKey: ['model-options', activeSessionId || 'global'],
+    queryFn: () => {
+      if (!activeSessionId) {
+        return getGlobalModelOptions()
+      }
+
+      if (!gateway) {
+        throw new Error('Hermes gateway unavailable')
+      }
+
+      return gateway.request<ModelOptionsResponse>('model.options', { session_id: activeSessionId })
+    },
+    enabled: gatewayOpen
+  })
+
+  const quickModels = useMemo(
+    () => quickModelOptions(modelOptionsQuery.data, currentProvider, currentModel),
+    [currentModel, currentProvider, modelOptionsQuery.data]
+  )
+
+  const chatBarState = useMemo<ChatBarState>(
+    () => ({
+      model: {
+        model: currentModel,
+        provider: currentProvider,
+        canSwitch: gatewayOpen,
+        loading: !gatewayOpen || (!currentModel && !currentProvider),
+        quickModels
+      },
+      tools: {
+        enabled: true,
+        label: 'Add context',
+        suggestions: contextSuggestions
+      },
+      voice: {
+        enabled: true,
+        active: false
+      }
+    }),
+    [contextSuggestions, currentModel, currentProvider, gatewayOpen, quickModels]
+  )

  const runtimeMessageRepository = useMemo(() => {
    const items: { message: ThreadMessage; parentId: string | null }[] = []
@@ -245,125 +297,6 @@ function ChatRuntimeBoundary({
    onReload
  })

-  return <AssistantRuntimeProvider runtime={runtime}>{children}</AssistantRuntimeProvider>
-}
-
-export function ChatView({
-  className,
-  gateway,
-  modelMenuContent,
-  onToggleSelectedPin,
-  onDeleteSelectedSession,
-  onCancel,
-  onAddContextRef,
-  onAddUrl,
-  onAttachImageBlob,
-  onAttachDroppedItems,
-  onBranchInNewChat,
-  maxVoiceRecordingSeconds,
-  onPasteClipboardImage,
-  onPickFiles,
-  onPickFolders,
-  onPickImages,
-  onRemoveAttachment,
-  onSteer,
-  onSubmit,
-  onThreadMessagesChange,
-  onEdit,
-  onReload,
-  onRestoreToMessage,
-  onTranscribeAudio
-}: ChatViewProps) {
-  const location = useLocation()
-  const activeSessionId = useStore($activeSessionId)
-  const awaitingResponse = useStore($awaitingResponse)
-  const busy = useStore($busy)
-  const contextSuggestions = useStore($contextSuggestions)
-  const currentCwd = useStore($currentCwd)
-  const currentModel = useStore($currentModel)
-  const currentProvider = useStore($currentProvider)
-  const freshDraftReady = useStore($freshDraftReady)
-  const gatewayState = useStore($gatewayState)
-  const gatewaySwapTarget = useStore($gatewaySwapTarget)
-  const gatewayOpen = gatewayState === 'open'
-  const introPersonality = useStore($introPersonality)
-  const introSeed = useStore($introSeed)
-  // PERF: ChatView must not subscribe to $messages — the atom is replaced on
-  // every streaming delta flush (~30×/s) and a subscription here re-renders
-  // the entire chat shell (header, chat bar, thread wrapper) per token. The
-  // runtime that DOES need the messages lives in ChatRuntimeBoundary below;
-  // this component only needs streaming-stable derivations.
-  const messagesEmpty = useStore($messagesEmpty)
-  const lastVisibleIsUser = useStore($lastVisibleMessageIsUser)
-  const selectedSessionId = useStore($selectedStoredSessionId)
-  const routedSessionId = routeSessionId(location.pathname)
-  const isRoutedSessionView = Boolean(routedSessionId)
-
-  // The URL points at a session the store hasn't loaded yet (sidebar / cmd-K /
-  // direct nav). Derived in render so the swap reads instantly: the same frame
-  // the id changes we drop the old transcript and show the loader, instead of
-  // waiting for the resume effect (which paints a frame later) to clear them.
-  const routeSessionMismatch = isRoutedSessionView && routedSessionId !== selectedSessionId
-
-  // The compact new-session pop-out skips the wordmark/tagline intro — it's a
-  // scratch window, not the full-height empty state.
-  const showIntro =
-    !isSecondaryWindow() && freshDraftReady && !isRoutedSessionView && !selectedSessionId && !activeSessionId && messagesEmpty
-
-  // Session is still loading if the route references a session we haven't
-  // resumed yet. Once `activeSessionId` is set (runtime has resumed), the
-  // session exists — even if it has zero messages (a brand-new routed
-  // session). The flicker where `busy` flips true briefly during hydrate
-  // is handled by `threadLoadingState`'s last-visible-user gate.
-  const loadingSession = isRoutedSessionView && (routeSessionMismatch || (messagesEmpty && !activeSessionId))
-  const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse, lastVisibleIsUser)
-  const showChatBar = !loadingSession
-  const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new')
-
-  const modelOptionsQuery = useQuery<ModelOptionsResponse>({
-    queryKey: ['model-options', activeSessionId || 'global'],
-    queryFn: () => {
-      if (!activeSessionId) {
-        return getGlobalModelOptions()
-      }
-
-      if (!gateway) {
-        throw new Error('Hermes gateway unavailable')
-      }
-
-      return gateway.request<ModelOptionsResponse>('model.options', { session_id: activeSessionId })
-    },
-    enabled: gatewayOpen
-  })
-
-  const quickModels = useMemo(
-    () => quickModelOptions(modelOptionsQuery.data, currentProvider, currentModel),
-    [currentModel, currentProvider, modelOptionsQuery.data]
-  )
-
-  const chatBarState = useMemo<ChatBarState>(
-    () => ({
-      model: {
-        model: currentModel,
-        provider: currentProvider,
-        canSwitch: gatewayOpen,
-        loading: !gatewayOpen || (!currentModel && !currentProvider),
-        modelMenuContent,
-        quickModels
-      },
-      tools: {
-        enabled: true,
-        label: 'Add context',
-        suggestions: contextSuggestions
-      },
-      voice: {
-        enabled: true,
-        active: false
-      }
-    }),
-    [contextSuggestions, currentModel, currentProvider, gatewayOpen, modelMenuContent, quickModels]
-  )
-
  // Drop files anywhere in the conversation area, not just on the composer
  // input. In-app drags (project tree / gutter) carry workspace-relative paths
  // the gateway resolves directly, so they stay inline `@file:` refs. OS/Finder
@@ -416,14 +349,7 @@ export function ChatView({
        className="relative min-h-0 max-w-full flex-1 overflow-hidden bg-(--ui-chat-surface-background) contain-[layout_paint]"
        {...dropHandlers}
      >
-        <ChatRuntimeBoundary
-          busy={busy}
-          onCancel={onCancel}
-          onEdit={onEdit}
-          onReload={onReload}
-          onThreadMessagesChange={onThreadMessagesChange}
-          suppressMessages={routeSessionMismatch}
-        >
+        <AssistantRuntimeProvider runtime={runtime}>
          <Thread
            clampToComposer={showChatBar}
            cwd={currentCwd}
@@ -458,14 +384,13 @@ export function ChatView({
                onSteer={onSteer}
                onSubmit={onSubmit}
                onTranscribeAudio={onTranscribeAudio}
-                queueSessionKey={selectedSessionId}
+                queueSessionKey={selectedSessionId || activeSessionId}
                sessionId={activeSessionId}
                state={chatBarState}
              />
            </Suspense>
          )}
-        </ChatRuntimeBoundary>
-        {showChatBar && <ScrollToBottomButton />}
+        </AssistantRuntimeProvider>
        <ChatDropOverlay kind={dragKind} />
        <ChatSwapOverlay profile={gatewaySwapTarget} />
      </div>
--- a/apps/desktop/src/app/chat/scroll-to-bottom-button.test.tsx
+++ b/apps/desktop/src/app/chat/scroll-to-bottom-button.test.tsx
@@ -1,67 +0,0 @@
-import { cleanup, fireEvent, render, screen } from '@testing-library/react'
-import { afterEach, describe, expect, it, vi } from 'vitest'
-
-import { clearAllPrompts, setApprovalRequest } from '@/store/prompts'
-import { $activeSessionId } from '@/store/session'
-import { onScrollToBottomRequest, resetThreadScroll, setThreadAtBottom } from '@/store/thread-scroll'
-
-import { ScrollToBottomButton } from './scroll-to-bottom-button'
-
-function pendingApproval() {
-  $activeSessionId.set('sess-1')
-  setApprovalRequest({ command: 'rm -rf /tmp/x', description: 'dangerous command', sessionId: 'sess-1' })
-}
-
-afterEach(() => {
-  cleanup()
-  clearAllPrompts()
-  resetThreadScroll()
-  $activeSessionId.set(null)
-})
-
-// `getByRole('button')` excludes aria-hidden nodes, so "queryByRole null" is the
-// control's hidden (parked-at-bottom) state.
-describe('ScrollToBottomButton', () => {
-  it('stays hidden while parked at the bottom', () => {
-    render(<ScrollToBottomButton />)
-
-    expect(screen.queryByRole('button')).toBeNull()
-  })
-
-  it('is a plain jump-to-bottom control when scrolled up with no approval', () => {
-    setThreadAtBottom(false)
-    render(<ScrollToBottomButton />)
-
-    expect(screen.getByRole('button', { name: 'Scroll to bottom' })).toBeTruthy()
-    expect(screen.queryByText('Approval needed')).toBeNull()
-  })
-
-  it('morphs into the approval pill when scrolled up with a pending approval', () => {
-    pendingApproval()
-    setThreadAtBottom(false)
-    render(<ScrollToBottomButton />)
-
-    expect(screen.getByRole('button', { name: 'Approval needed' })).toBeTruthy()
-    expect(screen.getByText('Approval needed')).toBeTruthy()
-  })
-
-  it('does not morph while a pending approval is still in view (at bottom)', () => {
-    pendingApproval()
-    render(<ScrollToBottomButton />)
-
-    // Parked at bottom → control hidden, so it can't claim "approval needed".
-    expect(screen.queryByRole('button')).toBeNull()
-  })
-
-  it('re-arms sticky-bottom on click', () => {
-    const handler = vi.fn()
-    const stop = onScrollToBottomRequest(handler)
-    setThreadAtBottom(false)
-    render(<ScrollToBottomButton />)
-
-    fireEvent.click(screen.getByRole('button'))
-
-    expect(handler).toHaveBeenCalledTimes(1)
-    stop()
-  })
-})
--- a/apps/desktop/src/app/chat/scroll-to-bottom-button.tsx
+++ b/apps/desktop/src/app/chat/scroll-to-bottom-button.tsx
@@ -1,74 +0,0 @@
-import { useStore } from '@nanostores/react'
-import { useRef } from 'react'
-
-import { Codicon } from '@/components/ui/codicon'
-import { useI18n } from '@/i18n'
-import { triggerHaptic } from '@/lib/haptics'
-import { cn } from '@/lib/utils'
-import { $approvalRequest } from '@/store/prompts'
-import { $threadJumpButtonVisible, requestScrollToBottom } from '@/store/thread-scroll'
-
-/**
- * Floating "jump to bottom" control. Sits centered just above the composer,
- * clearing the out-of-flow status stack via the same measured-height CSS vars
- * the thread's bottom clearance uses (`--composer-measured-height` +
- * `--status-stack-measured-height`), so it never overlaps the queue / subagent
- * / background cards. Visible only while the user has scrolled meaningfully
- * away from the bottom; clicking re-arms sticky-bottom and pins the viewport.
- *
- * When the turn is BLOCKED on an approval, this same control morphs into an
- * "Approval needed" pill — the only response surface is the inline Run/Reject
- * bar on the parked tool row, which is always the bottom-most content, so the
- * existing scroll-to-bottom action lands the user right on it. One control, no
- * collision, no second scroll path (native scrollIntoView would scroll
- * overflow:hidden ancestors that can't scroll back and wreck the layout).
- *
- * Enter/exit motion lives in styles.css under `.thread-jump-button` — a
- * directional scale (contract in from 1.1, contract out to 0.9) keyed off
- * `data-state`. `idle` (never-shown) stays silent so it can't flash on mount;
- * `in`/`out` only swap once it has actually appeared.
- */
-export function ScrollToBottomButton() {
-  const { t } = useI18n()
-  const visible = useStore($threadJumpButtonVisible)
-  const request = useStore($approvalRequest)
-  // Scrolled away while an approval is pending → the inline Run/Reject bar is
-  // below the fold. Relabel so the user knows the session needs them, not just
-  // that there's more to read.
-  const approval = visible && Boolean(request)
-  const hasShownRef = useRef(false)
-
-  if (visible) {
-    hasShownRef.current = true
-  }
-
-  const state = visible ? 'in' : hasShownRef.current ? 'out' : 'idle'
-  const label = approval ? t.assistant.approval.jumpToApproval : t.assistant.thread.scrollToBottom
-
-  return (
-    <button
-      aria-hidden={!visible}
-      aria-label={label}
-      className={cn(
-        'thread-jump-button absolute left-1/2 z-20 grid place-items-center backdrop-blur-[0.75rem] [-webkit-backdrop-filter:blur(0.75rem)]',
-        approval
-          ? 'h-8 grid-flow-col gap-1.5 rounded-full border border-primary/40 bg-(--composer-fill) px-3 text-primary hover:bg-primary/10'
-          : 'size-8 rounded-full border border-border/65 bg-(--composer-fill) text-muted-foreground hover:text-foreground',
-        !visible && 'pointer-events-none'
-      )}
-      data-state={state}
-      onClick={() => {
-        triggerHaptic('selection')
-        requestScrollToBottom()
-      }}
-      style={{
-        bottom: 'calc(var(--composer-measured-height) + var(--status-stack-measured-height) + 0.625rem)'
-      }}
-      tabIndex={visible ? 0 : -1}
-      type="button"
-    >
-      <Codicon name="arrow-down" size={approval ? '0.875rem' : '1rem'} />
-      {approval && <span className="text-xs font-medium">{label}</span>}
-    </button>
-  )
-}
--- a/apps/desktop/src/app/chat/sidebar/index.tsx
+++ b/apps/desktop/src/app/chat/sidebar/index.tsx
--- a/apps/desktop/src/app/chat/sidebar/order.test.ts
+++ b/apps/desktop/src/app/chat/sidebar/order.test.ts
@@ -1,21 +0,0 @@
-import { describe, expect, it } from 'vitest'
-
-import { resolveManualSessionOrderIds } from './order'
-
-describe('resolveManualSessionOrderIds', () => {
-  it('clears legacy auto-seeded order until the user manually reorders sessions', () => {
-    expect(resolveManualSessionOrderIds(['newest', 'older'], ['older', 'newest'], false)).toEqual([])
-  })
-
-  it('keeps a manual order and surfaces newly seen sessions first', () => {
-    expect(resolveManualSessionOrderIds(['newest', 'older', 'oldest'], ['oldest', 'older'], true)).toEqual([
-      'newest',
-      'oldest',
-      'older'
-    ])
-  })
-
-  it('clears manual order when none of the saved ids still exist', () => {
-    expect(resolveManualSessionOrderIds(['newest'], ['gone'], true)).toEqual([])
-  })
-})
--- a/apps/desktop/src/app/chat/sidebar/order.ts
+++ b/apps/desktop/src/app/chat/sidebar/order.ts
@@ -1,17 +0,0 @@
-export function resolveManualSessionOrderIds(currentIds: string[], orderIds: string[], manual: boolean): string[] {
-  if (!manual || !currentIds.length || !orderIds.length) {
-    return []
-  }
-
-  const current = new Set(currentIds)
-  const retained = orderIds.filter(id => current.has(id))
-
-  if (!retained.length) {
-    return []
-  }
-
-  const retainedSet = new Set(retained)
-  const fresh = currentIds.filter(id => !retainedSet.has(id))
-
-  return [...fresh, ...retained]
-}
--- a/apps/desktop/src/app/chat/sidebar/profile-switcher.tsx
+++ b/apps/desktop/src/app/chat/sidebar/profile-switcher.tsx
@@ -284,7 +284,6 @@ export function ProfileRail() {
          selectProfile(name)
        }}
        open={createOpen}
-        profiles={profiles}
      />

      <RenameProfileDialog
@@ -468,10 +467,6 @@ function ProfileSquare({ active, color, label, onDelete, onRecolor, onRename, on
          aria-label={p.actionsFor(label)}
          className="w-40"
          collisionPadding={{ bottom: 44, left: 8, right: 8, top: 8 }}
-          // Menu close refocuses the trigger — which doubles as the popover
-          // anchor — so the picker reads it as focus-outside and dies on open.
-          // Suppress the refocus and the picker survives.
-          onCloseAutoFocus={event => event.preventDefault()}
        >
          <ContextMenuItem onSelect={() => setPickerOpen(true)}>
            <Codicon name="symbol-color" size="0.875rem" />
--- a/apps/desktop/src/app/chat/sidebar/session-actions-menu.tsx
+++ b/apps/desktop/src/app/chat/sidebar/session-actions-menu.tsx
@@ -4,7 +4,7 @@ import { useEffect, useRef, useState } from 'react'
 import { Button } from '@/components/ui/button'
 import { Codicon } from '@/components/ui/codicon'
 import { ContextMenu, ContextMenuContent, ContextMenuItem, ContextMenuTrigger } from '@/components/ui/context-menu'
-import { CopyButton } from '@/components/ui/copy-button'
+import { writeClipboardText } from '@/components/ui/copy-button'
 import {
  Dialog,
  DialogContent,
@@ -49,17 +49,26 @@ function useSessionActions({ sessionId, title, pinned = false, profile, onPin, o
  const r = t.sidebar.row
  const [renameOpen, setRenameOpen] = useState(false)

-  const pinItem: ItemSpec = {
-    disabled: !onPin,
-    icon: 'pin',
-    label: pinned ? r.unpin : r.pin,
-    onSelect: () => {
-      triggerHaptic('selection')
-      onPin?.()
-    }
-  }
-
  const items: ItemSpec[] = [
+    {
+      disabled: !onPin,
+      icon: 'pin',
+      label: pinned ? r.unpin : r.pin,
+      onSelect: () => {
+        triggerHaptic('selection')
+        onPin?.()
+      }
+    },
+    {
+      disabled: !sessionId,
+      icon: 'copy',
+      label: r.copyId,
+      onSelect: event => {
+        event.preventDefault()
+        triggerHaptic('selection')
+        void writeClipboardText(sessionId).catch(err => notifyError(err, r.copyIdFailed))
+      }
+    },
    ...(canOpenSessionWindow()
      ? [
          {
@@ -113,28 +122,13 @@ function useSessionActions({ sessionId, title, pinned = false, profile, onPin, o
    }
  ]

-  const renderMenuItem = (Item: MenuItem, { className, disabled, icon, label, onSelect, variant }: ItemSpec) => (
-    <Item className={className} disabled={disabled} key={label} onSelect={onSelect} variant={variant}>
-      <Codicon name={icon} size="0.875rem" />
-      <span>{label}</span>
-    </Item>
-  )
-
-  const renderItems = (Item: MenuItem) => (
-    <>
-      {renderMenuItem(Item, pinItem)}
-      <CopyButton
-        appearance={Item === DropdownMenuItem ? 'menu-item' : 'context-menu-item'}
-        disabled={!sessionId}
-        errorMessage={r.copyIdFailed}
-        key={r.copyId}
-        label={r.copyId}
-        onCopyError={err => notifyError(err, r.copyIdFailed)}
-        text={sessionId}
-      />
-      {items.map(spec => renderMenuItem(Item, spec))}
-    </>
-  )
+  const renderItems = (Item: MenuItem) =>
+    items.map(({ className, disabled, icon, label, onSelect, variant }) => (
+      <Item className={className} disabled={disabled} key={label} onSelect={onSelect} variant={variant}>
+        <Codicon name={icon} size="0.875rem" />
+        <span>{label}</span>
+      </Item>
+    ))

  const renameDialog = (
    <RenameSessionDialog
--- a/apps/desktop/src/app/chat/sidebar/session-row.tsx
+++ b/apps/desktop/src/app/chat/sidebar/session-row.tsx
@@ -96,9 +96,7 @@ export function SidebarSessionRow({
          'group relative grid min-h-[1.625rem] cursor-pointer grid-cols-[minmax(0,1fr)_1.375rem] items-center rounded-md transition-colors duration-100 ease-out hover:bg-(--ui-row-hover-background) hover:transition-none',
          isSelected && 'bg-(--ui-row-active-background)',
          isWorking && 'text-foreground',
-          // Opaque surface while lifted so the dragged row erases what's under
-          // it (translucency let the rows below bleed through).
-          dragging && 'z-10 cursor-grabbing bg-(--ui-sidebar-surface-background)',
+          dragging && 'z-10 cursor-grabbing opacity-60 shadow-sm',
          className
        )}
        data-working={isWorking ? 'true' : undefined}
--- a/apps/desktop/src/app/chat/sidebar/virtual-session-list.tsx
+++ b/apps/desktop/src/app/chat/sidebar/virtual-session-list.tsx
@@ -1,7 +1,7 @@
-import { useSortable } from '@dnd-kit/sortable'
+import { SortableContext, useSortable, verticalListSortingStrategy } from '@dnd-kit/sortable'
 import { CSS } from '@dnd-kit/utilities'
 import { useVirtualizer } from '@tanstack/react-virtual'
-import { type FC, useCallback, useRef } from 'react'
+import { type FC, useCallback, useMemo, useRef } from 'react'

 import type { SessionInfo } from '@/hermes'
 import { cn } from '@/lib/utils'
@@ -48,6 +48,7 @@ export const VirtualSessionList: FC<VirtualSessionListProps> = ({
  workingSessionIdSet
 }) => {
  const scrollerRef = useRef<HTMLDivElement | null>(null)
+  const ids = useMemo(() => sessions.map(s => s.id), [sessions])

  const virtualizer = useVirtualizer({
    count: sessions.length,
@@ -100,16 +101,21 @@ export const VirtualSessionList: FC<VirtualSessionListProps> = ({
    )
  })

-  // When sortable, the caller wraps this in a ReorderableList that owns the
-  // DndContext + SortableContext (keyed on the same ids); the virtualized rows
-  // just consume that context via useSortable.
-  return (
+  const list = (
    <div className={cn('relative min-h-0 flex-1 overflow-x-hidden overflow-y-auto overscroll-contain', className)} ref={scrollerRef}>
      <div className="grid gap-px" style={{ paddingBottom: `${paddingBottom}px`, paddingTop: `${paddingTop}px` }}>
        {rows}
      </div>
    </div>
  )
+
+  return sortable ? (
+    <SortableContext items={ids} strategy={verticalListSortingStrategy}>
+      {list}
+    </SortableContext>
+  ) : (
+    list
+  )
 }

 interface VirtualSortableRowProps {
--- a/apps/desktop/src/app/chat/sidebar/workspace-groups.test.ts
+++ b/apps/desktop/src/app/chat/sidebar/workspace-groups.test.ts
@@ -1,149 +0,0 @@
-import { describe, expect, it } from 'vitest'
-
-import type { HermesWorktreeInfo } from '@/global'
-import type { SessionInfo } from '@/types/hermes'
-
-import { uniqueCwds, workspaceGroupsFor, workspaceTreeFor, type WorktreeResolver } from './workspace-groups'
-
-let nextId = 0
-
-function makeSession(cwd: null | string, overrides: Partial<SessionInfo> = {}): SessionInfo {
-  return {
-    archived: false,
-    cwd,
-    ended_at: null,
-    id: `s${nextId++}`,
-    input_tokens: 0,
-    is_active: false,
-    last_active: 1_000,
-    message_count: 1,
-    model: 'claude',
-    output_tokens: 0,
-    preview: null,
-    source: 'cli',
-    started_at: 1_000,
-    title: null,
-    tool_call_count: 0,
-    ...overrides
-  }
-}
-
-const labels = (sessions: SessionInfo[]) => workspaceGroupsFor(sessions, 'No workspace').map(g => g.label)
-
-describe('workspaceGroupsFor', () => {
-  it('groups by full cwd, not by basename — same-named folders are separate groups', () => {
-    const groups = workspaceGroupsFor(
-      [makeSession('/a/hermes-agent/apps/desktop'), makeSession('/a/hermes-agent-wt-rtl/apps/desktop')],
-      'No workspace'
-    )
-
-    expect(groups).toHaveLength(2)
-  })
-
-  it('disambiguates colliding basenames by walking up the path', () => {
-    expect(
-      labels([makeSession('/a/hermes-agent/apps/desktop'), makeSession('/a/hermes-agent-wt-rtl/apps/desktop')])
-    ).toEqual(['hermes-agent/apps/desktop', 'hermes-agent-wt-rtl/apps/desktop'])
-  })
-
-  it('leaves a unique basename as its short label', () => {
-    expect(labels([makeSession('/a/hermes-agent/apps/desktop'), makeSession('/b/heval-py')])).toEqual([
-      'desktop',
-      'heval-py'
-    ])
-  })
-
-  it('grows the prefix past one segment when the parent also collides', () => {
-    expect(labels([makeSession('/x/proj/apps/desktop'), makeSession('/y/proj/apps/desktop')])).toEqual([
-      'x/proj/apps/desktop',
-      'y/proj/apps/desktop'
-    ])
-  })
-
-  it('keeps the synthetic no-workspace group untouched even if a real group shares its label', () => {
-    const groups = workspaceGroupsFor([makeSession(null), makeSession('/a/No workspace')], 'No workspace')
-    const noWorkspace = groups.find(g => g.path === null)
-
-    expect(noWorkspace?.label).toBe('No workspace')
-  })
-})
-
-const info = (over: Partial<HermesWorktreeInfo> & Pick<HermesWorktreeInfo, 'repoRoot' | 'worktreeRoot'>): HermesWorktreeInfo => ({
-  branch: null,
-  isMainWorktree: false,
-  ...over
-})
-
-describe('workspaceTreeFor', () => {
-  it('heuristic nests `<repo>-wt-<branch>` under its sibling repo', () => {
-    const tree = workspaceTreeFor(
-      [makeSession('/www/hermes-agent'), makeSession('/www/hermes-agent-wt-rtl')],
-      'No workspace'
-    )
-
-    expect(tree).toHaveLength(1)
-    expect(tree[0].label).toBe('hermes-agent')
-    expect(tree[0].groups.map(g => g.label).sort()).toEqual(['hermes-agent', 'rtl'])
-  })
-
-  it('git metadata is authoritative — worktrees group by repoRoot regardless of directory naming', () => {
-    const resolver: WorktreeResolver = cwd => {
-      if (cwd === '/www/hermes-agent') {
-        return info({ repoRoot: '/www/hermes-agent', worktreeRoot: '/www/hermes-agent', isMainWorktree: true, branch: 'main' })
-      }
-
-      if (cwd === '/elsewhere/ha-rtl') {
-        return info({ repoRoot: '/www/hermes-agent', worktreeRoot: '/elsewhere/ha-rtl', branch: 'rtl' })
-      }
-
-      return null
-    }
-
-    const tree = workspaceTreeFor(
-      [makeSession('/www/hermes-agent'), makeSession('/elsewhere/ha-rtl')],
-      'No workspace',
-      resolver
-    )
-
-    expect(tree).toHaveLength(1)
-    expect(tree[0].label).toBe('hermes-agent')
-    // The main checkout labels by directory (its branch is transient — using it
-    // would misattribute old sessions to the currently checked-out branch);
-    // linked worktrees label by branch.
-    expect(tree[0].groups.map(g => g.label)).toEqual(['hermes-agent', 'rtl'])
-  })
-
-  it('a standalone directory is its own parent (always parent → worktree → sessions)', () => {
-    const tree = workspaceTreeFor([makeSession('/www/heval-node')], 'No workspace')
-
-    expect(tree).toHaveLength(1)
-    expect(tree[0].label).toBe('heval-node')
-    expect(tree[0].groups).toHaveLength(1)
-    expect(tree[0].groups[0].label).toBe('heval-node')
-  })
-
-  it('aggregates session counts across a repo’s worktrees', () => {
-    const tree = workspaceTreeFor(
-      [makeSession('/www/ha'), makeSession('/www/ha-wt-x'), makeSession('/www/ha-wt-x')],
-      'No workspace'
-    )
-
-    const parent = tree.find(p => p.label === 'ha')
-
-    expect(parent?.sessionCount).toBe(3)
-  })
-
-  it('no-workspace sessions form their own parent', () => {
-    const tree = workspaceTreeFor([makeSession(null)], 'No workspace')
-
-    expect(tree).toHaveLength(1)
-    expect(tree[0].label).toBe('No workspace')
-    expect(tree[0].path).toBeNull()
-  })
-})
-
-describe('uniqueCwds', () => {
-  it('dedupes and drops empty/whitespace cwds', () => {
-    expect(uniqueCwds([makeSession('/a'), makeSession('/a'), makeSession(null), makeSession('   ')])).toEqual(['/a'])
-  })
-})
--- a/apps/desktop/src/app/chat/sidebar/workspace-groups.ts
+++ b/apps/desktop/src/app/chat/sidebar/workspace-groups.ts
@@ -1,326 +0,0 @@
-import type { HermesWorktreeInfo } from '@/global'
-import type { SessionInfo } from '@/hermes'
-
-export interface SidebarSessionGroup {
-  id: string
-  label: string
-  path: null | string
-  sessions: SessionInfo[]
-  // Profile color for the ALL-profiles view; absent for workspace groups.
-  color?: null | string
-  loadingMore?: boolean
-  mode?: 'profile' | 'source' | 'workspace'
-  onLoadMore?: () => void
-  sourceId?: string
-  totalCount?: number
-}
-
-const NO_WORKSPACE_ID = '__no_workspace__'
-
-/** Path split into segments, ignoring trailing slashes and mixed separators. */
-const segments = (path: string): string[] => path.replace(/[/\\]+$/, '').split(/[/\\]/).filter(Boolean)
-
-/** Last path segment. */
-export const baseName = (path: string): string | undefined => segments(path).pop()
-
-/** The segments above the basename. */
-const parentSegments = (path: string): string[] => segments(path).slice(0, -1)
-
-interface Labelable {
-  id: string
-  label: string
-  path: null | string
-}
-
-/**
- * Disambiguate groups whose basename collides (worktrees all end in the same
- * `apps/desktop`, sibling repos share a folder name, etc.) by walking up the
- * path and prepending parent segments until each colliding label is unique —
- * e.g. `hermes-agent/desktop` vs `hermes-agent-wt-rtl/desktop`. Groups with a
- * unique basename keep their short label untouched.
- */
-function disambiguateLabels(groups: Labelable[]): void {
-  const byLabel = new Map<string, Labelable[]>()
-
-  for (const group of groups) {
-    const bucket = byLabel.get(group.label)
-
-    if (bucket) {
-      bucket.push(group)
-    } else {
-      byLabel.set(group.label, [group])
-    }
-  }
-
-  for (const bucket of byLabel.values()) {
-    if (bucket.length < 2) {
-      continue
-    }
-
-    // Only groups backed by a real path can grow a prefix; the synthetic
-    // "No workspace" group has no path and stays as-is.
-    const pathed = bucket.filter(group => group.path)
-
-    if (pathed.length < 2) {
-      continue
-    }
-
-    const parents = new Map(pathed.map(group => [group.id, parentSegments(group.path!)]))
-    let depth = 1
-
-    // Grow the prefix one parent segment at a time until every label in the
-    // bucket is distinct, or we run out of parent segments to add.
-    while (depth <= Math.max(...pathed.map(g => parents.get(g.id)!.length))) {
-      const labels = new Map<string, number>()
-
-      for (const group of pathed) {
-        const segs = parents.get(group.id)!
-        const prefix = segs.slice(-depth).join('/')
-        const base = baseName(group.path!) ?? group.path!
-        group.label = prefix ? `${prefix}/${base}` : base
-        labels.set(group.label, (labels.get(group.label) ?? 0) + 1)
-      }
-
-      if ([...labels.values()].every(count => count === 1)) {
-        break
-      }
-
-      depth += 1
-    }
-  }
-}
-
-export function workspaceGroupsFor(
-  sessions: SessionInfo[],
-  noWorkspaceLabel: string,
-  options: { preserveSessionOrder?: boolean } = {}
-): SidebarSessionGroup[] {
-  const groups = new Map<string, SidebarSessionGroup>()
-
-  for (const session of sessions) {
-    const path = session.cwd?.trim() || ''
-    const id = path || NO_WORKSPACE_ID
-    const label = baseName(path) || path || noWorkspaceLabel
-
-    const group = groups.get(id) ?? { id, label, path: path || null, sessions: [] }
-    group.sessions.push(session)
-    groups.set(id, group)
-  }
-
-  if (!options.preserveSessionOrder) {
-    // Groups keep recency order (Map insertion = first-seen in the recency-sorted
-    // input, so an active project floats up), but rows *within* a group sort by
-    // creation time so they don't reshuffle every time a message lands — keeps
-    // muscle memory intact.
-    for (const group of groups.values()) {
-      group.sessions.sort((a, b) => b.started_at - a.started_at)
-    }
-  }
-
-  const result = [...groups.values()]
-  disambiguateLabels(result)
-
-  return result
-}
-
-/**
- * A worktree's main repo and all its linked worktrees collapse into ONE parent
- * (keyed by the repo root); each worktree is a child group; sessions hang off
- * the worktree they ran in. `parent → worktree → sessions`.
- */
-export interface SidebarWorkspaceTree {
-  id: string
-  label: string
-  path: null | string
-  groups: SidebarSessionGroup[]
-  sessionCount: number
-}
-
-/** Resolves a session cwd to git-worktree identity (from the local fs probe). */
-export type WorktreeResolver = (cwd: string) => HermesWorktreeInfo | null | undefined
-
-interface WorkspacePlacement {
-  parentKey: string
-  parentLabel: string
-  parentPath: string
-  worktreeKey: string
-  worktreeLabel: string
-  worktreePath: string
-}
-
-/** Replace a path's final segment, preserving its prefix + separators. */
-const withBaseName = (path: string, name: string): string =>
-  path.replace(/[/\\]+$/, '').replace(/[^/\\]+$/, name)
-
-/**
- * Path-only fallback for when git metadata is unavailable (remote backends,
- * unreadable paths). Mirrors the git layout: a `<repo>-wt-<branch>` directory
- * nests under its sibling `<repo>`; any other directory is its own repo root.
- */
-function placeByHeuristic(path: string): WorkspacePlacement | null {
-  const base = baseName(path)
-
-  if (!base) {
-    return null
-  }
-
-  const worktreeMatch = base.match(/^(.+)-wt-(.+)$/)
-
-  if (worktreeMatch) {
-    const repo = worktreeMatch[1]
-    const repoPath = withBaseName(path, repo)
-
-    return {
-      parentKey: repoPath,
-      parentLabel: repo,
-      parentPath: repoPath,
-      worktreeKey: path,
-      worktreeLabel: worktreeMatch[2],
-      worktreePath: path
-    }
-  }
-
-  return {
-    parentKey: path,
-    parentLabel: base,
-    parentPath: path,
-    worktreeKey: path,
-    worktreeLabel: base,
-    worktreePath: path
-  }
-}
-
-function placeWorkspace(path: string, resolver?: WorktreeResolver): WorkspacePlacement | null {
-  const info = resolver?.(path)
-
-  if (info?.repoRoot && info.worktreeRoot) {
-    const dirLabel = baseName(info.worktreeRoot) || info.worktreeRoot
-
-    return {
-      parentKey: info.repoRoot,
-      parentLabel: baseName(info.repoRoot) ?? info.repoRoot,
-      parentPath: info.repoRoot,
-      worktreeKey: info.worktreeRoot,
-      // The main checkout's branch is transient — it changes as you work, so a
-      // branch label would misattribute every past session to whatever branch
-      // is checked out *now*. Label it by directory. Linked worktrees are
-      // per-branch by construction, so branch is the clearest label there.
-      worktreeLabel: info.isMainWorktree ? dirLabel : info.branch || dirLabel,
-      worktreePath: info.worktreeRoot
-    }
-  }
-
-  return placeByHeuristic(path)
-}
-
-/** Unique, non-empty session cwds — the batch to probe for worktree info. */
-export function uniqueCwds(sessions: SessionInfo[]): string[] {
-  const seen = new Set<string>()
-
-  for (const session of sessions) {
-    const path = session.cwd?.trim()
-
-    if (path) {
-      seen.add(path)
-    }
-  }
-
-  return [...seen]
-}
-
-/**
- * Build the `parent → worktree → sessions` tree. Parents keep recency order
- * (first-seen in the recency-sorted input); worktree groups within a parent do
- * too, while rows inside a worktree sort by creation time (stable muscle memory,
- * matching `workspaceGroupsFor`).
- */
-export function workspaceTreeFor(
-  sessions: SessionInfo[],
-  noWorkspaceLabel: string,
-  resolver?: WorktreeResolver,
-  options: { preserveSessionOrder?: boolean } = {}
-): SidebarWorkspaceTree[] {
-  interface WorktreeEntry {
-    group: SidebarSessionGroup
-    parentKey: string
-    parentLabel: string
-    parentPath: string
-  }
-
-  const worktrees = new Map<string, WorktreeEntry>()
-  const noWorkspace: SessionInfo[] = []
-
-  for (const session of sessions) {
-    const path = session.cwd?.trim() || ''
-
-    if (!path) {
-      noWorkspace.push(session)
-
-      continue
-    }
-
-    const placement = placeWorkspace(path, resolver)
-
-    if (!placement) {
-      noWorkspace.push(session)
-
-      continue
-    }
-
-    let entry = worktrees.get(placement.worktreeKey)
-
-    if (!entry) {
-      entry = {
-        group: { id: placement.worktreeKey, label: placement.worktreeLabel, path: placement.worktreePath, sessions: [] },
-        parentKey: placement.parentKey,
-        parentLabel: placement.parentLabel,
-        parentPath: placement.parentPath
-      }
-      worktrees.set(placement.worktreeKey, entry)
-    }
-
-    entry.group.sessions.push(session)
-  }
-
-  if (!options.preserveSessionOrder) {
-    for (const entry of worktrees.values()) {
-      entry.group.sessions.sort((a, b) => b.started_at - a.started_at)
-    }
-  }
-
-  const parents = new Map<string, SidebarWorkspaceTree>()
-
-  for (const entry of worktrees.values()) {
-    let parent = parents.get(entry.parentKey)
-
-    if (!parent) {
-      parent = { id: entry.parentKey, label: entry.parentLabel, path: entry.parentPath, groups: [], sessionCount: 0 }
-      parents.set(entry.parentKey, parent)
-    }
-
-    parent.groups.push(entry.group)
-    parent.sessionCount += entry.group.sessions.length
-  }
-
-  const result = [...parents.values()]
-
-  if (noWorkspace.length) {
-    result.push({
-      id: NO_WORKSPACE_ID,
-      label: noWorkspaceLabel,
-      path: null,
-      groups: [{ id: NO_WORKSPACE_ID, label: noWorkspaceLabel, path: null, sessions: noWorkspace }],
-      sessionCount: noWorkspace.length
-    })
-  }
-
-  // Parents that collide on basename grow a path prefix; worktree labels that
-  // collide inside a parent do the same.
-  disambiguateLabels(result)
-
-  for (const parent of result) {
-    disambiguateLabels(parent.groups)
-  }
-
-  return result
-}
--- a/apps/desktop/src/app/chat/thread-loading.ts
+++ b/apps/desktop/src/app/chat/thread-loading.ts
@@ -3,14 +3,9 @@ import type { ChatMessage } from '@/lib/chat-messages'
 export type ThreadLoadingState = 'response' | 'session'

 export function lastVisibleMessageIsUser(messages: ChatMessage[]): boolean {
-  // Allocation-free reverse scan — runs in a hot $messages computed.
-  for (let i = messages.length - 1; i >= 0; i -= 1) {
-    if (!messages[i].hidden) {
-      return messages[i].role === 'user'
-    }
-  }
+  const lastVisible = [...messages].reverse().find(message => !message.hidden)

-  return false
+  return lastVisible?.role === 'user'
 }

 export function threadLoadingState(
--- a/apps/desktop/src/app/command-palette/index.tsx
+++ b/apps/desktop/src/app/command-palette/index.tsx
@@ -118,10 +118,6 @@ const paletteFilter = (value: string, search: string, keywords?: string[]): numb
  return needle.split(/\s+/).every(term => haystack.includes(term)) ? 1 : 0
 }

-// Hermes session ids: <YYYYMMDD>_<HHMMSS>_<6 hex>. Used to offer a direct
-// "Go to session ‹id›" jump for ids that aren't in the recent-200 list.
-const SESSION_ID_RE = /^\d{8}_\d{6}_[a-f0-9]{6}$/
-
 type SessionRow = Awaited<ReturnType<typeof listAllProfileSessions>>['sessions'][number]

 const toSessionEntry = (session: SessionRow): SessionEntry => ({
@@ -417,24 +413,6 @@ export function CommandPalette() {

    const result: PaletteGroup[] = []

-    // Paste a raw session id → jump straight to it, even if it predates the
-    // recent-200 window the lists below are built from.
-    const directId = search.trim()
-
-    if (SESSION_ID_RE.test(directId)) {
-      result.push({
-        items: [
-          {
-            icon: MessageCircle,
-            id: `goto-${directId}`,
-            keywords: ['session', 'id', 'go to', directId],
-            label: `${t.commandCenter.goToSession} ${directId}`,
-            run: go(sessionRoute(directId))
-          }
-        ]
-      })
-    }
-
    if (sessions.length > 0) {
      result.push({
        heading: t.commandCenter.sections.sessions,
--- a/apps/desktop/src/app/desktop-controller.tsx
+++ b/apps/desktop/src/app/desktop-controller.tsx
@@ -37,7 +37,6 @@ import {
  SIDEBAR_SESSIONS_PAGE_SIZE,
  unpinSession
 } from '../store/layout'
-import { respondToApprovalAction } from '../store/native-notifications'
 import { $filePreviewTarget, $previewTarget, closeActiveRightRailTab } from '../store/preview'
 import {
  $activeGatewayProfile,
@@ -77,7 +76,6 @@ import {
  setSessionsLoading,
  setSessionsTotal
 } from '../store/session'
-import { onSessionsChanged } from '../store/session-sync'
 import { clearSessionTodos, setSessionTodos, todoListActive } from '../store/todos'
 import { openUpdatesWindow, startUpdatePoller, stopUpdatePoller } from '../store/updates'
 import { isSecondaryWindow } from '../store/windows'
@@ -271,26 +269,6 @@ export function DesktopController() {
    }
  }, [])

-  // Notification click: the main process already focused the window; jump to its session.
-  useEffect(() => {
-    const unsubscribe = window.hermesDesktop?.onFocusSession?.(sessionId => {
-      if (sessionId) {
-        navigate(sessionRoute(sessionId))
-      }
-    })
-
-    return () => unsubscribe?.()
-  }, [navigate])
-
-  // Notification action button (Approve/Reject) — resolve in place, no navigation.
-  useEffect(() => {
-    const unsubscribe = window.hermesDesktop?.onNotificationAction?.(({ actionId, sessionId }) => {
-      void respondToApprovalAction(sessionId ?? null, actionId)
-    })
-
-    return () => unsubscribe?.()
-  }, [])
-
  // hermes:// deep links (e.g. a docs "Send to App" button for an automation blueprint).
  // Build the equivalent /blueprint slash command from the payload and drop
  // it into the composer — the user reviews/edits, then sends; the agent (or
@@ -465,17 +443,6 @@ export function DesktopController() {
    void refreshSessions()
  }, [refreshSessions])

-  // Another window mutated the shared session list (e.g. a chat started in the
-  // pop-out). Re-pull so the sidebar reflects it. Pop-outs have no sidebar, so
-  // only real windows bother.
-  useEffect(() => {
-    if (isSecondaryWindow()) {
-      return
-    }
-
-    return onSessionsChanged(() => void refreshSessions().catch(() => undefined))
-  }, [refreshSessions])
-
  // ALL-profiles view pages one profile at a time: fetch that profile's next
  // page and merge it in place, leaving every other profile's rows untouched.
  const loadMoreSessionsForProfile = useCallback(async (profile: string) => {
@@ -711,9 +678,7 @@ export function DesktopController() {
    }

    lastGatewayProfileRef.current = activeGatewayProfile
-    // Force: the new profile has its own default, so reseed even if the composer
-    // already shows the previous profile's model.
-    void refreshCurrentModel(true)
+    void refreshCurrentModel()
    void refreshActiveProfile()
  }, [activeGatewayProfile, refreshCurrentModel])

@@ -861,6 +826,7 @@ export function DesktopController() {
    gatewayLogLines,
    gatewayState,
    inferenceStatus,
+    modelMenuContent,
    openAgents,
    freshDraftReady,
    openCommandCenterSection,
@@ -982,7 +948,6 @@ export function DesktopController() {
    <ChatView
      gateway={gatewayRef.current}
      maxVoiceRecordingSeconds={voiceMaxRecordingSeconds}
-      modelMenuContent={modelMenuContent}
      onAddContextRef={composer.addContextRefAttachment}
      onAddUrl={url => composer.addContextRefAttachment(`@url:${formatRefValue(url)}`, url)}
      onAttachDroppedItems={composer.attachDroppedItems}
--- a/apps/desktop/src/app/hooks/use-keybinds.ts
+++ b/apps/desktop/src/app/hooks/use-keybinds.ts
@@ -37,7 +37,6 @@ import {
  switcherActive,
  switcherJustClosed
 } from '@/store/session-switcher'
-import { openNewSessionInNewWindow } from '@/store/windows'
 import { useTheme } from '@/themes/context'

 import { requestComposerFocus } from '../chat/composer/focus'
@@ -133,7 +132,6 @@ export function useKeybinds(deps: KeybindRuntimeDeps): void {
      deps.startFreshSession()
      window.dispatchEvent(new CustomEvent('hermes:new-session-shortcut'))
    },
-    'session.newWindow': () => void openNewSessionInNewWindow(),
    'session.next': () => stepSession(1),
    'session.prev': () => stepSession(-1),
    ...sessionSlotHandlers,
--- a/apps/desktop/src/app/messaging/index.tsx
+++ b/apps/desktop/src/app/messaging/index.tsx
@@ -527,7 +527,7 @@ const PLATFORM_INTRO: Record<string, string> = {
  wecom_callback:
    'Set up a WeCom self-built app, expose its callback URL, and provide the corp ID, secret, agent ID, and AES key.',
  weixin:
-    'Run `hermes gateway setup`, select Weixin, then scan and confirm the QR code with a personal WeChat account. Hermes connects through Tencent\'s iLink Bot API and saves the credentials.',
+    'Sign in to the WeChat Official Account platform, copy the AppID and Token, and point the message callback URL at Hermes.',
  qqbot: 'Register an app on the QQ Open Platform (q.qq.com) and copy the App ID and Client Secret.',
  api_server:
    'Expose Hermes as an OpenAI-compatible API. Set an auth key, then point Open WebUI / LobeChat / etc. at the host:port.',
--- a/Show More
+++ b/Show More