test ci

docs: finish Automation Blueprints terminology rebrand (#44470)
* docs: finish Automation Blueprints terminology rebrand Replace leftover "Automation Templates" wording from the Cron Recipes rebrand, rename the copy-paste cookbook guide to Automation Recipes, and point the marketing gallery link at the blueprints catalog. Co-authored-by: Cursor <cursoragent@cursor.com> * docs: use Automation Blueprints instead of Recipes in guide Rename the cookbook guide from automation-recipes to automation-blueprints so sidebar and copy match the product term. Co-authored-by: Cursor <cursoragent@cursor.com> * docs: rename automation-blueprints-catalog to automation-blueprints Drop the -catalog suffix from the reference page slug and title, and move the copy-paste cookbook to automation-blueprint-examples so the main Automation Blueprints doc is unambiguous. Co-authored-by: Cursor <cursoragent@cursor.com> * Revert "docs: rename automation-blueprints-catalog to automation-blueprints" This reverts commit 605f1eeab5. --------- Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-15 14:41:16 +08:00 · 2026-06-12 15:02:05 -04:00 · 2026-06-11 17:22:22 -04:00 · 2026-06-11 13:57:13 -07:00 · 2026-06-11 13:57:13 -07:00 · 2026-06-11 13:57:13 -07:00
851 changed files with 95826 additions and 21761 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -63,3 +63,45 @@ data/
 # Compose/profile runtime state (bind-mounted; avoid ownership/secret issues)
 hermes-config/
 runtime/
+
+# ---------- Not needed inside the Docker image ----------
+
+# Desktop app source (Tauri/Electron); never installed in the container
+apps/
+
+# Test suite — not shipped in production images
+tests/
+
+# Documentation site (Docusaurus) and supplementary docs
+website/
+docs/
+
+# Assets only used by the GitHub README
+assets/
+infographic/
+
+# Plugin-level docs (hermes-achievements ships docs/ but the runtime doesn't read them)
+plugins/hermes-achievements/docs/
+
+# Nix / Homebrew / AUR packaging metadata — irrelevant to Docker
+nix/
+flake.nix
+flake.lock
+packaging/
+
+# Design and planning documents
+plans/
+.plans/
+
+# ACP registry manifest (icon + agent.json) — not consumed at runtime
+acp_registry/
+
+# Repo-level dotfiles that are git-only or dev-tooling config
+.env.example
+.envrc
+.gitattributes
+.hadolint.yaml
+.mailmap
+
+# Top-level LICENSE (not matched by *.md); not needed inside the container
+LICENSE
--- a/.github/pr-screenshots/telegram-overflow/topic-final-response-clipped.jpg
+++ b/.github/pr-screenshots/telegram-overflow/topic-final-response-clipped.jpg
--- a/.github/workflows/deploy-site.yml
+++ b/.github/workflows/deploy-site.yml
@@ -44,7 +44,7 @@ jobs:

      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
-          node-version: 20
+          node-version: 22
          cache: npm
          cache-dependency-path: website/package-lock.json

@@ -59,12 +59,22 @@ jobs:
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
-          # Always rebuild — the file isn't committed (gitignored), so a
-          # fresh checkout starts without it and we want the freshest crawl
-          # in every deploy. Failure is non-fatal: extract-skills.py will
-          # fall back to the legacy snapshot cache and the Skills Hub page
-          # still renders, just without the latest community catalog.
-          python3 scripts/build_skills_index.py || echo "Skills index build failed (non-fatal)"
+          # Rebuild the unified catalog. The file is gitignored, so a fresh
+          # checkout starts without it and we want the freshest crawl in
+          # every deploy.
+          #
+          # This MUST be fatal. build_skills_index.py runs a health check and
+          # exits non-zero WITHOUT writing the output file when a source
+          # collapses (e.g. a GitHub API rate limit zeroes the github /
+          # claude-marketplace / well-known taps all at once). Letting the
+          # deploy continue would either (a) ship a degenerate index missing
+          # whole hubs — the June 2026 regression where OpenAI/Anthropic/
+          # HuggingFace/NVIDIA tabs vanished — or (b) fall through to a
+          # local-only catalog. Failing here keeps the last good deployment
+          # live (GitHub Pages serves the previous build) instead of
+          # publishing a broken catalog. Re-run the workflow once the
+          # transient rate limit clears.
+          python3 scripts/build_skills_index.py

      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py
--- a/.github/workflows/docs-site-checks.yml
+++ b/.github/workflows/docs-site-checks.yml
@@ -18,7 +18,7 @@ jobs:

      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
-          node-version: 20
+          node-version: 22
          cache: npm
          cache-dependency-path: website/package-lock.json

--- a/.github/workflows/e2e-cli-install.yml
+++ b/.github/workflows/e2e-cli-install.yml
@@ -0,0 +1,49 @@
+name: E2E CLI Tests
+
+on:
+  push:
+    branches:
+      - "**"
+
+permissions:
+  contents: read
+
+jobs:
+  e2e-tui-test:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
+        with:
+          node-version: 22
+          cache: npm
+      - run: npm ci
+      - run: cd e2e && CI=true npm run test
+        env:
+          # Ensure tests don't accidentally call real APIs
+          OPENROUTER_API_KEY: ""
+          OPENAI_API_KEY: ""
+          NOUS_API_KEY: ""
+
+      - name: Bundle TUI traces into self-contained replay HTML
+        if: always()
+        run: node e2e/scripts/bundle-replay-html.mjs
+
+      - name: Upload TUI replay viewer
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: tui-replay-viewer
+          path: tui-replay-viewer/
+          retention-days: 7
+
+      - name: Upload raw TUI test traces
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: tui-test-traces
+          path: e2e/tui-traces/
+          retention-days: 7
--- a/.github/workflows/nix-lockfile-fix.yml
+++ b/.github/workflows/nix-lockfile-fix.yml
@@ -75,9 +75,10 @@ jobs:
        run: |
          set -euo pipefail

-          # Ensure only nix files were modified — prevents accidental
-          # self-triggering if fix-lockfiles ever touches package files.
-          unexpected="$(git diff --name-only | grep -Ev '^nix/(tui|web)\.nix$' || true)"
+          # Ensure only nix/lib.nix (home of the single npmDepsHash) was
+          # modified — prevents accidental self-triggering if fix-lockfiles
+          # ever touches package files.
+          unexpected="$(git diff --name-only | grep -Ev '^nix/lib\.nix$' || true)"
          if [ -n "$unexpected" ]; then
            echo "::error::Unexpected modified files: $unexpected"
            exit 1
@@ -89,7 +90,7 @@ jobs:

          git config user.name 'github-actions[bot]'
          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
-          git add nix/tui.nix nix/web.nix
+          git add nix/lib.nix
          git commit -m "fix(nix): auto-refresh npm lockfile hashes" \
            -m "Source: $GITHUB_SHA" \
            -m "Run: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
@@ -216,7 +217,7 @@ jobs:
          set -euo pipefail
          git config user.name 'github-actions[bot]'
          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
-          git add nix/tui.nix nix/web.nix
+          git add nix/lib.nix
          git commit -m "fix(nix): refresh npm lockfile hashes"
          git push

--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -55,15 +55,31 @@ jobs:

      - name: Install uv
        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5
+        with:
+          # Persist uv's download/wheel cache (~/.cache/uv) across runs.
+          # Keyed on the dependency manifests, so the cache is reused until
+          # pyproject.toml or uv.lock changes. `uv sync` still runs every
+          # time, but resolves from the warm cache instead of re-downloading
+          # and re-building wheels.
+          enable-cache: true
+          cache-dependency-glob: |
+            pyproject.toml
+            uv.lock

      - name: Set up Python 3.11
        run: uv python install 3.11

      - name: Install dependencies
-        run: |
-          uv venv .venv --python 3.11
-          source .venv/bin/activate
-          uv pip install -e ".[all,dev]"
+        # `uv sync --locked` installs the exact pinned set from uv.lock (and
+        # fails if the lock is out of sync with pyproject.toml), giving a
+        # reproducible env. It also creates .venv itself, so no separate
+        # `uv venv` step is needed.
+        run: uv sync --locked --python 3.11 --extra all --extra dev
+
+      - name: Minimize uv cache
+        # Optimized for CI: prunes pre-built wheels that are cheap to
+        # re-download, keeping the persisted cache small and fast to restore.
+        run: uv cache prune --ci

      - name: Run tests (slice ${{ matrix.slice }}/6)
        # Per-file isolation via scripts/run_tests_parallel.py: discovers
@@ -161,15 +177,31 @@ jobs:

      - name: Install uv
        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5
+        with:
+          # Persist uv's download/wheel cache (~/.cache/uv) across runs.
+          # Keyed on the dependency manifests, so the cache is reused until
+          # pyproject.toml or uv.lock changes. `uv sync` still runs every
+          # time, but resolves from the warm cache instead of re-downloading
+          # and re-building wheels.
+          enable-cache: true
+          cache-dependency-glob: |
+            pyproject.toml
+            uv.lock

      - name: Set up Python 3.11
        run: uv python install 3.11

      - name: Install dependencies
-        run: |
-          uv venv .venv --python 3.11
-          source .venv/bin/activate
-          uv pip install -e ".[all,dev]"
+        # `uv sync --locked` installs the exact pinned set from uv.lock (and
+        # fails if the lock is out of sync with pyproject.toml), giving a
+        # reproducible env. It also creates .venv itself, so no separate
+        # `uv venv` step is needed.
+        run: uv sync --locked --python 3.11 --extra all --extra dev
+
+      - name: Minimize uv cache
+        # Optimized for CI: prunes pre-built wheels that are cheap to
+        # re-download, keeping the persisted cache small and fast to restore.
+        run: uv cache prune --ci

      - name: Packaged-wheel i18n smoke test
        run: |
--- a/.github/workflows/typecheck.yml
+++ b/.github/workflows/typecheck.yml
@@ -0,0 +1,25 @@
+# .github/workflows/typecheck.yml
+name: Typecheck
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  typecheck:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        package:
+          [ui-tui, web, apps/bootstrap-installer, apps/desktop, apps/shared]
+      fail-fast: false # report all failures, not just the first one
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
+        with:
+          node-version: 22
+          cache: npm
+      - run: npm ci
+      - run: npm run --prefix ${{ matrix.package }} typecheck
--- a/.gitignore
+++ b/.gitignore
@@ -19,6 +19,8 @@ __pycache__/
 .notebooklm-playwright/
 .pip-cache/
 .uv-cache/
+.tui-test/
+tui-traces/
 compose.hermes.local.yml
 export*
 __pycache__/model_tools.cpython-310.pyc
@@ -89,6 +91,9 @@ website/static/api/skills-index.json
 # every build).
 website/static/api/skills.json
 website/static/api/skills-meta.json
+# automation-blueprints-index.json is a build artifact emitted by
+# website/scripts/extract-automation-blueprints.py during prebuild.
+website/static/api/automation-blueprints-index.json
 models-dev-upstream/

 # Local editor / agent tooling (machine-specific; keep in global config, not the repo)
@@ -114,6 +119,12 @@ docs/superpowers/*
 # treat it as a local edit and autostash it on every run (#38529).
 .hermes-bootstrap-complete

+# Interrupted-update breadcrumb + recovery lock written next to the shared venv
+# by `hermes update` / launch-time self-heal. Runtime state, never a code change
+# — ignore so `git status` stays clean and update's autostash skips them.
+.update-incomplete
+.update-incomplete.lock
+
 # Tool Search live-test harness output — non-deterministic model transcripts,
 # regenerated by scripts/tool_search_livetest.py. Never an artifact of the repo.
 scripts/out/
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -4,6 +4,201 @@ Instructions for AI coding assistants and developers working on the hermes-agent

 **Never give up on the right solution.**

+## What Hermes Is
+
+Hermes is a personal AI agent that runs the same agent core across a CLI, a
+messaging gateway (Telegram, Discord, Slack, and ~20 other platforms), a TUI,
+and an Electron desktop app. It learns across sessions (memory + skills),
+delegates to subagents, runs scheduled jobs, and drives a real terminal and
+browser. It is extended primarily through **plugins and skills**, not by
+growing the core.
+
+Two properties shape almost every design decision and are the lens for
+reviewing any change:
+
+- **Per-conversation prompt caching is sacred.** A long-lived conversation
+  reuses a cached prefix every turn. Anything that mutates past context,
+  swaps toolsets, or rebuilds the system prompt mid-conversation invalidates
+  that cache and multiplies the user's cost. We do not do it (the one
+  exception is context compression).
+- **The core is a narrow waist; capability lives at the edges.** Every model
+  tool we add is sent on every API call, so the bar for a new *core* tool is
+  high. Most new capability should arrive as a CLI command + skill, a
+  service-gated tool, or a plugin — not as core surface.
+
+## Contribution Rubric — What We Want / What We Don't
+
+This is the project's intent layer. Use it two ways:
+
+1. **For humans and for your own work** — what gets merged and what gets
+   rejected, so a contribution aims at the target.
+2. **For automated review (the triage sweeper)** — guidance on when a PR is
+   safe to close for one of the three allowed reasons (`implemented_on_main`,
+   `cannot_reproduce`, `incoherent`) and, just as important, **when NOT to
+   close** one. Taste-based "we don't want this / out of scope" closes are NOT
+   an automated decision — those stay with a human maintainer. The sweeper's
+   job here is to recognize design intent and *avoid wrongly closing a
+   legitimate contribution*, not to make the won't-implement call itself.
+
+Read the balance right: Hermes ships a **lot** — most merges are bug fixes to
+real reported behavior, and the product surface (platforms, channels,
+providers, models, desktop/TUI features) expands aggressively and on purpose.
+The restraint below is aimed squarely at the **core agent + the model tool
+schema**, the one place where every addition is paid for on every API call.
+"Smallest footprint" governs *how a capability is wired into the core*, NOT
+whether the product is allowed to grow. We are expansive at the edges and
+conservative at the waist.
+
+### What we want
+
+- **Fix real bugs, well.** The bulk of what lands is `fix(...)` against an
+  actual reported symptom. A good fix reproduces the symptom on current
+  `main`, points to the exact line where it manifests, and fixes the whole bug
+  class — sibling call paths included — not just the one site the reporter hit.
+- **Expand reach at the edges.** New platform adapters, channels, providers,
+  models, and desktop/TUI/dashboard features are welcome and land routinely,
+  including large ones (a new messaging channel, a session-cap feature, a
+  Windows PTY bridge). Breadth in the product is a goal, not a footprint
+  concern — as long as it integrates with the existing setup/config UX
+  (`hermes tools`, `hermes setup`, auto-install) rather than bolting on a raw
+  env var.
+- **Refactor god-files into clean modules.** Extracting a multi-thousand-line
+  cluster out of `cli.py` / `run_agent.py` / `gateway/run.py` into a focused
+  mixin or module is wanted work, even when the diff is huge and mechanical
+  (large `+N/-N` refactors merge regularly). The "every line traces to the
+  request" test applies to *feature* PRs; a declared refactor's request IS the
+  extraction.
+- **Keep the core narrow.** New *model tools* are the expensive exception —
+  every tool ships on every API call. Prefer, in order: extend existing code →
+  CLI command + skill → service-gated tool (`check_fn`) → plugin → MCP server
+  in the catalog → new core tool (last resort). See "The Footprint Ladder."
+- **Extend, don't duplicate.** Before adding a module/manager/hook, check
+  whether existing infrastructure already covers the use case. When several PRs
+  integrate the same *category*, design one shared interface instead of merging
+  them one at a time (see the ABC + orchestrator note under the Footprint
+  Ladder).
+- **Behavior contracts over snapshots.** Tests should assert how two pieces of
+  data must relate (invariants), not freeze a current value (model lists,
+  config version literals, enumeration counts). See "Don't write
+  change-detector tests."
+- **E2E validation, not just green unit mocks.** For anything touching
+  resolution chains, config propagation, security boundaries, remote
+  backends, or file/network I/O, exercise the real path with real imports
+  against a temp `HERMES_HOME`. Mocks hide integration bugs.
+- **Cache-, alternation-, and invariant-safe.** Preserve prompt caching, strict
+  message role alternation (never two same-role messages in a row; never a
+  synthetic user message injected mid-loop), and a system prompt that is
+  byte-stable for the life of a conversation.
+- **Contributor credit preserved.** Salvage external work by cherry-picking
+  (rebase-merge) so authorship survives in git history; don't reimplement from
+  scratch when you can build on top.
+
+### What we don't want (rejected even when well-built)
+
+- **Speculative infrastructure.** Hooks, callbacks, or extension points with no
+  concrete consumer. Adding a hook is easy; removing one after plugins depend
+  on it is hard. A hook is NOT speculative if a contributor has a real, stated
+  use case — even if the consumer ships separately.
+- **New `HERMES_*` env vars for non-secret config.** `.env` is for secrets
+  only (API keys, tokens, passwords). All behavioral settings — timeouts,
+  thresholds, feature flags, display prefs — go in `config.yaml`. Bridge to an
+  internal env var if the mechanism needs one, but user-facing docs point to
+  `config.yaml`. Reject PRs that tell users to "set X in your .env" unless X
+  is a credential.
+- **A new core tool when terminal + file already do the job, or when a skill
+  would.** If the only barrier is file visibility on a remote backend, fix the
+  mount, not the toolset.
+- **Lazy-reading escape hatches on instructional tools.** No `offset`/`limit`
+  pagination on tools that load content the agent must read fully (skills,
+  prompts, playbooks). Models will read page 1 and skip the rest.
+- **"Fixes" that destroy the feature they secure.** A mitigation that kills the
+  feature's purpose is the wrong mitigation. Read the original commit's intent
+  (`git log -p -S`) before restricting behavior; find a fix that preserves the
+  feature.
+- **Outbound telemetry / usage attribution without opt-in gating.** No new
+  analytics, third-party identifier tagging, or attribution tags until a
+  generic user-facing opt-in (config gate + setup prompt + `hermes tools`
+  toggle) exists. Park behind a label, do not merge.
+- **Change-detector tests, cache-breaking mid-conversation, dead code wired in
+  without E2E proof, and plugins that touch core files.** Plugins live in their
+  own directory and work within the ABCs/hooks we provide; if a plugin needs
+  more, widen the generic plugin surface, don't special-case it in core.
+
+### Before you call it a bug — verify the premise (and when NOT to close)
+
+The most common reason a well-written PR gets closed is not code quality — it
+is that the change is built on a **wrong premise**, or it treats an
+**intentional design as a gap**. These patterns cut both ways: they tell a
+human reviewer what to scrutinize, and they tell the automated sweeper when a
+PR is NOT safe to close as `implemented_on_main` / `cannot_reproduce` (when in
+doubt, leave it open for a human). They are distilled from real closes.
+
+- **"Intentional design, not a gap."** A limitation that looks like an
+  oversight is often deliberate. Before "fixing" a missing link or a
+  restriction, ask whether the isolation IS the design. Example: profiles are
+  independent islands on purpose — a PR adding live config inheritance from the
+  default profile was closed because coupling profiles together is exactly what
+  the design prevents (the copy-at-creation `--clone` path already covers the
+  legitimate "start from my default" case). Read the original commit's intent
+  (`git log -p -S "<symbol>"`) before assuming something is unfinished.
+- **"The premise doesn't hold against how X actually works."** A PR's
+  justification frequently rests on a wrong mental model of an existing
+  mechanism. Trace the real code/runtime before accepting the rationale. Two
+  real closes: a rate-limit "re-probe during cooldown" PR (the breaker only
+  trips on a *confirmed-empty* account bucket, so re-probing just hammers a
+  bucket we've already proven empty); a usage-accumulation fix whose new branch
+  **never executes at runtime** because an earlier guard already popped the
+  state it depended on. If you can't point to the exact line where the bug
+  manifests AND show the fix changes that line's behavior, you haven't verified
+  the premise.
+- **"This fix was wrong — the absence/omission was deliberate."** Adding the
+  obvious-looking missing piece can break things the omission was protecting.
+  Example: restoring "missing" `__init__.py` files made a test tree importable
+  as a dotted package that shadowed the real plugin, deleting its `register()`
+  at import time. The absence was load-bearing.
+- **"Overreached / resurrected an approach we'd moved past."** Scope creep that
+  supersedes an agreed-on base, or revives a direction the maintainers
+  deliberately closed, gets rejected even when the code works. Keep the change
+  to the narrow piece that was actually agreed; offer the rest as a focused
+  follow-up.
+
+The throughline: **verify the claim AND the intent against the codebase before
+writing or merging a fix.** A confirmed reproduction on current `main` plus a
+line-level account of where the fix acts beats a plausible-sounding rationale
+every time. When in doubt about intent, it is cheaper to ask than to ship a
+fix that fights the design.
+
+### The Footprint Ladder (new capability decision)
+
+Each rung adds more permanent surface than the one above. Choose the highest
+(least-footprint) rung that correctly solves the problem:
+
+1. **Extend existing code** — the capability is a variation of something that
+   already exists. Zero new surface.
+2. **CLI command + skill** — manages config/state/infra expressible as shell
+   commands. The agent runs `hermes <subcommand>` guided by a skill. Zero
+   model-tool footprint. Default choice for subscriptions, scheduled tasks,
+   service setup. Examples: `hermes webhook`, `hermes cron`, `hermes tools`.
+3. **Service-gated tool (`check_fn`)** — needs structured params/returns AND
+   only appears when a prerequisite is configured. Zero footprint otherwise.
+   Examples: Home Assistant tools (gated on token), memory-provider tools.
+4. **Plugin** — third-party/niche/user-specific capability that doesn't ship in
+   core. Lives in `~/.hermes/plugins/` or a pip package, discovered at runtime.
+5. **MCP server (in the catalog)** — if the capability genuinely needs to be a
+   tool (structured I/O the agent invokes) but isn't core-fundamental, prefer
+   building it as an MCP server and adding it to the MCP catalog over growing
+   the core toolset. The agent connects to it through the built-in MCP client;
+   zero permanent core-schema footprint, and it's reusable by any MCP host.
+6. **New core tool** — only when the capability is fundamental, broadly useful
+   to nearly every user, and unreachable via terminal + file (or an MCP server).
+   Examples of correct core tools: terminal, read_file, web_search,
+   browser_navigate.
+
+When 3+ open PRs try to integrate the same *category* of thing (memory
+backends, providers, notifiers), don't merge them one at a time — design an
+ABC + orchestrator, wrap the existing built-in as the first provider, and turn
+the competing PRs into plugins against that interface.
+
 ## Development Environment

 ```bash
@@ -264,7 +459,7 @@ npm install       # first time
 npm run dev       # watch mode (rebuilds hermes-ink + tsx --watch)
 npm start         # production
 npm run build     # full build (hermes-ink + tsc)
-npm run type-check # typecheck only (tsc --noEmit)
+npm run typecheck # typecheck only (tsc --noEmit)
 npm run lint      # eslint
 npm run fmt       # prettier
 npm test          # vitest
@@ -302,9 +497,11 @@ A **separate** chat surface from both the classic CLI and the dashboard's embedd

 ## Adding New Tools

-For most custom or local-only tools, do **not** edit Hermes core. Use the plugin
-route instead: create `~/.hermes/plugins/<name>/plugin.yaml` and
-`~/.hermes/plugins/<name>/__init__.py`, then register tools with
+Before adding any tool, settle the footprint question first (see "The
+Footprint Ladder" in the Contribution Rubric): most capabilities should NOT
+be core tools. For custom or local-only tools, do **not** edit Hermes core.
+Use the plugin route instead: create `~/.hermes/plugins/<name>/plugin.yaml`
+and `~/.hermes/plugins/<name>/__init__.py`, then register tools with
 `ctx.register_tool(...)`. Plugin toolsets are discovered automatically and can be
 enabled or disabled without touching `tools/` or `toolsets.py`.

--- a/Dockerfile
+++ b/Dockerfile
@@ -25,7 +25,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
 # hermes process, the dashboard, and per-profile gateways.
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-    ca-certificates curl iputils-ping python3 python-is-python3 ripgrep ffmpeg gcc python3-dev python3-venv libffi-dev libolm-dev procps git openssh-client docker-cli xz-utils && \
+    ca-certificates curl iputils-ping python3 python-is-python3 ripgrep ffmpeg gcc g++ make cmake python3-dev python3-venv libffi-dev libolm-dev procps git openssh-client docker-cli xz-utils && \
    rm -rf /var/lib/apt/lists/*

 # ---------- s6-overlay install ----------
@@ -146,9 +146,9 @@ RUN npm install --prefer-offline --no-audit && \
 #
 # `uv sync --frozen --no-install-project --extra all --extra messaging`
 # installs the deps reachable through the composite `[all]` extra
-# (handpicked set intended for the production image), plus gateway
-# messaging adapters that should work in the published image without a
-# first-boot lazy install.  We do NOT use `--all-extras`:
+# (handpicked set intended for the production image — excludes `[dev]`),
+# plus gateway messaging adapters that should work in the published image
+# without a first-boot lazy install.  We do NOT use `--all-extras`:
 # that would pull in `[rl]` (atroposlib + tinker + torch + wandb from
 # git), `[yc-bench]` (another git dep), and `[termux-all]` (Android
 # redundancy), none of which belong in the published container.
@@ -164,19 +164,30 @@ RUN npm install --prefer-offline --no-audit && \
 # image update and recall/retain then fails with
 # `ModuleNotFoundError: No module named 'hindsight_client'` (#38128).
 #
+# The Matrix gateway's deps ([matrix] extra) are baked in because
+# python-olm (transitive via mautrix[encryption]) builds from source on
+# Python/image combinations without usable wheels.  The Docker image is
+# Linux-only, so keeping the native libolm/build-toolchain packages here
+# avoids the cross-platform failures that kept [matrix] out of [all]
+# while still making Matrix work in the published container. Fixes #30399.
+#
 # The editable link is created after the source copy below.
 COPY pyproject.toml uv.lock ./
 RUN touch ./README.md
-RUN uv sync --frozen --no-install-project --extra all --extra messaging --extra anthropic --extra bedrock --extra azure-identity --extra hindsight
+RUN uv sync --frozen --no-install-project --extra all --extra messaging --extra anthropic --extra bedrock --extra azure-identity --extra hindsight --extra matrix
+
+# ---------- Frontend build (cached independently from Python source) ----------
+# Copy only the frontend source trees first so that Python-only changes don't
+# invalidate the (relatively slow) web + ui-tui build layer.
+COPY web/ web/
+COPY ui-tui/ ui-tui/
+RUN cd web && npm run build && \
+    cd ../ui-tui && npm run build

 # ---------- Source code ----------
 # .dockerignore excludes node_modules, so the installs above survive.
 COPY --chown=hermes:hermes . .

-# Build browser dashboard and terminal UI assets.
-RUN cd web && npm run build && \
-    cd ../ui-tui && npm run build
-
 # ---------- Permissions ----------
 # Make install dir world-readable so any HERMES_UID can read it at runtime.
 # The venv needs to be traversable too.
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,6 @@
 graft skills
 graft optional-skills
+graft optional-mcps
 graft locales
 # Bundled plugin manifests (plugin.yaml / plugin.yml). Without these the
 # PluginManager scan (hermes_cli/plugins.py) finds zero plugins on installs
--- a/README.md
+++ b/README.md
@@ -3,13 +3,16 @@
 </p>

 # Hermes Agent ☤
-
+<p align="center">
+  <a href="https://hermes-agent.nousresearch.com/">Hermes Agent</a> | <a href="https://hermes-agent.nousresearch.com/">Hermes Desktop</a>
+</p>
 <p align="center">
  <a href="https://hermes-agent.nousresearch.com/docs/"><img src="https://img.shields.io/badge/Docs-hermes--agent.nousresearch.com-FFD700?style=for-the-badge" alt="Documentation"></a>
  <a href="https://discord.gg/NousResearch"><img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord"></a>
  <a href="https://github.com/NousResearch/hermes-agent/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-green?style=for-the-badge" alt="License: MIT"></a>
  <a href="https://nousresearch.com"><img src="https://img.shields.io/badge/Built%20by-Nous%20Research-blueviolet?style=for-the-badge" alt="Built by Nous Research"></a>
  <a href="README.zh-CN.md"><img src="https://img.shields.io/badge/Lang-中文-red?style=for-the-badge" alt="中文"></a>
+  <a href="README.ur-pk.md"><img src="https://img.shields.io/badge/Lang-اردو-green?style=for-the-badge" alt="اردو"></a>
 </p>

 **The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
@@ -52,7 +55,7 @@ If you already have Git installed, the installer detects it and uses that instea

 > **Android / Termux:** The tested manual path is documented in the [Termux guide](https://hermes-agent.nousresearch.com/docs/getting-started/termux). On Termux, Hermes installs a curated `.[termux]` extra because the full `.[all]` extra currently pulls Android-incompatible voice dependencies.
 >
-> **Windows:** Native Windows is fully supported — the PowerShell one-liner above installs everything. If you'd rather use WSL2, the Linux command works there too. Native Windows install lives under `%LOCALAPPDATA%\hermes`; WSL2 installs under `~/.hermes` as on Linux. The only Hermes feature that currently needs WSL2 specifically is the browser-based dashboard chat pane (it uses a POSIX PTY — classic CLI and gateway both run natively).
+> **Windows:** Native Windows is fully supported — the PowerShell one-liner above installs everything. If you'd rather use WSL2, the Linux command works there too. Native Windows install lives under `%LOCALAPPDATA%\hermes`; WSL2 installs under `~/.hermes` as on Linux.

 After installation:

--- a/README.ur-pk.md
+++ b/README.ur-pk.md
@@ -0,0 +1,261 @@
+<div dir="rtl">
+
+<p align="center">
+  <img src="assets/banner.png" alt="Hermes Agent" width="100%">
+</p>
+
+# ہرمیس ایجنٹ ☤ (Hermes Agent)
+
+<p align="center">
+  <a href="https://hermes-agent.nousresearch.com/docs/"><img src="https://img.shields.io/badge/Docs-hermes--agent.nousresearch.com-FFD700?style=for-the-badge" alt="Documentation"></a>
+  <a href="https://discord.gg/NousResearch"><img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord"></a>
+  <a href="https://github.com/NousResearch/hermes-agent/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-green?style=for-the-badge" alt="License: MIT"></a>
+  <a href="https://nousresearch.com"><img src="https://img.shields.io/badge/Built%20by-Nous%20Research-blueviolet?style=for-the-badge" alt="Built by Nous Research"></a>
+  <a href="README.md"><img src="https://img.shields.io/badge/Lang-English-lightgrey?style=for-the-badge" alt="English"></a>
+  <a href="README.zh-CN.md"><img src="https://img.shields.io/badge/Lang-中文-red?style=for-the-badge" alt="中文"></a>
+</p>
+
+**[نوس ریسرچ (Nous Research)](https://nousresearch.com) کا تیار کردہ خود کو بہتر بنانے والا اے آئی (AI) ایجنٹ۔** یہ واحد ایجنٹ ہے جس میں سیکھنے کا عمل (learning loop) پہلے سے موجود ہے — یہ اپنے تجربات سے نئی مہارتیں (skills) بناتا ہے، استعمال کے دوران ان کو بہتر کرتا ہے، معلومات کو محفوظ رکھنے کے لیے خود کو یاد دہانی کرواتا ہے، اپنی پرانی بات چیت کو تلاش کر سکتا ہے، اور مختلف سیشنز کے دوران آپ کے بارے میں ایک گہری سمجھ پیدا کرتا ہے۔ اسے $5 والے VPS پر چلائیں، GPU کلسٹر پر، یا سرور لیس (serverless) انفراسٹرکچر پر جس کی قیمت استعمال نہ ہونے پر تقریباً صفر ہے۔ یہ آپ کے لیپ ٹاپ تک محدود نہیں ہے — آپ ٹیلی گرام (Telegram) سے اس کے ساتھ بات چیت کر سکتے ہیں جبکہ یہ کلاؤڈ VM پر کام کر رہا ہو۔
+
+آپ اپنی مرضی کا کوئی بھی ماڈل استعمال کر سکتے ہیں — [Nous Portal](https://portal.nousresearch.com)، [OpenRouter](https://openrouter.ai) (200 سے زائد ماڈلز)، [NovitaAI](https://novita.ai) (ماڈل API، ایجنٹ سینڈ باکس، اور GPU کلاؤڈ کے لیے اے آئی مقامی کلاؤڈ)، [NVIDIA NIM](https://build.nvidia.com) (Nemotron)، [Xiaomi MiMo](https://platform.xiaomimimo.com)، [z.ai/GLM](https://z.ai)، [Kimi/Moonshot](https://platform.moonshot.ai)، [MiniMax](https://www.minimax.io)، [Hugging Face](https://huggingface.co)، OpenAI، یا اپنا حسب ضرورت اینڈ پوائنٹ (endpoint) استعمال کریں۔ ماڈل تبدیل کرنے کے لیے صرف `hermes model` استعمال کریں — کسی کوڈ کو تبدیل کرنے کی ضرورت نہیں، کوئی پابندی نہیں۔
+
+<table>
+<tr><td><b>حقیقی ٹرمینل انٹرفیس</b></td><td>مکمل TUI جس میں ملٹی لائن ایڈیٹنگ، سلیش-کمانڈ آٹو کمپلیٹ، بات چیت کی ہسٹری، انٹرپٹ اور ری ڈائریکٹ، اور سٹریمنگ ٹول آؤٹ پٹ شامل ہے۔</td></tr>
+<tr><td><b>یہ وہاں موجود ہے جہاں آپ ہیں</b></td><td>ٹیلی گرام، ڈسکارڈ (Discord)، سلیک (Slack)، واٹس ایپ (WhatsApp)، سگنل (Signal)، اور CLI — سب ایک ہی گیٹ وے پروسیس سے کام کرتے ہیں۔ وائس میمو (Voice memo) ٹرانسکرپشن، کراس پلیٹ فارم بات چیت کا تسلسل۔</td></tr>
+<tr><td><b>سیکھنے کا ایک مکمل عمل</b></td><td>ایجنٹ کی اپنی ترتیب دی گئی میموری، جس میں وہ خود کو وقتاً فوقتاً یاد دہانی کرواتا ہے۔ پیچیدہ کاموں کے بعد خود کار طریقے سے مہارت (skill) کی تخلیق۔ استعمال کے دوران مہارتوں میں بہتری۔ LLM سمرائزیشن کے ساتھ FTS5 سیشن سرچ تاکہ پرانے سیشنز کی یاددہانی کی جا سکے۔ <a href="https://github.com/plastic-labs/honcho">Honcho</a> کے ذریعے صارف کی ماڈلنگ۔ <a href="https://agentskills.io">agentskills.io</a> اوپن سٹینڈرڈ کے ساتھ مکمل مطابقت۔</td></tr>
+<tr><td><b>شیڈول کی گئی خودکار کارروائیاں</b></td><td>بلٹ ان (Built-in) کرون (cron) شیڈیولر جو کسی بھی پلیٹ فارم پر ڈیلیوری کے لیے استعمال ہو سکتا ہے۔ روزانہ کی رپورٹس، رات کے بیک اپس، ہفتہ وار آڈٹس — یہ سب کچھ قدرتی زبان (natural language) میں اور بغیر کسی نگرانی کے کام کرتا ہے۔</td></tr>
+<tr><td><b>کام کی تقسیم اور متوازی عمل</b></td><td>متوازی (parallel) کاموں کے لیے الگ سے ذیلی ایجنٹس (subagents) بنائیں۔ پائتھون (Python) سکرپٹس لکھیں جو RPC کے ذریعے ٹولز کو استعمال کریں، تاکہ کئی مراحل پر مشتمل کاموں کو بغیر کسی سیاق و سباق (context) کے خرچ کے، ایک ہی باری میں انجام دیا جا سکے۔</td></tr>
+<tr><td><b>کہیں بھی چلائیں، صرف اپنے لیپ ٹاپ پر نہیں</b></td><td>چھ (Six) ٹرمینل بیک اینڈز — لوکل، Docker، SSH، Singularity، Modal، اور Daytona۔ ڈیٹونا (Daytona) اور موڈل (Modal) سرور لیس (serverless) فعالیت پیش کرتے ہیں — جب آپ کا ایجنٹ فارغ ہوتا ہے تو اس کا ماحول سلیپ (hibernate) ہو جاتا ہے اور ضرورت پڑنے پر خود بخود جاگ جاتا ہے، جس کی وجہ سے سیشنز کے درمیان لاگت تقریباً صفر رہتی ہے۔ اسے $5 والے VPS یا GPU کلسٹر پر چلائیں۔</td></tr>
+<tr><td><b>تحقیق کے لیے تیار</b></td><td>بیچ (Batch) ٹریجیکٹری (trajectory) جنریشن، اگلی نسل کے ٹول کالنگ ماڈلز کی تربیت کے لیے ٹریجیکٹری کمپریشن۔</td></tr>
+</table>
+
+---
+
+## فوری انسٹالیشن (Quick Install)
+
+### لینکس (Linux)، میک او ایس (macOS)، ڈبلیو ایس ایل ٹو (WSL2)، ٹرمکس (Termux)
+
+<div dir="ltr">
+
+```bash
+curl -fsSL https://hermes-agent.nousresearch.com/install.sh | bash
+```
+
+</div>
+
+### ونڈوز (نیٹو، پاور شیل)
+
+> **توجہ فرمائیں:** مقامی ونڈوز (Native Windows) پر ہرمیس بغیر WSL کے چلتا ہے — CLI، گیٹ وے، TUI، اور ٹولز سب مقامی طور پر کام کرتے ہیں۔ اگر آپ WSL2 استعمال کرنا پسند کرتے ہیں، تو اوپر دی گئی لینکس/میک او ایس کی کمانڈ وہاں بھی کام کرے گی۔ کوئی مسئلہ نظر آیا؟ براہ کرم [مسائل (issues) درج کریں](https://github.com/NousResearch/hermes-agent/issues)۔
+
+اسے پاور شیل (PowerShell) میں چلائیں:
+
+<div dir="ltr">
+
+```powershell
+iex (irm https://hermes-agent.nousresearch.com/install.ps1)
+```
+
+</div>
+
+انسٹالر سب کچھ خود سنبھالتا ہے: uv، Python 3.11، Node.js، ripgrep، ffmpeg، **اور ایک پورٹ ایبل (portable) گٹ بیش (Git Bash)** (یعنی MinGit، جو `%LOCALAPPDATA%\hermes\git` میں ان پیک ہوتا ہے — اس کے لیے ایڈمن کی اجازت درکار نہیں، اور یہ سسٹم کے کسی بھی گٹ انسٹال سے بالکل الگ ہے)۔ ہرمیس اس بنڈل شدہ گٹ بیش کو شیل کمانڈز چلانے کے لیے استعمال کرتا ہے۔
+
+اگر آپ کے پاس پہلے سے گٹ (Git) انسٹال ہے، تو انسٹالر اسے شناخت کر لیتا ہے اور اسے ہی استعمال کرتا ہے۔ بصورت دیگر آپ کو صرف ~45MB کے MinGit ڈاؤنلوڈ کی ضرورت ہوگی — یہ آپ کے سسٹم کے گٹ پر کوئی اثر نہیں ڈالے گا۔
+
+> **اینڈرائیڈ (Android) / ٹرمکس (Termux):** ٹیسٹ کیا گیا مینوئل طریقہ [Termux گائیڈ](https://hermes-agent.nousresearch.com/docs/getting-started/termux) میں موجود ہے۔ ٹرمکس پر ہرمیس ایک مخصوص `.[termux]` ایکسٹرا انسٹال کرتا ہے کیونکہ مکمل `.[all]` ایکسٹرا میں ایسی وائس ڈیپینڈینسیز شامل ہیں جو اینڈرائیڈ کے ساتھ مطابقت نہیں رکھتیں۔
+>
+> **ونڈوز (Windows):** مقامی ونڈوز کی مکمل سپورٹ موجود ہے — اوپر دی گئی پاور شیل کی کمانڈ سب کچھ انسٹال کر دیتی ہے۔ اگر آپ WSL2 استعمال کرنا چاہتے ہیں، تو لینکس کی کمانڈ وہاں کام کرتی ہے۔ مقامی ونڈوز میں انسٹالیشن `%LOCALAPPDATA%\hermes` میں ہوتی ہے؛ جبکہ WSL2 میں لینکس کی طرح `~/.hermes` میں ہوتی ہے۔
+
+انسٹالیشن کے بعد:
+
+<div dir="ltr">
+
+```bash
+source ~/.bashrc    # شیل کو ری لوڈ کریں (یا: source ~/.zshrc)
+hermes              # بات چیت شروع کریں!
+```
+
+</div>
+
+---
+
+## آغاز کریں (Getting Started)
+
+<div dir="ltr">
+
+```bash
+hermes              # انٹرایکٹو CLI — بات چیت شروع کریں
+hermes model        # اپنا LLM پرووائیڈر اور ماڈل منتخب کریں
+hermes tools        # کنفیگر کریں کہ کون سے ٹولز ایکٹو ہیں
+hermes config set   # انفرادی کنفگ (config) ویلیوز سیٹ کریں
+hermes gateway      # میسجنگ گیٹ وے شروع کریں (ٹیلی گرام، ڈسکارڈ، وغیرہ)
+hermes setup        # مکمل سیٹ اپ وزرڈ چلائیں (یہ سب کچھ ایک ساتھ کنفیگر کر دے گا)
+hermes claw migrate # OpenClaw سے مائیگریٹ کریں (اگر آپ OpenClaw سے آ رہے ہیں)
+hermes update       # لیٹسٹ ورژن پر اپ ڈیٹ کریں
+hermes doctor       # کسی بھی مسئلے کی تشخیص کریں
+```
+
+</div>
+
+📖 **[مکمل دستاویزات →](https://hermes-agent.nousresearch.com/docs/)**
+
+---
+
+## API-کیز اکٹھی کرنے سے بچیں — Nous Portal
+
+ہرمیس آپ کے پسندیدہ پرووائیڈر کے ساتھ کام کرتا ہے — یہ چیز تبدیل نہیں ہو رہی۔ لیکن اگر آپ ماڈل، ویب سرچ، امیج جنریشن، TTS، اور کلاؤڈ براؤزر کے لیے پانچ الگ الگ API کیز جمع نہیں کرنا چاہتے، تو **[Nous Portal](https://portal.nousresearch.com)** ان سب کو ایک ہی سبسکرپشن کے تحت کور کرتا ہے:
+
+- **300+ ماڈلز** — ان میں سے کوئی بھی ماڈل `/model <name>` کے ذریعے منتخب کریں
+- **ٹول گیٹ وے (Tool Gateway)** — ویب سرچ (Firecrawl)، امیج جنریشن (FAL)، ٹیکسٹ ٹو سپیچ (OpenAI)، کلاؤڈ براؤزر (Browser Use)، یہ سب آپ کی سبسکرپشن کے ذریعے چلتے ہیں۔ کسی اضافی اکاؤنٹ کی ضرورت نہیں۔
+
+نئی انسٹالیشن کے بعد بس ایک کمانڈ کی ضرورت ہے:
+
+<div dir="ltr">
+
+```bash
+hermes setup --portal
+```
+
+</div>
+
+یہ آپ کو OAuth کے ذریعے لاگ ان کرواتا ہے، Nous کو آپ کا پرووائیڈر مقرر کرتا ہے، اور ٹول گیٹ وے کو آن کر دیتا ہے۔ `hermes portal info` کمانڈ استعمال کر کے آپ کسی بھی وقت چیک کر سکتے ہیں کہ کون کون سی سروسز منسلک ہیں۔ مکمل تفصیلات [Tool Gateway دستاویزات کے صفحے](https://hermes-agent.nousresearch.com/docs/user-guide/features/tool-gateway) پر موجود ہیں۔
+
+آپ اب بھی کسی بھی ٹول کے لیے اپنی مرضی کی API کیز استعمال کر سکتے ہیں — گیٹ وے ہر سروس کے لیے الگ الگ کام کرتا ہے، ایسا نہیں کہ یا تو سب کچھ استعمال کریں یا کچھ بھی نہیں۔
+
+---
+
+## CLI بمقابلہ میسجنگ فوری حوالہ
+
+ہرمیس کے دو بنیادی انٹرفیس ہیں: آپ ٹرمینل UI کو `hermes` کے ساتھ شروع کریں، یا گیٹ وے چلا کر اس کے ساتھ ٹیلی گرام، ڈسکارڈ، سلیک، واٹس ایپ، سگنل، یا ای میل کے ذریعے بات کریں۔ جب آپ کسی بات چیت میں ہوتے ہیں، تو بہت سی سلیش (slash) کمانڈز دونوں انٹرفیسز میں ایک جیسی ہوتی ہیں۔
+
+<div dir="ltr">
+
+| کارروائی (Action)                         | سی ایل آئی (CLI)                              | میسجنگ پلیٹ فارمز (Messaging platforms)                                          |
+| --------------------------------------- | --------------------------------------------- | -------------------------------------------------------------------------------- |
+| بات چیت شروع کریں                       | `hermes`                                      | `hermes gateway setup` اور `hermes gateway start` چلائیں، پھر بوٹ کو میسج بھیجیں |
+| نئی بات چیت شروع کریں                   | `/new` یا `/reset`                            | `/new` یا `/reset`                                                               |
+| ماڈل تبدیل کریں                         | `/model [provider:model]`                     | `/model [provider:model]`                                                        |
+| پرسنلٹی (Personality) سیٹ کریں           | `/personality [name]`                         | `/personality [name]`                                                            |
+| پچھلی باری کو دوبارہ یا منسوخ (undo) کریں | `/retry`، `/undo`                             | `/retry`، `/undo`                                                                |
+| کانٹیکسٹ (context) کمپریس کریں / استعمال چیک کریں | `/compress`، `/usage`، `/insights [--days N]` | `/compress`، `/usage`، `/insights [days]`                                        |
+| مہارتیں (Skills) براؤز کریں             | `/skills` یا `/<skill-name>`                  | `/<skill-name>`                                                                  |
+| موجودہ کام کو روکیں                     | `Ctrl+C` دبائیں یا نیا میسج بھیجیں            | `/stop` یا نیا میسج بھیجیں                                                       |
+| پلیٹ فارم کے لحاظ سے سٹیٹس              | `/platforms`                                  | `/status`، `/sethome`                                                            |
+
+</div>
+
+مکمل کمانڈ لسٹ کے لیے، [CLI گائیڈ](https://hermes-agent.nousresearch.com/docs/user-guide/cli) اور [میسجنگ گیٹ وے گائیڈ](https://hermes-agent.nousresearch.com/docs/user-guide/messaging) دیکھیں۔
+
+---
+
+## دستاویزات (Documentation)
+
+تمام دستاویزات **[hermes-agent.nousresearch.com/docs](https://hermes-agent.nousresearch.com/docs/)** پر موجود ہیں:
+
+<div dir="ltr">
+
+| سیکشن (Section)                                                                                     | تفصیل (What's Covered)                                     |
+| --------------------------------------------------------------------------------------------------- | ---------------------------------------------------------- |
+| [فوری آغاز (Quickstart)](https://hermes-agent.nousresearch.com/docs/getting-started/quickstart)     | انسٹالیشن → سیٹ اپ → 2 منٹ میں پہلی بات چیت شروع کریں       |
+| [CLI کا استعمال](https://hermes-agent.nousresearch.com/docs/user-guide/cli)                         | کمانڈز، کی بائنڈنگز (keybindings)، پرسنلٹیز (personalities)، سیشنز |
+| [کنفیگریشن (Configuration)](https://hermes-agent.nousresearch.com/docs/user-guide/configuration)    | کنفگ فائل، پرووائیڈرز، ماڈلز، اور تمام آپشنز               |
+| [میسجنگ گیٹ وے](https://hermes-agent.nousresearch.com/docs/user-guide/messaging)                    | ٹیلی گرام، ڈسکارڈ، سلیک، واٹس ایپ، سگنل، ہوم اسسٹنٹ         |
+| [سیکیورٹی (Security)](https://hermes-agent.nousresearch.com/docs/user-guide/security)              | کمانڈ کی منظوری، DM پیئرنگ (pairing)، کنٹینر آئسولیشن       |
+| [ٹولز اور ٹول سیٹس](https://hermes-agent.nousresearch.com/docs/user-guide/features/tools)          | 40 سے زائد ٹولز، ٹول سیٹ سسٹم، ٹرمینل بیک اینڈز             |
+| [مہارتوں کا سسٹم (Skills System)](https://hermes-agent.nousresearch.com/docs/user-guide/features/skills)| پروسیجرل (Procedural) میموری، سکلز ہب، نئی مہارتیں بنانا    |
+| [میموری (Memory)](https://hermes-agent.nousresearch.com/docs/user-guide/features/memory)            | مستقل میموری، یوزر پروفائلز، بہترین طریقہ کار              |
+| [MCP انضمام (Integration)](https://hermes-agent.nousresearch.com/docs/user-guide/features/mcp)      | صلاحیتوں کو بڑھانے کے لیے کسی بھی MCP سرور کو جوڑیں        |
+| [کرون (Cron) شیڈیولنگ](https://hermes-agent.nousresearch.com/docs/user-guide/features/cron)         | پلیٹ فارم ڈیلیوری کے ساتھ شیڈول کیے گئے کام                 |
+| [کانٹیکسٹ (Context) فائلز](https://hermes-agent.nousresearch.com/docs/user-guide/features/context-files)| پروجیکٹ کا سیاق و سباق (context) جو ہر بات چیت پر اثر انداز ہوتا ہے |
+| [آرکیٹیکچر (Architecture)](https://hermes-agent.nousresearch.com/docs/developer-guide/architecture) | پروجیکٹ کا ڈھانچہ، ایجنٹ لوپ، اہم کلاسز                    |
+| [تعاون (Contributing)](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing)     | ڈیویلپمنٹ سیٹ اپ، PR کا طریقہ کار، کوڈنگ کا انداز          |
+| [CLI حوالہ جات (Reference)](https://hermes-agent.nousresearch.com/docs/reference/cli-commands)      | تمام کمانڈز اور فلیگز (flags)                              |
+| [انوائرمنٹ ویری ایبلز](https://hermes-agent.nousresearch.com/docs/reference/environment-variables)  | مکمل انوائرمنٹ ویری ایبل حوالہ جات                         |
+
+</div>
+
+---
+
+## OpenClaw سے منتقلی
+
+اگر آپ OpenClaw سے منتقل ہو رہے ہیں، تو ہرمیس آپ کی سیٹنگز، یادیں (memories)، مہارتیں (skills)، اور API کیز کو خود بخود امپورٹ کر سکتا ہے۔
+
+**پہلی بار سیٹ اپ کے دوران:** سیٹ اپ وزرڈ (`hermes setup`) خود بخود `~/.openclaw` کو پہچان لیتا ہے اور کنفیگریشن شروع ہونے سے پہلے مائیگریٹ (migrate) کرنے کا آپشن دیتا ہے۔
+
+**انسٹالیشن کے بعد کسی بھی وقت:**
+
+<div dir="ltr">
+
+```bash
+hermes claw migrate              # انٹرایکٹو مائیگریشن (مکمل پری سیٹ)
+hermes claw migrate --dry-run    # جائزہ لیں کہ کیا کیا مائیگریٹ ہوگا
+hermes claw migrate --preset user-data   # حساس معلومات (secrets) کے بغیر مائیگریٹ کریں
+hermes claw migrate --overwrite  # موجودہ متصادم فائلوں کو اوور رائٹ کریں
+```
+
+</div>
+
+جو چیزیں امپورٹ ہوتی ہیں:
+
+- **SOUL.md** — پرسونا (persona) فائل
+- **میموریز (Memories)** — MEMORY.md اور USER.md کی اندراجات
+- **مہارتیں (Skills)** — صارف کی بنائی گئی مہارتیں → `~/.hermes/skills/openclaw-imports/`
+- **کمانڈ الاؤ لسٹ (allowlist)** — منظوری کے پیٹرنز (approval patterns)
+- **میسجنگ سیٹنگز** — پلیٹ فارم کنفیگریشنز، اجازت یافتہ صارفین، ورکنگ ڈائریکٹری
+- **API کیز** — الاؤ لسٹ شدہ حساس معلومات (ٹیلی گرام، OpenRouter، OpenAI، Anthropic، ElevenLabs)
+- **TTS اثاثے** — ورک اسپیس کی آڈیو فائلیں
+- **ورک اسپیس کی ہدایات** — AGENTS.md (`--workspace-target` کے ساتھ)
+
+تمام آپشنز دیکھنے کے لیے `hermes claw migrate --help` استعمال کریں، یا انٹرایکٹو ایجنٹ کی مدد سے مائیگریٹ کرنے کے لیے `openclaw-migration` سکل کا استعمال کریں (جس میں ڈرائی رن (dry-run) پریویوز شامل ہیں)۔
+
+---
+
+## تعاون کریں (Contributing)
+
+ہم آپ کے تعاون کا خیرمقدم کرتے ہیں! ڈیویلپمنٹ سیٹ اپ، کوڈ کے انداز اور PR کے طریقہ کار کے لیے براہ کرم ہماری [Contributing گائیڈ](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) دیکھیں۔
+
+معاونین (contributors) کے لیے فوری آغاز — کلون (clone) کریں اور `setup-hermes.sh` چلائیں:
+
+<div dir="ltr">
+
+```bash
+git clone https://github.com/NousResearch/hermes-agent.git
+cd hermes-agent
+./setup-hermes.sh     # uv کو انسٹال کرتا ہے، venv بناتا ہے، .[all] کو انسٹال کرتا ہے، اور ~/.local/bin/hermes کا سیم لنک (symlink) بناتا ہے
+./hermes              # خود بخود venv کی شناخت کرتا ہے، پہلے `source` کرنے کی ضرورت نہیں
+```
+
+</div>
+
+مینوئل طریقہ (اوپر والے طریقے کے مساوی):
+
+<div dir="ltr">
+
+```bash
+curl -LsSf https://astral.sh/uv/install.sh | sh
+uv venv .venv --python 3.11
+source .venv/bin/activate
+uv pip install -e ".[all,dev]"
+scripts/run_tests.sh
+```
+
+</div>
+
+---
+
+## کمیونٹی (Community)
+
+- 💬 [ڈسکارڈ (Discord)](https://discord.gg/NousResearch)
+- 📚 [سکلز ہب (Skills Hub)](https://agentskills.io)
+- 🐛 [مسائل (Issues)](https://github.com/NousResearch/hermes-agent/issues)
+- 🔌 [computer-use-linux](https://github.com/avifenesh/computer-use-linux) — ہرمیس اور دیگر MCP ہوسٹس کے لیے لینکس (Linux) ڈیسک ٹاپ کنٹرول MCP سرور، جس میں AT-SPI ایکسیسیبلٹی ٹریز، Wayland/X11 ان پٹ، سکرین شاٹس، اور کمپوزیٹر ونڈو ٹارگیٹنگ شامل ہے۔
+- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — کمیونٹی وی چیٹ (WeChat) برج: ہرمیس ایجنٹ اور OpenClaw کو ایک ہی وی چیٹ اکاؤنٹ پر چلائیں۔
+
+---
+
+## لائسنس (License)
+
+MIT — تفصیلات کے لیے [LICENSE](LICENSE) دیکھیں۔
+
+[نوس ریسرچ (Nous Research)](https://nousresearch.com) کی جانب سے تیار کردہ۔
+
+</div>
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -10,6 +10,7 @@
  <a href="https://github.com/NousResearch/hermes-agent/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-green?style=for-the-badge" alt="License: MIT"></a>
  <a href="https://nousresearch.com"><img src="https://img.shields.io/badge/Built%20by-Nous%20Research-blueviolet?style=for-the-badge" alt="Built by Nous Research"></a>
  <a href="README.md"><img src="https://img.shields.io/badge/Lang-English-lightgrey?style=for-the-badge" alt="English"></a>
+  <a href="README.ur-pk.md"><img src="https://img.shields.io/badge/Lang-اردو-green?style=for-the-badge" alt="اردو"></a>
 </p>

 **由 [Nous Research](https://nousresearch.com) 构建的自进化 AI 代理。** 它是唯一内置学习闭环的智能代理——从经验中创建技能，在使用中改进技能，主动持久化知识，搜索过往对话，并在跨会话中逐步构建对你的深度理解。可以在 $5 的 VPS 上运行，也可以在 GPU 集群上运行，或者使用几乎零成本的 Serverless 基础设施。它不绑定你的笔记本——你可以在 Telegram 上与它对话，而它在云端 VM 上工作。
--- a/acp_adapter/provenance.py
+++ b/acp_adapter/provenance.py
@@ -0,0 +1,127 @@
+"""Derive ACP session-provenance metadata from the existing compression chain.
+
+This is an additive Hermes extension surfaced under ACP ``_meta.hermes`` so
+existing ACP clients ignore it. It carries no new persisted state: everything
+is derived on demand from the ``sessions`` table (``parent_session_id`` /
+``end_reason``), which already models compression-continuation chains.
+
+The ACP/editor ``session_id`` stays the stable public handle. When context
+compression rotates the internal Hermes head, ``build_session_provenance`` lets
+a client see the previous/current internal ids and the lineage root without
+parsing status text, guessing from token drops, or reading ``state.db``.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Dict, Optional
+
+# Bound defensive walks; compression chains this deep are pathological.
+_MAX_WALK = 100
+
+
+def build_session_provenance(
+    db: Any,
+    acp_session_id: str,
+    current_hermes_session_id: str,
+    *,
+    previous_hermes_session_id: Optional[str] = None,
+) -> Optional[Dict[str, Any]]:
+    """Build ``_meta.hermes.sessionProvenance`` for an ACP session.
+
+    Args:
+        db: A ``SessionDB`` (must expose ``get_session``).
+        acp_session_id: The stable ACP/editor-facing session handle.
+        current_hermes_session_id: The live internal Hermes DB session id
+            (``state.agent.session_id``).
+        previous_hermes_session_id: The internal id from before the most recent
+            turn, when known. Supplied by ``prompt()`` to flag a rotation.
+
+    Returns:
+        A dict suitable for ``{"hermes": {"sessionProvenance": <dict>}}`` under
+        ACP ``_meta``; ``None`` if the current id's row can't be read or is empty.
+    """
+    try:
+        row = db.get_session(current_hermes_session_id)
+    except Exception:
+        return None
+    if not row:
+        return None
+
+    parent_id = row.get("parent_session_id")
+    end_reason = row.get("end_reason")  # NOTE(review): never read below — candidate for removal
+
+    # Walk parents until one is missing, unreadable, or already seen (cycle
+    # guard), at most _MAX_WALK hops; the last readable ancestor becomes the
+    # root. Only parents with end_reason == 'compression' count toward depth —
+    # delegate/branch children reuse parent_session_id but aren't compaction boundaries.
+    root_id = current_hermes_session_id
+    compression_depth = 0
+    cursor_parent = parent_id
+    seen = {current_hermes_session_id}
+    for _ in range(_MAX_WALK):
+        if not cursor_parent or cursor_parent in seen:
+            break
+        seen.add(cursor_parent)
+        try:
+            prow = db.get_session(cursor_parent)
+        except Exception:
+            prow = None
+        if not prow:
+            break
+        root_id = cursor_parent
+        if prow.get("end_reason") == "compression":
+            compression_depth += 1
+        cursor_parent = prow.get("parent_session_id")
+
+    # Continuation == the immediate parent ended with end_reason='compression'.
+    # NOTE(review): this re-reads the parent row the walk above normally fetched first.
+    is_continuation = False
+    if parent_id:
+        try:
+            immediate_parent = db.get_session(parent_id)
+        except Exception:
+            immediate_parent = None
+        if immediate_parent and immediate_parent.get("end_reason") == "compression":
+            is_continuation = True
+
+    rotated = bool(
+        previous_hermes_session_id
+        and previous_hermes_session_id != current_hermes_session_id
+    )
+
+    provenance: Dict[str, Any] = {
+        "acpSessionId": acp_session_id,
+        "currentHermesSessionId": current_hermes_session_id,
+        "rootHermesSessionId": root_id,
+        "parentHermesSessionId": parent_id,
+        "sessionKind": "continuation" if is_continuation else "root",
+        "compressionDepth": compression_depth,
+    }
+    if previous_hermes_session_id:
+        provenance["previousHermesSessionId"] = previous_hermes_session_id
+    if rotated:
+        # The head moved during the last turn. The only mechanism that rotates
+        # the internal id mid-turn is compression-driven session splitting.
+        provenance["reason"] = "compression"
+        provenance["creatorKind"] = "compression"
+
+    return provenance
+
+
+def session_provenance_meta(
+    db: Any,
+    acp_session_id: str,
+    current_hermes_session_id: str,
+    *,
+    previous_hermes_session_id: Optional[str] = None,
+) -> Optional[Dict[str, Any]]:
+    """Wrap ``build_session_provenance`` as ``{"hermes": {"sessionProvenance": ...}}``, or ``None`` if it returns ``None``."""
+    prov = build_session_provenance(
+        db,
+        acp_session_id,
+        current_hermes_session_id,
+        previous_hermes_session_id=previous_hermes_session_id,
+    )
+    if prov is None:
+        return None
+    return {"hermes": {"sessionProvenance": prov}}
--- a/acp_adapter/server.py
+++ b/acp_adapter/server.py
@@ -71,6 +71,7 @@ from acp_adapter.events import (
    make_tool_progress_cb,
 )
 from acp_adapter.permissions import make_approval_callback
+from acp_adapter.provenance import session_provenance_meta
 from acp_adapter.session import SessionManager, SessionState, _expand_acp_enabled_toolsets
 from acp_adapter.tools import build_tool_complete, build_tool_start

@@ -709,8 +710,39 @@ class HermesACPAgent(acp.Agent):
                exc_info=True,
            )

-    async def _send_session_info_update(self, session_id: str) -> None:
-        """Send ACP native session metadata after Hermes changes it."""
+    def _provenance_meta(
+        self,
+        acp_session_id: str,
+        current_hermes_session_id: str,
+        previous_hermes_session_id: Optional[str] = None,
+    ) -> Optional[dict]:
+        """Best-effort ``_meta.hermes.sessionProvenance`` payload; logs at debug and returns ``None`` on error."""
+        try:
+            return session_provenance_meta(
+                self.session_manager._get_db(),
+                acp_session_id,
+                current_hermes_session_id,
+                previous_hermes_session_id=previous_hermes_session_id,
+            )
+        except Exception:
+            logger.debug(
+                "Could not build ACP session provenance for %s", acp_session_id, exc_info=True
+            )
+            return None
+
+    async def _send_session_info_update(
+        self,
+        session_id: str,
+        *,
+        current_hermes_session_id: Optional[str] = None,
+        previous_hermes_session_id: Optional[str] = None,
+    ) -> None:
+        """Send ACP native session metadata after Hermes changes it.
+
+        When the internal Hermes head rotated (e.g. compression-driven session
+        split during a turn), pass ``previous_hermes_session_id`` so the
+        attached ``_meta.hermes.sessionProvenance`` flags the rotation reason.
+        """
        if not self._conn:
            return
        try:
@@ -727,10 +759,16 @@ class HermesACPAgent(acp.Agent):
        # the updated_at since we're emitting this notification precisely
        # because the title was just refreshed.
        updated_at = datetime.now(timezone.utc).isoformat()
+        meta = self._provenance_meta(
+            session_id,
+            current_hermes_session_id or session_id,
+            previous_hermes_session_id,
+        )
        update = SessionInfoUpdate(
            session_update="session_info_update",
            title=title if isinstance(title, str) and title.strip() else None,
            updated_at=updated_at,
+            field_meta=meta,
        )
        try:
            await self._conn.session_update(
@@ -1081,6 +1119,9 @@ class HermesACPAgent(acp.Agent):
            session_id=state.session_id,
            models=self._build_model_state(state),
            modes=self._session_modes(state),
+            field_meta=self._provenance_meta(
+                state.session_id, getattr(state.agent, "session_id", state.session_id)
+            ),
        )

    async def load_session(
@@ -1125,6 +1166,9 @@ class HermesACPAgent(acp.Agent):
        return LoadSessionResponse(
            models=self._build_model_state(state),
            modes=self._session_modes(state),
+            field_meta=self._provenance_meta(
+                session_id, getattr(state.agent, "session_id", session_id)
+            ),
        )

    async def resume_session(
@@ -1157,6 +1201,9 @@ class HermesACPAgent(acp.Agent):
        return ResumeSessionResponse(
            models=self._build_model_state(state),
            modes=self._session_modes(state),
+            field_meta=self._provenance_meta(
+                state.session_id, getattr(state.agent, "session_id", state.session_id)
+            ),
        )

    async def cancel(self, session_id: str, **kwargs: Any) -> None:
@@ -1494,6 +1541,11 @@ class HermesACPAgent(acp.Agent):
                        logger.debug("Could not clear ACP session context", exc_info=True)

        try:
+            # Snapshot the internal Hermes DB session id before the turn so we
+            # can detect a compression-driven session rotation afterwards. The
+            # ACP `session_id` stays the stable client handle; agent.session_id
+            # is the live internal head that compression may rotate.
+            pre_turn_hermes_id = getattr(state.agent, "session_id", None)
            # Wrap the executor call in a fresh copy of the current context so
            # concurrent ACP sessions on the shared ThreadPoolExecutor don't
            # stomp on each other's ContextVar writes (HERMES_SESSION_KEY in
@@ -1512,8 +1564,41 @@ class HermesACPAgent(acp.Agent):
            # Persist updated history so sessions survive process restarts.
            self.session_manager.save_session(session_id)

+        # Detect a compression-driven internal session rotation. If the agent's
+        # DB head moved during the turn, emit a session_info_update carrying
+        # _meta.hermes.sessionProvenance so ACP clients can render the boundary
+        # and keep old/new ids in lineage. The ACP session_id is unchanged.
+        post_turn_hermes_id = getattr(state.agent, "session_id", None)
+        if (
+            conn
+            and post_turn_hermes_id
+            and pre_turn_hermes_id
+            and post_turn_hermes_id != pre_turn_hermes_id
+        ):
+            try:
+                await self._send_session_info_update(
+                    session_id,
+                    current_hermes_session_id=post_turn_hermes_id,
+                    previous_hermes_session_id=pre_turn_hermes_id,
+                )
+            except Exception:
+                logger.debug(
+                    "Could not emit ACP provenance update after rotation for %s",
+                    session_id,
+                    exc_info=True,
+                )
+
        final_response = result.get("final_response", "")
-        if final_response:
+        cancelled = bool(state.cancel_event and state.cancel_event.is_set())
+        interrupted = bool(result.get("interrupted")) or cancelled
+        # Hermes' local "waiting for model response" interrupt status is metadata,
+        # not assistant prose — clients get cancellation from stop_reason instead.
+        from agent.conversation_loop import INTERRUPT_WAITING_FOR_MODEL_PREFIX
+
+        suppress_interrupt_response = interrupted and final_response.startswith(
+            INTERRUPT_WAITING_FOR_MODEL_PREFIX
+        )
+        if final_response and not suppress_interrupt_response:
            try:
                from agent.title_generator import maybe_auto_title

@@ -1534,7 +1619,12 @@ class HermesACPAgent(acp.Agent):
                )
            except Exception:
                logger.debug("Failed to auto-title ACP session %s", session_id, exc_info=True)
-        if final_response and conn and (not streamed_message or result.get("response_transformed")):
+        if (
+            final_response
+            and conn
+            and not suppress_interrupt_response
+            and (not streamed_message or result.get("response_transformed"))
+        ):
            # Deliver the final response when streaming did not already send it,
            # or when a plugin hook transformed the response after streaming
            # finished (e.g. transform_llm_output) — otherwise the appended /
@@ -1576,7 +1666,7 @@ class HermesACPAgent(acp.Agent):

        await self._send_usage_update(state)

-        stop_reason = "cancelled" if state.cancel_event and state.cancel_event.is_set() else "end_turn"
+        stop_reason = "cancelled" if cancelled else "end_turn"
        return PromptResponse(stop_reason=stop_reason, usage=usage)

    # ---- Slash commands (headless) -------------------------------------------
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@@ -169,6 +169,7 @@ def init_agent(
    save_trajectories: bool = False,
    verbose_logging: bool = False,
    quiet_mode: bool = False,
+    tool_progress_mode: str = "all",
    ephemeral_system_prompt: str = None,
    log_prefix_chars: int = 100,
    log_prefix: str = "",
@@ -186,6 +187,7 @@ def init_agent(
    thinking_callback: callable = None,
    reasoning_callback: callable = None,
    clarify_callback: callable = None,
+    read_terminal_callback: callable = None,
    step_callback: callable = None,
    stream_delta_callback: callable = None,
    interim_assistant_callback: callable = None,
@@ -280,6 +282,7 @@ def init_agent(
    agent.save_trajectories = save_trajectories
    agent.verbose_logging = verbose_logging
    agent.quiet_mode = quiet_mode
+    agent.tool_progress_mode = tool_progress_mode
    agent.ephemeral_system_prompt = ephemeral_system_prompt
    agent.platform = platform  # "cli", "telegram", "discord", "whatsapp", etc.
    agent._user_id = user_id  # Platform user identifier (gateway sessions)
@@ -415,6 +418,7 @@ def init_agent(
    agent.thinking_callback = thinking_callback
    agent.reasoning_callback = reasoning_callback
    agent.clarify_callback = clarify_callback
+    agent.read_terminal_callback = read_terminal_callback
    agent.step_callback = step_callback
    agent.stream_delta_callback = stream_delta_callback
    agent.interim_assistant_callback = interim_assistant_callback
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@@ -49,7 +49,7 @@ def _ra():


 AGENT_RUNTIME_POST_HOOK_TOOL_NAMES = frozenset(
-    {"todo", "session_search", "memory", "clarify", "delegate_task"}
+    {"todo", "session_search", "memory", "clarify", "read_terminal", "delegate_task"}
 )


@@ -679,15 +679,28 @@ def recover_with_credential_pool(
        # long-running TUI sessions stuck on stale tokens until the user
        # exited and reopened.
        is_entitlement = agent._is_entitlement_failure(error_context, status_code)
+        _auth_haystack = " ".join(
+            str(error_context.get(k) or "").lower()
+            for k in ("message", "reason", "code", "error")
+            if isinstance(error_context, dict)
+        )
+        if (
+            not is_entitlement
+            and status_code == 403
+            and "oauth authentication is currently not allowed for this organization" in _auth_haystack
+        ):
+            is_entitlement = True
+        if (
+            not is_entitlement
+            and status_code == 403
+            and (agent.provider or "") == "anthropic"
+            and getattr(agent, "api_mode", "") == "anthropic_messages"
+        ):
+            is_entitlement = True
        if not is_entitlement and status_code == 403 and (agent.provider or "") == "xai-oauth":
-            _disambiguator_haystack = " ".join(
-                str(error_context.get(k) or "").lower()
-                for k in ("message", "reason", "code", "error")
-                if isinstance(error_context, dict)
-            )
            _is_xai_auth_failure = (
-                "[wke=unauthenticated:" in _disambiguator_haystack
-                or "oauth2 access token could not be validated" in _disambiguator_haystack
+                "[wke=unauthenticated:" in _auth_haystack
+                or "oauth2 access token could not be validated" in _auth_haystack
            )
            if not _is_xai_auth_failure:
                is_entitlement = True
@@ -1784,6 +1797,17 @@ def invoke_tool(agent, function_name: str, function_args: dict, effective_task_i
                ),
                next_args,
            )
+    elif function_name == "read_terminal":
+        def _execute(next_args: dict) -> Any:
+            from tools.read_terminal_tool import read_terminal_tool as _read_terminal_tool
+            return _finish_agent_tool(
+                _read_terminal_tool(
+                    start_line=next_args.get("start_line"),
+                    count=next_args.get("count"),
+                    callback=getattr(agent, "read_terminal_callback", None),
+                ),
+                next_args,
+            )
    elif function_name == "delegate_task":
        def _execute(next_args: dict) -> Any:
            return _finish_agent_tool(agent._dispatch_delegate_task(next_args), next_args)
@@ -1846,6 +1870,27 @@ def repair_tool_call(agent, tool_name: str) -> str | None:
    if not tool_name:
        return None

+    # VolcEngine api/plan workaround (issue #33007): the endpoint's
+    # protocol-translation layer occasionally leaks raw XML attribute
+    # fragments into tool_use.name, e.g.
+    #   `terminal" parameter="command" string="true`
+    #   `execute_code" parameter="code" string="true`
+    #   `session_search" parameter="session_id" string="true`
+    # We trim at the first unambiguous XML/quote character so the rest
+    # of the repair pipeline (lowercase / snake_case / fuzzy match)
+    # can resolve the cleaned name to a real tool.
+    #
+    # Crucially we DO NOT split on whitespace: legitimate inputs like
+    # "write file" must keep flowing through ``_norm`` -> ``write_file``
+    # (covered by test_space_to_underscore in
+    # tests/run_agent/test_repair_tool_call_name.py).
+    for _xml_sep in ('"', "'", "<", ">"):
+        _idx = tool_name.find(_xml_sep)
+        if _idx > 0:
+            tool_name = tool_name[:_idx]
+    if not tool_name:
+        return None
+
    def _norm(s: str) -> str:
        return s.lower().replace("-", "_").replace(" ", "_")

--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -73,20 +73,50 @@ ADAPTIVE_EFFORT_MAP = {
    "minimal": "low",
 }

-# Models that accept the "xhigh" output_config.effort level.  Opus 4.7 added
-# xhigh as a distinct level between high and max; older adaptive-thinking
-# models (4.6) reject it with a 400.  Keep this substring list in sync with
-# the Anthropic migration guide as new model families ship.
-_XHIGH_EFFORT_SUBSTRINGS = ("4-7", "4.7", "4-8", "4.8")
+# ── Anthropic thinking-mode classification ────────────────────────────
+# Claude 4.6 replaced budget-based extended thinking with *adaptive* thinking,
+# and 4.7 additionally forbids the manual ``thinking`` block entirely and drops
+# temperature/top_p/top_k.  Newer Claude releases (4.8, and named models like
+# claude-fable-5) follow the same modern contract — but they share no common
+# version substring, so an allowlist of version numbers ("4.6", "4.7", …) goes
+# stale the moment a model ships without a recognized number and silently
+# routes it down the legacy manual-thinking path.
+#
+# Instead we DEFAULT unknown Claude models to the modern contract and keep an
+# explicit *legacy* list of the older Claude families that still require manual
+# thinking.  This mirrors _get_anthropic_max_output's "default to newest" design
+# (future models are unlikely to regress to the older contract), so each new
+# Claude release works without a code change.
+#
+# Non-Claude Anthropic-Messages models (minimax, qwen3, GLM, …) have no "claude"
+# in their name, so they fall through to the legacy path automatically — exactly
+# what those manual-thinking endpoints need.
+
+# Older Claude families that DON'T support adaptive thinking (manual thinking
+# with budget_tokens only). Substring-matched against the model name.
+_LEGACY_MANUAL_THINKING_CLAUDE_SUBSTRINGS = (
+    "claude-3",          # 3, 3.5, 3.7
+    "claude-opus-4-0", "claude-opus-4.0", "claude-opus-4-1", "claude-opus-4.1",
+    "claude-sonnet-4-0", "claude-sonnet-4.0",
+    "claude-opus-4-2025", "claude-sonnet-4-2025",  # date-stamped 4.0 IDs
+    "claude-opus-4-5", "claude-opus-4.5",
+    "claude-sonnet-4-5", "claude-sonnet-4.5",
+    "claude-haiku-4-5", "claude-haiku-4.5",
+)
+
+# Older Claude families that DON'T accept the "xhigh" effort level (4.6 only
+# supports low/medium/high/max). xhigh arrived with Opus 4.7. Adaptive models
+# not in this list (4.7, 4.8, fable, future) accept xhigh.
+_NO_XHIGH_CLAUDE_SUBSTRINGS = (
+    "claude-opus-4-6", "claude-opus-4.6",
+    "claude-sonnet-4-6", "claude-sonnet-4.6",
+)
+
+
+def _is_claude_model(model: str | None) -> bool:
+    return "claude" in (model or "").lower()

-# Models where extended thinking is deprecated/removed (4.6+ behavior: adaptive
-# is the only supported mode; 4.7 additionally forbids manual thinking entirely
-# and drops temperature/top_p/top_k).
-_ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7", "4-8", "4.8")

-# Models where temperature/top_p/top_k return 400 if set to non-default values.
-# This is the Opus 4.7 contract; future 4.x+ models are expected to follow it.
-_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7", "4-8", "4.8")
 _FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6")

 # ── Max output token limits per Anthropic model ───────────────────────
@@ -94,6 +124,8 @@ _FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6")
 # max_tokens as a mandatory field.  Previously we hardcoded 16384, which
 # starves thinking-enabled models (thinking tokens count toward the limit).
 _ANTHROPIC_OUTPUT_LIMITS = {
+    # Mythos-class named models (claude-fable-5, …) — 1M context, reasoning
+    "claude-fable":      128_000,
    # Claude 4.8
    "claude-opus-4-8":   128_000,
    # Claude 4.7
@@ -208,8 +240,17 @@ def _resolve_anthropic_messages_max_tokens(


 def _supports_adaptive_thinking(model: str) -> bool:
-    """Return True for Claude 4.6+ models that support adaptive thinking."""
-    return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)
+    """Return True for Claude models that use adaptive thinking (4.6+).
+
+    Defaults *unknown* Claude models to adaptive (the modern contract) and
+    only returns False for the explicit legacy list of older Claude families
+    that require manual budget-based thinking. Non-Claude Anthropic-Messages
+    models (minimax, qwen3, …) return False so they keep the manual path.
+    """
+    if not _is_claude_model(model):
+        return False
+    m = model.lower()
+    return not any(v in m for v in _LEGACY_MANUAL_THINKING_CLAUDE_SUBSTRINGS)


 def _supports_xhigh_effort(model: str) -> bool:
@@ -219,18 +260,33 @@ def _supports_xhigh_effort(model: str) -> bool:
    Pre-4.7 adaptive models (Opus/Sonnet 4.6) only accept low/medium/high/max
    and reject xhigh with an HTTP 400. Callers should downgrade xhigh→max
    when this returns False.
+
+    Defaults unknown adaptive Claude models to accepting xhigh (4.7+ contract);
+    only the 4.6 family and legacy manual-thinking models are excluded.
    """
-    return any(v in model for v in _XHIGH_EFFORT_SUBSTRINGS)
+    if not _supports_adaptive_thinking(model):
+        return False
+    m = model.lower()
+    return not any(v in m for v in _NO_XHIGH_CLAUDE_SUBSTRINGS)


 def _forbids_sampling_params(model: str) -> bool:
    """Return True for models that 400 on any non-default temperature/top_p/top_k.

-    Opus 4.7 explicitly rejects sampling parameters; later Claude releases are
-    expected to follow suit.  Callers should omit these fields entirely rather
-    than passing zero/default values (the API rejects anything non-null).
+    Opus 4.7 introduced this restriction; later Claude releases follow it.
+    Defaults unknown Claude models to forbidding sampling params (the modern
+    contract). The 4.6 family still accepts them, and the legacy manual-thinking
+    families (4.5 and older) accept them too, so both are excluded. Non-Claude
+    models are unaffected. Callers should omit these fields entirely rather than
+    passing zero/default values (the API rejects anything non-null).
    """
-    return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS)
+    if not _is_claude_model(model):
+        return False
+    m = model.lower()
+    # 4.6 family is adaptive but still accepts sampling params.
+    if any(v in m for v in _NO_XHIGH_CLAUDE_SUBSTRINGS):
+        return False
+    return not any(v in m for v in _LEGACY_MANUAL_THINKING_CLAUDE_SUBSTRINGS)


 def _supports_fast_mode(model: str) -> bool:
@@ -821,6 +877,7 @@ def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
            capture_output=True,
            text=True,
            timeout=5,
+            stdin=subprocess.DEVNULL,
        )
    except (OSError, subprocess.TimeoutExpired):
        logger.debug("Keychain: security command not available or timed out")
@@ -1163,7 +1220,10 @@ def run_oauth_setup_token() -> Optional[str]:
            "Install it with: npm install -g @anthropic-ai/claude-code"
        )

-    # Run interactively — stdin/stdout/stderr inherited so user can interact
+    # Run interactively — stdin/stdout/stderr inherited so the user can
+    # complete the OAuth login prompt. Must keep inherited stdin; the TUI-EOF
+    # concern does not apply to an interactive login the user explicitly
+    # invokes.  noqa: subprocess-stdin
    try:
        subprocess.run([claude_path, "setup-token"])
    except (KeyboardInterrupt, EOFError):
@@ -1511,6 +1571,15 @@ def _convert_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:

    if ptype == "input_text":
        block: Dict[str, Any] = {"type": "text", "text": part.get("text", "")}
+    elif ptype == "text":
+        # A stored Anthropic text block. Rebuild from whitelisted fields only —
+        # SDK response text blocks carry output-only siblings (parsed_output,
+        # citations=None) that the Messages INPUT schema rejects with HTTP 400
+        # "Extra inputs are not permitted". Do NOT dict(part) it verbatim.
+        block = {"type": "text", "text": part.get("text", "")}
+        cits = part.get("citations")
+        if isinstance(cits, list) and cits:
+            block["citations"] = cits
    elif ptype in {"image_url", "input_image"}:
        image_value = part.get("image_url", {})
        url = image_value.get("url", "") if isinstance(image_value, dict) else str(image_value or "")
@@ -1625,6 +1694,58 @@ def _content_parts_to_anthropic_blocks(parts: Any) -> List[Dict[str, Any]]:
    return out


+def _sanitize_replay_block(b: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+    """Strip output-only fields from a stored Anthropic content block so it is
+    valid as REQUEST input on replay.
+
+    The SDK response objects carry output-only attributes that the Messages
+    *input* schema forbids ("Extra inputs are not permitted"): text blocks get
+    ``parsed_output``/``citations`` (when null), tool_use blocks get ``caller``,
+    etc. ``normalize_response`` captured blocks verbatim via ``_to_plain_data``,
+    so these leak back as input on the next turn → HTTP 400.
+
+    Whitelist per type (NOT a blacklist) so future SDK output-only fields can't
+    reintroduce the bug. Returns a clean block, or None to drop it.
+    """
+    if not isinstance(b, dict):
+        return None
+    btype = b.get("type")
+    if btype == "text":
+        out: Dict[str, Any] = {"type": "text", "text": b.get("text", "")}
+        # citations is input-valid ONLY when it's a non-empty list; the SDK
+        # emits citations=None on responses, which the input schema rejects.
+        cits = b.get("citations")
+        if isinstance(cits, list) and cits:
+            out["citations"] = cits
+        if isinstance(b.get("cache_control"), dict):
+            out["cache_control"] = b["cache_control"]
+        return out
+    if btype == "thinking":
+        out = {"type": "thinking", "thinking": b.get("thinking", "")}
+        if b.get("signature"):
+            out["signature"] = b["signature"]
+        return out
+    if btype == "redacted_thinking":
+        # Only valid with its data payload; drop if missing.
+        return {"type": "redacted_thinking", "data": b["data"]} if b.get("data") else None
+    if btype == "tool_use":
+        out = {
+            "type": "tool_use",
+            "id": _sanitize_tool_id(b.get("id", "")),
+            "name": b.get("name", ""),
+            "input": b.get("input", {}),
+        }
+        if isinstance(b.get("cache_control"), dict):
+            out["cache_control"] = b["cache_control"]
+        return out
+    if btype == "image":
+        src = b.get("source")
+        return {"type": "image", "source": src} if isinstance(src, dict) else None
+    # Unknown/unsupported block type on the input path — drop rather than risk
+    # another "Extra inputs are not permitted".
+    return None
+
+
 def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]:
    """Convert an assistant message to Anthropic content blocks.

@@ -1632,6 +1753,55 @@ def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]:
    reasoning_content injection for Kimi/DeepSeek endpoints.
    """
    content = m.get("content", "")
+    # Anthropic interleaved-thinking fast path: when this turn carries a
+    # verbatim, order-preserving block list (set by normalize_response only
+    # for turns that interleave SIGNED thinking with tool_use), replay it.
+    # Each block is run through _sanitize_replay_block to strip output-only
+    # SDK fields (parsed_output, caller, citations=None, …) that the Messages
+    # INPUT schema forbids — replaying them verbatim caused HTTP 400 "Extra
+    # inputs are not permitted" (text.parsed_output). Block ORDER is preserved
+    # (the reason this channel exists); only forbidden sibling fields are
+    # dropped, leaving thinking signatures and tool_use id/name/input intact.
+    ordered_blocks = m.get("anthropic_content_blocks")
+    if isinstance(ordered_blocks, list) and ordered_blocks:
+        # Re-source each tool_use input from the stored tool_calls map rather
+        # than the captured block. The ordered-blocks list captures tool_use
+        # input from the RAW API response (normalize_response), which is NOT
+        # credential-redacted; tool_calls[].function.arguments IS redacted at
+        # storage time (build_assistant_message, #19798). Replaying the raw
+        # block input would resurrect a secret the model inlined into a tool
+        # call (e.g. terminal(command="curl -H 'Authorization: Bearer sk-...'"))
+        # onto the wire, even though the same value is redacted everywhere else
+        # in history. Keying by sanitized tool id preserves interleave order
+        # (the reason this channel exists) while swapping in the redacted
+        # input. Adapted from #36071 (replay-time tool-input re-sourcing).
+        redacted_input_by_id: Dict[str, Any] = {}
+        for tc in m.get("tool_calls", []) or []:
+            if not isinstance(tc, dict):
+                continue
+            fn = tc.get("function", {}) or {}
+            raw_args = fn.get("arguments", "{}")
+            try:
+                parsed_args = json.loads(raw_args) if isinstance(raw_args, str) else raw_args
+            except (json.JSONDecodeError, ValueError):
+                parsed_args = {}
+            redacted_input_by_id[_sanitize_tool_id(tc.get("id", ""))] = parsed_args
+        replayed: List[Dict[str, Any]] = []
+        for b in ordered_blocks:
+            clean = _sanitize_replay_block(b)
+            if clean is None:
+                continue
+            if clean.get("type") == "tool_use":
+                # Override raw (un-redacted) input with the redacted copy when
+                # we have one for this id; fall back to the sanitized block
+                # input only if the tool_call is missing (shape mismatch).
+                redacted = redacted_input_by_id.get(clean.get("id", ""))
+                if redacted is not None:
+                    clean["input"] = redacted
+            replayed.append(clean)
+        if replayed:
+            return {"role": "assistant", "content": replayed}
+
    blocks = _extract_preserved_thinking_blocks(m)
    if content:
        if isinstance(content, list):
@@ -2301,3 +2471,43 @@ def build_anthropic_kwargs(
        kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}

    return kwargs
+
+
+# Keys that belong exclusively to the OpenAI Responses / Codex API shape.
+# The Anthropic Messages SDK (``messages.create()`` / ``messages.stream()``)
+# raises ``TypeError: ... got an unexpected keyword argument`` on any of them.
+_RESPONSES_ONLY_KWARGS = frozenset(
+    {"instructions", "input", "store", "parallel_tool_calls"}
+)
+
+
+def sanitize_anthropic_kwargs(api_kwargs: Any, *, log_prefix: str = "") -> Any:
+    """Drop Responses-API-only keys before an Anthropic Messages SDK call.
+
+    Defensive boundary guard for #31673: under rare api_mode-flip races
+    (e.g. a concurrent auxiliary call mutating a shared agent between the
+    kwargs build and the stream dispatch), a Responses-shaped payload
+    carrying ``instructions=`` can reach ``messages.stream()`` /
+    ``messages.create()``. The Anthropic SDK rejects it with a
+    non-retryable ``TypeError`` that nukes the whole turn and propagates
+    through the entire fallback chain.
+
+    Mutates ``api_kwargs`` in place and returns it. When a foreign key is
+    present we log a WARNING so the underlying race stays visible in the
+    wild instead of being silently papered over.
+    """
+    if not isinstance(api_kwargs, dict):
+        return api_kwargs
+    leaked = _RESPONSES_ONLY_KWARGS.intersection(api_kwargs)
+    if leaked:
+        for _key in leaked:
+            api_kwargs.pop(_key, None)
+        logger.warning(
+            "%sStripped Responses-only kwarg(s) %s from an Anthropic Messages "
+            "call (api_mode flip race — see #31673). The call will proceed; "
+            "this breadcrumb means a kwargs build ran under a Responses "
+            "api_mode while dispatch ran under anthropic_messages.",
+            log_prefix,
+            sorted(leaked),
+        )
+    return api_kwargs
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -102,7 +102,7 @@ OpenAI = _OpenAIProxy()  # module-level name, resolves lazily on call/isinstance
 from agent.credential_pool import load_pool
 from hermes_cli.config import get_hermes_home
 from hermes_constants import OPENROUTER_BASE_URL
-from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_vars
+from utils import base_url_host_matches, base_url_hostname, model_forces_max_completion_tokens, normalize_proxy_env_vars

 logger = logging.getLogger(__name__)

@@ -637,54 +637,6 @@ def _pool_runtime_base_url(entry: Any, fallback: str = "") -> str:
 # calls to the Codex Responses API so callers don't need any changes.


-def _convert_content_for_responses(content: Any) -> Any:
-    """Convert chat.completions content to Responses API format.
-
-    chat.completions uses:
-      {"type": "text", "text": "..."}
-      {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}
-
-    Responses API uses:
-      {"type": "input_text", "text": "..."}
-      {"type": "input_image", "image_url": "data:image/png;base64,..."}
-
-    If content is a plain string, it's returned as-is (the Responses API
-    accepts strings directly for text-only messages).
-    """
-    if isinstance(content, str):
-        return content
-    if not isinstance(content, list):
-        return str(content) if content else ""
-
-    converted: List[Dict[str, Any]] = []
-    for part in content:
-        if not isinstance(part, dict):
-            continue
-        ptype = part.get("type", "")
-        if ptype == "text":
-            converted.append({"type": "input_text", "text": part.get("text", "")})
-        elif ptype == "image_url":
-            # chat.completions nests the URL: {"image_url": {"url": "..."}}
-            image_data = part.get("image_url", {})
-            url = image_data.get("url", "") if isinstance(image_data, dict) else str(image_data)
-            entry: Dict[str, Any] = {"type": "input_image", "image_url": url}
-            # Preserve detail if specified
-            detail = image_data.get("detail") if isinstance(image_data, dict) else None
-            if detail:
-                entry["detail"] = detail
-            converted.append(entry)
-        elif ptype in {"input_text", "input_image"}:
-            # Already in Responses format — pass through
-            converted.append(part)
-        else:
-            # Unknown content type — try to preserve as text
-            text = part.get("text", "")
-            if text:
-                converted.append({"type": "input_text", "text": text})
-
-    return converted or ""
-
-
 class _CodexCompletionsAdapter:
    """Drop-in shim that accepts chat.completions.create() kwargs and
    routes them through the Codex Responses streaming API."""
@@ -697,26 +649,37 @@ class _CodexCompletionsAdapter:
        messages = kwargs.get("messages", [])
        model = kwargs.get("model", self._model)

-        # Separate system/instructions from conversation messages.
-        # Convert chat.completions multimodal content blocks to Responses
-        # API format (input_text / input_image instead of text / image_url).
+        # Separate system/instructions from replayable conversation messages,
+        # then route the rest through the SINGLE shared chat->Responses
+        # converter used by the main agent transport
+        # (agent/transports/codex.py). Maintaining a private conversion loop
+        # here let chat-style messages with role="tool" leak straight into
+        # Responses input[] — which the Responses API rejects with
+        # "Invalid value: 'tool'. Supported values are: 'assistant', 'system',
+        # 'developer', and 'user'." (issue #5709, hit hard by flush_memories()
+        # / compression replaying real session history that includes assistant
+        # tool_calls + role="tool" results). The shared converter encodes
+        # assistant tool calls as `function_call` items and tool results as
+        # `function_call_output` items with a valid call_id, so every
+        # Responses path normalizes tool history identically and cannot drift.
+        from agent.codex_responses_adapter import _chat_messages_to_responses_input
+
        instructions = "You are a helpful assistant."
-        input_msgs: List[Dict[str, Any]] = []
+        replay_messages: List[Dict[str, Any]] = []
        for msg in messages:
            role = msg.get("role", "user")
            content = msg.get("content") or ""
            if role == "system":
                instructions = content if isinstance(content, str) else str(content)
            else:
-                input_msgs.append({
-                    "role": role,
-                    "content": _convert_content_for_responses(content),
-                })
+                replay_messages.append(msg)
+
+        input_items = _chat_messages_to_responses_input(replay_messages)

        resp_kwargs: Dict[str, Any] = {
            "model": model,
            "instructions": instructions,
-            "input": input_msgs or [{"role": "user", "content": ""}],
+            "input": input_items or [{"role": "user", "content": ""}],
            "store": False,
        }

@@ -2513,6 +2476,25 @@ def _is_connection_error(exc: Exception) -> bool:
    return False


+def _is_transient_transport_error(exc: Exception) -> bool:
+    """Return True for a one-off transport blip worth retrying ONCE on the
+    same provider before any provider/model fallback.
+
+    Covers connection/streaming-close errors (via the canonical
+    ``_is_connection_error`` detector, shared so the two cannot drift) plus a
+    pure 5xx/408 HTTP status. Deliberately narrow: this is the "retry the
+    same target once" gate, distinct from ``_is_payment_error`` /
+    ``_is_auth_error`` / ``_is_rate_limit_error`` which the except-chain
+    handles by switching provider, refreshing creds, or rotating the pool.
+    """
+    if _is_connection_error(exc):
+        return True
+    status = getattr(exc, "status_code", None) or getattr(
+        getattr(exc, "response", None), "status_code", None
+    )
+    return isinstance(status, int) and (status == 408 or 500 <= status < 600)
+
+
 def _is_auth_error(exc: Exception) -> bool:
    """Detect auth failures that should trigger provider-specific refresh."""
    status = getattr(exc, "status_code", None)
@@ -4318,13 +4300,15 @@ def get_auxiliary_extra_body() -> dict:
    return _nous_extra_body() if auxiliary_is_nous else {}


-def auxiliary_max_tokens_param(value: int) -> dict:
+def auxiliary_max_tokens_param(value: int, *, model: Optional[str] = None) -> dict:
    """Return the correct max tokens kwarg for the auxiliary client's provider.
-    
+
    OpenRouter and local models use 'max_tokens'. Direct OpenAI with newer
-    models (gpt-4o, o-series, gpt-5+) requires 'max_completion_tokens'.
+    models (gpt-4o, gpt-4.1, gpt-5+, o-series) requires 'max_completion_tokens'.
    The Codex adapter translates max_tokens internally, so we use max_tokens
-    for it as well.
+    for it as well. Pass ``model`` so third-party OpenAI-compatible endpoints
+    fronting the newer families are also recognised — URL-only detection
+    misses the case where a custom base URL serves e.g. ``gpt-5.4``.
    """
    custom_base = _current_custom_base_url()
    or_key = os.getenv("OPENROUTER_API_KEY")
@@ -4334,6 +4318,9 @@ def auxiliary_max_tokens_param(value: int) -> dict:
            and _read_nous_auth() is None
            and base_url_hostname(custom_base) in {"api.openai.com", "api.githubcopilot.com"}):
        return {"max_completion_tokens": value}
+    # ...and for any caller serving a newer OpenAI-family model by name.
+    if model_forces_max_completion_tokens(model):
+        return {"max_completion_tokens": value}
    return {"max_tokens": value}


@@ -5184,8 +5171,28 @@ def call_llm(
    # Handle unsupported temperature, max_tokens vs max_completion_tokens retry,
    # then payment fallback.
    try:
-        return _validate_llm_response(
-            client.chat.completions.create(**kwargs), task)
+        # Retry ONCE on the same provider for a one-off transient transport
+        # blip (streaming-close / incomplete chunked read / 5xx / 408) before
+        # the except-chain below escalates to provider/model fallback. A
+        # single dropped connection shouldn't abandon an otherwise-healthy
+        # provider. A second failure (or any non-transient error) falls
+        # through to ``first_err`` and the existing fallback handling
+        # unchanged. This is the unified home for the transient retry that
+        # every auxiliary task (compression, memory flush, title-gen,
+        # session-search, vision) shares. (PR #16587)
+        try:
+            return _validate_llm_response(
+                client.chat.completions.create(**kwargs), task)
+        except Exception as transient_err:
+            if not _is_transient_transport_error(transient_err):
+                raise
+            logger.info(
+                "Auxiliary %s: transient transport error; retrying once on "
+                "the same provider before fallback: %s",
+                task or "call", transient_err,
+            )
+            return _validate_llm_response(
+                client.chat.completions.create(**kwargs), task)
    except Exception as first_err:
        if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
            retry_kwargs = dict(kwargs)
@@ -5651,8 +5658,22 @@ async def async_call_llm(
        kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])

    try:
-        return _validate_llm_response(
-            await client.chat.completions.create(**kwargs), task)
+        # Retry ONCE on the same provider for a transient transport blip
+        # before the except-chain escalates to fallback — see call_llm()
+        # for the rationale. (PR #16587)
+        try:
+            return _validate_llm_response(
+                await client.chat.completions.create(**kwargs), task)
+        except Exception as transient_err:
+            if not _is_transient_transport_error(transient_err):
+                raise
+            logger.info(
+                "Auxiliary %s (async): transient transport error; retrying "
+                "once on the same provider before fallback: %s",
+                task or "call", transient_err,
+            )
+            return _validate_llm_response(
+                await client.chat.completions.create(**kwargs), task)
    except Exception as first_err:
        if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
            retry_kwargs = dict(kwargs)
--- a/agent/background_review.py
+++ b/agent/background_review.py
@@ -449,6 +449,17 @@ def _run_review_in_thread(
            # if a future code path bypasses the cache.
            review_agent.session_start = agent.session_start
            review_agent.session_id = agent.session_id
+            # Never let the review fork compress. It shares the parent's
+            # session_id, so if it won a compression race it would rotate the
+            # parent into a NEW child that the gateway never adopts (the fork
+            # is single-lifecycle and dies right after this run_conversation).
+            # The foreground turn would then start from the stale parent and
+            # compress it again, leaving the same parent with two sibling
+            # children (issue #38727). Review also needs full context to
+            # produce a good memory/skill summary — compressing would strip
+            # detail. Both compression triggers in conversation_loop.py gate on
+            # agent.compression_enabled, so this short-circuits both paths.
+            review_agent.compression_enabled = False

            from model_tools import get_tool_definitions
            from hermes_cli.plugins import (
--- a/agent/bedrock_adapter.py
+++ b/agent/bedrock_adapter.py
@@ -208,6 +208,41 @@ def is_stale_connection_error(exc: BaseException) -> bool:
    return False


+def is_streaming_access_denied_error(exc: BaseException) -> bool:
+    """Tell whether ``exc`` is AWS refusing ``bedrock:InvokeModelWithResponseStream``.
+
+    A least-privilege IAM policy that grants only ``bedrock:InvokeModel`` makes
+    ``converse_stream()`` fail with an ``AccessDeniedException`` that names the
+    streaming action in its message, for example::
+
+        User: arn:aws:iam::123456789012:user/x is not authorized to perform:
+        bedrock:InvokeModelWithResponseStream on resource: ...
+
+    The denial cannot clear up within the session, so retrying the stream is
+    pointless; callers are expected to switch to the non-streaming
+    ``converse()`` path (``bedrock:InvokeModel``) rather than spend retries.
+
+    The check is intentionally driven by the message text. boto3 raises a
+    ``ClientError`` carrying ``Error.Code == "AccessDeniedException"`` while the
+    AnthropicBedrock SDK re-wraps the same AWS response in its own exception
+    classes; in both cases the action name survives in the message.
+
+    Args:
+        exc: The exception raised by the streaming call.
+
+    Returns:
+        ``True`` only when the message names the streaming action *and* the
+        error is an authorization failure; ``False`` otherwise.
+    """
+    lowered = str(exc).lower()
+    if "invokemodelwithresponsestream" in lowered:
+        # Canonical form: a botocore ClientError with an explicit denial code.
+        try:
+            from botocore.exceptions import ClientError as _BotoClientError
+        except ImportError:  # pragma: no cover — botocore always present with boto3
+            _BotoClientError = None  # type: ignore[assignment]
+        if _BotoClientError is not None and isinstance(exc, _BotoClientError):
+            error_info = (getattr(exc, "response", None) or {}).get("Error", {})
+            denied_codes = ("AccessDeniedException", "UnauthorizedException")
+            return error_info.get("Code", "") in denied_codes
+        # Wrapped forms (e.g. AnthropicBedrock SDK PermissionDeniedError): fall
+        # back to the authorization-failure wording AWS puts in the message.
+        return any(
+            phrase in lowered for phrase in ("not authorized", "accessdenied")
+        )
+    # The streaming action is not mentioned at all — unrelated error.
+    return False
+
+
 # ---------------------------------------------------------------------------
 # AWS credential detection
 # ---------------------------------------------------------------------------
@@ -1003,6 +1038,16 @@ def call_converse_stream(
    try:
        response = client.converse_stream(**kwargs)
    except Exception as exc:
+        if is_streaming_access_denied_error(exc):
+            # IAM allows bedrock:InvokeModel but not
+            # InvokeModelWithResponseStream — permanent for this session.
+            # Fall back to the non-streaming converse() path.
+            logger.info(
+                "bedrock: converse_stream denied by IAM on (region=%s, model=%s) — "
+                "falling back to non-streaming converse().",
+                region, model,
+            )
+            return normalize_converse_response(client.converse(**kwargs))
        if is_stale_connection_error(exc):
            logger.warning(
                "bedrock: stale-connection error on converse_stream(region=%s, "
--- a/agent/chat_completion_helpers.py
+++ b/agent/chat_completion_helpers.py
@@ -139,6 +139,15 @@ def interruptible_api_call(agent, api_kwargs: dict):
    result = {"response": None, "error": None}
    request_client_holder = {"client": None, "owner_tid": None}
    request_client_lock = threading.Lock()
+    # Request-local cancellation flag. Distinct from agent._interrupt_requested
+    # because that flag is cleared at run_conversation() turn boundaries, but
+    # this daemon worker thread can outlive the turn (the gateway caches
+    # AIAgent instances per session). Tracks whether THIS specific request was
+    # cancelled by the main thread's interrupt handler, so the transport error
+    # that is the expected consequence of our own force-close isn't misread as
+    # a network bug and surfaced to the caller. (PR #6600 — cascading interrupt
+    # hang.)
+    _request_cancelled = {"value": False}

    def _set_request_client(client):
        with request_client_lock:
@@ -229,6 +238,17 @@ def interruptible_api_call(agent, api_kwargs: dict):
                )
                result["response"] = request_client.chat.completions.create(**api_kwargs)
        except Exception as e:
+            # If the request was cancelled by the main thread's interrupt
+            # handler, the transport error is the expected consequence of our
+            # own force-close, NOT a network bug. Swallow it instead of
+            # surfacing — the main thread raises InterruptedError. (#6600)
+            if _request_cancelled["value"]:
+                logger.debug(
+                    "Non-streaming worker caught %s after request cancellation — "
+                    "exiting without surfacing a network error.",
+                    type(e).__name__,
+                )
+                return
            result["error"] = e
        finally:
            _close_request_client_once("request_complete")
@@ -506,6 +526,14 @@ def interruptible_api_call(agent, api_kwargs: dict):
            break

        if agent._interrupt_requested:
+            # Mark THIS request cancelled before force-closing so the worker's
+            # exception handler recognizes the forced transport error as a
+            # cancel and exits cleanly instead of surfacing a network error or
+            # (in the streaming path) burning full retry cycles. (#6600)
+            _request_cancelled["value"] = True
+            logger.debug(
+                "Force-closing httpx client due to interrupt (not a network error)."
+            )
            # Force-close the in-flight worker-local HTTP connection to stop
            # token generation without poisoning the shared client used to
            # seed future retries.
@@ -924,6 +952,18 @@ def build_assistant_message(agent, assistant_message, finish_reason: str) -> dic
        if preserved:
            msg["reasoning_details"] = preserved

+    # Anthropic interleaved-thinking replay: when a turn interleaves signed
+    # thinking blocks with tool_use, the parallel reasoning_details +
+    # tool_calls fields lose the cross-type ordering, and reconstruction
+    # front-loads thinking — reordering signed blocks and triggering HTTP 400
+    # ("thinking ... blocks in the latest assistant message cannot be
+    # modified"). Carry the verbatim ordered block list so the adapter can
+    # replay the latest assistant message unchanged. See
+    # agent/transports/anthropic.py and agent/anthropic_adapter.py.
+    ordered_blocks = getattr(assistant_message, "anthropic_content_blocks", None)
+    if ordered_blocks:
+        msg["anthropic_content_blocks"] = ordered_blocks
+
    # Codex Responses API: preserve encrypted reasoning items for
    # multi-turn continuity. These get replayed as input on the next turn.
    codex_items = getattr(assistant_message, "codex_reasoning_items", None)
@@ -1575,6 +1615,8 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                    _get_bedrock_runtime_client,
                    invalidate_runtime_client,
                    is_stale_connection_error,
+                    is_streaming_access_denied_error,
+                    normalize_converse_response,
                    stream_converse_with_callbacks,
                )
                region = api_kwargs.pop("__bedrock_region__", "us-east-1")
@@ -1583,6 +1625,29 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                try:
                    raw_response = client.converse_stream(**api_kwargs)
                except Exception as _bedrock_exc:
+                    # IAM policies scoped to bedrock:InvokeModel only (no
+                    # InvokeModelWithResponseStream) reject converse_stream()
+                    # with AccessDeniedException. That denial is permanent for
+                    # the session — fall back to the non-streaming converse()
+                    # inline (it maps to bedrock:InvokeModel) and disable
+                    # streaming for subsequent calls so we don't re-fail every
+                    # turn.
+                    if is_streaming_access_denied_error(_bedrock_exc):
+                        agent._disable_streaming = True
+                        agent._safe_print(
+                            "\n⚠  AWS IAM denied bedrock:InvokeModelWithResponseStream — "
+                            "falling back to non-streaming InvokeModel.\n"
+                            "   Grant that action to restore streaming output.\n"
+                        )
+                        logger.info(
+                            "bedrock: converse_stream denied by IAM (%s) — "
+                            "using non-streaming converse() for this session.",
+                            type(_bedrock_exc).__name__,
+                        )
+                        result["response"] = normalize_converse_response(
+                            client.converse(**api_kwargs)
+                        )
+                        return
                    # Evict the cached client on stale-connection failures
                    # so the outer retry loop builds a fresh client/pool.
                    if is_stale_connection_error(_bedrock_exc):
@@ -1625,6 +1690,14 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
    result = {"response": None, "error": None, "partial_tool_names": []}
    request_client_holder = {"client": None, "diag": None, "owner_tid": None}
    request_client_lock = threading.Lock()
+    # Request-local cancellation flag — see interruptible_api_call for the full
+    # rationale. The streaming retry loop is where the 7-minute cascading-
+    # interrupt hang originated: a force-close raised RemoteProtocolError, the
+    # loop classified it as a transient network error, and burned full retry
+    # cycles (and emitted "reconnecting" noise) on a request the user already
+    # cancelled. The token lets the worker recognize its own forced close and
+    # exit immediately instead of retrying. (PR #6600.)
+    _request_cancelled = {"value": False}

    def _set_request_client(client):
        with request_client_lock:
@@ -1662,6 +1735,14 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
    # poll loop uses this to detect stale connections that keep receiving
    # SSE keep-alive pings but no actual data.
    last_chunk_time = {"t": time.time()}
+    # Stale-stream patience, shared between the httpx socket read timeout
+    # (built in ``_call_chat_completions`` below) and the stale-stream detector
+    # (computed further down, before the worker thread starts).  Initialized
+    # here so the read-timeout builder can floor itself at the stale value and
+    # never fire before the detector.  ``None`` until the detector value is
+    # resolved, so the builder degrades to its plain default if it ever runs
+    # first.
+    _stream_stale_timeout = None

    def _fire_first_delta():
        if not first_delta_fired["done"] and on_first_delta:
@@ -1698,6 +1779,26 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                    "Local provider detected (%s) — stream read timeout raised to %.0fs",
                    agent.base_url, _stream_read_timeout,
                )
+            elif (
+                _stream_read_timeout == 120.0
+                and _stream_stale_timeout is not None
+                and _stream_stale_timeout != float("inf")
+                and _stream_stale_timeout > _stream_read_timeout
+            ):
+                # Cloud reasoning models (e.g. Opus) routinely pause mid-stream
+                # for minutes during extended thinking.  The stale-stream
+                # detector is deliberately scaled up to tolerate this (180–300s,
+                # see the stale-timeout block below), but the raw httpx socket
+                # read timeout defaulted to a flat 120s and fired *first* —
+                # tearing down a healthy reasoning stream before the stale
+                # detector (which owns retry + diagnostics) could act.  Keep the
+                # socket read timeout in step with the detector so it no longer
+                # preempts it.
+                _stream_read_timeout = _stream_stale_timeout
+                logger.debug(
+                    "Cloud reasoning stream — read timeout raised to %.0fs to "
+                    "match stale-stream detector", _stream_read_timeout,
+                )
        # Cap connect/pool at 60s even when provider timeout is higher.
        # connect/pool cover TCP handshake, not model inference.
        _conn_cap = min(_base_timeout, 60.0) if _provider_timeout_cfg is not None else 30.0
@@ -1950,6 +2051,58 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                "(possible upstream error or malformed SSE response)."
            )

+        # A stream that delivered a tool call but only partial/unparseable
+        # JSON args splits into two very different cases:
+        #
+        #   1. Provider sent finish_reason="length" → a genuine output-cap
+        #      truncation.  Boosting max_tokens on retry is the right move.
+        #
+        #   2. Provider sent NO finish_reason (the SSE simply stopped after
+        #      the opening "{" with no terminator and no [DONE]) → the
+        #      upstream dropped/stalled the connection mid tool-call.  This
+        #      is NOT an output cap — the model never reported hitting one.
+        #      Some dedicated endpoints (e.g. NVIDIA Nemotron Ultra on the
+        #      Nous dedicated endpoint) stall for minutes during large
+        #      tool-arg generation, then close the stream cleanly without a
+        #      finish_reason.  Stamping "length" here sends it down the
+        #      max_tokens-boost truncation path, which retries 3× to no
+        #      effect and finally reports the misleading "Response truncated
+        #      due to output length limit" — the red herring this guards
+        #      against.  Route it through the partial-stream-stub path
+        #      instead so the loop reports an honest mid-tool-call stream
+        #      drop and fails fast rather than escalating output budget.
+        _tool_args_dropped_no_finish = has_truncated_tool_args and finish_reason is None
+        if _tool_args_dropped_no_finish:
+            _dropped_names = [
+                (tool_calls_acc[idx]["function"]["name"] or "?")
+                for idx in sorted(tool_calls_acc)
+            ]
+            logger.warning(
+                "Stream ended with no finish_reason while a tool call's "
+                "arguments were still incomplete (tools=%s); treating as a "
+                "mid-tool-call stream drop, not an output-length truncation.",
+                _dropped_names,
+            )
+            full_reasoning = "".join(reasoning_parts) or None
+            mock_message = SimpleNamespace(
+                role=role,
+                content=full_content,
+                tool_calls=None,
+                reasoning_content=full_reasoning,
+            )
+            mock_choice = SimpleNamespace(
+                index=0,
+                message=mock_message,
+                finish_reason=FINISH_REASON_LENGTH,
+            )
+            return SimpleNamespace(
+                id=PARTIAL_STREAM_STUB_ID,
+                model=model_name,
+                choices=[mock_choice],
+                usage=usage_obj,
+                _dropped_tool_names=_dropped_names or None,
+            )
+
        effective_finish_reason = finish_reason or "stop"
        if has_truncated_tool_args:
            effective_finish_reason = "length"
@@ -1988,6 +2141,14 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
        # Per-attempt diagnostic dict for the retry block to consume.
        _diag = agent._stream_diag_init()
        request_client_holder["diag"] = _diag
+        # Defensive: strip Responses-only kwargs (instructions, input, ...)
+        # that can leak in under an api_mode-flip race. The Anthropic SDK
+        # raises a non-retryable TypeError on them, killing the turn. See
+        # #31673 / sanitize_anthropic_kwargs().
+        from agent.anthropic_adapter import sanitize_anthropic_kwargs
+        sanitize_anthropic_kwargs(
+            api_kwargs, log_prefix=getattr(agent, "log_prefix", "")
+        )
        # Use the Anthropic SDK's streaming context manager
        with agent._anthropic_client.messages.stream(**api_kwargs) as stream:
            # The Anthropic SDK exposes the raw httpx response on
@@ -2078,6 +2239,21 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                        result["response"] = _call_chat_completions()
                    return  # success
                except Exception as e:
+                    # If the main poll loop force-closed this request because
+                    # of an interrupt, the resulting transport error is the
+                    # expected consequence of our own close — NOT a transient
+                    # network error. Exit immediately: no retry, no fallback,
+                    # no "reconnecting" status. The outer poll loop raises
+                    # InterruptedError. This is the fix for the cascading-
+                    # interrupt hang where doomed retries burned full
+                    # stream-stale-timeout cycles. (#6600)
+                    if _request_cancelled["value"]:
+                        logger.debug(
+                            "Streaming worker caught %s after request "
+                            "cancellation — exiting without retry.",
+                            type(e).__name__,
+                        )
+                        return
                    _is_timeout = isinstance(
                        e, (_httpx.ReadTimeout, _httpx.ConnectTimeout, _httpx.PoolTimeout)
                    )
@@ -2273,9 +2449,34 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                            "stream" in _err_lower
                            and "not supported" in _err_lower
                        )
-                        if _is_stream_unsupported:
+                        # AWS Bedrock (AnthropicBedrock SDK path): IAM policies
+                        # with bedrock:InvokeModel but not
+                        # InvokeModelWithResponseStream reject messages.stream()
+                        # with a permission error naming the streaming action.
+                        # Permanent for the session — flip to non-streaming
+                        # (messages.create() maps to bedrock:InvokeModel).
+                        _is_bedrock_stream_denied = False
+                        if (
+                            not _is_stream_unsupported
+                            and "invokemodelwithresponsestream" in _err_lower
+                        ):
+                            # Cheap message pre-check before importing the
+                            # adapter — bedrock_adapter triggers a lazy boto3
+                            # install at import time, which must not run for
+                            # unrelated providers' stream errors.
+                            from agent.bedrock_adapter import (
+                                is_streaming_access_denied_error,
+                            )
+                            _is_bedrock_stream_denied = (
+                                is_streaming_access_denied_error(e)
+                            )
+                        if _is_stream_unsupported or _is_bedrock_stream_denied:
                            agent._disable_streaming = True
                            agent._safe_print(
+                                "\n⚠  AWS IAM denied bedrock:InvokeModelWithResponseStream. "
+                                "Switching to non-streaming.\n"
+                                "   Grant that action to restore streaming output.\n"
+                                if _is_bedrock_stream_denied else
                                "\n⚠  Streaming is not supported for this "
                                "model/provider. Switching to non-streaming.\n"
                                "   To avoid this delay, set display.streaming: false "
@@ -2387,6 +2588,15 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
            )

        if agent._interrupt_requested:
+            # Mark THIS request cancelled before force-closing so the worker's
+            # exception handler recognizes the forced transport error as a
+            # cancel and exits without retrying or surfacing a network error.
+            # (#6600)
+            _request_cancelled["value"] = True
+            logger.debug(
+                "Force-closing streaming httpx client due to interrupt "
+                "(not a network error)."
+            )
            try:
                if agent.api_mode == "anthropic_messages":
                    agent._anthropic_client.close()
--- a/agent/codex_runtime.py
+++ b/agent/codex_runtime.py
@@ -25,6 +25,154 @@ from typing import Any, Dict, List
 logger = logging.getLogger(__name__)


+def _coerce_usage_int(value: Any) -> int:
+    """Coerce a raw usage counter into a non-negative ``int``.
+
+    Usage payloads arrive as loosely-typed JSON, so this accepts ints, floats
+    and numeric strings and maps everything else to ``0``.
+
+    Args:
+        value: The raw counter value (any type).
+
+    Returns:
+        ``int(value)`` clamped to ``>= 0`` for ints, floats (truncated toward
+        zero) and integer strings; ``0`` for bools, ``None``, unparseable
+        strings, non-finite floats (NaN / ±inf) and any other type.
+    """
+    # bool is an int subclass, but True/False is never a token count.
+    if isinstance(value, bool):
+        return 0
+    if isinstance(value, (int, float, str)):
+        try:
+            return max(int(value), 0)
+        except (ValueError, OverflowError):
+            # ValueError: non-integer string or NaN float.
+            # OverflowError: ±inf float. Neither is a usable count, and an
+            # uncaught raise here would abort the caller's usage accounting.
+            return 0
+    return 0
+
+
+def _record_codex_app_server_usage(agent, turn) -> dict[str, Any]:
+    """Translate Codex app-server token usage into Hermes accounting.
+
+    Codex app-server reports usage via thread/tokenUsage/updated as:
+    inputTokens, cachedInputTokens, outputTokens, reasoningOutputTokens,
+    totalTokens.
+
+    Hermes' canonical prompt bucket includes uncached input + cached input.
+    The Codex app-server protocol does not currently expose cache-write tokens,
+    so that bucket remains zero on this runtime.
+
+    Even when Codex omits usage for a turn, Hermes should still count that turn
+    as one API call for session/status accounting.
+
+    Args:
+        agent: The agent whose session counters, compressor and session DB
+            are updated in place.
+        turn: The finished app-server turn; ``token_usage_last`` and
+            ``model_context_window`` are read from it when present.
+
+    Returns:
+        An empty dict when the turn carries no usable usage payload.
+        Otherwise the normalized usage counters plus ``last_prompt_tokens``,
+        ``estimated_cost_usd`` (``None`` when no estimate is available),
+        ``cost_status`` and ``cost_source``.
+    """
+    # Count the turn before looking at usage so it is recorded either way.
+    agent.session_api_calls += 1
+
+    usage = getattr(turn, "token_usage_last", None)
+    if not isinstance(usage, dict) or not usage:
+        # No usage payload: persist only the API-call increment (best effort)
+        # and report nothing to the caller.
+        if agent._session_db and agent.session_id:
+            try:
+                if not agent._session_db_created:
+                    agent._ensure_db_session()
+                agent._session_db.update_token_counts(
+                    agent.session_id,
+                    model=agent.model,
+                    api_call_count=1,
+                )
+            except Exception as exc:
+                # Persistence failure must not break the turn; log and move on.
+                logger.debug(
+                    "Codex app-server api-call persistence failed (session=%s): %s",
+                    agent.session_id, exc,
+                )
+        return {}
+
+    from agent.usage_pricing import CanonicalUsage, estimate_usage_cost
+
+    # Missing or malformed counters coerce to 0.
+    input_tokens = _coerce_usage_int(usage.get("inputTokens"))
+    cache_read_tokens = _coerce_usage_int(usage.get("cachedInputTokens"))
+    output_tokens = _coerce_usage_int(usage.get("outputTokens"))
+    reasoning_tokens = _coerce_usage_int(usage.get("reasoningOutputTokens"))
+    reported_total = _coerce_usage_int(usage.get("totalTokens"))
+
+    canonical_usage = CanonicalUsage(
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        cache_read_tokens=cache_read_tokens,
+        cache_write_tokens=0,
+        reasoning_tokens=reasoning_tokens,
+        raw_usage=usage,
+    )
+    prompt_tokens = canonical_usage.prompt_tokens
+    completion_tokens = canonical_usage.output_tokens
+    # Prefer the server-reported total; fall back to the locally derived total
+    # when it is absent, invalid or zero (all of which coerce to 0 above).
+    total_tokens = reported_total or canonical_usage.total_tokens
+    usage_dict = {
+        "prompt_tokens": prompt_tokens,
+        "completion_tokens": completion_tokens,
+        "total_tokens": total_tokens,
+        "input_tokens": canonical_usage.input_tokens,
+        "output_tokens": canonical_usage.output_tokens,
+        "cache_read_tokens": canonical_usage.cache_read_tokens,
+        "cache_write_tokens": canonical_usage.cache_write_tokens,
+        "reasoning_tokens": canonical_usage.reasoning_tokens,
+    }
+
+    compressor = getattr(agent, "context_compressor", None)
+    if compressor is not None:
+        try:
+            compressor.update_from_response(usage_dict)
+            # Adopt the turn's context window only when it is a positive int.
+            context_window = getattr(turn, "model_context_window", None)
+            if isinstance(context_window, int) and context_window > 0:
+                compressor.context_length = context_window
+        except Exception:
+            # Compressor bookkeeping is best effort; accounting continues.
+            logger.debug("codex app-server usage update failed", exc_info=True)
+
+    # Accumulate this turn into the in-memory session totals.
+    agent.session_prompt_tokens += prompt_tokens
+    agent.session_completion_tokens += completion_tokens
+    agent.session_total_tokens += total_tokens
+    agent.session_input_tokens += canonical_usage.input_tokens
+    agent.session_output_tokens += canonical_usage.output_tokens
+    agent.session_cache_read_tokens += canonical_usage.cache_read_tokens
+    agent.session_cache_write_tokens += canonical_usage.cache_write_tokens
+    agent.session_reasoning_tokens += canonical_usage.reasoning_tokens
+
+    cost_result = estimate_usage_cost(
+        agent.model,
+        canonical_usage,
+        provider=agent.provider,
+        base_url=agent.base_url,
+        api_key=getattr(agent, "api_key", ""),
+    )
+    # Add to the running cost only when an estimate exists; status and source
+    # are overwritten with this turn's values regardless.
+    if cost_result.amount_usd is not None:
+        agent.session_estimated_cost_usd += float(cost_result.amount_usd)
+    agent.session_cost_status = cost_result.status
+    agent.session_cost_source = cost_result.source
+
+    # Persist this turn's counters to the session DB (best effort).
+    if agent._session_db and agent.session_id:
+        try:
+            if not agent._session_db_created:
+                agent._ensure_db_session()
+            agent._session_db.update_token_counts(
+                agent.session_id,
+                input_tokens=canonical_usage.input_tokens,
+                output_tokens=canonical_usage.output_tokens,
+                cache_read_tokens=canonical_usage.cache_read_tokens,
+                cache_write_tokens=canonical_usage.cache_write_tokens,
+                reasoning_tokens=canonical_usage.reasoning_tokens,
+                estimated_cost_usd=float(cost_result.amount_usd)
+                if cost_result.amount_usd is not None else None,
+                cost_status=cost_result.status,
+                cost_source=cost_result.source,
+                billing_provider=agent.provider,
+                billing_base_url=agent.base_url,
+                billing_mode="subscription_included"
+                if cost_result.status == "included" else None,
+                model=agent.model,
+                api_call_count=1,
+            )
+        except Exception as exc:
+            # Persistence failure must not break the turn; log and move on.
+            logger.debug(
+                "Codex app-server token persistence failed (session=%s, tokens=%d): %s",
+                agent.session_id, total_tokens, exc,
+            )
+
+    return {
+        **usage_dict,
+        "last_prompt_tokens": prompt_tokens,
+        "estimated_cost_usd": float(cost_result.amount_usd)
+        if cost_result.amount_usd is not None else None,
+        "cost_status": cost_result.status,
+        "cost_source": cost_result.source,
+    }
+
+
 def run_codex_app_server_turn(
    agent,
    *,
@@ -120,6 +268,8 @@ def run_codex_app_server_turn(
    agent._iters_since_skill = (
        getattr(agent, "_iters_since_skill", 0) + turn.tool_iterations
    )
+    usage_result = _record_codex_app_server_usage(agent, turn)
+    api_calls = 1

    # Now check the skill nudge AFTER iters were incremented — same
    # pattern the chat_completions path uses (line ~15432).
@@ -164,12 +314,13 @@ def run_codex_app_server_turn(
    return {
        "final_response": turn.final_text,
        "messages": messages,
-        "api_calls": 1,  # one app-server "turn" maps to one logical API call
+        "api_calls": api_calls,
        "completed": not turn.interrupted and turn.error is None,
        "partial": turn.interrupted or turn.error is not None,
        "error": turn.error,
        "codex_thread_id": turn.thread_id,
        "codex_turn_id": turn.turn_id,
+        **usage_result,
    }


--- a/agent/coding_context.py
+++ b/agent/coding_context.py
@@ -0,0 +1,731 @@
+"""Coding-context awareness — base Hermes, every interactive surface.
+
+When the user runs Hermes inside a code workspace (CLI, TUI, desktop app, or an
+editor over ACP), Hermes shifts into a **coding posture**. This module is the
+single place that decides whether we're in that posture and what it implies,
+so the rest of the codebase never re-derives "are we coding?" on its own.
+
+Architecture — one seam, many consumers
+----------------------------------------
+The posture is modelled as a frozen :class:`RuntimeMode` selected from a small
+:class:`ContextProfile` registry (today: ``coding`` and ``general``). A profile
+is *data* — it declares the toolset to collapse to, the operating brief to
+inject, and hints for other domains (model routing, memory, subagents). Every
+domain reads the same resolved object instead of probing git/config itself:
+
+  * **System prompt** — ``RuntimeMode.system_blocks()`` → the operating brief +
+    a live git/workspace snapshot (``agent/system_prompt.py``).
+  * **Toolset** — ``RuntimeMode.toolset_selection()`` → the ``coding`` toolset
+    plus the user's enabled MCP servers (``cli.py`` / ``tui_gateway``). Only
+    under the opt-in ``focus`` mode: the default posture is prompt-only and
+    never touches the user's configured toolsets (toolsets like messaging /
+    smart-home / music are off-by-default anyway, and someone who explicitly
+    enabled image-gen or Spotify shouldn't lose it for being in a git repo).
+  * **Delegation** — subagents inherit the parent's toolset and run through the
+    same prompt builder, so the coding posture propagates to children for free.
+  * **Model / memory / compression** — declared on the profile
+    (``model_hint``, ``memory_policy``) as the extension seam; consumers read
+    ``mode.profile`` rather than re-deciding.
+
+Cache safety
+------------
+The mode is resolved **once** and is immutable. The workspace snapshot is built
+once at prompt-build time and baked into the *stable* system-prompt tier — never
+re-probed per turn (that would shatter the prompt cache). Branch and dirty state
+drift mid-session, so the brief tells the model to re-check with ``git`` before
+acting on the snapshot. A ``/coding`` flip therefore only takes effect next
+session (deferred), the same contract as ``/skills install`` vs ``--now``.
+
+Activation (config ``agent.coding_context``):
+
+  * ``auto`` (default) — posture (brief + snapshot) on an interactive coding
+    surface sitting in a code workspace (git repo or recognised project root).
+    Prompt-only; toolsets and the skill index untouched.
+  * ``focus`` — like ``auto``, but additionally collapses the toolset to the
+    ``coding`` set + enabled MCP servers and demotes non-coding skill
+    categories to names-only in the prompt's skill index (no skill is ever
+    hidden). Explicit opt-in for a lean schema.
+  * ``on`` — force the posture anywhere (incl. non-workspaces). Prompt-only.
+  * ``off`` — disable entirely.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import re
+import subprocess
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Optional
+
+logger = logging.getLogger("hermes.coding_context")
+
+CODING_TOOLSET = "coding"
+
+# Surfaces where a coding posture makes sense under ``auto``. Messaging
+# platforms (telegram, discord, slack, …) are intentionally absent — a chat bot
+# in a group is not pair-programming.
+INTERACTIVE_CODING_PLATFORMS = {"cli", "tui", "acp", "desktop", ""}
+
+# Project-root signals that mark a directory as a code workspace even when it
+# isn't (yet) a git repo. Cheap filename checks — no parsing.
+_PROJECT_MARKERS = (
+    "pyproject.toml", "setup.py", "setup.cfg", "requirements.txt",
+    "package.json", "tsconfig.json", "deno.json",
+    "Cargo.toml", "go.mod", "pom.xml", "build.gradle", "build.gradle.kts",
+    "Gemfile", "composer.json", "mix.exs", "pubspec.yaml",
+    "CMakeLists.txt", "Makefile", "Dockerfile",
+    "AGENTS.md", "CLAUDE.md", ".cursorrules",
+)
+
+# Agent-instruction files surfaced separately from manifests in the snapshot.
+_CONTEXT_FILES = ("AGENTS.md", "CLAUDE.md", ".cursorrules")
+
+# Lockfile → package manager, checked in priority order.
+_PY_LOCKFILES = (("uv.lock", "uv"), ("poetry.lock", "poetry"), ("Pipfile.lock", "pipenv"))
+_JS_LOCKFILES = (
+    ("pnpm-lock.yaml", "pnpm"), ("bun.lockb", "bun"), ("bun.lock", "bun"),
+    ("yarn.lock", "yarn"), ("package-lock.json", "npm"),
+)
+
+# package.json scripts / Makefile targets worth surfacing as verify commands.
+_VERIFY_TARGETS = ("test", "tests", "lint", "typecheck", "check", "build", "fmt", "format")
+# Cap on how many verify commands the snapshot lists (applied after de-duplication).
+_MAX_VERIFY_COMMANDS = 8
+# Size ceiling (256 KiB) above which `_read_small` returns "" instead of reading.
+_MAX_FACT_FILE_BYTES = 256 * 1024
+
+# Timeout, in seconds, handed to `subprocess.run` for each `_git` invocation.
+_GIT_TIMEOUT = 2.5
+
+
+# Per-model edit-format steering. Matching the edit tool format to how a model
+# was trained reduces mistakes and wasted reasoning (OpenAI/Codex handle
+# patch-style diffs best; Anthropic models — and most open-weight coding
+# models, whose RL scaffolds use str_replace-style editors — do best with
+# string-replacement). Our `patch` tool exposes both: mode="patch" (V4A
+# multi-file) and mode="replace" (find-and-swap). We nudge each family toward
+# its native format. Unknown families get nothing (the brief's neutral wording
+# stands). Substrings match the model id; aligned with TOOL_USE_ENFORCEMENT_MODELS.
+#
+# GPT/Codex get V4A for ALL edits, single-file included: in codex-rs,
+# apply_patch (V4A — apply_patch.lark) is the ONLY file editor, no
+# str_replace-style tool exists, and the shipped model prompts say to use
+# apply_patch even "for single file edits" — so a replace-mode nudge would
+# steer those models toward a format their first-party harness never taught
+# them.
+_EDIT_FORMAT_GUIDANCE: dict[str, tuple[tuple[str, ...], str]] = {
+    # family key -> (model-id substrings, guidance line appended to the brief).
+    # Insertion order matters: `_model_family` returns the first family with a
+    # matching substring, so an id that matches both resolves to "patch".
+    "patch": (
+        ("gpt", "codex"),
+        "- Edit format: author new files with `write_file`; for edits to "
+        "existing code use `patch` with `mode='patch'` (V4A diff) — including "
+        "single-file edits. It's the edit format you handle most reliably.",
+    ),
+    "replace": (
+        ("claude", "sonnet", "opus", "haiku",
+         "gemini", "gemma", "deepseek", "qwen", "kimi", "glm", "grok",
+         "hermes", "llama", "mistral", "devstral", "minimax"),
+        "- Edit format: author new files with `write_file`; for edits to "
+        "existing code prefer `patch` in `mode='replace'` — match a unique "
+        "snippet and swap it. Reach for `mode='patch'` (V4A) only when an edit "
+        "genuinely spans several files at once.",
+    ),
+}
+
+
+def _model_family(model: Optional[str]) -> Optional[str]:
+    """Map a model id onto an edit-format family key.
+
+    The id is lower-cased and tested against each family's substrings in
+    ``_EDIT_FORMAT_GUIDANCE`` (in the table's insertion order); the first
+    family with a hit is returned.
+
+    Returns ``None`` for an empty/missing id or when no substring matches, in
+    which case no family-specific edit guidance is selected.
+    """
+    if not model:
+        return None
+    model_id = model.lower()
+    return next(
+        (
+            key
+            for key, (substrings, _guidance) in _EDIT_FORMAT_GUIDANCE.items()
+            if any(fragment in model_id for fragment in substrings)
+        ),
+        None,
+    )
+
+
+def _edit_format_line(model: Optional[str]) -> str:
+    """Return the guidance line for ``model``'s family, or ``""`` when unknown."""
+    key = _model_family(model)
+    return "" if key is None else _EDIT_FORMAT_GUIDANCE[key][1]
+
+
+# Operating brief for the coding posture. Tool names referenced here (read_file,
+# search_files, patch, write_file, terminal, todo) are in the coding toolset and
+# in _HERMES_CORE_TOOLS, so they're present on every surface this fires on.
+CODING_AGENT_GUIDANCE = (
+    "You are a coding agent pairing with the user inside their codebase. "
+    "Operate like a careful senior engineer.\n"
+    "\n"
+    "Gather context first:\n"
+    "- Read the relevant files with `read_file` and locate code with "
+    "`search_files` before changing anything. Trace a symbol to its definition "
+    "and usages rather than guessing its shape.\n"
+    "- Batch independent lookups: when several reads/searches don't depend on "
+    "each other, issue them together in one turn instead of one at a time.\n"
+    "- Never invent files, symbols, APIs, or imports. If you haven't seen it in "
+    "the repo, go look. Don't assume a library is available — check the project "
+    "manifest (pyproject.toml / package.json / Cargo.toml / go.mod) and how "
+    "neighbouring files import it.\n"
+    "\n"
+    "Make changes through the tools, not the chat:\n"
+    "- Edit with `patch`/`write_file`. Do NOT print code blocks to the user as "
+    "a substitute for editing — apply the change, then summarise it. Only show "
+    "code when the user explicitly asks to see it.\n"
+    "- Match the project's existing style and conventions; AGENTS.md / "
+    "CLAUDE.md / .cursorrules already in context win over your defaults. Touch "
+    "only what the task needs — no drive-by refactors, renames, or reformatting "
+    "— and add any imports/dependencies your code requires.\n"
+    "- If an edit fails to apply, re-read the file to get the current exact "
+    "contents before retrying — don't repeat a stale patch. If the same region "
+    "fails twice, rewrite the enclosing function or file with `write_file` "
+    "instead of attempting a third patch.\n"
+    "\n"
+    "Verify, and know when to stop:\n"
+    "- Use `terminal` for git, builds, tests, and inspection. Run the relevant "
+    "tests/linter/build and confirm they pass before claiming the work is done.\n"
+    "- Fix root causes, not symptoms: when you find a bug, check sibling call "
+    "paths for the same flaw and fix the class, not just the reported site.\n"
+    "- When fixing linter/type errors on a file, stop after about three "
+    "attempts on the same file and ask the user rather than looping.\n"
+    "- Track multi-step work with `todo`. Reference code as `path:line` instead "
+    "of pasting whole files.\n"
+    "\n"
+    "Respect the user's repo: don't commit, push, or rewrite history unless "
+    "asked, and never read, print, or commit secrets — leave `.env` and "
+    "credential files alone unless the user explicitly asks. The Workspace "
+    "block below is a snapshot from session start — re-run `git status`/"
+    "`git branch` before relying on it. Be concise: lead with the change or "
+    "answer, not a preamble."
+)
+
+
+# ── Context profiles (declarative posture definitions) ──────────────────────
+
+
+@dataclass(frozen=True)
+class ContextProfile:
+    """Declarative description of one operating posture (immutable, data only).
+
+    Attributes:
+        name: Registry key of the profile.
+        toolset: Toolset to collapse to (together with enabled MCP servers)
+            when the user has not pinned an explicit selection. ``None`` leaves
+            the platform default in place.
+        guidance: Operating brief placed in the stable system prompt. An empty
+            string injects nothing.
+        model_hint: Routing preference key for smart model routing. Extension
+            seam only — the router does not consume it yet.
+        memory_policy: Memory namespace/weighting hint. Extension seam.
+        compact_skill_categories: Skill categories that are demoted to
+            names-only in the system-prompt skill index under the opt-in
+            ``focus`` mode. Skills are never hidden — every name stays visible
+            so memory-anchored recall keeps working; only descriptions are
+            dropped to reduce index noise. This is a deny-list, so unknown or
+            custom categories keep their full entries.
+    """
+
+    name: str
+    toolset: Optional[str] = None
+    guidance: str = ""
+    model_hint: Optional[str] = None
+    memory_policy: str = "default"
+    compact_skill_categories: tuple[str, ...] = ()
+
+
+# Skill categories that are clearly not part of a coding workflow. Demoted to
+# names-only in the prompt's skill index under the opt-in ``focus`` mode only
+# (deny-list — anything not listed here, incl. custom user categories, keeps
+# full entries). Coding-adjacent categories (devops, github, mcp,
+# data-science, diagramming, research, security, …) are intentionally absent.
+_NON_CODING_SKILL_CATEGORIES = (
+    "apple", "communication", "cooking", "creative", "email", "finance",
+    "gaming", "gifs", "health", "media", "music", "note-taking",
+    "productivity", "shopping", "smart-home", "social-media", "travel",
+    "yuanbao",
+)
+
+
+# The general posture: every field left at its default, so no toolset
+# collapse, no injected brief, and no demoted skill categories.
+GENERAL_PROFILE = ContextProfile(name="general")
+# The coding posture: the ``coding`` toolset, the coding operating brief, and
+# the non-coding skill categories as the demotion deny-list.
+CODING_PROFILE = ContextProfile(
+    name="coding",
+    toolset=CODING_TOOLSET,
+    guidance=CODING_AGENT_GUIDANCE,
+    model_hint="coding",
+    memory_policy="project",
+    compact_skill_categories=_NON_CODING_SKILL_CATEGORIES,
+)
+
+# Registry keyed by profile name; looked up through ``get_profile``.
+_PROFILES: dict[str, ContextProfile] = {
+    GENERAL_PROFILE.name: GENERAL_PROFILE,
+    CODING_PROFILE.name: CODING_PROFILE,
+}
+
+
+def get_profile(name: str) -> ContextProfile:
+    """Look up a profile by name; unknown names resolve to ``general``."""
+    try:
+        return _PROFILES[name]
+    except KeyError:
+        return GENERAL_PROFILE
+
+
+# ── Helpers ─────────────────────────────────────────────────────────────────
+
+
+def _coding_mode(config: Optional[dict[str, Any]]) -> str:
+    """Normalize ``agent.coding_context`` to ``auto``, ``focus``, ``on`` or ``off``.
+
+    When ``config`` is ``None`` it is loaded via
+    ``hermes_cli.config.load_config``; any failure there is treated as an empty
+    config. The raw value is stringified, stripped and lower-cased, then mapped
+    through a fixed alias table. Missing or unrecognised values mean ``auto``.
+    """
+    if config is None:
+        try:
+            from hermes_cli.config import load_config
+
+            config = load_config()
+        except Exception:
+            config = {}
+    agent_section = (config or {}).get("agent", {}) or {}
+    setting = str(agent_section.get("coding_context", "auto")).strip().lower()
+    aliases = {
+        "focus": "focus", "strict": "focus", "lean": "focus",
+        "on": "on", "true": "on", "yes": "on", "1": "on", "always": "on",
+        "off": "off", "false": "off", "no": "off", "0": "off", "never": "off",
+    }
+    return aliases.get(setting, "auto")
+
+
+def _resolve_cwd(cwd: Optional[str | Path]) -> Path:
+    """Pick the working directory to inspect.
+
+    An explicit ``cwd`` is returned with ``~`` expanded. Otherwise the result of
+    ``agent.runtime_cwd.resolve_agent_cwd`` is used, falling back to the
+    process's current directory if that import or call fails.
+    """
+    if not cwd:
+        try:
+            from agent.runtime_cwd import resolve_agent_cwd
+
+            return resolve_agent_cwd()
+        except Exception:
+            return Path(os.getcwd())
+    return Path(cwd).expanduser()
+
+
+def _git_root(cwd: Path) -> Optional[Path]:
+    """Nearest directory at or above ``cwd`` that contains a ``.git`` entry.
+
+    ``cwd`` is resolved first. Returns ``None`` when no ancestor has ``.git``.
+    """
+    start = cwd.resolve()
+    return next(
+        (folder for folder in (start, *start.parents) if (folder / ".git").exists()),
+        None,
+    )
+
+
+def _home() -> Optional[Path]:
+    """Resolved home directory, or ``None`` if it cannot be determined."""
+    try:
+        home_dir = Path.home()
+        return home_dir.resolve()
+    except (OSError, RuntimeError):
+        return None
+
+
+def _marker_root(cwd: Path) -> Optional[Path]:
+    """Closest ancestor of ``cwd`` that carries a project-root marker file.
+
+    Checks ``cwd`` itself and up to six levels above it, so a manifest at the
+    workspace root still counts from inside a subdirectory. The home directory
+    is passed over: a Makefile or AGENTS.md living in ``$HOME`` is global user
+    config rather than evidence of a project. Returns ``None`` if nothing
+    within range matches.
+    """
+    start = cwd.resolve()
+    home_dir = _home()
+    # Depths 0..6 inclusive — the starting directory plus six ancestors.
+    for candidate in (start, *start.parents)[:7]:
+        if candidate == home_dir:
+            continue
+        if any((candidate / marker).exists() for marker in _PROJECT_MARKERS):
+            return candidate
+    return None
+
+
+def _detect_profile_name(mode: str, platform: str, cwd_str: str) -> str:
+    """Decide which profile name applies for this mode, surface and directory.
+
+    * ``off`` — always ``general``.
+    * ``on`` — always ``coding``.
+    * ``auto`` / ``focus`` — ``coding`` only when the surface is one of the
+      interactive coding platforms AND the directory is a code workspace,
+      i.e. inside a git repo or under a recognised project root.
+
+    A git repo whose root is ``$HOME`` (the dotfiles pattern) does not count as
+    a workspace; otherwise every session under a dotfiles-managed home would
+    quietly switch to the coding posture.
+
+    Nothing here is memoized on purpose: the work is a few ``stat`` calls and
+    callers resolve once per session, whereas a cache could hand a stale
+    posture to a long-lived process (gateway/TUI) serving sessions from
+    different directories.
+    """
+    if mode == "off":
+        return GENERAL_PROFILE.name
+    if mode == "on":
+        return CODING_PROFILE.name
+
+    surface = platform.strip().lower() if platform else ""
+    if platform and surface not in INTERACTIVE_CODING_PLATFORMS:
+        return GENERAL_PROFILE.name
+
+    here = Path(cwd_str)
+    repo = _git_root(here)
+    # A repo rooted at $HOME is a dotfiles repo, not a code workspace.
+    in_repo = repo is not None and repo != _home()
+    if in_repo or _marker_root(here) is not None:
+        return CODING_PROFILE.name
+    return GENERAL_PROFILE.name
+
+
+# ── RuntimeMode (the seam) ──────────────────────────────────────────────────
+
+
+@dataclass(frozen=True)
+class RuntimeMode:
+    """Session-wide operating posture, frozen once resolved.
+
+    Produced by :func:`resolve_runtime_mode` and shared by every consumer that
+    needs to tell the coding posture from the general one. Instances are
+    immutable; mutating or re-resolving mid-session would break the prompt
+    cache.
+    """
+
+    profile: ContextProfile
+    surface: str
+    cwd: Path
+    # Normalized ``agent.coding_context`` value (auto/focus/on/off) in effect
+    # when the posture was resolved. Only ``focus`` enables toolset collapse.
+    config_mode: str = "auto"
+    # Model id for the session (e.g. "anthropic/claude-opus-4.8"). Its sole use
+    # is picking the family-specific edit-format line (``_edit_format_line``);
+    # it stays fixed for the session, so it is cache-safe.
+    model: Optional[str] = None
+
+    @property
+    def kind(self) -> str:
+        """Name of the resolved profile."""
+        return self.profile.name
+
+    @property
+    def is_coding(self) -> bool:
+        """``True`` when the resolved profile is the coding profile."""
+        return self.kind == CODING_PROFILE.name
+
+    def toolset_selection(self, config: Optional[dict[str, Any]] = None) -> Optional[list[str]]:
+        """Toolsets this posture wants, or ``None`` to leave the default alone.
+
+        A list is produced only in the opt-in ``focus`` mode and only when the
+        profile names a toolset: the profile's toolset followed by the enabled
+        MCP servers. In every other mode the posture is prompt-only — most
+        strippable toolsets are off by default already, and anything the user
+        switched on deliberately (image-gen for frontend/game assets, messaging
+        for build notifications, …) should survive while coding.
+
+        Callers are expected to apply the result only when no explicit
+        selection is pinned (``--toolsets``, ``HERMES_TUI_TOOLSETS``, …) and
+        must never override such a pin.
+        """
+        collapse_to = self.profile.toolset
+        if self.config_mode == "focus" and collapse_to is not None:
+            return [collapse_to, *_enabled_mcp_servers(config)]
+        return None
+
+    def system_blocks(self) -> list[str]:
+        """Stable system-prompt blocks: the operating brief, then the workspace.
+
+        Empty outside the coding posture. When the profile has a brief, the
+        model-family edit-format line (see ``_edit_format_line``) is appended
+        to it on a new line, so brief and nudge form one cached string rather
+        than two blocks. The workspace snapshot follows when it is non-empty.
+        """
+        if not self.is_coding:
+            return []
+        brief = self.profile.guidance
+        if brief:
+            nudge = _edit_format_line(self.model)
+            parts = [f"{brief}\n{nudge}" if nudge else brief]
+        else:
+            parts = []
+        snapshot = build_coding_workspace_block(self.cwd)
+        if snapshot:
+            parts.append(snapshot)
+        return parts
+
+    def compact_skill_categories(self) -> frozenset[str]:
+        """Categories shown names-only in the prompt's skill index.
+
+        Like the toolset collapse, this applies only in the opt-in ``focus``
+        mode of the coding posture; otherwise the set is empty and the index
+        is untouched. Changing the index under ``auto`` proved too surprising
+        in practice, even names-only: a dropped description is information the
+        model no longer weighs when choosing what to load.
+
+        Categories are demoted, never hidden, even under ``focus``. An earlier
+        revision pruned them from the index outright and caused silent
+        capability loss in a real workflow: agent-created skills are the
+        model's accumulated project memory (server-ops runbooks, learned
+        pitfalls, …), and models do not reliably call ``skills_list`` to
+        rediscover what the index stopped showing. Keeping the names leaves
+        every skill loadable on recall while trimming description noise.
+        """
+        if self.is_coding and self.config_mode == "focus":
+            return frozenset(self.profile.compact_skill_categories)
+        return frozenset()
+
+
+def resolve_runtime_mode(
+    *,
+    platform: Optional[str] = None,
+    cwd: Optional[str | Path] = None,
+    config: Optional[dict[str, Any]] = None,
+    model: Optional[str] = None,
+) -> RuntimeMode:
+    """Resolve the session's operating posture — the one shared entry point.
+
+    Resolution costs only a few ``stat`` calls. The result is immutable and
+    meant to be held for the whole session; detection is deliberately not
+    memoized (see ``_detect_profile_name``) so a long-lived process cannot pin
+    a stale posture. ``model`` is stored solely to steer the edit-format
+    guidance and plays no part in detection.
+    """
+    workdir = _resolve_cwd(cwd)
+    config_mode = _coding_mode(config)
+    surface_key = (platform or "").strip().lower()
+    profile_name = _detect_profile_name(config_mode, surface_key, str(workdir))
+    return RuntimeMode(
+        profile=get_profile(profile_name),
+        surface=platform or "",
+        cwd=workdir,
+        config_mode=config_mode,
+        model=model,
+    )
+
+
+# ── Back-compat surface (thin wrappers over RuntimeMode) ────────────────────
+
+
+def is_coding_context(
+    *,
+    platform: Optional[str] = None,
+    cwd: Optional[str | Path] = None,
+    config: Optional[dict[str, Any]] = None,
+) -> bool:
+    """``True`` when the resolved posture for these inputs is the coding one."""
+    posture = resolve_runtime_mode(platform=platform, cwd=cwd, config=config)
+    return posture.is_coding
+
+
+def coding_selection(
+    *,
+    platform: Optional[str] = None,
+    cwd: Optional[str | Path] = None,
+    config: Optional[dict[str, Any]] = None,
+) -> Optional[list[str]]:
+    """Toolset selection implied by the resolved posture.
+
+    Returns a list only when the user opted into ``focus`` mode and the coding
+    posture is active; otherwise ``None``, because the default coding posture
+    never overrides configured toolsets.
+    """
+    posture = resolve_runtime_mode(platform=platform, cwd=cwd, config=config)
+    return posture.toolset_selection(config)
+
+
+def coding_system_blocks(
+    *,
+    platform: Optional[str] = None,
+    cwd: Optional[str | Path] = None,
+    config: Optional[dict[str, Any]] = None,
+    model: Optional[str] = None,
+) -> list[str]:
+    """System-prompt blocks for the resolved posture; empty when it is general.
+
+    ``model`` selects the family-specific edit-format line added to the brief.
+    """
+    posture = resolve_runtime_mode(
+        platform=platform, cwd=cwd, config=config, model=model
+    )
+    return posture.system_blocks()
+
+
+def coding_compact_skill_categories(
+    *,
+    platform: Optional[str] = None,
+    cwd: Optional[str | Path] = None,
+    config: Optional[dict[str, Any]] = None,
+) -> frozenset[str]:
+    """Skill categories the resolved posture shows names-only in the index.
+
+    The set is empty unless the coding posture is active under the opt-in
+    ``focus`` mode, so the default posture leaves the skill index alone. Under
+    ``focus`` the categories are demoted rather than hidden: each skill name
+    stays listed and loadable via ``skill_view`` / ``skills_list``, and only
+    the descriptions are omitted.
+    """
+    posture = resolve_runtime_mode(platform=platform, cwd=cwd, config=config)
+    return posture.compact_skill_categories()
+
+
+def _enabled_mcp_servers(config: Optional[dict[str, Any]]) -> list[str]:
+    """List the MCP servers the user has enabled, to keep them while coding.
+
+    MCP servers (figma, browser, tophat, …) were configured deliberately and
+    belong to the coding workflow, so they are kept rather than stripped.
+
+    The list comes from the ``mcp_servers`` section returned by
+    ``read_raw_config()``; the ``config`` argument is accepted but not
+    consulted here. Entries whose value is not a dict are ignored, and a
+    missing ``enabled`` key counts as enabled. Any failure (including the
+    imports) yields an empty list.
+    """
+    try:
+        from hermes_cli.config import read_raw_config
+        from hermes_cli.tools_config import _parse_enabled_flag
+
+        configured = read_raw_config().get("mcp_servers") or {}
+        enabled: list[str] = []
+        for server_name, server_cfg in configured.items():
+            if not isinstance(server_cfg, dict):
+                continue
+            if _parse_enabled_flag(server_cfg.get("enabled", True), default=True):
+                enabled.append(str(server_name))
+        return enabled
+    except Exception:
+        return []
+
+
+# ── git/workspace probe ─────────────────────────────────────────────────────
+
+
+def _git(cwd: Path, *args: str) -> str:
+    """Run ``git -C <cwd> <args...>`` and return its stripped stdout.
+
+    Best-effort probe: returns ``""`` when git cannot be started, the call
+    times out (``_GIT_TIMEOUT`` seconds), or git exits non-zero.
+
+    Output is decoded as UTF-8 with undecodable bytes replaced. With plain
+    ``text=True`` the locale codec is used in strict mode, so legacy-encoded
+    commit subjects (or a non-UTF-8 locale) would raise ``UnicodeDecodeError``
+    — which is not a ``SubprocessError`` and would escape this helper.
+    """
+    try:
+        out = subprocess.run(
+            ["git", "-C", str(cwd), *args],
+            capture_output=True,
+            text=True,
+            encoding="utf-8",
+            errors="replace",
+            timeout=_GIT_TIMEOUT,
+        )
+    except (OSError, subprocess.SubprocessError):
+        return ""
+    return out.stdout.strip() if out.returncode == 0 else ""
+
+
+def _parse_status(porcelain: str) -> tuple[dict[str, str], dict[str, int]]:
+    """Split ``git status --porcelain=2 --branch`` output into branch info and tallies.
+
+    Returns ``(branch, counts)``. ``branch`` may hold ``head``, ``upstream``,
+    ``ahead`` and ``behind`` (all strings, signs stripped from the last two).
+    ``counts`` always has ``staged``, ``modified``, ``untracked`` and
+    ``conflicts``. An ordinary or renamed entry (``1``/``2``) can count as both
+    staged and modified, depending on the two characters of its XY field.
+    """
+    branch_info: dict[str, str] = {}
+    tally = {"staged": 0, "modified": 0, "untracked": 0, "conflicts": 0}
+    for entry in porcelain.splitlines():
+        if entry.startswith("# branch.head"):
+            branch_info["head"] = entry.split(maxsplit=2)[-1]
+            continue
+        if entry.startswith("# branch.upstream"):
+            branch_info["upstream"] = entry.split(maxsplit=2)[-1]
+            continue
+        if entry.startswith("# branch.ab"):
+            fields = entry.split()
+            ahead = fields[2].lstrip("+")
+            behind = fields[3].lstrip("-")
+            branch_info["ahead"] = ahead
+            branch_info["behind"] = behind
+            continue
+        if entry.startswith(("1 ", "2 ")):
+            # XY field: X is the index (staged) state, Y the worktree state;
+            # "." means unchanged on that side.
+            xy = entry.split(maxsplit=2)[1]
+            if xy[0] != ".":
+                tally["staged"] += 1
+            if xy[1] != ".":
+                tally["modified"] += 1
+            continue
+        if entry.startswith("u "):
+            tally["conflicts"] += 1
+        elif entry.startswith("? "):
+            tally["untracked"] += 1
+    return branch_info, tally
+
+
+def _read_small(path: Path) -> str:
+    """Return the text of a small regular file, or ``""`` when it can't be used.
+
+    ``""`` covers: not a regular file, larger than ``_MAX_FACT_FILE_BYTES``, or
+    any ``OSError`` while probing/reading. Content is decoded as UTF-8 with
+    undecodable bytes replaced.
+    """
+    try:
+        if not path.is_file():
+            return ""
+        if path.stat().st_size > _MAX_FACT_FILE_BYTES:
+            return ""
+        return path.read_text(encoding="utf-8", errors="replace")
+    except OSError:
+        return ""
+
+
+def _project_facts(root: Path) -> list[str]:
+    """Detected project facts for the workspace snapshot.
+
+    The point is to hand the model its *verify loop* up front — which manifest,
+    which package manager, and the exact test/lint/build commands — instead of
+    making it rediscover them every session. Cheap: stat calls plus reads of a
+    couple of small files; built once at prompt-build time (cache-safe).
+
+    Returns zero to three bullet lines, in this order:
+
+    * ``- Project: ...`` — manifests found in ``root`` (at most six), with the
+      package managers implied by any lockfiles in parentheses.
+    * ``- Verify: ...`` — candidate verification commands, de-duplicated and
+      capped at ``_MAX_VERIFY_COMMANDS``.
+    * ``- Context files: ...`` — agent-instruction files present in ``root``.
+    """
+    facts: list[str] = []
+
+    manifests = [m for m in _PROJECT_MARKERS if m not in _CONTEXT_FILES and (root / m).is_file()]
+    package_managers = [
+        pm for lock, pm in (*_PY_LOCKFILES, *_JS_LOCKFILES) if (root / lock).is_file()
+    ]
+    if manifests:
+        line = f"- Project: {', '.join(manifests[:6])}"
+        if package_managers:
+            # dict.fromkeys de-duplicates while keeping priority order
+            # (bun.lockb and bun.lock both map to "bun").
+            line += f" ({'/'.join(dict.fromkeys(package_managers))})"
+        facts.append(line)
+
+    verify: list[str] = []
+    if (root / "scripts" / "run_tests.sh").is_file():
+        verify.append("scripts/run_tests.sh")
+    if (root / "package.json").is_file():
+        try:
+            scripts = json.loads(_read_small(root / "package.json") or "{}").get("scripts") or {}
+        except (json.JSONDecodeError, AttributeError):
+            scripts = {}
+        # A malformed manifest may carry a non-mapping "scripts" value. A number
+        # would make the membership test below raise TypeError, and a string
+        # would turn it into substring matching — treat both as "no scripts".
+        if not isinstance(scripts, dict):
+            scripts = {}
+        js_pm = next((pm for lock, pm in _JS_LOCKFILES if (root / lock).is_file()), "npm")
+        verify.extend(f"{js_pm} run {name}" for name in _VERIFY_TARGETS if name in scripts)
+    if (root / "pytest.ini").is_file() or "[tool.pytest" in _read_small(root / "pyproject.toml"):
+        verify.append("pytest")
+    makefile = _read_small(root / "Makefile")
+    if makefile:
+        # A target counts when some line starts with its name, optional
+        # whitespace, then a colon.
+        verify.extend(
+            f"make {name}" for name in _VERIFY_TARGETS
+            if re.search(rf"^{re.escape(name)}\s*:", makefile, re.MULTILINE)
+        )
+    if verify:
+        deduped = list(dict.fromkeys(verify))[:_MAX_VERIFY_COMMANDS]
+        facts.append(f"- Verify: {'; '.join(deduped)}")
+
+    context_files = [c for c in _CONTEXT_FILES if (root / c).is_file()]
+    if context_files:
+        facts.append(f"- Context files: {', '.join(context_files)}")
+
+    return facts
+
+
+def build_coding_workspace_block(cwd: Optional[str | Path] = None) -> str:
+    """Workspace snapshot for the system prompt (empty outside a workspace).
+
+    Git state (branch/status/commits) when the cwd is in a repo, plus detected
+    project facts (manifest, package manager, verify commands, context files)
+    — so marker-only (non-git) projects still get a snapshot.
+
+    A git repo rooted at ``$HOME`` (the dotfiles pattern) is ignored, matching
+    ``_detect_profile_name``. Without that guard a marker-only project under a
+    dotfiles-managed home would be reported with ``$HOME`` as its root and the
+    dotfiles repo's branch/status as its git state.
+
+    Args:
+        cwd: Directory to inspect; resolved via ``_resolve_cwd`` when omitted.
+
+    Returns:
+        A newline-joined block starting with a ``Workspace (...)`` header, or
+        ``""`` when neither a git root nor a project-marker root is found.
+    """
+    resolved = _resolve_cwd(cwd)
+    git_root = _git_root(resolved)
+    if git_root is not None and git_root == _home():
+        git_root = None  # dotfiles repo at $HOME — not a code workspace
+    root = git_root or _marker_root(resolved)
+    if root is None:
+        return ""
+
+    lines = ["Workspace (snapshot at session start — re-check with `git` before acting on it):"]
+    lines.append(f"- Root: {root}")
+
+    if git_root is not None:
+        branch, counts = _parse_status(_git(root, "status", "--porcelain=2", "--branch"))
+        head = branch.get("head", "")
+        if head and head != "(detached)":
+            line = f"- Branch: {head}"
+            if branch.get("upstream"):
+                line += f" \u2192 {branch['upstream']}"
+                ahead, behind = branch.get("ahead", "0"), branch.get("behind", "0")
+                if ahead != "0" or behind != "0":
+                    line += f" (ahead {ahead}, behind {behind})"
+            lines.append(line)
+        elif head == "(detached)":
+            lines.append("- Branch: (detached HEAD)")
+
+        # Linked worktree: the per-worktree git dir differs from the shared common dir.
+        git_dir = _git(root, "rev-parse", "--git-dir")
+        common_dir = _git(root, "rev-parse", "--git-common-dir")
+        if git_dir and common_dir:
+            # rev-parse may print paths relative to the directory git ran in
+            # (``root`` via -C), so anchor them there rather than at the
+            # process cwd. Joining leaves absolute paths untouched.
+            git_dir_path = (root / git_dir).resolve()
+            common_dir_path = (root / common_dir).resolve()
+            if git_dir_path != common_dir_path:
+                lines.append(f"- Worktree: linked (primary tree at {common_dir_path.parent})")
+
+        dirty = [f"{n} {label}" for label, n in (
+            ("staged", counts["staged"]), ("modified", counts["modified"]),
+            ("untracked", counts["untracked"]), ("conflicts", counts["conflicts"]),
+        ) if n]
+        lines.append(f"- Status: {', '.join(dirty) if dirty else 'clean'}")
+
+        recent = _git(root, "log", "-3", "--pretty=%h %s")
+        if recent:
+            lines.append("- Recent commits:")
+            lines.extend(f"    {c}" for c in recent.splitlines())
+
+    lines.extend(_project_facts(root))
+    return "\n".join(lines)
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -7,7 +7,7 @@ protecting head and tail context.
 Improvements over v2:
  - Structured summary template with Resolved/Pending question tracking
  - Filter-safe summarizer preamble that treats prior turns as source material
-  - "Remaining Work" replaces "Next Steps" to avoid reading as active instructions
+  - Historical (reference-only) section headings replace "Next Steps"/"Remaining Work" to avoid reading as active instructions
  - Clear separator when summary merges into tail message
  - Iterative summary updates (preserves info across multiple compactions)
  - Token-budget tail protection instead of fixed message count
@@ -34,7 +34,50 @@ from agent.redact import redact_sensitive_text

 logger = logging.getLogger(__name__)

+HISTORICAL_TASK_HEADING = "## Historical Task Snapshot"
+HISTORICAL_IN_PROGRESS_HEADING = "## Historical In-Progress State"
+HISTORICAL_PENDING_ASKS_HEADING = "## Historical Pending User Asks"
+HISTORICAL_REMAINING_WORK_HEADING = "## Historical Remaining Work"
+
+
 SUMMARY_PREFIX = (
+    "[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted "
+    "into the summary below. This is a handoff from a previous context "
+    "window — treat it as background reference, NOT as active instructions. "
+    "Do NOT answer questions or fulfill requests mentioned in this summary; "
+    "they were already addressed. "
+    "Respond ONLY to the latest user message that appears AFTER this "
+    "summary — that message is the single source of truth for what to do "
+    "right now. "
+    "Topic overlap with the summary does NOT mean you should resume its "
+    "task: even on similar topics, the latest user message WINS. Treat ONLY "
+    "the latest message as the active task and discard stale items from "
+    f"'{HISTORICAL_TASK_HEADING}' / '{HISTORICAL_IN_PROGRESS_HEADING}' / "
+    f"'{HISTORICAL_PENDING_ASKS_HEADING}' / "
+    f"'{HISTORICAL_REMAINING_WORK_HEADING}' entirely — do not 'wrap up' or "
+    "'finish' work described there unless the latest message explicitly "
+    "asks for it. "
+    "Reverse signals in the latest message (e.g. 'stop', 'undo', 'roll "
+    "back', 'just verify', 'don't do that anymore', 'never mind', a new "
+    "topic) must immediately end any in-flight work described in the "
+    "summary; do not re-surface it in later turns. "
+    "IMPORTANT: Your persistent memory (MEMORY.md, USER.md) in the system "
+    "prompt is ALWAYS authoritative and active — never ignore or deprioritize "
+    "memory content due to this compaction note. "
+    "The current session state (files, config, etc.) may reflect work "
+    "described here — avoid repeating it:"
+)
+LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"
+
+# Handoff prefixes that shipped in earlier releases. A summary persisted under
+# one of these can be inherited into a resumed lineage (#35344); when it is
+# re-normalized on re-compaction we must strip the OLD prefix too, otherwise the
+# stale directive it carried (e.g. "resume exactly from Active Task") survives
+# embedded in the body and keeps hijacking replies. Keep newest-first; entries
+# are matched literally. Add a frozen copy here whenever SUMMARY_PREFIX changes.
+_HISTORICAL_SUMMARY_PREFIXES = (
+    # Carveout era (#41607/#38364/#42812): "consistent → use as background"
+    # licensed stale-task resumption on topic overlap.
    "[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted "
    "into the summary below. This is a handoff from a previous context "
    "window — treat it as background reference, NOT as active instructions. "
@@ -57,17 +100,7 @@ SUMMARY_PREFIX = (
    "prompt is ALWAYS authoritative and active — never ignore or deprioritize "
    "memory content due to this compaction note. "
    "The current session state (files, config, etc.) may reflect work "
-    "described here — avoid repeating it:"
-)
-LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"
-
-# Handoff prefixes that shipped in earlier releases. A summary persisted under
-# one of these can be inherited into a resumed lineage (#35344); when it is
-# re-normalized on re-compaction we must strip the OLD prefix too, otherwise the
-# stale directive it carried (e.g. "resume exactly from Active Task") survives
-# embedded in the body and keeps hijacking replies. Keep newest-first; entries
-# are matched literally. Add a frozen copy here whenever SUMMARY_PREFIX changes.
-_HISTORICAL_SUMMARY_PREFIXES = (
+    "described here — avoid repeating it:",
    # Pre-#35344: contained the self-contradicting "resume exactly" directive.
    "[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted "
    "into the summary below. This is a handoff from a previous context "
@@ -553,6 +586,22 @@ class ContextCompressor(ContextEngine):
        self.last_rough_tokens_when_real_prompt_fit = 0
        self.awaiting_real_usage_after_compression = False

+    def on_session_end(self, session_id: str, messages: List[Dict[str, Any]]) -> None:
+        """Clear per-session compaction state at a real session boundary.
+
+        ``_previous_summary`` is per-session iterative-summary state. It is
+        cleared on ``on_session_reset()`` (/new, /reset), but session *end*
+        (CLI exit, gateway expiry, session-id rotation) goes through
+        ``on_session_end()`` instead — which inherited a no-op from
+        ``ContextEngine``. Without clearing here, a cron/background session's
+        summary could survive on a reused compressor instance and leak into the
+        next live session via the ``_generate_summary()`` iterative-update path
+        (#38788). ``compress()`` already guards the leak at the point of use;
+        this is defense-in-depth that drops the stale summary the moment the
+        owning session ends.
+        """
+        self._previous_summary = None
+
    def update_model(
        self,
        model: str,
@@ -1139,7 +1188,7 @@ class ContextCompressor(ContextEngine):
            )

        reason_text = f" Summary failure reason: {reason}." if reason else ""
-        body = f"""## Active Task
+        body = f"""{HISTORICAL_TASK_HEADING}
 {active_task}

 ## Goal
@@ -1156,7 +1205,7 @@ Recovered from a deterministic fallback because the LLM context summarizer was u
 ## Active State
 Unknown from deterministic fallback. Inspect current repository/session state if needed.

-## In Progress
+{HISTORICAL_IN_PROGRESS_HEADING}
 {active_task}

 ## Blocked
@@ -1168,13 +1217,13 @@ None recoverable from deterministic fallback.
 ## Resolved Questions
 None recoverable from deterministic fallback.

-## Pending User Asks
+{HISTORICAL_PENDING_ASKS_HEADING}
 {active_task}

 ## Relevant Files
 {_bullets(relevant_files, limit=12)}

-## Remaining Work
+{HISTORICAL_REMAINING_WORK_HEADING}
 Continue from the most recent unfulfilled user ask and protected tail messages. Verify state with tools before making claims.

 ## Last Dropped Turns
@@ -1247,6 +1296,19 @@ Summary generation was unavailable, so this is a best-effort deterministic fallb
        summary_budget = self._compute_summary_budget(turns_to_summarize)
        content_to_summarize = self._serialize_for_summary(turns_to_summarize)

+        # Current date for temporal anchoring (see the TEMPORAL ANCHORING rule below).
+        # Date-only granularity matches system_prompt.py:337 (PR #20451) and the
+        # user's configured timezone via hermes_time.now(). The compaction summary
+        # is a mid-conversation message that is NOT part of the cached prefix, so a
+        # date here never affects prompt-cache stability. Resolved defensively —
+        # a clock failure must never block compaction.
+        try:
+            from hermes_time import now as _hermes_now
+
+            _today_str = _hermes_now().strftime("%Y-%m-%d")
+        except Exception:  # pragma: no cover - clock resolution is best-effort
+            _today_str = ""
+
        # Preamble shared by both first-compaction and iterative-update prompts.
        # Keep the wording deliberately plain: Azure/OpenAI-compatible content
        # filters have flagged stronger "injection" / "do not respond" framing.
@@ -1264,8 +1326,26 @@ Summary generation was unavailable, so this is a best-effort deterministic fallb
            "do not preserve their values."
        )

+        # Temporal anchoring directive. Rewrites relative / still-pending-sounding
+        # references into absolute, dated, past-tense facts so a resumed
+        # conversation does not re-issue completed actions. Only emitted when the
+        # current date resolved successfully; otherwise the rule is omitted so the
+        # summarizer is never handed an empty date placeholder.
+        if _today_str:
+            _temporal_anchoring_rule = (
+                f"\nTEMPORAL ANCHORING: The current date is {_today_str}. When an "
+                "action has already been carried out, phrase it as a completed, "
+                "dated, past-tense fact rather than an open instruction. For "
+                'example, rewrite "email John about the proposal" as "Sent the '
+                f'proposal email to John on {_today_str}." Never leave a finished '
+                "action worded as if it still needs doing, and never invent a date "
+                "for work that has not happened yet.\n"
+            )
+        else:
+            _temporal_anchoring_rule = ""
+
        # Shared structured template (used by both paths).
-        _template_sections = f"""## Active Task
+        _template_sections = f"""{HISTORICAL_TASK_HEADING}
 [THE SINGLE MOST IMPORTANT FIELD. Capture the user's most recent unfulfilled
 input verbatim — the exact words they used. This includes:
 - Explicit task assignments ("refactor the auth module")
@@ -1312,7 +1392,7 @@ Be specific with file paths, commands, line numbers, and results.]
 - Any running processes or servers
 - Environment details that matter]

-## In Progress
+{HISTORICAL_IN_PROGRESS_HEADING}
 [Work currently underway — what was being done when compaction fired]

 ## Blocked
@@ -1324,20 +1404,20 @@ Be specific with file paths, commands, line numbers, and results.]
 ## Resolved Questions
 [Questions the user asked that were ALREADY answered — include the answer so it is not repeated]

-## Pending User Asks
-[Questions or requests from the user that have NOT yet been answered or fulfilled. If none, write "None."]
+{HISTORICAL_PENDING_ASKS_HEADING}
+[Questions or requests from the user that have NOT yet been answered or fulfilled. These are STALE — they were from the compacted turns. Write them here for reference only. The agent must NOT act on them unless the latest user message explicitly requests it. If none, write "None."]

 ## Relevant Files
 [Files read, modified, or created — with brief note on each]

-## Remaining Work
-[What remains to be done — framed as context, not instructions]
+{HISTORICAL_REMAINING_WORK_HEADING}
+[What remains to be done — framed as STALE context for reference only. The agent must NOT resume this work unless the latest user message explicitly asks for it.]

 ## Critical Context
 [Any specific values, error messages, configuration details, or data that would be lost without explicit preservation. NEVER include API keys, tokens, passwords, or credentials — write [REDACTED] instead.]

 Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.
-
+{_temporal_anchoring_rule}
 Write only the summary body. Do not include any preamble or prefix."""

        if self._previous_summary:
@@ -1706,7 +1786,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        Context compressor bug (#10896): ``_align_boundary_backward`` can pull
        ``cut_idx`` past a user message when it tries to keep tool_call/result
        groups together.  If the last user message ends up in the *compressed*
-        middle region the LLM summariser writes it into "Pending User Asks",
+        middle region the LLM summariser writes it into "Historical Pending User Asks",
        but ``SUMMARY_PREFIX`` tells the next model to respond only to user
        messages *after* the summary — so the task effectively disappears from
        the active context, causing the agent to stall, repeat completed work,
@@ -1787,6 +1867,41 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            accumulated += msg_tokens
            cut_idx = i

+        # If the backward walk never broke early because the entire transcript
+        # fits within soft_ceiling, accumulated now holds the total transcript
+        # size.  Without intervention _ensure_last_user_message_in_tail pushes
+        # cut_idx forward to include the last user message, and the caller then
+        # either hits its compress_start >= compress_end guard and returns the
+        # messages unchanged (no-op) or compresses a single message — both of
+        # which trigger the infinite compaction loop described in #40803.
+        #
+        # Fix: when the whole transcript fits in soft_ceiling, compute a
+        # meaningful cut point using the raw (non-inflated) budget so that
+        # compression actually summarizes a worthwhile middle section.
+        if cut_idx <= head_end and accumulated <= soft_ceiling and accumulated > 0:
+            # The entire compressible region fits in the soft ceiling.
+            # Re-walk with the raw budget (no 1.5x multiplier) to find a
+            # split that gives the summarizer something useful.
+            raw_budget = token_budget
+            raw_accumulated = 0
+            for j in range(n - 1, head_end - 1, -1):
+                raw_msg = messages[j]
+                raw_content = raw_msg.get("content") or ""
+                raw_len = _content_length_for_budget(raw_content)
+                raw_tok = raw_len // _CHARS_PER_TOKEN + 10
+                for tc in raw_msg.get("tool_calls") or []:
+                    if isinstance(tc, dict):
+                        args = tc.get("function", {}).get("arguments", "")
+                        raw_tok += len(args) // _CHARS_PER_TOKEN
+                if raw_accumulated + raw_tok > raw_budget and (n - j) >= min_tail:
+                    cut_idx = j
+                    break
+                raw_accumulated += raw_tok
+                cut_idx = j
+            # If the raw-budget walk also consumed everything (very small
+            # transcript), fall through — the existing fallback logic below
+            # will still force a minimal cut after head_end.
+
        # Ensure we protect at least min_tail messages
        fallback_cut = n - min_tail
        cut_idx = min(cut_idx, fallback_cut)
@@ -1889,6 +2004,21 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        compress_end = self._find_tail_cut_by_tokens(messages, compress_start)

        if compress_start >= compress_end:
+            # No compressible window — the entire transcript fits within
+            # the tail budget (soft_ceiling).  Without recording this as
+            # an ineffective compression the anti-thrashing guard in
+            # should_compress() never fires and every subsequent turn
+            # re-triggers a no-op compression loop.  (#40803)
+            self._ineffective_compression_count += 1
+            self._last_compression_savings_pct = 0.0
+            if not self.quiet_mode:
+                logger.warning(
+                    "Compression skipped: compress_start (%d) >= compress_end (%d) "
+                    "— transcript fits within tail budget, nothing to compress. "
+                    "ineffective_compression_count=%d",
+                    compress_start, compress_end,
+                    self._ineffective_compression_count,
+                )
            return messages

        turns_to_summarize = messages[compress_start:compress_end]
@@ -1909,6 +2039,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            if summary_body and not self._previous_summary:
                self._previous_summary = summary_body
            turns_to_summarize = messages[max(compress_start, summary_idx + 1):compress_end]
+        elif self._previous_summary:
+            # No handoff summary found in the current messages, but
+            # _previous_summary is non-empty — it was set by a different
+            # (now-ended) session (e.g., a cron job, a prior /new).  Discard
+            # it so _generate_summary() does not inject cross-session content
+            # into the summarizer prompt via the iterative-update path.
+            self._previous_summary = None

        if not self.quiet_mode:
            logger.info(
--- a/agent/context_references.py
+++ b/agent/context_references.py
@@ -246,7 +246,14 @@ def _expand_file_reference(
    if not path.is_file():
        return f"{ref.raw}: path is not a file", None
    if _is_binary_file(path):
-        return f"{ref.raw}: binary files are not supported", None
+        # A binary file can't be inlined as text, but it IS on disk (the agent's
+        # tools run where this resolves — the local cwd, or the staged copy in a
+        # remote session workspace). Returning a bare "not supported" warning
+        # with no content was a dead end: the model saw a failure and gave up
+        # (told the user the file type wasn't supported). Instead, hand it an
+        # actionable block — the path, type, size, and a nudge to use its tools —
+        # so it can read/convert/view the file itself.
+        return None, _binary_reference_block(ref, path)

    text = path.read_text(encoding="utf-8")
    if ref.line_start is not None:
@@ -290,6 +297,7 @@ def _expand_git_reference(
            capture_output=True,
            text=True,
            timeout=30,
+            stdin=subprocess.DEVNULL,
        )
    except subprocess.TimeoutExpired:
        return f"{ref.raw}: git command timed out (30s)", None
@@ -482,6 +490,7 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
            capture_output=True,
            text=True,
            timeout=10,
+            stdin=subprocess.DEVNULL,
        )
    except (FileNotFoundError, OSError, subprocess.TimeoutExpired):
        return None
@@ -491,6 +500,30 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
    return files[:limit]


+def _human_bytes(n: int) -> str:
+    size = float(n)
+    for unit in ("B", "KB", "MB", "GB"):
+        if size < 1024 or unit == "GB":
+            return f"{int(size)} {unit}" if unit == "B" else f"{size:.1f} {unit}"
+        size /= 1024
+    return f"{size:.1f} GB"
+
+
+def _binary_reference_block(ref: ContextReference, path: Path) -> str:
+    mime, _ = mimetypes.guess_type(path.name)
+    mime = mime or "application/octet-stream"
+    try:
+        size = _human_bytes(path.stat().st_size)
+    except OSError:
+        size = "unknown size"
+    return (
+        f"📎 {ref.raw} ({mime}, {size}) — binary file, not inlined as text. "
+        f"It is available on disk at `{path}`. Use your tools to work with it "
+        f"(read or convert it, extract its text, or view/render it as needed); "
+        f"do not tell the user the file type is unsupported."
+    )
+
+
 def _file_metadata(path: Path) -> str:
    if _is_binary_file(path):
        return f"{path.stat().st_size} bytes"
--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@@ -507,12 +507,29 @@ def compress_context(
            agent._session_db.end_session(agent.session_id, "compression")
            old_session_id = agent.session_id
            agent.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
+            # Ordering contract: the agent thread updates the contextvar here;
+            # the gateway propagates to SessionEntry after run_in_executor returns.
            try:
                from gateway.session_context import set_current_session_id

                set_current_session_id(agent.session_id)
            except Exception:
                os.environ["HERMES_SESSION_ID"] = agent.session_id
+            # The gateway/tools session context (ContextVar + env) and the
+            # logging session context are SEPARATE mechanisms. The call above
+            # moves the former; the ``[session_id]`` tag on log lines comes
+            # from ``hermes_logging._session_context`` (set once per turn in
+            # conversation_loop.py). Without this, post-rotation log lines in
+            # the same turn keep the STALE old id while the message/DB/gateway
+            # state carry the new one — breaking log correlation exactly at the
+            # compaction boundary (see #34089). Guarded separately so a logging
+            # failure can never regress the routing update above.
+            try:
+                from hermes_logging import set_session_context
+
+                set_session_context(agent.session_id)
+            except Exception:
+                pass
            agent._session_db_created = False
            agent._session_db.create_session(
                session_id=agent.session_id,
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@@ -91,6 +91,7 @@ AUTH_TYPE_OAUTH = "oauth"
 AUTH_TYPE_API_KEY = "api_key"

 SOURCE_MANUAL = "manual"
+SOURCE_MANUAL_DEVICE_CODE = f"{SOURCE_MANUAL}:device_code"

 STRATEGY_FILL_FIRST = "fill_first"
 STRATEGY_ROUND_ROBIN = "round_robin"
@@ -374,7 +375,7 @@ def _iter_custom_providers(config: Optional[dict] = None):
        yield _normalize_custom_pool_name(name), entry


-def get_custom_provider_pool_key(base_url: str, provider_name: Optional[str] = None) -> Optional[str]:
+def get_custom_provider_pool_key(base_url: Optional[str], provider_name: Optional[str] = None) -> Optional[str]:
    """Look up the custom_providers list in config.yaml and return 'custom:<name>' for a matching base_url.

    When provider_name is given, prefer matching by name first (solving the case where
--- a/agent/credits_tracker.py
+++ b/agent/credits_tracker.py
@@ -194,17 +194,71 @@ class AgentNotice:
    id: Optional[str] = None


+# ── is_free_tier_model (local-data-only free-model check) ────────────────────
+
+
+def is_free_tier_model(model: str, base_url: str = "") -> bool:
+    """Return True when *model* is a Nous free-tier model, using ONLY local data.
+
+    Two signals, both zero-network:
+
+    1. The ``:free`` suffix — the canonical Nous free SKU marker (e.g.
+       ``nvidia/nemotron-3-ultra:free``). Free by construction on the API side
+       (spend is forced to 0 for ``:free`` ids).
+    2. A peek into the in-process pricing cache in ``hermes_cli.models``
+       (populated when the model picker fetched ``/v1/models`` pricing for
+       *base_url*). PEEK ONLY — a cache miss never triggers a fetch. This is
+       CLI/TUI-session best-effort: gateway sessions never run the picker's
+       pricing fetch, so suppression there rests entirely on the ``:free``
+       suffix (which all Nous free SKUs carry).
+
+    Falls back to False (the depleted notice still shows) on any error: wrongly
+    showing the warning is recoverable noise; wrongly hiding it on a paid model
+    would mask a real billing block.
+    """
+    if not model:
+        return False
+    if model.endswith(":free"):
+        return True
+    if not base_url:
+        return False
+    try:
+        from hermes_cli.models import _is_model_free, _pricing_cache
+
+        # Mirror get_pricing_for_provider's key normalization: the agent's
+        # Nous base_url is /v1-suffixed (https://inference-api.nousresearch.com/v1)
+        # but the picker keys _pricing_cache on the pre-/v1 root.
+        key = base_url.rstrip("/")
+        if key.endswith("/v1"):
+            key = key[:-3].rstrip("/")
+        pricing = _pricing_cache.get(key)
+        if not pricing:
+            return False
+        return _is_model_free(model, pricing)
+    except Exception:
+        return False
+
+
 # ── evaluate_credits_notices (pure reconciliation function) ──────────────────


 def evaluate_credits_notices(
    state: CreditsState,
    latch: dict,
+    *,
+    model_is_free: bool = False,
 ) -> tuple[list[AgentNotice], list[str]]:
    """Reconcile credits notices against the latch. Mutates ``latch`` IN PLACE.

    latch = {"active": set[str], "seen_below_90": bool, "usage_band": Optional[int]}.

+    ``model_is_free``: True when the session's active model is a Nous free-tier
+    model (see :func:`is_free_tier_model`). Suppresses the ``credits.depleted``
+    notice — a depleted account on a free model can keep inferencing, so the
+    error banner is noise (and confuses free-tier users who never had credits).
+    Suppression does NOT emit the "restored" success notice; that fires only on
+    a genuine ``paid_access`` flip back to True.
+
    Returns ``(to_show: list[AgentNotice], to_clear: list[str])``.
    Caller emits to_clear FIRST, then to_show.

@@ -284,7 +338,11 @@ def evaluate_credits_notices(
        active.discard("credits.grant_spent")

    # ── depleted ─────────────────────────────────────────────────────────────
-    if depleted_cond and "credits.depleted" not in active:
+    # Suppressed while the active model is free: inference still works there,
+    # so the error banner would just alarm users (free-tier users especially,
+    # who never had paid credits to "lose").
+    show_depleted = depleted_cond and not model_is_free
+    if show_depleted and "credits.depleted" not in active:
        to_show.append(
            AgentNotice(
                text="✕ Credit access paused · run /usage for balance",
@@ -295,20 +353,23 @@ def evaluate_credits_notices(
            )
        )
        active.add("credits.depleted")
-    elif "credits.depleted" in active and not depleted_cond:
+    elif "credits.depleted" in active and not show_depleted:
        to_clear.append("credits.depleted")
        active.discard("credits.depleted")
-        # Recovery: also emit the success notice
-        to_show.append(
-            AgentNotice(
-                text="✓ Credit access restored",
-                level="success",
-                kind="ttl",
-                ttl_ms=CREDITS_RESTORED_TTL_MS,
-                key="credits.restored",
-                id="credits.restored",
+        if not depleted_cond:
+            # Genuine recovery (paid_access flipped back True): also emit the
+            # success notice. A clear caused by switching to a free model while
+            # still depleted must NOT claim access was restored.
+            to_show.append(
+                AgentNotice(
+                    text="✓ Credit access restored",
+                    level="success",
+                    kind="ttl",
+                    ttl_ms=CREDITS_RESTORED_TTL_MS,
+                    key="credits.restored",
+                    id="credits.restored",
+                )
            )
-        )

    return (to_show, to_clear)

--- a/agent/curator.py
+++ b/agent/curator.py
@@ -25,7 +25,6 @@ import json
 import logging
 import os
 import re
-import tempfile
 import threading
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
@@ -33,6 +32,7 @@ from typing import Any, Callable, Dict, List, NamedTuple, Optional, Set

 from hermes_constants import get_hermes_home
 from tools import skill_usage
+from utils import atomic_json_write

 logger = logging.getLogger(__name__)

@@ -97,20 +97,7 @@ def load_state() -> Dict[str, Any]:
 def save_state(data: Dict[str, Any]) -> None:
    path = _state_file()
    try:
-        path.parent.mkdir(parents=True, exist_ok=True)
-        fd, tmp = tempfile.mkstemp(dir=str(path.parent), prefix=".curator_state_", suffix=".tmp")
-        try:
-            with os.fdopen(fd, "w", encoding="utf-8") as f:
-                json.dump(data, f, indent=2, sort_keys=True, ensure_ascii=False)
-                f.flush()
-                os.fsync(f.fileno())
-            os.replace(tmp, path)
-        except BaseException:
-            try:
-                os.unlink(tmp)
-            except OSError:
-                pass
-            raise
+        atomic_json_write(path, data, indent=2, sort_keys=True)
    except Exception as e:
        logger.debug("Failed to save curator state: %s", e, exc_info=True)

@@ -375,6 +362,11 @@ CURATOR_REVIEW_PROMPT = (
    "into ~/.hermes/skills/.archive/) is the maximum destructive action. "
    "Archives are recoverable; deletion is not.\n"
    "3. DO NOT touch skills shown as pinned=yes. Skip them entirely.\n"
+    "3b. DO NOT archive, delete, consolidate, move, or otherwise modify any "
+    "skill named in the protected built-ins list (currently: plan). These "
+    "back load-bearing UX (slash-command entry points referenced in docs and "
+    "tips) and are filtered out of the candidate list below — never resurrect "
+    "one as an archive or absorb target.\n"
    "4. DO NOT use usage counters as a reason to skip consolidation. The "
    "counters are new and often mostly zero. Judge overlap on CONTENT, "
    "not on use_count. 'use=0' is not evidence a skill is valuable; it's "
--- a/agent/display.py
+++ b/agent/display.py
@@ -858,6 +858,20 @@ def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]
    return False, ""


+def _used_free_parallel(result: str | None) -> bool:
+    """True when a web result came from Parallel's free Search MCP.
+
+    Only the keyless Parallel path tags its result with ``provider="parallel"``;
+    the paid REST path and every other provider omit it. Used to label the tool
+    line "Parallel search" / "Parallel fetch" exactly when the free MCP served
+    the call.
+    """
+    if not isinstance(result, str) or '"provider"' not in result:
+        return False
+    data = safe_json_loads(result)
+    return isinstance(data, dict) and str(data.get("provider", "")).lower() == "parallel"
+
+
 def get_cute_tool_message(
    tool_name: str, args: dict, duration: float, result: str | None = None,
 ) -> str:
@@ -895,15 +909,17 @@ def get_cute_tool_message(
        return f"{line}{failure_suffix}"

    if tool_name == "web_search":
-        return _wrap(f"┊ 🔍 search    {_trunc(args.get('query', ''), 42)}  {dur}")
+        verb = "Parallel search" if _used_free_parallel(result) else "search"
+        return _wrap(f"┊ 🔍 {verb:<9} {_trunc(args.get('query', ''), 42)}  {dur}")
    if tool_name == "web_extract":
+        verb = "Parallel fetch" if _used_free_parallel(result) else "fetch"
        urls = args.get("urls", [])
        if urls:
            url = urls[0] if isinstance(urls, list) else str(urls)
            domain = url.replace("https://", "").replace("http://", "").split("/")[0]
            extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
-            return _wrap(f"┊ 📄 fetch     {_trunc(domain, 35)}{extra}  {dur}")
-        return _wrap(f"┊ 📄 fetch     pages  {dur}")
+            return _wrap(f"┊ 📄 {verb:<9} {_trunc(domain, 35)}{extra}  {dur}")
+        return _wrap(f"┊ 📄 {verb:<9} pages  {dur}")
    if tool_name == "terminal":
        return _wrap(f"┊ 💻 $         {_trunc(args.get('command', ''), 42)}  {dur}")
    if tool_name == "process":
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@@ -549,14 +549,32 @@ def classify_api_error(
            should_fallback=True,
        )

-    # Anthropic thinking block signature invalid (400).
+    # Anthropic thinking block recovery (400).  Two distinct failure modes,
+    # same recovery (strip all reasoning_details and retry without thinking
+    # blocks — see the thinking_signature handler in conversation_loop.py):
+    #   1. Signature mismatch: a thinking block is signed against the full
+    #      turn content; any upstream mutation (context compression, session
+    #      truncation, message merging) invalidates the signature.
+    #      Pattern: "signature" + "thinking".
+    #   2. Frozen-block mutation: Anthropic rejects any change to the
+    #      thinking/redacted_thinking blocks in the *latest* assistant
+    #      message — "`thinking` or `redacted_thinking` blocks in the latest
+    #      assistant message cannot be modified. These blocks must remain as
+    #      they were in the original response."  This carries no "signature"
+    #      token, so the original pattern missed it and the turn hard-aborted
+    #      as a non-retryable client error instead of self-healing.
+    #      Pattern: "thinking" + ("cannot be modified" | "must remain as they were").
    # Don't gate on provider — OpenRouter proxies Anthropic errors, so the
    # provider may be "openrouter" even though the error is Anthropic-specific.
-    # The message pattern ("signature" + "thinking") is unique enough.
+    # The combined patterns are unique enough.
    if (
        status_code == 400
-        and "signature" in error_msg
        and "thinking" in error_msg
+        and (
+            "signature" in error_msg
+            or "cannot be modified" in error_msg
+            or "must remain as they were" in error_msg
+        )
    ):
        return _result(
            FailoverReason.thinking_signature,
@@ -966,6 +984,34 @@ def _classify_400(
            should_fallback=False,
        )

+    # Request-validation errors (unsupported / unknown parameter) MUST be
+    # checked BEFORE context_overflow.  A GPT-5 model rejecting max_tokens
+    # returns:
+    #   "Unsupported parameter: 'max_tokens' is not supported with this model.
+    #    Use 'max_completion_tokens' instead."
+    # That string contains the literal substring "max_tokens", which is one of
+    # the _CONTEXT_OVERFLOW_PATTERNS — so without this guard the 400 is
+    # misclassified as context_overflow, routed into the compression loop,
+    # re-sent with the same bad parameter, and ends in "Cannot compress
+    # further".  These errors are deterministic (every retry gets the identical
+    # rejection), so classify as a non-retryable format_error and fall back.
+    #
+    # NOTE: we deliberately do NOT key off the generic ``invalid_request_error``
+    # code here — OpenAI stamps that same code on genuine context-overflow 400s,
+    # so matching it would mis-route real overflows away from compression. The
+    # unambiguous signals are the explicit "unsupported/unknown parameter"
+    # message text and the specific parameter-level error codes.
+    if (
+        any(p in error_msg for p in _REQUEST_VALIDATION_PATTERNS
+            if p != "invalid_request_error")
+        or error_code_lower in {"unknown_parameter", "unsupported_parameter"}
+    ):
+        return result_fn(
+            FailoverReason.format_error,
+            retryable=False,
+            should_fallback=True,
+        )
+
    # Context overflow from 400
    if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
        return result_fn(
--- a/agent/image_routing.py
+++ b/agent/image_routing.py
@@ -219,6 +219,35 @@ def _supports_vision_override(
        coerced = _coerce_capability_bool(per_model.get("supports_vision"))
        if coerced is not None:
            return coerced
+
+    # 2b. Legacy list-style custom_providers. Entries are dicts with a
+    # "name" key and a nested "models" dict. Match by provider name (which
+    # may appear as the raw name or "custom:<name>" at runtime).
+    custom_providers = cfg.get("custom_providers")
+    if isinstance(custom_providers, list):
+        # Build candidate names: the provider value and the config provider
+        # value, both raw and with "custom:" prefix stripped/added.
+        candidate_names: set = set()
+        for p in filter(None, (provider, config_provider)):
+            candidate_names.add(p)
+            if p.startswith("custom:"):
+                candidate_names.add(p[len("custom:"):])
+            else:
+                candidate_names.add(f"custom:{p}")
+        for entry_raw in custom_providers:
+            if not isinstance(entry_raw, dict):
+                continue
+            entry_name = str(entry_raw.get("name") or "").strip()
+            if entry_name not in candidate_names:
+                continue
+            models_raw = entry_raw.get("models")
+            models_cfg = models_raw if isinstance(models_raw, dict) else {}
+            per_model_raw = models_cfg.get(model)
+            per_model = per_model_raw if isinstance(per_model_raw, dict) else {}
+            coerced = _coerce_capability_bool(per_model.get("supports_vision"))
+            if coerced is not None:
+                return coerced
+
    return None


--- a/agent/insights.py
+++ b/agent/insights.py
@@ -20,23 +20,17 @@ import json
 import time
 from collections import Counter, defaultdict
 from datetime import datetime
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional

 from agent.usage_pricing import (
    CanonicalUsage,
-    DEFAULT_PRICING,
    estimate_usage_cost,
    format_duration_compact,
    has_known_pricing,
 )

-_DEFAULT_PRICING = DEFAULT_PRICING


-def _has_known_pricing(model_name: str, provider: str = None, base_url: str = None) -> bool:
-    """Check if a model has known pricing (vs unknown/custom endpoint)."""
-    return has_known_pricing(model_name, provider=provider, base_url=base_url)
-

 def _estimate_cost(
    session_or_model: Dict[str, Any] | str,
@@ -45,8 +39,8 @@ def _estimate_cost(
    *,
    cache_read_tokens: int = 0,
    cache_write_tokens: int = 0,
-    provider: str = None,
-    base_url: str = None,
+    provider: Optional[str] = None,
+    base_url: Optional[str] = None,
 ) -> tuple[float, str]:
    """Estimate the USD cost for a session row or a model/token tuple."""
    if isinstance(session_or_model, dict):
@@ -77,9 +71,6 @@ def _estimate_cost(
    return float(result.amount_usd or 0.0), result.status


-def _format_duration(seconds: float) -> str:
-    """Format seconds into a human-readable duration string."""
-    return format_duration_compact(seconds)


 def _bar_chart(values: List[int], max_width: int = 20) -> List[str]:
@@ -435,7 +426,7 @@ class InsightsEngine:
                included_cost_sessions += 1
            elif status == "unknown":
                unknown_cost_sessions += 1
-            if _has_known_pricing(model, s.get("billing_provider"), s.get("billing_base_url")):
+            if has_known_pricing(model, s.get("billing_provider"), s.get("billing_base_url")):
                models_with_pricing.add(display)
            else:
                models_without_pricing.add(display)
@@ -508,7 +499,7 @@ class InsightsEngine:
            d["tool_calls"] += s.get("tool_call_count") or 0
            estimate, status = _estimate_cost(s)
            d["cost"] += estimate
-            d["has_pricing"] = _has_known_pricing(model, s.get("billing_provider"), s.get("billing_base_url"))
+            d["has_pricing"] = has_known_pricing(model, s.get("billing_provider"), s.get("billing_base_url"))
            d["cost_status"] = status

        result = [
@@ -679,7 +670,7 @@ class InsightsEngine:
            top.append({
                "label": "Longest session",
                "session_id": longest["id"][:16],
-                "value": _format_duration(dur),
+                "value": format_duration_compact(dur),
                "date": datetime.fromtimestamp(longest["started_at"]).strftime("%b %d"),
            })

@@ -764,7 +755,7 @@ class InsightsEngine:
        lines.append(f"  Input tokens:      {o['total_input_tokens']:<12,}  Output tokens:   {o['total_output_tokens']:,}")
        lines.append(f"  Total tokens:      {o['total_tokens']:,}")
        if o["total_hours"] > 0:
-            lines.append(f"  Active time:       ~{_format_duration(o['total_hours'] * 3600):<11}  Avg session:     ~{_format_duration(o['avg_session_duration'])}")
+            lines.append(f"  Active time:       ~{format_duration_compact(o['total_hours'] * 3600):<11}  Avg session:     ~{format_duration_compact(o['avg_session_duration'])}")
        lines.append(f"  Avg msgs/session:  {o['avg_messages_per_session']:.1f}")
        lines.append("")

@@ -879,7 +870,7 @@ class InsightsEngine:
        lines.append(f"**Sessions:** {o['total_sessions']} | **Messages:** {o['total_messages']:,} | **Tool calls:** {o['total_tool_calls']:,}")
        lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
        if o["total_hours"] > 0:
-            lines.append(f"**Active time:** ~{_format_duration(o['total_hours'] * 3600)} | **Avg session:** ~{_format_duration(o['avg_session_duration'])}")
+            lines.append(f"**Active time:** ~{format_duration_compact(o['total_hours'] * 3600)} | **Avg session:** ~{format_duration_compact(o['avg_session_duration'])}")
        lines.append("")

        # Models (top 5)
--- a/agent/lsp/install.py
+++ b/agent/lsp/install.py
@@ -262,6 +262,7 @@ def _install_npm(
            capture_output=True,
            text=True,
            timeout=300,
+            stdin=subprocess.DEVNULL,
        )
        if proc.returncode != 0:
            logger.warning(
@@ -310,6 +311,7 @@ def _install_go(pkg: str, bin_name: str) -> Optional[str]:
            text=True,
            timeout=600,
            env=env,
+            stdin=subprocess.DEVNULL,
        )
        if proc.returncode != 0:
            logger.warning(
@@ -347,6 +349,7 @@ def _install_pip(pkg: str, bin_name: str) -> Optional[str]:
            capture_output=True,
            text=True,
            timeout=300,
+            stdin=subprocess.DEVNULL,
        )
        if proc.returncode != 0:
            logger.warning(
--- a/agent/memory_manager.py
+++ b/agent/memory_manager.py
@@ -28,6 +28,8 @@ from __future__ import annotations
 import logging
 import re
 import inspect
+import threading
+from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Dict, List, Optional

 from agent.memory_provider import MemoryProvider
@@ -35,6 +37,12 @@ from tools.registry import tool_error

 logger = logging.getLogger(__name__)

+# How long shutdown_all() waits for in-flight background sync/prefetch work
+# to drain before abandoning it, so a wedged provider cannot stall teardown
+# here. NOTE: ThreadPoolExecutor joins its workers at interpreter exit (they
+# are not daemon threads on Python 3.9+), so a hung task can still delay exit.
+_SYNC_DRAIN_TIMEOUT_S = 5.0
+

 # ---------------------------------------------------------------------------
 # Context fencing helpers
@@ -252,6 +260,13 @@ class MemoryManager:
        self._providers: List[MemoryProvider] = []
        self._tool_to_provider: Dict[str, MemoryProvider] = {}
        self._has_external: bool = False  # True once a non-builtin provider is added
+        # Background executor for end-of-turn sync/prefetch. Lazily created on
+        # first use so the common builtin-only path spawns no extra threads.
+        # A single worker serializes a provider's writes (turn N must land
+        # before turn N+1) and caps thread growth at one per manager. See
+        # _submit_background() and the sync_all/queue_prefetch_all rationale.
+        self._sync_executor: Optional[ThreadPoolExecutor] = None
+        self._sync_executor_lock = threading.Lock()

    # -- Registration --------------------------------------------------------

@@ -375,15 +390,27 @@ class MemoryManager:
        return "\n\n".join(parts)

    def queue_prefetch_all(self, query: str, *, session_id: str = "") -> None:
-        """Queue background prefetch on all providers for the next turn."""
-        for provider in self._providers:
-            try:
-                provider.queue_prefetch(query, session_id=session_id)
-            except Exception as e:
-                logger.debug(
-                    "Memory provider '%s' queue_prefetch failed (non-fatal): %s",
-                    provider.name, e,
-                )
+        """Queue background prefetch on all providers for the next turn.
+
+        Provider work is dispatched to a background worker so a slow or
+        wedged provider can never block the caller. See ``sync_all`` for
+        the full rationale (agent stuck "running" minutes after a turn).
+        """
+        providers = list(self._providers)
+        if not providers:
+            return
+
+        def _run() -> None:
+            for provider in providers:
+                try:
+                    provider.queue_prefetch(query, session_id=session_id)
+                except Exception as e:
+                    logger.debug(
+                        "Memory provider '%s' queue_prefetch failed (non-fatal): %s",
+                        provider.name, e,
+                    )
+
+        self._submit_background(_run)

    # -- Sync ----------------------------------------------------------------

@@ -407,27 +434,120 @@ class MemoryManager:
        session_id: str = "",
        messages: Optional[List[Dict[str, Any]]] = None,
    ) -> None:
-        """Sync a completed turn to all providers."""
-        for provider in self._providers:
+        """Sync a completed turn to all providers.
+
+        Runs on a background worker thread, NOT inline on the
+        turn-completion path. A provider's ``sync_turn`` may make a
+        blocking network/daemon call (a misconfigured Hindsight daemon
+        was observed blocking ~298s before failing); doing that inline
+        held ``run_conversation`` open long after the user saw their
+        response, so every interface (CLI, TUI, gateway) kept the agent
+        marked "running" for minutes and any follow-up message triggered
+        an aggressive interrupt. Dispatching off-thread means a slow or
+        broken provider can never stall the turn — the sync simply
+        completes (or fails, logged) in the background.
+
+        Writes are serialized through a single worker so turn N lands
+        before turn N+1; provider implementations don't need their own
+        ordering guarantees.
+        """
+        providers = list(self._providers)
+        if not providers:
+            return
+
+        def _run() -> None:
+            for provider in providers:
+                try:
+                    if messages is not None and self._provider_sync_accepts_messages(provider):
+                        provider.sync_turn(
+                            user_content,
+                            assistant_content,
+                            session_id=session_id,
+                            messages=messages,
+                        )
+                    else:
+                        provider.sync_turn(
+                            user_content,
+                            assistant_content,
+                            session_id=session_id,
+                        )
+                except Exception as e:
+                    logger.warning(
+                        "Memory provider '%s' sync_turn failed: %s",
+                        provider.name, e,
+                    )
+
+        self._submit_background(_run)
+
+    # -- Background dispatch -------------------------------------------------
+
+    def _submit_background(self, fn) -> None:
+        """Run ``fn`` on the manager's background worker.
+
+        The executor is created lazily and shared across calls. If the
+        executor can't be created or has already been shut down, ``fn``
+        runs inline as a last-resort fallback — losing the async benefit
+        but never losing the write itself. ``fn`` must do its own
+        per-provider error handling; this wrapper only guards executor
+        plumbing.
+        """
+        executor = self._get_sync_executor()
+        if executor is None:
+            # Executor unavailable (shut down / creation failed) — run
+            # inline rather than drop the work. Slow, but correct.
            try:
-                if messages is not None and self._provider_sync_accepts_messages(provider):
-                    provider.sync_turn(
-                        user_content,
-                        assistant_content,
-                        session_id=session_id,
-                        messages=messages,
+                fn()
+            except Exception as e:  # pragma: no cover - fn guards internally
+                logger.debug("Inline memory background task failed: %s", e)
+            return
+        try:
+            executor.submit(fn)
+        except RuntimeError:
+            # Executor was shut down between the get and the submit
+            # (teardown race). Fall back to inline.
+            try:
+                fn()
+            except Exception as e:  # pragma: no cover - fn guards internally
+                logger.debug("Inline memory background task failed: %s", e)
+
+    def _get_sync_executor(self) -> Optional[ThreadPoolExecutor]:
+        """Lazily create the single-worker background executor."""
+        if self._sync_executor is not None:
+            return self._sync_executor
+        with self._sync_executor_lock:
+            if self._sync_executor is None:
+                try:
+                    self._sync_executor = ThreadPoolExecutor(
+                        max_workers=1,
+                        thread_name_prefix="mem-sync",
                    )
-                else:
-                    provider.sync_turn(
-                        user_content,
-                        assistant_content,
-                        session_id=session_id,
-                    )
-            except Exception as e:
-                logger.warning(
-                    "Memory provider '%s' sync_turn failed: %s",
-                    provider.name, e,
-                )
+                except Exception as e:  # pragma: no cover - resource exhaustion
+                    logger.warning("Failed to create memory sync executor: %s", e)
+                    return None
+            return self._sync_executor
+
+    def flush_pending(self, timeout: Optional[float] = None) -> bool:
+        """Block until queued sync/prefetch work has drained.
+
+        Single-worker executor means submitting a sentinel and waiting on
+        it guarantees every previously-submitted task has run. Returns
+        True if the barrier completed within ``timeout`` (or no executor
+        exists), False on timeout. Used at real session boundaries and by
+        tests that need to assert provider state deterministically.
+        """
+        executor = self._sync_executor
+        if executor is None:
+            return True
+        try:
+            fut = executor.submit(lambda: None)
+        except RuntimeError:
+            # Executor already shut down — nothing pending.
+            return True
+        try:
+            fut.result(timeout=timeout)
+            return True
+        except Exception:
+            return False

    # -- Tools ---------------------------------------------------------------

@@ -653,7 +773,15 @@ class MemoryManager:
                )

    def shutdown_all(self) -> None:
-        """Shut down all providers (reverse order for clean teardown)."""
+        """Shut down all providers (reverse order for clean teardown).
+
+        Drains the background sync/prefetch executor first (bounded by
+        ``_SYNC_DRAIN_TIMEOUT_S``) so a turn's final sync has a chance to
+        land before providers are torn down. Only this call is bounded:
+        the executor's worker is joined at interpreter exit, so a task
+        still wedged past the drain window can delay process exit.
+        """
+        self._drain_sync_executor()
        for provider in reversed(self._providers):
            try:
                provider.shutdown()
@@ -663,6 +791,52 @@ class MemoryManager:
                    provider.name, e,
                )

+    def _drain_sync_executor(self) -> None:
+        """Shut down the background executor, waiting briefly for drain.
+
+        Bounded by ``_SYNC_DRAIN_TIMEOUT_S``: a wedged provider must never
+        hang process/session teardown. We stop accepting new work and
+        cancel anything still queued, then wait at most the drain timeout
+        for the currently-running task on a watcher thread. The worker is
+        still joined at interpreter exit, so an over-running task can delay it.
+        """
+        with self._sync_executor_lock:
+            executor = self._sync_executor
+            self._sync_executor = None
+        if executor is None:
+            return
+        try:
+            # Stop accepting new work and drop anything still queued, but
+            # do NOT block here — cancel_futures cancels not-yet-started
+            # tasks; the in-flight one keeps running on its worker thread.
+            executor.shutdown(wait=False, cancel_futures=True)
+        except TypeError:
+            # Older Python without cancel_futures kwarg.
+            try:
+                executor.shutdown(wait=False)
+            except Exception as e:  # pragma: no cover
+                logger.debug("Memory sync executor shutdown failed: %s", e)
+            return
+        except Exception as e:  # pragma: no cover
+            logger.debug("Memory sync executor shutdown failed: %s", e)
+            return
+        # Give an in-flight sync a bounded chance to finish on a watcher
+        # thread so we don't block the caller past the drain timeout.
+        drainer = threading.Thread(
+            target=lambda: self._bounded_executor_wait(executor),
+            daemon=True,
+            name="mem-sync-drain",
+        )
+        drainer.start()
+        drainer.join(timeout=_SYNC_DRAIN_TIMEOUT_S)
+
+    @staticmethod
+    def _bounded_executor_wait(executor: ThreadPoolExecutor) -> None:
+        try:
+            executor.shutdown(wait=True)
+        except Exception as e:  # pragma: no cover
+            logger.debug("Memory sync executor drain wait failed: %s", e)
+
    def initialize_all(self, session_id: str, **kwargs) -> None:
        """Initialize all providers.

--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -141,6 +141,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    # fuzzy-match collisions (e.g. "anthropic/claude-sonnet-4" is a
    # substring of "anthropic/claude-sonnet-4.6").
    # OpenRouter-prefixed models resolve via OpenRouter live API or models.dev.
+    "claude-fable-5": 1000000,
+    "claude-fable": 1000000,
    "claude-opus-4-8": 1000000,
    "claude-opus-4.8": 1000000,
    "claude-opus-4-7": 1000000,
@@ -968,6 +970,16 @@ def parse_available_output_tokens_from_error(error_msg: str) -> Optional[int]:
        # OpenRouter/Nous phrasing of the same condition.
        "in the output" in error_lower
        and "maximum context length" in error_lower
+    ) or (
+        # LM Studio / llama.cpp / some OpenAI-compatible servers:
+        #   "This model's maximum context length is 65536 tokens. However, you
+        #    requested 65536 output tokens and your prompt contains 77409
+        #    characters ..."
+        # The "requested N output tokens" phrasing means the OUTPUT cap is the
+        # problem (the input itself fits) — reduce max_tokens, don't compress.
+        "maximum context length" in error_lower
+        and "requested" in error_lower
+        and "output tokens" in error_lower
    )
    if not is_output_cap_error:
        return None
@@ -999,6 +1011,22 @@ def parse_available_output_tokens_from_error(error_msg: str) -> Optional[int]:
        if _available >= 1:
            return _available

+    # LM Studio / llama.cpp style: context window is reported in tokens but the
+    # prompt size is reported in CHARACTERS, e.g.
+    #   "maximum context length is 65536 tokens ... your prompt contains 77409
+    #    characters ...".
+    # Estimate the input tokens conservatively (~3 chars/token, which
+    # over-reserves the input so the retried output cap stays safely inside the
+    # window) and leave the remainder of the window for output.
+    _m_ctx_tok = re.search(r'maximum context length is (\d+)\s*token', error_lower)
+    _m_chars = re.search(r'prompt contains (\d+)\s*character', error_lower)
+    if _m_ctx_tok and _m_chars:
+        _ctx = int(_m_ctx_tok.group(1))
+        _est_input = (int(_m_chars.group(1)) + 2) // 3
+        _available = _ctx - _est_input
+        if _available >= 1:
+            return _available
+
    return None


@@ -1684,6 +1712,26 @@ def get_model_context_length(
                "in config.yaml to override.",
                model, base_url, f"{DEFAULT_FALLBACK_CONTEXT:,}",
            )
+            # 3b. Before falling back to the hard 256K default, consult the
+            # hardcoded catalog as a last resort.  A proxied/custom Anthropic
+            # gateway (e.g. corporate proxy) fails the Ollama/local probes
+            # above, but the model name may still match an entry in
+            # DEFAULT_CONTEXT_LENGTHS (e.g. "claude-opus-4-8" → 1M).
+            # Without this, the early return here short-circuits the catalog
+            # lookup at step 8 and silently caps context at 256K.
+            model_lower = model.lower()
+            for default_model, length in sorted(
+                DEFAULT_CONTEXT_LENGTHS.items(),
+                key=lambda x: len(x[0]),
+                reverse=True,
+            ):
+                if default_model in model_lower:
+                    logger.info(
+                        "Using hardcoded context length %s for model %r "
+                        "(custom endpoint, catalog match on %r)",
+                        f"{length:,}", model, default_model,
+                    )
+                    return length
            return DEFAULT_FALLBACK_CONTEXT

    # 4. Anthropic /v1/models API (only for regular API keys, not OAuth)
@@ -1764,10 +1812,43 @@ def get_model_context_length(
        if ctx is not None:
            save_context_length(model, base_url, ctx)
            return ctx
+    # 5f. OpenRouter live /models metadata — authoritative for OpenRouter-routed
+    # models. OpenRouter's catalog carries per-model context_length (e.g.
+    # anthropic/claude-fable-5 -> 1M) and refreshes as new slugs ship, so it
+    # must win over both models.dev (step 5g) and the hardcoded family catch-all
+    # (step 8). Before this branch, an OpenRouter selection set
+    # effective_provider="openrouter", which (a) made the models.dev lookup miss
+    # brand-new slugs and (b) skipped the step-6 OR fallback (gated on `not
+    # effective_provider`), so a fresh slug like claude-fable-5 fell through to
+    # the generic "claude": 200K entry and under-reported a 1M window. Mirrors
+    # the dedicated Nous/Copilot/GMI branches above.
+    if effective_provider == "openrouter":
+        metadata = fetch_model_metadata()
+        entry = metadata.get(model)
+        if entry:
+            or_ctx = entry.get("context_length")
+            # Guard against the known OpenRouter Kimi-family 32k underreport
+            # (same class the hardcoded overrides exist to mitigate).
+            if isinstance(or_ctx, int) and or_ctx > 0 and not (
+                or_ctx == 32768 and _model_name_suggests_kimi(model)
+            ):
+                return or_ctx
+
    if effective_provider:
        from agent.models_dev import lookup_models_dev_context
        ctx = lookup_models_dev_context(effective_provider, model)
        if ctx:
+            # MiniMax M3: models.dev reports 512K but actual context is 1M.
+            # Prefer hardcoded catalog over stale probe value.
+            if _model_name_suggests_minimax_m3(model):
+                catalog = DEFAULT_CONTEXT_LENGTHS.get("minimax-m3")
+                if catalog and ctx < catalog:
+                    logger.info(
+                        "Rejecting models.dev context=%s for %r "
+                        "(MiniMax-M3 underreport); using hardcoded default %s",
+                        ctx, model, f"{catalog:,}",
+                    )
+                    ctx = catalog
            return ctx

    # 6. OpenRouter live API metadata — provider-unaware fallback.
--- a/agent/onboarding.py
+++ b/agent/onboarding.py
@@ -26,6 +26,7 @@ logger = logging.getLogger(__name__)
 BUSY_INPUT_FLAG = "busy_input_prompt"
 TOOL_PROGRESS_FLAG = "tool_progress_prompt"
 OPENCLAW_RESIDUE_FLAG = "openclaw_residue_cleanup"
+PROFILE_BUILD_FLAG = "profile_build_offered"


 # -------------------------------------------------------------------------
@@ -126,6 +127,62 @@ def detect_openclaw_residue(home: Optional[Path] = None) -> bool:
        return False


+# -------------------------------------------------------------------------
+# Onboarding profile-build path (opt-in, consent-gated)
+# -------------------------------------------------------------------------
+
+def profile_build_mode(config: Mapping[str, Any]) -> str:
+    """Resolve the onboarding profile-build mode from config.
+
+    Returns one of:
+      ``"ask"``  — on first contact, OFFER to build a profile (default).
+      ``"off"``  — never offer; the first-message note stays a plain intro.
+
+    Read from ``config.onboarding.profile_build``. Unknown / missing values
+    fall back to ``"ask"`` so the default experience offers the flow. Any
+    network/account lookups inside the flow are separately consented to in
+    conversation — this setting only governs whether the offer is made.
+    """
+    if not isinstance(config, Mapping):
+        return "ask"
+    onboarding = config.get("onboarding")
+    if not isinstance(onboarding, Mapping):
+        return "ask"
+    mode = onboarding.get("profile_build")
+    if isinstance(mode, str) and mode.strip().lower() == "off":
+        return "off"
+    return "ask"
+
+
+def profile_build_directive() -> str:
+    """System-note directive appended to the very first message ever.
+
+    Instructs the agent to run a short, opt-in, consent-gated profile-build
+    flow and persist confirmed facts to the user-profile memory store
+    (``memory`` tool, ``target="user"``). Phrased so the agent ASKS before any
+    lookup and never silently reads connected accounts — directly addressing
+    the privacy concern that reading email/accounts unprompted feels invasive.
+    """
+    return (
+        "\n\n[System note: This is the user's very first message ever. "
+        "After a one-sentence introduction (mention /help shows commands), "
+        "OFFER — do not assume — to build a short profile of them so you can "
+        "be more useful, and explain they can decline or do it later. If and "
+        "ONLY IF they accept:\n"
+        "  1. Ask for whatever they're comfortable sharing (name, what they "
+        "do, how they like you to work). Volunteered facts come first.\n"
+        "  2. Before ANY external lookup, say what you intend to look up and "
+        "get explicit consent for that step. Never read their connected "
+        "accounts (email, calendar, etc.) silently — ask each time.\n"
+        "  3. With consent, you may use web_search to confirm public details "
+        "(e.g. employer, public profiles) from the data points they gave.\n"
+        "  4. Save each confirmed, durable fact with the memory tool using "
+        "target=\"user\" — keep entries compact and high-signal.\n"
+        "If they decline at any point, stop immediately and continue normally. "
+        "Keep the whole exchange light and conversational, not an interrogation.]"
+    )
+
+
 # -------------------------------------------------------------------------
 # State read / write
 # -------------------------------------------------------------------------
@@ -182,12 +239,15 @@ __all__ = [
    "BUSY_INPUT_FLAG",
    "TOOL_PROGRESS_FLAG",
    "OPENCLAW_RESIDUE_FLAG",
+    "PROFILE_BUILD_FLAG",
    "busy_input_hint_gateway",
    "busy_input_hint_cli",
    "tool_progress_hint_gateway",
    "tool_progress_hint_cli",
    "openclaw_residue_hint_cli",
    "detect_openclaw_residue",
+    "profile_build_mode",
+    "profile_build_directive",
    "is_seen",
    "mark_seen",
 ]
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -885,6 +885,22 @@ def build_environment_hints() -> str:
                f"`uname -a && whoami && pwd`."
            )

+    # Hermes desktop GUI — any agent running under the desktop app should know
+    # it. HERMES_DESKTOP marks the backend powering the chat; HERMES_DESKTOP_TERMINAL
+    # marks a hermes launched in the embedded terminal pane. Both set by main.cjs.
+    _truthy = ("1", "true", "yes")
+    _in_desktop = (os.getenv("HERMES_DESKTOP") or "").strip().lower() in _truthy
+    _in_desktop_term = (os.getenv("HERMES_DESKTOP_TERMINAL") or "").strip().lower() in _truthy
+    if _in_desktop or _in_desktop_term:
+        _desktop_hint = "Runtime surface: you're running inside the Hermes desktop GUI app."
+        if _in_desktop_term:
+            _desktop_hint += (
+                " You're in its embedded terminal pane, beside the GUI chat — the user can "
+                "select your output (⌥-drag on macOS, Shift-drag elsewhere) and press "
+                "⌘/Ctrl+L to send it to the chat composer."
+            )
+        hints.append(_desktop_hint)
+
    if is_wsl():
        hints.append(WSL_ENVIRONMENT_HINT)

@@ -1085,11 +1101,12 @@ def _skill_should_show(
 def build_skills_system_prompt(
    available_tools: "set[str] | None" = None,
    available_toolsets: "set[str] | None" = None,
+    compact_categories: "frozenset[str] | None" = None,
 ) -> str:
    """Build a compact skill index for the system prompt.

    Two-layer cache:
-      1. In-process LRU dict keyed by (skills_dir, tools, toolsets)
+      1. In-process LRU dict keyed by (skills_dir, tools, toolsets, …, compact_categories)
      2. Disk snapshot (``.skills_prompt_snapshot.json``) validated by
         mtime/size manifest — survives process restarts

@@ -1099,6 +1116,12 @@ def build_skills_system_prompt(
    scanned alongside the local ``~/.hermes/skills/`` directory.  External dirs
    are read-only — they appear in the index but new skills are always created
    in the local dir.  Local skills take precedence when names collide.
+
+    ``compact_categories`` (e.g. from the coding posture — see
+    agent/coding_context.py) demotes whole categories to a names-only line in
+    the rendered index. Nothing is ever hidden: every skill name stays
+    visible and loadable via ``skill_view`` / ``skills_list``; only the
+    descriptions are dropped, and a footer note explains the demotion.
    """
    skills_dir = get_skills_dir()
    external_dirs = get_all_skills_dirs()[1:]  # skip local (index 0)
@@ -1123,6 +1146,7 @@ def build_skills_system_prompt(
        tuple(sorted(str(ts) for ts in (available_toolsets or set()))),
        _platform_hint,
        tuple(sorted(disabled)),
+        tuple(sorted(compact_categories or ())),
    )
    with _SKILLS_PROMPT_CACHE_LOCK:
        cached = _SKILLS_PROMPT_CACHE.get(cache_key)
@@ -1256,18 +1280,44 @@ def build_skills_system_prompt(
            except Exception as e:
                logger.debug("Could not read external skill description %s: %s", desc_file, e)

+    # Posture-driven category demotion (e.g. non-coding skills while pairing
+    # on code). Demoted categories stay in the index as a single names-only
+    # line — descriptions are dropped to cut noise, but every skill name
+    # remains visible so memory-anchored recall ("load <name>") keeps working.
+    # NEVER remove entries entirely: agent-created skills are the model's
+    # project memory, and models don't reach for skills_list to rediscover
+    # what the index stops showing them. Match on the top-level category
+    # segment so nested categories ("social-media/twitter") are demoted with
+    # their parent.
+    demoted = frozenset(
+        cat for cat in skills_by_category
+        if cat.split("/", 1)[0] in (compact_categories or frozenset())
+    )
+
+    hidden_note = ""
+    if demoted:
+        hidden_note = (
+            "\n(Categories marked [names only] are outside the current coding "
+            "context, so their descriptions are omitted — the skills work "
+            "normally and load with skill_view(name) as usual.)"
+        )
+
    if not skills_by_category:
        result = ""
    else:
        index_lines = []
        for category in sorted(skills_by_category.keys()):
+            # Deduplicate and sort skills within each category
+            seen = set()
+            if category in demoted:
+                names = sorted({name for name, _ in skills_by_category[category]})
+                index_lines.append(f"  {category} [names only]: {', '.join(names)}")
+                continue
            cat_desc = category_descriptions.get(category, "")
            if cat_desc:
                index_lines.append(f"  {category}: {cat_desc}")
            else:
                index_lines.append(f"  {category}:")
-            # Deduplicate and sort skills within each category
-            seen = set()
            for name, desc in sorted(skills_by_category[category], key=lambda x: x[0]):
                if name in seen:
                    continue
@@ -1304,6 +1354,7 @@ def build_skills_system_prompt(
            "</available_skills>\n"
            "\n"
            "Only proceed without loading a skill if genuinely none are relevant to the task."
+            + hidden_note
        )

    # ── Store in LRU cache ────────────────────────────────────────────
--- a/agent/secret_sources/bitwarden.py
+++ b/agent/secret_sources/bitwarden.py
@@ -274,6 +274,7 @@ def _platform_asset_name() -> str:
                capture_output=True,
                text=True,
                timeout=2,
+                stdin=subprocess.DEVNULL,
            )
            if "musl" in (res.stdout + res.stderr).lower():
                libc = "musl"
@@ -525,6 +526,7 @@ def _run_bws_list(
            capture_output=True,
            text=True,
            timeout=_BWS_RUN_TIMEOUT,
+            stdin=subprocess.DEVNULL,
        )
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError(
--- a/agent/skill_preprocessing.py
+++ b/agent/skill_preprocessing.py
@@ -74,6 +74,7 @@ def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
            text=True,
            timeout=max(1, int(timeout)),
            check=False,
+            stdin=subprocess.DEVNULL,
        )
    except subprocess.TimeoutExpired:
        return f"[inline-shell timeout after {timeout}s: {command}]"
--- a/agent/system_prompt.py
+++ b/agent/system_prompt.py
@@ -191,9 +191,23 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
            )
            if toolset
        }
+        # Focus mode (opt-in) demotes non-coding skill categories to
+        # names-only in the index (never hidden — skill_view/skills_list
+        # reach everything, and every name stays visible for recall). The
+        # default coding posture leaves the index untouched.
+        _compact_cats = frozenset()
+        try:
+            from agent.coding_context import coding_compact_skill_categories
+
+            _compact_cats = coding_compact_skill_categories(
+                platform=agent.platform, cwd=resolve_context_cwd()
+            )
+        except Exception:
+            _compact_cats = frozenset()
        skills_prompt = _r.build_skills_system_prompt(
            available_tools=agent.valid_tool_names,
            available_toolsets=avail_toolsets,
+            compact_categories=_compact_cats or None,
        )
    else:
        skills_prompt = ""
@@ -221,6 +235,26 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
    if _env_hints:
        stable_parts.append(_env_hints)

+    # Coding posture (base Hermes, any interactive coding surface in a code
+    # workspace — see agent/coding_context.py). The operating brief + the live
+    # git/workspace snapshot are built once here and cached for the session;
+    # the snapshot is never re-probed per turn (that would break the prompt
+    # cache), so the brief tells the model to re-check git before relying on it.
+    if agent.valid_tool_names:
+        try:
+            from agent.coding_context import coding_system_blocks
+
+            stable_parts.extend(
+                coding_system_blocks(
+                    platform=agent.platform,
+                    cwd=resolve_context_cwd(),
+                    model=agent.model,
+                )
+            )
+        except Exception:
+            # Coding-context probing must never block prompt build.
+            pass
+
    # Local Python toolchain probe — names python/pip/uv/PEP-668 state when
    # something is non-default so the model can pick the right install
    # strategy without discovering by failure.  Emits a single line; emits
--- a/agent/tool_executor.py
+++ b/agent/tool_executor.py
@@ -417,7 +417,7 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe

    # ── Logging / callbacks ──────────────────────────────────────────
    tool_names_str = ", ".join(name for _, name, _, _, _, _ in parsed_calls)
-    if not agent.quiet_mode:
+    if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off":
        print(f"  ⚡ Concurrent: {num_tools} tool calls — {tool_names_str}")
        for i, (tc, name, args, middleware_trace, block_result, blocked_by_guardrail) in enumerate(parsed_calls, 1):
            args_str = json.dumps(args, ensure_ascii=False)
@@ -702,7 +702,7 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
        if agent._should_emit_quiet_tool_messages():
            cute_msg = _get_cute_tool_message_impl(name, args, tool_duration, result=function_result)
            agent._safe_print(f"  {cute_msg}")
-        elif not agent.quiet_mode:
+        elif not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off":
            _preview_str = _multimodal_text_summary(function_result)
            if agent.verbose_logging:
                print(f"  ✅ Tool {i+1} completed in {tool_duration:.2f}s")
@@ -866,7 +866,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
        elif function_name == "skill_manage":
            agent._iters_since_skill = 0

-        if not agent.quiet_mode:
+        if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off":
            args_str = json.dumps(function_args, ensure_ascii=False)
            if agent.verbose_logging:
                print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())})")
@@ -1065,6 +1065,25 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
            tool_duration = time.time() - tool_start_time
            if agent._should_emit_quiet_tool_messages():
                agent._vprint(f"  {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
+        elif function_name == "read_terminal":
+            def _execute(next_args: dict) -> Any:
+                from tools.read_terminal_tool import read_terminal_tool as _read_terminal_tool
+                return _read_terminal_tool(
+                    start_line=next_args.get("start_line"),
+                    count=next_args.get("count"),
+                    callback=getattr(agent, "read_terminal_callback", None),
+                )
+            function_result, function_args = _run_agent_tool_execution_middleware(
+                agent,
+                function_name=function_name,
+                function_args=function_args,
+                effective_task_id=effective_task_id,
+                tool_call_id=getattr(tool_call, "id", "") or "",
+                execute=_execute,
+            )
+            tool_duration = time.time() - tool_start_time
+            if agent._should_emit_quiet_tool_messages():
+                agent._vprint(f"  {_get_cute_tool_message_impl('read_terminal', function_args, tool_duration, result=function_result)}")
        elif function_name == "delegate_task":
            tasks_arg = function_args.get("tasks")
            if tasks_arg and isinstance(tasks_arg, list):
@@ -1365,7 +1384,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
        # entire batch.  The model sees it on the next API iteration.
        agent._apply_pending_steer_to_tool_results(messages, 1)

-        if not agent.quiet_mode:
+        if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off":
            if agent.verbose_logging:
                print(f"  ✅ Tool {i} completed in {tool_duration:.2f}s")
                print(agent._wrap_verbose("Result: ", function_result))
--- a/agent/transports/anthropic.py
+++ b/agent/transports/anthropic.py
@@ -84,7 +84,7 @@ class AnthropicTransport(ProviderTransport):
        to OpenAI finish_reason, and collects reasoning_details in provider_data.
        """
        import json
-        from agent.anthropic_adapter import _to_plain_data
+        from agent.anthropic_adapter import _to_plain_data, _sanitize_replay_block
        from agent.transports.types import ToolCall

        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
@@ -94,14 +94,40 @@ class AnthropicTransport(ProviderTransport):
        reasoning_parts = []
        reasoning_details = []
        tool_calls = []
+        # Order-preserving, sanitized copy of the content blocks in the turn.
+        # Anthropic signs each thinking block against the turn content that
+        # PRECEDES it at its position; when a turn interleaves thinking and
+        # tool_use (adaptive/interleaved thinking, Claude 4.6+), the parallel
+        # reasoning_details + tool_calls lists below lose that cross-type
+        # ordering. Replaying the latest assistant message in the wrong order
+        # invalidates the signatures -> HTTP 400 "thinking ... blocks in the
+        # latest assistant message cannot be modified". Preserve the exact
+        # block sequence here so the adapter can replay it unchanged. See
+        # tests/agent/test_anthropic_thinking_block_order.py.
+        ordered_blocks = []

        for block in response.content:
+            block_dict = _to_plain_data(block)
+            clean_block = None
+            if isinstance(block_dict, dict):
+                # Sanitize at capture so output-only SDK fields (parsed_output,
+                # caller, citations=None, …) never persist to state.db and leak
+                # back as request input on replay → HTTP 400 "Extra inputs are
+                # not permitted". Defence-in-depth with the replay-side sanitize.
+                clean_block = _sanitize_replay_block(block_dict)
+                if clean_block is not None:
+                    ordered_blocks.append(clean_block)
            if block.type == "text":
                text_parts.append(block.text)
-            elif block.type == "thinking":
-                reasoning_parts.append(block.thinking)
-                block_dict = _to_plain_data(block)
-                if isinstance(block_dict, dict):
+            elif block.type in ("thinking", "redacted_thinking"):
+                if block.type == "thinking":
+                    reasoning_parts.append(block.thinking)
+                # Use the sanitized block (clean_block) for reasoning_details too,
+                # since _extract_preserved_thinking_blocks replays these on the
+                # non-ordered path. Falls back to raw only if sanitize dropped it.
+                if isinstance(clean_block, dict):
+                    reasoning_details.append(clean_block)
+                elif isinstance(block_dict, dict):
                    reasoning_details.append(block_dict)
            elif block.type == "tool_use":
                name = block.name
@@ -130,6 +156,23 @@ class AnthropicTransport(ProviderTransport):
        provider_data = {}
        if reasoning_details:
            provider_data["reasoning_details"] = reasoning_details
+        # Only worth carrying the ordered-blocks channel when the turn has
+        # both signed thinking and tool_use — cross-type ordering is what
+        # the parallel lists lose. The check is conservative: it also
+        # fires for a single leading thinking block followed by tools,
+        # a shape that would replay correctly without it.
+        _has_signed_thinking = any(
+            isinstance(b, dict)
+            and b.get("type") in ("thinking", "redacted_thinking")
+            and (b.get("signature") or b.get("data"))
+            for b in ordered_blocks
+        )
+        _has_tool_use = any(
+            isinstance(b, dict) and b.get("type") == "tool_use"
+            for b in ordered_blocks
+        )
+        if _has_signed_thinking and _has_tool_use:
+            provider_data["anthropic_content_blocks"] = ordered_blocks

        return NormalizedResponse(
            content="\n".join(text_parts) if text_parts else None,
--- a/agent/transports/codex_app_server.py
+++ b/agent/transports/codex_app_server.py
@@ -378,6 +378,7 @@ def check_codex_binary(
            capture_output=True,
            text=True,
            timeout=10,
+            stdin=subprocess.DEVNULL,
        )
    except FileNotFoundError:
        return False, (
--- a/agent/transports/codex_app_server_session.py
+++ b/agent/transports/codex_app_server_session.py
@@ -72,6 +72,9 @@ class TurnResult:
    error: Optional[str] = None  # Set if turn ended in a non-recoverable error
    turn_id: Optional[str] = None
    thread_id: Optional[str] = None
+    token_usage_last: Optional[dict[str, Any]] = None
+    token_usage_total: Optional[dict[str, Any]] = None
+    model_context_window: Optional[int] = None
    # Hint to the caller that the underlying codex subprocess is likely
    # wedged (turn-level timeout fired, post-tool watchdog tripped, or
    # token-refresh failure killed the child). The caller should retire
@@ -501,6 +504,7 @@ class CodexAppServerSession:
                    pending = self._client.take_notification(timeout=0)
                    if pending is None:
                        break
+                    _apply_token_usage_notification(result, pending)
                    self._track_pending_file_change(pending)
                    proj = projector.project(pending)
                    if proj.messages:
@@ -536,6 +540,8 @@ class CodexAppServerSession:
                except Exception:  # pragma: no cover - display callback
                    logger.debug("on_event callback raised", exc_info=True)

+            _apply_token_usage_notification(result, note)
+
            # Track in-progress fileChange items so the approval bridge
            # can surface a real change summary when codex requests
            # approval (the approval params themselves don't carry the
@@ -802,6 +808,30 @@ class CodexAppServerSession:
        return cached


+def _apply_token_usage_notification(result: TurnResult, note: dict) -> None:
+    """Record Codex token usage from a thread/tokenUsage/updated notification.
+
+    Codex does not put token usage on turn/completed; it emits this separate
+    notification with cumulative totals and the latest turn breakdown. Other
+    methods and malformed payloads (non-dict params/tokenUsage) are ignored.
+    """
+    if not isinstance(note, dict) or note.get("method") != "thread/tokenUsage/updated":
+        return
+    params = note.get("params")
+    token_usage = params.get("tokenUsage") if isinstance(params, dict) else None
+    if not isinstance(token_usage, dict):
+        return
+    last = token_usage.get("last")
+    total = token_usage.get("total")
+    if isinstance(last, dict):
+        result.token_usage_last = dict(last)
+    if isinstance(total, dict):
+        result.token_usage_total = dict(total)
+    window = token_usage.get("modelContextWindow")
+    if isinstance(window, int) and not isinstance(window, bool) and window > 0:
+        result.model_context_window = window
+
+
 def _approval_choice_to_codex_decision(choice: str) -> str:
    """Map Hermes approval choices onto codex's CommandExecutionApprovalDecision
    / FileChangeApprovalDecision wire values.
--- a/agent/transports/types.py
+++ b/agent/transports/types.py
@@ -121,6 +121,18 @@ class NormalizedResponse:
        pd = self.provider_data or {}
        return pd.get("reasoning_details")

+    @property
+    def anthropic_content_blocks(self):
+        """Order-preserving, sanitized Anthropic content blocks, or None.
+
+        Read from provider_data. The Anthropic transport stores it only when
+        a turn carries both signed thinking and tool_use — the parallel
+        reasoning_details + tool_calls lists lose that cross-type order,
+        invalidating thinking-block signatures on replay. See agent/transports/anthropic.py.
+        """
+        pd = self.provider_data or {}
+        return pd.get("anthropic_content_blocks")
+
    @property
    def codex_reasoning_items(self):
        pd = self.provider_data or {}
--- a/agent/turn_context.py
+++ b/agent/turn_context.py
@@ -0,0 +1,388 @@
+"""Per-turn setup for ``run_conversation`` (the turn prologue).
+
+``run_conversation`` opened with ~470 lines of straight-line setup before the
+tool-calling loop ever started: stdio guarding, runtime-main wiring, retry-counter
+resets, user-message sanitization, todo/nudge-counter hydration, system-prompt
+restore-or-build, crash-resilience persistence, preflight context compression, the
+``pre_llm_call`` plugin hook, and external-memory prefetch.
+
+All of that is *prologue* — it runs once per turn, has no back-references into the
+loop, and produces a fixed set of values the loop then consumes. ``TurnContext``
+captures those produced values; ``build_turn_context`` performs the setup work and
+returns one. ``run_conversation`` is left to unpack the context and run the loop,
+shrinking the orchestrator by the full prologue.
+
+The builder still mutates ``agent`` heavily (counters, thread id, cached prompt,
+session DB) exactly as the inline code did — those side effects are the point. The
+``TurnContext`` it returns carries only the *locals* the loop reads back.
+
+Behavior is identical to the original inline prologue; this is a pure
+move-and-name refactor with no semantic change.
+"""
+
+from __future__ import annotations
+
+import logging
+import threading
+import uuid
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+
+from agent.iteration_budget import IterationBudget
+from agent.model_metadata import estimate_request_tokens_rough
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class TurnContext:
+    """Values produced by the turn prologue and consumed by the turn loop."""
+
+    # Inbound user message after surrogate sanitization.
+    user_message: str
+    # Un-nudged message for transcripts / memory queries (persist override wins).
+    original_user_message: Any
+    # Working message list for this turn (loop appends to it).
+    messages: List[Dict[str, Any]]
+    # Prior history; preflight compression may reset it to None (new session).
+    conversation_history: Optional[List[Dict[str, Any]]]
+    # Cached system prompt active for this turn (may be rebuilt by compression).
+    active_system_prompt: Optional[str]
+    # Task id (caller-supplied or generated) and the per-turn id built from it.
+    effective_task_id: str
+    turn_id: str
+    # Index of the current user turn within ``messages``.
+    current_turn_user_idx: int
+    # Whether the post-turn memory review should fire.
+    should_review_memory: bool = False
+    # Context contributed by ``pre_llm_call`` plugins (appended to user message).
+    plugin_user_context: str = ""
+    # External-memory prefetch result, reused across loop iterations.
+    ext_prefetch_cache: str = ""
+
+
+def build_turn_context(
+    agent,
+    user_message: str,
+    system_message: Optional[str],
+    conversation_history: Optional[List[Dict[str, Any]]],
+    task_id: Optional[str],
+    stream_callback,
+    persist_user_message: Optional[str],
+    *,
+    restore_or_build_system_prompt,
+    install_safe_stdio,
+    sanitize_surrogates,
+    summarize_user_message_for_log,
+    set_session_context,
+    set_current_write_origin,
+    ra,
+) -> TurnContext:
+    """Run the once-per-turn setup and return the loop's input context.
+
+    The callables/helpers the original prologue referenced from the
+    ``conversation_loop`` module are passed in explicitly to keep this module
+    free of an import cycle with ``agent.conversation_loop``.
+    """
+    # Guard stdio against OSError from broken pipes (systemd/headless/daemon).
+    install_safe_stdio()
+
+    agent._ensure_db_session()
+
+    # Tell auxiliary_client what the live main provider/model are for this turn.
+    try:
+        from agent.auxiliary_client import set_runtime_main
+        set_runtime_main(
+            getattr(agent, "provider", "") or "",
+            getattr(agent, "model", "") or "",
+            base_url=getattr(agent, "base_url", "") or "",
+            api_key=getattr(agent, "api_key", "") or "",
+            api_mode=getattr(agent, "api_mode", "") or "",
+        )
+    except Exception:
+        pass
+
+    # Tag log records on this thread with the session ID for ``hermes logs``.
+    set_session_context(agent.session_id)
+
+    # Bind the skill write-origin ContextVar for this thread.
+    set_current_write_origin(getattr(agent, "_memory_write_origin", "assistant_tool"))
+
+    # Restore the primary runtime if the previous turn activated fallback.
+    agent._restore_primary_runtime()
+
+    # Sanitize surrogate characters from user input.
+    if isinstance(user_message, str):
+        user_message = sanitize_surrogates(user_message)
+    if isinstance(persist_user_message, str):
+        persist_user_message = sanitize_surrogates(persist_user_message)
+
+    # Store stream callback for _interruptible_api_call to pick up.
+    agent._stream_callback = stream_callback
+    agent._persist_user_message_idx = None
+    agent._persist_user_message_override = persist_user_message
+    # Generate unique task_id if not provided to isolate VMs between tasks.
+    effective_task_id = task_id or str(uuid.uuid4())
+    agent._current_task_id = effective_task_id
+    turn_id = f"{agent.session_id or 'session'}:{effective_task_id}:{uuid.uuid4().hex[:8]}"
+    agent._current_turn_id = turn_id
+    agent._current_api_request_id = ""
+
+    # Reset retry counters and iteration budget at the start of each turn.
+    agent._invalid_tool_retries = 0
+    agent._invalid_json_retries = 0
+    agent._empty_content_retries = 0
+    agent._incomplete_scratchpad_retries = 0
+    agent._codex_incomplete_retries = 0
+    agent._thinking_prefill_retries = 0
+    agent._post_tool_empty_retried = False
+    agent._last_content_with_tools = None
+    agent._last_content_tools_all_housekeeping = False
+    agent._mute_post_response = False
+    agent._unicode_sanitization_passes = 0
+    agent._tool_guardrails.reset_for_turn()
+    agent._tool_guardrail_halt_decision = None
+    agent._vision_supported = True
+
+    # Pre-turn connection health check: clean up dead TCP connections.
+    if agent.api_mode != "anthropic_messages":
+        try:
+            if agent._cleanup_dead_connections():
+                agent._emit_status(
+                    "🔌 Detected stale connections from a previous provider "
+                    "issue — cleaned up automatically. Proceeding with fresh "
+                    "connection."
+                )
+        except Exception:
+            pass
+    # Replay compression warning through status_callback for gateway platforms.
+    if agent._compression_warning:
+        agent._replay_compression_warning()
+        agent._compression_warning = None  # send once
+
+    # NOTE: _turns_since_memory and _iters_since_skill are NOT reset here.
+    agent.iteration_budget = IterationBudget(agent.max_iterations)
+
+    # Log conversation turn start for debugging/observability.
+    _preview_text = summarize_user_message_for_log(user_message)
+    _msg_preview = (_preview_text[:80] + "...") if len(_preview_text) > 80 else _preview_text
+    _msg_preview = _msg_preview.replace("\n", " ")
+    logger.info(
+        "conversation turn: session=%s model=%s provider=%s platform=%s history=%d msg=%r",
+        agent.session_id or "none", agent.model, agent.provider or "unknown",
+        agent.platform or "unknown", len(conversation_history or []),
+        _msg_preview,
+    )
+
+    # Initialize conversation (copy to avoid mutating the caller's list).
+    messages = list(conversation_history) if conversation_history else []
+
+    # Hydrate todo store from conversation history.
+    if conversation_history and not agent._todo_store.has_items():
+        agent._hydrate_todo_store(conversation_history)
+
+    # Hydrate per-session nudge counters from persisted history (issue #22357).
+    if conversation_history and agent._user_turn_count == 0:
+        prior_user_turns = sum(
+            1 for m in conversation_history if m.get("role") == "user"
+        )
+        if prior_user_turns > 0:
+            agent._user_turn_count = prior_user_turns
+            if agent._memory_nudge_interval > 0 and agent._turns_since_memory == 0:
+                agent._turns_since_memory = prior_user_turns % agent._memory_nudge_interval
+
+    # Track user turns for memory flush and periodic nudge logic.
+    agent._user_turn_count += 1
+
+    # Reset the streaming context scrubber at the top of each turn.
+    scrubber = getattr(agent, "_stream_context_scrubber", None)
+    if scrubber is not None:
+        scrubber.reset()
+    # Reset the think scrubber for the same reason.
+    think_scrubber = getattr(agent, "_stream_think_scrubber", None)
+    if think_scrubber is not None:
+        think_scrubber.reset()
+
+    # Preserve the original user message (no nudge injection).
+    original_user_message = persist_user_message if persist_user_message is not None else user_message
+
+    # Track memory nudge trigger (turn-based, checked here).
+    should_review_memory = False
+    if (agent._memory_nudge_interval > 0
+            and "memory" in agent.valid_tool_names
+            and agent._memory_store):
+        agent._turns_since_memory += 1
+        if agent._turns_since_memory >= agent._memory_nudge_interval:
+            should_review_memory = True
+            agent._turns_since_memory = 0
+
+    # Add user message.
+    user_msg = {"role": "user", "content": user_message}
+    messages.append(user_msg)
+    current_turn_user_idx = len(messages) - 1
+    agent._persist_user_message_idx = current_turn_user_idx
+
+    if not agent.quiet_mode:
+        _print_preview = summarize_user_message_for_log(user_message)
+        agent._safe_print(
+            f"💬 Starting conversation: '{_print_preview[:60]}"
+            f"{'...' if len(_print_preview) > 60 else ''}'"
+        )
+
+    # ── System prompt (cached per session for prefix caching) ──
+    if agent._cached_system_prompt is None:
+        restore_or_build_system_prompt(agent, system_message, conversation_history)
+
+    active_system_prompt = agent._cached_system_prompt
+
+    # Crash-resilience: persist the inbound user turn as soon as the session row exists.
+    try:
+        agent._persist_session(messages, conversation_history)
+    except Exception:
+        logger.warning(
+            "Early turn-start session persistence failed for session=%s",
+            agent.session_id or "none",
+            exc_info=True,
+        )
+
+    # ── Preflight context compression ──
+    if (
+        agent.compression_enabled
+        and len(messages) > agent.context_compressor.protect_first_n
+                            + agent.context_compressor.protect_last_n + 1
+    ):
+        _preflight_tokens = estimate_request_tokens_rough(
+            messages,
+            system_prompt=active_system_prompt or "",
+            tools=agent.tools or None,
+        )
+        _compressor = agent.context_compressor
+        _defer_preflight = getattr(
+            _compressor,
+            "should_defer_preflight_to_real_usage",
+            lambda _tokens: False,
+        )
+        _preflight_deferred = _defer_preflight(_preflight_tokens)
+
+        if not _preflight_deferred:
+            _last = _compressor.last_prompt_tokens
+            # Do NOT overwrite the -1 sentinel (#36718).
+            if _last >= 0 and _preflight_tokens > _last:
+                _compressor.last_prompt_tokens = _preflight_tokens
+
+        if _preflight_deferred:
+            logger.info(
+                "Skipping preflight compression: rough estimate ~%s >= %s, "
+                "but last real provider prompt was %s after compression",
+                f"{_preflight_tokens:,}",
+                f"{_compressor.threshold_tokens:,}",
+                f"{_compressor.last_real_prompt_tokens:,}",
+            )
+        elif _compressor.should_compress(_preflight_tokens):
+            logger.info(
+                "Preflight compression: ~%s tokens >= %s threshold (model %s, ctx %s)",
+                f"{_preflight_tokens:,}",
+                f"{_compressor.threshold_tokens:,}",
+                agent.model,
+                f"{_compressor.context_length:,}",
+            )
+            agent._emit_status(
+                f"📦 Preflight compression: ~{_preflight_tokens:,} tokens "
+                f">= {_compressor.threshold_tokens:,} threshold. "
+                "This may take a moment."
+            )
+            for _pass in range(3):
+                _orig_len = len(messages)
+                messages, active_system_prompt = agent._compress_context(
+                    messages, system_message, approx_tokens=_preflight_tokens,
+                    task_id=effective_task_id,
+                )
+                if len(messages) >= _orig_len:
+                    break  # Cannot compress further
+                conversation_history = None
+                agent._empty_content_retries = 0
+                agent._thinking_prefill_retries = 0
+                agent._last_content_with_tools = None
+                agent._last_content_tools_all_housekeeping = False
+                agent._mute_post_response = False
+                _preflight_tokens = estimate_request_tokens_rough(
+                    messages,
+                    system_prompt=active_system_prompt or "",
+                    tools=agent.tools or None,
+                )
+                if not _compressor.should_compress(_preflight_tokens):
+                    break
+
+    # Plugin hook: pre_llm_call (context injected into user message, not system prompt).
+    plugin_user_context = ""
+    try:
+        from hermes_cli.plugins import invoke_hook as _invoke_hook
+        _pre_results = _invoke_hook(
+            "pre_llm_call",
+            session_id=agent.session_id,
+            task_id=effective_task_id,
+            turn_id=turn_id,
+            user_message=original_user_message,
+            conversation_history=list(messages),
+            is_first_turn=(not bool(conversation_history)),
+            model=agent.model,
+            platform=getattr(agent, "platform", None) or "",
+            sender_id=getattr(agent, "_user_id", None) or "",
+        )
+        _ctx_parts: list[str] = []
+        for r in _pre_results:
+            if isinstance(r, dict) and r.get("context"):
+                _ctx_parts.append(str(r["context"]))
+            elif isinstance(r, str) and r.strip():
+                _ctx_parts.append(r)
+        if _ctx_parts:
+            plugin_user_context = "\n\n".join(_ctx_parts)
+    except Exception as exc:
+        logger.warning("pre_llm_call hook failed: %s", exc)
+
+    # Per-turn file-mutation verifier state.
+    agent._turn_failed_file_mutations = {}
+
+    # Record the execution thread so interrupt()/clear_interrupt() can scope
+    # the tool-level interrupt signal to THIS agent's thread only.
+    agent._execution_thread_id = threading.current_thread().ident
+
+    # Clear stale per-thread interrupt state, preserving a pending interrupt.
+    ra()._set_interrupt(False, agent._execution_thread_id)
+    if agent._interrupt_requested:
+        ra()._set_interrupt(True, agent._execution_thread_id)
+        agent._interrupt_thread_signal_pending = False
+    else:
+        agent._interrupt_message = None
+        agent._interrupt_thread_signal_pending = False
+
+    # Notify memory providers of the new turn (BEFORE prefetch_all).
+    if agent._memory_manager:
+        try:
+            _turn_msg = original_user_message if isinstance(original_user_message, str) else ""
+            agent._memory_manager.on_turn_start(agent._user_turn_count, _turn_msg)
+        except Exception:
+            pass
+
+    # External memory provider: prefetch once before the tool loop.
+    ext_prefetch_cache = ""
+    if agent._memory_manager:
+        try:
+            _query = original_user_message if isinstance(original_user_message, str) else ""
+            ext_prefetch_cache = agent._memory_manager.prefetch_all(_query) or ""
+        except Exception:
+            pass
+
+    return TurnContext(
+        user_message=user_message,
+        original_user_message=original_user_message,
+        messages=messages,
+        conversation_history=conversation_history,
+        active_system_prompt=active_system_prompt,
+        effective_task_id=effective_task_id,
+        turn_id=turn_id,
+        current_turn_user_idx=current_turn_user_idx,
+        should_review_memory=should_review_memory,
+        plugin_user_context=plugin_user_context,
+        ext_prefetch_cache=ext_prefetch_cache,
+    )
--- a/agent/turn_finalizer.py
+++ b/agent/turn_finalizer.py
@@ -0,0 +1,428 @@
+"""Post-loop turn finalization for ``run_conversation``.
+
+Extracted from ``agent/conversation_loop.py`` as part of the god-file
+decomposition campaign (``~/.hermes/plans/god-file-decomposition.md``, Phase 1
+step 4 — the post-loop ``TurnFinalizer`` seam). ``run_conversation``'s tail
+(everything after the main tool-calling ``while`` loop) is lifted here verbatim:
+budget-exhaustion summary, trajectory save, session persist, turn diagnostics,
+response transforms, result-dict assembly, steer drain, and the memory/skill
+review trigger.
+
+Behavior-neutral: the body is moved unchanged. All ``agent.*`` side effects fire
+exactly as before; only the post-loop *locals* are passed in as keyword args, and
+the assembled ``result`` dict is returned to ``run_conversation`` which returns it
+to the caller. The function is synchronous with a single return — mirroring the
+region it replaces (no awaits, no early returns).
+
+Module ``logger`` is imported lazily inside the body (``from
+agent.conversation_loop import logger``) so this module never imports
+``agent.conversation_loop`` at import time -> no import cycle, and the log records
+keep the exact logger name (``"agent.conversation_loop"``).
+"""
+
+from __future__ import annotations
+
+import os
+
+from agent.codex_responses_adapter import _summarize_user_message_for_log
+
+
+def finalize_turn(
+    agent,
+    *,
+    final_response,
+    api_call_count,
+    interrupted,
+    failed,
+    messages,
+    conversation_history,
+    effective_task_id,
+    turn_id,
+    user_message,
+    original_user_message,
+    _should_review_memory,
+    _turn_exit_reason,
+):
+    """Run the post-loop finalization and return the turn ``result`` dict.
+
+    Lifted verbatim from ``run_conversation`` (the region after the main agent
+    loop). See module docstring.
+
+    Args:
+        agent: Agent whose turn is being finalized. Its attributes and helper
+            methods are read and mutated throughout (session persistence,
+            interrupt state, stream callback, skill-nudge counter, ...).
+        final_response: Response text produced by the loop, or ``None``.
+            When ``None`` and the iteration budget is exhausted it is replaced
+            by the result of ``agent._handle_max_iterations``.
+        api_call_count: Compared against ``agent.max_iterations`` and reported
+            as ``result["api_calls"]``.
+        interrupted: When truthy, the file-mutation footer, the
+            turn-completion explainer, the ``transform_llm_output`` /
+            ``post_llm_call`` hooks and the background review are skipped.
+        failed: When truthy, ``completed`` is ``False``.
+        messages: Message list for the turn. It is saved to the trajectory,
+            persisted, scanned for diagnostics/reasoning, and returned as
+            ``result["messages"]``.
+        conversation_history: Forwarded to ``agent._persist_session``.
+        effective_task_id: Passed to ``agent._cleanup_task_resources`` and to
+            the plugin hooks as ``task_id``.
+        turn_id: Passed to the ``post_llm_call`` and ``on_session_end`` hooks.
+        user_message: Summarised with ``_summarize_user_message_for_log`` for
+            the trajectory save.
+        original_user_message: Passed to the ``post_llm_call`` hook and to
+            ``agent._sync_external_memory_for_turn``.
+        _should_review_memory: Together with the skill-nudge check, decides
+            whether ``agent._spawn_background_review`` is called.
+        _turn_exit_reason: Reason string used in the diagnostic log, by the
+            turn-completion explainer, and as ``result["turn_exit_reason"]``.
+            Overwritten with ``max_iterations_reached(...)`` when the budget
+            is exhausted.
+
+    Returns:
+        dict: The turn result (``final_response``, ``last_reasoning``,
+        ``messages``, ``api_calls``, ``completed``, token/cost counters,
+        ``session_id``, ...). ``guardrail``, ``pending_steer`` and
+        ``interrupt_message`` are only present when applicable.
+    """
+    from agent.conversation_loop import logger
+
+    if final_response is None and (
+        api_call_count >= agent.max_iterations
+        or agent.iteration_budget.remaining <= 0
+    ):
+        # Budget exhausted — ask the model for a summary via one extra
+        # API call with tools stripped.  _handle_max_iterations injects a
+        # user message and makes a single toolless request.
+        _turn_exit_reason = f"max_iterations_reached({api_call_count}/{agent.max_iterations})"
+        agent._emit_status(
+            f"⚠️ Iteration budget exhausted ({api_call_count}/{agent.max_iterations}) "
+            "— asking model to summarise"
+        )
+        if not agent.quiet_mode:
+            agent._safe_print(
+                f"\n⚠️  Iteration budget exhausted ({api_call_count}/{agent.max_iterations}) "
+                "— requesting summary..."
+            )
+        final_response = agent._handle_max_iterations(messages, api_call_count)
+
+        # If running as a kanban worker, signal the dispatcher that the
+        # worker could not complete (rather than treating it as a
+        # protocol violation).  The agent loop strips tools before calling
+        # _handle_max_iterations, so the model cannot call kanban_block
+        # itself — we must do it on its behalf.
+        #
+        # We route through ``_record_task_failure(outcome="timed_out")``
+        # rather than ``kanban_block`` so this counts toward the
+        # ``consecutive_failures`` counter and the dispatcher's
+        # ``failure_limit`` circuit breaker (#29747 gap 2).  Without this,
+        # a task whose worker keeps exhausting its budget would block
+        # silently each run, get auto-promoted by the operator (or never
+        # surface), and re-block in an endless loop with no signal.
+        _kanban_task = os.environ.get("HERMES_KANBAN_TASK")
+        if _kanban_task:
+            try:
+                from hermes_cli import kanban_db as _kb
+                _conn = _kb.connect()
+                try:
+                    _kb._record_task_failure(
+                        _conn,
+                        _kanban_task,
+                        error=(
+                            f"Iteration budget exhausted "
+                            f"({api_call_count}/{agent.max_iterations}) — "
+                            "task could not complete within the allowed "
+                            "iterations"
+                        ),
+                        outcome="timed_out",
+                        release_claim=True,
+                        end_run=True,
+                        event_payload_extra={
+                            "budget_used": api_call_count,
+                            "budget_max": agent.max_iterations,
+                        },
+                    )
+                    logger.info(
+                        "recorded budget-exhausted failure for task %s (%d/%d)",
+                        _kanban_task, api_call_count, agent.max_iterations,
+                    )
+                finally:
+                    try:
+                        _conn.close()
+                    except Exception:
+                        pass
+            except Exception:
+                logger.warning(
+                    "Failed to record budget-exhausted failure for task %s",
+                    _kanban_task,
+                    exc_info=True,
+                )
+
+    # Determine if conversation completed successfully
+    completed = (
+        final_response is not None
+        and api_call_count < agent.max_iterations
+        and not failed
+    )
+
+    # Save trajectory if enabled.  ``user_message`` may be a multimodal
+    # list of parts; the trajectory format wants a plain string.
+    agent._save_trajectory(messages, _summarize_user_message_for_log(user_message), completed)
+
+    # Clean up VM and browser for this task after conversation completes
+    agent._cleanup_task_resources(effective_task_id)
+
+    # Persist session to both JSON log and SQLite only after private retry
+    # scaffolding has been removed. Otherwise a later user "continue" turn
+    # can replay assistant("(empty)") / recovery nudges and fall into the
+    # same empty-response loop again.
+    agent._drop_trailing_empty_response_scaffolding(messages)
+    agent._persist_session(messages, conversation_history)
+
+    # ── Turn-exit diagnostic log ─────────────────────────────────────
+    # Always logged at INFO so agent.log captures WHY every turn ended.
+    # When the last message is a tool result (agent was mid-work), log
+    # at WARNING — this is the "just stops" scenario users report.
+    _last_msg_role = messages[-1].get("role") if messages else None
+    _last_tool_name = None
+    if _last_msg_role == "tool":
+        # Walk back to find the assistant message with the tool call
+        for _m in reversed(messages):
+            if _m.get("role") == "assistant" and _m.get("tool_calls"):
+                _tcs = _m["tool_calls"]
+                # NOTE(review): the dict check inspects the FIRST tool call
+                # but the name is read from the LAST one; confirm a batch can
+                # never mix dict and non-dict entries.
+                if _tcs and isinstance(_tcs[0], dict):
+                    _last_tool_name = _tcs[-1].get("function", {}).get("name")
+                break
+
+    _turn_tool_count = sum(
+        1 for m in messages
+        if isinstance(m, dict) and m.get("role") == "assistant" and m.get("tool_calls")
+    )
+    _resp_len = len(final_response) if final_response else 0
+    _budget_used = agent.iteration_budget.used if agent.iteration_budget else 0
+    _budget_max = agent.iteration_budget.max_total if agent.iteration_budget else 0
+
+    _diag_msg = (
+        "Turn ended: reason=%s model=%s api_calls=%d/%d budget=%d/%d "
+        "tool_turns=%d last_msg_role=%s response_len=%d session=%s"
+    )
+    _diag_args = (
+        _turn_exit_reason, agent.model, api_call_count, agent.max_iterations,
+        _budget_used, _budget_max,
+        _turn_tool_count, _last_msg_role, _resp_len,
+        agent.session_id or "none",
+    )
+
+    if _last_msg_role == "tool" and not interrupted:
+        # Agent was mid-work — this is the "just stops" case.
+        logger.warning(
+            "Turn ended with pending tool result (agent may appear stuck). "
+            + _diag_msg + " last_tool=%s",
+            *_diag_args, _last_tool_name,
+        )
+    else:
+        logger.info(_diag_msg, *_diag_args)
+
+    # File-mutation verifier footer.
+    # If one or more ``write_file`` / ``patch`` calls failed during this
+    # turn and were never superseded by a successful write to the same
+    # path, append an advisory footer to the assistant response.  This
+    # catches the specific case — reported by Ben Eng (#15524-adjacent)
+    # — where a model issues a batch of parallel patches, half of them
+    # fail with "Could not find old_string", and the model summarises
+    # the turn claiming every file was edited.  The user then has to
+    # manually run ``git status`` to catch the lie.  With this footer
+    # the truth is surfaced on every turn, so over-claiming is
+    # structurally impossible past the model.
+    #
+    # Gate: only applied when a real text response exists for this
+    # turn and the user didn't interrupt.  Empty/interrupted turns
+    # already have other surface text that shouldn't be augmented.
+    if final_response and not interrupted:
+        try:
+            _failed = getattr(agent, "_turn_failed_file_mutations", None) or {}
+            if _failed and agent._file_mutation_verifier_enabled():
+                footer = agent._format_file_mutation_failure_footer(_failed)
+                if footer:
+                    final_response = final_response.rstrip() + "\n\n" + footer
+        except Exception as _ver_err:
+            logger.debug("file-mutation verifier footer failed: %s", _ver_err)
+
+    # Turn-completion explainer.
+    # When a turn ends abnormally after substantive work — empty content
+    # after retries, a partial/truncated stream, a still-pending tool
+    # result, or an iteration/budget limit — the user otherwise gets a
+    # blank or fragmentary response box with no consolidated reason why
+    # the agent stopped (#34452).  Surface a single user-visible
+    # explanation derived from ``_turn_exit_reason``, mirroring the
+    # file-mutation verifier footer pattern above.
+    #
+    # Gate carefully so healthy turns stay quiet:
+    #   - ``text_response(...)`` exits never produce an explanation
+    #     (handled inside the formatter), so a terse ``Done.`` is silent.
+    #   - We only ACT when there is no genuinely usable reply this turn:
+    #     an empty response, the "(empty)" terminal sentinel, or a
+    #     suspiciously short partial fragment with no terminating
+    #     punctuation (e.g. "The").  A real short answer keeps its text.
+    if not interrupted:
+        try:
+            if agent._turn_completion_explainer_enabled():
+                _stripped = (final_response or "").strip()
+                _is_empty_terminal = _stripped == "" or _stripped == "(empty)"
+                # A short fragment that is not a normal text_response exit
+                # and lacks sentence-ending punctuation is treated as a
+                # truncated partial (the "The" case from #34452).
+                _is_partial_fragment = (
+                    not _is_empty_terminal
+                    and not str(_turn_exit_reason).startswith("text_response")
+                    and len(_stripped) <= 24
+                    and _stripped[-1:] not in {".", "!", "?", "。", "！", "？", "`", ")"}
+                )
+                if _is_empty_terminal or _is_partial_fragment:
+                    _explanation = agent._format_turn_completion_explanation(
+                        _turn_exit_reason
+                    )
+                    if _explanation:
+                        if _is_empty_terminal:
+                            # Replace the bare "(empty)"/blank sentinel with
+                            # the actionable explanation.
+                            final_response = _explanation
+                        else:
+                            # Keep the partial fragment, append the reason so
+                            # the user sees both what arrived and why it
+                            # stopped.
+                            final_response = (
+                                _stripped + "\n\n" + _explanation
+                            )
+        except Exception as _exp_err:
+            logger.debug("turn-completion explainer failed: %s", _exp_err)
+
+    _response_transformed = False
+
+    # Plugin hook: transform_llm_output
+    # Fired once per turn after the tool-calling loop completes.
+    # Plugins can transform the LLM's output text before it's returned.
+    # First hook to return a string wins; None/empty return leaves text unchanged.
+    if final_response and not interrupted:
+        try:
+            from hermes_cli.plugins import invoke_hook as _invoke_hook
+            _transform_results = _invoke_hook(
+                "transform_llm_output",
+                response_text=final_response,
+                session_id=agent.session_id or "",
+                model=agent.model,
+                platform=getattr(agent, "platform", None) or "",
+            )
+            for _hook_result in _transform_results:
+                if isinstance(_hook_result, str) and _hook_result:
+                    final_response = _hook_result
+                    _response_transformed = True
+                    break  # First non-empty string wins
+        except Exception as exc:
+            logger.warning("transform_llm_output hook failed: %s", exc)
+
+    # Plugin hook: post_llm_call
+    # Fired once per turn after the tool-calling loop completes.
+    # Plugins can use this to persist conversation data (e.g. sync
+    # to an external memory system).
+    if final_response and not interrupted:
+        try:
+            from hermes_cli.plugins import invoke_hook as _invoke_hook
+            _invoke_hook(
+                "post_llm_call",
+                session_id=agent.session_id,
+                task_id=effective_task_id,
+                turn_id=turn_id,
+                user_message=original_user_message,
+                assistant_response=final_response,
+                conversation_history=list(messages),
+                model=agent.model,
+                platform=getattr(agent, "platform", None) or "",
+            )
+        except Exception as exc:
+            logger.warning("post_llm_call hook failed: %s", exc)
+
+    # Extract reasoning from the CURRENT turn only.  Walk backwards
+    # but stop at the user message that started this turn — anything
+    # earlier is from a prior turn and must not leak into the reasoning
+    # box (confusing stale display; #17055).  Within the current turn
+    # we still want the *most recent* non-empty reasoning: many
+    # providers (Claude thinking, DeepSeek v4, Codex Responses) emit
+    # reasoning on the tool-call step and leave the final-answer step
+    # with reasoning=None, so picking only the last assistant would
+    # silently drop legitimate same-turn reasoning.
+    last_reasoning = None
+    for msg in reversed(messages):
+        if msg.get("role") == "user":
+            break  # turn boundary — don't cross into prior turns
+        if msg.get("role") == "assistant" and msg.get("reasoning"):
+            last_reasoning = msg["reasoning"]
+            break
+
+    # Build result with interrupt info if applicable
+    result = {
+        "final_response": final_response,
+        "last_reasoning": last_reasoning,
+        "messages": messages,
+        "api_calls": api_call_count,
+        "completed": completed,
+        "turn_exit_reason": _turn_exit_reason,
+        "failed": failed,
+        "partial": False,  # True only when stopped due to invalid tool calls
+        "interrupted": interrupted,
+        "response_transformed": _response_transformed,
+        "response_previewed": getattr(agent, "_response_was_previewed", False),
+        "model": agent.model,
+        "provider": agent.provider,
+        "base_url": agent.base_url,
+        "input_tokens": agent.session_input_tokens,
+        "output_tokens": agent.session_output_tokens,
+        "cache_read_tokens": agent.session_cache_read_tokens,
+        "cache_write_tokens": agent.session_cache_write_tokens,
+        "reasoning_tokens": agent.session_reasoning_tokens,
+        "prompt_tokens": agent.session_prompt_tokens,
+        "completion_tokens": agent.session_completion_tokens,
+        "total_tokens": agent.session_total_tokens,
+        "last_prompt_tokens": getattr(agent.context_compressor, "last_prompt_tokens", 0) or 0,
+        "estimated_cost_usd": agent.session_estimated_cost_usd,
+        "cost_status": agent.session_cost_status,
+        "cost_source": agent.session_cost_source,
+        "session_id": agent.session_id,
+    }
+    if agent._tool_guardrail_halt_decision is not None:
+        result["guardrail"] = agent._tool_guardrail_halt_decision.to_metadata()
+    # If a /steer landed after the final assistant turn (no more tool
+    # batches to drain into), hand it back to the caller so it can be
+    # delivered as the next user turn instead of being silently lost.
+    _leftover_steer = agent._drain_pending_steer()
+    if _leftover_steer:
+        result["pending_steer"] = _leftover_steer
+    agent._response_was_previewed = False
+
+    # Include interrupt message if one triggered the interrupt
+    if interrupted and agent._interrupt_message:
+        result["interrupt_message"] = agent._interrupt_message
+
+    # Clear interrupt state after handling
+    agent.clear_interrupt()
+
+    # Clear stream callback so it doesn't leak into future calls
+    agent._stream_callback = None
+
+    # Check skill trigger NOW — based on how many tool iterations THIS turn used.
+    _should_review_skills = False
+    if (agent._skill_nudge_interval > 0
+            and agent._iters_since_skill >= agent._skill_nudge_interval
+            and "skill_manage" in agent.valid_tool_names):
+        _should_review_skills = True
+        agent._iters_since_skill = 0
+
+    # External memory provider: sync the completed turn + queue next prefetch.
+    agent._sync_external_memory_for_turn(
+        original_user_message=original_user_message,
+        final_response=final_response,
+        interrupted=interrupted,
+        messages=messages,
+    )
+
+    # Background memory/skill review — runs AFTER the response is delivered
+    # so it never competes with the user's task for model attention.
+    if final_response and not interrupted and (_should_review_memory or _should_review_skills):
+        try:
+            agent._spawn_background_review(
+                messages_snapshot=list(messages),
+                review_memory=_should_review_memory,
+                review_skills=_should_review_skills,
+            )
+        except Exception:
+            pass  # Background review is best-effort
+
+    # Note: Memory provider on_session_end() + shutdown_all() are NOT
+    # called here — run_conversation() is called once per user message in
+    # multi-turn sessions. Shutting down after every turn would kill the
+    # provider before the second message. Actual session-end cleanup is
+    # handled by the CLI (atexit / /reset) and gateway (session expiry /
+    # _reset_session).
+
+    # Plugin hook: on_session_end
+    # Fired at the very end of every run_conversation call.
+    # Plugins can use this for cleanup, flushing buffers, etc.
+    try:
+        from hermes_cli.plugins import invoke_hook as _invoke_hook
+        _invoke_hook(
+            "on_session_end",
+            session_id=agent.session_id,
+            task_id=effective_task_id,
+            turn_id=turn_id,
+            completed=completed,
+            interrupted=interrupted,
+            model=agent.model,
+            platform=getattr(agent, "platform", None) or "",
+        )
+    except Exception as exc:
+        logger.warning("on_session_end hook failed: %s", exc)
+
+    return result
--- a/agent/turn_retry_state.py
+++ b/agent/turn_retry_state.py
@@ -0,0 +1,68 @@
+"""Per-attempt recovery bookkeeping for the conversation turn loop.
+
+The inner retry loop in ``run_conversation`` (``while retry_count <
+max_retries``) makes several distinct recovery attempts on a single model API
+call: a credential-pool 429 retry, a per-provider OAuth refresh (codex,
+anthropic, nous, copilot), a long-context compression restart, a length-
+continuation restart, and a handful of format-recovery branches (thinking-
+signature stripping, multimodal-tool-content stripping, llama.cpp grammar
+fallback, image shrink, invalid-encrypted-content, 1M-beta header).
+
+Each of those branches is guarded by a one-shot boolean so it fires at most
+once per attempt. They used to be ~16 bare ``*_attempted`` / ``has_retried_*``
+/ ``restart_with_*`` locals declared inline before the loop and threaded
+through its 2,400-line body. ``TurnRetryState`` collapses them into one object
+the loop mutates in place (``state.codex_auth_retry_attempted = True``), giving
+the recovery bookkeeping a single named, testable home.
+
+Loop-control variables (``retry_count``, ``max_retries``,
+``max_compression_attempts``) intentionally stay as plain locals — they are the
+``while`` mechanics, not recovery bookkeeping, and putting them on the object
+would add indirection without clarifying anything.
+
+This module is dependency-free so it can be unit-tested in isolation and
+imported by the turn loop without an import cycle.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, fields
+
+
+@dataclass
+class TurnRetryState:
+    """Recovery flags and restart requests scoped to one API-call attempt.
+
+    The outer turn loop builds a new instance per ``api_call_count``
+    iteration. Every ``*_attempted`` / ``has_retried_*`` flag lets its
+    recovery branch run a single time, while the ``restart_with_*`` flags are
+    inspected by the loop once the attempt is over to decide whether the
+    request must be rebuilt and retried.
+    """
+
+    # Credential / OAuth refresh, one flag per provider.
+    codex_auth_retry_attempted: bool = False
+    anthropic_auth_retry_attempted: bool = False
+    nous_auth_retry_attempted: bool = False
+    nous_paid_entitlement_refresh_attempted: bool = False
+    copilot_auth_retry_attempted: bool = False
+
+    # Request format / payload repair.
+    thinking_sig_retry_attempted: bool = False
+    invalid_encrypted_content_retry_attempted: bool = False
+    image_shrink_retry_attempted: bool = False
+    multimodal_tool_content_retry_attempted: bool = False
+    oauth_1m_beta_retry_attempted: bool = False
+    llama_cpp_grammar_retry_attempted: bool = False
+
+    # Transport and rate-limit recovery.
+    primary_recovery_attempted: bool = False
+    has_retried_429: bool = False
+
+    # Restart requests, consumed by the outer loop after the attempt.
+    restart_with_compressed_messages: bool = False
+    restart_with_length_continuation: bool = False
+
+    def __iter__(self):
+        # Debug/test helper: produces (field name, current value) tuples in
+        # declaration order.
+        yield from (
+            (spec.name, getattr(self, spec.name)) for spec in fields(self)
+        )
--- a/agent/usage_pricing.py
+++ b/agent/usage_pricing.py
@@ -13,6 +13,7 @@ DEFAULT_PRICING = {"input": 0.0, "output": 0.0}

 _ZERO = Decimal("0")
 _ONE_MILLION = Decimal("1000000")
+_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"

 CostStatus = Literal["actual", "estimated", "included", "unknown"]
 CostSource = Literal[
@@ -570,6 +571,8 @@ def resolve_billing_route(
        return BillingRoute(provider="openai-codex", model=model, base_url=base_url or "", billing_mode="subscription_included")
    if provider_name == "openrouter" or base_url_host_matches(base_url or "", "openrouter.ai"):
        return BillingRoute(provider="openrouter", model=model, base_url=base_url or "", billing_mode="official_models_api")
+    if provider_name == "nous" or base_url_host_matches(base_url or "", "inference-api.nousresearch.com"):
+        return BillingRoute(provider="nous", model=model, base_url=base_url or _NOUS_DEFAULT_BASE_URL, billing_mode="official_models_api")
    if provider_name == "anthropic":
        return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
    if provider_name == "openai":
--- a/apps/bootstrap-installer/package.json
+++ b/apps/bootstrap-installer/package.json
@@ -11,7 +11,8 @@
    "tauri": "tauri",
    "tauri:dev": "tauri dev",
    "tauri:build": "tauri build",
-    "tauri:build:debug": "tauri build --debug"
+    "tauri:build:debug": "tauri build --debug",
+    "typecheck": "tsc -p . --noEmit"
  },
  "dependencies": {
    "@nous-research/ui": "0.16.0",
@@ -40,7 +41,7 @@
    "@types/react": "^19.2.14",
    "@types/react-dom": "^19.2.3",
    "@vitejs/plugin-react": "^5.2.0",
-    "typescript": "~5.9.3",
+    "typescript": "^6.0.3",
    "vite": "^7.3.1"
  }
 }
--- a/apps/bootstrap-installer/tsconfig.json
+++ b/apps/bootstrap-installer/tsconfig.json
@@ -16,9 +16,8 @@
    "noUnusedParameters": true,
    "esModuleInterop": true,
    "noFallthroughCasesInSwitch": true,
-    "baseUrl": ".",
    "paths": {
-      "@/*": ["src/*"]
+      "@/*": ["./src/*"]
    }
  },
  "include": ["src"],
--- a/apps/desktop/README.md
+++ b/apps/desktop/README.md
@@ -93,7 +93,7 @@ Run before opening a PR (lint may surface pre-existing warnings but must exit cl

 ```bash
 npm run fix
-npm run type-check
+npm run typecheck
 npm run lint
 npm run test:desktop:all
 ```
--- a/apps/desktop/assets/icon.icns
+++ b/apps/desktop/assets/icon.icns
--- a/apps/desktop/assets/icon.ico
+++ b/apps/desktop/assets/icon.ico
--- a/apps/desktop/assets/icon.png
+++ b/apps/desktop/assets/icon.png
--- a/apps/desktop/electron/bootstrap-runner.cjs
+++ b/apps/desktop/electron/bootstrap-runner.cjs
@@ -40,6 +40,15 @@ const path = require('node:path')
 const https = require('node:https')
 const { spawn } = require('node:child_process')

+const IS_WINDOWS = process.platform === 'win32'
+
+// Adds `windowsHide: true` to child-process options on Windows. Options that
+// already carry their own `windowsHide` key (any value) and every call on a
+// non-Windows platform are handed back untouched, as the same object.
+function hiddenWindowsChildOptions(options = {}) {
+  if (IS_WINDOWS && !Object.prototype.hasOwnProperty.call(options, 'windowsHide')) {
+    return { ...options, windowsHide: true }
+  }
+
+  return options
+}
+
 const STAMP_COMMIT_RE = /^[0-9a-f]{7,40}$/i

 // Stages flagged needs_user_input=true in the manifest are skipped by the
@@ -284,7 +293,7 @@ function spawnPowerShell(scriptPath, args, { emit, stageName, abortSignal, herme
    const ps = process.platform === 'win32' ? resolveWindowsPowerShell() : 'pwsh'
    const fullArgs = ['-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', scriptPath, ...args]

-    const child = spawn(ps, fullArgs, {
+    const child = spawn(ps, fullArgs, hiddenWindowsChildOptions({
      stdio: ['ignore', 'pipe', 'pipe'],
      env: {
        ...process.env,
@@ -292,7 +301,7 @@ function spawnPowerShell(scriptPath, args, { emit, stageName, abortSignal, herme
        // choice rather than re-computing the default.
        HERMES_HOME: hermesHome || process.env.HERMES_HOME || ''
      }
-    })
+    }))

    let stdout = ''
    let stderr = ''
--- a/apps/desktop/electron/fs-read-dir.cjs
+++ b/apps/desktop/electron/fs-read-dir.cjs
@@ -0,0 +1,109 @@
+'use strict'
+
+const fs = require('node:fs')
+const path = require('node:path')
+const { resolveDirectoryForIpc } = require('./hardening.cjs')
+
+const FS_READDIR_STAT_CONCURRENCY = 16 // Upper bound on worker loops started by mapWithStatConcurrency.
+
+// Always-hidden noise (covers non-git projects too; gitignore catches many of
+// these, but the project tree should keep the same hygiene without one).
+const FS_READDIR_HIDDEN = new Set([
+  '.git',
+  '.hg',
+  '.svn',
+  '.cache',
+  '.next',
+  '.turbo',
+  '.venv',
+  '__pycache__',
+  'build',
+  'dist',
+  'node_modules',
+  'target',
+  'venv'
+])
+
+function direntIsDirectory(dirent) { // True only when the dirent has an isDirectory() method and it returns true.
+  return typeof dirent.isDirectory === 'function' && dirent.isDirectory()
+}
+
+function direntIsFile(dirent) { // True only when the dirent has an isFile() method and it returns true.
+  return typeof dirent.isFile === 'function' && dirent.isFile()
+}
+
+function direntIsSymbolicLink(dirent) { // True only when the dirent has an isSymbolicLink() method and it returns true.
+  return typeof dirent.isSymbolicLink === 'function' && dirent.isSymbolicLink()
+}
+
+function shouldStatDirent(dirent) { // Decides whether a stat() is needed because the dirent's own flags are not enough.
+  if (direntIsDirectory(dirent)) return false
+
+  return direntIsSymbolicLink(dirent) || !direntIsFile(dirent) // Symlinks, and anything not reported as a regular file.
+}
+
+async function entryForDirent(dirent, resolved, fsImpl) { // Builds the { name, path, isDirectory } entry for one dirent.
+  const fullPath = path.join(resolved, dirent.name)
+  let isDirectory = direntIsDirectory(dirent)
+
+  if (!isDirectory && shouldStatDirent(dirent)) {
+    try {
+      isDirectory = (await fsImpl.promises.stat(fullPath)).isDirectory() // Node's fs.stat follows links to their target.
+    } catch {
+      isDirectory = false // A failed stat (e.g. a broken link) keeps the entry, listed as a non-directory.
+    }
+  }
+
+  return { name: dirent.name, path: fullPath, isDirectory }
+}
+
+async function mapWithStatConcurrency(items, mapper) { // Runs an async mapper over items with a bounded number of workers.
+  const results = new Array(items.length)
+  let nextIndex = 0 // Shared cursor: each worker claims the next unprocessed index.
+
+  async function runWorker() {
+    while (nextIndex < items.length) {
+      const index = nextIndex
+      nextIndex += 1 // Claimed before the await below, so no two workers take the same item.
+      results[index] = await mapper(items[index]) // Stored by index, so output order matches input order.
+    }
+  }
+
+  const workerCount = Math.min(FS_READDIR_STAT_CONCURRENCY, items.length)
+  const workers = Array.from({ length: workerCount }, () => runWorker())
+  await Promise.all(workers)
+
+  return results
+}
+
+async function readDirForIpc(dirPath, options = {}) { // Lists a directory for IPC; failures return { entries: [], error }.
+  const fsImpl = options.fs || fs // options.fs lets callers (the tests here) inject a fake fs.
+  let resolved
+
+  try {
+    ;({ resolvedPath: resolved } = await resolveDirectoryForIpc(dirPath, {
+      fs: fsImpl,
+      purpose: 'Directory read'
+    }))
+  } catch (error) {
+    return { entries: [], error: error?.code || 'read-error' }
+  }
+
+  try {
+    const dirents = await fsImpl.promises.readdir(resolved, { withFileTypes: true })
+    const visibleDirents = dirents.filter(dirent => !FS_READDIR_HIDDEN.has(dirent.name)) // Filtered before any stat.
+    const entries = await mapWithStatConcurrency(visibleDirents, dirent =>
+      entryForDirent(dirent, resolved, fsImpl)
+    )
+
+    entries.sort((a, b) => Number(b.isDirectory) - Number(a.isDirectory) || a.name.localeCompare(b.name))
+
+    return { entries } // Sorted above: directories first, then by name within each group.
+  } catch (error) {
+    return { entries: [], error: error?.code || 'read-error' }
+  }
+}
+
+module.exports = {
+  readDirForIpc
+}
--- a/apps/desktop/electron/fs-read-dir.test.cjs
+++ b/apps/desktop/electron/fs-read-dir.test.cjs
@@ -0,0 +1,364 @@
+'use strict'
+
+const assert = require('node:assert/strict')
+const fs = require('node:fs')
+const os = require('node:os')
+const path = require('node:path')
+const test = require('node:test')
+const { pathToFileURL } = require('node:url')
+
+const { readDirForIpc } = require('./fs-read-dir.cjs')
+
+function mkTmpDir() { // Creates a unique scratch directory under the OS temp dir; each test removes its own.
+  return fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-fs-read-dir-'))
+}
+
+function fakeDirent(name, flags = {}) { // Minimal Dirent stand-in; each type check is false unless set in `flags`.
+  return {
+    name,
+    isDirectory: () => Boolean(flags.directory),
+    isFile: () => Boolean(flags.file),
+    isSymbolicLink: () => Boolean(flags.symlink)
+  }
+}
+
+test('readDirForIpc hides noisy directories and files from the project tree', async () => {
+  const root = mkTmpDir()
+
+  try {
+    fs.mkdirSync(path.join(root, 'node_modules'))
+    fs.mkdirSync(path.join(root, 'src'))
+    fs.writeFileSync(path.join(root, 'target'), 'hidden file')
+    fs.writeFileSync(path.join(root, 'README.md'), 'visible file')
+
+    const result = await readDirForIpc(root)
+
+    assert.equal(result.error, undefined)
+    assert.deepEqual(
+      result.entries.map(entry => entry.name),
+      ['src', 'README.md']
+    )
+  } finally {
+    fs.rmSync(root, { recursive: true, force: true })
+  }
+})
+
+test('readDirForIpc filters a hidden basename whether it is a file or directory', async () => {
+  const dirRoot = mkTmpDir()
+  const fileRoot = mkTmpDir()
+
+  try {
+    fs.mkdirSync(path.join(dirRoot, 'node_modules'))
+    fs.writeFileSync(path.join(dirRoot, 'visible.txt'), 'visible')
+    fs.writeFileSync(path.join(fileRoot, 'node_modules'), 'hidden file')
+    fs.writeFileSync(path.join(fileRoot, 'visible.txt'), 'visible')
+
+    assert.deepEqual(
+      (await readDirForIpc(dirRoot)).entries.map(entry => entry.name),
+      ['visible.txt']
+    )
+    assert.deepEqual(
+      (await readDirForIpc(fileRoot)).entries.map(entry => entry.name),
+      ['visible.txt']
+    )
+  } finally {
+    fs.rmSync(dirRoot, { recursive: true, force: true })
+    fs.rmSync(fileRoot, { recursive: true, force: true })
+  }
+})
+
+test('readDirForIpc returns directories before files and sorts by name within groups', async () => {
+  const root = mkTmpDir()
+
+  try {
+    fs.writeFileSync(path.join(root, 'z.txt'), 'z')
+    fs.mkdirSync(path.join(root, 'src'))
+    fs.writeFileSync(path.join(root, 'a.txt'), 'a')
+    fs.mkdirSync(path.join(root, 'lib'))
+
+    const result = await readDirForIpc(root)
+
+    assert.equal(result.error, undefined)
+    assert.deepEqual(
+      result.entries.map(entry => entry.name),
+      ['lib', 'src', 'a.txt', 'z.txt']
+    )
+  } finally {
+    fs.rmSync(root, { recursive: true, force: true })
+  }
+})
+
+test('readDirForIpc accepts file URLs for directories', async () => {
+  const root = mkTmpDir()
+
+  try {
+    fs.mkdirSync(path.join(root, 'src'))
+    fs.writeFileSync(path.join(root, 'README.md'), 'visible file')
+
+    const result = await readDirForIpc(pathToFileURL(root).toString())
+
+    assert.equal(result.error, undefined)
+    assert.deepEqual(
+      result.entries.map(entry => entry.name),
+      ['src', 'README.md']
+    )
+  } finally {
+    fs.rmSync(root, { recursive: true, force: true })
+  }
+})
+
+test('readDirForIpc returns invalid-path for blank or non-string input', async () => {
+  let readdirCalls = 0
+  const fsImpl = {
+    promises: {
+      readdir: async () => {
+        readdirCalls += 1
+        return []
+      }
+    }
+  }
+
+  assert.deepEqual(await readDirForIpc('', { fs: fsImpl }), { entries: [], error: 'invalid-path' })
+  assert.deepEqual(await readDirForIpc('   ', { fs: fsImpl }), { entries: [], error: 'invalid-path' })
+  assert.deepEqual(await readDirForIpc(null, { fs: fsImpl }), { entries: [], error: 'invalid-path' })
+  assert.equal(readdirCalls, 0)
+})
+
+test('readDirForIpc rejects Windows device paths before readdir', async () => {
+  let readdirCalls = 0
+  const fsImpl = {
+    promises: {
+      readdir: async () => {
+        readdirCalls += 1
+        return []
+      }
+    }
+  }
+
+  assert.deepEqual(await readDirForIpc('\\\\?\\C:\\secret', { fs: fsImpl }), {
+    entries: [],
+    error: 'device-path'
+  })
+  assert.equal(readdirCalls, 0)
+})
+
+test('readDirForIpc returns filesystem error codes instead of throwing', async () => {
+  const root = mkTmpDir()
+
+  try {
+    const result = await readDirForIpc(path.join(root, 'missing'))
+
+    assert.deepEqual(result, { entries: [], error: 'ENOENT' })
+  } finally {
+    fs.rmSync(root, { recursive: true, force: true })
+  }
+})
+
+test('readDirForIpc marks a symlink to a directory as a directory', async t => {
+  const root = mkTmpDir()
+
+  try {
+    fs.mkdirSync(path.join(root, 'actual-dir'))
+
+    try {
+      fs.symlinkSync(path.join(root, 'actual-dir'), path.join(root, 'linked-dir'), 'dir')
+    } catch (error) {
+      if (error?.code === 'EPERM' || error?.code === 'EACCES') {
+        t.skip(`symlink creation is not permitted on this platform (${error.code})`)
+
+        return
+      }
+
+      throw error
+    }
+
+    const result = await readDirForIpc(root)
+    const linked = result.entries.find(entry => entry.name === 'linked-dir')
+
+    assert.equal(result.error, undefined)
+    assert.equal(linked?.isDirectory, true)
+  } finally {
+    fs.rmSync(root, { recursive: true, force: true })
+  }
+})
+
+test('readDirForIpc marks a Windows junction to a directory as a directory', async t => {
+  if (process.platform !== 'win32') {
+    t.skip('junctions are a Windows-specific symlink type')
+
+    return
+  }
+
+  const root = mkTmpDir()
+
+  try {
+    fs.mkdirSync(path.join(root, 'actual-dir'))
+
+    try {
+      fs.symlinkSync(path.join(root, 'actual-dir'), path.join(root, 'junction-dir'), 'junction')
+    } catch (error) {
+      if (error?.code === 'EPERM' || error?.code === 'EACCES') {
+        t.skip(`junction creation is not permitted on this platform (${error.code})`)
+
+        return
+      }
+
+      throw error
+    }
+
+    const result = await readDirForIpc(root)
+    const junction = result.entries.find(entry => entry.name === 'junction-dir')
+
+    assert.equal(result.error, undefined)
+    assert.equal(junction?.isDirectory, true)
+  } finally {
+    fs.rmSync(root, { recursive: true, force: true })
+  }
+})
+
+test('readDirForIpc allows expanding symlink or junction directories outside the project root', async t => {
+  const root = mkTmpDir()
+  const outside = mkTmpDir()
+
+  try {
+    fs.writeFileSync(path.join(outside, 'outside.txt'), 'ok')
+
+    const linkPath = path.join(root, 'outside-link')
+    try {
+      fs.symlinkSync(outside, linkPath, process.platform === 'win32' ? 'junction' : 'dir')
+    } catch (error) {
+      if (error?.code === 'EPERM' || error?.code === 'EACCES') {
+        t.skip(`directory symlink creation is not permitted on this platform (${error.code})`)
+
+        return
+      }
+
+      throw error
+    }
+
+    const result = await readDirForIpc(linkPath)
+
+    assert.equal(result.error, undefined)
+    assert.deepEqual(result.entries, [
+      { name: 'outside.txt', path: path.join(linkPath, 'outside.txt'), isDirectory: false }
+    ])
+  } finally {
+    fs.rmSync(root, { recursive: true, force: true })
+    fs.rmSync(outside, { recursive: true, force: true })
+  }
+})
+
+test('readDirForIpc stats symbolic links and unknown entries without dropping the whole listing', async () => {
+  const input = path.join('virtual-root') // Relative path; nothing is created on disk because fs is faked.
+  const resolved = path.resolve(input)
+  const statCalls = []
+  const fsImpl = {
+    promises: {
+      readdir: async () => [
+        fakeDirent('unknown-entry'),
+        fakeDirent('linked-dir', { symlink: true }),
+        fakeDirent('broken-link', { symlink: true }),
+        fakeDirent('plain.txt', { file: true })
+      ],
+      stat: async fullPath => {
+        if (fullPath === resolved) { // The directory-level stat issued while resolving the listing root.
+          return { isDirectory: () => true }
+        }
+
+        statCalls.push(fullPath)
+        if (fullPath.endsWith(`${path.sep}linked-dir`)) {
+          return { isDirectory: () => true }
+        }
+        throw Object.assign(new Error('gone'), { code: 'ENOENT' }) // Every other entry behaves like a missing target.
+      }
+    }
+  }
+
+  const result = await readDirForIpc(input, { fs: fsImpl })
+
+  assert.equal(result.error, undefined)
+  assert.deepEqual(
+    statCalls.sort(),
+    [path.join(resolved, 'broken-link'), path.join(resolved, 'linked-dir'), path.join(resolved, 'unknown-entry')].sort()
+  )
+  assert.deepEqual(result.entries, [
+    { name: 'linked-dir', path: path.join(resolved, 'linked-dir'), isDirectory: true },
+    { name: 'broken-link', path: path.join(resolved, 'broken-link'), isDirectory: false },
+    { name: 'plain.txt', path: path.join(resolved, 'plain.txt'), isDirectory: false },
+    { name: 'unknown-entry', path: path.join(resolved, 'unknown-entry'), isDirectory: false }
+  ])
+})
+
+test('readDirForIpc bounds concurrent stats while preserving complete sorted output', async () => {
+  const input = path.join('virtual-root')
+  const resolved = path.resolve(input)
+  const names = Array.from({ length: 105 }, (_, index) => `entry-${String(104 - index).padStart(3, '0')}`)
+  const failedName = 'entry-100' // Index 4, so it is also in directoryNames; its stat failure must yield a non-directory.
+  const directoryNames = new Set(names.filter((_, index) => index % 10 === 4))
+  const successfulDirectoryNames = new Set([...directoryNames].filter(name => name !== failedName))
+  const statCalls = []
+  let active = 0
+  let peak = 0
+  let releaseStats
+  let markFirstStatStarted
+  const statsReleased = new Promise(resolve => { // Gate that holds every entry stat open until releaseStats() runs.
+    releaseStats = resolve
+  })
+  const firstStatStarted = new Promise(resolve => {
+    markFirstStatStarted = resolve
+  })
+  const fsImpl = {
+    promises: {
+      readdir: async () => [
+        fakeDirent('node_modules', { symlink: true }), // Hidden name: asserted below to never reach stat.
+        ...names.map((name, index) => fakeDirent(name, { symlink: index % 2 === 0 }))
+      ],
+      stat: async fullPath => {
+        if (fullPath === resolved) { // The directory-level stat is answered at once and not counted.
+          return { isDirectory: () => true }
+        }
+
+        statCalls.push(fullPath)
+        active += 1
+        peak = Math.max(peak, active)
+        markFirstStatStarted()
+        await statsReleased // Park here so in-flight stats accumulate and `peak` reflects the concurrency.
+        active -= 1
+
+        const name = path.basename(fullPath)
+        if (name === failedName) {
+          throw Object.assign(new Error('gone'), { code: 'ENOENT' })
+        }
+
+        return { isDirectory: () => successfulDirectoryNames.has(name) }
+      }
+    }
+  }
+
+  const resultPromise = readDirForIpc(input, { fs: fsImpl })
+  await firstStatStarted
+  await new Promise(resolve => setImmediate(resolve)) // Let the other workers begin their stats before releasing.
+  releaseStats()
+  const result = await resultPromise
+
+  const expectedNames = [
+    ...names.filter(name => successfulDirectoryNames.has(name)).sort(),
+    ...names.filter(name => !successfulDirectoryNames.has(name)).sort()
+  ]
+
+  assert.equal(result.error, undefined)
+  assert.equal(result.entries.length, names.length)
+  assert.equal(statCalls.length, names.length)
+  assert.equal(statCalls.some(fullPath => fullPath.endsWith(`${path.sep}node_modules`)), false)
+  assert.ok(peak > 1, `expected concurrent stats, observed peak ${peak}`)
+  assert.ok(peak <= 16, `expected at most 16 concurrent stats, observed peak ${peak}`)
+  assert.deepEqual(
+    result.entries.map(entry => entry.name),
+    expectedNames
+  )
+  assert.equal(result.entries.find(entry => entry.name === failedName)?.isDirectory, false)
+  assert.equal(
+    result.entries.filter(entry => entry.isDirectory).length,
+    successfulDirectoryNames.size
+  )
+})
--- a/apps/desktop/electron/git-root.cjs
+++ b/apps/desktop/electron/git-root.cjs
@@ -0,0 +1,54 @@
+'use strict'
+
+const fs = require('node:fs')
+const path = require('node:path')
+const { resolveRequestedPathForIpc } = require('./hardening.cjs')
+
+function findGitRoot(start, fsImpl = fs) { // Nearest ancestor of `start` (inclusive) that contains `.git`, else null.
+  let dir = start
+
+  for (let i = 0; i < 50; i += 1) { // Hard cap of 50 levels on the upward walk.
+    try {
+      if (fsImpl.existsSync(path.join(dir, '.git'))) {
+        return dir
+      }
+    } catch {
+      return null
+    }
+
+    const parent = path.dirname(dir)
+
+    if (parent === dir) { // dirname() returned its input: there is no higher directory to try.
+      return null
+    }
+
+    dir = parent
+  }
+
+  return null
+}
+
+async function gitRootForIpc(startPath, options = {}) { // Git root for an IPC-supplied path or file URL; null when none.
+  const fsImpl = options.fs || fs
+  let resolved
+
+  try {
+    resolved = resolveRequestedPathForIpc(startPath, { purpose: 'Git root' })
+  } catch {
+    return null // Rejected input (invalid or device-style path) yields null rather than an error.
+  }
+
+  try {
+    const stat = await fsImpl.promises.stat(resolved)
+    const start = stat.isDirectory() ? resolved : path.dirname(resolved) // For a file, search from its parent directory.
+
+    return findGitRoot(start, fsImpl)
+  } catch {
+    return findGitRoot(resolved, fsImpl) // stat failed (e.g. missing path): still walk up from the requested path.
+  }
+}
+
+module.exports = {
+  findGitRoot,
+  gitRootForIpc
+}
--- a/apps/desktop/electron/git-root.test.cjs
+++ b/apps/desktop/electron/git-root.test.cjs
@@ -0,0 +1,40 @@
+'use strict'
+
+const assert = require('node:assert/strict')
+const fs = require('node:fs')
+const os = require('node:os')
+const path = require('node:path')
+const test = require('node:test')
+const { pathToFileURL } = require('node:url')
+
+const { gitRootForIpc } = require('./git-root.cjs')
+
+function mkTmpDir() { // Creates a unique scratch directory under the OS temp dir for one test.
+  return fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-git-root-'))
+}
+
+test('gitRootForIpc returns null for invalid and device paths', async () => {
+  assert.equal(await gitRootForIpc(''), null)
+  assert.equal(await gitRootForIpc('   '), null)
+  assert.equal(await gitRootForIpc(null), null)
+  assert.equal(await gitRootForIpc('\\\\?\\C:\\secret'), null)
+  assert.equal(await gitRootForIpc('file:///%E0%A4%A'), null)
+})
+
+test('gitRootForIpc resolves directories files missing descendants and file URLs', async t => {
+  const root = mkTmpDir()
+  t.after(() => fs.rmSync(root, { recursive: true, force: true }))
+
+  const gitDir = path.join(root, '.git')
+  const srcDir = path.join(root, 'src')
+  const filePath = path.join(srcDir, 'index.ts')
+  fs.mkdirSync(gitDir)
+  fs.mkdirSync(srcDir)
+  fs.writeFileSync(filePath, 'export {}\n', 'utf8')
+
+  assert.equal(await gitRootForIpc(root), root)
+  assert.equal(await gitRootForIpc(srcDir), root)
+  assert.equal(await gitRootForIpc(filePath), root)
+  assert.equal(await gitRootForIpc(pathToFileURL(filePath).toString()), root)
+  assert.equal(await gitRootForIpc(path.join(srcDir, 'missing.ts')), root)
+})
--- a/apps/desktop/electron/hardening.cjs
+++ b/apps/desktop/electron/hardening.cjs
@@ -106,71 +106,155 @@ function sensitiveFileBlockReason(filePath) {
  return null
 }

-function resolveRequestedFilePath(filePath, baseDir = process.cwd(), purpose = 'File read') {
-  const raw = String(filePath || '').trim()
+function ipcPathError(code, message) { // Builds an Error carrying a machine-readable `code` property.
+  const error = new Error(message)
+  error.code = code
+  return error
+}
+
+function rejectUnsafePathSyntax(filePath, purpose = 'File read') { // Returns the trimmed path or throws a coded error.
+  if (typeof filePath !== 'string') {
+    throw ipcPathError('invalid-path', `${purpose} failed: file path is required.`)
+  }
+
+  const raw = filePath.trim()

  if (!raw) {
-    throw new Error(`${purpose} failed: file path is required.`)
+    throw ipcPathError('invalid-path', `${purpose} failed: file path is required.`)
  }

  if (raw.includes('\0')) {
-    throw new Error(`${purpose} failed: file path is invalid.`)
+    throw ipcPathError('invalid-path', `${purpose} failed: file path is invalid.`)
  }

+  const normalized = raw.replace(/\\/g, '/').toLowerCase() // One form to match: forward slashes, lower case.
+  if (
+    normalized.startsWith('//?/') ||
+    normalized.startsWith('//./') ||
+    normalized.startsWith('globalroot/device/') ||
+    normalized.includes('/globalroot/device/')
+  ) {
+    throw ipcPathError('device-path', `${purpose} blocked: Windows device paths are not allowed.`)
+  }
+
+  return raw
+}
+
+function resolveRequestedPathForIpc(filePath, options = {}) { // Path or file: URL -> absolute path; throws coded errors.
+  const purpose = String(options.purpose || 'File read')
+  const raw = rejectUnsafePathSyntax(filePath, purpose)
+
  if (/^file:/i.test(raw)) {
+    let resolvedPath
    try {
-      return fileURLToPath(raw)
+      const parsed = new URL(raw)
+      if (parsed.protocol !== 'file:') {
+        throw new Error('not a file URL')
+      }
+      resolvedPath = fileURLToPath(parsed)
    } catch {
-      throw new Error(`${purpose} failed: file URL is invalid.`)
+      throw ipcPathError('invalid-path', `${purpose} failed: file URL is invalid.`)
    }
+
+    rejectUnsafePathSyntax(resolvedPath, purpose) // The decoded URL path is checked too, not only the raw URL text.
+    return path.resolve(resolvedPath)
  }

-  const resolvedBase = path.resolve(String(baseDir || process.cwd()))
-  return path.resolve(resolvedBase, raw)
+  const baseInput = typeof options.baseDir === 'string' && options.baseDir.trim() ? options.baseDir : process.cwd()
+  const safeBaseInput = rejectUnsafePathSyntax(baseInput, purpose) // Also trims the base directory.
+  const resolvedBase = path.resolve(safeBaseInput)
+  rejectUnsafePathSyntax(resolvedBase, purpose) // Checked again after resolving, not just on the raw input.
+  const resolvedPath = path.resolve(resolvedBase, raw)
+  rejectUnsafePathSyntax(resolvedPath, purpose)
+
+  return resolvedPath
+}
+
+async function statForIpc(fsImpl, resolvedPath, purpose, typeLabel) { // stat() that rethrows failures as coded errors.
+  try {
+    return await fsImpl.promises.stat(resolvedPath)
+  } catch (error) {
+    const code = error && typeof error === 'object' ? error.code : ''
+    if (code === 'ENOENT' || code === 'ENOTDIR') { // Missing targets get a fixed message; the original code is kept.
+      throw ipcPathError(code || 'ENOENT', `${purpose} failed: ${typeLabel} does not exist.`)
+    }
+    throw ipcPathError(code || 'read-error', `${purpose} failed: ${error instanceof Error ? error.message : String(error)}`)
+  }
+}
+
+async function realpathForIpc(fsImpl, resolvedPath, purpose) { // Resolves the real path, then re-validates its syntax.
+  if (typeof fsImpl.promises.realpath !== 'function') { // fs implementation without realpath: keep the resolved path.
+    return resolvedPath
+  }
+
+  try {
+    const realPath = await fsImpl.promises.realpath(resolvedPath)
+    rejectUnsafePathSyntax(realPath, purpose)
+    return realPath
+  } catch (error) { // Also wraps an error thrown by the syntax check above; its code is preserved.
+    const code = error && typeof error === 'object' ? error.code : ''
+    throw ipcPathError(code || 'read-error', `${purpose} failed: ${error instanceof Error ? error.message : String(error)}`)
+  }
+}
+
+function rejectSensitiveFilePath(filePath, purpose) { // Throws a 'sensitive-file' error when the path is blocked.
+  const blockReason = sensitiveFileBlockReason(filePath)
+  if (blockReason) {
+    throw ipcPathError('sensitive-file', `${purpose} blocked for sensitive file: ${blockReason}`)
+  }
+}
+
+async function resolveDirectoryForIpc(dirPath, options = {}) { // Validates that dirPath names an existing directory.
+  const purpose = String(options.purpose || 'Directory read')
+  const fsImpl = options.fs || fs
+  const resolvedPath = resolveRequestedPathForIpc(dirPath, { baseDir: options.baseDir, purpose })
+  const stat = await statForIpc(fsImpl, resolvedPath, purpose, 'directory')
+
+  if (!stat.isDirectory()) {
+    throw ipcPathError('ENOTDIR', `${purpose} failed: path is not a directory.`)
+  }
+
+  const realPath = await realpathForIpc(fsImpl, resolvedPath, purpose)
+
+  return { realPath, resolvedPath, stat } // resolvedPath is the requested location; realPath comes from realpathForIpc.
 }

 async function resolveReadableFileForIpc(filePath, options = {}) {
  const purpose = String(options.purpose || 'File read')
-  const resolvedPath = resolveRequestedFilePath(filePath, options.baseDir, purpose)
+  const fsImpl = options.fs || fs // options.fs allows an injected fs implementation.
+  const resolvedPath = resolveRequestedPathForIpc(filePath, { baseDir: options.baseDir, purpose })

  if (options.blockSensitive !== false) {
-    const blockReason = sensitiveFileBlockReason(resolvedPath)
-    if (blockReason) {
-      throw new Error(`${purpose} blocked for sensitive file: ${blockReason}`)
-    }
+    rejectSensitiveFilePath(resolvedPath, purpose) // First check: the path as requested.
  }

-  let stat
-  try {
-    stat = await fs.promises.stat(resolvedPath)
-  } catch (error) {
-    const code = error && typeof error === 'object' ? error.code : ''
-    if (code === 'ENOENT' || code === 'ENOTDIR') {
-      throw new Error(`${purpose} failed: file does not exist.`)
-    }
-    throw new Error(`${purpose} failed: ${error instanceof Error ? error.message : String(error)}`)
-  }
+  const stat = await statForIpc(fsImpl, resolvedPath, purpose, 'file')

  if (stat.isDirectory()) {
-    throw new Error(`${purpose} failed: path points to a directory.`)
+    throw ipcPathError('EISDIR', `${purpose} failed: path points to a directory.`)
  }

  if (!stat.isFile()) {
-    throw new Error(`${purpose} failed: only regular files can be read.`)
+    throw ipcPathError('EINVAL', `${purpose} failed: only regular files can be read.`)
+  }
+
+  const realPath = await realpathForIpc(fsImpl, resolvedPath, purpose)
+  if (options.blockSensitive !== false) {
+    rejectSensitiveFilePath(realPath, purpose) // Second check: the real path, so a link to a blocked file is refused.
  }

  const maxBytes = Number.isFinite(options.maxBytes) && Number(options.maxBytes) > 0 ? Number(options.maxBytes) : null
  if (maxBytes && stat.size > maxBytes) {
-    throw new Error(`${purpose} failed: file is too large (${stat.size} bytes; limit ${maxBytes} bytes).`)
+    throw ipcPathError('EFBIG', `${purpose} failed: file is too large (${stat.size} bytes; limit ${maxBytes} bytes).`)
  }

  try {
-    await fs.promises.access(resolvedPath, fs.constants.R_OK)
+    await fsImpl.promises.access(resolvedPath, fs.constants.R_OK) // R_OK is read from node:fs even when fsImpl is injected.
  } catch {
-    throw new Error(`${purpose} failed: file is not readable.`)
+    throw ipcPathError('EACCES', `${purpose} failed: file is not readable.`)
  }

-  return { resolvedPath, stat }
+  return { realPath, resolvedPath, stat }
 }

 module.exports = {
@@ -178,7 +262,10 @@ module.exports = {
  DEFAULT_FETCH_TIMEOUT_MS,
  TEXT_PREVIEW_SOURCE_MAX_BYTES,
  encryptDesktopSecret,
+  rejectUnsafePathSyntax,
+  resolveDirectoryForIpc,
  resolveReadableFileForIpc,
+  resolveRequestedPathForIpc,
  resolveTimeoutMs,
  sensitiveFileBlockReason
 }
--- a/apps/desktop/electron/hardening.test.cjs
+++ b/apps/desktop/electron/hardening.test.cjs
@@ -8,11 +8,20 @@ const { pathToFileURL } = require('node:url')
 const {
  DEFAULT_FETCH_TIMEOUT_MS,
  encryptDesktopSecret,
+  resolveDirectoryForIpc,
  resolveReadableFileForIpc,
+  resolveRequestedPathForIpc,
  resolveTimeoutMs,
  sensitiveFileBlockReason
 } = require('./hardening.cjs')

+async function rejectsWithCode(promise, code) { // Asserts that `promise` rejects with an error whose .code equals `code`.
+  await assert.rejects(promise, error => {
+    assert.equal(error?.code, code)
+    return true
+  })
+}
+
 test('resolveTimeoutMs falls back to defaults and accepts overrides', () => {
  assert.equal(resolveTimeoutMs(undefined), DEFAULT_FETCH_TIMEOUT_MS)
  assert.equal(resolveTimeoutMs(0), DEFAULT_FETCH_TIMEOUT_MS)
@@ -51,6 +60,52 @@ test('sensitiveFileBlockReason blocks obvious secret file patterns', () => {
  assert.match(String(sensitiveFileBlockReason('/tmp/server-cert.pem')), /\.pem/)
 })

+test('path helpers reject blank non-string NUL and Windows device syntax', async () => {
+  await rejectsWithCode(resolveReadableFileForIpc('', { purpose: 'File preview' }), 'invalid-path')
+  await rejectsWithCode(resolveReadableFileForIpc('   ', { purpose: 'File preview' }), 'invalid-path')
+  await rejectsWithCode(resolveReadableFileForIpc(null, { purpose: 'File preview' }), 'invalid-path')
+  await rejectsWithCode(resolveReadableFileForIpc(`safe${String.fromCharCode(0)}name.txt`), 'invalid-path')
+
+  const devicePaths = [
+    '\\\\?\\C:\\secret.txt',
+    '\\\\.\\C:\\secret.txt',
+    '\\\\?\\UNC\\server\\share\\secret.txt',
+    'GLOBALROOT/Device/HarddiskVolumeShadowCopy1/secret.txt'
+  ]
+
+  for (const devicePath of devicePaths) {
+    assert.throws(
+      () => resolveRequestedPathForIpc(devicePath, { purpose: 'File preview' }),
+      error => {
+        assert.equal(error?.code, 'device-path')
+        return true
+      }
+    )
+    await rejectsWithCode(resolveReadableFileForIpc(devicePath, { purpose: 'File preview' }), 'device-path')
+  }
+
+  assert.throws(
+    () => resolveRequestedPathForIpc('file:///%E0%A4%A', { purpose: 'File preview' }),
+    error => {
+      assert.equal(error?.code, 'invalid-path')
+      return true
+    }
+  )
+  await rejectsWithCode(resolveReadableFileForIpc('file:///%E0%A4%A', { purpose: 'File preview' }), 'invalid-path')
+})
+
+test('resolveRequestedPathForIpc resolves relative paths from the trimmed base directory', () => {
+  const baseDir = path.join(os.tmpdir(), 'hermes-desktop-base')
+
+  assert.equal(
+    resolveRequestedPathForIpc('notes.txt', {
+      baseDir: `  ${baseDir}  `,
+      purpose: 'File preview'
+    }),
+    path.resolve(baseDir, 'notes.txt')
+  )
+})
+
 test('resolveReadableFileForIpc validates existence type size and sensitivity', async t => {
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-desktop-hardening-'))
  t.after(() => fs.rmSync(tempDir, { recursive: true, force: true }))
@@ -71,6 +126,13 @@ test('resolveReadableFileForIpc validates existence type size and sensitivity',
  })
  assert.equal(fromFileUrl.resolvedPath, textPath)

+  const spacedPath = path.join(tempDir, 'notes with spaces.txt')
+  fs.writeFileSync(spacedPath, 'space ok', 'utf8')
+  const fromSpacedFileUrl = await resolveReadableFileForIpc(pathToFileURL(spacedPath).toString(), {
+    purpose: 'File preview'
+  })
+  assert.equal(fromSpacedFileUrl.resolvedPath, spacedPath)
+
  await assert.rejects(
    resolveReadableFileForIpc('missing.txt', {
      baseDir: tempDir,
@@ -114,3 +176,91 @@ test('resolveReadableFileForIpc validates existence type size and sensitivity',
  })
  assert.equal(envTemplate.resolvedPath, envTemplatePath)
 })
+
+test('resolveReadableFileForIpc blocks common sensitive files', async t => {
+  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-desktop-sensitive-'))
+  t.after(() => fs.rmSync(tempDir, { recursive: true, force: true }))
+
+  const sshDir = path.join(tempDir, '.ssh')
+  fs.mkdirSync(sshDir)
+
+  const blockedFiles = [
+    path.join(tempDir, '.env'),
+    path.join(tempDir, '.npmrc'),
+    path.join(sshDir, 'id_ed25519'),
+    path.join(tempDir, 'cert.pem'),
+    path.join(tempDir, 'cert.p12'),
+    path.join(tempDir, 'cert.pfx')
+  ]
+
+  for (const filePath of blockedFiles) {
+    fs.writeFileSync(filePath, 'secret', 'utf8')
+    await rejectsWithCode(resolveReadableFileForIpc(filePath, { purpose: 'File preview' }), 'sensitive-file')
+  }
+
+  const allowed = path.join(tempDir, '.env.example')
+  fs.writeFileSync(allowed, 'EXAMPLE_TOKEN=value', 'utf8')
+  assert.equal((await resolveReadableFileForIpc(allowed, { purpose: 'File preview' })).resolvedPath, allowed)
+})
+
+test('resolveReadableFileForIpc blocks symlinks whose realpath is sensitive', async t => {
+  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-desktop-realpath-'))
+  t.after(() => fs.rmSync(tempDir, { recursive: true, force: true }))
+
+  const envPath = path.join(tempDir, '.env')
+  const linkPath = path.join(tempDir, 'safe-name.txt')
+  fs.writeFileSync(envPath, 'SECRET_TOKEN=123', 'utf8')
+
+  try {
+    fs.symlinkSync(envPath, linkPath, 'file')
+  } catch (error) {
+    if (error?.code === 'EPERM' || error?.code === 'EACCES') {
+      t.skip(`symlink creation is not permitted on this platform (${error.code})`)
+      return
+    }
+    throw error
+  }
+
+  await rejectsWithCode(resolveReadableFileForIpc(linkPath, { purpose: 'File preview' }), 'sensitive-file')
+})
+
+test('resolveDirectoryForIpc accepts directories and rejects invalid directory targets', async t => {
+  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-desktop-dir-'))
+  t.after(() => fs.rmSync(tempDir, { recursive: true, force: true }))
+
+  const directory = path.join(tempDir, 'project')
+  const filePath = path.join(tempDir, 'file.txt')
+  fs.mkdirSync(directory)
+  fs.writeFileSync(filePath, 'not a directory', 'utf8')
+
+  const resolved = await resolveDirectoryForIpc(directory)
+  assert.equal(resolved.resolvedPath, directory)
+  assert.equal(resolved.stat.isDirectory(), true)
+
+  await rejectsWithCode(resolveDirectoryForIpc(filePath), 'ENOTDIR')
+  await rejectsWithCode(resolveDirectoryForIpc(path.join(tempDir, 'missing')), 'ENOENT')
+  await rejectsWithCode(resolveDirectoryForIpc('\\\\?\\C:\\secret'), 'device-path')
+})
+
+test('resolveDirectoryForIpc accepts directory symlinks or junctions', async t => {
+  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-desktop-dir-link-'))
+  t.after(() => fs.rmSync(tempDir, { recursive: true, force: true }))
+
+  const directory = path.join(tempDir, 'actual-project')
+  const linkPath = path.join(tempDir, 'linked-project')
+  fs.mkdirSync(directory)
+
+  try {
+    fs.symlinkSync(directory, linkPath, process.platform === 'win32' ? 'junction' : 'dir')
+  } catch (error) {
+    if (error?.code === 'EPERM' || error?.code === 'EACCES') {
+      t.skip(`directory symlink creation is not permitted on this platform (${error.code})`)
+      return
+    }
+    throw error
+  }
+
+  const resolved = await resolveDirectoryForIpc(linkPath)
+  assert.equal(resolved.resolvedPath, linkPath)
+  assert.equal(resolved.stat.isDirectory(), true)
+})
--- a/apps/desktop/electron/main.cjs
+++ b/apps/desktop/electron/main.cjs
--- a/apps/desktop/electron/preload.cjs
+++ b/apps/desktop/electron/preload.cjs
@@ -2,8 +2,10 @@ const { contextBridge, ipcRenderer, webUtils } = require('electron')

 contextBridge.exposeInMainWorld('hermesDesktop', {
  getConnection: profile => ipcRenderer.invoke('hermes:connection', profile),
+  revalidateConnection: () => ipcRenderer.invoke('hermes:connection:revalidate'),
  touchBackend: profile => ipcRenderer.invoke('hermes:backend:touch', profile),
  getGatewayWsUrl: profile => ipcRenderer.invoke('hermes:gateway:ws-url', profile),
+  openSessionWindow: sessionId => ipcRenderer.invoke('hermes:window:openSession', sessionId),
  getBootProgress: () => ipcRenderer.invoke('hermes:boot-progress:get'),
  getConnectionConfig: profile => ipcRenderer.invoke('hermes:connection-config:get', profile),
  saveConnectionConfig: payload => ipcRenderer.invoke('hermes:connection-config:save', payload),
@@ -40,6 +42,7 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
  setPreviewShortcutActive: active => ipcRenderer.send('hermes:previewShortcutActive', Boolean(active)),
  openExternal: url => ipcRenderer.invoke('hermes:openExternal', url),
  fetchLinkTitle: url => ipcRenderer.invoke('hermes:fetchLinkTitle', url),
+  sanitizeWorkspaceCwd: cwd => ipcRenderer.invoke('hermes:workspace:sanitize', cwd),
  settings: {
    getDefaultProjectDir: () => ipcRenderer.invoke('hermes:setting:defaultProjectDir:get'),
    setDefaultProjectDir: dir => ipcRenderer.invoke('hermes:setting:defaultProjectDir:set', dir),
@@ -77,6 +80,12 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
    ipcRenderer.on('hermes:open-updates', listener)
    return () => ipcRenderer.removeListener('hermes:open-updates', listener)
  },
+  onDeepLink: callback => {
+    const listener = (_event, payload) => callback(payload)
+    ipcRenderer.on('hermes:deep-link', listener)
+    return () => ipcRenderer.removeListener('hermes:deep-link', listener)
+  },
+  signalDeepLinkReady: () => ipcRenderer.invoke('hermes:deep-link-ready'),
  onWindowStateChanged: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:window-state-changed', listener)
@@ -131,5 +140,9 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
      ipcRenderer.on('hermes:updates:progress', listener)
      return () => ipcRenderer.removeListener('hermes:updates:progress', listener)
    }
+  },
+  themes: {
+    fetchMarketplace: id => ipcRenderer.invoke('hermes:vscode-theme:fetch', id),
+    searchMarketplace: query => ipcRenderer.invoke('hermes:vscode-theme:search', query)
  }
 })
--- a/apps/desktop/electron/session-windows.cjs
+++ b/apps/desktop/electron/session-windows.cjs
@@ -0,0 +1,86 @@
+// Secondary "session windows" — one extra OS window per chat so a user can
+// work with multiple chats side by side. The pure, Electron-free pieces live
+// here so they can be unit-tested with node --test (mirroring how the rest of
+// electron/*.cjs splits testable logic out of the main.cjs monolith).
+
+const { pathToFileURL } = require('node:url')
+
+// Compose the URL a secondary (per-chat) window loads.
+//
+// Layout is `<base>?win=secondary#/<encoded session id>`. The renderer uses a
+// HashRouter, so everything after '#' is the route; the `win=secondary` flag
+// therefore has to live in the query string BEFORE the '#', otherwise it would
+// be swallowed into the route and break routeSessionId(). The renderer reads
+// the flag from window.location.search to suppress the install / onboarding
+// overlays and the global session sidebar.
+//
+// `devServer` (when set) wins over `rendererIndexPath`, which is converted to a
+// file:// URL for packaged builds.
+function buildSessionWindowUrl(sessionId, { devServer, rendererIndexPath } = {}) {
+  const query = '?win=secondary'
+  const hash = `#/${encodeURIComponent(sessionId)}`
+
+  if (!devServer) {
+    return `${pathToFileURL(rendererIndexPath).toString()}${query}${hash}`
+  }
+
+  // Drop a single trailing slash so the join below never yields '//'.
+  const origin = devServer.endsWith('/') ? devServer.slice(0, -1) : devServer
+
+  return `${origin}/${query}${hash}`
+}
+
+// A small registry keyed by sessionId that guarantees one window per chat:
+// opening a session that already has a live window focuses it instead of
+// spawning a duplicate, and a window removes itself from the registry when it
+// closes. The actual BrowserWindow construction is injected (the `factory`) so
+// this module stays free of Electron and is unit-testable.
+//
+// Returned API:
+//   openOrFocus(sessionId, factory) -> the live window, or null when the id is
+//       empty / not a string or the factory returns nothing.
+//   get(sessionId) / has(sessionId) -> lookups using the same key
+//       normalization as openOrFocus.
+//   size -> number of registered windows.
+function createSessionWindowRegistry() {
+  const windows = new Map()
+
+  // Single source of truth for key normalization so lookups agree with
+  // openOrFocus: non-strings and blank ids collapse to '' (never stored).
+  function normalizeKey(sessionId) {
+    return typeof sessionId === 'string' ? sessionId.trim() : ''
+  }
+
+  function openOrFocus(sessionId, factory) {
+    const key = normalizeKey(sessionId)
+
+    if (!key) {
+      return null
+    }
+
+    const existing = windows.get(key)
+
+    if (existing && !existing.isDestroyed()) {
+      // Focus-or-create: never duplicate a window for the same chat.
+      if (typeof existing.isMinimized === 'function' && existing.isMinimized()) {
+        existing.restore?.()
+      }
+
+      if (typeof existing.isVisible === 'function' && !existing.isVisible()) {
+        existing.show?.()
+      }
+
+      existing.focus?.()
+
+      return existing
+    }
+
+    const win = factory(key)
+
+    if (!win) {
+      return null
+    }
+
+    windows.set(key, win)
+
+    // Self-cleanup on close so the registry never holds a destroyed window.
+    // The identity check keeps a stale window's late 'closed' event from
+    // evicting the replacement that was registered under the same key.
+    win.on?.('closed', () => {
+      if (windows.get(key) === win) {
+        windows.delete(key)
+      }
+    })
+
+    return win
+  }
+
+  return {
+    openOrFocus,
+    get: key => windows.get(normalizeKey(key)),
+    has: key => windows.has(normalizeKey(key)),
+    get size() {
+      return windows.size
+    }
+  }
+}
+
+module.exports = { buildSessionWindowUrl, createSessionWindowRegistry }
--- a/apps/desktop/electron/session-windows.test.cjs
+++ b/apps/desktop/electron/session-windows.test.cjs
@@ -0,0 +1,165 @@
+const assert = require('node:assert/strict')
+const test = require('node:test')
+
+const { buildSessionWindowUrl, createSessionWindowRegistry } = require('./session-windows.cjs')
+
+// Stand-in for the slice of the BrowserWindow API the registry touches. It
+// keeps one handler per event (fired on demand through emit()), exposes
+// setters so a test can put the window into a minimized / hidden state, and
+// counts focus / show / restore calls in `calls`.
+function makeFakeWindow() {
+  const handlers = {}
+  const calls = { focus: 0, show: 0, restore: 0 }
+  const state = { destroyed: false, minimized: false, visible: true }
+
+  return {
+    on(event, handler) {
+      handlers[event] = handler
+
+      return this
+    },
+    emit(event) {
+      handlers[event]?.()
+    },
+    isDestroyed: () => state.destroyed,
+    destroy() {
+      state.destroyed = true
+    },
+    isMinimized: () => state.minimized,
+    setMinimized(value) {
+      state.minimized = value
+    },
+    isVisible: () => state.visible,
+    setVisible(value) {
+      state.visible = value
+    },
+    restore() {
+      calls.restore += 1
+      state.minimized = false
+    },
+    show() {
+      calls.show += 1
+      state.visible = true
+    },
+    focus() {
+      calls.focus += 1
+    },
+    calls
+  }
+}
+
+// buildSessionWindowUrl: these pin the exact string layout
+// `<base>?win=secondary#/<encoded id>` for both the dev-server and the
+// packaged (file://) code paths.
+test('buildSessionWindowUrl puts the secondary flag before the hash route (dev server)', () => {
+  const url = buildSessionWindowUrl('abc123', { devServer: 'http://localhost:5173' })
+
+  assert.equal(url, 'http://localhost:5173/?win=secondary#/abc123')
+})
+
+test('buildSessionWindowUrl avoids a double slash when the dev server has a trailing slash', () => {
+  const url = buildSessionWindowUrl('abc123', { devServer: 'http://localhost:5173/' })
+
+  assert.equal(url, 'http://localhost:5173/?win=secondary#/abc123')
+})
+
+test('buildSessionWindowUrl encodes the session id in the hash route', () => {
+  const url = buildSessionWindowUrl('a b/c', { devServer: 'http://localhost:5173' })
+
+  // The query flag must precede the '#' or HashRouter would swallow it as the
+  // route; the id is URL-encoded so slashes/spaces survive routeSessionId().
+  assert.equal(url, 'http://localhost:5173/?win=secondary#/a%20b%2Fc')
+  assert.ok(url.indexOf('?win=secondary') < url.indexOf('#'))
+})
+
+test('buildSessionWindowUrl builds a packaged file URL with the flag before the hash', () => {
+  const url = buildSessionWindowUrl('abc', { rendererIndexPath: '/opt/app/index.html' })
+
+  // Only the tail is matched: the file:// prefix produced by pathToFileURL
+  // differs between platforms.
+  assert.match(url, /^file:\/\/.*index\.html\?win=secondary#\/abc$/)
+})
+
+// createSessionWindowRegistry: each test drives the registry with fake windows
+// from makeFakeWindow() and a counting factory, so no Electron is required.
+test('registry opens one window per session and focuses on re-open', () => {
+  const registry = createSessionWindowRegistry()
+  let built = 0
+  const win = makeFakeWindow()
+  const factory = () => {
+    built += 1
+
+    return win
+  }
+
+  const first = registry.openOrFocus('s1', factory)
+  const second = registry.openOrFocus('s1', factory)
+
+  assert.equal(built, 1, 'factory runs once for the same session')
+  assert.equal(first, second)
+  assert.equal(registry.size, 1)
+  assert.equal(win.calls.focus, 1, 'second open focuses the existing window')
+})
+
+test('registry restores + shows a minimized/hidden window on re-open', () => {
+  const registry = createSessionWindowRegistry()
+  const win = makeFakeWindow()
+  registry.openOrFocus('s1', () => win)
+
+  // Put the existing window into both "needs attention" states at once.
+  win.setMinimized(true)
+  win.setVisible(false)
+  registry.openOrFocus('s1', () => win)
+
+  assert.equal(win.calls.restore, 1)
+  assert.equal(win.calls.show, 1)
+  assert.equal(win.calls.focus, 1)
+})
+
+test('registry drops the entry when the window closes', () => {
+  const registry = createSessionWindowRegistry()
+  const win = makeFakeWindow()
+  registry.openOrFocus('s1', () => win)
+  assert.equal(registry.size, 1)
+
+  // Fires the 'closed' handler the registry attached via win.on().
+  win.emit('closed')
+
+  assert.equal(registry.size, 0)
+  assert.equal(registry.has('s1'), false)
+})
+
+test('registry rebuilds a fresh window after the previous one was destroyed', () => {
+  const registry = createSessionWindowRegistry()
+  const first = makeFakeWindow()
+  registry.openOrFocus('s1', () => first)
+  // Destroyed without emitting 'closed', so the stale entry is still registered.
+  first.destroy()
+
+  let built = 0
+  const second = makeFakeWindow()
+  const result = registry.openOrFocus('s1', () => {
+    built += 1
+
+    return second
+  })
+
+  assert.equal(built, 1, 'a destroyed window is replaced, not focused')
+  assert.equal(result, second)
+})
+
+test('registry ignores empty / non-string session ids', () => {
+  const registry = createSessionWindowRegistry()
+  let built = 0
+  const factory = () => {
+    built += 1
+
+    return makeFakeWindow()
+  }
+
+  assert.equal(registry.openOrFocus('', factory), null)
+  assert.equal(registry.openOrFocus('   ', factory), null)
+  assert.equal(registry.openOrFocus(null, factory), null)
+  assert.equal(registry.openOrFocus(42, factory), null)
+  // The factory must never run for a rejected id.
+  assert.equal(built, 0)
+  assert.equal(registry.size, 0)
+})
+
+test('registry trims the session id before keying', () => {
+  const registry = createSessionWindowRegistry()
+  const win = makeFakeWindow()
+  registry.openOrFocus('  s1  ', () => win)
+
+  assert.equal(registry.has('s1'), true)
+})
--- a/apps/desktop/electron/update-remote.cjs
+++ b/apps/desktop/electron/update-remote.cjs
@@ -0,0 +1,56 @@
+/**
+ * Pure helpers for choosing a remote URL during passive update checks.
+ *
+ * A public install can end up with `origin=git@github.com:NousResearch/hermes-agent.git`.
+ * If the user's GitHub SSH key is FIDO2/passkey-backed, a background `git fetch
+ * origin` triggers an unexplained hardware-touch prompt. For passive checks
+ * against the official repo we substitute the public HTTPS `ls-remote` path,
+ * which needs no auth and cannot prompt. Active update/apply flows are left
+ * unchanged.
+ *
+ * Extracted from main.cjs so the security-critical remote detection is unit
+ * testable without booting Electron (main.cjs requires('electron') at load).
+ */
+
+const OFFICIAL_REPO_HTTPS_URL = 'https://github.com/NousResearch/hermes-agent.git'
+const OFFICIAL_REPO_CANONICAL = 'github.com/nousresearch/hermes-agent'
+
+// Normalize common GitHub remote URL forms to `host/owner/repo` (lowercased,
+// no trailing slash, no .git suffix) so SSH and HTTPS forms of the same repo
+// compare equal.
+//
+// Handles the scp-like form (`git@github.com:owner/repo`), the explicit
+// `ssh://git@github.com/owner/repo` form, and anything `new URL()` can parse
+// (reduced to hostname + pathname). Input that is none of these is returned
+// trimmed and lowercased but otherwise unchanged. Falsy input yields ''.
+function canonicalGitHubRemote(url) {
+  if (!url) return ''
+  let value = String(url).trim()
+  // Hostnames are case-insensitive, so the SSH prefixes are matched without
+  // regard to case (`git@GitHub.com:...` is the same remote). Extra slashes
+  // right after the prefix are dropped so `git@github.com:/owner/repo` does
+  // not canonicalize to `github.com//owner/repo`.
+  const sshPrefix = /^(?:git@github\.com:|ssh:\/\/git@github\.com\/)\/*/i.exec(value)
+  if (sshPrefix) {
+    value = `github.com/${value.slice(sshPrefix[0].length)}`
+  } else {
+    try {
+      const parsed = new URL(value)
+      if (parsed.hostname && parsed.pathname) value = `${parsed.hostname}${parsed.pathname}`
+    } catch {
+      // Leave non-URL forms unchanged.
+    }
+  }
+  // Lowercase before stripping the suffix so `.GIT` is treated like `.git`.
+  value = value.trim().replace(/\/+$/, '').toLowerCase()
+  if (value.endsWith('.git')) value = value.slice(0, -4)
+  return value
+}
+
+// True when `url` uses one of the two SSH spellings this module recognizes:
+// the scp-like `git@host:path` form or an explicit `ssh://` URL. The check is
+// case-insensitive and ignores surrounding whitespace; falsy input is false.
+function isSshRemote(url) {
+  const normalized = String(url || '').trim().toLowerCase()
+  return ['git@', 'ssh://'].some(prefix => normalized.startsWith(prefix))
+}
+
+// True only when `url` is an SSH remote AND canonicalizes to the official
+// repository. HTTPS remotes (even to the official repo), forks, and other
+// hosts all return false.
+function isOfficialSshRemote(url) {
+  if (!isSshRemote(url)) {
+    return false
+  }
+
+  return canonicalGitHubRemote(url) === OFFICIAL_REPO_CANONICAL
+}
+
+module.exports = {
+  OFFICIAL_REPO_HTTPS_URL,
+  OFFICIAL_REPO_CANONICAL,
+  canonicalGitHubRemote,
+  isSshRemote,
+  isOfficialSshRemote
+}
--- a/apps/desktop/electron/update-remote.test.cjs
+++ b/apps/desktop/electron/update-remote.test.cjs
@@ -0,0 +1,78 @@
+/**
+ * Tests for electron/update-remote.cjs — the remote-detection helpers that
+ * keep passive update checks off the SSH origin for official installs.
+ *
+ * Run with: node --test electron/update-remote.test.cjs
+ * (Wired into npm test:desktop:platforms in package.json.)
+ *
+ * Why this matters: a public install can carry
+ * origin=git@github.com:NousResearch/hermes-agent.git. A background
+ * `git fetch origin` then authenticates over SSH and, with a FIDO2/passkey
+ * key, triggers an unexplained hardware-touch prompt. isOfficialSshRemote
+ * must reliably recognize the official SSH remote (in every URL form,
+ * case-insensitively) so the caller can swap in the anonymous HTTPS path —
+ * while NOT misclassifying forks, other hosts, or the HTTPS remote (which
+ * never prompts and should keep the normal fetch path).
+ */
+
+const test = require('node:test')
+const assert = require('node:assert/strict')
+
+const {
+  OFFICIAL_REPO_HTTPS_URL,
+  OFFICIAL_REPO_CANONICAL,
+  canonicalGitHubRemote,
+  isSshRemote,
+  isOfficialSshRemote
+} = require('./update-remote.cjs')
+
+test('canonicalGitHubRemote normalizes SSH and HTTPS forms to the same value', () => {
+  assert.equal(canonicalGitHubRemote('git@github.com:NousResearch/hermes-agent.git'), OFFICIAL_REPO_CANONICAL)
+  assert.equal(canonicalGitHubRemote('git@github.com:NousResearch/hermes-agent'), OFFICIAL_REPO_CANONICAL)
+  assert.equal(canonicalGitHubRemote('ssh://git@github.com/NousResearch/hermes-agent.git'), OFFICIAL_REPO_CANONICAL)
+  assert.equal(canonicalGitHubRemote('https://github.com/NousResearch/hermes-agent.git'), OFFICIAL_REPO_CANONICAL)
+  // Case-insensitive: owner/repo casing does not matter, so both an
+  // all-lowercase and an all-uppercase spelling canonicalize to the same repo.
+  assert.equal(canonicalGitHubRemote('git@github.com:nousresearch/hermes-agent.git'), OFFICIAL_REPO_CANONICAL)
+  assert.equal(canonicalGitHubRemote('git@github.com:NOUSRESEARCH/HERMES-AGENT.git'), OFFICIAL_REPO_CANONICAL)
+  // Trailing slashes are stripped.
+  assert.equal(canonicalGitHubRemote('https://github.com/NousResearch/hermes-agent/'), OFFICIAL_REPO_CANONICAL)
+})
+
+// Falsy input takes the early-return path and must never throw.
+test('canonicalGitHubRemote is empty for falsy input', () => {
+  assert.equal(canonicalGitHubRemote(''), '')
+  assert.equal(canonicalGitHubRemote(null), '')
+  assert.equal(canonicalGitHubRemote(undefined), '')
+})
+
+// Transport detection only: this says nothing about which repo the URL names.
+test('isSshRemote detects scp-like and ssh:// forms only', () => {
+  assert.equal(isSshRemote('git@github.com:NousResearch/hermes-agent.git'), true)
+  assert.equal(isSshRemote('ssh://git@github.com/NousResearch/hermes-agent.git'), true)
+  assert.equal(isSshRemote('https://github.com/NousResearch/hermes-agent.git'), false)
+  assert.equal(isSshRemote(''), false)
+  assert.equal(isSshRemote(null), false)
+})
+
+test('isOfficialSshRemote is true only for the official repo over SSH', () => {
+  assert.equal(isOfficialSshRemote('git@github.com:NousResearch/hermes-agent.git'), true)
+  assert.equal(isOfficialSshRemote('git@github.com:NousResearch/hermes-agent'), true)
+  assert.equal(isOfficialSshRemote('ssh://git@github.com/NousResearch/hermes-agent.git'), true)
+  // Case-insensitive owner/repo match.
+  assert.equal(isOfficialSshRemote('git@github.com:nousresearch/hermes-agent.git'), true)
+})
+
+test('isOfficialSshRemote does NOT match forks, other hosts, or HTTPS', () => {
+  // A fork over SSH belongs to the user — fetching it is their own remote,
+  // not the official upstream, so the SSH-avoidance swap must not apply.
+  assert.equal(isOfficialSshRemote('git@github.com:someuser/hermes-agent.git'), false)
+  // Same repo name on a different host is not the official repo.
+  assert.equal(isOfficialSshRemote('git@gitlab.com:NousResearch/hermes-agent.git'), false)
+  // HTTPS to the official repo never prompts for SSH/FIDO2, so it keeps the
+  // normal fetch path — must not be flagged as an official SSH remote.
+  assert.equal(isOfficialSshRemote('https://github.com/NousResearch/hermes-agent.git'), false)
+  assert.equal(isOfficialSshRemote(''), false)
+  assert.equal(isOfficialSshRemote(null), false)
+})
+
+test('OFFICIAL_REPO_HTTPS_URL canonicalizes to OFFICIAL_REPO_CANONICAL', () => {
+  // Invariant: the URL we substitute in must be the same repo we detect.
+  assert.equal(canonicalGitHubRemote(OFFICIAL_REPO_HTTPS_URL), OFFICIAL_REPO_CANONICAL)
+})
--- a/apps/desktop/electron/vscode-marketplace.cjs
+++ b/apps/desktop/electron/vscode-marketplace.cjs
@@ -0,0 +1,331 @@
+'use strict'
+
+/**
+ * VS Code Marketplace color-theme fetcher (main process).
+ *
+ * Resolves an extension's latest version via the (undocumented but stable)
+ * gallery ExtensionQuery API, downloads the `.vsix` (a zip), and extracts the
+ * color-theme JSON files it contributes. No theme code is ever executed — we
+ * only read `package.json` + the referenced `*.json` theme files out of the
+ * archive and hand their text back to the renderer to convert.
+ *
+ * Dependency-free on purpose: a `.vsix` is a plain zip, so we parse the central
+ * directory and inflate just the entries we need with `zlib`. Avoids pulling a
+ * zip library into the desktop bundle for a feature this small.
+ */
+
+const https = require('node:https')
+const zlib = require('node:zlib')
+
+const GALLERY_QUERY_URL = 'https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery'
+const VSIX_ASSET_TYPE = 'Microsoft.VisualStudio.Services.VSIXPackage'
+const MAX_VSIX_BYTES = 40 * 1024 * 1024 // 40 MB — themes are tiny; this is paranoia.
+const MAX_REDIRECTS = 5
+const REQUEST_TIMEOUT_MS = 20_000
+
+const ID_RE = /^[\w-]+\.[\w-]+$/
+
+/**
+ * Minimal HTTPS helper with redirect-following, timeout, and a size cap.
+ *
+ * Resolves with the full response body as a Buffer for a 2xx status. Rejects
+ * when:
+ *   - the status is not 2xx (and is not a followable redirect),
+ *   - more than `redirectsLeft` redirects are chained, or a `Location` header
+ *     cannot be parsed,
+ *   - the body grows past `maxBytes`,
+ *   - the socket stays idle for REQUEST_TIMEOUT_MS,
+ *   - the connection fails or is dropped before the body is complete.
+ *
+ * Redirects are always re-issued as a body-less GET carrying only the
+ * caller's User-Agent (if one was supplied).
+ */
+function request(url, { method = 'GET', headers = {}, body = null, maxBytes = MAX_VSIX_BYTES } = {}, redirectsLeft = MAX_REDIRECTS) {
+  return new Promise((resolve, reject) => {
+    const req = https.request(url, { method, headers }, res => {
+      const status = res.statusCode ?? 0
+
+      if (status >= 300 && status < 400 && res.headers.location) {
+        // Drain the redirect body so the socket is released.
+        res.resume()
+
+        if (redirectsLeft <= 0) {
+          reject(new Error('Too many redirects.'))
+
+          return
+        }
+
+        let next
+
+        try {
+          next = new URL(res.headers.location, url).toString()
+        } catch {
+          // This callback runs outside the Promise executor, so a throw here
+          // would be an uncaught exception rather than a rejection.
+          reject(new Error(`Invalid redirect location from ${url}`))
+
+          return
+        }
+
+        // Redirects to the CDN are plain GETs (drop the POST body). Only
+        // forward a User-Agent that was actually supplied: Node rejects a
+        // header whose value is undefined.
+        const userAgent = headers['User-Agent']
+        const nextHeaders = userAgent === undefined ? {} : { 'User-Agent': userAgent }
+
+        resolve(request(next, { method: 'GET', headers: nextHeaders, maxBytes }, redirectsLeft - 1))
+
+        return
+      }
+
+      if (status < 200 || status >= 300) {
+        res.resume()
+        reject(new Error(`Request failed (${status}) for ${url}`))
+
+        return
+      }
+
+      const chunks = []
+      let total = 0
+
+      res.on('data', chunk => {
+        total += chunk.length
+
+        if (total > maxBytes) {
+          req.destroy()
+          reject(new Error('Response exceeded the size limit.'))
+
+          return
+        }
+
+        chunks.push(chunk)
+      })
+      res.on('end', () => resolve(Buffer.concat(chunks)))
+      // A connection dropped mid-body never emits 'end'; without these two
+      // listeners the promise would stay pending forever. Settling twice is
+      // harmless because a promise only honors its first resolve/reject.
+      res.on('error', reject)
+      res.on('close', () => {
+        if (!res.complete) {
+          reject(new Error('Response ended before it was complete.'))
+        }
+      })
+    })
+
+    req.on('error', reject)
+    req.setTimeout(REQUEST_TIMEOUT_MS, () => req.destroy(new Error('Request timed out.')))
+
+    if (body) {
+      req.write(body)
+    }
+
+    req.end()
+  })
+}
+
+/**
+ * Look up the latest published version of `id` on the Marketplace and return
+ * `{ displayName, vsixUrl }`, where `vsixUrl` is the `source` of the version's
+ * VSIX package asset and `displayName` falls back to `id`.
+ *
+ * Throws when the extension is unknown, has no versions, or exposes no
+ * downloadable package.
+ */
+async function resolveExtension(id) {
+  const payload = {
+    // FilterType 7 = ExtensionName (the full publisher.extension id).
+    filters: [{ criteria: [{ filterType: 7, value: id }], pageNumber: 1, pageSize: 1 }],
+    // Flags: IncludeFiles | IncludeVersionProperties | IncludeAssetUri |
+    // IncludeCategoryAndTags | IncludeLatestVersionOnly = 914.
+    flags: 914
+  }
+  const response = await queryGallery(payload)
+  const match = response?.results?.[0]?.extensions?.[0]
+
+  if (!match) {
+    throw new Error(`Extension "${id}" was not found on the Marketplace.`)
+  }
+
+  const latest = match.versions?.[0]
+
+  if (!latest) {
+    throw new Error(`Extension "${id}" has no published versions.`)
+  }
+
+  const files = latest.files ?? []
+  const vsixUrl = files.find(file => file.assetType === VSIX_ASSET_TYPE)?.source
+
+  if (!vsixUrl) {
+    throw new Error(`Could not find a downloadable package for "${id}".`)
+  }
+
+  return { displayName: match.displayName || id, vsixUrl }
+}
+
+/**
+ * POST `payload` (serialized as JSON) to the gallery ExtensionQuery endpoint
+ * and return the parsed JSON response. `maxBytes` caps the response size and
+ * defaults to 4 MiB. Rejects if the request fails or the body is not JSON.
+ */
+async function queryGallery(payload, { maxBytes = 4 * 1024 * 1024 } = {}) {
+  const serialized = JSON.stringify(payload)
+  const requestHeaders = {
+    Accept: 'application/json;api-version=3.0-preview.1',
+    'Content-Type': 'application/json',
+    // Byte length, not string length: the payload may contain multi-byte text.
+    'Content-Length': Buffer.byteLength(serialized),
+    'User-Agent': 'Hermes-Desktop'
+  }
+  const responseBytes = await request(GALLERY_QUERY_URL, {
+    method: 'POST',
+    headers: requestHeaders,
+    body: serialized,
+    maxBytes
+  })
+  const responseText = responseBytes.toString('utf8')
+
+  return JSON.parse(responseText)
+}
+
+/**
+ * The "Themes" category also contains file-icon and product-icon themes (the
+ * gallery has no color-only category). We can't see an extension's actual
+ * contributions without downloading it, so filter the obvious icon packs out by
+ * tag + name/description. Color themes that also ship icons are rare; worst case
+ * a user installs them by exact id from settings.
+ */
+function looksLikeIconTheme(extension) {
+  // The gallery API is undocumented, so tolerate a missing or non-array `tags`
+  // field instead of throwing and failing the whole search.
+  const rawTags = Array.isArray(extension.tags) ? extension.tags : []
+  const tags = rawTags.map(tag => String(tag).toLowerCase())
+
+  if (tags.includes('icon-theme') || tags.includes('product-icon-theme')) {
+    return true
+  }
+
+  const text = `${extension.displayName ?? ''} ${extension.shortDescription ?? ''}`.toLowerCase()
+
+  return /\b(icon theme|file icons?|product icons?|icon pack|fileicons)\b/.test(text)
+}
+
+/**
+ * Search the Marketplace for color-theme extensions. With an empty query this
+ * returns the most-installed themes; with a query it's a full-text search
+ * scoped to the Themes category. Returns lightweight cards (no download):
+ * `{ extensionId, displayName, publisher, description, installs }`.
+ *
+ * `limit` is clamped to a whole number between 1 and 50 (default 20).
+ */
+async function searchMarketplaceThemes(query, limit = 20) {
+  const text = String(query || '').trim()
+  // Truncate after clamping so a fractional limit never reaches the API as a
+  // non-integer page size.
+  const pageSize = Math.trunc(Math.min(Math.max(Number(limit) || 20, 1), 50))
+
+  // FilterType: 8=Target, 5=Category, 10=SearchText, 12=ExcludeWithFlags.
+  const criteria = [
+    { filterType: 8, value: 'Microsoft.VisualStudio.Code' },
+    { filterType: 5, value: 'Themes' },
+    { filterType: 12, value: '4096' } // Exclude unpublished (Unpublished = 0x1000).
+  ]
+
+  if (text) {
+    criteria.push({ filterType: 10, value: text })
+  }
+
+  const json = await queryGallery({
+    // Over-fetch so the icon-theme filter below still leaves a full page.
+    filters: [{ criteria, pageNumber: 1, pageSize: Math.min(pageSize * 2, 50), sortBy: 4, sortOrder: 0 }],
+    // IncludeStatistics (0x100) | IncludeLatestVersionOnly (0x200) | IncludeCategoryAndTags (0x4).
+    flags: 772
+  })
+
+  const extensions = json?.results?.[0]?.extensions ?? []
+
+  return extensions
+    .filter(extension => !looksLikeIconTheme(extension))
+    .slice(0, pageSize)
+    .map(extension => {
+      const publisherName = extension.publisher?.publisherName ?? ''
+      const installStat = (extension.statistics ?? []).find(stat => stat.statisticName === 'install')
+
+      return {
+        extensionId: `${publisherName}.${extension.extensionName}`,
+        displayName: extension.displayName || extension.extensionName,
+        publisher: extension.publisher?.displayName || publisherName,
+        description: extension.shortDescription || '',
+        installs: Math.round(installStat?.value ?? 0)
+      }
+    })
+}
+
+// ─── Minimal zip reader ─────────────────────────────────────────────────────
+
+/**
+ * Return the byte offset of the end-of-central-directory (EOCD) record.
+ * Scans backwards from the last position where a 22-byte record could start
+ * (i.e. an archive without a trailing comment) and returns the first match of
+ * the EOCD signature. Throws when no signature is found.
+ */
+function findEndOfCentralDirectory(buf) {
+  const EOCD_SIGNATURE = 0x06054b50
+  const EOCD_MIN_SIZE = 22
+  let cursor = buf.length - EOCD_MIN_SIZE
+
+  while (cursor >= 0) {
+    if (buf.readUInt32LE(cursor) === EOCD_SIGNATURE) {
+      return cursor
+    }
+
+    cursor -= 1
+  }
+
+  throw new Error('Not a valid zip archive (no end-of-central-directory).')
+}
+
+/**
+ * Index the archive's central directory by entry name.
+ *
+ * Returns a Map of `name -> { method, compressedSize, localOffset }`. At most
+ * the entry count declared in the EOCD record is read; parsing stops early at
+ * the first position that does not carry a central-header signature.
+ */
+function readCentralDirectory(buf) {
+  const CENTRAL_HEADER_SIGNATURE = 0x02014b50
+  const CENTRAL_HEADER_SIZE = 46
+
+  const eocdOffset = findEndOfCentralDirectory(buf)
+  let remaining = buf.readUInt16LE(eocdOffset + 10)
+  let cursor = buf.readUInt32LE(eocdOffset + 16)
+  const entries = new Map()
+
+  while (remaining > 0 && buf.readUInt32LE(cursor) === CENTRAL_HEADER_SIGNATURE) {
+    const method = buf.readUInt16LE(cursor + 10)
+    const compressedSize = buf.readUInt32LE(cursor + 20)
+    const nameLen = buf.readUInt16LE(cursor + 28)
+    const extraLen = buf.readUInt16LE(cursor + 30)
+    const commentLen = buf.readUInt16LE(cursor + 32)
+    const localOffset = buf.readUInt32LE(cursor + 42)
+    const nameStart = cursor + CENTRAL_HEADER_SIZE
+    const name = buf.toString('utf8', nameStart, nameStart + nameLen)
+
+    entries.set(name, { method, compressedSize, localOffset })
+
+    // Each record is followed by its variable-length name, extra field, and comment.
+    cursor = nameStart + nameLen + extraLen + commentLen
+    remaining -= 1
+  }
+
+  return entries
+}
+
+/**
+ * Read one archive entry and return its contents decoded as UTF-8.
+ *
+ * `record` is a central-directory record (`{ method, compressedSize,
+ * localOffset }`). `maxBytes` caps the inflated size of a deflated entry
+ * (default 16 MiB): the archive is untrusted input, and without a cap a tiny
+ * compressed entry could expand until the process runs out of memory.
+ *
+ * Throws when the local header signature is wrong, the entry data runs past
+ * the end of the buffer, the compression method is neither stored (0) nor
+ * deflate (8), or the inflated output exceeds `maxBytes`.
+ */
+function extractEntry(buf, record, maxBytes = 16 * 1024 * 1024) {
+  // The local header's name/extra lengths can differ from the central record,
+  // so re-read them here to locate the compressed payload.
+  if (buf.readUInt32LE(record.localOffset) !== 0x04034b50) {
+    throw new Error('Corrupt zip: bad local file header.')
+  }
+
+  const nameLen = buf.readUInt16LE(record.localOffset + 26)
+  const extraLen = buf.readUInt16LE(record.localOffset + 28)
+  const dataStart = record.localOffset + 30 + nameLen + extraLen
+  const dataEnd = dataStart + record.compressedSize
+
+  // subarray() silently clamps an out-of-range end, which would hand back a
+  // truncated entry as if it were complete.
+  if (dataEnd > buf.length) {
+    throw new Error('Corrupt zip: entry data is truncated.')
+  }
+
+  const data = buf.subarray(dataStart, dataEnd)
+
+  // 0 = stored, 8 = deflate. Theme files are one or the other.
+  if (record.method === 0) {
+    return data.toString('utf8')
+  }
+
+  if (record.method !== 8) {
+    throw new Error(`Unsupported zip compression method (${record.method}).`)
+  }
+
+  return zlib.inflateRawSync(data, { maxOutputLength: maxBytes }).toString('utf8')
+}
+
+/**
+ * Map a theme `path` from package.json to the name of its zip entry: a leading
+ * `./` and then a leading `/` are removed, and the result is placed under the
+ * archive's `extension/` folder.
+ */
+function themeEntryName(themePath) {
+  let relative = String(themePath)
+
+  relative = relative.replace(/^\.\//, '')
+  relative = relative.replace(/^\//, '')
+
+  return 'extension/' + relative
+}
+
+/**
+ * Pull every color theme an extension contributes out of a `.vsix` buffer.
+ *
+ * Reads `extension/package.json`, walks `contributes.themes`, and returns one
+ * `{ label, uiTheme, contents }` item per theme whose file is present in the
+ * archive and can be extracted (`contents` is the raw file text). Entries
+ * without a `path`, with a missing file, or that fail to extract are skipped.
+ *
+ * Throws when the manifest is missing from the archive or is not valid JSON.
+ */
+function extractThemes(vsixBuffer) {
+  const records = readCentralDirectory(vsixBuffer)
+  const manifestRecord = records.get('extension/package.json')
+
+  if (!manifestRecord) {
+    throw new Error('Package manifest missing from the extension.')
+  }
+
+  const manifest = JSON.parse(extractEntry(vsixBuffer, manifestRecord))
+  const declared = manifest?.contributes?.themes
+
+  if (!Array.isArray(declared) || declared.length === 0) {
+    return []
+  }
+
+  const extracted = []
+
+  for (const declaration of declared) {
+    const record = declaration?.path ? records.get(themeEntryName(declaration.path)) : undefined
+
+    if (!record) {
+      continue
+    }
+
+    try {
+      extracted.push({
+        label: declaration.label || declaration.id || manifest.displayName || manifest.name || 'VS Code Theme',
+        uiTheme: declaration.uiTheme,
+        contents: extractEntry(vsixBuffer, record)
+      })
+    } catch {
+      // Skip an entry we can't inflate rather than failing the whole install.
+    }
+  }
+
+  return extracted
+}
+
+/**
+ * Public entry point: validate `id` (`publisher.extension`), resolve its
+ * latest package, download the `.vsix`, and extract its color themes.
+ * Resolves with `{ extensionId, displayName, themes }`, where `extensionId` is
+ * the trimmed input. Throws when `id` does not have the `publisher.extension`
+ * shape, and propagates resolve / download / extraction failures.
+ */
+async function fetchMarketplaceThemes(id) {
+  const extensionId = String(id || '').trim()
+
+  if (!ID_RE.test(extensionId)) {
+    throw new Error('Expected a Marketplace id like "publisher.extension".')
+  }
+
+  const resolved = await resolveExtension(extensionId)
+  const archive = await request(resolved.vsixUrl, { headers: { 'User-Agent': 'Hermes-Desktop' } })
+
+  return {
+    extensionId,
+    displayName: resolved.displayName,
+    themes: extractThemes(archive)
+  }
+}
+
+module.exports = {
+  fetchMarketplaceThemes,
+  searchMarketplaceThemes,
+  extractThemes,
+  readCentralDirectory,
+  __testing: { themeEntryName, looksLikeIconTheme }
+}
--- a/apps/desktop/electron/vscode-marketplace.test.cjs
+++ b/apps/desktop/electron/vscode-marketplace.test.cjs
@@ -0,0 +1,113 @@
+'use strict'
+
+const assert = require('node:assert')
+const test = require('node:test')
+
+const { __testing, extractThemes, readCentralDirectory } = require('./vscode-marketplace.cjs')
+
+// Assemble a tiny zip archive in memory from `{ name, data }` entries. Every
+// entry is stored (method 0, no compression) so the test controls the bytes
+// exactly and needs no deflate step. Layout: all local headers + payloads,
+// then the central directory, then a 22-byte end-of-central-directory record.
+function makeZip(entries) {
+  const localChunks = []
+  const centralChunks = []
+  let nextLocalOffset = 0
+
+  for (const { name, data } of entries) {
+    const nameBytes = Buffer.from(name, 'utf8')
+    const payload = Buffer.from(data, 'utf8')
+    const size = payload.length
+
+    const localHeader = Buffer.alloc(30 + nameBytes.length)
+    localHeader.writeUInt32LE(0x04034b50, 0) // local file header signature
+    localHeader.writeUInt16LE(0, 8) // method: stored
+    localHeader.writeUInt32LE(size, 18) // compressed size
+    localHeader.writeUInt32LE(size, 22) // uncompressed size
+    localHeader.writeUInt16LE(nameBytes.length, 26)
+    nameBytes.copy(localHeader, 30)
+
+    const centralHeader = Buffer.alloc(46 + nameBytes.length)
+    centralHeader.writeUInt32LE(0x02014b50, 0) // central header signature
+    centralHeader.writeUInt16LE(0, 10) // method: stored
+    centralHeader.writeUInt32LE(size, 20) // compressed size
+    centralHeader.writeUInt32LE(size, 24) // uncompressed size
+    centralHeader.writeUInt16LE(nameBytes.length, 28)
+    centralHeader.writeUInt32LE(nextLocalOffset, 42) // local header offset
+    nameBytes.copy(centralHeader, 46)
+
+    localChunks.push(localHeader, payload)
+    centralChunks.push(centralHeader)
+    nextLocalOffset += localHeader.length + size
+  }
+
+  // The central directory starts right after the last local entry.
+  const centralDirectory = Buffer.concat(centralChunks)
+
+  const trailer = Buffer.alloc(22)
+  trailer.writeUInt32LE(0x06054b50, 0) // EOCD signature
+  trailer.writeUInt16LE(entries.length, 8) // entries on this disk
+  trailer.writeUInt16LE(entries.length, 10) // total entries
+  trailer.writeUInt32LE(centralDirectory.length, 12) // central directory size
+  trailer.writeUInt32LE(nextLocalOffset, 16) // central directory offset
+
+  return Buffer.concat([...localChunks, centralDirectory, trailer])
+}
+
+// All archives below come from makeZip(), i.e. stored (uncompressed) entries
+// only; the deflate branch of extractEntry is not exercised here.
+test('readCentralDirectory finds every entry', () => {
+  const zip = makeZip([
+    { name: 'extension/package.json', data: '{}' },
+    { name: 'extension/themes/x.json', data: '{}' }
+  ])
+
+  const records = readCentralDirectory(zip)
+  assert.ok(records.has('extension/package.json'))
+  assert.ok(records.has('extension/themes/x.json'))
+})
+
+test('extractThemes reads contributed color themes (resolving ./ paths)', () => {
+  // The manifest references the theme with a leading './', while the zip entry
+  // is stored under 'extension/'; extractThemes has to bridge the two.
+  const pkg = JSON.stringify({
+    name: 'theme-dracula',
+    displayName: 'Dracula',
+    contributes: {
+      themes: [{ label: 'Dracula', uiTheme: 'vs-dark', path: './themes/dracula.json' }]
+    }
+  })
+  const themeJson = JSON.stringify({ name: 'Dracula', type: 'dark', colors: { 'editor.background': '#282a36' } })
+
+  const zip = makeZip([
+    { name: 'extension/package.json', data: pkg },
+    { name: 'extension/themes/dracula.json', data: themeJson }
+  ])
+
+  const themes = extractThemes(zip)
+  assert.strictEqual(themes.length, 1)
+  assert.strictEqual(themes[0].label, 'Dracula')
+  assert.strictEqual(themes[0].uiTheme, 'vs-dark')
+  // `contents` is the raw theme file text, not a parsed object.
+  assert.match(themes[0].contents, /editor\.background/)
+})
+
+test('extractThemes returns empty when the extension contributes no themes', () => {
+  const zip = makeZip([{ name: 'extension/package.json', data: JSON.stringify({ name: 'x', contributes: {} }) }])
+  assert.deepStrictEqual(extractThemes(zip), [])
+})
+
+test('extractThemes throws when the manifest is missing', () => {
+  const zip = makeZip([{ name: 'extension/other.txt', data: 'hi' }])
+  assert.throws(() => extractThemes(zip), /manifest missing/i)
+})
+
+test('looksLikeIconTheme filters icon/product-icon packs out of theme search', () => {
+  const { looksLikeIconTheme } = __testing
+
+  // Tagged contribution points are the strongest signal.
+  assert.strictEqual(looksLikeIconTheme({ tags: ['theme', 'icon-theme'] }), true)
+  assert.strictEqual(looksLikeIconTheme({ tags: ['product-icon-theme'] }), true)
+
+  // Name/description fallback for packs that don't tag themselves.
+  assert.strictEqual(looksLikeIconTheme({ displayName: 'Material Icon Theme' }), true)
+  assert.strictEqual(looksLikeIconTheme({ shortDescription: 'A pack of file icons.' }), true)
+
+  // Real color themes survive.
+  assert.strictEqual(looksLikeIconTheme({ displayName: 'Dracula Official', tags: ['theme', 'color-theme'] }), false)
+  assert.strictEqual(looksLikeIconTheme({ displayName: 'One Dark Pro' }), false)
+})
--- a/apps/desktop/electron/windows-child-process.test.cjs
+++ b/apps/desktop/electron/windows-child-process.test.cjs
@@ -0,0 +1,54 @@
+'use strict'
+
+const test = require('node:test')
+const assert = require('node:assert/strict')
+const fs = require('node:fs')
+const path = require('node:path')
+
+const ELECTRON_DIR = __dirname
+
+function readElectronFile(name) {
+  return fs.readFileSync(path.join(ELECTRON_DIR, name), { encoding: 'utf8' }) // UTF-8 text of a file beside this test
+}
+
+// Asserts `needle` occurs in `source` and that `hiddenWindowsChildOptions(` follows within 700 characters.
+function requireHiddenChildOptions(source, needle) {
+  const start = source.indexOf(needle)
+  assert.notEqual(start, -1, `missing call site: ${needle}`)
+
+  // Only the first occurrence of the needle is inspected.
+  const excerpt = source.slice(start, start + 700)
+  const failure = `expected ${needle} to wrap child-process options with hiddenWindowsChildOptions`
+  assert.match(excerpt, /hiddenWindowsChildOptions\(/, failure)
+}
+
+test('desktop background child processes opt into hidden Windows consoles', () => {
+  const mainSource = readElectronFile('main.cjs')
+  assert.match(mainSource, /function hiddenWindowsChildOptions\(options = \{\}\)/)
+
+  // Call-site prefixes in main.cjs that must be followed by hiddenWindowsChildOptions(.
+  const callSites = [
+    "execFileSync(\n          'reg'",
+    'execFileSync(pyExe',
+    'spawn(resolveGitBinary()',
+    "execFileSync('taskkill'",
+    'spawn(command, args',
+    "spawn('curl'",
+    'spawn(backend.command, backend.args',
+    'hermesProcess = spawn(backend.command, backend.args',
+    "spawn(py, ['-m', 'hermes_cli.main', 'uninstall', '--gui-summary']"
+  ]
+  callSites.forEach(needle => requireHiddenChildOptions(mainSource, needle))
+})
+
+test('intentional or interactive desktop child processes stay documented', () => {
+  const mainSource = readElectronFile('main.cjs')
+  const documented = [/windowsHide: false/, /nodePty\.spawn\(command, args/, /spawn\('cmd\.exe', \['\/c', 'start'/]
+  documented.forEach(pattern => assert.match(mainSource, pattern))
+})
+
+test('bootstrap PowerShell runner hides Windows console children', () => {
+  const runnerSource = readElectronFile('bootstrap-runner.cjs')
+  assert.match(runnerSource, /function hiddenWindowsChildOptions\(options = \{\}\)/)
+  requireHiddenChildOptions(runnerSource, 'spawn(ps, fullArgs')
+})
--- a/apps/desktop/electron/workspace-cwd.cjs
+++ b/apps/desktop/electron/workspace-cwd.cjs
@@ -0,0 +1,38 @@
+const path = require('node:path')
+
+/**
+ * True when `dir` is one of `installRoots` or nested beneath one (packaged builds only).
+ * Returns false when `isPackaged` is falsy, `dir` is empty, or `dir` cannot be resolved.
+ */
+function isPackagedInstallPath(dir, { installRoots, isPackaged }) {
+  if (!isPackaged || !dir) {
+    return false
+  }
+
+  let resolved
+  try {
+    resolved = path.resolve(String(dir))
+  } catch {
+    return false
+  }
+
+  // A Set collapses roots that resolve to the same absolute path.
+  const roots = new Set((installRoots ?? []).filter(Boolean).map(candidate => path.resolve(String(candidate))))
+
+  for (const root of roots) {
+    if (resolved === root) {
+      return true
+    }
+
+    const rel = path.relative(root, resolved)
+    // Only a real `..` segment leaves the root; a child merely named `..cache` does not.
+    const escapesRoot = rel === '..' || rel.startsWith(`..${path.sep}`)
+    if (rel && !escapesRoot && !path.isAbsolute(rel)) {
+      return true
+    }
+  }
+
+  return false
+}
+
+module.exports = { isPackagedInstallPath }
--- a/apps/desktop/electron/workspace-cwd.test.cjs
+++ b/apps/desktop/electron/workspace-cwd.test.cjs
@@ -0,0 +1,45 @@
+/**
+ * Tests for electron/workspace-cwd.cjs.
+ *
+ * Run with: node --test electron/workspace-cwd.test.cjs
+ */
+
+const test = require('node:test')
+const assert = require('node:assert/strict')
+const path = require('node:path')
+
+const { isPackagedInstallPath } = require('./workspace-cwd.cjs')
+
+const installRoot = path.resolve('/opt/Hermes')
+
+test('isPackagedInstallPath returns false when not packaged', () => {
+  const devOptions = { isPackaged: false, installRoots: [installRoot] }
+
+  // Unpackaged runs are never install paths, even when `dir` is the root itself.
+  assert.strictEqual(isPackagedInstallPath(installRoot, devOptions), false)
+})
+
+test('isPackagedInstallPath flags the install root itself', () => {
+  const packagedOptions = { isPackaged: true, installRoots: [installRoot] }
+
+  // `dir` equal to a root counts as inside it.
+  assert.strictEqual(isPackagedInstallPath(installRoot, packagedOptions), true)
+})
+
+test('isPackagedInstallPath flags paths nested under the install root', () => {
+  const packagedOptions = { isPackaged: true, installRoots: [installRoot] }
+  const insideBundle = path.join(installRoot, 'resources', 'app.asar')
+  const verdict = isPackagedInstallPath(insideBundle, packagedOptions)
+
+  // Descendants of a root are install paths too.
+  assert.strictEqual(verdict, true)
+})
+
+test('isPackagedInstallPath ignores paths outside the install root', () => {
+  const packagedOptions = { isPackaged: true, installRoots: [installRoot] }
+  const userProject = path.resolve('/home/user/projects/demo')
+  const verdict = isPackagedInstallPath(userProject, packagedOptions)
+
+  // A separate tree elsewhere on disk must not be flagged.
+  assert.strictEqual(verdict, false)
+})
--- a/apps/desktop/eslint.config.mjs
+++ b/apps/desktop/eslint.config.mjs
@@ -3,7 +3,6 @@ import typescriptEslint from '@typescript-eslint/eslint-plugin'
 import typescriptParser from '@typescript-eslint/parser'
 import perfectionist from 'eslint-plugin-perfectionist'
 import reactPlugin from 'eslint-plugin-react'
-import reactCompiler from 'eslint-plugin-react-compiler'
 import hooksPlugin from 'eslint-plugin-react-hooks'
 import unusedImports from 'eslint-plugin-unused-imports'
 import globals from 'globals'
@@ -47,7 +46,6 @@ export default [
      'custom-rules': customRules,
      perfectionist,
      react: reactPlugin,
-      'react-compiler': reactCompiler,
      'react-hooks': hooksPlugin,
      'unused-imports': unusedImports
    },
@@ -98,7 +96,6 @@ export default [
      'perfectionist/sort-jsx-props': ['error', { order: 'asc', type: 'natural' }],
      'perfectionist/sort-named-exports': ['error', { order: 'asc', type: 'natural' }],
      'perfectionist/sort-named-imports': ['error', { order: 'asc', type: 'natural' }],
-      'react-compiler/react-compiler': 'warn',
      'react-hooks/exhaustive-deps': 'warn',
      'react-hooks/rules-of-hooks': 'error',
      'unused-imports/no-unused-imports': 'error'
--- a/apps/desktop/package.json
+++ b/apps/desktop/package.json
@@ -18,7 +18,7 @@
    "profile:main": "wait-on http://127.0.0.1:5174 && cross-env XCURSOR_SIZE=24 HERMES_DESKTOP_DEV_SERVER=http://127.0.0.1:5174 electron --inspect=9229 .",
    "profile:main:cpu": "wait-on http://127.0.0.1:5174 && cross-env XCURSOR_SIZE=24 NODE_OPTIONS=--cpu-prof HERMES_DESKTOP_DEV_SERVER=http://127.0.0.1:5174 electron .",
    "start": "npm run build && electron .",
-    "build": "node scripts/assert-root-install.cjs && node scripts/write-build-stamp.cjs && node scripts/stage-native-deps.cjs && tsc -b && vite build",
+    "build": "node scripts/assert-root-install.cjs && node scripts/write-build-stamp.cjs && node scripts/stage-native-deps.cjs && tsc -b && vite build && node scripts/assert-dist-built.cjs",
    "builder": "cross-env NODE_OPTIONS=--max-old-space-size=16384 electron-builder",
    "pack": "npm run build && npm run builder -- --dir",
    "dist": "npm run build && npm run builder",
@@ -35,8 +35,8 @@
    "test:desktop:nsis": "node scripts/test-desktop.mjs nsis",
    "test:desktop:existing": "node scripts/test-desktop.mjs existing",
    "test:desktop:fresh": "node scripts/test-desktop.mjs fresh",
-    "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs",
-    "type-check": "tsc -b",
+    "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs",
+    "typecheck": "tsc -p . --noEmit",
    "lint": "eslint src/ electron/",
    "lint:fix": "eslint src/ electron/ --fix",
    "fmt": "prettier --write 'src/**/*.{ts,tsx}' 'electron/**/*.{js,cjs}' 'vite.config.ts'",
@@ -72,6 +72,7 @@
    "class-variance-authority": "^0.7.1",
    "clsx": "^2.1.1",
    "cmdk": "^1.1.1",
+    "dnd-core": "^14.0.1",
    "hast-util-from-html-isomorphic": "^2.0.0",
    "hast-util-to-text": "^4.0.2",
    "ignore": "^7.0.5",
@@ -83,6 +84,7 @@
    "radix-ui": "^1.4.3",
    "react": "^19.2.5",
    "react-arborist": "^3.5.0",
+    "react-dnd-html5-backend": "^14.0.3",
    "react-dom": "^19.2.5",
    "react-router-dom": "^7.17.0",
    "react-shiki": "^0.9.3",
@@ -103,20 +105,19 @@
    "@testing-library/dom": "^10.4.0",
    "@testing-library/react": "^16.3.2",
    "@types/hast": "^3.0.4",
-    "@types/node": "^24.12.2",
+    "@types/node": "^24.13.2",
    "@types/react": "^19.2.14",
    "@types/react-dom": "^19.2.3",
    "@typescript-eslint/eslint-plugin": "^8.59.1",
    "@typescript-eslint/parser": "^8.59.1",
    "@vitejs/plugin-react": "^6.0.1",
-    "concurrently": "^9.2.1",
+    "concurrently": "^10.0.3",
    "cross-env": "^10.1.0",
    "electron": "^40.9.3",
    "electron-builder": "^26.8.1",
    "eslint": "^9.39.4",
    "eslint-plugin-perfectionist": "^5.9.0",
    "eslint-plugin-react": "^7.37.5",
-    "eslint-plugin-react-compiler": "^19.1.0-rc.2",
    "eslint-plugin-react-hooks": "^7.1.1",
    "eslint-plugin-unused-imports": "^4.4.1",
    "globals": "^16.5.0",
@@ -133,6 +134,14 @@
    "appId": "com.nousresearch.hermes",
    "productName": "Hermes",
    "executableName": "Hermes",
+    "protocols": [
+      {
+        "name": "Hermes Protocol",
+        "schemes": [
+          "hermes"
+        ]
+      }
+    ],
    "artifactName": "Hermes-${version}-${os}-${arch}.${ext}",
    "icon": "assets/icon",
    "directories": {
@@ -166,7 +175,8 @@
    "afterSign": "scripts/notarize.cjs",
    "asarUnpack": [
      "**/*.node",
-      "**/prebuilds/**"
+      "**/prebuilds/**",
+      "dist/**"
    ],
    "mac": {
      "category": "public.app-category.developer-tools",
--- a/apps/desktop/pr-assets/session-source-folders.png
+++ b/apps/desktop/pr-assets/session-source-folders.png
--- a/apps/desktop/public/apple-touch-icon.png
+++ b/apps/desktop/public/apple-touch-icon.png
--- a/apps/desktop/scripts/assert-dist-built.cjs
+++ b/apps/desktop/scripts/assert-dist-built.cjs
@@ -0,0 +1,70 @@
+"use strict"
+
+// Build-time guard: refuse to hand a half-built renderer to electron-builder.
+//
+// `npm run pack` / `npm run dist*` are `npm run build && npm run builder`.
+// If the `build` step (tsc -b && vite build) fails but packaging proceeds
+// anyway — a stale checkout that fails typecheck, an interrupted vite build,
+// or npm not short-circuiting `&&` in some shells — electron-builder happily
+// packages an app with an empty or missing `dist/`. The result launches but
+// blank-pages with `ERR_FILE_NOT_FOUND` for dist/index.html, with no clue why.
+//
+// This runs at the tail of `build`, after vite build, so any packaging path
+// inherits it. It fails loud and early instead of shipping a broken bundle.
+// See issues #39484 (renderer blank page) and #41327 / #39472 (dashboard 404).
+
+const fs = require("fs")
+const path = require("path")
+
+// Read-only inspection of a renderer output directory; never writes, logs, or exits.
+// Result is { ok: true } on success, otherwise { ok: false, error: "<reason>" }.
+function checkDistBuilt(distDir) {
+  const fail = error => ({ ok: false, error })
+  const isDirectory = target => fs.existsSync(target) && fs.statSync(target).isDirectory()
+
+  if (!isDirectory(distDir)) {
+    return fail(`no dist directory at ${distDir}`)
+  }
+
+  const entryPage = path.join(distDir, "index.html")
+  const entryIsFile = fs.existsSync(entryPage) && fs.statSync(entryPage).isFile()
+  if (!entryIsFile) {
+    return fail(`dist/index.html is missing at ${entryPage}`)
+  }
+  if (fs.statSync(entryPage).size === 0) {
+    return fail(`dist/index.html is empty at ${entryPage}`)
+  }
+
+  // The page needs at least one script: look for any *.js entry under dist/assets.
+  const assetsDir = path.join(distDir, "assets")
+  const bundleFound = isDirectory(assetsDir) && fs.readdirSync(assetsDir).some(name => name.endsWith(".js"))
+  if (!bundleFound) {
+    return fail(`dist/assets has no built JS bundle (expected vite output under ${assetsDir})`)
+  }
+
+  return { ok: true }
+}
+
+// CLI entry: checks <desktop root>/dist, logs success, or prints guidance and exits with status 1.
+function main() {
+  const desktopRoot = path.resolve(__dirname, "..")
+  const { ok, error } = checkDistBuilt(path.join(desktopRoot, "dist"))
+  if (ok) return console.log("✓ assert-dist-built: dist/index.html + assets present")
+
+  const report = [
+    `\n✗ assert-dist-built: ${error}`,
+    "  The renderer bundle is missing or incomplete, so packaging",
+    "  would produce an app that launches to a blank page.",
+    "  Re-run the build and check the tsc/vite output above for the",
+    "  real failure, then package again:",
+    `    cd ${desktopRoot} && npm run build\n`
+  ]
+  report.forEach(line => console.error(line))
+  process.exit(1)
+}
+
+if (require.main === module) {
+  main()
+}
+
+module.exports = { checkDistBuilt }
--- a/apps/desktop/scripts/assert-dist-built.test.cjs
+++ b/apps/desktop/scripts/assert-dist-built.test.cjs
@@ -0,0 +1,84 @@
+const assert = require('node:assert/strict')
+const fs = require('node:fs')
+const os = require('node:os')
+const path = require('node:path')
+const test = require('node:test')
+
+const { checkDistBuilt } = require('../scripts/assert-dist-built.cjs')
+
+// Creates <new temp dir>/dist, runs the optional `extra(distDir)` hook to populate it, returns both paths.
+function makeDist(extra) {
+  const sandbox = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-assert-dist-'))
+  const outDir = path.join(sandbox, 'dist')
+  fs.mkdirSync(outDir, { recursive: true })
+  if (extra) extra(outDir)
+  return { tempRoot: sandbox, distDir: outDir }
+}
+
+test('checkDistBuilt passes when index.html + an assets JS bundle exist', () => {
+  const { tempRoot, distDir } = makeDist(dir => {
+    fs.mkdirSync(path.join(dir, 'assets'))
+    fs.writeFileSync(path.join(dir, 'index.html'), '<!doctype html><div id=root></div>', 'utf8')
+    fs.writeFileSync(path.join(dir, 'assets', 'index-abc123.js'), 'console.log(1)', 'utf8')
+  })
+  try {
+    assert.deepStrictEqual(checkDistBuilt(distDir), { ok: true })
+  } finally {
+    fs.rmSync(tempRoot, { recursive: true, force: true })
+  }
+})
+
+test('checkDistBuilt fails when the dist directory is absent', () => {
+  const emptyRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-assert-dist-'))
+  try {
+    const { ok, error } = checkDistBuilt(path.join(emptyRoot, 'dist'))
+    assert.strictEqual(ok, false)
+    assert.match(error, /no dist directory/)
+  } finally {
+    fs.rmSync(emptyRoot, { recursive: true, force: true })
+  }
+})
+
+test('checkDistBuilt fails when index.html is missing', () => {
+  const { tempRoot, distDir } = makeDist(dir => {
+    fs.mkdirSync(path.join(dir, 'assets'))
+    fs.writeFileSync(path.join(dir, 'assets', 'index-abc123.js'), 'console.log(1)', 'utf8')
+  })
+  try {
+    const { ok, error } = checkDistBuilt(distDir)
+    assert.strictEqual(ok, false)
+    assert.match(error, /index\.html is missing/)
+  } finally {
+    fs.rmSync(tempRoot, { recursive: true, force: true })
+  }
+})
+
+test('checkDistBuilt fails when index.html is empty', () => {
+  const { tempRoot, distDir } = makeDist(dir => {
+    fs.mkdirSync(path.join(dir, 'assets'))
+    fs.writeFileSync(path.join(dir, 'index.html'), '', 'utf8')
+    fs.writeFileSync(path.join(dir, 'assets', 'index-abc123.js'), 'console.log(1)', 'utf8')
+  })
+  try {
+    const { ok, error } = checkDistBuilt(distDir)
+    assert.strictEqual(ok, false)
+    assert.match(error, /index\.html is empty/)
+  } finally {
+    fs.rmSync(tempRoot, { recursive: true, force: true })
+  }
+})
+
+test('checkDistBuilt fails when assets/ has no JS bundle', () => {
+  const { tempRoot, distDir } = makeDist(dir => {
+    fs.mkdirSync(path.join(dir, 'assets')) // CSS only below, no JS — still a blank page at runtime.
+    fs.writeFileSync(path.join(dir, 'index.html'), '<!doctype html>', 'utf8')
+    fs.writeFileSync(path.join(dir, 'assets', 'index-abc123.css'), 'body{}', 'utf8')
+  })
+  try {
+    const { ok, error } = checkDistBuilt(distDir)
+    assert.strictEqual(ok, false)
+    assert.match(error, /no built JS bundle/)
+  } finally {
+    fs.rmSync(tempRoot, { recursive: true, force: true })
+  }
+})
--- a/apps/desktop/src/app/chat/composer/attachments.tsx
+++ b/apps/desktop/src/app/chat/composer/attachments.tsx
@@ -3,8 +3,9 @@ import { useStore } from '@nanostores/react'
 import { Codicon } from '@/components/ui/codicon'
 import { Tip } from '@/components/ui/tooltip'
 import { useI18n } from '@/i18n'
-import { FileText, FolderOpen, ImageIcon, Link, Terminal } from '@/lib/icons'
+import { AlertCircle, FileText, FolderOpen, ImageIcon, Link, Loader2, Terminal } from '@/lib/icons'
 import { normalizeOrLocalPreviewTarget } from '@/lib/local-preview'
+import { cn } from '@/lib/utils'
 import type { ComposerAttachment } from '@/store/composer'
 import { notifyError } from '@/store/notifications'
 import { setCurrentSessionPreviewTarget } from '@/store/preview'
@@ -31,7 +32,9 @@ function AttachmentPill({ attachment, onRemove }: { attachment: ComposerAttachme
  const c = t.composer
  const Icon = { folder: FolderOpen, url: Link, image: ImageIcon, file: FileText, terminal: Terminal }[attachment.kind]
  const cwd = useStore($currentCwd)
-  const canPreview = attachment.kind !== 'folder' && attachment.kind !== 'terminal'
+  const isUploading = attachment.uploadState === 'uploading'
+  const hasUploadError = attachment.uploadState === 'error'
+  const canPreview = attachment.kind !== 'folder' && attachment.kind !== 'terminal' && !isUploading
  const detail = attachment.detail && attachment.detail !== attachment.label ? attachment.detail : undefined

  async function openPreview() {
@@ -59,7 +62,15 @@ function AttachmentPill({ attachment, onRemove }: { attachment: ComposerAttachme
        throw new Error(c.couldNotPreview(attachment.label))
      }

-      setCurrentSessionPreviewTarget(preview, 'manual', target)
+      // We already hold the image bytes (the card thumbnail) — render those
+      // directly so a screenshot/clipboard image previews even when its only
+      // on-disk copy is a transient path the renderer can't re-read.
+      const withBytes =
+        attachment.kind === 'image' && attachment.previewUrl
+          ? { ...preview, dataUrl: attachment.previewUrl, previewKind: 'image' as const }
+          : preview
+
+      setCurrentSessionPreviewTarget(withBytes, 'manual', target)
    } catch (error) {
      notifyError(error, c.previewUnavailable)
    }
@@ -69,30 +80,51 @@ function AttachmentPill({ attachment, onRemove }: { attachment: ComposerAttachme
    <Tip label={attachment.path || attachment.detail || attachment.label}>
      <div className="group/attachment relative min-w-0 shrink-0">
        <button
+          aria-busy={isUploading || undefined}
          aria-label={canPreview ? c.previewLabel(attachment.label) : attachment.label}
-          className="flex max-w-56 items-center gap-2 border border-border/60 bg-background/50 px-2 py-1.5 text-left shadow-[inset_0_1px_0_rgba(255,255,255,0.25)] transition-colors hover:border-primary/35 hover:bg-accent/45 disabled:cursor-default"
+          className={cn(
+            'flex max-w-56 items-center gap-2 rounded-2xl border bg-background/50 px-2 py-1.5 text-left shadow-[inset_0_1px_0_rgba(255,255,255,0.18)] transition-colors disabled:cursor-default',
+            hasUploadError
+              ? 'border-destructive/45 hover:border-destructive/60'
+              : 'border-border/60 hover:border-primary/35 hover:bg-accent/45'
+          )}
          disabled={!canPreview}
          onClick={() => void openPreview()}
          type="button"
        >
-          {attachment.previewUrl && attachment.kind === 'image' ? (
-            <img
-              alt={attachment.label}
-              className="size-8 shrink-0 border border-border/70 object-cover"
-              draggable={false}
-              src={attachment.previewUrl}
-            />
-          ) : (
-            <span className="grid size-8 shrink-0 place-items-center border border-border/55 bg-muted/35 text-muted-foreground">
+          <span className="relative grid size-8 shrink-0 place-items-center overflow-hidden rounded-lg border border-border/55 bg-muted/35 text-muted-foreground">
+            {attachment.previewUrl && attachment.kind === 'image' ? (
+              <img
+                alt={attachment.label}
+                className="size-full object-cover"
+                draggable={false}
+                src={attachment.previewUrl}
+              />
+            ) : (
              <Icon className="size-3.5" />
-            </span>
-          )}
+            )}
+            {isUploading && (
+              <span className="absolute inset-0 grid place-items-center bg-background/60 backdrop-blur-[1px]">
+                <Loader2 className="size-3.5 animate-spin text-foreground/75" />
+              </span>
+            )}
+            {hasUploadError && (
+              <span className="absolute inset-0 grid place-items-center bg-destructive/15">
+                <AlertCircle className="size-3.5 text-destructive" />
+              </span>
+            )}
+          </span>
          <span className="min-w-0">
            <span className="block truncate text-[0.72rem] font-medium leading-4 text-foreground/90">
              {attachment.label}
            </span>
            {detail && (
-              <span className="block truncate font-mono text-[0.6rem] leading-3 text-muted-foreground/65">
+              <span
+                className={cn(
+                  'block truncate text-[0.62rem] leading-3.5',
+                  hasUploadError ? 'text-destructive/80' : 'text-muted-foreground/65'
+                )}
+              >
                {detail}
              </span>
            )}
--- a/apps/desktop/src/app/chat/composer/completion-drawer.tsx
+++ b/apps/desktop/src/app/chat/composer/completion-drawer.tsx
@@ -3,32 +3,25 @@ import { ComposerPrimitive } from '@assistant-ui/react'
 import type { ReactNode } from 'react'

 export const COMPLETION_DRAWER_CLASS = [
-  'absolute bottom-[calc(100%+0.25rem)] left-0 z-50',
-  'w-60 max-w-[calc(100vw-2rem)]',
-  'max-h-[min(23rem,calc(100vh-8rem))] overflow-y-auto overscroll-contain',
-  'rounded-lg border border-(--ui-stroke-secondary)',
-  'bg-[color-mix(in_srgb,var(--ui-bg-elevated)_96%,transparent)]',
-  'p-1 text-xs text-popover-foreground shadow-md',
+  'absolute bottom-[calc(100%+0.375rem)] left-0 z-50',
+  'w-80 max-w-[calc(100vw-2rem)]',
+  'max-h-[min(22rem,calc(100vh-8rem))] overflow-y-auto overscroll-contain',
+  'rounded-xl border border-(--ui-stroke-secondary)',
+  'bg-[color-mix(in_srgb,var(--ui-bg-elevated)_97%,transparent)]',
+  'p-1 text-xs text-popover-foreground shadow-lg',
  'backdrop-blur-md'
 ].join(' ')

 export const COMPLETION_DRAWER_BELOW_CLASS = [
-  'absolute left-0 top-[calc(100%+0.25rem)] z-50',
-  'w-60 max-w-[calc(100vw-2rem)]',
-  'max-h-[min(23rem,calc(100vh-8rem))] overflow-y-auto overscroll-contain',
-  'rounded-lg border border-(--ui-stroke-secondary)',
-  'bg-[color-mix(in_srgb,var(--ui-bg-elevated)_96%,transparent)]',
-  'p-1 text-xs text-popover-foreground shadow-md',
+  'absolute left-0 top-[calc(100%+0.375rem)] z-50',
+  'w-80 max-w-[calc(100vw-2rem)]',
+  'max-h-[min(22rem,calc(100vh-8rem))] overflow-y-auto overscroll-contain',
+  'rounded-xl border border-(--ui-stroke-secondary)',
+  'bg-[color-mix(in_srgb,var(--ui-bg-elevated)_97%,transparent)]',
+  'p-1 text-xs text-popover-foreground shadow-lg',
  'backdrop-blur-md'
 ].join(' ')

-export const COMPLETION_DRAWER_ROW_CLASS = [
-  'relative flex cursor-default select-none items-center gap-2 rounded-md px-2 py-1',
-  'w-full min-w-0 text-left text-xs outline-hidden transition-colors',
-  'hover:bg-(--ui-bg-tertiary)',
-  'data-[highlighted]:bg-(--ui-bg-tertiary) data-[highlighted]:text-foreground'
-].join(' ')
-
 export function ComposerCompletionDrawer({
  adapter,
  ariaLabel,
--- a/apps/desktop/src/app/chat/composer/controls.tsx
+++ b/apps/desktop/src/app/chat/composer/controls.tsx
@@ -4,6 +4,7 @@ import { Tip } from '@/components/ui/tooltip'
 import { useI18n } from '@/i18n'
 import { triggerHaptic } from '@/lib/haptics'
 import { AudioLines, Layers3, Loader2, Square, SteeringWheel } from '@/lib/icons'
+import { formatCombo } from '@/lib/keybinds/combo'
 import { cn } from '@/lib/utils'

 import type { ConversationStatus } from './hooks/use-voice-conversation'
@@ -62,6 +63,7 @@ export function ComposerControls({
 }) {
  const { t } = useI18n()
  const c = t.composer
+  const steerLabel = `${c.steer} (${formatCombo('mod+enter')})`

  if (conversation.active) {
    return <ConversationPill {...conversation} disabled={disabled} />
@@ -73,9 +75,9 @@ export function ComposerControls({
    <div className="ml-auto flex shrink-0 items-center gap-(--composer-control-gap)">
      <DictationButton disabled={disabled} onToggle={onDictate} state={state.voice} status={voiceStatus} />
      {canSteer && (
-        <Tip label={c.steer}>
+        <Tip label={steerLabel}>
          <Button
-            aria-label={c.steer}
+            aria-label={steerLabel}
            className={GHOST_ICON_BTN}
            disabled={disabled}
            onClick={onSteer}
--- a/apps/desktop/src/app/chat/composer/enter-submit-dom-race.test.tsx
+++ b/apps/desktop/src/app/chat/composer/enter-submit-dom-race.test.tsx
@@ -0,0 +1,189 @@
+import { act, cleanup, fireEvent, render } from '@testing-library/react'
+import { useRef, useState } from 'react'
+import { afterEach, describe, expect, it, vi } from 'vitest'
+
+// No global setupFiles registers auto-cleanup, so unmount between tests —
+// otherwise a second render() leaks the first editor and getByTestId('editor')
+// matches multiple nodes.
+afterEach(cleanup)
+
+// Faithful mirror of index.tsx's Enter wiring (handleEditorKeyDown's Enter
+// branch + submitDraft), driven through REAL DOM keydown events on a
+// contentEditable.
+//
+// Regression repro for #39630: pressing Enter right after typing (fast typing /
+// IME) did nothing. The composer state (`draft` from useAuiState) and its
+// derived `hasComposerPayload` lag the DOM by a render, so the keydown handler
+// read empty state and either dropped the message, drained a queued prompt
+// instead of sending, or (while busy) refused to queue. The fix reads the live
+// editor text — `hasLivePayload` in the handler and a DOM re-sync at the top of
+// submitDraft — so the just-typed text always wins.
+//
+// We model the race deterministically the way the IME repro does: mutate the
+// editor's textContent WITHOUT firing an input event, so the React `draft`
+// state stays stale while the DOM already holds the text.
+function Harness({
+  busy = false,
+  queued = [],
+  onSubmit,
+  onQueue,
+  onCancel,
+  onDrain
+}: {
+  busy?: boolean // when true, Enter with text calls onQueue instead of onSubmit
+  queued?: readonly string[] // only its length is read: non-empty + idle + empty editor → onDrain
+  onSubmit: (text: string) => void
+  onQueue: (text: string) => void
+  onCancel: () => void
+  onDrain: () => void
+}) {
+  const editorRef = useRef<HTMLDivElement>(null)
+  const draftRef = useRef('') // last text recorded by onInput or by the submit-time DOM re-sync
+  // Mirrors `useAuiState(s => s.composer.text)` — updated only via setText, so
+  // it lags the DOM until React re-renders (the source of the bug).
+  const [draft, setDraft] = useState('')
+  const attachments: unknown[] = [] // always empty in this harness, so payload checks reduce to the text
+
+  const composerPlainText = (el: HTMLElement) => el.textContent ?? ''
+
+  const setText = (next: string) => {
+    draftRef.current = next
+    setDraft(next)
+  }
+
+  const submitDraft = () => {
+    const editor = editorRef.current
+    if (editor) {
+      const domText = composerPlainText(editor)
+      if (domText !== draftRef.current) { // DOM is ahead of the recorded draft: adopt the live text
+        draftRef.current = domText
+        setDraft(domText)
+      }
+    }
+
+    const text = draftRef.current
+    const payloadPresent = text.trim().length > 0 || attachments.length > 0
+
+    if (busy) {
+      if (payloadPresent) {
+        onQueue(text)
+      } else {
+        onCancel()
+      }
+    } else if (!payloadPresent && queued.length > 0) {
+      onDrain()
+    } else if (payloadPresent) {
+      onSubmit(text)
+    }
+  }
+
+  const handleKeyDown = (event: React.KeyboardEvent<HTMLDivElement>) => {
+    if (event.key === 'Enter' && !event.shiftKey) {
+      event.preventDefault()
+
+      const editorText = editorRef.current ? composerPlainText(editorRef.current) : draftRef.current
+      const hasLivePayload = editorText.trim().length > 0 || attachments.length > 0
+
+      if (!busy && !hasLivePayload && queued.length > 0) {
+        onDrain()
+
+        return
+      }
+
+      if (busy && !hasLivePayload) { // busy + empty editor: swallow Enter before submitDraft can reach onCancel
+        return
+      }
+
+      submitDraft()
+    }
+  }
+
+  // `draft` is read so the lint/compiler treats the stale-state mirror as live;
+  // the assertions prove the handler never relies on it.
+  void draft
+
+  return (
+    <div
+      contentEditable
+      data-testid="editor"
+      onInput={event => setText(composerPlainText(event.currentTarget))}
+      onKeyDown={handleKeyDown}
+      ref={editorRef}
+      suppressContentEditableWarning
+    />
+  )
+}
+
+describe('composer Enter submit — live DOM vs stale composer state (#39630)', () => {
+  it('sends the just-typed text on Enter even when composer state has not synced', async () => {
+    const onSubmit = vi.fn()
+    const { getByTestId } = render(
+      <Harness onCancel={vi.fn()} onDrain={vi.fn()} onQueue={vi.fn()} onSubmit={onSubmit} />
+    )
+    const editor = getByTestId('editor')
+
+    // Fast typing: the DOM has the text but NO input event fired, so `draft`
+    // state is still empty (the exact stale-state race).
+    await act(async () => {
+      editor.textContent = 'hello world'
+      fireEvent.keyDown(editor, { key: 'Enter' }) // keydown only — the harness onInput handler never runs
+    })
+
+    expect(onSubmit).toHaveBeenCalledWith('hello world')
+  })
+
+  it('queues a fast-typed message while busy instead of draining the queue or cancelling', async () => {
+    const onQueue = vi.fn()
+    const onDrain = vi.fn()
+    const onCancel = vi.fn()
+    const { getByTestId } = render(
+      <Harness busy onCancel={onCancel} onDrain={onDrain} onQueue={onQueue} onSubmit={vi.fn()} queued={['queued-1']} />
+    )
+    const editor = getByTestId('editor')
+
+    await act(async () => {
+      editor.textContent = 'urgent follow-up' // DOM-only write, same stale-state setup as the first case
+      fireEvent.keyDown(editor, { key: 'Enter' })
+    })
+
+    expect(onQueue).toHaveBeenCalledWith('urgent follow-up')
+    expect(onDrain).not.toHaveBeenCalled()
+    expect(onCancel).not.toHaveBeenCalled()
+  })
+
+  it('treats an empty Enter while busy as a no-op (never an accidental Stop)', async () => {
+    const onCancel = vi.fn()
+    const onSubmit = vi.fn()
+    const onQueue = vi.fn()
+    const { getByTestId } = render(
+      <Harness busy onCancel={onCancel} onDrain={vi.fn()} onQueue={onQueue} onSubmit={onSubmit} />
+    )
+    const editor = getByTestId('editor')
+
+    await act(async () => {
+      editor.textContent = '' // explicit: the editor holds no text when Enter arrives
+      fireEvent.keyDown(editor, { key: 'Enter' })
+    })
+
+    expect(onCancel).not.toHaveBeenCalled()
+    expect(onSubmit).not.toHaveBeenCalled()
+    expect(onQueue).not.toHaveBeenCalled()
+  })
+
+  it('drains the next queued prompt on Enter when idle with a truly empty editor', async () => {
+    const onDrain = vi.fn()
+    const onSubmit = vi.fn()
+    const { getByTestId } = render(
+      <Harness onCancel={vi.fn()} onDrain={onDrain} onQueue={vi.fn()} onSubmit={onSubmit} queued={['queued-1']} />
+    )
+    const editor = getByTestId('editor')
+
+    await act(async () => {
+      editor.textContent = ''
+      fireEvent.keyDown(editor, { key: 'Enter' })
+    })
+
+    expect(onDrain).toHaveBeenCalledTimes(1)
+    expect(onSubmit).not.toHaveBeenCalled()
+  })
+})
--- a/apps/desktop/src/app/chat/composer/hooks/use-live-completion-adapter.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-live-completion-adapter.ts
@@ -5,6 +5,13 @@ export interface CompletionEntry {
  text: string
  display?: unknown
  meta?: unknown
+  /** Optional section label (e.g. "Commands", "Skills"). The popover renders a
+   *  header whenever this changes between consecutive items, so the fetcher must
+   *  emit entries already grouped contiguously. */
+  group?: string
+  /** Optional completion-action id. When set, picking the item runs that action
+   *  (e.g. opening an overlay) instead of inserting a chip + waiting for submit. */
+  action?: string
 }

 export interface CompletionPayload {
--- a/apps/desktop/src/app/chat/composer/hooks/use-slash-completions.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-slash-completions.ts
@@ -2,12 +2,17 @@ import type { Unstable_TriggerAdapter, Unstable_TriggerItem } from '@assistant-u
 import { useCallback } from 'react'

 import type { HermesGateway } from '@/hermes'
+import { sessionTitle } from '@/lib/chat-runtime'
 import {
  type CommandsCatalogLike,
+  desktopSkinSlashCompletions,
  desktopSlashDescription,
+  type DesktopThemeCommandOption,
  filterDesktopCommandsCatalog,
+  isDesktopSlashExtensionCommand,
  isDesktopSlashSuggestion
 } from '@/lib/desktop-slash-commands'
+import { $sessions } from '@/store/session'

 import type { CompletionEntry, CompletionPayload } from './use-live-completion-adapter'
 import { useLiveCompletionAdapter } from './use-live-completion-adapter'
@@ -16,7 +21,10 @@ interface SlashItemMetadata extends Record<string, string> {
  command: string
  display: string
  meta: string
+  group: string
  rawText: string
+  /** Completion-action id; empty for ordinary insert-a-chip completions. */
+  action: string
 }

 function textValue(value: unknown, fallback = ''): string {
@@ -38,12 +46,21 @@ function commandText(value: string): string {
  return value.startsWith('/') ? value : `/${value}`
 }

+/** How many recent sessions to surface inline before the "Browse all…" entry. */
+const SESSION_INLINE_LIMIT = 7
+
 /** Live `/` completions backed by the gateway's `complete.slash` RPC. */
-export function useSlashCompletions(options: { gateway: HermesGateway | null }): {
+export function useSlashCompletions(options: {
+  gateway: HermesGateway | null
+  /** Desktop theme list — `/skin` is owned client-side, so its arg completions
+   *  come from here, not the backend (whose skin list is CLI/TUI-only). */
+  skinThemes?: DesktopThemeCommandOption[]
+  activeSkin?: string
+}): {
  adapter: Unstable_TriggerAdapter
  loading: boolean
 } {
-  const { gateway } = options
+  const { gateway, skinThemes, activeSkin } = options
  const enabled = Boolean(gateway)

  const fetcher = useCallback(
@@ -54,34 +71,136 @@ export function useSlashCompletions(options: { gateway: HermesGateway | null }):

      const text = `/${query}`

+      // The desktop owns /skin entirely (client-side theme context). Surface its
+      // theme list inside this single popover instead of a bespoke one, and skip
+      // the backend skin completions (which describe CLI/TUI skins that don't
+      // apply here). Matches once we're past `/skin ` into the arg stage.
+      const skinArg = /^\/skin\s+(.*)$/is.exec(text)
+
+      if (skinArg && skinThemes) {
+        const items = desktopSkinSlashCompletions(skinThemes, activeSkin ?? '', skinArg[1] ?? '').map(entry => ({
+          text: entry.text,
+          display: entry.display,
+          meta: entry.meta,
+          group: 'Themes'
+        }))
+
+        return { items, query }
+      }
+
+      // /resume (and its aliases) completes recent sessions inline — the same
+      // client-side list the picker overlay shows — instead of the backend
+      // (whose /resume opens an interactive TUI picker we can't render here).
+      const sessionArg = /^\/(?:resume|sessions|switch)\s+(.*)$/is.exec(text)
+
+      if (sessionArg) {
+        const needle = (sessionArg[1] ?? '').trim().toLowerCase()
+
+        const matches = (
+          needle
+            ? $sessions.get().filter(
+                session =>
+                  sessionTitle(session).toLowerCase().includes(needle) ||
+                  (session.preview ?? '').toLowerCase().includes(needle) ||
+                  session.id.toLowerCase().includes(needle)
+              )
+            : $sessions.get()
+        ).slice(0, SESSION_INLINE_LIMIT)
+
+        const items: CompletionEntry[] = matches.map(session => ({
+          text: `/resume ${session.id}`,
+          display: sessionTitle(session),
+          meta: (session.preview ?? '').trim(),
+          group: 'Sessions'
+        }))
+
+        // Trailing "more" affordance (Cursor-style): picking it opens the full
+        // session picker overlay directly. `text` stays a bare `/resume` so that
+        // submitting it (Enter) still opens the overlay if the action is skipped.
+        items.push({
+          text: '/resume',
+          display: 'Browse all sessions…',
+          meta: '',
+          group: 'Sessions',
+          action: 'session-picker'
+        })
+
+        return { items, query }
+      }
+
      try {
        if (!query) {
          const catalog = filterDesktopCommandsCatalog(await gateway.request<CommandsCatalogLike>('commands.catalog'))

-          const items = (catalog.pairs ?? []).map(([command, meta]) => ({
-            text: command,
-            display: command,
-            meta
-          }))
+          // Prefer the categorized layout so the popover renders section headers
+          // (Session, Tools & Skills, ...). Fall back to the flat list when the
+          // backend didn't categorize.
+          const sections = catalog.categories?.length
+            ? catalog.categories
+            : [{ name: '', pairs: catalog.pairs ?? [] }]
+
+          const items = sections.flatMap(section =>
+            section.pairs.map(([command, meta]) => ({
+              text: command,
+              display: command,
+              group: section.name || undefined,
+              meta
+            }))
+          )

          return { items, query }
        }

-        const result = await gateway.request<{ items?: CompletionEntry[] }>('complete.slash', { text })
+        const result = await gateway.request<{ items?: CompletionEntry[]; replace_from?: number }>(
+          'complete.slash',
+          { text }
+        )

-        const items = (result.items ?? [])
-          .filter(item => isDesktopSlashSuggestion(item.text))
+        // Arg-completion items (replace_from > 1) carry just the arg stub —
+        // e.g. complete.slash returns `{text: "alice"}` for `/personality alic`
+        // with replace_from = 14. Rewrite those entries so the popover inserts
+        // the full `/personality alice` token instead of stranding `/alice`.
+        const replaceFrom = typeof result.replace_from === 'number' ? result.replace_from : 1
+        const isArgCompletion = replaceFrom > 1
+        const prefix = isArgCompletion ? text.slice(0, replaceFrom) : ''
+
+        const decorated = (result.items ?? [])
+          .map(item => {
+            if (!isArgCompletion) {
+              return item
+            }
+
+            const argText = typeof item.text === 'string' ? item.text : ''
+
+            return { ...item, text: `${prefix}${argText}` }
+          })
+          .filter(item => isArgCompletion || isDesktopSlashSuggestion(item.text))
          .map(item => ({
            ...item,
-            meta: desktopSlashDescription(item.text, textValue(item.meta))
+            // Arg suggestions (e.g. `/handoff <platform>`) live under one
+            // header; otherwise split skills out from built-in commands.
+            group: isArgCompletion ? 'Options' : isDesktopSlashExtensionCommand(item.text) ? 'Skills' : 'Commands',
+            // Arg items carry their own meta (the personality/toolset/platform
+            // blurb). Only command rows get the registry description — looking
+            // one up for `/personality none` would clobber it with the parent
+            // command's text.
+            meta: isArgCompletion ? textValue(item.meta) : desktopSlashDescription(item.text, textValue(item.meta))
          }))

+        // Keep each group contiguous so headers render once: Commands before
+        // Skills (stable within a group, preserving backend relevance order).
+        const groupOrder = ['Commands', 'Skills', 'Options']
+
+        const items = isArgCompletion
+          ? decorated
+          : [...decorated].sort((a, b) => groupOrder.indexOf(a.group) - groupOrder.indexOf(b.group))
+
        return { items, query }
      } catch {
        return { items: [], query }
      }
    },
-    [gateway]
+    [gateway, skinThemes, activeSkin]
  )

  const toItem = useCallback((entry: CompletionEntry, index: number): Unstable_TriggerItem => {
@@ -93,6 +212,8 @@ export function useSlashCompletions(options: { gateway: HermesGateway | null }):
      command,
      display,
      meta,
+      group: textValue(entry.group),
+      action: textValue(entry.action),
      // Provide rawText so hermesDirectiveFormatter.serialize uses the
      // direct-insertion path instead of the legacy @type:id fallback.
      // Without this, the item.id (which includes a "|index" suffix for
--- a/apps/desktop/src/app/chat/composer/index.tsx
+++ b/apps/desktop/src/app/chat/composer/index.tsx
@@ -13,17 +13,25 @@ import {
  useState
 } from 'react'

-import { hermesDirectiveFormatter } from '@/components/assistant-ui/directive-text'
+import { hermesDirectiveFormatter, type SlashChipKind } from '@/components/assistant-ui/directive-text'
 import { Button } from '@/components/ui/button'
 import { useMediaQuery } from '@/hooks/use-media-query'
 import { useResizeObserver } from '@/hooks/use-resize-observer'
 import { useI18n } from '@/i18n'
 import { chatMessageText } from '@/lib/chat-messages'
 import { SLASH_COMMAND_RE } from '@/lib/chat-runtime'
+import { desktopSlashCommandTakesArgs } from '@/lib/desktop-slash-commands'
 import { DATA_IMAGE_URL_RE } from '@/lib/embedded-images'
 import { triggerHaptic } from '@/lib/haptics'
 import { cn } from '@/lib/utils'
-import { $composerAttachments, clearComposerAttachments, type ComposerAttachment } from '@/store/composer'
+import {
+  $composerAttachments,
+  clearComposerAttachments,
+  clearSessionDraft,
+  type ComposerAttachment,
+  stashSessionDraft,
+  takeSessionDraft
+} from '@/store/composer'
 import {
  browseBackward,
  browseForward,
@@ -40,10 +48,11 @@ import {
  shouldAutoDrainOnSettle,
  updateQueuedPrompt
 } from '@/store/composer-queue'
-import { $gatewayState, $messages } from '@/store/session'
+import { $gatewayState, $messages, setSessionPickerOpen } from '@/store/session'
 import { $threadScrolledUp } from '@/store/thread-scroll'
+import { useTheme } from '@/themes'

-import { extractDroppedFiles, HERMES_PATHS_MIME } from '../hooks/use-composer-actions'
+import { extractDroppedFiles, HERMES_PATHS_MIME, partitionDroppedFiles } from '../hooks/use-composer-actions'

 import { AttachmentList } from './attachments'
 import { ContextMenu } from './context-menu'
@@ -64,7 +73,7 @@ import { useVoiceConversation } from './hooks/use-voice-conversation'
 import { useVoiceRecorder } from './hooks/use-voice-recorder'
 import {
  dragHasAttachments,
-  droppedFileInlineRef,
+  droppedFileInlineRefs,
  type InlineRefInput,
  insertInlineRefsIntoEditor
 } from './inline-refs'
@@ -74,9 +83,9 @@ import {
  placeCaretEnd,
  refChipElement,
  renderComposerContents,
-  RICH_INPUT_SLOT
+  RICH_INPUT_SLOT,
+  slashChipElement
 } from './rich-editor'
-import { SkinSlashPopover } from './skin-slash-popover'
 import { detectTrigger, extractClipboardImageBlobs, textBeforeCaret, type TriggerState } from './text-utils'
 import { ComposerTriggerPopover } from './trigger-popover'
 import type { ChatBarProps } from './types'
@@ -95,6 +104,30 @@ const COMPOSER_FADE_BACKGROUND =

 const pickPlaceholder = (pool: readonly string[]) => pool[Math.floor(Math.random() * pool.length)]

+/** Completion items can carry an `action` (set in use-slash-completions) that
+ *  runs a side effect on pick instead of inserting a chip — e.g. the session
+ *  picker's "Browse all…" entry opens the overlay. Table-driven so new action
+ *  items are a registry row, not a composer branch.
+ *
+ *  Built on a null-prototype object: action ids arrive as arbitrary strings on
+ *  completion items, and a plain object literal would resolve ids such as
+ *  `toString` or `constructor` to inherited Object.prototype functions, which
+ *  the picker would then run as if they were registered actions. */
+const COMPLETION_ACTIONS: Record<string, () => void> = Object.assign(Object.create(null), {
+  'session-picker': () => setSessionPickerOpen(true)
+})
+
+/** Choose the pill accent for a picked `/` completion from the completion
+ *  group assigned in use-slash-completions: Skills → 'skill', Themes →
+ *  'theme', anything else (Commands, Options, missing) → 'command'. */
+function slashChipKindForItem(item: Unstable_TriggerItem): SlashChipKind {
+  switch ((item.metadata as { group?: unknown } | undefined)?.group) {
+    case 'Skills':
+      return 'skill'
+
+    case 'Themes':
+      return 'theme'
+
+    default:
+      return 'command'
+  }
+}
+
 interface QueueEditState {
  attachments: ComposerAttachment[]
  draft: string
@@ -104,6 +137,10 @@ interface QueueEditState {

 const cloneAttachments = (attachments: ComposerAttachment[]) => attachments.map(a => ({ ...a }))

+// Quiet period after the last keystroke before persisting the draft;
+// unmount/pagehide flushes bypass it.
+const DRAFT_PERSIST_DEBOUNCE_MS = 400
+
 export function ChatBar({
  busy,
  cwd,
@@ -145,6 +182,9 @@ export function ChatBar({
  const editorRef = useRef<HTMLDivElement | null>(null)
  const draftRef = useRef(draft)
  const previousBusyRef = useRef(busy)
+  const pendingDraftPersistRef = useRef<{ scope: string | null; text: string } | null>(null)
+  const activeQueueSessionKeyRef = useRef(activeQueueSessionKey)
+  activeQueueSessionKeyRef.current = activeQueueSessionKey
  const drainingQueueRef = useRef(false)
  const urlInputRef = useRef<HTMLInputElement | null>(null)

@@ -156,14 +196,17 @@ export function ChatBar({
  const [dragActive, setDragActive] = useState(false)
  const [queueEdit, setQueueEdit] = useState<QueueEditState | null>(null)
  const [focusRequestId, setFocusRequestId] = useState(0)
+  const queueEditRef = useRef(queueEdit)
+  queueEditRef.current = queueEdit
  const dragDepthRef = useRef(0)
  const composingRef = useRef(false) // true during IME composition (CJK input)
  const lastSpokenIdRef = useRef<string | null>(null)

  const narrow = useMediaQuery('(max-width: 30rem)')

+  const { availableThemes, themeName } = useTheme()
  const at = useAtCompletions({ gateway: gateway ?? null, sessionId: sessionId ?? null, cwd: cwd ?? null })
-  const slash = useSlashCompletions({ gateway: gateway ?? null })
+  const slash = useSlashCompletions({ activeSkin: themeName, gateway: gateway ?? null, skinThemes: availableThemes })

  const stacked = expanded || narrow || tight
  const trimmedDraft = draft.trim()
@@ -171,10 +214,12 @@ export function ChatBar({
  const canSubmit = busy || hasComposerPayload
  const editingQueuedPrompt = queueEdit ? (queuedPrompts.find(entry => entry.id === queueEdit.entryId) ?? null) : null
  const busyAction = busy && hasComposerPayload ? 'queue' : 'stop'
+
  // Steer only makes sense mid-turn, text-only (the gateway can't carry images
  // into a tool result) and never for a slash command (those execute inline).
  const canSteer =
    busy && !!onSteer && attachments.length === 0 && trimmedDraft.length > 0 && !SLASH_COMMAND_RE.test(trimmedDraft)
+
  const showHelpHint = draft === '?'

  const { t } = useI18n()
@@ -462,12 +507,6 @@ export function ChatBar({
    })
  }, [])

-  const selectSkinSlashCommand = (command: string) => {
-    draftRef.current = command
-    aui.composer().setText(command)
-    requestMainFocus()
-  }
-
  const handlePaste = (event: ClipboardEvent<HTMLDivElement>) => {
    const imageBlobs = extractClipboardImageBlobs(event.clipboardData)

@@ -620,16 +659,50 @@ export function ChatBar({
      return
    }

+    // Action items (e.g. "Browse all sessions…") run a side effect instead of
+    // inserting a chip: strip the typed trigger token, then fire the action.
+    const completionAction = (item.metadata as { action?: unknown } | undefined)?.action
+    const runAction = typeof completionAction === 'string' ? COMPLETION_ACTIONS[completionAction] : undefined
+
+    if (runAction) {
+      const current = composerPlainText(editor)
+      const prefix = current.slice(0, Math.max(0, current.length - trigger.tokenLength))
+
+      renderComposerContents(editor, prefix)
+      placeCaretEnd(editor)
+      draftRef.current = composerPlainText(editor)
+      aui.composer().setText(draftRef.current)
+      closeTrigger()
+      runAction()
+      requestMainFocus()
+
+      return
+    }
+
    const serialized = hermesDirectiveFormatter.serialize(item)
    const starter = serialized.endsWith(':')
+
+    // Picking a bare arg-taking command (e.g. `/personality`) shouldn't commit
+    // it — expand to its options step so the popover shows the inline list, just
+    // as typing `/personality ` by hand would. A serialized value with a space is
+    // already an arg pick (`/personality alice`), so it commits normally.
+    const command = (item.metadata as { command?: string } | undefined)?.command ?? ''
+
+    const expandsToArgs =
+      trigger.kind === '/' && !serialized.includes(' ') && desktopSlashCommandTakesArgs(command)
+
    const text = starter || serialized.endsWith(' ') ? serialized : `${serialized} `
    const directive = !starter && serialized.match(/^@([^:]+):(.+)$/)
+    // No pill while expanding — the bare command stays plain text until an arg
+    // is picked, at which point a single pill is emitted for the full command.
+    const slashKind = !expandsToArgs && trigger.kind === '/' ? slashChipKindForItem(item) : null
+    const keepTriggerOpen = starter || expandsToArgs

    const finish = () => {
      draftRef.current = composerPlainText(editor)
      aui.composer().setText(draftRef.current)
      requestMainFocus()
-      starter ? window.setTimeout(refreshTrigger, 0) : closeTrigger()
+      keepTriggerOpen ? window.setTimeout(refreshTrigger, 0) : closeTrigger()
    }

    const sel = window.getSelection()
@@ -639,7 +712,20 @@ export function ChatBar({

    if (!sel || !range || node?.nodeType !== Node.TEXT_NODE || offset < trigger.tokenLength) {
      const current = composerPlainText(editor)
-      renderComposerContents(editor, `${current.slice(0, Math.max(0, current.length - trigger.tokenLength))}${text}`)
+      const prefix = current.slice(0, Math.max(0, current.length - trigger.tokenLength))
+
+      if (slashKind) {
+        // Two-step arg picks (e.g. `/handoff` pill already inserted, now picking
+        // the platform) land here because the caret sits past a contenteditable
+        // chip. Rebuild the prefix and re-emit a single pill for the full command.
+        renderComposerContents(editor, prefix)
+        editor.append(slashChipElement(serialized, slashKind), document.createTextNode(' '))
+        placeCaretEnd(editor)
+
+        return finish()
+      }
+
+      renderComposerContents(editor, `${prefix}${text}`)
      placeCaretEnd(editor)

      return finish()
@@ -650,8 +736,13 @@ export function ChatBar({
    replaceRange.setEnd(node, offset)
    replaceRange.deleteContents()

-    if (directive) {
-      const chip = refChipElement(directive[1], directive[2])
+    const chip = slashKind
+      ? slashChipElement(serialized, slashKind)
+      : directive
+        ? refChipElement(directive[1], directive[2])
+        : null
+
+    if (chip) {
      const space = document.createTextNode(' ')
      const fragment = document.createDocumentFragment()
      fragment.append(chip, space)
@@ -814,7 +905,16 @@ export function ChatBar({
    if (event.key === 'Enter' && !event.shiftKey) {
      event.preventDefault()

-      if (!busy && !hasComposerPayload && queuedPrompts.length > 0) {
+      // Decide from the DOM, not React state. `hasComposerPayload` is derived
+      // from the AUI composer state, which lags the latest keystroke by a
+      // render, so on fast typing / IME the just-typed text isn't in state yet.
+      // Without the live read, a real message typed while prompts are queued
+      // would drain the queue instead of sending. submitDraft() re-syncs and
+      // sends the live editor text.
+      const editorText = editorRef.current ? composerPlainText(editorRef.current) : draftRef.current
+      const hasLivePayload = editorText.trim().length > 0 || attachments.length > 0
+
+      if (!busy && !hasLivePayload && queuedPrompts.length > 0) {
        void drainNextQueued()

        return
@@ -822,7 +922,10 @@ export function ChatBar({

      // Empty Enter while busy is a no-op — interrupting is explicit (Stop/Esc),
      // never a stray Enter after sending. With a payload, submitDraft queues it.
-      if (busy && !hasComposerPayload) {
+      // Gate on the live DOM payload (not the render-lagged composer state) so a
+      // message typed fast / via IME while busy still reaches submitDraft() and
+      // gets queued instead of being mistaken for an empty Enter.
+      if (busy && !hasLivePayload) {
        return
      }

@@ -919,24 +1022,25 @@ export function ChatBar({
      return
    }

-    if (Array.from(event.dataTransfer.types || []).includes(HERMES_PATHS_MIME)) {
-      const refs = candidates
-        .map(candidate => droppedFileInlineRef(candidate, cwd))
-        .filter((ref): ref is string => Boolean(ref))
+    // In-app drags (project tree / gutter) are workspace-relative paths the
+    // gateway resolves directly, so they stay inline @file:/@line: refs. OS
+    // drops are absolute local paths a remote gateway can't read (and images
+    // need byte upload for vision), so route them through the upload pipeline.
+    const { inAppRefs, osDrops } = partitionDroppedFiles(candidates)
+    const refs = droppedFileInlineRefs(inAppRefs, cwd)

-      if (insertInlineRefs(refs)) {
-        triggerHaptic('selection')
-      }
-
-      return
+    if (refs.length && insertInlineRefs(refs)) {
+      triggerHaptic('selection')
    }

-    void Promise.resolve(onAttachDroppedItems(candidates)).then(attached => {
-      if (attached) {
-        triggerHaptic('selection')
-        requestMainFocus()
-      }
-    })
+    if (osDrops.length) {
+      void Promise.resolve(onAttachDroppedItems(osDrops)).then(attached => {
+        if (attached) {
+          triggerHaptic('selection')
+          requestMainFocus()
+        }
+      })
+    }
  }

  const handleInputDragOver = (event: ReactDragEvent<HTMLDivElement>) => {
@@ -956,11 +1060,7 @@ export function ChatBar({

    const candidates = extractDroppedFiles(event.dataTransfer)

-    const refs = candidates
-      .map(candidate => droppedFileInlineRef(candidate, cwd))
-      .filter((ref): ref is string => Boolean(ref))
-
-    if (!refs.length) {
+    if (!candidates.length) {
      return
    }

@@ -968,9 +1068,27 @@ export function ChatBar({
    event.stopPropagation()
    resetDragState()

-    if (insertInlineRefs(refs)) {
+    // Dropping straight onto the text box used to inline-ref *every* file —
+    // including OS/Finder drops, whose absolute local path a remote gateway
+    // can't read and whose image bytes never reached vision. Split by origin:
+    // in-app drags stay inline refs; OS drops go through the upload pipeline.
+    // (When no upload handler is wired, fall back to inline refs for all.)
+    const attach = onAttachDroppedItems
+    const { inAppRefs, osDrops } = partitionDroppedFiles(candidates)
+    const refs = droppedFileInlineRefs(attach ? inAppRefs : candidates, cwd)
+
+    if (refs.length && insertInlineRefs(refs)) {
      triggerHaptic('selection')
    }
+
+    if (attach && osDrops.length) {
+      void Promise.resolve(attach(osDrops)).then(attached => {
+        if (attached) {
+          triggerHaptic('selection')
+          requestMainFocus()
+        }
+      })
+    }
  }

  const clearDraft = useCallback(() => {
@@ -995,6 +1113,69 @@ export function ChatBar({
    }
  }

+  // Forwards to stashSessionDraft for `scope`. When the caller omits them,
+  // `text` defaults to the live draft ref and `attachments` to the current
+  // contents of the composer attachments store.
+  const stashAt = (
+    scope: string | null,
+    text = draftRef.current,
+    attachments = $composerAttachments.get()
+  ) => stashSessionDraft(scope, text, attachments)
+
+  // Per-thread draft swap — the composer's only session coupling. Lifecycle
+  // never clears composer state; this effect alone stashes on leave, restores
+  // on enter. Keyed writes are idempotent, so no skip-sentinel.
+  useEffect(() => {
+    const { attachments, text } = takeSessionDraft(activeQueueSessionKey)
+    loadIntoComposer(text, attachments)
+
+    return () => {
+      const editing = queueEditRef.current
+
+      if (editing?.sessionKey === activeQueueSessionKey) {
+        stashAt(activeQueueSessionKey, editing.draft, editing.attachments)
+      } else if (!isBrowsingHistory(sessionId)) {
+        stashAt(activeQueueSessionKey)
+      }
+    }
+  }, [activeQueueSessionKey]) // eslint-disable-line react-hooks/exhaustive-deps
+
+  // Debounced stash into the active scope. Skipped while browsing history or
+  // editing a queued prompt — recalled text must not clobber the real draft.
+  useEffect(() => {
+    if (isBrowsingHistory(sessionId) || queueEdit) {
+      return
+    }
+
+    pendingDraftPersistRef.current = { scope: activeQueueSessionKey, text: draft }
+
+    const handle = window.setTimeout(() => {
+      pendingDraftPersistRef.current = null
+      stashAt(activeQueueSessionKey, draft)
+    }, DRAFT_PERSIST_DEBOUNCE_MS)
+
+    return () => window.clearTimeout(handle)
+  }, [activeQueueSessionKey, draft, queueEdit, sessionId])
+
+  // pagehide is load-bearing: React skips effect cleanups on reload, so Cmd+R
+  // inside the debounce window would drop trailing keystrokes without this.
+  useEffect(() => {
+    const flushPendingDraftPersist = () => {
+      const pending = pendingDraftPersistRef.current
+
+      if (!pending) {
+        return
+      }
+
+      pendingDraftPersistRef.current = null
+      stashAt(pending.scope, pending.text)
+    }
+
+    window.addEventListener('pagehide', flushPendingDraftPersist)
+
+    return () => {
+      window.removeEventListener('pagehide', flushPendingDraftPersist)
+      flushPendingDraftPersist()
+    }
+  }, [])
+
  const beginQueuedEdit = (entry: QueuedPromptEntry) => {
    if (!activeQueueSessionKey || queueEdit) {
      return
@@ -1197,21 +1378,61 @@ export function ChatBar({
    }
  }, [busy, drainNextQueued, queuedPrompts.length])

-  // Clean up queue edit when its target disappears (session swap or external delete).
+  // Queue-edit cleanup: on session swap the scope effect already stashed the
+  // edit snapshot; only restore into the composer when still on the same scope.
  useEffect(() => {
    if (!queueEdit) {
      return
    }

-    if (queueEdit.sessionKey === activeQueueSessionKey && editingQueuedPrompt) {
-      return
+    if (queueEdit.sessionKey === activeQueueSessionKey) {
+      if (editingQueuedPrompt) {
+        return
+      }
+
+      loadIntoComposer(queueEdit.draft, queueEdit.attachments)
    }

-    loadIntoComposer(queueEdit.draft, queueEdit.attachments)
    setQueueEdit(null)
  }, [activeQueueSessionKey, editingQueuedPrompt, queueEdit]) // eslint-disable-line react-hooks/exhaustive-deps

+  // Fire-and-forget submit of `text` (plus optional attachments). submitDraft
+  // clears the composer before calling this, so a refused or failed submit has
+  // to put the content back:
+  //   - onSubmit resolves to `false` or rejects → reload text/attachments into
+  //     the composer and stash them under whichever scope is active then;
+  //   - any other outcome → clear the stored draft for the scope that was
+  //     active when the submit was dispatched.
+  const dispatchSubmit = (text: string, attachments?: ComposerAttachment[]) => {
+    // Captured up front: the active scope may change before onSubmit settles.
+    const submittedScope = activeQueueSessionKeyRef.current
+    const submittedAttachments = attachments ?? []
+
+    const restore = () => {
+      loadIntoComposer(text, submittedAttachments)
+      stashAt(activeQueueSessionKeyRef.current, text, submittedAttachments)
+    }
+
+    // Without attachments, onSubmit is called with the text only (no options).
+    void Promise.resolve(attachments ? onSubmit(text, { attachments }) : onSubmit(text))
+      .then(accepted => void (accepted === false ? restore() : clearSessionDraft(submittedScope)))
+      .catch(restore)
+  }
+
  const submitDraft = () => {
+    // Source the text from the DOM editor, not React state. The AUI composer
+    // state (`draft`) and the derived `hasComposerPayload` lag the DOM by a
+    // render, so on fast typing or IME composition the final keystroke(s) may
+    // not have synced yet — reading state here drops the message (Enter looks
+    // like it does nothing; typing a trailing space only "fixes" it because the
+    // extra input event forces a state sync). draftRef is updated on every
+    // input event; refresh it from the editor once more to also cover an
+    // in-flight keystroke that hasn't fired its input event yet.
+    const editor = editorRef.current
+
+    if (editor) {
+      const domText = composerPlainText(editor)
+
+      if (domText !== draftRef.current) {
+        draftRef.current = domText
+        aui.composer().setText(domText)
+      }
+    }
+
+    const text = draftRef.current
+    const payloadPresent = text.trim().length > 0 || attachments.length > 0
+
    if (queueEdit) {
      exitQueuedEdit('save')
    } else if (busy) {
@@ -1222,12 +1443,11 @@ export function ChatBar({
      // busy guard for commands that genuinely need an idle session (skill
      // /send directives).  Queuing them would make every slash command wait
      // for the current turn to finish, which is how the TUI never behaves.
-      if (!attachments.length && SLASH_COMMAND_RE.test(draft.trim())) {
-        const submitted = draft
+      if (!attachments.length && SLASH_COMMAND_RE.test(text.trim())) {
        triggerHaptic('submit')
        clearDraft()
-        void onSubmit(submitted)
-      } else if (hasComposerPayload) {
+        dispatchSubmit(text)
+      } else if (payloadPresent) {
        queueCurrentDraft()
      } else {
        // Stop button (the only way to reach here while busy with an empty
@@ -1235,15 +1455,15 @@ export function ChatBar({
        triggerHaptic('cancel')
        void Promise.resolve(onCancel())
      }
-    } else if (!hasComposerPayload && queuedPrompts.length > 0) {
+    } else if (!payloadPresent && queuedPrompts.length > 0) {
      void drainNextQueued()
-    } else if (draft.trim() || attachments.length > 0) {
-      const submitted = draft
+    } else if (payloadPresent) {
+      const submittedAttachments = cloneAttachments(attachments)
      triggerHaptic('submit')
      resetBrowseState(sessionId)
      clearDraft()
      clearComposerAttachments()
-      void onSubmit(submitted, { attachments })
+      dispatchSubmit(text, submittedAttachments)
    }

    focusInput()
@@ -1410,7 +1630,7 @@ export function ChatBar({
        onPaste={handlePaste}
        ref={editorRef}
        role="textbox"
-        spellCheck="true"
+        spellCheck={false}
        suppressContentEditableWarning
      />
      {/* assistant-ui requires ComposerPrimitive.Input somewhere in the tree
@@ -1429,7 +1649,15 @@ export function ChatBar({
        `asChild` swaps TextareaAutosize for a Radix Slot wrapping our
        plain <textarea>, which carries the binding but skips autosize. */}
      <ComposerPrimitive.Input asChild submitMode="ctrlEnter" tabIndex={-1} unstable_focusOnScrollToBottom={false}>
-        <textarea aria-hidden className="sr-only" tabIndex={-1} />
+        <textarea
+          aria-hidden
+          autoCapitalize="off"
+          autoComplete="off"
+          autoCorrect="off"
+          className="sr-only"
+          spellCheck={false}
+          tabIndex={-1}
+        />
      </ComposerPrimitive.Input>
    </div>
  )
@@ -1468,7 +1696,6 @@ export function ChatBar({
              onPick={replaceTriggerWithChip}
            />
          )}
-          <SkinSlashPopover draft={draft} onSelect={selectSkinSlashCommand} />
          {activeQueueSessionKey && queuedPrompts.length > 0 && (
            // Out of flow so the queue never inflates the composer's measured
            // height (that drives thread bottom padding → chat resizes on
--- a/apps/desktop/src/app/chat/composer/inline-refs.ts
+++ b/apps/desktop/src/app/chat/composer/inline-refs.ts
@@ -83,6 +83,12 @@ export function droppedFileInlineRef(candidate: DroppedFile, cwd: string | null
  return `@${kind}:${formatRefValue(rel)}`
 }

+/** Resolve a batch of drops to their inline `@file:`/`@line:`/`@folder:` refs by mapping each through
+ * `droppedFileInlineRef` with the shared `cwd`; falsy results are filtered out and input order is kept. */
+export function droppedFileInlineRefs(candidates: DroppedFile[], cwd: string | null | undefined): string[] {
+  return candidates.map(candidate => droppedFileInlineRef(candidate, cwd)).filter((ref): ref is string => Boolean(ref))
+}
+
 export function insertInlineRefsIntoEditor(editor: HTMLDivElement, refs: readonly InlineRefInput[]) {
  if (!refs.length) {
    return null
--- a/apps/desktop/src/app/chat/composer/rich-editor.ts
+++ b/apps/desktop/src/app/chat/composer/rich-editor.ts
@@ -10,7 +10,10 @@ import {
  DIRECTIVE_CHIP_CLASS,
  directiveIconElement,
  directiveIconSvg,
-  formatRefValue
+  formatRefValue,
+  slashChipClass,
+  type SlashChipKind,
+  slashIconElement
 } from '@/components/assistant-ui/directive-text'

 export const RICH_INPUT_SLOT = 'composer-rich-input'
@@ -77,6 +80,24 @@ export function refChipElement(kind: string, rawValue: string, displayLabel?: st
  return chip
 }

+/** Build a non-editable pill for a picked slash command (`/skin nous`, `/tropes`): an icon from
+ *  `slashIconElement(kind)` plus a `truncate` span showing `label || command`. `data-slash-kind` records `kind`;
+ *  `data-ref-text` carries the literal command so `composerPlainText` round-trips it to the submitted text. */
+export function slashChipElement(command: string, kind: SlashChipKind, label?: string) {
+  const chip = document.createElement('span')
+  const text = document.createElement('span')
+
+  chip.contentEditable = 'false'
+  chip.dataset.refText = command
+  chip.dataset.slashKind = kind
+  chip.className = slashChipClass(kind)
+  text.className = 'truncate'
+  text.textContent = label || command
+  chip.append(slashIconElement(kind), text)
+
+  return chip
+}
+
 function appendTextWithBreaks(target: DocumentFragment | HTMLElement, text: string) {
  const lines = text.split('\n')

--- a/apps/desktop/src/app/chat/composer/skin-slash-popover.tsx
+++ b/apps/desktop/src/app/chat/composer/skin-slash-popover.tsx
@@ -1,61 +0,0 @@
-import { useI18n } from '@/i18n'
-import { desktopSkinSlashCompletions } from '@/lib/desktop-slash-commands'
-import { triggerHaptic } from '@/lib/haptics'
-import { useTheme } from '@/themes/context'
-
-import { COMPLETION_DRAWER_CLASS, COMPLETION_DRAWER_ROW_CLASS, CompletionDrawerEmpty } from './completion-drawer'
-
-interface SkinSlashPopoverProps {
-  draft: string
-  onSelect: (command: string) => void
-}
-
-export function SkinSlashPopover({ draft, onSelect }: SkinSlashPopoverProps) {
-  const { t } = useI18n()
-  const c = t.composer
-  const { availableThemes, themeName } = useTheme()
-  const match = draft.match(/^\/skin\s+(\S*)$/i)
-
-  if (!match) {
-    return null
-  }
-
-  const items = desktopSkinSlashCompletions(availableThemes, themeName, match[1] ?? '')
-
-  return (
-    <div
-      aria-label={c.themeSuggestions}
-      className={COMPLETION_DRAWER_CLASS}
-      data-slot="composer-skin-completion-drawer"
-      data-state="open"
-      role="listbox"
-    >
-      <div className="grid gap-0.5 pt-0.5">
-        {items.length === 0 ? (
-          <CompletionDrawerEmpty title={c.noMatchingThemes}>
-            {c.themeTryPre}
-            <span className="font-mono text-foreground/80">/skin list</span>
-            {c.themeTryPost}
-          </CompletionDrawerEmpty>
-        ) : (
-          items.map(item => (
-            <button
-              className={COMPLETION_DRAWER_ROW_CLASS}
-              key={item.text}
-              onClick={() => {
-                triggerHaptic('selection')
-                onSelect(item.text)
-              }}
-              onMouseDown={event => event.preventDefault()}
-              role="option"
-              type="button"
-            >
-              <span className="shrink-0 font-mono font-medium leading-5 text-foreground">{item.display}</span>
-              <span className="min-w-0 truncate leading-5 text-muted-foreground/80">{item.meta}</span>
-            </button>
-          ))
-        )}
-      </div>
-    </div>
-  )
-}
--- a/apps/desktop/src/app/chat/composer/text-utils.test.ts
+++ b/apps/desktop/src/app/chat/composer/text-utils.test.ts
@@ -22,6 +22,33 @@ describe('detectTrigger', () => {
  it('returns null for plain text', () => {
    expect(detectTrigger('hello there')).toBeNull()
  })
+
+  it('keeps the slash trigger live while typing args', () => {
+    expect(detectTrigger('/personality ')).toEqual({
+      kind: '/',
+      query: 'personality ',
+      tokenLength: 13
+    })
+    expect(detectTrigger('/personality alic')).toEqual({
+      kind: '/',
+      query: 'personality alic',
+      tokenLength: 17
+    })
+    expect(detectTrigger('/tools enable foo')).toEqual({
+      kind: '/',
+      query: 'tools enable foo',
+      tokenLength: 17
+    })
+  })
+
+  it('does not treat file-style paths as slash triggers', () => {
+    expect(detectTrigger('src/foo/bar')).toBeNull()
+    expect(detectTrigger('/path/to/file')).toBeNull()
+  })
+
+  it('still anchors at-mention triggers strictly at the token edge', () => {
+    expect(detectTrigger('@file:path with space')).toBeNull()
+  })
 })

 describe('extractClipboardImageBlobs', () => {
--- a/apps/desktop/src/app/chat/composer/text-utils.ts
+++ b/apps/desktop/src/app/chat/composer/text-utils.ts
@@ -6,7 +6,13 @@ export interface TriggerState {
  tokenLength: number
 }

-const TRIGGER_RE = /(?:^|[\s])([@/])([^\s@/]*)$/
+// Both triggers must start the text or follow whitespace, and must run to the end of the input.
+// `@` queries stop at the first whitespace, `@` or `/` — `@file:path` and `@diff` are single
+// tokens. `/` queries keep going through whitespace-separated args so the popover stays live
+// (`/personality alic`). A bare `/` matches with an empty query; otherwise the command name must
+// be `[a-zA-Z][\w-]*` directly followed by whitespace or the end, so `/path/to/file` never matches.
+const AT_TRIGGER_RE = /(?:^|[\s])(@)([^\s@/]*)$/
+const SLASH_TRIGGER_RE = /(?:^|[\s])(\/)((?:[a-zA-Z][\w-]*(?:\s+\S*)*)?)$/

 /** Stable key for paste dedupe — `items` and `files` often mirror the same image as different objects. */
 export function blobDedupeKey(blob: Blob): string {
@@ -97,11 +103,17 @@ export function textBeforeCaret(editor: HTMLDivElement): string | null {
 }

 export function detectTrigger(textBefore: string): TriggerState | null {
-  const match = TRIGGER_RE.exec(textBefore)
+  const slash = SLASH_TRIGGER_RE.exec(textBefore)

-  if (!match) {
-    return null
+  if (slash) {
+    return { kind: '/', query: slash[2], tokenLength: 1 + slash[2].length }
  }

-  return { kind: match[1] as '@' | '/', query: match[2], tokenLength: 1 + match[2].length }
+  const at = AT_TRIGGER_RE.exec(textBefore)
+
+  if (at) {
+    return { kind: '@', query: at[2], tokenLength: 1 + at[2].length }
+  }
+
+  return null
 }
--- a/apps/desktop/src/app/chat/composer/trigger-popover.test.tsx
+++ b/apps/desktop/src/app/chat/composer/trigger-popover.test.tsx
@@ -34,9 +34,17 @@ describe('ComposerTriggerPopover i18n', () => {
  })

  it('renders localized loading copy for slash commands', () => {
-    const { container } = renderPopover('/', true)
+    renderPopover('/', true)

+    // While loading the popover shows only the spinner + loading copy — the
+    // `/help` empty-state hint is reserved for the resolved (not-loading) state.
    expect(screen.getByText('查找中…')).toBeTruthy()
+  })
+
+  it('renders the slash empty-state hint when not loading', () => {
+    const { container } = renderPopover('/')
+
+    expect(screen.getByText('没有匹配项。')).toBeTruthy()
    expect(container.textContent).toContain('/help')
  })
 })
--- a/apps/desktop/src/app/chat/composer/trigger-popover.tsx
+++ b/apps/desktop/src/app/chat/composer/trigger-popover.tsx
@@ -1,5 +1,7 @@
 import type { Unstable_TriggerItem } from '@assistant-ui/core'
+import { Fragment } from 'react'

+import { BrailleSpinner } from '@/components/ui/braille-spinner'
 import { Codicon } from '@/components/ui/codicon'
 import { useI18n } from '@/i18n'
 import { cn } from '@/lib/utils'
@@ -7,7 +9,6 @@ import { cn } from '@/lib/utils'
 import {
  COMPLETION_DRAWER_BELOW_CLASS,
  COMPLETION_DRAWER_CLASS,
-  COMPLETION_DRAWER_ROW_CLASS,
  CompletionDrawerEmpty
 } from './completion-drawer'

@@ -23,11 +24,7 @@ const AT_ICON_BY_TYPE: Record<string, string> = {
  url: 'globe'
 }

-function completionIcon(kind: '@' | '/', item: Unstable_TriggerItem) {
-  if (kind === '/') {
-    return 'terminal'
-  }
-
+function atIcon(item: Unstable_TriggerItem) {
  const meta = item.metadata as { rawText?: string } | undefined
  const raw = meta?.rawText || item.label

@@ -42,6 +39,18 @@ function completionIcon(kind: '@' | '/', item: Unstable_TriggerItem) {
  return AT_ICON_BY_TYPE[item.type] || AT_ICON_BY_TYPE.simple
 }

+interface RowMeta {
+  display?: string
+  group?: string
+  meta?: string
+}
+
+const ROW_BASE_CLASS = [
+  'relative flex w-full cursor-default select-none rounded-md px-2 py-1 text-left',
+  'outline-hidden transition-colors hover:bg-(--ui-bg-tertiary)',
+  'data-[highlighted]:bg-(--ui-bg-tertiary) data-[highlighted]:text-foreground'
+].join(' ')
+
 interface ComposerTriggerPopoverProps {
  activeIndex: number
  items: readonly Unstable_TriggerItem[]
@@ -63,6 +72,9 @@ export function ComposerTriggerPopover({
 }: ComposerTriggerPopoverProps) {
  const { t } = useI18n()
  const copy = t.composer
+  const isSlash = kind === '/'
+
+  let lastGroup: string | undefined

  return (
    <div
@@ -73,41 +85,94 @@ export function ComposerTriggerPopover({
      role="listbox"
    >
      {items.length === 0 ? (
-        <CompletionDrawerEmpty title={loading ? copy.lookupLoading : copy.lookupNoMatches}>
-          {kind === '@' ? (
-            <>
-              {copy.lookupTry} <span className="font-mono text-foreground/80">@file:</span> {copy.lookupOr}{' '}
-              <span className="font-mono text-foreground/80">@folder:</span>.
-            </>
-          ) : (
-            <>
-              {copy.lookupTry} <span className="font-mono text-foreground/80">/help</span>.
-            </>
-          )}
-        </CompletionDrawerEmpty>
+        loading ? (
+          <div className="flex items-center gap-2 px-2 py-1.5 text-(--ui-text-tertiary)">
+            <BrailleSpinner ariaLabel={copy.lookupLoading} className="text-foreground/70" spinner="braille" />
+            <span>{copy.lookupLoading}</span>
+          </div>
+        ) : (
+          <CompletionDrawerEmpty title={copy.lookupNoMatches}>
+            {kind === '@' ? (
+              <>
+                {copy.lookupTry} <span className="font-mono text-foreground/80">@file:</span> {copy.lookupOr}{' '}
+                <span className="font-mono text-foreground/80">@folder:</span>.
+              </>
+            ) : (
+              <>
+                {copy.lookupTry} <span className="font-mono text-foreground/80">/help</span>.
+              </>
+            )}
+          </CompletionDrawerEmpty>
+        )
      ) : (
        items.map((item, index) => {
-          const meta = item.metadata as { display?: string; meta?: string } | undefined
-          const display = meta?.display ?? (kind === '/' ? `/${item.label}` : item.label)
+          const meta = item.metadata as RowMeta | undefined
+          const display = meta?.display ?? (isSlash ? `/${item.label}` : item.label)
          const description = meta?.meta || item.description
+          const group = meta?.group?.trim()
+          const showHeader = isSlash && Boolean(group) && group !== lastGroup
+          const isFirstHeader = lastGroup === undefined
+          lastGroup = group || lastGroup
+          const active = index === activeIndex

          return (
-            <button
-              className={cn(COMPLETION_DRAWER_ROW_CLASS, index === activeIndex && 'bg-(--ui-bg-tertiary)')}
-              data-highlighted={index === activeIndex ? '' : undefined}
-              key={item.id}
-              onClick={() => onPick(item)}
-              onMouseEnter={() => onHover(index)}
-              type="button"
-            >
-              <span className="grid size-3.5 shrink-0 place-items-center text-(--ui-text-tertiary)">
-                <Codicon name={completionIcon(kind, item)} size="0.875rem" />
-              </span>
-              <span className="min-w-0 shrink truncate font-mono font-medium leading-5 text-foreground">{display}</span>
-              {description && (
-                <span className="min-w-0 flex-1 truncate leading-5 text-(--ui-text-tertiary)">{description}</span>
+            <Fragment key={item.id}>
+              {showHeader && (
+                <div
+                  className={cn(
+                    'select-none px-2 pb-0.5 text-[0.625rem] font-semibold uppercase tracking-wider text-(--ui-text-tertiary)',
+                    isFirstHeader ? 'pt-0.5' : 'pt-2'
+                  )}
+                >
+                  {group}
+                </div>
              )}
-            </button>
+              <button
+                className={cn(ROW_BASE_CLASS, isSlash ? 'flex-col gap-0' : 'items-center gap-2')}
+                data-highlighted={active ? '' : undefined}
+                onClick={() => onPick(item)}
+                onMouseEnter={() => onHover(index)}
+                type="button"
+              >
+                {isSlash ? (
+                  <>
+                    {/* Active row (keyboard nav or hover) un-truncates inline so
+                        long command names / descriptions stay readable without a
+                        floating tooltip. */}
+                    <span
+                      className={cn(
+                        'text-[0.8125rem] font-medium leading-snug text-foreground',
+                        active ? 'whitespace-normal break-words' : 'truncate'
+                      )}
+                    >
+                      {display}
+                    </span>
+                    {description && (
+                      <span
+                        className={cn(
+                          'text-[0.6875rem] leading-snug text-(--ui-text-tertiary)',
+                          active ? 'whitespace-normal break-words' : 'truncate'
+                        )}
+                      >
+                        {description}
+                      </span>
+                    )}
+                  </>
+                ) : (
+                  <>
+                    <span className="grid size-4 shrink-0 place-items-center text-(--ui-text-tertiary)">
+                      <Codicon name={atIcon(item)} size="0.875rem" />
+                    </span>
+                    <span className="min-w-0 shrink truncate font-mono font-medium leading-5 text-foreground">
+                      {display}
+                    </span>
+                    {description && (
+                      <span className="min-w-0 flex-1 truncate leading-5 text-(--ui-text-tertiary)">{description}</span>
+                    )}
+                  </>
+                )}
+              </button>
+            </Fragment>
          )
        })
      )}
--- a/apps/desktop/src/app/chat/hooks/use-composer-actions.test.ts
+++ b/apps/desktop/src/app/chat/hooks/use-composer-actions.test.ts
@@ -0,0 +1,57 @@
+import { describe, expect, it } from 'vitest'
+
+import { type DroppedFile, partitionDroppedFiles } from './use-composer-actions'
+
+// A Finder/Explorer drop carries a native File handle; an in-app drag (project
+// tree, gutter line ref) is path-only. The split decides whether a drop becomes
+// an inline @file: ref (in-app, workspace-relative, gateway-resolvable) or goes
+// through the upload pipeline (OS drop — absolute local path a remote gateway
+// can't read, plus image bytes for vision).
+const osDrop = (path: string): DroppedFile => ({ file: new File(['x'], path.split('/').pop() || 'f'), path })
+const inAppRef = (path: string, extra: Partial<DroppedFile> = {}): DroppedFile => ({ path, ...extra })
+
+describe('partitionDroppedFiles', () => {
+  it('routes File-bearing OS drops to osDrops and path-only in-app drags to inAppRefs', () => {
+    const finderPdf = osDrop('/Users/mahmoud/Downloads/DEVIS_signed.pdf')
+    const projectFile = inAppRef('src/index.ts')
+
+    const { inAppRefs, osDrops } = partitionDroppedFiles([finderPdf, projectFile])
+
+    expect(osDrops).toEqual([finderPdf])
+    expect(inAppRefs).toEqual([projectFile])
+  })
+
+  it('treats an OS screenshot drop as an upload target (so it gets byte upload + vision)', () => {
+    const screenshot = osDrop('/var/folders/tmp/Screenshot 2026-06-09.png')
+
+    const { inAppRefs, osDrops } = partitionDroppedFiles([screenshot])
+
+    expect(osDrops).toEqual([screenshot])
+    expect(inAppRefs).toEqual([])
+  })
+
+  it('keeps gutter line-range drags inline (no File handle)', () => {
+    const lineRef = inAppRef('src/app.ts', { line: 10, lineEnd: 20 })
+
+    const { inAppRefs, osDrops } = partitionDroppedFiles([lineRef])
+
+    expect(osDrops).toEqual([])
+    expect(inAppRefs).toEqual([lineRef])
+  })
+
+  it('splits a mixed drop and preserves order within each group', () => {
+    const a = inAppRef('a.ts')
+    const b = osDrop('/abs/b.pdf')
+    const c = inAppRef('c.ts')
+    const d = osDrop('/abs/d.png')
+
+    const { inAppRefs, osDrops } = partitionDroppedFiles([a, b, c, d])
+
+    expect(inAppRefs).toEqual([a, c])
+    expect(osDrops).toEqual([b, d])
+  })
+
+  it('returns empty groups for an empty drop', () => {
+    expect(partitionDroppedFiles([])).toEqual({ inAppRefs: [], osDrops: [] })
+  })
+})
--- a/apps/desktop/src/app/chat/hooks/use-composer-actions.ts
+++ b/apps/desktop/src/app/chat/hooks/use-composer-actions.ts
@@ -33,7 +33,7 @@ function blobExtension(blob: Blob): string {
  return (mime && BLOB_MIME_EXTENSION[mime]) || '.png'
 }

-function isImagePath(filePath: string): boolean {
+export function isImagePath(filePath: string): boolean {
  return IMAGE_EXTENSION_PATTERN.test(filePath)
 }

@@ -181,6 +181,35 @@ export function extractDroppedFiles(transfer: DataTransfer): DroppedFile[] {
  return result
 }

+/**
+ * Split dropped entries by origin: a truthy `candidate.file` (OS/Finder drop with a native `File`) goes to
+ * `osDrops`; path-only in-app drags (project tree, gutter line refs) go to `inAppRefs`. Order is kept per group.
+ *
+ * The distinction is load-bearing: an in-app path is workspace-relative and
+ * resolves on the gateway as-is, so it stays an inline `@file:`/`@line:` ref.
+ * An OS drop is an absolute path on *this* machine — the gateway can't read it
+ * in remote mode, and an image needs its bytes uploaded to get vision either
+ * way. So OS drops must go through the attachment/upload pipeline rather than
+ * leaking a local path into the prompt text.
+ */
+export function partitionDroppedFiles(candidates: DroppedFile[]): {
+  osDrops: DroppedFile[]
+  inAppRefs: DroppedFile[]
+} {
+  const osDrops: DroppedFile[] = []
+  const inAppRefs: DroppedFile[] = []
+
+  for (const candidate of candidates) {
+    if (candidate.file) {
+      osDrops.push(candidate)
+    } else {
+      inAppRefs.push(candidate)
+    }
+  }
+
+  return { osDrops, inAppRefs }
+}
+
 interface ComposerActionsOptions {
  activeSessionId: string | null
  currentCwd: string
--- a/Show More
+++ b/Show More