Merge branch 'bb/pets' into bb/pets-gen

Carry forward the overlay/waiting-state updates and resolve the gateway merge conflict. Also tighten the desktop pet-generation flow by cleaning superseded previews, using the draft's source prompt during hatch, and previewing rows from the returned sheet taxonomy.
feat(pets): wire the waiting state across CLI, TUI, and desktop
2026-06-23 18:33:19 +08:00 · 2026-06-17 12:14:37 -05:00 · 2026-06-17 11:55:28 -05:00 · 2026-06-17 11:46:46 -05:00 · 2026-06-17 11:38:39 -05:00 · 2026-06-17 11:29:23 -05:00
809 changed files with 69168 additions and 20010 deletions
--- a/.github/workflows/contributor-check.yml
+++ b/.github/workflows/contributor-check.yml
@@ -1,12 +1,11 @@
 name: Contributor Attribution Check

 on:
-  pull_request:
-    branches: [main]
  # No paths filter — the job must always run so the required check
  # reports a status (path-gated workflows leave checks "pending" forever
  # when no matching files change, which blocks merge).
-
+  pull_request:
+    branches: [main]
 permissions:
  contents: read

--- a/.github/workflows/deploy-site.yml
+++ b/.github/workflows/deploy-site.yml
@@ -11,8 +11,20 @@ on:
      - 'optional-skills/**'
      - '.github/workflows/deploy-site.yml'
  workflow_dispatch:
+    inputs:
+      skills_index_run_id:
+        description: 'Optional Build Skills Index run ID whose skills-index artifact should be deployed'
+        required: false
+        type: string
+      rebuild_skills_index:
+        description: 'Force a fresh multi-source crawl instead of reusing the latest healthy index'
+        required: false
+        default: false
+        type: boolean

 permissions:
+  contents: read
+  actions: read
  pages: write
  id-token: write

@@ -55,26 +67,81 @@ jobs:
      - name: Install PyYAML for skill extraction
        run: pip install pyyaml==6.0.2 httpx==0.28.1

-      - name: Build skills index (unified multi-source catalog)
+      - name: Prepare skills index (unified multi-source catalog)
        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GH_TOKEN: ${{ github.token }}
+          GITHUB_TOKEN: ${{ github.token }}
+          SKILLS_INDEX_RUN_ID: ${{ github.event.inputs.skills_index_run_id || '' }}
+          REBUILD_SKILLS_INDEX: ${{ github.event.inputs.rebuild_skills_index || 'false' }}
        run: |
-          # Rebuild the unified catalog. The file is gitignored, so a fresh
-          # checkout starts without it and we want the freshest crawl in
-          # every deploy.
+          # The unified external catalog is expensive to crawl and can burn
+          # through the repository installation's GitHub API quota when several
+          # docs deploys land close together.  Deploys therefore reuse a healthy
+          # catalog: first the artifact from the skills-index run named by
+          # skills_index_run_id (if given), then the currently live index.  A
+          # fresh crawl runs only on a forced rebuild or when both sources fail.
          #
-          # This MUST be fatal. build_skills_index.py runs a health check and
-          # exits non-zero WITHOUT writing the output file when a source
-          # collapses (e.g. a GitHub API rate limit zeroes the github /
-          # claude-marketplace / well-known taps all at once). Letting the
-          # deploy continue would either (a) ship a degenerate index missing
-          # whole hubs — the June 2026 regression where OpenAI/Anthropic/
-          # HuggingFace/NVIDIA tabs vanished — or (b) fall through to a
-          # local-only catalog. Failing here keeps the last good deployment
-          # live (GitHub Pages serves the previous build) instead of
-          # publishing a broken catalog. Re-run the workflow once the
-          # transient rate limit clears.
+          # If we do crawl, the build remains fatal. build_skills_index.py runs
+          # the health check BEFORE writing and exits non-zero on source
+          # collapse, keeping the last good Pages deployment live instead of
+          # publishing a degenerate catalog.
+          set -euo pipefail
+          INDEX_PATH="website/static/api/skills-index.json"
+          mkdir -p "$(dirname "$INDEX_PATH")"
+
+          validate_index() {
+            python3 - "$INDEX_PATH" <<'PY'
+          import json
+          import sys
+          from pathlib import Path
+
+          path = Path(sys.argv[1])
+          try:
+              data = json.loads(path.read_text(encoding="utf-8"))
+          except Exception as exc:
+              print(f"invalid skills index JSON: {exc}", file=sys.stderr)
+              sys.exit(1)
+          skills = data.get("skills")
+          if not isinstance(skills, list) or len(skills) < 1500:
+              count = len(skills) if isinstance(skills, list) else "missing"
+              print(f"skills index too small: {count}", file=sys.stderr)
+              sys.exit(1)
+          print(f"skills index ready: {len(skills)} skills")
+          PY
+          }
+
+          if [ "$REBUILD_SKILLS_INDEX" = "true" ]; then
+            python3 scripts/build_skills_index.py
+            validate_index
+            exit 0
+          fi
+
+          if [ -n "$SKILLS_INDEX_RUN_ID" ]; then
+            tmpdir="$(mktemp -d)"
+            echo "Downloading skills-index artifact from run $SKILLS_INDEX_RUN_ID"
+            if gh run download "$SKILLS_INDEX_RUN_ID" --name skills-index --dir "$tmpdir"; then
+              candidate="$(find "$tmpdir" -name skills-index.json -type f | head -n 1 || true)"
+              if [ -n "$candidate" ]; then
+                cp "$candidate" "$INDEX_PATH"
+                if validate_index; then
+                  exit 0
+                fi
+              fi
+            fi
+            echo "::warning::Could not use skills-index artifact from run $SKILLS_INDEX_RUN_ID; trying live index"
+          fi
+
+          echo "Downloading currently live skills index"
+          if curl -fsSL --retry 3 --retry-delay 5 \
+            "https://hermes-agent.nousresearch.com/docs/api/skills-index.json" \
+            -o "$INDEX_PATH" && validate_index; then
+            exit 0
+          fi
+
+          echo "::warning::Live skills index unavailable or unhealthy; falling back to a fresh crawl"
+          rm -f "$INDEX_PATH"
          python3 scripts/build_skills_index.py
+          validate_index

      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py
--- a/.github/workflows/docker-lint.yml
+++ b/.github/workflows/docker-lint.yml
@@ -18,13 +18,12 @@ on:
      - docker/**
      - .hadolint.yaml
      - .github/workflows/docker-lint.yml
+
+  # No paths filter — the job must always run so the required check
+  # reports a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
-    paths:
-      - Dockerfile
-      - docker/**
-      - .hadolint.yaml
-      - .github/workflows/docker-lint.yml

 permissions:
  contents: read
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -11,16 +11,13 @@ on:
      - 'docker/**'
      - '.github/workflows/docker-publish.yml'
      - '.github/actions/hermes-smoke-test/**'
+
+  # No paths filter — the job must always run so the required check
+  # reports a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
-    paths:
-      - '**/*.py'
-      - 'pyproject.toml'
-      - 'uv.lock'
-      - 'Dockerfile'
-      - 'docker/**'
-      - '.github/workflows/docker-publish.yml'
-      - '.github/actions/hermes-smoke-test/**'
+
  release:
    types: [published]

@@ -90,7 +87,7 @@ jobs:
      # (see `_SKIP_PARTS` in scripts/run_tests_parallel.py) because each
      # shard would otherwise reach the session-scoped ``built_image``
      # fixture in ``tests/docker/conftest.py`` and start a 3-7min
-      # ``docker build`` under a 180s pytest-timeout cap — guaranteed to
+      # ``docker build`` — guaranteed to
      # die in fixture setup.
      #
      # Piggybacking here avoids a second image build: the smoke test
@@ -114,7 +111,7 @@ jobs:
        run: |
          uv venv .venv --python 3.11
          source .venv/bin/activate
-          # ``dev`` extra pulls in pytest, pytest-asyncio, pytest-timeout —
+          # ``dev`` extra pulls in pytest, pytest-asyncio —
          # everything tests/docker/ needs.  We deliberately avoid ``all``
          # here because the docker tests only drive the container via
          # subprocess and don't import hermes_agent's optional deps.
--- a/.github/workflows/docs-site-checks.yml
+++ b/.github/workflows/docs-site-checks.yml
@@ -1,10 +1,12 @@
 name: Docs Site Checks

 on:
+  # No paths filter — the job must always run so the required check
+  # reports a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
  pull_request:
-    paths:
-      - 'website/**'
-      - '.github/workflows/docs-site-checks.yml'
+    branches: [main]
+
  workflow_dispatch:

 permissions:
@@ -14,9 +16,9 @@ jobs:
  docs-site-checks:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

-      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
+      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
        with:
          node-version: 22
          cache: npm
@@ -26,9 +28,9 @@ jobs:
        run: npm ci
        working-directory: website

-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
-          python-version: '3.11'
+          python-version: "3.11"

      - name: Install ascii-guard
        run: python -m pip install ascii-guard==2.3.0 pyyaml==6.0.3
--- a/.github/workflows/e2e-cli-install.yml
+++ b/.github/workflows/e2e-cli-install.yml
@@ -1,49 +0,0 @@
-name: E2E CLI Tests
-
-on:
-  push:
-    branches:
-      - "**"
-
-permissions:
-  contents: read
-
-jobs:
-  e2e-tui-test:
-    runs-on: ubuntu-latest
-    timeout-minutes: 10
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
-        with:
-          node-version: 22
-          cache: npm
-      - run: npm ci
-      - run: cd e2e && CI=true npm run test
-        env:
-          # Ensure tests don't accidentally call real APIs
-          OPENROUTER_API_KEY: ""
-          OPENAI_API_KEY: ""
-          NOUS_API_KEY: ""
-
-      - name: Bundle TUI traces into self-contained replay HTML
-        if: always()
-        run: node e2e/scripts/bundle-replay-html.mjs
-
-      - name: Upload TUI replay viewer
-        uses: actions/upload-artifact@v4
-        if: always()
-        with:
-          name: tui-replay-viewer
-          path: tui-replay-viewer/
-          retention-days: 7
-
-      - name: Upload raw TUI test traces
-        uses: actions/upload-artifact@v4
-        if: always()
-        with:
-          name: tui-test-traces
-          path: e2e/tui-traces/
-          retention-days: 7
--- a/.github/workflows/history-check.yml
+++ b/.github/workflows/history-check.yml
@@ -14,6 +14,9 @@ name: History Check
 # the PR head and main to be non-empty.

 on:
+  # No paths filter — the job must always run so the required check
+  # reports a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]

@@ -24,9 +27,9 @@ jobs:
  check-common-ancestor:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
-          fetch-depth: 0  # full history both sides for merge-base
+          fetch-depth: 0 # full history both sides for merge-base

      - name: Reject PRs with no common ancestor on main
        run: |
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -15,12 +15,12 @@ on:
      - "**/*.md"
      - "docs/**"
      - "website/**"
+
+  # No paths filter — the job must always run so the required check
+  # reports a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
-    paths-ignore:
-      - "**/*.md"
-      - "docs/**"
-      - "website/**"

 permissions:
  contents: read
@@ -154,7 +154,6 @@ jobs:
              });
            }

-
  ruff-blocking:
    # Enforce the rules in pyproject.toml [tool.ruff.lint.select]. Currently
    # PLW1514 (unspecified-encoding) — catches bare ``open()`` /
--- a/.github/workflows/nix-lockfile-fix.yml
+++ b/.github/workflows/nix-lockfile-fix.yml
@@ -1,255 +0,0 @@
-name: Nix Lockfile Fix
-
-on:
-  push:
-    branches: [main]
-    paths:
-      - 'package-lock.json'
-      - 'package.json'
-      - 'ui-tui/package.json'
-      - 'apps/desktop/package.json'
-  workflow_dispatch:
-    inputs:
-      pr_number:
-        description: 'PR number to fix (leave empty to run on the selected branch)'
-        required: false
-        type: string
-  issue_comment:
-    types: [edited]
-
-permissions:
-  contents: write
-  pull-requests: write
-
-concurrency:
-  group: nix-lockfile-fix-${{ github.event.issue.number || github.event.inputs.pr_number || github.ref }}
-  cancel-in-progress: false
-
-jobs:
-  # ── Auto-fix on main ───────────────────────────────────────────────
-  # Fires when a push to main touches package.json or package-lock.json.
-  # Runs fix-lockfiles and pushes the hash update commit directly to main
-  # so Nix builds never stay broken.
-  #
-  # Safety invariants:
-  #   1. The fix commit only touches nix/*.nix files, which are NOT in
-  #      the paths filter above, so this cannot re-trigger itself.
-  #   2. An explicit file-whitelist check before commit aborts if
-  #      fix-lockfiles ever modifies unexpected files.
-  #   3. Job-level concurrency with cancel-in-progress: true ensures
-  #      back-to-back pushes collapse to the newest; ref: main checkout
-  #      always operates on the latest branch state.
-  #   4. Uses a GitHub App token (not GITHUB_TOKEN) so the fix commit
-  #      triggers downstream nix.yml verification.
-  auto-fix-main:
-    if: github.event_name == 'push'
-    runs-on: ubuntu-latest
-    timeout-minutes: 25
-    concurrency:
-      group: auto-fix-main
-      cancel-in-progress: true
-    steps:
-      - name: Generate GitHub App token
-        id: app-token
-        uses: actions/create-github-app-token@7bfa3a4717ef143a604ee0a99d859b8886a96d00  # v1.9.3
-        with:
-          app-id: ${{ secrets.APP_ID }}
-          private-key: ${{ secrets.APP_PRIVATE_KEY }}
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          ref: main
-          token: ${{ steps.app-token.outputs.token }}
-
-      - uses: ./.github/actions/nix-setup
-        with:
-          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
-
-      - name: Apply lockfile hashes
-        id: apply
-        run: nix run .#fix-lockfiles -- --apply
-
-      - name: Commit & push
-        if: steps.apply.outputs.changed == 'true'
-        shell: bash
-        run: |
-          set -euo pipefail
-
-          # Ensure only nix/lib.nix (home of the single npmDepsHash) was
-          # modified — prevents accidental self-triggering if fix-lockfiles
-          # ever touches package files.
-          unexpected="$(git diff --name-only | grep -Ev '^nix/lib\.nix$' || true)"
-          if [ -n "$unexpected" ]; then
-            echo "::error::Unexpected modified files: $unexpected"
-            exit 1
-          fi
-
-          # Record the base SHA before committing — used to detect package
-          # file changes if we need to rebase after a non-fast-forward push.
-          BASE_SHA="$(git rev-parse HEAD)"
-
-          git config user.name 'github-actions[bot]'
-          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
-          git add nix/lib.nix
-          git commit -m "fix(nix): auto-refresh npm lockfile hashes" \
-            -m "Source: $GITHUB_SHA" \
-            -m "Run: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
-
-          # Retry push with rebase in case main advanced with an unrelated
-          # commit during the nix build. Without this, a non-fast-forward
-          # rejection silently loses the fix. If package files changed during
-          # the rebase, abort — a fresh auto-fix run will handle the new state.
-          for attempt in 1 2 3; do
-            if git push origin HEAD:main; then
-              exit 0
-            fi
-            echo "::warning::Push attempt $attempt failed (non-fast-forward?), rebasing…"
-            git fetch origin main
-
-            # If package files changed between our base and the new main,
-            # our computed hashes are stale. Abort and let the next triggered
-            # run recompute from the correct package-lock state.
-            pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
-              'package-lock.json' 'package.json' \
-              'ui-tui/package.json' 'apps/desktop/package.json' || true)"
-            if [ -n "$pkg_changed" ]; then
-              echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
-              exit 0
-            fi
-
-            git rebase origin/main
-          done
-          echo "::error::Failed to push after 3 rebase attempts"
-          exit 1
-
-  # ── PR fix (manual / checkbox) ─────────────────────────────────────
-  # Existing behavior: run on manual dispatch OR when a task-list
-  # checkbox in the sticky lockfile-check comment flips from [ ] to [x].
-  fix:
-    if: |
-      github.event_name == 'workflow_dispatch' ||
-      (github.event_name == 'issue_comment'
-       && github.event.issue.pull_request != null
-       && contains(github.event.comment.body, '[x] **Apply lockfile fix**')
-       && !contains(github.event.changes.body.from, '[x] **Apply lockfile fix**'))
-    runs-on: ubuntu-latest
-    timeout-minutes: 25
-    steps:
-      - name: Authorize & resolve PR
-        id: resolve
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea  # v7.0.1
-        with:
-          script: |
-            // 1. Verify the actor has write access — applies to both checkbox
-            //    clicks and manual dispatch.
-            const { data: perm } =
-              await github.rest.repos.getCollaboratorPermissionLevel({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                username: context.actor,
-              });
-            if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
-              core.setFailed(
-                `${context.actor} lacks write access (has: ${perm.permission})`
-              );
-              return;
-            }
-
-            // 2. Resolve which ref to check out.
-            let prNumber = '';
-            if (context.eventName === 'issue_comment') {
-              prNumber = String(context.payload.issue.number);
-            } else if (context.eventName === 'workflow_dispatch') {
-              prNumber = context.payload.inputs.pr_number || '';
-            }
-
-            if (!prNumber) {
-              core.setOutput('ref', context.ref.replace(/^refs\/heads\//, ''));
-              core.setOutput('repo', context.repo.repo);
-              core.setOutput('owner', context.repo.owner);
-              core.setOutput('pr', '');
-              return;
-            }
-
-            const { data: pr } = await github.rest.pulls.get({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              pull_number: Number(prNumber),
-            });
-            core.setOutput('ref', pr.head.ref);
-            core.setOutput('repo', pr.head.repo.name);
-            core.setOutput('owner', pr.head.repo.owner.login);
-            core.setOutput('pr', String(pr.number));
-
-      # Wipe the sticky lockfile-check comment to a "running" state as soon
-      # as the job is authorized, so the user sees their click was picked up
-      # before the ~minute of nix build work.
-      - name: Mark sticky as running
-        if: steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### 🔄 Applying lockfile fix…
-
-            Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
-          ref: ${{ steps.resolve.outputs.ref }}
-          token: ${{ secrets.GITHUB_TOKEN }}
-          fetch-depth: 0
-
-      - uses: ./.github/actions/nix-setup
-        with:
-          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
-
-      - name: Apply lockfile hashes
-        id: apply
-        run: nix run .#fix-lockfiles
-
-      - name: Commit & push
-        if: steps.apply.outputs.changed == 'true'
-        shell: bash
-        run: |
-          set -euo pipefail
-          git config user.name 'github-actions[bot]'
-          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
-          git add nix/lib.nix
-          git commit -m "fix(nix): refresh npm lockfile hashes"
-          git push
-
-      - name: Update sticky (applied)
-        if: steps.apply.outputs.changed == 'true' && steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### ✅ Lockfile fix applied
-
-            Pushed a commit refreshing the npm lockfile hashes — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
-
-      - name: Update sticky (already current)
-        if: steps.apply.outputs.changed == 'false' && steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### ✅ Lockfile hashes already current
-
-            Nothing to commit — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
-
-      - name: Update sticky (failed)
-        if: failure() && steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### ❌ Lockfile fix failed
-
-            See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for logs.
--- a/.github/workflows/nix.yml
+++ b/.github/workflows/nix.yml
@@ -1,105 +0,0 @@
-name: Nix
-
-on:
-  push:
-    branches: [main]
-  pull_request:
-
-permissions:
-  contents: read
-  pull-requests: write
-
-concurrency:
-  group: nix-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  nix:
-    strategy:
-      matrix:
-        os: [ubuntu-latest, macos-latest]
-    runs-on: ${{ matrix.os }}
-    timeout-minutes: 30
-    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-      - uses: ./.github/actions/nix-setup
-        with:
-          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
-
-      - name: Resolve head SHA
-        if: github.event_name == 'pull_request'
-        id: sha
-        shell: bash
-        run: |
-          FULL="${{ github.event.pull_request.head.sha || github.sha }}"
-          echo "full=$FULL" >> "$GITHUB_OUTPUT"
-          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
-
-      - name: Check flake
-        id: flake
-        continue-on-error: true
-        run: nix flake check --print-build-logs
-
-      # When the flake check fails, run a targeted diagnostic to see if
-      # the failure is specifically a stale npm lockfile hash in one of the
-      # known npm subpackages (tui / web).  This avoids surfacing a generic
-      # "build failed" message when the fix is a single known command.
-      - name: Diagnose npm lockfile hashes
-        id: hash_check
-        if: steps.flake.outcome == 'failure' && runner.os == 'Linux'
-        continue-on-error: true
-        env:
-          LINK_SHA: ${{ steps.sha.outputs.full }}
-        run: nix run .#fix-lockfiles -- --check
-
-      # If fix-lockfiles itself crashes (infrastructure blip, cache throttle,
-      # etc.) it won't set stale=true/false.  Treat that as a distinct failure
-      # mode rather than silently ignoring it.
-      - name: Fail if hash check crashed without reporting
-        if: steps.hash_check.outcome == 'failure' && steps.hash_check.outputs.stale != 'true' && steps.hash_check.outputs.stale != 'false'
-        run: |
-          echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
-          exit 1
-
-      - name: Post sticky PR comment (stale hashes)
-        if: steps.hash_check.outputs.stale == 'true' && github.event_name == 'pull_request'
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          message: |
-            ### ⚠️ npm lockfile hash out of date
-
-            Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
-
-            The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
-
-            ${{ steps.hash_check.outputs.report }}
-
-            #### Apply the fix
-
-            - [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
-            - Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
-            - Or locally: `nix run .#fix-lockfiles` and commit the diff
-
-      # Clear the sticky comment when either the flake check passed outright (no
-      # hash check needed) or the hash check explicitly returned stale=false
-      # (check failed for a non-hash reason).
-      - name: Clear sticky PR comment (resolved)
-        if: |
-          github.event_name == 'pull_request' &&
-          (steps.hash_check.outputs.stale == 'false' ||
-           steps.flake.outcome == 'success')
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          delete: true
-
-      - name: Final fail if flake check failed
-        if: steps.flake.outcome == 'failure'
-        run: |
-          if [ "${{ steps.hash_check.outputs.stale }}" == "true" ]; then
-            echo "::error::Nix build failed due to stale npm lockfile hash. Run: nix run .#fix-lockfiles"
-          else
-            echo "::error::Nix flake check failed. See logs above."
-          fi
-          exit 1
--- a/.github/workflows/osv-scanner.yml
+++ b/.github/workflows/osv-scanner.yml
@@ -20,29 +20,23 @@ name: OSV-Scanner
 # vulnerabilities in pinned deps that we may need to patch deliberately.

 on:
+  # No paths filter — the job must always run so the required check
+  # reports a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
-    paths:
-      - 'uv.lock'
-      - 'pyproject.toml'
-      - 'package.json'
-      - 'package-lock.json'
-      - 'ui-tui/package.json'
-      - 'website/package.json'
-      - 'website/package-lock.json'
-      - '.github/workflows/osv-scanner.yml'
  push:
    branches: [main]
    paths:
-      - 'uv.lock'
-      - 'pyproject.toml'
-      - 'package.json'
-      - 'package-lock.json'
-      - 'website/package-lock.json'
+      - "uv.lock"
+      - "pyproject.toml"
+      - "package.json"
+      - "package-lock.json"
+      - "website/package-lock.json"
  schedule:
    # Weekly scan against main — catches CVEs published after merge for
    # deps that haven't changed since.
-    - cron: '0 9 * * 1'
+    - cron: "0 9 * * 1"
  workflow_dispatch:

 permissions:
@@ -54,7 +48,7 @@ permissions:
 jobs:
  scan:
    name: Scan lockfiles
-    uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@9a498708959aeaef5ef730655706c5a1df1edbc2  # v2.3.8
+    uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@9a498708959aeaef5ef730655706c5a1df1edbc2 # v2.3.8
    with:
      # Scan explicit lockfiles rather than recursing, so we only look at
      # the three sources of truth and skip vendored / test / worktree dirs.
--- a/.github/workflows/skills-index.yml
+++ b/.github/workflows/skills-index.yml
@@ -53,4 +53,4 @@ jobs:
      - name: Trigger Deploy Site workflow
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: gh workflow run deploy-site.yml --repo ${{ github.repository }}
+        run: gh workflow run deploy-site.yml --repo ${{ github.repository }} -f skills_index_run_id=${{ github.run_id }}
--- a/.github/workflows/supply-chain-audit.yml
+++ b/.github/workflows/supply-chain-audit.yml
@@ -1,11 +1,11 @@
 name: Supply Chain Audit

 on:
-  pull_request:
-    types: [opened, synchronize, reopened]
  # No paths filter — the jobs must always run so required checks
  # report a status (path-gated workflows leave checks "pending" forever
  # when no matching files change, which blocks merge).
+  pull_request:
+    types: [opened, synchronize, reopened]

 permissions:
  pull-requests: write
@@ -29,8 +29,10 @@ jobs:
      scan: ${{ steps.filter.outputs.scan }}
      # True when pyproject.toml changed in this PR
      deps: ${{ steps.filter.outputs.deps }}
+      # True when the curated MCP catalog / bundled MCP manifests changed.
+      mcp_catalog: ${{ steps.filter.outputs.mcp_catalog }}
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 0
      - name: Check for relevant file changes
@@ -54,6 +56,14 @@ jobs:
          else
            echo "deps=false" >> "$GITHUB_OUTPUT"
          fi
+          MCP_CATALOG_FILES=$(git diff --name-only "$BASE"..."$HEAD" -- \
+            'optional-mcps/**' \
+            'hermes_cli/mcp_catalog.py' || true)
+          if [ -n "$MCP_CATALOG_FILES" ]; then
+            echo "mcp_catalog=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "mcp_catalog=false" >> "$GITHUB_OUTPUT"
+          fi

  scan:
    name: Scan PR for critical supply chain risks
@@ -62,7 +72,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 0

@@ -197,7 +207,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 0

@@ -268,3 +278,50 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - run: echo "No pyproject.toml changes, skipping dependency bounds check."
+
+  mcp-catalog-review:  # blocks PRs touching the MCP catalog until a maintainer labels them
+    name: MCP catalog security review
+    needs: changes
+    if: needs.changes.outputs.mcp_catalog == 'true'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          fetch-depth: 0
+
+      - name: Require explicit MCP catalog review label
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PR: ${{ github.event.pull_request.number }}  # passed via env, not inlined into the script
+        run: |
+          set -euo pipefail
+          LABELS=$(gh pr view "$PR" --json labels --jq '.labels[].name' || true)  # lookup failure => no labels => fail closed
+          if grep -Fxq 'mcp-catalog-reviewed' <<<"$LABELS"; then  # here-string: no SIGPIPE false-failure under pipefail
+            echo "MCP catalog review label present."
+            exit 0
+          fi
+
+          BODY="## ⚠️ MCP catalog security review required
+
+          This PR changes the bundled MCP catalog or MCP catalog installer code. MCP entries can define local commands that users later install into \`mcp_servers\`, so this needs explicit maintainer review before merge.
+
+          A maintainer should verify:
+          - any new/changed \`optional-mcps/**/manifest.yaml\` command and args are expected,
+          - stdio transports do not use shell+egress/exfiltration payloads,
+          - git install refs are pinned and bootstrap commands are minimal,
+          - requested env vars/secrets match the upstream MCP's documented needs.
+
+          After review, add the \`mcp-catalog-reviewed\` label and re-run this check."
+
+          gh pr comment "$PR" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs)"
+          echo "::error::MCP catalog changes require the mcp-catalog-reviewed label."
+          exit 1
+
+  mcp-catalog-review-gate:  # no-op twin of mcp-catalog-review for PRs without MCP catalog changes
+    name: MCP catalog security review  # same display name as the mcp-catalog-review job
+    needs: changes
+    if: always() && needs.changes.outputs.mcp_catalog != 'true'  # complement of the review job's condition
+    runs-on: ubuntu-latest
+    steps:
+      - run: echo "No MCP catalog changes, skipping MCP catalog security review."
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -4,13 +4,13 @@ on:
  push:
    branches: [main]
    paths-ignore:
-      - '**/*.md'
-      - 'docs/**'
+      - "**/*.md"
+      - "docs/**"
+  # No paths filter — the job must always run so the required check
+  # reports a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
-    paths-ignore:
-      - '**/*.md'
-      - 'docs/**'

 permissions:
  contents: read
@@ -30,13 +30,17 @@ jobs:
        slice: [1, 2, 3, 4, 5, 6]
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Restore duration cache
-        uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae  # v5.0.5
+        uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
        with:
          path: test_durations.json
-          # Single stable key. main always overwrites, PRs always find it.
+          # Runs on main save under `test-durations-<run_id>`, so this bare prefix never
+          # hits exactly; the cache action then falls back to a prefix match on `key`
+          # (no restore-keys needed) and restores the most recent matching cache.
+          # Reference: "Matching a cache key" and "Cache hits and misses" in
+          # https://docs.github.com/en/actions/reference/workflows-and-actions/dependency-caching
          key: test-durations

      - name: Install ripgrep (prebuilt binary)
@@ -54,7 +58,7 @@ jobs:
          rg --version

      - name: Install uv
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
        with:
          # Persist uv's download/wheel cache (~/.cache/uv) across runs.
          # Keyed on the dependency manifests, so the cache is reused until
@@ -115,7 +119,7 @@ jobs:
          NOUS_API_KEY: ""

      - name: Upload per-slice durations
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: test-durations-slice-${{ matrix.slice }}
          path: test_durations.json
@@ -125,11 +129,11 @@ jobs:
  # (including PRs) get balanced slicing.
  save-durations:
    needs: test
-    if: always() && github.ref == 'refs/heads/main'
+    if: needs.test.result == 'success' && github.ref == 'refs/heads/main'
    runs-on: ubuntu-latest
    steps:
      - name: Download all slice durations
-        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
+        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
        with:
          pattern: test-durations-slice-*
          path: durations
@@ -149,17 +153,17 @@ jobs:
          "

      - name: Save merged duration cache
-        uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae  # v5.0.5
+        uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
        with:
          path: test_durations.json
-          key: test-durations
+          key: test-durations-${{ github.run_id }}

  e2e:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Install ripgrep (prebuilt binary)
        run: |
@@ -176,7 +180,7 @@ jobs:
          rg --version

      - name: Install uv
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
        with:
          # Persist uv's download/wheel cache (~/.cache/uv) across runs.
          # Keyed on the dependency manifests, so the cache is reused until
@@ -215,4 +219,4 @@ jobs:
        env:
          OPENROUTER_API_KEY: ""
          OPENAI_API_KEY: ""
-          NOUS_API_KEY: ""
+          NOUS_API_KEY: ""
--- a/.github/workflows/typecheck.yml
+++ b/.github/workflows/typecheck.yml
@@ -4,6 +4,9 @@ name: Typecheck
 on:
  push:
    branches: [main]
+  # No paths filter — the job must always run so the required check
+  # reports a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]

@@ -23,3 +26,20 @@ jobs:
          cache: npm
      - run: npm ci
      - run: npm run --prefix ${{ matrix.package }} typecheck
+
+  # Production build of the desktop renderer. `typecheck` runs `tsc` only,
+  # which does NOT exercise Vite/Rolldown module resolution — so an
+  # unresolvable package export (e.g. a transitive @assistant-ui/tap that no
+  # longer exports "./react-shim") slips past typecheck and only explodes when
+  # users build apps/desktop from source on install/update. Run the real
+  # `vite build` here so that class of break fails in CI instead.
+  desktop-build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
+        with:
+          node-version: 22
+          cache: npm
+      - run: npm ci  # no working-directory set; the build step targets apps/desktop via --prefix
+      - run: npm run --prefix apps/desktop build  # the apps/desktop "build" script, not just tsc
--- a/.github/workflows/uv-lockfile-check.yml
+++ b/.github/workflows/uv-lockfile-check.yml
@@ -47,15 +47,15 @@ on:
  push:
    branches: [main]
    paths:
-      - 'pyproject.toml'
-      - 'uv.lock'
-      - '.github/workflows/uv-lockfile-check.yml'
+      - "pyproject.toml"
+      - "uv.lock"
+      - ".github/workflows/uv-lockfile-check.yml"
+
+  # No paths filter — the job must always run so the required check
+  # reports a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
-    paths:
-      - 'pyproject.toml'
-      - 'uv.lock'
-      - '.github/workflows/uv-lockfile-check.yml'

 permissions:
  contents: read
@@ -71,10 +71,10 @@ jobs:
    timeout-minutes: 5
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Install uv
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5

      # `uv lock --check` re-resolves the project from pyproject.toml and
      # compares the result to uv.lock, exiting non-zero if they disagree.
--- a/.gitignore
+++ b/.gitignore
@@ -19,8 +19,6 @@ __pycache__/
 .notebooklm-playwright/
 .pip-cache/
 .uv-cache/
-.tui-test/
-tui-traces/
 compose.hermes.local.yml
 export*
 __pycache__/model_tools.cpython-310.pyc
@@ -134,3 +132,7 @@ scripts/out/
 # stores the published notes. They are not a build artifact and must never be
 # committed to the repo root. See the hermes-release skill.
 RELEASE_v*.md
+
+# Desktop demo-run scratch output (hermes writes demo/*.txt during recorded
+# walkthroughs). Throwaway artifacts, never part of the app.
+apps/desktop/demo/
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -78,7 +78,41 @@ This isn't a quality bar — it's a coupling-and-maintenance decision. Memory pr
 | **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
 | **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |

-### Clone and install
+### Install with the standard installer
+
+For most contributors, the best development bootstrap is the same path users
+take: run the standard installer, then work inside the repository it cloned.
+The installer creates the Hermes venv, wires the `hermes` command, stamps the
+install method for `hermes update`, and clones the full git project into
+`$HERMES_HOME/hermes-agent` (usually `~/.hermes/hermes-agent`). That keeps your
+development environment on the same layout the CLI, updater, lazy dependency
+installer, gateway, and docs assume.
+
+```bash
+curl -fsSL https://hermes-agent.nousresearch.com/install.sh | bash
+cd "${HERMES_HOME:-$HOME/.hermes}/hermes-agent"
+
+# Add dev/test extras on top of the standard install.
+uv pip install -e ".[all,dev]"
+
+# Optional: browser tools / docs site dependencies.
+npm install
+```
+
+After that, create branches and run tests from that checkout:
+
+```bash
+git checkout -b fix/description
+scripts/run_tests.sh
+```
+
+### Manual clone fallback
+
+Use this only if you intentionally do not want Hermes' managed install layout
+(for example, a throwaway clone inside a container or CI job). If you install
+this way, make sure you run the `hermes` entrypoint from this venv; running the
+system `python3 -m hermes_cli.main` can pick up unrelated system Python
+packages.

 ```bash
 git clone https://github.com/NousResearch/hermes-agent.git
@@ -109,15 +143,19 @@ echo "OPENROUTER_API_KEY=***" >> ~/.hermes/.env
 ### Run

 ```bash
-# Symlink for global access
-mkdir -p ~/.local/bin
-ln -sf "$(pwd)/venv/bin/hermes" ~/.local/bin/hermes
-
-# Verify
+# The standard installer already put `hermes` on PATH.
 hermes doctor
 hermes chat -q "Hello"
 ```

+If you used the manual clone fallback, run `./hermes` from the checkout or
+symlink this clone's venv explicitly:
+
+```bash
+mkdir -p ~/.local/bin
+ln -sf "$(pwd)/venv/bin/hermes" ~/.local/bin/hermes
+```
+
 ### Run tests

 ```bash
--- a/README.md
+++ b/README.md
@@ -181,16 +181,20 @@ See `hermes claw migrate --help` for all options, or use the `openclaw-migration

 We welcome contributions! See the [Contributing Guide](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) for development setup, code style, and PR process.

-Quick start for contributors — clone and go with `setup-hermes.sh`:
+Quick start for contributors — use the standard installer, then work from the
+full git checkout it creates at `$HERMES_HOME/hermes-agent` (usually
+`~/.hermes/hermes-agent`). This matches the layout used by `hermes update`, the
+managed venv, lazy dependencies, gateway, and docs tooling.

 ```bash
-git clone https://github.com/NousResearch/hermes-agent.git
-cd hermes-agent
-./setup-hermes.sh     # installs uv, creates venv, installs .[all], symlinks ~/.local/bin/hermes
-./hermes              # auto-detects the venv, no need to `source` first
+curl -fsSL https://hermes-agent.nousresearch.com/install.sh | bash
+cd "${HERMES_HOME:-$HOME/.hermes}/hermes-agent"
+uv pip install -e ".[all,dev]"
+scripts/run_tests.sh
 ```

-Manual path (equivalent to the above):
+Manual clone fallback (for throwaway clones/CI where you intentionally do not
+want the managed install layout):

 ```bash
 curl -LsSf https://astral.sh/uv/install.sh | sh
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -164,16 +164,18 @@ hermes claw migrate --overwrite  # 覆盖已有冲突

 欢迎贡献！请参阅 [贡献指南](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) 了解开发设置、代码风格和 PR 流程。

-贡献者快速开始——克隆并使用 `setup-hermes.sh`：
+贡献者快速开始——使用标准安装器，然后在它创建的完整 git checkout 中开发：
+`$HERMES_HOME/hermes-agent`（通常是 `~/.hermes/hermes-agent`）。这会匹配
+`hermes update`、托管 venv、lazy dependencies、gateway 和 docs tooling 使用的布局。

 ```bash
-git clone https://github.com/NousResearch/hermes-agent.git
-cd hermes-agent
-./setup-hermes.sh     # 安装 uv、创建 venv、安装 .[all]、创建符号链接 ~/.local/bin/hermes
-./hermes              # 自动检测 venv，无需先 source
+curl -fsSL https://hermes-agent.nousresearch.com/install.sh | bash
+cd "${HERMES_HOME:-$HOME/.hermes}/hermes-agent"
+uv pip install -e ".[all,dev]"
+scripts/run_tests.sh
 ```

-手动安装（等效于上述命令）：
+手动克隆备用路径（用于一次性 clone / CI，或你明确不想使用 managed install layout 时）：

 ```bash
 curl -LsSf https://astral.sh/uv/install.sh | sh
--- a/acp_adapter/server.py
+++ b/acp_adapter/server.py
@@ -824,6 +824,7 @@ class HermesACPAgent(acp.Agent):

        try:
            from model_tools import get_tool_definitions
+            from agent.memory_manager import inject_memory_provider_tools

            enabled_toolsets = _expand_acp_enabled_toolsets(
                getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"],
@@ -839,6 +840,7 @@ class HermesACPAgent(acp.Agent):
            state.agent.valid_tool_names = {
                tool["function"]["name"] for tool in state.agent.tools or []
            }
+            inject_memory_provider_tools(state.agent)
            invalidate = getattr(state.agent, "_invalidate_system_prompt", None)
            if callable(invalidate):
                invalidate()
@@ -1779,10 +1781,25 @@ class HermesACPAgent(acp.Agent):
    def _cmd_tools(self, args: str, state: SessionState) -> str:
        try:
            from model_tools import get_tool_definitions
+            from types import SimpleNamespace
+            from agent.memory_manager import inject_memory_provider_tools
+
            toolsets = _expand_acp_enabled_toolsets(
                getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
            )
            tools = get_tool_definitions(enabled_toolsets=toolsets, quiet_mode=True)
+            tool_view = SimpleNamespace(
+                tools=list(tools or []),
+                valid_tool_names={
+                    tool.get("function", {}).get("name")
+                    for tool in tools or []
+                    if isinstance(tool, dict)
+                },
+                enabled_toolsets=toolsets,
+                _memory_manager=getattr(state.agent, "_memory_manager", None),
+            )
+            inject_memory_provider_tools(tool_view)
+            tools = tool_view.tools
            if not tools:
                return "No tools available."
            lines = [f"Available tools ({len(tools)}):"]
--- a/agent/account_usage.py
+++ b/agent/account_usage.py
@@ -145,7 +145,7 @@ def build_nous_credits_snapshot(account_info) -> Optional[AccountUsageSnapshot]:
    account info to show (fail-open: caller just shows nothing).
    """
    try:
-        from hermes_cli.nous_account import nous_portal_billing_url
+        from hermes_cli.nous_account import nous_portal_topup_url

        if account_info is None or not getattr(account_info, "logged_in", False):
            return None
@@ -213,7 +213,8 @@ def build_nous_credits_snapshot(account_info) -> Optional[AccountUsageSnapshot]:
        if not windows and not details:
            return None

-        details.append(f"Manage / top up: {nous_portal_billing_url(account_info)}")
+        details.append(f"Top up: {nous_portal_topup_url(account_info)}")
+        details.append("(or run /credits)")

        plan = getattr(sub, "plan", None) if sub is not None else None
        return AccountUsageSnapshot(
@@ -337,6 +338,93 @@ def _snapshot_from_credits_state(state) -> Optional[AccountUsageSnapshot]:
        return None


+@dataclass(frozen=True)
+class CreditsView:
+    """Surface-agnostic data for the ``/credits`` command.
+
+    One portal fetch, one parse — consumed identically by the CLI panel, the
+    gateway button, and any other money surface. Fail-open: when not logged in
+    or the portal is unreachable, ``logged_in`` is False / ``topup_url`` is None
+    and callers degrade gracefully.
+    """
+
+    logged_in: bool  # False on every fail-open path of build_credits_view
+    balance_lines: tuple[str, ...] = ()  # rendered usage lines minus the top-up/command hints
+    identity_line: Optional[str] = None  # "Topping up as <email> / org <name>" when either is known
+    topup_url: Optional[str] = None  # result of nous_portal_topup_url(account)
+    depleted: bool = False  # True when account.paid_service_access is exactly False
+
+
+def build_credits_view(*, markdown: bool = False, timeout: float = 10.0) -> CreditsView:
+    """Build the /credits view: balance block + identity line + top-up URL.
+
+    Reuses the same account fetch + snapshot + URL builder as the /usage credits
+    block, so the numbers always match. Balance lines are the rendered snapshot
+    MINUS its trailing top-up/command-hint lines (/credits supplies its own
+    affordance). ``timeout`` bounds the wait on the portal fetch in seconds; any
+    failure or timeout fails open → ``CreditsView(logged_in=False)``.
+    """
+    not_logged_in = CreditsView(logged_in=False)
+    try:
+        from hermes_cli.auth import get_provider_auth_state
+
+        tok = (get_provider_auth_state("nous") or {}).get("access_token")
+        if not (isinstance(tok, str) and tok.strip()):
+            return not_logged_in
+    except Exception:
+        return not_logged_in
+
+    try:
+        import concurrent.futures
+
+        from hermes_cli.nous_account import (
+            get_nous_portal_account_info,
+            nous_portal_topup_url,
+        )
+
+        # Deliberately no ``with`` block: its exit joins the worker, defeating ``timeout``.
+        pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
+        fetch = pool.submit(get_nous_portal_account_info, force_fresh=True)
+        pool.shutdown(wait=False)  # already-submitted work still runs to completion
+        account = fetch.result(timeout=timeout)
+    except Exception:
+        logger.debug("credits ▸ /credits portal fetch failed (fail-open)", exc_info=True)
+        return not_logged_in
+
+    if account is None or not getattr(account, "logged_in", False):
+        return not_logged_in
+
+    snapshot = build_nous_credits_snapshot(account)
+    # Balance lines = the snapshot block minus the trailing "Top up: <url>" and
+    # "(or run /credits)" lines appended for /usage; /credits renders its own UI.
+    balance_lines: list[str] = []
+    if snapshot is not None:
+        rendered = render_account_usage_lines(snapshot, markdown=markdown)
+        balance_lines = [
+            line
+            for line in rendered
+            if not line.lstrip().startswith("Top up:")
+            and not line.lstrip().startswith("(or run")
+        ]
+
+    # Identity line — shown before any open (roadmap §4.4).
+    email = getattr(account, "email", None)
+    org_name = getattr(account, "org_name", None)
+    who: list[str] = []
+    if email:
+        who.append(str(email))
+    if org_name:
+        who.append(f"org {org_name}")
+    identity_line = ("Topping up as " + " / ".join(who)) if who else None
+
+    return CreditsView(
+        logged_in=True,
+        balance_lines=tuple(balance_lines),
+        identity_line=identity_line,
+        topup_url=nous_portal_topup_url(account),
+        depleted=getattr(account, "paid_service_access", None) is False,
+    )
+
+
 def _resolve_codex_usage_url(base_url: str) -> str:
    normalized = (base_url or "").strip().rstrip("/")
    if not normalized:
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@@ -27,7 +27,7 @@ import threading
 import time
 import uuid
 from datetime import datetime
-from typing import Any, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional
 from urllib.parse import urlparse, parse_qs, urlunparse

 from agent.context_compressor import ContextCompressor
@@ -195,6 +195,7 @@ def init_agent(
    status_callback: callable = None,
    notice_callback: callable = None,
    notice_clear_callback: callable = None,
+    event_callback: Optional[Callable[[str, dict], None]] = None,
    max_tokens: int = None,
    reasoning_config: Dict[str, Any] = None,
    service_tier: str = None,
@@ -299,6 +300,7 @@ def init_agent(
    # would mangle the escape sequences.  None = use builtins.print.
    agent._print_fn = None
    agent.background_review_callback = None  # Optional sync callback for gateway delivery
+    agent.memory_notifications = "on"  # Memory update notifications: "off", "on", "verbose"
    agent.skip_context_files = skip_context_files
    agent.load_soul_identity = load_soul_identity
    agent.pass_session_id = pass_session_id
@@ -425,6 +427,7 @@ def init_agent(
    agent.status_callback = status_callback
    agent.notice_callback = notice_callback
    agent.notice_clear_callback = notice_clear_callback
+    agent.event_callback = event_callback
    agent.tool_gen_callback = tool_gen_callback

    
@@ -900,6 +903,9 @@ def init_agent(
        agent.api_key = client_kwargs.get("api_key", "")
        agent.base_url = client_kwargs.get("base_url", agent.base_url)
        try:
+            from agent.ssl_guard import verify_ca_bundle_with_fallback
+
+            verify_ca_bundle_with_fallback()
            agent.client = agent._create_openai_client(client_kwargs, reason="agent_init", shared=True)
            if not agent.quiet_mode:
                print(f"🤖 AI Agent initialized with model: {agent.model}")
@@ -1193,38 +1199,8 @@ def init_agent(
            _ra().logger.warning("Memory provider plugin init failed: %s", _mpe)
            agent._memory_manager = None

-    # Inject memory provider tool schemas into the tool surface.
-    # Skip tools whose names already exist (plugins may register the
-    # same tools via ctx.register_tool(), which lands in agent.tools
-    # through _ra().get_tool_definitions()).  Duplicate function names cause
-    # 400 errors on providers that enforce unique names (e.g. Xiaomi
-    # MiMo via Nous Portal).
-    #
-    # Respect the platform's enabled_toolsets configuration (#5544):
-    #   enabled_toolsets is None        → no filter, inject (backward compat)
-    #   "memory" in enabled_toolsets    → user opted in, inject
-    #   otherwise (incl. [])            → user excluded memory, skip injection
-    #
-    # Without this gate, `platform_toolsets: telegram: []` still leaks memory
-    # provider tools (fact_store, etc.) into the tool surface — a 10x latency
-    # penalty on local models and a frequent trigger of tool-call loops.
-    if agent._memory_manager and agent.tools is not None and (
-        agent.enabled_toolsets is None or "memory" in agent.enabled_toolsets
-    ):
-        _existing_tool_names = {
-            t.get("function", {}).get("name")
-            for t in agent.tools
-            if isinstance(t, dict)
-        }
-        for _schema in agent._memory_manager.get_all_tool_schemas():
-            _tname = _schema.get("name", "")
-            if _tname and _tname in _existing_tool_names:
-                continue  # already registered via plugin path
-            _wrapped = {"type": "function", "function": _schema}
-            agent.tools.append(_wrapped)
-            if _tname:
-                agent.valid_tool_names.add(_tname)
-                _existing_tool_names.add(_tname)
+    from agent.memory_manager import inject_memory_provider_tools as _inject_memory_provider_tools
+    _inject_memory_provider_tools(agent)

    # Skills config: nudge interval for skill creation reminders
    agent._skill_nudge_interval = 10
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@@ -445,6 +445,45 @@ def repair_message_sequence(agent, messages: List[Dict]) -> int:
    return repairs


+def repair_message_sequence_with_cursor(agent, messages: List[Dict]) -> int:
+    """Run :func:`repair_message_sequence` and keep the SessionDB flush
+    cursor consistent with the compacted list (#44837).
+
+    ``repair_message_sequence`` merges/drops messages in place, shrinking
+    the list. ``_last_flushed_db_idx`` (the DB-write cursor) indexes into
+    that list, so after compaction it can point past the new end — the
+    turn-end flush would then skip the assistant/tool chain entirely — or
+    past unflushed messages shifted to lower indexes.
+
+    Repair preserves object identity for surviving messages, so counting
+    the survivors from the previously-flushed prefix gives the exact new
+    cursor even when messages are dropped/merged at indexes *before* the
+    cursor — a plain ``min()`` clamp would silently skip that many
+    unflushed rows. Falls back to the clamp when no prefix snapshot is
+    available.
+
+    Returns the number of repairs made (same as ``repair_message_sequence``).
+    """
+    # Hold the flushed prefix as real references (not bare ids) across the
+    # repair: a dropped message could otherwise be freed and have its id()
+    # recycled by a newly created message, inflating the survivor count.
+    flushed_prefix = None
+    flush_cursor = getattr(agent, "_last_flushed_db_idx", None)
+    if isinstance(flush_cursor, int) and flush_cursor > 0:
+        flushed_prefix = messages[:flush_cursor]
+
+    repairs = repair_message_sequence(agent, messages)
+
+    if repairs > 0 and hasattr(agent, "_last_flushed_db_idx"):
+        if flushed_prefix is not None:
+            flushed_ids = {id(m) for m in flushed_prefix}
+            agent._last_flushed_db_idx = sum(1 for m in messages if id(m) in flushed_ids)
+        else:
+            agent._last_flushed_db_idx = min(agent._last_flushed_db_idx, len(messages))
+
+    return repairs
+
+

 def strip_think_blocks(agent, content: str) -> str:
    """Remove reasoning/thinking blocks from content, returning only visible text.
@@ -579,12 +618,33 @@ def recover_with_credential_pool(
    current_provider = (getattr(agent, "provider", "") or "").strip().lower()
    pool_provider = (getattr(pool, "provider", "") or "").strip().lower()
    if current_provider and pool_provider and current_provider != pool_provider:
-        _ra().logger.warning(
-            "Credential pool provider mismatch: pool=%s, agent=%s — "
-            "skipping pool mutation to avoid cross-provider contamination",
-            pool_provider, current_provider,
-        )
-        return False, has_retried_429
+        # Custom endpoints use two naming conventions for the SAME provider:
+        # the agent carries the generic ``custom`` label while the pool is
+        # keyed ``custom:<name>`` (see CUSTOM_POOL_PREFIX). A literal string
+        # compare treats them as a mismatch and skips recovery for every
+        # custom-provider user — 401s/429s then burn the full retry cycle
+        # with no rotation or refresh. Accept the pair as matching only when
+        # the agent's CURRENT base_url actually resolves to this pool key,
+        # so a fallback provider (or a different custom endpoint) still
+        # triggers the guard.
+        _custom_match = False
+        if current_provider == "custom" and pool_provider.startswith("custom:"):
+            try:
+                from agent.credential_pool import get_custom_provider_pool_key
+                _agent_base = (getattr(agent, "base_url", "") or "").strip()
+                _custom_match = bool(_agent_base) and (
+                    (get_custom_provider_pool_key(_agent_base) or "").strip().lower()
+                    == pool_provider
+                )
+            except Exception:
+                _custom_match = False
+        if not _custom_match:
+            _ra().logger.warning(
+                "Credential pool provider mismatch: pool=%s, agent=%s — "
+                "skipping pool mutation to avoid cross-provider contamination",
+                pool_provider, current_provider,
+            )
+            return False, has_retried_429

    effective_reason = classified_reason
    if effective_reason is None:
@@ -821,6 +881,8 @@ def try_recover_primary_transport(

 def drop_thinking_only_and_merge_users(
    messages: List[Dict[str, Any]],
+    *,
+    drop_codex_reasoning_items: bool = True,
 ) -> List[Dict[str, Any]]:
    """Drop thinking-only assistant turns; merge any adjacent user messages left behind.

@@ -842,7 +904,13 @@ def drop_thinking_only_and_merge_users(
        return messages

    # Pass 1: drop thinking-only assistant turns.
-    kept = [m for m in messages if not _ra().AIAgent._is_thinking_only_assistant(m)]
+    kept = [
+        m for m in messages
+        if not _ra().AIAgent._is_thinking_only_assistant(
+            m,
+            drop_codex_reasoning_items=drop_codex_reasoning_items,
+        )
+    ]
    dropped = len(messages) - len(kept)
    if dropped == 0:
        return messages
@@ -1149,12 +1217,23 @@ def dump_api_request_debug(

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        dump_file = agent.logs_dir / f"request_dump_{agent.session_id}_{timestamp}.json"
-        atomic_json_write(dump_file, dump_payload, default=str)
+
+        # Redact secrets before persisting/printing. This dump captures the
+        # full request body (system prompt, tool defs, context-embedded
+        # values), and this path fires unconditionally on API errors — so it
+        # otherwise lands any context-embedded secret in cleartext on disk.
+        # Run the serialized dump through the same scrubber used for logs/tool
+        # output, then hand the resulting payload back to the shared atomic
+        # JSON writer so request dumps keep the same write semantics as before.
+        from agent.redact import redact_sensitive_text
+        _serialized = json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str)
+        _redacted_payload = json.loads(redact_sensitive_text(_serialized, force=True))
+        atomic_json_write(dump_file, _redacted_payload, default=str)

        agent._vprint(f"{agent.log_prefix}🧾 Request debug dump written to: {dump_file}")

        if env_var_enabled("HERMES_DUMP_REQUEST_STDOUT"):
-            print(json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str))
+            print(json.dumps(_redacted_payload, ensure_ascii=False, indent=2, default=str))

        return dump_file
    except Exception as dump_error:
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -751,6 +751,9 @@ def build_anthropic_client(
    from httpx import Timeout

    normalized_base_url = _normalize_base_url_text(base_url)
+    if normalized_base_url:
+        import re as _re
+        normalized_base_url = _re.sub(r"/v1/?$", "", normalized_base_url.rstrip("/"))
    _read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
    kwargs = {
        "timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -1144,7 +1144,8 @@ def _endpoint_speaks_anthropic_messages(base_url: str) -> bool:
    normalized = (base_url or "").strip().lower().rstrip("/")
    if not normalized:
        return False
-    if normalized.endswith("/anthropic"):
+    path = urlparse(normalized).path.rstrip("/")
+    if path.endswith("/anthropic") or path.endswith("/anthropic/v1"):
        return True
    hostname = base_url_hostname(normalized)
    if hostname == "api.anthropic.com":
@@ -3078,23 +3079,20 @@ def _try_configured_fallback_chain(
        if not fb_provider or fb_provider.lower() == skip:
            continue
        fb_model = str(entry.get("model", "")).strip() or None
-        fb_base_url = str(entry.get("base_url", "")).strip() or None
-        fb_api_key = str(entry.get("api_key", "")).strip() or None

        label = f"fallback_chain[{i}]({fb_provider})"

        try:
-            fb_client = _resolve_single_provider(
-                fb_provider, fb_model, fb_base_url, fb_api_key)
+            fb_client, resolved_model = _resolve_fallback_entry(entry)
        except Exception:
-            fb_client = None
+            fb_client, resolved_model = None, None

        if fb_client is not None:
            logger.info(
                "Auxiliary %s: %s on %s — configured fallback to %s (%s)",
-                task, reason, failed_provider, label, fb_model or "default",
+                task, reason, failed_provider, label, resolved_model or fb_model or "default",
            )
-            return fb_client, fb_model, label
+            return fb_client, resolved_model or fb_model, label
        tried.append(label)

    if tried:
@@ -3105,6 +3103,103 @@ def _try_configured_fallback_chain(
    return None, None, ""


+def _fallback_entry_api_key(entry: Dict[str, Any]) -> Optional[str]:
+    """Return the entry's inline ``api_key``, else the value of the env var it names, else None."""
+    inline_key = str(entry.get("api_key") or "").strip()
+    if inline_key:
+        return inline_key
+    # ``key_env`` wins over its alias ``api_key_env`` when both are present.
+    env_name = str(entry.get("key_env") or entry.get("api_key_env") or "").strip()
+    env_value = os.getenv(env_name, "").strip() if env_name else ""
+    return env_value or None
+
+
+def _resolve_fallback_entry(entry: Dict[str, Any]) -> Tuple[Optional[Any], Optional[str]]:
+    """Build a client for one fallback entry via ``resolve_provider_client``.
+
+    Returns ``(None, None)`` when the entry has no ``provider`` or no ``model``.
+    """
+    name = str(entry.get("provider") or "").strip()
+    model_id = str(entry.get("model") or "").strip()
+    if not (name and model_id):
+        return None, None
+    mode = str(entry.get("api_mode") or entry.get("transport") or "").strip()
+    return resolve_provider_client(
+        name, model=model_id,
+        explicit_base_url=str(entry.get("base_url") or "").strip() or None,
+        explicit_api_key=_fallback_entry_api_key(entry),
+        api_mode=mode or None,
+    )
+
+
+def _try_main_fallback_chain(
+    task: Optional[str],
+    failed_provider: str = "",
+    reason: str = "error",
+) -> Tuple[Optional[Any], Optional[str], str]:
+    """Resolve an auxiliary client from the main agent's top-level fallback chain.
+
+    ``provider: auto`` auxiliary tasks honour the user's declared main fallback
+    policy before Hermes' built-in discovery chain. The chain is read through
+    ``get_fallback_chain`` so modern ``fallback_providers`` and legacy
+    ``fallback_model`` entries are tried in the main agent's order. Entries for
+    the failed provider, the main provider, ``auto`` or an unhealthy provider
+    are skipped. Returns ``(client, model, provider)`` or ``(None, None, "")``.
+    """
+    task_name = task or "call"
+    try:
+        from hermes_cli.config import load_config
+        from hermes_cli.fallback_config import get_fallback_chain
+
+        entries = get_fallback_chain(load_config())
+    except Exception as exc:
+        logger.debug("Auxiliary %s: could not load main fallback chain: %s", task_name, exc)
+        return None, None, ""
+
+    if not entries:
+        return None, None, ""
+
+    excluded = {"auto"}
+    for raw_name in (failed_provider, _read_main_provider()):
+        normalized = (raw_name or "").strip().lower()
+        if normalized:
+            excluded.add(normalized)
+    attempts: List[str] = []
+
+    for index, entry in enumerate(entries):
+        if not isinstance(entry, dict):
+            continue
+        provider_name = str(entry.get("provider") or "").strip()
+        model_name = str(entry.get("model") or "").strip()
+        if not (provider_name and model_name):
+            continue
+        provider_key = provider_name.lower()
+        label = f"fallback_providers[{index}]({provider_name})"
+        if provider_key in excluded:
+            attempts.append(f"{label} (skipped)")
+            continue
+        if _is_provider_unhealthy(provider_key):
+            _log_skip_unhealthy(provider_key, task)
+            attempts.append(f"{label} (unhealthy)")
+            continue
+        try:
+            client, resolved = _resolve_fallback_entry(entry)
+        except Exception as exc:
+            logger.debug("Auxiliary %s: main fallback %s failed to resolve: %s", task_name, label, exc)
+            client, resolved = None, None
+        if client is None:
+            attempts.append(label)
+            continue
+        chosen_model = resolved or model_name
+        logger.info("Auxiliary %s: %s on %s — main fallback chain to %s (%s)",
+                    task_name, reason, failed_provider or "auto", label, chosen_model)
+        return client, chosen_model, provider_name
+
+    if attempts:
+        logger.debug("Auxiliary %s: main fallback chain exhausted (tried: %s)", task_name, ", ".join(attempts))
+    return None, None, ""
+
+
 def _resolve_single_provider(
    provider: str,
    model: Optional[str] = None,
@@ -3115,16 +3210,19 @@ def _resolve_single_provider(

    Uses the existing provider resolution infrastructure where possible.
    """
-    # Reuse resolve_provider_client which handles provider→client mapping
+    # Reuse resolve_provider_client which handles provider→client mapping.
    client, resolved_model = resolve_provider_client(
        provider=provider,
        model=model,
-        base_url=base_url,
-        api_key=api_key,
+        explicit_base_url=base_url,
+        explicit_api_key=api_key,
    )
    return client

-def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Optional[OpenAI], Optional[str]]:
+def _resolve_auto(
+    main_runtime: Optional[Dict[str, Any]] = None,
+    task: Optional[str] = None,
+) -> Tuple[Optional[OpenAI], Optional[str]]:
    """Full auto-detection chain.

    Priority:
@@ -3190,7 +3288,7 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
    if (main_provider and main_model
            and main_provider not in {"auto", ""}):
        resolved_provider = main_provider
-        explicit_base_url = None
+        explicit_base_url = runtime_base_url or None
        explicit_api_key = None
        if runtime_base_url and (main_provider == "custom" or main_provider.startswith("custom:")):
            resolved_provider = "custom"
@@ -3222,7 +3320,22 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
                            main_provider, resolved or main_model)
                return client, resolved or main_model

-    # ── Step 2: aggregator / fallback chain ──────────────────────────────
+    # ── Step 2: user-configured fallback policy ─────────────────────────
+    # In auto mode, respect the task-specific fallback chain first, then the
+    # main agent's top-level fallback_providers/fallback_model chain. The
+    # hardcoded provider discovery chain below is only the convenience default
+    # for users who have not declared a fallback policy.
+    if task:
+        fb_client, fb_model, _fb_label = _try_configured_fallback_chain(
+            task, main_provider or "auto", reason="main provider unavailable")
+        if fb_client is not None:
+            return fb_client, fb_model
+    fb_client, fb_model, _fb_label = _try_main_fallback_chain(
+        task, main_provider or "auto", reason="main provider unavailable")
+    if fb_client is not None:
+        return fb_client, fb_model
+
+    # ── Step 3: aggregator / fallback chain ──────────────────────────────
    tried = []
    for label, try_fn in _get_provider_chain():
        if _is_provider_unhealthy(label):
@@ -3343,6 +3456,7 @@ def resolve_provider_client(
    api_mode: str = None,
    main_runtime: Optional[Dict[str, Any]] = None,
    is_vision: bool = False,
+    task: Optional[str] = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Central router: given a provider name and optional model, return a
    configured client with the correct auth, base URL, and API format.
@@ -3463,7 +3577,7 @@ def resolve_provider_client(

    # ── Auto: try all providers in priority order ────────────────────
    if provider == "auto":
-        client, resolved = _resolve_auto(main_runtime=main_runtime)
+        client, resolved = _resolve_auto(main_runtime=main_runtime, task=task)
        if client is None:
            return None, None
        # When auto-detection lands on a non-OpenRouter provider (e.g. a
@@ -4356,11 +4470,16 @@ def _client_cache_key(
    api_mode: Optional[str] = None,
    main_runtime: Optional[Dict[str, Any]] = None,
    is_vision: bool = False,
+    task: Optional[str] = None,
 ) -> tuple:
    runtime = _normalize_main_runtime(main_runtime)
    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
+    # `auto` can now resolve through task-specific or main fallback policy,
+    # so the task participates in the cache key. Non-auto providers keep the
+    # old cache shape because the explicit provider/model tuple is sufficient.
+    task_key = (task or "") if provider == "auto" else ""
    pool_hint = _pool_cache_hint(provider, main_runtime=main_runtime)
-    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision, pool_hint)
+    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision, task_key, pool_hint)


 def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
@@ -4553,6 +4672,7 @@ def _get_cached_client(
    api_mode: str = None,
    main_runtime: Optional[Dict[str, Any]] = None,
    is_vision: bool = False,
+    task: Optional[str] = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Get or create a cached client for the given provider.

@@ -4590,6 +4710,7 @@ def _get_cached_client(
        api_mode=api_mode,
        main_runtime=main_runtime,
        is_vision=is_vision,
+        task=task,
    )
    with _client_cache_lock:
        if cache_key in _client_cache:
@@ -4634,6 +4755,7 @@ def _get_cached_client(
        api_mode=api_mode,
        main_runtime=runtime,
        is_vision=is_vision,
+        task=task,
    )
    if client is not None:
        # For async clients, remember which loop they were created on so we
@@ -5004,7 +5126,7 @@ def _build_call_kwargs(

    # Provider-specific extra_body
    merged_extra = dict(extra_body or {})
-    if provider == "nous" or auxiliary_is_nous:
+    if provider == "nous":
        merged_extra.setdefault("tags", []).extend(_nous_portal_tags())
    if merged_extra:
        kwargs["extra_body"] = merged_extra
@@ -5139,7 +5261,7 @@ def call_llm(
            if not resolved_base_url:
                logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
                            task or "call", resolved_provider)
-                client, final_model = _get_cached_client("auto", main_runtime=main_runtime)
+                client, final_model = _get_cached_client("auto", main_runtime=main_runtime, task=task)
        if client is None:
            raise RuntimeError(
                f"No LLM provider configured for task={task} provider={resolved_provider}. "
@@ -5465,14 +5587,19 @@ def call_llm(

            # Fallback order (#26882, #26803):
            #   1. User-configured fallback_chain (per-task) if set
-            #   2. Main agent model (last-resort safety net)
-            # For auto users (no explicit aux provider), use the full
-            # auto-detection chain instead — its Step 1 IS the main agent
-            # model, so users on `auto` already get main-model fallback.
+            #   2. For auto: top-level main fallback_providers/fallback_model
+            #   3. For auto: built-in auxiliary discovery chain
+            #   4. For explicit aux providers: main agent model safety net
            fb_client, fb_model, fb_label = (None, None, "")
            if is_auto:
-                fb_client, fb_model, fb_label = _try_payment_fallback(
-                    resolved_provider, task, reason=reason)
+                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
+                    task, resolved_provider or "auto", reason=reason)
+                if fb_client is None:
+                    fb_client, fb_model, fb_label = _try_main_fallback_chain(
+                        task, resolved_provider or "auto", reason=reason)
+                if fb_client is None:
+                    fb_client, fb_model, fb_label = _try_payment_fallback(
+                        resolved_provider, task, reason=reason)
            else:
                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
                    task, resolved_provider or "auto", reason=reason)
@@ -5635,7 +5762,7 @@ async def async_call_llm(
            if not resolved_base_url:
                logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
                            task or "call", resolved_provider)
-                client, final_model = _get_cached_client("auto", async_mode=True)
+                client, final_model = _get_cached_client("auto", async_mode=True, main_runtime=main_runtime, task=task)
        if client is None:
            raise RuntimeError(
                f"No LLM provider configured for task={task} provider={resolved_provider}. "
@@ -5903,13 +6030,19 @@ async def async_call_llm(

            # Fallback order (#26882, #26803):
            #   1. User-configured fallback_chain (per-task) if set
-            #   2. Main agent model (last-resort safety net)
-            # Auto users get the full auto-detection chain instead — its
-            # Step 1 IS the main agent model.
+            #   2. For auto: top-level main fallback_providers/fallback_model
+            #   3. For auto: built-in auxiliary discovery chain
+            #   4. For explicit aux providers: main agent model safety net
            fb_client, fb_model, fb_label = (None, None, "")
            if is_auto:
-                fb_client, fb_model, fb_label = _try_payment_fallback(
-                    resolved_provider, task, reason=reason)
+                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
+                    task, resolved_provider or "auto", reason=reason)
+                if fb_client is None:
+                    fb_client, fb_model, fb_label = _try_main_fallback_chain(
+                        task, resolved_provider or "auto", reason=reason)
+                if fb_client is None:
+                    fb_client, fb_model, fb_label = _try_payment_fallback(
+                        resolved_provider, task, reason=reason)
            else:
                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
                    task, resolved_provider or "auto", reason=reason)
--- a/agent/background_review.py
+++ b/agent/background_review.py
@@ -237,18 +237,25 @@ _COMBINED_REVIEW_PROMPT = (
 def summarize_background_review_actions(
    review_messages: List[Dict],
    prior_snapshot: List[Dict],
+    notification_mode: str = "on",
 ) -> List[str]:
    """Build the human-facing action summary for a background review pass.

-    Walks the review agent's session messages and collects "successful tool
-    action" descriptions to surface to the user (e.g. "Memory updated").
-    Tool messages already present in ``prior_snapshot`` are skipped so we
-    don't re-surface stale results from the prior conversation that the
-    review agent inherited via ``conversation_history`` (issue #14944).
+    Walks the review agent's session messages and collects successful memory
+    and skill-management actions to surface to the user. Tool messages already
+    present in ``prior_snapshot`` are skipped so stale inherited results are
+    not re-surfaced as fresh background work (issue #14944).

-    Matching is by ``tool_call_id`` when available, with a content-equality
-    fallback for tool messages that lack one.
+    ``notification_mode`` controls display detail:
+    - ``off``: return no actions.
+    - ``on``: generic "Memory updated"/tool messages.
+    - ``verbose``: include compact content previews from tool-call arguments.
    """
+    mode = str(notification_mode or "on").lower()
+    if mode == "off":
+        return []
+    verbose = mode == "verbose"
+
    existing_tool_call_ids = set()
    existing_tool_contents = set()
    for prior in prior_snapshot or []:
@@ -262,6 +269,42 @@ def summarize_background_review_actions(
            if isinstance(content, str):
                existing_tool_contents.add(content)

+    # Map review-agent tool results back to the calls that produced them.  The
+    # result JSON only says "Entry added"; the call arguments contain action,
+    # target, and content previews.  Restricting to notify_tools also prevents
+    # helper tools from surfacing as memory work just because they succeeded.
+    notify_tools = {"memory", "skill_manage"}
+    all_tool_call_ids: set = set()
+    call_details: dict = {}
+    for msg in review_messages or []:
+        if not isinstance(msg, dict) or msg.get("role") != "assistant":
+            continue
+        for tc in msg.get("tool_calls", []) or []:
+            if not isinstance(tc, dict):
+                continue
+            fn = tc.get("function", {}) or {}
+            fn_name = fn.get("name", "")
+            tcid = tc.get("id")
+            if tcid:
+                all_tool_call_ids.add(tcid)
+            if fn_name not in notify_tools:
+                continue
+            try:
+                args = json.loads(fn.get("arguments", "{}"))
+            except (json.JSONDecodeError, TypeError):
+                args = {}
+            if tcid:
+                call_details[tcid] = {
+                    "tool": fn_name,
+                    "action": args.get("action", "?"),
+                    "target": args.get("target", "memory"),
+                    "content": args.get("content", ""),
+                    "old_text": args.get("old_text", ""),
+                    "name": args.get("name", ""),
+                    "old_string": args.get("old_string", ""),
+                    "new_string": args.get("new_string", ""),
+                }
+
    actions: List[str] = []
    for msg in review_messages or []:
        if not isinstance(msg, dict) or msg.get("role") != "tool":
@@ -273,6 +316,8 @@ def summarize_background_review_actions(
            content_str = msg.get("content")
            if isinstance(content_str, str) and content_str in existing_tool_contents:
                continue
+        if tcid and all_tool_call_ids and tcid not in call_details:
+            continue
        try:
            data = json.loads(msg.get("content", "{}"))
        except (json.JSONDecodeError, TypeError):
@@ -280,19 +325,75 @@ def summarize_background_review_actions(
        if not isinstance(data, dict) or not data.get("success"):
            continue
        message = data.get("message", "")
-        target = data.get("target", "")
-        if "created" in message.lower():
-            actions.append(message)
-        elif "updated" in message.lower():
-            actions.append(message)
-        elif "added" in message.lower() or (target and "add" in message.lower()):
-            label = "Memory" if target == "memory" else "User profile" if target == "user" else target
-            actions.append(f"{label} updated")
-        elif "Entry added" in message:
-            label = "Memory" if target == "memory" else "User profile" if target == "user" else target
-            actions.append(f"{label} updated")
-        elif "removed" in message.lower() or "replaced" in message.lower():
+        detail = call_details.get(tcid, {})
+        target = data.get("target", "") or detail.get("target", "")
+        is_skill = detail.get("tool") == "skill_manage"
+
+        message_lower = message.lower()
+        if not verbose:
+            if "created" in message_lower:
+                actions.append(message)
+                continue
+            if "updated" in message_lower:
+                actions.append(message)
+                continue
+            if is_skill and "patched" in message_lower:
+                actions.append(message)
+                continue
+
+        if is_skill:
+            label = "Skill"
+        elif target:
            label = "Memory" if target == "memory" else "User profile" if target == "user" else target
+        else:
+            continue
+
+        if verbose:
+            action = detail.get("action", "")
+            content = detail.get("content", "")
+            old_text = detail.get("old_text", "")
+            skill_name = detail.get("name", "")
+            max_preview = 120
+            if is_skill:
+                change = data.get("_change", {})
+                old_string = change.get("old", "") or detail.get("old_string", "")
+                new_string = change.get("new", "") or detail.get("new_string", "")
+                description = change.get("description", "")
+                if action == "patch" and (old_string or new_string):
+                    old_preview = old_string[:80].replace("\n", " ") + (
+                        "…" if len(old_string) > 80 else ""
+                    )
+                    new_preview = new_string[:80].replace("\n", " ") + (
+                        "…" if len(new_string) > 80 else ""
+                    )
+                    actions.append(
+                        f"📝 Skill '{skill_name}' patched: "
+                        f"\"{old_preview}\" → \"{new_preview}\""
+                    )
+                elif action == "create" and description:
+                    actions.append(f"📝 Skill '{skill_name}' created: {description}")
+                elif action == "edit" and description:
+                    actions.append(f"📝 Skill '{skill_name}' rewritten: {description}")
+                else:
+                    actions.append(f"📝 {message}" if message else f"Skill {action}")
+            elif action == "add" and content:
+                preview = content[:max_preview] + ("…" if len(content) > max_preview else "")
+                actions.append(f"{label} ➕ {preview}")
+            elif action == "replace" and content:
+                preview = content[:max_preview] + ("…" if len(content) > max_preview else "")
+                actions.append(f"{label} ✏️ {preview}")
+            elif action == "remove" and old_text:
+                preview = old_text[:60] + ("…" if len(old_text) > 60 else "")
+                actions.append(f"{label} ➖ {preview}")
+            else:
+                actions.append(f"{label} updated")
+        elif (
+            "added" in message_lower
+            or "replaced" in message_lower
+            or "removed" in message_lower
+            or (target and "add" in message.lower())
+            or "Entry added" in message
+        ):
            actions.append(f"{label} updated")
    return actions

@@ -522,6 +623,7 @@ def _run_review_in_thread(
        actions = summarize_background_review_actions(
            review_messages,
            messages_snapshot,
+            notification_mode=getattr(agent, "memory_notifications", "on"),
        )

        if actions:
--- a/agent/bedrock_adapter.py
+++ b/agent/bedrock_adapter.py
@@ -58,17 +58,34 @@ _bedrock_runtime_client_cache: Dict[str, Any] = {}
 _bedrock_control_client_cache: Dict[str, Any] = {}


+_MIN_BOTO3_VERSION = (1, 34, 59)
+
+
 def _require_boto3():
-    """Import boto3, raising a clear error if not installed."""
+    """Return the boto3 module; raise if it is missing or older than _MIN_BOTO3_VERSION."""
    try:
        import boto3
-        return boto3
    except ImportError:
        raise ImportError(
            "The 'boto3' package is required for the AWS Bedrock provider. "
            "Install it with: pip install boto3\n"
            "Or install Hermes with Bedrock support: pip install -e '.[bedrock]'"
        )
+    # converse() / converse_stream() need a recent boto3; the floor lives in
+    # _MIN_BOTO3_VERSION (NOTE(review): confirm it against the boto3 changelog).
+    # A system boto3 (e.g. Ubuntu 24.04 ships 1.34.46) may shadow the venv
+    # version pinned in pyproject.toml when Hermes is installed editable.
+    try:
+        installed = tuple(int(part) for part in boto3.__version__.split(".")[:3])
+    except (AttributeError, ValueError):
+        return boto3  # can't parse — don't block on version check
+    if installed < _MIN_BOTO3_VERSION:
+        raise RuntimeError(
+            f"boto3 {boto3.__version__} does not support converse_stream "
+            f"(minimum {'.'.join(map(str, _MIN_BOTO3_VERSION))} required). Upgrade with: "
+            f"pip install --upgrade boto3"
+        )
+    return boto3


 def _get_bedrock_runtime_client(region: str):
@@ -935,11 +952,14 @@ def build_converse_kwargs(
    if system_prompt:
        kwargs["system"] = system_prompt

-    if temperature is not None:
-        kwargs["inferenceConfig"]["temperature"] = temperature
+    from agent.anthropic_adapter import _forbids_sampling_params

-    if top_p is not None:
-        kwargs["inferenceConfig"]["topP"] = top_p
+    if not _forbids_sampling_params(model):
+        if temperature is not None:
+            kwargs["inferenceConfig"]["temperature"] = temperature
+
+        if top_p is not None:
+            kwargs["inferenceConfig"]["topP"] = top_p

    if stop_sequences:
        kwargs["inferenceConfig"]["stopSequences"] = stop_sequences
--- a/agent/codex_responses_adapter.py
+++ b/agent/codex_responses_adapter.py
@@ -127,14 +127,21 @@ def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> Lis
    return converted


-def _summarize_user_message_for_log(content: Any) -> str:
-    """Return a short text summary of a user message for logging/trajectory.
+def _summarize_user_message_for_log(content: Any, *, sep: str = " ") -> str:
+    """Flatten message content to a plain-text summary.

    Multimodal messages arrive as a list of ``{type:"text"|"image_url", ...}``
-    parts from the API server.  Logging, spinner previews, and trajectory
-    files all want a plain string — this helper extracts the first chunk of
-    text and notes any attached images.  Returns an empty string for empty
-    lists and ``str(content)`` for unexpected scalar types.
+    parts from the API server.  Several consumers want a plain string:
+
+    - Logging, spinner previews, and trajectory files (the default ``sep=" "``).
+    - External memory providers, which feed the text to regexes
+      (``sanitize_context``) and text APIs — a raw list crashes the sync with
+      ``expected string or bytes-like object, got 'list'`` (use ``sep="\\n"``).
+
+    Text parts are joined with ``sep``; images become a ``[N image(s)]`` marker
+    so the turn isn't recorded as if the attachment never existed.  Returns an
+    empty string for empty lists and ``str(content)`` for unexpected scalar
+    types.
    """
    if content is None:
        return ""
@@ -157,7 +164,7 @@ def _summarize_user_message_for_log(content: Any) -> str:
                    text_bits.append(text)
            elif ptype in {"image_url", "input_image"}:
                image_count += 1
-        summary = " ".join(text_bits).strip()
+        summary = sep.join(text_bits).strip()
        if image_count:
            note = f"[{image_count} image{'s' if image_count != 1 else ''}]"
            summary = f"{note} {summary}" if summary else note
@@ -1074,6 +1081,7 @@ def _normalize_codex_response(
    message_items_raw: List[Dict[str, Any]] = []
    tool_calls: List[Any] = []
    has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
+    saw_streaming_or_item_incomplete = response_status in {"queued", "in_progress"}
    saw_commentary_phase = False
    saw_final_answer_phase = False
    saw_reasoning_item = False
@@ -1088,6 +1096,7 @@ def _normalize_codex_response(

        if item_status in {"queued", "in_progress", "incomplete"}:
            has_incomplete_items = True
+            saw_streaming_or_item_incomplete = True

        if item_type == "message":
            item_phase = getattr(item, "phase", None)
@@ -1245,7 +1254,9 @@ def _normalize_codex_response(
        finish_reason = "tool_calls"
    elif leaked_tool_call_text:
        finish_reason = "incomplete"
-    elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
+    elif saw_streaming_or_item_incomplete:
+        finish_reason = "incomplete"
+    elif (has_incomplete_items or saw_commentary_phase) and not saw_final_answer_phase:
        finish_reason = "incomplete"
    elif (reasoning_items_raw or reasoning_parts or saw_reasoning_item) and not final_text:
        # Response contains only reasoning (encrypted thinking state and/or
--- a/agent/coding_context.py
+++ b/agent/coding_context.py
@@ -190,6 +190,10 @@ CODING_AGENT_GUIDANCE = (
    "Verify, and know when to stop:\n"
    "- Use `terminal` for git, builds, tests, and inspection. Run the relevant "
    "tests/linter/build and confirm they pass before claiming the work is done.\n"
+    "- Terminal state persists across calls: current directory and exported "
+    "environment variables carry forward. Activate a virtualenv or export setup "
+    "vars once, then reuse that state instead of re-sourcing it before every "
+    "test command.\n"
    "- Fix root causes, not symptoms: when you find a bug, check sibling call "
    "paths for the same flaw and fix the class, not just the reported site.\n"
    "- When fixing linter/type errors on a file, stop after about three "
@@ -711,10 +715,13 @@ def build_coding_workspace_block(cwd: Optional[str | Path] = None) -> str:
            lines.append("- Branch: (detached HEAD)")

        # Linked worktree: the per-worktree git dir differs from the shared common dir.
+        # We surface the fact that it's a worktree (so the model knows branches/stashes
+        # are shared state) but deliberately do NOT expose the primary tree path —
+        # giving the model a second absolute path causes it to sometimes run commands
+        # in the wrong directory.
        git_dir, common_dir = _git(root, "rev-parse", "--git-dir"), _git(root, "rev-parse", "--git-common-dir")
        if git_dir and common_dir and Path(git_dir).resolve() != Path(common_dir).resolve():
-            main_tree = Path(common_dir).resolve().parent
-            lines.append(f"- Worktree: linked (primary tree at {main_tree})")
+            lines.append("- Worktree: linked (git state shared with primary tree)")

        dirty = [f"{n} {label}" for label, n in (
            ("staged", counts["staged"]), ("modified", counts["modified"]),
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -69,6 +69,31 @@ SUMMARY_PREFIX = (
 )
 LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"

+# Metadata key added to context compression summary messages so that frontends
+# (CLI, Desktop, gateway, TUI) can distinguish them from real assistant/user
+# messages and filter or render them appropriately without content-prefix
+# heuristics. See https://github.com/NousResearch/hermes-agent/issues/38389
+#
+# Underscore-prefixed ON PURPOSE: the wire sanitizers
+# (agent/transports/chat_completions.py convert_messages and the summary-path
+# mirror in agent/chat_completion_helpers.py) strip every top-level message
+# key starting with "_" before the request leaves the process. Strict
+# OpenAI-compatible gateways (Fireworks, Mistral, Moonshot/Kimi, opencode-go)
+# reject payloads carrying unknown keys with "Extra inputs are not permitted",
+# poisoning every subsequent request in the session — a bare key like
+# "is_compressed_summary" would reach the wire and trip exactly that.
+COMPRESSED_SUMMARY_METADATA_KEY = "_compressed_summary"
+
+# Appended to every standalone summary message (and to the merged-into-tail
+# prefix) so the model has an unambiguous "summary ends here" boundary.
+# Without it, weak models read the verbatim "## Active Task" quote as fresh
+# user input (#11475, #14521) or regurgitate an assistant-role summary as
+# their own output (#33256).
+_SUMMARY_END_MARKER = (
+    "--- END OF CONTEXT SUMMARY — "
+    "respond to the message below, not the summary above ---"
+)
+
 # Handoff prefixes that shipped in earlier releases. A summary persisted under
 # one of these can be inherited into a resumed lineage (#35344); when it is
 # re-normalized on re-compaction we must strip the OLD prefix too, otherwise the
@@ -143,10 +168,23 @@ _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
 # become another unbounded transcript copy after the LLM summarizer failed.
 _FALLBACK_SUMMARY_MAX_CHARS = 8_000
 _FALLBACK_TURN_MAX_CHARS = 700
+_AUTO_FOCUS_MAX_TURNS = 3
+_AUTO_FOCUS_TURN_MAX_CHARS = 260
+_AUTO_FOCUS_MAX_CHARS = 700
+# Keep a short run of recent messages verbatim even when the token budget is
+# already exhausted.  The public ``protect_last_n`` default is intentionally
+# high for small/light tails, but using all 20 as a hard floor here would bring
+# back the old large-tool-output case where nothing can be compacted.
+_MAX_TAIL_MESSAGE_FLOOR = 8


 _PATH_MENTION_RE = re.compile(r"(?:/|~/?|[A-Za-z]:\\)[^\s`'\")\]}<>]+")

+# MEDIA delivery directives must not reach the summarizer — if one leaks into
+# the summary, the downstream model may re-emit it as an active directive on
+# the next turn, triggering bogus attachment sends (#14665).
+_MEDIA_DIRECTIVE_RE = re.compile(r"MEDIA:\S+")
+

 def _dedupe_append(items: list[str], value: str, *, limit: int) -> None:
    value = value.strip()
@@ -1007,6 +1045,7 @@ class ContextCompressor(ContextEngine):
        for msg in turns:
            role = msg.get("role", "unknown")
            content = redact_sensitive_text(msg.get("content") or "")
+            content = _MEDIA_DIRECTIVE_RE.sub("[media attachment]", content)

            # Tool results: keep enough content for the summarizer
            if role == "tool":
@@ -1454,7 +1493,7 @@ Use this exact structure:
            prompt += f"""

 FOCUS TOPIC: "{focus_topic}"
-The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget. Even for the focus topic, NEVER preserve API keys, tokens, passwords, or credentials — use [REDACTED]."""
+This compaction should PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget. Even for the focus topic, NEVER preserve API keys, tokens, passwords, or credentials — use [REDACTED]."""

        try:
            call_kwargs = {
@@ -1607,7 +1646,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        text = (summary or "").strip()
        for prefix in (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX, *_HISTORICAL_SUMMARY_PREFIXES):
            if text.startswith(prefix):
-                return text[len(prefix):].lstrip()
+                text = text[len(prefix):].lstrip()
+                break
+        # Strip the trailing end marker too — a rehydrated handoff body that
+        # keeps it would leak the boundary directive into the iterative-update
+        # summarizer prompt (and the marker is re-appended on insertion anyway).
+        if text.endswith(_SUMMARY_END_MARKER):
+            text = text[: -len(_SUMMARY_END_MARKER)].rstrip()
        return text

    @classmethod
@@ -1623,6 +1668,52 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            return True
        return any(text.startswith(p) for p in _HISTORICAL_SUMMARY_PREFIXES)

+    @staticmethod
+    def _has_compressed_summary_metadata(message: Any) -> bool:
+        """Tell whether *message* is tagged as a context-compaction summary.
+
+        Frontends, the CLI, and the gateway can call this to separate
+        compaction summaries from genuine assistant/user turns instead of
+        sniffing content prefixes.  Anything that is not a dict is reported
+        as untagged; a dict is tagged when the metadata key holds a truthy value.
+        """
+        if isinstance(message, dict):
+            return bool(message.get(COMPRESSED_SUMMARY_METADATA_KEY))
+        return False
+
+    @classmethod
+    def _derive_auto_focus_topic(
+        cls,
+        messages: List[Dict[str, Any]],
+    ) -> Optional[str]:
+        """Build a short focus hint out of the newest genuine user turns.
+
+        Walks newest-to-oldest, skipping non-user messages, context summaries and
+        textless turns; kept turns are redacted, whitespace-collapsed and capped.
+        Output lists them oldest first; None when no turn qualifies.
+        """
+        newest_first: list[str] = []
+        for msg in reversed(messages):
+            raw = msg.get("content")
+            if msg.get("role") != "user" or cls._is_context_summary_content(raw):
+                continue
+            snippet = redact_sensitive_text(_content_text_for_contains(raw).strip())
+            if not snippet:
+                continue
+            snippet = " ".join(snippet.split())
+            if len(snippet) > _AUTO_FOCUS_TURN_MAX_CHARS:
+                snippet = snippet[: _AUTO_FOCUS_TURN_MAX_CHARS - 1].rstrip() + "…"
+            newest_first.append(snippet)
+            if len(newest_first) >= _AUTO_FOCUS_MAX_TURNS:
+                break
+        if not newest_first:
+            return None
+        bullets = "\n".join(f"- {item}" for item in reversed(newest_first))
+        focus = "Recent user focus:\n" + bullets
+        if len(focus) > _AUTO_FOCUS_MAX_CHARS:
+            return focus[: _AUTO_FOCUS_MAX_CHARS - 1].rstrip() + "…"
+        return focus
+
    @classmethod
    def _find_latest_context_summary(
        cls,
@@ -1775,6 +1866,105 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                return i
        return -1

+    def _find_last_assistant_message_idx(
+        self, messages: List[Dict[str, Any]], head_end: int
+    ) -> int:
+        """Locate the newest assistant reply with readable text at or after
+        *head_end*.
+
+        Scans from the end of *messages* down to index *head_end* inclusive.
+        An assistant message is a readable reply when its ``content`` is a
+        non-blank string, or a list holding a dict part whose ``text`` (or,
+        failing that, ``content``) is a non-blank string.  Messages that only
+        carry ``tool_calls`` have no such text and do not qualify (#29824).
+
+        Returns:
+            Index of the newest readable reply.  If the range has assistant
+            messages but none is readable, the index of the newest assistant
+            message of any kind.  ``-1`` when the range has no assistant message.
+        """
+
+        def _has_text(body: Any) -> bool:
+            # Plain string content, or multimodal / Anthropic-style parts.
+            if isinstance(body, str):
+                return bool(body.strip())
+            if not isinstance(body, list):
+                return False
+            for part in body:
+                if isinstance(part, dict):
+                    value = part.get("text") or part.get("content")
+                    if isinstance(value, str) and value.strip():
+                        return True
+            return False
+
+        fallback = -1
+        for idx in range(len(messages) - 1, head_end - 1, -1):
+            if messages[idx].get("role") != "assistant":
+                continue
+            if _has_text(messages[idx].get("content")):
+                return idx
+            if fallback < 0:
+                fallback = idx
+        return fallback
+
+    def _ensure_last_assistant_message_in_tail(
+        self,
+        messages: List[Dict[str, Any]],
+        cut_idx: int,
+        head_end: int,
+    ) -> int:
+        """Move the tail cut back so the newest assistant reply stays verbatim.
+
+        Background (#29824): the token-budget cut can strand the most recent
+        visible assistant reply inside the region that gets summarised —
+        e.g. behind one oversized tool result or a long run of
+        tool-call/result pairs.  The reply is then folded into the
+        ``[CONTEXT COMPACTION — REFERENCE ONLY]`` block, and the WebUI session
+        viewer (``web/src/pages/SessionsPage.tsx``) and the TUI chat panel
+        show that block where the operator had just been reading the reply:
+
+            User:       "i cant see the output of the last message you
+                         sent, i did see it previously, however now see
+                         'context compaction'"
+
+        Counterpart of ``_ensure_last_user_message_in_tail`` keyed on the
+        assistant role.  The anchor comes from
+        ``_find_last_assistant_message_idx``; the boundary is then passed
+        through ``_align_boundary_backward`` so a ``tool_call`` /
+        ``tool_result`` group directly before the anchor is not split
+        (orphaned tool messages would be dropped by ``_sanitize_tool_pairs``).
+
+        NOTE(review): an anchor far behind *cut_idx* pulls every message in
+        between into the tail — confirm that is fine for long tool-only runs.
+
+        Args:
+            messages: Full message list being compacted.
+            cut_idx: Tail start index chosen so far.
+            head_end: Boundary of the protected head region.
+
+        Returns:
+            *cut_idx* unchanged when there is no assistant anchor or it is
+            already at/after *cut_idx*; otherwise the aligned anchor index,
+            clamped to at least ``head_end + 1``.
+        """
+        anchor = self._find_last_assistant_message_idx(messages, head_end)
+        if anchor < 0 or anchor >= cut_idx:
+            # Either nothing to anchor on, or the budget walk already kept
+            # the reply inside the tail.
+            return cut_idx
+        # Re-align so an ``assistant(tool_calls)`` → ``tool(result)`` group that
+        # directly precedes the anchored reply is not split by the new cut.
+        aligned = self._align_boundary_backward(messages, anchor)
+        if not self.quiet_mode:
+            logger.debug(
+                "Anchoring tail cut to last assistant message at index %d "
+                "(was %d, aligned to %d) to keep the previously-visible "
+                "reply out of the compaction summary (#29824)",
+                anchor, cut_idx, aligned,
+            )
+        # Never let the cut move into the head region.
+        return max(aligned, head_end + 1)
+
    def _ensure_last_user_message_in_tail(
        self,
        messages: List[Dict[str, Any]],
@@ -1833,11 +2023,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        derived from ``summary_target_ratio * context_length``, so it
        scales automatically with the model's context window.

-        Token budget is the primary criterion.  A hard minimum of 3 messages
-        is always protected, but the budget is allowed to exceed by up to
-        1.5x to avoid cutting inside an oversized message (tool output, file
-        read, etc.).  If even the minimum 3 messages exceed 1.5x the budget
-        the cut is placed right after the head so compression still runs.
+        Token budget is the primary criterion.  A bounded message-count floor
+        keeps a short run of recent turns verbatim even when the budget is
+        exhausted, but the budget is allowed to exceed by up to 1.5x to avoid
+        cutting inside an oversized message (tool output, file read, etc.). If
+        even that floor exceeds 1.5x the budget, the cut is placed right after
+        the head so compression still runs.

        Never cuts inside a tool_call/result group.  Always ensures the most
        recent user message is in the tail (see ``_ensure_last_user_message_in_tail``).
@@ -1845,8 +2036,19 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        if token_budget is None:
            token_budget = self.tail_token_budget
        n = len(messages)
-        # Hard minimum: always keep at least 3 messages in the tail
-        min_tail = min(3, n - head_end - 1) if n - head_end > 1 else 0
+        # Hard minimum: always keep a bounded recent-message floor in the tail.
+        # ``protect_last_n`` remains a minimum up to the cap; the cap avoids
+        # preserving a whole run of bulky tool outputs on every compaction.
+        available_tail = max(0, n - head_end - 1)
+        min_tail_floor = max(3, min(self.protect_last_n, _MAX_TAIL_MESSAGE_FLOOR))
+        # Leave at least two non-head messages available to summarize on short
+        # transcripts; otherwise compression can replace a tiny middle with a
+        # summary and save no messages at all.
+        compressible_tail_cap = max(3, available_tail - 2)
+        min_tail = (
+            min(min_tail_floor, compressible_tail_cap, available_tail)
+            if available_tail > 1 else 0
+        )
        soft_ceiling = int(token_budget * 1.5)
        accumulated = 0
        cut_idx = n  # start from beyond the end
@@ -1918,6 +2120,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        # active task is never lost to compression (fixes #10896).
        cut_idx = self._ensure_last_user_message_in_tail(messages, cut_idx, head_end)

+        # Ensure the most recent assistant message is always in the tail
+        # so the previously-visible reply isn't silently rolled into the
+        # ``[CONTEXT COMPACTION — REFERENCE ONLY]`` block (fixes #29824).
+        # Each anchor only walks ``cut_idx`` backward, so chaining them is
+        # monotonic — the tail can only grow, never shrink.
+        cut_idx = self._ensure_last_assistant_message_in_tail(messages, cut_idx, head_end)
+
        return max(cut_idx, head_end + 1)

    # ------------------------------------------------------------------
@@ -2070,7 +2279,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            )

        # Phase 3: Generate structured summary
-        summary = self._generate_summary(turns_to_summarize, focus_topic=focus_topic)
+        summary_focus_topic = focus_topic or self._derive_auto_focus_topic(messages)
+        summary = self._generate_summary(turns_to_summarize, focus_topic=summary_focus_topic)

        # If summary generation failed, behavior splits on
        # ``abort_on_summary_failure`` (config: compression.abort_on_summary_failure):
@@ -2150,32 +2360,33 @@ The user has requested that this compaction PRIORITISE preserving all informatio

        # When the summary lands as a standalone role="user" message,
        # weak models read the verbatim "## Active Task" quote of a past
-        # user request as fresh input (#11475, #14521). Append the explicit
-        # end marker — the same one used in the merge-into-tail path — so
-        # the model has a clear "summary above, not new input" signal.
-        if not _merge_summary_into_tail and summary_role == "user":
-            summary = (
-                summary
-                + "\n\n--- END OF CONTEXT SUMMARY — "
-                "respond to the message below, not the summary above ---"
-            )
+        # user request as fresh input (#11475, #14521).
+        # When it lands as role="assistant", models may regurgitate the
+        # summary text as their own output (#33256). In both cases, append
+        # the explicit end marker so the model has a clear "summary ends
+        # here, respond to the message below" signal.
+        if not _merge_summary_into_tail:
+            summary = summary + "\n\n" + _SUMMARY_END_MARKER

        if not _merge_summary_into_tail:
-            compressed.append({"role": summary_role, "content": summary})
+            compressed.append({
+                "role": summary_role,
+                "content": summary,
+                COMPRESSED_SUMMARY_METADATA_KEY: True,
+            })

        for i in range(compress_end, n_messages):
            msg = messages[i].copy()
            if _merge_summary_into_tail and i == compress_end:
-                merged_prefix = (
-                    summary
-                    + "\n\n--- END OF CONTEXT SUMMARY — "
-                    "respond to the message below, not the summary above ---\n\n"
-                )
+                merged_prefix = summary + "\n\n" + _SUMMARY_END_MARKER + "\n\n"
                msg["content"] = _append_text_to_content(
                    msg.get("content"),
                    merged_prefix,
                    prepend=True,
                )
+                # Mark the merged message so frontends can identify it as
+                # containing a compression summary prefix.
+                msg[COMPRESSED_SUMMARY_METADATA_KEY] = True
                _merge_summary_into_tail = False
            compressed.append(msg)

--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@@ -40,6 +40,16 @@ from agent.model_metadata import estimate_request_tokens_rough

 logger = logging.getLogger(__name__)

+# Stable marker the gateway matches on to re-tag the auto-compaction lifecycle
+# status as ``kind="compacting"`` (tui_gateway/server.py::_status_update), so
+# drivers like the desktop app can show an explicit "Summarizing…" indicator
+# instead of the transcript appearing to silently reset. Keep the marker phrase
+# intact if you reword COMPACTION_STATUS.
+COMPACTION_STATUS_MARKER = "Compacting context"
+COMPACTION_STATUS = (
+    f"🗜️ {COMPACTION_STATUS_MARKER} — summarizing earlier conversation so I can continue..."
+)
+

 def _compression_lock_holder(agent: Any) -> str:
    """Build a unique holder id for the lock: pid:tid:agent-instance:uuid.
@@ -324,9 +334,7 @@ def compress_context(
        f"{approx_tokens:,}" if approx_tokens else "unknown", agent.model,
        focus_topic,
    )
-    agent._emit_status(
-        "🗜️ Compacting context — summarizing earlier conversation so I can continue..."
-    )
+    agent._emit_status(COMPACTION_STATUS)

    # ── Compression lock ────────────────────────────────────────────────
    # Atomic, state.db-backed lock per session_id.  Without this, two
@@ -595,6 +603,20 @@ def compress_context(
            force=True,
        )

+    # Emit session:compress event so hooks (e.g. MemPalace sync) can ingest
+    # the completed old session before its details are lost.
+    _old_sid_for_event = locals().get("old_session_id")
+    if getattr(agent, "event_callback", None):
+        try:
+            agent.event_callback("session:compress", {
+                "platform": agent.platform or "",
+                "session_id": agent.session_id,
+                "old_session_id": _old_sid_for_event or "",
+                "compression_count": agent.context_compressor.compression_count,
+            })
+        except Exception as e:
+            logger.debug("event_callback error on session:compress: %s", e)
+
    # Keep the post-compression rough estimate for diagnostics, but do not
    # treat it as provider-reported prompt usage. Schema-heavy rough estimates
    # can remain above threshold even after the next real API request fits.
@@ -631,7 +653,11 @@ def compress_context(
    return compressed, new_system_prompt


-def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
+def try_shrink_image_parts_in_messages(
+    api_messages: list,
+    *,
+    max_dimension: int = 8000,
+) -> bool:
    """Re-encode all native image parts at a smaller size to recover from
    image-too-large errors (Anthropic 5 MB, unknown other providers).

@@ -642,7 +668,8 @@ def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
    Strategy: look for ``image_url`` / ``input_image`` parts carrying a
    ``data:image/...;base64,...`` payload.  For each one whose encoded
    size exceeds 4 MB (a safe target that slides under Anthropic's 5 MB
-    ceiling with header overhead), write the base64 to a tempfile, call
+    ceiling with header overhead) or whose longest side exceeds
+    ``max_dimension``, write the base64 to a tempfile, call
    ``vision_tools._resize_image_for_vision`` to produce a smaller data
    URL, and substitute it in place.

@@ -664,10 +691,9 @@ def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
    # after a confirmed provider rejection, so the alternative is failure.
    target_bytes = 4 * 1024 * 1024
    # Anthropic enforces an 8000px per-side dimension cap independently of
-    # the 5 MB byte cap.  A tall screenshot can be well under 5 MB yet far
-    # over 8000px (e.g. 1200×12000 at 0.06 MB).  We check pixel dimensions
-    # even when the byte budget is fine.
-    max_dimension = 8000
+    # the 5 MB byte cap.  In many-image requests, the provider can report a
+    # lower cap (observed: 2000px).  The caller passes that parsed ceiling
+    # when the rejection includes it.
    changed_count = 0
    # Track parts that are over the target but could NOT be shrunk under it.
    # If any survive, retrying is pointless — the same oversized payload will
@@ -684,9 +710,9 @@ def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
        # Check both byte size AND pixel dimensions.
        needs_shrink = len(url) > target_bytes  # over byte budget
        if not needs_shrink:
-            # Even if bytes are fine, check pixel dimensions against
-            # Anthropic's 8000px cap.  A tall image can be tiny in bytes
-            # yet huge in pixels.
+            # Even if bytes are fine, check pixel dimensions against the
+            # provider's reported per-side cap.  A screenshot can be tiny in
+            # bytes yet too large in pixels.
            try:
                import base64 as _b64_dim
                header_d, _, data_d = url.partition(",")
@@ -795,6 +821,8 @@ def try_shrink_image_parts_in_messages(api_messages: list) -> bool:


 __all__ = [
+    "COMPACTION_STATUS",
+    "COMPACTION_STATUS_MARKER",
    "check_compression_model_feasibility",
    "replay_compression_warning",
    "compress_context",
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -71,6 +71,35 @@ logger = logging.getLogger(__name__)
 INTERRUPT_WAITING_FOR_MODEL_PREFIX = "Operation interrupted: waiting for model response ("


+def _image_error_max_dimension(error: Exception) -> Optional[int]:
+    """Pull the per-side pixel cap out of an image-dimension rejection.
+
+    Searches the lowercased text of *error*, ``error.message`` and
+    ``error.body`` for ``max allowed size[ for ...]: <N> pixels``.  Returns
+    ``N`` when it lies in 512..8000 inclusive, otherwise ``None``.
+    """
+    fragments = []
+    for source in (error, getattr(error, "message", None), getattr(error, "body", None)):
+        if not source:
+            continue
+        try:
+            fragments.append(str(source))
+        except Exception:
+            # Best effort: an unprintable field simply contributes nothing.
+            continue
+    haystack = " ".join(fragments).lower()
+    if not all(word in haystack for word in ("image", "dimension", "max allowed size")):
+        return None
+    found = re.search(r"max allowed size(?:\s+for [^:]+)?:\s*(\d{3,5})\s*pixels?", haystack)
+    try:
+        ceiling = int(found.group(1)) if found else None
+    except ValueError:
+        return None
+    if ceiling is not None and 512 <= ceiling <= 8000:
+        return ceiling
+    return None
+
+
 def _ollama_context_limit_error(agent: Any, request_tokens: int) -> Optional[str]:
    """Return a user-facing error when Ollama is loaded with too little context."""
    if not getattr(agent, "tools", None):
@@ -271,11 +300,20 @@ def _restore_or_build_system_prompt(agent, system_message, conversation_history)
                agent.session_id, exc,
            )

-    if stored_prompt:
+    if stored_prompt and _stored_prompt_matches_runtime(agent, stored_prompt):
        # Continuing session — reuse the exact system prompt from the
        # previous turn so the Anthropic cache prefix matches.
        agent._cached_system_prompt = stored_prompt
        return
+    if stored_prompt:
+        stored_state = "stale_runtime"
+        logger.info(
+            "Stored system prompt for session %s has stale runtime identity; "
+            "rebuilding for model=%s provider=%s.",
+            agent.session_id,
+            getattr(agent, "model", "") or "",
+            getattr(agent, "provider", "") or "",
+        )

    if conversation_history and stored_state in ("null", "empty"):
        # Continuing session whose stored prompt is unusable.  The
@@ -337,6 +375,30 @@ def _restore_or_build_system_prompt(agent, system_message, conversation_history)
            )


+def _stored_prompt_matches_runtime(agent, prompt: str) -> bool:
+    """Check a persisted prompt's ``Model:`` / ``Provider:`` lines against *agent*.
+
+    For each label the last line starting with ``<label>:`` supplies the stored
+    value.  A label only counts as stale when both the stored value and the
+    agent's current value are non-empty and differ; otherwise it is accepted.
+
+    Returns:
+        False when either label is stale, True otherwise.
+    """
+    prompt_lines = prompt.splitlines()
+    for label, attr in (("Model", "model"), ("Provider", "provider")):
+        marker = f"{label}:"
+        stored = ""
+        for line in prompt_lines:
+            if line.startswith(marker):
+                # Later occurrences override earlier ones.
+                stored = line[len(marker):].strip()
+        current = str(getattr(agent, attr, "") or "").strip()
+        if stored and current and stored != current:
+            return False
+    return True
+
+
 def _get_continuation_prompt(is_partial_stub: bool, dropped_tools: Optional[List[str]] = None) -> str:
    if is_partial_stub and dropped_tools:
        tool_list = ", ".join(dropped_tools[:3])
@@ -368,6 +430,42 @@ def _get_continuation_prompt(is_partial_stub: bool, dropped_tools: Optional[List
        )


+# Shared recovery hint appended to every content-policy refusal message. Both
+# the HTTP-200 refusal path (``finish_reason=content_filter``) and the
+# exception path (a provider moderation error classified as
+# ``content_policy_blocked``) end with the same actionable next steps, so they
+# share one trailer to keep the guidance from drifting between the two sites.
+_CONTENT_POLICY_RECOVERY_HINT = (
+    "Try rephrasing the request, narrowing the context, or "
+    "adding a fallback provider with `hermes fallback add`."
+)
+
+
+def _content_policy_blocked_result(
+    messages: List[Dict],
+    api_call_count: int,
+    *,
+    final_response: str,
+    error_detail: str,
+) -> Dict[str, Any]:
+    """Assemble the turn result returned when a content policy blocks the turn.
+
+    The result marks the turn as failed and not completed, carries the
+    user-facing *final_response*, passes *messages* and *api_call_count*
+    through, and prefixes *error_detail* with ``content_policy_blocked:`` so
+    every caller reports the block in one shape.
+    """
+    result: Dict[str, Any] = dict(
+        final_response=final_response,
+        messages=messages,
+        api_calls=api_call_count,
+        completed=False,
+        failed=True,
+    )
+    result["error"] = f"content_policy_blocked: {error_detail}"
+    return result
+
+
 def run_conversation(
    agent,
    user_message: str,
@@ -595,7 +693,11 @@ def run_conversation(
        # landed after an orphan tool result). Most providers return
        # empty content on malformed sequences, which would otherwise
        # retrigger the empty-retry loop indefinitely.
-        repaired_seq = agent._repair_message_sequence(messages)
+        # repair_message_sequence_with_cursor also recomputes the SessionDB
+        # flush cursor (_last_flushed_db_idx) when repair compacts the list,
+        # so the turn-end flush doesn't skip the assistant/tool chain (#44837).
+        from agent.agent_runtime_helpers import repair_message_sequence_with_cursor
+        repaired_seq = repair_message_sequence_with_cursor(agent, messages)
        if repaired_seq > 0:
            request_logger.info(
                "Repaired %s message-alternation violations before request (session=%s)",
@@ -703,7 +805,10 @@ def run_conversation(
        # a thinking-only turn. Runs on the per-call copy only — the
        # stored conversation history keeps the reasoning block for the
        # UI transcript and session persistence.
-        api_messages = agent._drop_thinking_only_and_merge_users(api_messages)
+        api_messages = agent._drop_thinking_only_and_merge_users(
+            api_messages,
+            drop_codex_reasoning_items=agent.api_mode != "codex_responses",
+        )

        # Normalize message whitespace and tool-call JSON for consistent
        # prefix matching.  Ensures bit-perfect prefixes across turns,
@@ -1312,6 +1417,106 @@ def run_conversation(
                        )
                        finish_reason = "length"

+                # ── Content-policy refusal (HTTP 200) ──────────────────
+                # The model — or the provider's safety system — returned a
+                # *successful* response whose stop/finish reason is a refusal:
+                # Anthropic ``stop_reason="refusal"`` → ``content_filter``;
+                # OpenAI / portal ``finish_reason="content_filter"`` or a
+                # populated ``message.refusal`` (mapped in the chat_completions
+                # transport); Bedrock ``guardrail_intervened``. The content is
+                # typically empty, so without this branch the response falls
+                # through to the empty-response / invalid-response retry loops
+                # and is mis-surfaced as "rate limited" / "no content after
+                # retries" — burning paid attempts reproducing a deterministic
+                # refusal. Surface it clearly and stop. Mirrors the
+                # exception-based ``content_policy_blocked`` recovery: try a
+                # configured fallback once, otherwise return the refusal.
+                if finish_reason == "content_filter":
+                    _refusal_transport = agent._get_transport()
+                    if agent.api_mode == "anthropic_messages":
+                        _refusal_result = _refusal_transport.normalize_response(
+                            response, strip_tool_prefix=agent._is_anthropic_oauth
+                        )
+                    else:
+                        _refusal_result = _refusal_transport.normalize_response(response)
+                    _refusal_text = (getattr(_refusal_result, "content", None) or "").strip()
+                    # Some refusals carry the explanation only in the reasoning
+                    # channel; fall back to it so the user sees *something*.
+                    if not _refusal_text:
+                        _refusal_text = (agent._extract_reasoning(_refusal_result) or "").strip()
+
+                    agent._invoke_api_request_error_hook(
+                        task_id=effective_task_id,
+                        turn_id=turn_id,
+                        api_request_id=api_request_id,
+                        api_call_count=api_call_count,
+                        api_start_time=api_start_time,
+                        api_kwargs=api_kwargs,
+                        error_type="ContentPolicyBlocked",
+                        error_message=_refusal_text or "model declined to respond (content_filter)",
+                        status_code=None,
+                        retry_count=retry_count,
+                        max_retries=max_retries,
+                        retryable=False,
+                        reason=FailoverReason.content_policy_blocked.value,
+                    )
+
+                    if thinking_spinner:
+                        thinking_spinner.stop("")
+                        thinking_spinner = None
+                    if agent.thinking_callback:
+                        agent.thinking_callback("")
+
+                    # Deterministic for the unchanged prompt — never retry.
+                    # Try a configured fallback once (a different model may not
+                    # refuse); otherwise surface the refusal terminally.
+                    if agent._has_pending_fallback():
+                        agent._buffer_status(
+                            "⚠️ Model declined to respond (safety refusal) — trying fallback..."
+                        )
+                    if agent._try_activate_fallback():
+                        retry_count = 0
+                        compression_attempts = 0
+                        _retry.primary_recovery_attempted = False
+                        continue
+
+                    agent._flush_status_buffer()
+                    _refusal_log = (
+                        _refusal_text[:500] + "..."
+                        if len(_refusal_text) > 500
+                        else _refusal_text
+                    )
+                    logger.warning(
+                        "%sModel declined to respond (finish_reason=content_filter). "
+                        "model=%s provider=%s refusal=%s",
+                        agent.log_prefix, agent.model, agent.provider,
+                        _refusal_log or "(no text)",
+                    )
+                    agent._emit_status(
+                        "⚠️ The model declined to respond to this request (safety refusal)."
+                    )
+
+                    _refusal_detail = (
+                        f"Model's explanation: {_refusal_text}"
+                        if _refusal_text
+                        else "The model returned no explanation."
+                    )
+                    _refusal_response = (
+                        "⚠️  The model declined to respond to this request "
+                        "(safety refusal — not a Hermes/gateway failure).\n\n"
+                        f"{_refusal_detail}\n\n"
+                        f"{_CONTENT_POLICY_RECOVERY_HINT}"
+                    )
+
+                    agent._cleanup_task_resources(effective_task_id)
+                    agent._persist_session(messages, conversation_history)
+                    return _content_policy_blocked_result(
+                        messages,
+                        api_call_count,
+                        final_response=_refusal_response,
+                        error_detail=_refusal_text or "model declined (content_filter)",
+                    )
+
                if finish_reason == "length":
                    if getattr(response, "id", "") == PARTIAL_STREAM_STUB_ID:
                        agent._vprint(
@@ -2063,7 +2268,11 @@ def run_conversation(
                    and not _retry.image_shrink_retry_attempted
                ):
                    _retry.image_shrink_retry_attempted = True
-                    if agent._try_shrink_image_parts_in_messages(api_messages):
+                    image_max_dimension = _image_error_max_dimension(api_error) or 8000
+                    if agent._try_shrink_image_parts_in_messages(
+                        api_messages,
+                        max_dimension=image_max_dimension,
+                    ):
                        agent._vprint(
                            f"{agent.log_prefix}📐 Image(s) exceeded provider size limit — "
                            f"shrank and retrying...",
@@ -2631,10 +2840,13 @@ def run_conversation(
                    except Exception:
                        pass
                    if _genuine_nous_rate_limit:
-                        # Skip straight to max_retries -- the
-                        # top-of-loop guard will handle fallback or
-                        # bail cleanly.
-                        retry_count = max_retries
+                        # Re-enter the loop exactly once so the
+                        # top-of-loop Nous guard handles fallback or
+                        # bails cleanly. (Setting retry_count to
+                        # max_retries would make the while condition
+                        # false immediately and the guard would never
+                        # run -- no fallback, generic exhaustion error.)
+                        retry_count = max(0, max_retries - 1)
                        continue
                    # Upstream capacity 429: fall through to normal
                    # retry logic.  A different model (or the same
@@ -3076,20 +3288,17 @@ def run_conversation(
                    if classified.reason == FailoverReason.content_policy_blocked:
                        _summary = agent._summarize_api_error(api_error)
                        _policy_response = (
-                            f"⚠️  The model provider's safety filter blocked this request "
-                            f"(not a Hermes/gateway failure).\n\n"
+                            "⚠️  The model provider's safety filter blocked this request "
+                            "(not a Hermes/gateway failure).\n\n"
                            f"Provider message: {_summary}\n\n"
-                            f"Try rephrasing the request, narrowing the context, or "
-                            f"adding a fallback provider with `hermes fallback add`."
+                            f"{_CONTENT_POLICY_RECOVERY_HINT}"
+                        )
+                        return _content_policy_blocked_result(
+                            messages,
+                            api_call_count,
+                            final_response=_policy_response,
+                            error_detail=_summary,
                        )
-                        return {
-                            "final_response": _policy_response,
-                            "messages": messages,
-                            "api_calls": api_call_count,
-                            "completed": False,
-                            "failed": True,
-                            "error": f"content_policy_blocked: {_summary}",
-                        }
                    return {
                        "final_response": None,
                        "messages": messages,
--- a/agent/copilot_acp_client.py
+++ b/agent/copilot_acp_client.py
@@ -70,16 +70,6 @@ def _resolve_args() -> list[str]:

 def _resolve_home_dir() -> str:
    """Return a stable HOME for child ACP processes."""
-
-    try:
-        from hermes_constants import get_subprocess_home
-
-        profile_home = get_subprocess_home()
-        if profile_home:
-            return profile_home
-    except Exception:
-        pass
-
    home = os.environ.get("HOME", "").strip()
    if home:
        return home
@@ -105,7 +95,10 @@ def _resolve_home_dir() -> str:

 def _build_subprocess_env() -> dict[str, str]:
    env = os.environ.copy()
-    env["HOME"] = _resolve_home_dir()
+    home = _resolve_home_dir()
+    env["HOME"] = home
+    from hermes_constants import apply_subprocess_home_env
+    apply_subprocess_home_env(env)
    return env


--- a/agent/credits_tracker.py
+++ b/agent/credits_tracker.py
@@ -286,6 +286,16 @@ def evaluate_credits_notices(
        for band in CREDITS_USAGE_BANDS:  # ascending → last match wins = highest
            if uf >= band[0]:
                current_band = band
+    # Top-up suppression: when the account holds purchased (top-up) credits,
+    # the subscription-cap gauge is the wrong denominator — warning "90% used"
+    # at a user sitting on $50 of top-up is noise (and it previously stuck
+    # PERMANENTLY alongside grant_spent at >=100%). Suppress the usage band
+    # entirely; the cap-reached case is covered by the grant_spent info notice
+    # below, which already names the remaining top-up balance. A top-up landing
+    # mid-session flips current_band → None and the clear path below removes
+    # any showing band line.
+    if state.purchased_micros > 0:
+        current_band = None
    grant_cond = (
        state.denominator_kind == "subscription_cap"
        and uf is not None
@@ -345,7 +355,7 @@ def evaluate_credits_notices(
    if show_depleted and "credits.depleted" not in active:
        to_show.append(
            AgentNotice(
-                text="✕ Credit access paused · run /usage for balance",
+                text="✕ Credit access paused · run /credits to top up",
                level="error",
                kind=CREDITS_NOTICE_KIND,
                key="credits.depleted",
--- a/agent/curator_backup.py
+++ b/agent/curator_backup.py
@@ -454,16 +454,16 @@ def _restore_cron_skill_links(snapshot_dir: Path) -> Dict[str, Any]:
        report["attempted"] = True  # we tried but there was nothing to do
        return report

-    # Load and rewrite the live jobs under the scheduler's lock.
+    # Load and rewrite the live jobs under the scheduler's cross-process lock.
    try:
-        from cron.jobs import load_jobs, save_jobs, _jobs_file_lock
+        from cron.jobs import load_jobs, save_jobs, _jobs_lock
    except ImportError as e:
        report["error"] = f"cron module unavailable: {e}"
        return report

    report["attempted"] = True
    try:
-        with _jobs_file_lock:
+        with _jobs_lock():
            live_jobs = load_jobs()
            changed = False

--- a/agent/display.py
+++ b/agent/display.py
@@ -12,6 +12,7 @@ import time
 from dataclasses import dataclass, field
 from difflib import unified_diff
 from pathlib import Path
+from typing import Any

 from utils import safe_json_loads
 from agent.tool_result_classification import file_mutation_result_landed
@@ -168,6 +169,27 @@ def _oneline(text: str) -> str:
    return " ".join(text.split())


+def _truncate_preview(text: str, max_len: int | None) -> str:
+    """Cap ``text`` at ``max_len`` chars with a "..." tail; a falsy/non-positive limit is a no-op."""
+    if not max_len or max_len <= 0 or len(text) <= max_len:
+        return text
+    cut = max_len - 3  # room left for real text once the ellipsis is accounted for
+    return text[:cut] + "..." if cut > 0 else "." * max_len
+
+
+def _delegate_task_goal_parts(tasks: Any, *, per_goal_len: int) -> tuple[int, list[str]]:
+    """Return ``(count, previews)`` covering the dict entries of a delegate_task batch."""
+    if not isinstance(tasks, list):
+        return 0, []
+    previews: list[str] = []
+    for entry in (t for t in tasks if isinstance(t, dict)):
+        value = entry.get("goal")
+        # A missing goal, or one that collapses to an empty string, is shown as "?".
+        label = (_oneline(str(value)) if value is not None else "") or "?"
+        previews.append(_truncate_preview(label, per_goal_len))
+    return len(previews), previews
+
+
 def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -> str | None:
    """Build a short preview of a tool call's primary argument for display.

@@ -191,6 +213,22 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
        "clarify": "question", "skill_manage": "name",
    }

+    # delegate_task: show goal (single) or individual task goals (batch)
+    if tool_name == "delegate_task":
+        tasks = args.get("tasks")
+        if tasks and isinstance(tasks, list):
+            task_count, goals = _delegate_task_goal_parts(tasks, per_goal_len=40)
+            preview = (
+                f"{task_count} tasks: " + " | ".join(goals)
+                if goals else f"{len(tasks)} parallel tasks"
+            )
+            return _truncate_preview(preview, max_len)
+        goal = args.get("goal", "")
+        if goal is None:
+            return None
+        preview = _oneline(str(goal))
+        return _truncate_preview(preview, max_len) if preview else None
+
    if tool_name == "process":
        action = args.get("action", "")
        sid = args.get("session_id", "")
@@ -858,20 +896,6 @@ def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]
    return False, ""


-def _used_free_parallel(result: str | None) -> bool:
-    """True when a web result came from Parallel's free Search MCP.
-
-    Only the keyless Parallel path tags its result with ``provider="parallel"``;
-    the paid REST path and every other provider omit it. Used to label the tool
-    line "Parallel search" / "Parallel fetch" exactly when the free MCP served
-    the call.
-    """
-    if not isinstance(result, str) or '"provider"' not in result:
-        return False
-    data = safe_json_loads(result)
-    return isinstance(data, dict) and str(data.get("provider", "")).lower() == "parallel"
-
-
 def get_cute_tool_message(
    tool_name: str, args: dict, duration: float, result: str | None = None,
 ) -> str:
@@ -909,17 +933,15 @@ def get_cute_tool_message(
        return f"{line}{failure_suffix}"

    if tool_name == "web_search":
-        verb = "Parallel search" if _used_free_parallel(result) else "search"
-        return _wrap(f"┊ 🔍 {verb:<9} {_trunc(args.get('query', ''), 42)}  {dur}")
+        return _wrap(f"┊ 🔍 search    {_trunc(args.get('query', ''), 42)}  {dur}")
    if tool_name == "web_extract":
-        verb = "Parallel fetch" if _used_free_parallel(result) else "fetch"
        urls = args.get("urls", [])
        if urls:
            url = urls[0] if isinstance(urls, list) else str(urls)
            domain = url.replace("https://", "").replace("http://", "").split("/")[0]
            extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
-            return _wrap(f"┊ 📄 {verb:<9} {_trunc(domain, 35)}{extra}  {dur}")
-        return _wrap(f"┊ 📄 {verb:<9} pages  {dur}")
+            return _wrap(f"┊ 📄 fetch     {_trunc(domain, 35)}{extra}  {dur}")
+        return _wrap(f"┊ 📄 fetch     pages  {dur}")
    if tool_name == "terminal":
        return _wrap(f"┊ 💻 $         {_trunc(args.get('command', ''), 42)}  {dur}")
    if tool_name == "process":
@@ -1035,7 +1057,10 @@ def get_cute_tool_message(
    if tool_name == "delegate_task":
        tasks = args.get("tasks")
        if tasks and isinstance(tasks, list):
-            return _wrap(f"┊ 🔀 delegate  {len(tasks)} parallel tasks  {dur}")
+            task_count, goals = _delegate_task_goal_parts(tasks, per_goal_len=30)
+            detail = " | ".join(goals) if goals else "parallel"
+            count_label = task_count or len(tasks)
+            return _wrap(f"┊ 🔀 delegate  {count_label}x: {_trunc(detail, 35)}  {dur}")
        return _wrap(f"┊ 🔀 delegate  {_trunc(args.get('goal', ''), 35)}  {dur}")

    preview = build_tool_preview(tool_name, args) or ""
--- a/agent/errors.py
+++ b/agent/errors.py
@@ -0,0 +1,3 @@
+class SSLConfigurationError(Exception):
+    """Raised when SSL/TLS certificate bundle configuration fails."""
+    # Docstring-only body: adds nothing to Exception beyond the distinct type.
--- a/agent/file_safety.py
+++ b/agent/file_safety.py
@@ -46,11 +46,6 @@ def build_write_denied_paths(home: str) -> set[str]:
            # Top-level Anthropic PKCE credential store remains sensitive even
            # when a profile is active; default/non-profile sessions still read it.
            str(hermes_root / ".anthropic_oauth.json"),
-            os.path.join(home, ".bashrc"),
-            os.path.join(home, ".zshrc"),
-            os.path.join(home, ".profile"),
-            os.path.join(home, ".bash_profile"),
-            os.path.join(home, ".zprofile"),
            os.path.join(home, ".netrc"),
            os.path.join(home, ".pgpass"),
            os.path.join(home, ".npmrc"),
@@ -104,12 +99,6 @@ def is_write_denied(path: str) -> bool:
        if resolved.startswith(prefix):
            return True

-    # Hermes control-plane files: block both the ACTIVE profile's view
-    # (hermes_home) AND the global root view. Without the root pass, a
-    # profile-mode session leaves <root>/auth.json + <root>/config.yaml
-    # writable — letting a prompt-injected write_file overwrite the global
-    # files that every profile inherits from (same shape as #15981).
-    control_file_names = ("auth.json", "config.yaml", "webhook_subscriptions.json")
    mcp_tokens_dir_name = "mcp-tokens"

    hermes_dirs = []
@@ -122,12 +111,6 @@ def is_write_denied(path: str) -> bool:
            continue

    for base_real in hermes_dirs:
-        for name in control_file_names:
-            try:
-                if resolved == os.path.realpath(os.path.join(base_real, name)):
-                    return True
-            except Exception:
-                continue
        try:
            mcp_real = os.path.realpath(os.path.join(base_real, mcp_tokens_dir_name))
            if resolved == mcp_real or resolved.startswith(mcp_real + os.sep):
--- a/agent/gemini_native_adapter.py
+++ b/agent/gemini_native_adapter.py
@@ -41,6 +41,16 @@ DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
 GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65535


+def bare_gemini_model_id(model: str) -> str:
+    """Drop a leading ``google/`` or ``gemini/`` prefix (matched case-insensitively) from a model id."""
+    cleaned = (model or "").strip()
+    folded = cleaned.lower()
+    hit = next((p for p in ("google/", "gemini/") if folded.startswith(p)), None)
+    if hit is None:
+        return cleaned
+    return cleaned[len(hit):].strip() or cleaned
+
+
 def is_native_gemini_base_url(base_url: str) -> bool:
    """Return True when the endpoint speaks Gemini's native REST API."""
    normalized = str(base_url or "").strip().rstrip("/").lower()
@@ -330,7 +340,7 @@ def _build_gemini_contents(messages: List[Dict[str, Any]]) -> tuple[List[Dict[st
    system_instruction = None
    joined_system = "\n".join(part for part in system_text_parts if part).strip()
    if joined_system:
-        system_instruction = {"parts": [{"text": joined_system}]}
+        system_instruction = {"role": "system", "parts": [{"text": joined_system}]}
    return contents, system_instruction


@@ -914,6 +924,7 @@ class GeminiNativeClient:
            thinking_config=thinking_config,
        )

+        model = bare_gemini_model_id(model)
        if stream:
            return self._stream_completion(model=model, request=request, timeout=timeout)

--- a/agent/memory_manager.py
+++ b/agent/memory_manager.py
@@ -33,6 +33,7 @@ from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Dict, List, Optional

 from agent.memory_provider import MemoryProvider
+from agent.skill_commands import extract_user_instruction_from_skill_message
 from tools.registry import tool_error

 logger = logging.getLogger(__name__)
@@ -44,6 +45,66 @@ logger = logging.getLogger(__name__)
 _SYNC_DRAIN_TIMEOUT_S = 5.0


+def memory_provider_tools_enabled(enabled_toolsets: Optional[List[str]]) -> bool:
+    """Return whether external memory-provider tools should be exposed.
+
+    ``None`` means yes and an empty list means no; otherwise "memory" must be
+    listed directly or be contained in one of the resolved toolsets.
+    """
+    if enabled_toolsets is None or "memory" in enabled_toolsets:
+        return True
+    if not enabled_toolsets:
+        return False
+    try:
+        from toolsets import resolve_toolset
+        return any("memory" in resolve_toolset(ts) for ts in enabled_toolsets)
+    except Exception:
+        logger.debug("Failed to resolve enabled toolsets for memory-provider tools", exc_info=True)
+        return False
+
+
+def inject_memory_provider_tools(agent: Any) -> int:
+    """Register external memory-provider tool schemas on ``agent``.
+
+    Appends each not-yet-present schema to ``agent.tools`` (wrapped as a
+    function tool) and records its name in ``agent.valid_tool_names``.
+    Returns the number of tools added; 0 when a precondition is not met.
+    """
+    manager = getattr(agent, "_memory_manager", None)
+    surface = getattr(agent, "tools", None)
+    if not manager or surface is None:
+        return 0
+
+    known = set()
+    for entry in surface:
+        if isinstance(entry, dict):
+            known.add(entry.get("function", {}).get("name"))
+    # No "memory" tool on the surface yet: defer to the enabled-toolsets check.
+    if "memory" not in known:
+        if not memory_provider_tools_enabled(getattr(agent, "enabled_toolsets", None)):
+            return 0
+
+    schema_source = getattr(manager, "get_all_tool_schemas", None)
+    if not callable(schema_source):
+        return 0
+
+    valid_names = getattr(agent, "valid_tool_names", None)
+    if valid_names is None:
+        agent.valid_tool_names = valid_names = set()
+
+    count = 0
+    for spec in schema_source():
+        name = spec.get("name", "") if isinstance(spec, dict) else ""
+        if not name or name in known:
+            continue
+        surface.append({"type": "function", "function": spec})
+        valid_names.add(name)
+        known.add(name)
+        count += 1
+
+    return count
+
+
 # ---------------------------------------------------------------------------
 # Context fencing helpers
 # ---------------------------------------------------------------------------
@@ -370,16 +431,37 @@ class MemoryManager:

    # -- Prefetch / recall ---------------------------------------------------

+    @staticmethod
+    def _strip_skill_scaffolding(text: str) -> Optional[str]:
+        """Reduce a turn to the text worth handing to memory providers.
+
+        A /skill or /bundle invocation is expanded by Hermes into a
+        model-facing message that embeds the whole skill body. Passing that
+        expansion to memory providers would fill their stores/embeddings with
+        prompt scaffolding rather than the user's request, so the user's own
+        instruction is recovered here, once, ahead of the provider fan-out.
+
+        Outcomes:
+        - a non-skill message is returned unchanged;
+        - a skill turn that carries a user instruction yields that instruction;
+        - a bare skill invocation (no instruction) yields None, and callers
+          skip the turn because there is no user content to remember.
+        """
+        return extract_user_instruction_from_skill_message(text)
+
    def prefetch_all(self, query: str, *, session_id: str = "") -> str:
        """Collect prefetch context from all providers.

        Returns merged context text labeled by provider. Empty providers
        are skipped. Failures in one provider don't block others.
        """
+        clean_query = self._strip_skill_scaffolding(query)
+        if not clean_query:
+            return ""
        parts = []
        for provider in self._providers:
            try:
-                result = provider.prefetch(query, session_id=session_id)
+                result = provider.prefetch(clean_query, session_id=session_id)
                if result and result.strip():
                    parts.append(result)
            except Exception as e:
@@ -400,10 +482,14 @@ class MemoryManager:
        if not providers:
            return

+        clean_query = self._strip_skill_scaffolding(query)
+        if not clean_query:
+            return
+
        def _run() -> None:
            for provider in providers:
                try:
-                    provider.queue_prefetch(query, session_id=session_id)
+                    provider.queue_prefetch(clean_query, session_id=session_id)
                except Exception as e:
                    logger.debug(
                        "Memory provider '%s' queue_prefetch failed (non-fatal): %s",
@@ -455,6 +541,11 @@ class MemoryManager:
        if not providers:
            return

+        clean_user_content = self._strip_skill_scaffolding(user_content)
+        if not clean_user_content:
+            return
+        user_content = clean_user_content
+
        def _run() -> None:
            for provider in providers:
                try:
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -5,6 +5,7 @@ and run_agent.py for pre-flight context checks.
 """

 import ipaddress
+import json
 import logging
 import os
 import re
@@ -16,7 +17,7 @@ from urllib.parse import urlparse
 import requests
 import yaml

-from utils import base_url_host_matches, base_url_hostname
+from utils import atomic_json_write, base_url_host_matches, base_url_hostname

 from hermes_constants import OPENROUTER_MODELS_URL

@@ -111,6 +112,57 @@ _endpoint_model_metadata_cache: Dict[str, Dict[str, Dict[str, Any]]] = {}
 _endpoint_model_metadata_cache_time: Dict[str, float] = {}
 _ENDPOINT_MODEL_CACHE_TTL = 300

+
+def _get_model_metadata_cache_path() -> Path:
+    """Location of the OpenRouter model metadata disk cache under the Hermes home."""
+    from hermes_constants import get_hermes_home
+    return get_hermes_home().joinpath("cache", "openrouter_model_metadata.json")
+
+
+def _model_metadata_disk_cache_age_seconds() -> Optional[float]:
+    """Age of the disk cache in seconds (from its mtime); None if it is
+    missing, has a negative age, or cannot be inspected."""
+    try:
+        path = _get_model_metadata_cache_path()
+        if path.exists():
+            # A negative age (mtime ahead of the clock) is treated as unknown.
+            elapsed = time.time() - path.stat().st_mtime
+            return None if elapsed < 0 else elapsed
+        return None
+    except Exception:
+        return None
+
+
+def _load_model_metadata_disk_cache() -> Dict[str, Dict[str, Any]]:
+    """Read the processed OpenRouter metadata cache back from disk.
+
+    Keeps only dict-valued entries (keys coerced to str); returns {} when the
+    file cannot be read or parsed, or when its top level is not a JSON object.
+    """
+    try:
+        with _get_model_metadata_cache_path().open("r", encoding="utf-8") as fh:
+            payload = json.load(fh)
+    except Exception as e:
+        logger.debug("Failed to load OpenRouter model metadata disk cache: %s", e)
+        return {}
+    # Shape validation happens after the read so only I/O and parsing are guarded.
+    if not isinstance(payload, dict):
+        return {}
+    return {str(k): v for k, v in payload.items() if isinstance(v, dict)}
+
+
+def _save_model_metadata_disk_cache(data: Dict[str, Dict[str, Any]]) -> None:
+    """Persist processed OpenRouter metadata to the disk cache (best-effort).
+
+    Delegates to ``atomic_json_write``; any failure is logged at debug level
+    and swallowed.
+    """
+    write_opts = {"indent": 0, "separators": (",", ":")}
+    try:
+        atomic_json_write(_get_model_metadata_cache_path(), data, **write_opts)
+    except Exception as e:
+        logger.debug("Failed to save OpenRouter model metadata disk cache: %s", e)
+
 # Descending tiers for context length probing when the model is unknown.
 # We start at 256K (covers GPT-5.x, many current large-context models) and
 # step down on context-length errors until one works.  Tier[0] is also the
@@ -209,7 +261,13 @@ DEFAULT_CONTEXT_LENGTHS = {
    # https://platform.minimax.io/docs/api-reference/text-chat-openai
    "minimax-m3": 1000000,
    "minimax": 204800,
-    # GLM
+    # GLM — GLM-5.2 ships with a 1M context window (verified empirically:
+    # needle-in-a-haystack retrieval at 789K prompt tokens succeeded with
+    # zero errors on api.z.ai/api/coding/paas/v4).  Older GLM models
+    # (5, 5.1, 5-turbo) are ~202K.  Longest-key-first substring matching
+    # ensures "glm-5.2" resolves to 1M while older variants still hit the
+    # generic 202K fallback.
+    "glm-5.2": 1_048_576,
    "glm": 202752,
    # xAI Grok — xAI /v1/models does not return context_length metadata,
    # so these hardcoded fallbacks prevent Hermes from probing-down to
@@ -627,6 +685,15 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
    if not force_refresh and _model_metadata_cache and (time.time() - _model_metadata_cache_time) < _MODEL_CACHE_TTL:
        return _model_metadata_cache

+    if not force_refresh:
+        disk_age = _model_metadata_disk_cache_age_seconds()
+        if disk_age is not None and disk_age < _MODEL_CACHE_TTL:
+            disk_cache = _load_model_metadata_disk_cache()
+            if disk_cache:
+                _model_metadata_cache = disk_cache
+                _model_metadata_cache_time = time.time() - disk_age
+                return _model_metadata_cache
+
    try:
        response = requests.get(OPENROUTER_MODELS_URL, timeout=10, verify=_resolve_requests_verify())
        response.raise_for_status()
@@ -648,12 +715,24 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any

        _model_metadata_cache = cache
        _model_metadata_cache_time = time.time()
+        _save_model_metadata_disk_cache(cache)
        logger.debug("Fetched metadata for %s models from OpenRouter", len(cache))
        return cache

    except Exception as e:
        logger.warning(f"Failed to fetch model metadata from OpenRouter: {e}")
-        return _model_metadata_cache or {}
+        if _model_metadata_cache:
+            return _model_metadata_cache
+        disk_cache = _load_model_metadata_disk_cache()
+        if disk_cache:
+            _model_metadata_cache = disk_cache
+            disk_age = _model_metadata_disk_cache_age_seconds()
+            if disk_age is not None:
+                _model_metadata_cache_time = time.time() - min(disk_age, _MODEL_CACHE_TTL)
+            else:
+                _model_metadata_cache_time = time.time() - _MODEL_CACHE_TTL + 1
+            return _model_metadata_cache
+        return {}


 def fetch_endpoint_model_metadata(
--- a/agent/moonshot_schema.py
+++ b/agent/moonshot_schema.py
@@ -135,7 +135,14 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:

 def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
    """Infer a reasonable ``type`` if this schema node has none."""
-    if "type" in node and node["type"] not in {None, ""}:
+    node_type = node.get("type")
+    if isinstance(node_type, list):
+        concrete = next(
+            (t for t in node_type if isinstance(t, str) and t not in {"", "null"}),
+            "string",
+        )
+        return {**node, "type": concrete}
+    if "type" in node and node_type not in {None, ""}:
        return node

    # Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum``
--- a/agent/pet/init.py
+++ b/agent/pet/init.py
@@ -0,0 +1,51 @@
+"""Petdex pet engine — shared core for the CLI, TUI, and desktop surfaces.
+
+Petdex (https://github.com/crafter-station/petdex) is a public gallery of
+animated sprite "pets" for coding agents.  Each pet is a ``pet.json`` plus a
+``spritesheet.{webp,png}`` of 192×208 px cells. Current Codex/petdex sheets use
+an 8-column × 9-row atlas; older Hermes/petdex sheets used an 8-row atlas.
+Hermes infers the row taxonomy from the sheet and maps agent activity onto
+idle/run/review/failed/wave/jump/waiting.
+
+This package is the **single source of truth** for the feature so the base
+CLI (Python) and TUI (Ink, via ``tui_gateway``) never duplicate the hard
+parts:
+
+- :mod:`agent.pet.constants` — frame geometry + the :class:`PetState` enum.
+- :mod:`agent.pet.state`     — map agent activity → a :class:`PetState`.
+- :mod:`agent.pet.manifest`  — fetch the public petdex manifest.
+- :mod:`agent.pet.store`     — install / list / resolve pets on disk
+                               (profile-aware via ``get_hermes_home()``).
+- :mod:`agent.pet.render`    — decode a spritesheet and encode frames for a
+                               terminal (kitty / iTerm2 / sixel graphics
+                               protocols, with a Unicode half-block
+                               fallback).
+
+Rendering in the Electron desktop is necessarily TypeScript (canvas), but it
+reuses the same on-disk store and the same state semantics.
+
+The whole feature is a *display* concern: it adds no model tool, mutates no
+system prompt or toolset, and therefore has zero effect on prompt caching.
+"""
+
+from agent.pet.constants import (
+    DEFAULT_SCALE,
+    FRAME_H,
+    FRAME_W,
+    FRAMES_PER_STATE,
+    LOOP_MS,
+    STATE_ROWS,
+    PetState,
+)
+from agent.pet.state import derive_pet_state
+
+__all__ = [
+    "DEFAULT_SCALE",
+    "FRAME_H",
+    "FRAME_W",
+    "FRAMES_PER_STATE",
+    "LOOP_MS",
+    "STATE_ROWS",
+    "PetState",
+    "derive_pet_state",
+]
--- a/agent/pet/constants.py
+++ b/agent/pet/constants.py
@@ -0,0 +1,167 @@
+"""Pet sprite geometry + animation-state taxonomy.
+
+These values are the common petdex/Codex pet geometry. The real ``pet.json``
+usually only carries ``id``/``displayName``/``description``/``spritesheetPath``;
+row taxonomy is inferred from the atlas shape so Hermes can render both legacy
+8-row sheets and current 9-row Codex sheets.
+"""
+
+from __future__ import annotations
+
+from enum import Enum
+
+# Frame geometry (pixels). Current Codex/petdex spritesheets are 8 columns x 9
+# rows (1536x1872), while older Hermes/petdex sheets used 9 columns x 8 rows
+# (1728x1664). Renderers derive both row taxonomy and real column count from the
+# concrete sheet, so either shape works.
+FRAME_W = 192
+FRAME_H = 208
+
+# Frames consumed per animation state (the petdex web app uses CSS
+# ``steps(6)``).  A sheet may physically contain more columns; we only step
+# through the first ``FRAMES_PER_STATE``.
+FRAMES_PER_STATE = 6
+
+# Full-loop duration for one state, milliseconds (petdex default).
+LOOP_MS = 1100
+
+# Default on-screen scale relative to native frame size.  ``display.pet.scale``
+# is the single master scalar: the desktop canvas multiplies its native pixels
+# by it and every terminal surface derives its half-block/kitty column width
+# from it (see :func:`cols_for_scale`), so one number shrinks all three
+# interfaces together.  (petdex's own clients render at 0.7; we default smaller
+# so the kitty/GUI mascot stays a glanceable corner sprite.  The half-block
+# fallback can't shrink as far — see ``UNICODE_MIN_COLS`` — and clamps to its
+# legibility floor instead.)
+DEFAULT_SCALE = 0.33
+
+# User-settable scale bounds (``/pet scale``, desktop slider).  Floor keeps the
+# pet clickable/visible; ceiling stops a fat-fingered value from filling the
+# screen.  The unicode fallback additionally clamps to ``UNICODE_MIN_COLS``.
+MIN_SCALE = 0.1
+MAX_SCALE = 3.0
+
+
+def clamp_scale(scale: float) -> float:
+    """Return *scale* limited to ``[MIN_SCALE, MAX_SCALE]`` (the single validation point)."""
+    return max(MIN_SCALE, min(MAX_SCALE, scale))
+
+# Terminal cells one native frame spans at ``scale == 1.0``.  A cell is ~8px
+# wide, a frame is ``FRAME_W`` (192) px → 24 cells.  This mirrors the kitty
+# graphics placement (``scaled_px // 8``) so at full scale every renderer agrees.
+BASE_UNICODE_COLS = FRAME_W // 8
+
+# Legibility floor for the half-block fallback.  A half-block cell samples the
+# sprite at only 1 horizontal + 2 vertical taps, so below this width a 192×208
+# pet collapses into an unreadable blob *regardless* of scale.  kitty/GUI draw
+# true pixels and have no such floor — that's why the same ``scale: 0.33`` is
+# crisp there but mush in half-blocks.  ``scale`` shrinks the unicode pet down
+# TO this floor (and grows it above), instead of past it into noise.
+UNICODE_MIN_COLS = 16
+
+
+def cols_for_scale(scale: float) -> int:
+    """Terminal columns for the half-block pet at *scale*, never below the floor.
+
+    A falsy *scale* (``0``/``None``) means ``DEFAULT_SCALE``. The scaled width is
+    rounded to whole cells and raised to ``UNICODE_MIN_COLS`` when smaller.
+    """
+    effective = scale if scale else DEFAULT_SCALE
+    return max(UNICODE_MIN_COLS, round(BASE_UNICODE_COLS * effective))
+
+
+def resolve_cols(scale: float, unicode_cols: int = 0) -> int:
+    """Return a positive explicit *unicode_cols* as ``int``; otherwise derive the width from *scale*."""
+    return int(unicode_cols) if unicode_cols and int(unicode_cols) > 0 else cols_for_scale(scale)
+
+
+class PetState(str, Enum):
+    """Hermes activity states a pet can be animated in (string-valued enum).
+
+    Values are Hermes' own short names and may differ from a sheet's row names:
+    Codex-format atlases label rows ``jumping`` / ``running`` / ``waving``, which
+    ``STATE_ALIASES`` maps back onto ``jump`` / ``run`` / ``wave``.
+    """
+
+    IDLE = "idle"
+    WAVE = "wave"
+    RUN = "run"
+    FAILED = "failed"
+    REVIEW = "review"
+    JUMP = "jump"
+    WAITING = "waiting"
+
+
+# Legacy Hermes/petdex row order (top -> bottom) used by the older 8-row,
+# 9-column atlas shape.
+LEGACY_STATE_ROWS: list[str] = [
+    PetState.IDLE.value,
+    PetState.WAVE.value,
+    PetState.RUN.value,
+    PetState.FAILED.value,
+    PetState.REVIEW.value,
+    PetState.JUMP.value,
+    "extra1",
+    "extra2",
+]
+
+# Current Petdex row order (top -> bottom) used by 1536x1872 atlases:
+# 8 columns x 9 rows of 192x208 cells.
+CODEX_STATE_ROWS: list[str] = [
+    PetState.IDLE.value,
+    "running-right",
+    "running-left",
+    "waving",
+    "jumping",
+    PetState.FAILED.value,
+    PetState.WAITING.value,
+    "running",
+    PetState.REVIEW.value,
+]
+
+# Default/fallback for callers without a sheet. Prefer the current 9-row Codex
+# format because generated pets and the public Codex pet contract use it.
+STATE_ROWS: list[str] = CODEX_STATE_ROWS
+
+# Canonical Hermes activity names -> accepted row-name aliases in descending
+# preference. This keeps our internal state names stable (`wave`/`jump`/`run`)
+# while matching Petdex's current `waving`/`jumping`/`running` taxonomy.
+STATE_ALIASES: dict[str, tuple[str, ...]] = {
+    PetState.IDLE.value: (PetState.IDLE.value,),
+    PetState.WAVE.value: (PetState.WAVE.value, "waving"),
+    PetState.JUMP.value: (PetState.JUMP.value, "jumping"),
+    PetState.RUN.value: (PetState.RUN.value, "running"),
+    PetState.FAILED.value: (PetState.FAILED.value,),
+    PetState.REVIEW.value: (PetState.REVIEW.value,),
+    PetState.WAITING.value: (PetState.WAITING.value,),
+}
+
+
+def state_aliases_for(state: "PetState | str") -> tuple[str, ...]:
+    """Row names that may represent *state*, best first; never an empty tuple."""
+    key = str(state) if not isinstance(state, PetState) else state.value
+    known = STATE_ALIASES.get(key, ())
+    return known or (key,)
+
+
+def state_rows_for_grid(row_count: int | None) -> list[str]:
+    """Pick the row-name list for a sheet: Codex when it has enough rows, else legacy."""
+    try:
+        # Missing, falsy or unparseable counts are treated as zero rows.
+        n_rows = int(row_count) if row_count else 0
+    except (TypeError, ValueError):
+        n_rows = 0
+
+    has_codex_rows = n_rows >= len(CODEX_STATE_ROWS)
+    return CODEX_STATE_ROWS if has_codex_rows else LEGACY_STATE_ROWS
+
+
+def state_row_index(state: "PetState | str", row_count: int | None = None) -> int:
+    """Row index of *state* on a sheet; idle row 0 when absent or past the sheet's last row."""
+    rows = state_rows_for_grid(row_count)
+    # Clamp to the rows the sheet really has (an int *row_count* > 0); else trust the taxonomy.
+    limit = row_count if isinstance(row_count, int) and row_count > 0 else len(rows)
+    for name in state_aliases_for(state):
+        if name in rows[:limit]:
+            return rows.index(name)
+    return 0  # fall back to the idle row
--- a/agent/pet/generate/init.py
+++ b/agent/pet/generate/init.py
@@ -0,0 +1,29 @@
+"""Pet generation — base-draft → hatch pipeline.
+
+Public surface used by the gateway RPCs, the CLI ``hermes pets generate``
+command, and tests:
+
+- :func:`generate_base_drafts` / :func:`hatch_pet` — the two-step flow.
+- :class:`HatchResult`, :class:`GenerationError`.
+- :mod:`atlas` — deterministic frame extraction + atlas composition/validation.
+
+Image generation is delegated to the active reference-capable
+:class:`~agent.image_gen_provider.ImageGenProvider` (OpenAI gpt-image-2 or Krea);
+atlas assembly is fully deterministic so it's testable without any API calls.
+"""
+
+from __future__ import annotations
+
+from agent.pet.generate.imagegen import GenerationError
+from agent.pet.generate.orchestrate import (
+    HatchResult,
+    generate_base_drafts,
+    hatch_pet,
+)
+
+__all__ = [
+    "GenerationError",
+    "HatchResult",
+    "generate_base_drafts",
+    "hatch_pet",
+]
--- a/agent/pet/generate/atlas.py
+++ b/agent/pet/generate/atlas.py
@@ -0,0 +1,400 @@
+"""Deterministic spritesheet assembly — generated row strips → Hermes atlas.
+
+Image-generation models are good at *drawing* a row of poses but bad at exact
+grid geometry, so the model never owns the atlas layout: it produces one loose
+horizontal strip per state, and these deterministic ops slice that strip into
+clean, centered, transparent ``192x208`` cells and pack them into the sheet our
+renderer reads.
+
+The atlas is **Hermes-native**, not the petdex/Codex format. Our renderer
+(:mod:`agent.pet.render`) keys frames as ``rows = states, cols = frames``, and
+a sheet this short resolves to :data:`agent.pet.constants.LEGACY_STATE_ROWS`,
+so we emit its first six states — idle, wave, run, failed, review, jump —
+left-packed with trailing transparent cells (which the renderer trims). Sheet
+is ``COLUMNS*192 x ROWS*208`` (1152x1248).
+
+The frame-segmentation, fit-to-cell, and transparency-residue logic is adapted
+from OpenAI's ``hatch-pet`` skill (openai/skills, Apache-2.0).
+"""
+
+from __future__ import annotations
+
+import io
+import logging
+import math
+from pathlib import Path
+
+from agent.pet.constants import FRAME_H, FRAME_W
+
+logger = logging.getLogger(__name__)
+
+CELL_WIDTH = FRAME_W
+CELL_HEIGHT = FRAME_H
+
+# (state, row index, frame count). Order/row indices MUST match the first six
+# entries of ``LEGACY_STATE_ROWS`` (the taxonomy chosen for sheets under nine
+# rows) so the renderer crops the right row for each driven state. Frame counts
+# are petdex-ish per-state lengths; rows shorter than ``COLUMNS`` leave their
+# tail transparent, which the renderer trims.
+ROW_SPECS: list[tuple[str, int, int]] = [
+    ("idle", 0, 6),
+    ("wave", 1, 4),
+    ("run", 2, 6),
+    ("failed", 3, 6),
+    ("review", 4, 6),
+    ("jump", 5, 5),
+]
+
+ROWS = len(ROW_SPECS)
+COLUMNS = max(count for _, _, count in ROW_SPECS)
+ATLAS_WIDTH = COLUMNS * CELL_WIDTH
+ATLAS_HEIGHT = ROWS * CELL_HEIGHT
+
+FRAME_COUNTS: dict[str, int] = {state: count for state, _, count in ROW_SPECS}
+
+# Alpha at/below which a pixel is "background" for component detection.
+_ALPHA_FLOOR = 16
+# Cell padding kept around a fitted sprite so poses never touch the edge.
+_CELL_PAD = 10
+
+
+# ───────────────────────── background removal ─────────────────────────
+
+
+def _color_distance(r: int, g: int, b: int, key: tuple[int, int, int]) -> float:
+    return math.sqrt((r - key[0]) ** 2 + (g - key[1]) ** 2 + (b - key[2]) ** 2)  # Euclidean RGB distance
+
+
+def _has_transparency(image) -> bool:
+    """Whether more than 5% of *image* is at or below the ``_ALPHA_FLOOR`` alpha."""
+    alpha = image.getchannel("A")
+    lowest, _highest = alpha.getextrema()
+    # Cheap reject: not a single pixel is near-transparent.
+    if lowest > _ALPHA_FLOOR:
+        return False
+    near_clear = sum(alpha.histogram()[: _ALPHA_FLOOR + 1])
+    pixel_total = image.width * image.height
+    return near_clear > pixel_total * 0.05
+
+
+def _dominant_corner_color(image) -> tuple[int, int, int]:
+    """Most frequent non-transparent RGB among the four corner pixels.
+
+    Falls back to pure green when every corner is at/below ``_ALPHA_FLOOR``.
+    """
+    from collections import Counter
+
+    right, bottom = image.width - 1, image.height - 1
+    pixels = image.load()
+    corners = ((0, 0), (right, 0), (0, bottom), (right, bottom))
+    tally = Counter(
+        pixels[xy][:3] for xy in corners if pixels[xy][3] > _ALPHA_FLOOR
+    )
+    return tally.most_common(1)[0][0] if tally else (0, 255, 0)
+
+
+def remove_background(image, *, chroma_key: tuple[int, int, int] | None = None, threshold: float = 110.0):
+    """Convert *image* to RGBA and make its flat backdrop fully transparent.
+
+    An image that ``_has_transparency`` is returned as converted, unkeyed.
+    Otherwise each pixel above ``_ALPHA_FLOOR`` whose RGB lies within
+    *threshold* of the key is cleared; the key is *chroma_key* when given,
+    else the dominant corner color.
+    """
+    result = image.convert("RGBA")
+    if not _has_transparency(result):
+        backdrop = chroma_key or _dominant_corner_color(result)
+        pixels = result.load()
+        for row in range(result.height):
+            for col in range(result.width):
+                red, green, blue, alpha = pixels[col, row]
+                if alpha <= _ALPHA_FLOOR:
+                    continue
+                if _color_distance(red, green, blue, backdrop) <= threshold:
+                    pixels[col, row] = (0, 0, 0, 0)
+    return result
+
+
+# ───────────────────────── frame extraction ─────────────────────────
+
+
+def _fit_to_cell(image):
+    """Return a transparent cell with *image*'s content cropped, shrunk to fit, centered."""
+    from PIL import Image
+
+    cell = Image.new("RGBA", (CELL_WIDTH, CELL_HEIGHT), (0, 0, 0, 0))
+    content_box = image.getbbox()
+    if content_box is not None:
+        content = image.crop(content_box)
+        # Only ever shrink (factor capped at 1.0); small sprites keep native size.
+        factor = min(
+            (CELL_WIDTH - _CELL_PAD) / content.width,
+            (CELL_HEIGHT - _CELL_PAD) / content.height,
+            1.0,
+        )
+        if factor != 1.0:
+            new_w = max(1, round(content.width * factor))
+            new_h = max(1, round(content.height * factor))
+            content = content.resize((new_w, new_h), Image.Resampling.LANCZOS)
+        offset = ((CELL_WIDTH - content.width) // 2, (CELL_HEIGHT - content.height) // 2)
+        cell.alpha_composite(content, offset)
+
+    return cell
+
+
+def _connected_components(image) -> list[dict]:
+    """Flood-fill the alpha mask into 4-connected blobs; one dict (pixels/area/bbox/center_x) each."""
+    alpha = image.getchannel("A")
+    w, h = image.size
+    data = alpha.tobytes()  # indexed below as flat offsets: x = i % w, y = i // w
+    visited = bytearray(w * h)  # 1 once a pixel has been claimed by some blob
+    out: list[dict] = []
+
+    for start, a in enumerate(data):
+        if a <= _ALPHA_FLOOR or visited[start]:  # background, or already part of a blob
+            continue
+        stack = [start]  # explicit stack instead of recursion
+        visited[start] = 1
+        pixels: list[int] = []  # flat offsets of every pixel in this blob
+        min_x = w
+        min_y = h
+        max_x = 0
+        max_y = 0
+        while stack:
+            cur = stack.pop()
+            pixels.append(cur)
+            x = cur % w
+            y = cur // w
+            min_x = min(min_x, x)
+            min_y = min(min_y, y)
+            max_x = max(max_x, x)
+            max_y = max(max_y, y)
+            for nb, ok in (  # (neighbor offset, neighbor is inside the image)
+                (cur - 1, x > 0),
+                (cur + 1, x + 1 < w),
+                (cur - w, y > 0),
+                (cur + w, y + 1 < h),
+            ):
+                if ok and not visited[nb] and data[nb] > _ALPHA_FLOOR:
+                    visited[nb] = 1  # mark on push so a pixel is never queued twice
+                    stack.append(nb)
+        out.append(
+            {
+                "pixels": pixels,
+                "area": len(pixels),
+                "bbox": (min_x, min_y, max_x + 1, max_y + 1),  # right/bottom are exclusive
+                "center_x": (min_x + max_x + 1) / 2,  # horizontal midpoint of the bbox
+            }
+        )
+    return out
+
+
+def _group_image(source, components: list[dict], padding: int = 4):
+    """Copy only *components*' pixels from *source* onto a padded transparent canvas."""
+    from PIL import Image
+
+    width, height = source.size
+    boxes = [part["bbox"] for part in components]
+    left = max(0, min(box[0] for box in boxes) - padding)
+    top = max(0, min(box[1] for box in boxes) - padding)
+    right = min(width, max(box[2] for box in boxes) + padding)
+    bottom = min(height, max(box[3] for box in boxes) + padding)
+
+    canvas = Image.new("RGBA", (right - left, bottom - top), (0, 0, 0, 0))
+    read, write = source.load(), canvas.load()
+    for part in components:
+        for flat in part["pixels"]:
+            row, col = divmod(flat, width)
+            write[col - left, row - top] = read[col, row]
+    return canvas
+
+
+def _component_frames(strip, frame_count: int) -> list | None:
+    """Split *strip* into *frame_count* fitted cells using connected components.
+
+    The biggest blobs act as per-frame seeds, ordered left→right; each remaining
+    blob that is not noise joins the seed with the closest horizontal center.
+    ``None`` signals that too few blobs exist, so the caller should slice the
+    strip into equal slots instead.
+    """
+    blobs = _connected_components(strip)
+    if not blobs:
+        return None
+
+    def by_area_desc(items: list[dict]) -> list[dict]:
+        return sorted(items, key=lambda blob: blob["area"], reverse=True)
+
+    biggest = max(blob["area"] for blob in blobs)
+    seed_floor = max(120, biggest * 0.20)
+    candidates = [blob for blob in blobs if blob["area"] >= seed_floor]
+    if len(candidates) < frame_count:
+        candidates = by_area_desc(blobs)[:frame_count]
+    if len(candidates) < frame_count:
+        return None
+
+    anchors = by_area_desc(candidates)[:frame_count]
+    anchors.sort(key=lambda blob: blob["center_x"])
+    anchor_ids = {id(anchor) for anchor in anchors}
+    buckets: list[list[dict]] = [[anchor] for anchor in anchors]
+    min_area = max(12, biggest * 0.002)
+    for blob in blobs:
+        if id(blob) not in anchor_ids and blob["area"] >= min_area:
+            distances = [abs(anchor["center_x"] - blob["center_x"]) for anchor in anchors]
+            buckets[distances.index(min(distances))].append(blob)
+
+    return [_fit_to_cell(_group_image(strip, bucket)) for bucket in buckets]
+
+
+def _slot_frames(strip, frame_count: int) -> list:
+    """Fallback: slice *strip* into *frame_count* equal-width fitted cells ([] if < 1)."""
+    if frame_count < 1:  # nothing to cut; also avoids dividing by zero below
+        return []
+    slot = strip.width / frame_count
+    edges = [round(i * slot) for i in range(frame_count + 1)]
+    # Shared edges mean neighbouring slots never overlap or leave a gap.
+    bounds = zip(edges, edges[1:])
+    return [_fit_to_cell(strip.crop((lo, 0, hi, strip.height))) for lo, hi in bounds]
+
+
+def extract_strip_frames(
+    strip,
+    frame_count: int,
+    *,
+    chroma_key: tuple[int, int, int] | None = None,
+    method: str = "auto",
+) -> list:
+    """Cut one generated row strip into *frame_count* fitted 192x208 cells.
+
+    *strip* may be a PIL image or a path to one. After background removal,
+    ``"auto"`` tries component segmentation and falls back to equal slots,
+    ``"components"`` raises ``ValueError`` instead of falling back, and any
+    other *method* goes straight to equal slots.
+    """
+    from PIL import Image
+
+    if not isinstance(strip, (str, Path)):
+        rgba = strip.convert("RGBA")
+    else:
+        with Image.open(strip) as handle:
+            rgba = handle.convert("RGBA")
+    keyed = remove_background(rgba, chroma_key=chroma_key)
+
+    segmented = _component_frames(keyed, frame_count) if method in ("auto", "components") else None
+    if segmented is not None:
+        return segmented
+    if method == "components":
+        raise ValueError(f"could not segment {frame_count} sprites from strip")
+    return _slot_frames(keyed, frame_count)
+
+
+# ───────────────────────── atlas composition ─────────────────────────
+
+
+def single_frame(image):
+    """One fitted 192x208 cell from a standalone picture (PIL image or path).
+
+    Keys out the background; meant as the idle fallback when that row failed.
+    """
+    from PIL import Image
+
+    source = image
+    if isinstance(source, (str, Path)):
+        with Image.open(source) as handle:
+            source = handle.convert("RGBA")
+    return _fit_to_cell(remove_background(source))
+
+
+def _clear_transparent_rgb(image):
+    """Return an RGBA copy in which every alpha-0 pixel also has R, G and B set to 0."""
+    from PIL import Image
+
+    converted = image.convert("RGBA")
+    raw = bytearray(converted.tobytes())
+    for alpha_at in range(3, len(raw), 4):
+        if not raw[alpha_at]:
+            raw[alpha_at - 3 : alpha_at] = b"\x00\x00\x00"
+    return Image.frombytes("RGBA", converted.size, bytes(raw))
+
+
+def compose_atlas(frames_by_state: dict[str, list]):
+    """Lay per-state frames onto the atlas grid and return it residue-cleared.
+
+    Each state fills its ``ROW_SPECS`` row left to right, capped at that row's
+    frame count; absent states and unused cells stay transparent.
+    """
+    from PIL import Image
+
+    sheet = Image.new("RGBA", (ATLAS_WIDTH, ATLAS_HEIGHT), (0, 0, 0, 0))
+    for state, row, count in ROW_SPECS:
+        y = row * CELL_HEIGHT
+        for col, frame in enumerate((frames_by_state.get(state) or [])[:count]):
+            tile = frame.convert("RGBA")
+            # Off-size frames are re-fitted so every tile is exactly one cell.
+            tile = tile if tile.size == (CELL_WIDTH, CELL_HEIGHT) else _fit_to_cell(tile)
+            sheet.alpha_composite(tile, (col * CELL_WIDTH, y))
+    return _clear_transparent_rgb(sheet)
+
+
+def atlas_to_webp_bytes(atlas) -> bytes:
+    """Serialize *atlas* in memory as lossless WebP and return the raw bytes."""
+    with io.BytesIO() as sink:
+        atlas.save(sink, format="WEBP", lossless=True, quality=100, method=6, exact=True)
+        return sink.getvalue()
+
+
+def validate_atlas(atlas) -> dict:
+    """Check geometry, per-state occupancy, and transparency invariants.
+
+    Returns ``{ok, width, height, errors, warnings, filled_states}``. Errors are
+    blockers (wrong size, every state blank, alpha-0 pixels that keep RGB);
+    warnings are soft (one whole state row blank — generation likely dropped it).
+    """
+    from PIL import Image
+
+    if isinstance(atlas, (str, Path)):
+        with Image.open(atlas) as opened:
+            atlas = opened.convert("RGBA")
+    else:
+        atlas = atlas.convert("RGBA")
+
+    errors: list[str] = []
+    warnings: list[str] = []
+
+    if atlas.size != (ATLAS_WIDTH, ATLAS_HEIGHT):  # wrong geometry: report and stop early
+        errors.append(f"expected {ATLAS_WIDTH}x{ATLAS_HEIGHT}, got {atlas.width}x{atlas.height}")
+        return {"ok": False, "width": atlas.width, "height": atlas.height, "errors": errors, "warnings": warnings, "filled_states": []}
+
+    filled_states: list[str] = []
+    for state, row, count in ROW_SPECS:
+        row_pixels = 0  # pixels with alpha > 0 across this state's used cells
+        for col in range(count):
+            left = col * CELL_WIDTH
+            top = row * CELL_HEIGHT
+            cell = atlas.crop((left, top, left + CELL_WIDTH, top + CELL_HEIGHT))
+            nonblank = sum(cell.getchannel("A").histogram()[1:])  # skip the alpha-0 bucket
+            row_pixels += nonblank
+        if row_pixels > 0:
+            filled_states.append(state)
+        else:
+            warnings.append(f"state '{state}' has no frames")
+
+    if not filled_states:
+        errors.append("atlas is empty — no state produced any frames")
+
+    # Transparent pixels must carry zero RGB (no halo residue).
+    data = atlas.tobytes()
+    residue = 0
+    for i in range(0, len(data), 4):  # 4 bytes per pixel: R, G, B, A
+        if data[i + 3] == 0 and (data[i] or data[i + 1] or data[i + 2]):
+            residue += 1
+    if residue:
+        errors.append(f"{residue} transparent pixels retain RGB residue")
+
+    return {
+        "ok": not errors,
+        "width": atlas.width,
+        "height": atlas.height,
+        "errors": errors,
+        "warnings": warnings,
+        "filled_states": filled_states,
+    }
--- a/agent/pet/generate/imagegen.py
+++ b/agent/pet/generate/imagegen.py
@@ -0,0 +1,168 @@
+"""Thin image-generation layer for pet sprites.
+
+Wraps the active :class:`~agent.image_gen_provider.ImageGenProvider` with the
+two things sprite generation needs that the agent-facing ``image_generate`` tool
+doesn't expose: **N variants** (loop) and **reference-image grounding** (so each
+animation row stays the same character as the chosen base).
+
+Reference grounding only works on providers that support it — currently OpenAI
+``gpt-image-2`` (image edits) and Krea (style references). We resolve to one of
+those and surface a clear, actionable error otherwise rather than silently
+producing an ungrounded, drifting pet.
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+# Providers that can ground generation on a reference image.
+_REF_CAPABLE = ("openai", "openai-codex", "krea")
+
+
+class GenerationError(RuntimeError):
+    """Pet image generation failed: no usable backend, or no variant produced an image."""
+
+
+@dataclass(frozen=True)
+class SpriteProvider:
+    """Immutable pairing of a resolved image backend with its reference capability."""
+
+    name: str  # backend name as read from the provider's ``name`` attribute
+    provider: object  # backend instance; ``generate()`` is called on it
+    supports_references: bool  # True when the backend was picked from ``_REF_CAPABLE``
+
+
+def _discover() -> None:
+    """Trigger plugin discovery; any failure is logged at debug level and ignored."""
+    try:
+        from hermes_cli.plugins import _ensure_plugins_discovered as discover_plugins
+        discover_plugins()
+    except Exception as exc:  # noqa: BLE001 - discovery is best-effort
+        logger.debug("image-gen plugin discovery failed: %s", exc)
+
+
+def resolve_provider(*, require_references: bool = True) -> SpriteProvider:
+    """Choose the image backend used for sprite generation.
+
+    Order: the active provider if it is reference-capable and available; then the
+    first available entry of ``_REF_CAPABLE``; then, only when
+    *require_references* is false, the active provider without reference support.
+    Raises :class:`GenerationError` when none of these applies.
+    """
+    _discover()
+    from agent.image_gen_registry import get_active_provider, get_provider
+
+    try:
+        active = get_active_provider()
+    except Exception:  # noqa: BLE001
+        active = None  # a failing registry lookup counts as "nothing configured"
+    active_name = getattr(active, "name", "") if active is not None else ""
+
+    # 1) Configured provider, when it can ground on references.
+    if active is not None and active_name in _REF_CAPABLE and active.is_available():
+        return SpriteProvider(name=active_name, provider=active, supports_references=True)
+
+    # 2) Otherwise the first reference-capable provider that is available.
+    for candidate_name in _REF_CAPABLE:
+        candidate = get_provider(candidate_name)
+        if candidate is not None and candidate.is_available():
+            return SpriteProvider(name=candidate_name, provider=candidate, supports_references=True)
+
+    # 3) Prompt-only work may use the configured provider without references.
+    if not require_references and active is not None and active.is_available():
+        fallback_name = getattr(active, "name", "unknown")
+        return SpriteProvider(name=fallback_name, provider=active, supports_references=False)
+
+    raise GenerationError(
+        "Pet generation needs a reference-capable image backend. "
+        "Run `hermes tools` → Image Generation → OpenAI (gpt-image-2) and add an "
+        "OpenAI API key (or configure Krea)."
+    )
+
+
+def _save_local(image_ref: str, *, prefix: str) -> Path:
+    """Map *image_ref* to a local ``Path``; http(s) URLs are fetched via ``save_url_image``."""
+    is_remote = image_ref.startswith(("http://", "https://"))
+    if not is_remote:
+        return Path(image_ref)
+    from agent.image_gen_provider import save_url_image
+    return Path(save_url_image(image_ref, prefix=prefix))
+
+
+def _rejected_background(error: str) -> bool:
+    """Detect a provider error that complains about the ``background`` option.
+
+    Matching is case-insensitive: the text must mention "background" together
+    with "not supported" or "transparent". Callers use this to retry without
+    ``background=transparent`` instead of failing the whole generation.
+    """
+    text = (error or "").lower()
+    mentions_flag = "background" in text
+    return mentions_flag and any(hint in text for hint in ("not supported", "transparent"))
+
+
+def generate(
+    prompt: str,
+    *,
+    n: int = 1,
+    reference_images: list[Path] | None = None,
+    provider: SpriteProvider | None = None,
+    prefix: str = "pet_gen",
+) -> list[Path]:
+    """Generate up to *n* square sprite images and return their local paths.
+
+    *reference_images* ground the output on a base image (required for rows).
+    A transparent background is requested; once the backend rejects the flag it
+    is dropped for the rest. Raises :class:`GenerationError` if nothing succeeds.
+    """
+    sprite = provider or resolve_provider(require_references=bool(reference_images))
+    if reference_images and not sprite.supports_references:
+        raise GenerationError(
+            f"image backend '{sprite.name}' cannot use reference images; "
+            "configure OpenAI gpt-image-2 or Krea for pet generation"
+        )
+
+    refs = [str(p) for p in (reference_images or [])]
+
+    def _run(extra: dict) -> tuple[Path | None, str]:
+        kwargs: dict = {"aspect_ratio": "square", **extra}
+        if refs:
+            kwargs["reference_images"] = refs
+        try:
+            result = sprite.provider.generate(prompt, **kwargs)
+        except Exception as exc:  # noqa: BLE001 - normalize provider crashes
+            logger.debug("provider.generate crashed: %s", exc)
+            return None, str(exc)
+        if not isinstance(result, dict) or not result.get("success"):
+            # Errors may be non-strings (None, dict); coerce so callers can ``.lower()``.
+            error = result.get("error") if isinstance(result, dict) else "no result"
+            return None, str(error or "unknown error")
+        image_ref = result.get("image")
+        if not image_ref:
+            return None, "provider returned no image"
+        try:
+            return _save_local(str(image_ref), prefix=prefix), ""
+        except Exception as exc:  # noqa: BLE001
+            return None, f"could not save generated image: {exc}"
+
+    out: list[Path] = []
+    last_error = ""
+    allow_transparent = True
+    for _ in range(max(1, n)):
+        path, err = _run({"background": "transparent"} if allow_transparent else {})
+        # Flag rejected → retry this variant without it and stop asking afterwards.
+        if path is None and allow_transparent and _rejected_background(err):
+            allow_transparent = False
+            path, err = _run({})
+        if path is not None:
+            out.append(path)
+        else:
+            last_error = err
+
+    if not out:
+        raise GenerationError(last_error or "image generation produced no output")
+    return out
--- a/agent/pet/generate/orchestrate.py
+++ b/agent/pet/generate/orchestrate.py
@@ -0,0 +1,149 @@
+"""Pet generation orchestration — the base-draft → hatch flow.
+
+Two steps, mirroring the UX across every surface:
+
+1. :func:`generate_base_drafts` — a handful of prompt-only "what should this pet
+   look like" variants. Cheap; the user picks one (or retries for a fresh set).
+2. :func:`hatch_pet` — takes the chosen base and generates one grounded row
+   strip per Hermes state, slices each into frames, composes the atlas, validates
+   it, and writes the pet into the store.
+
+Splitting it this way bounds cost (4 cheap base calls per round; the ~6 row
+calls happen once, on the pet you actually keep) and gives each UI a natural
+preview/loading point.
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Callable
+
+from agent.pet.generate import atlas, imagegen, prompts
+from agent.pet.generate.imagegen import GenerationError, SpriteProvider
+
+logger = logging.getLogger(__name__)
+
+# (event, detail) — e.g. ("row", "idle"), ("compose", ""), ("save", "<slug>").
+ProgressFn = Callable[[str, str], None]
+
+
+@dataclass(frozen=True)
+class HatchResult:
+    """Outcome of a successful :func:`hatch_pet`.
+
+    Assembled from the pet record returned by ``store.register_local_pet`` and
+    the report returned by ``atlas.validate_atlas``; immutable once built.
+    """
+
+    # Slug of the registered pet, as reported back by the store.
+    slug: str
+    # Display name of the registered pet, as reported back by the store.
+    display_name: str
+    # Spritesheet path taken from the registered pet record.
+    spritesheet: Path
+    # ``validation["filled_states"]`` from the atlas validation report.
+    states: list[str]
+    # The full report returned by ``atlas.validate_atlas`` for the composed sheet.
+    validation: dict
+
+
+def _harden_transparency(path: Path) -> Path:
+    """Key out any solid backdrop the provider painted; save as an RGBA PNG.
+
+    ``background=transparent`` is requested on every call, but image models honor
+    it inconsistently — some still paint a flat (often near-white) backdrop. We
+    run the same chroma-key pass the row extractor uses so every base draft the
+    user picks between (and the reference the rows are grounded on) is a clean
+    cutout. Best-effort: a decode failure leaves the original untouched.
+    """
+    from PIL import Image
+
+    try:
+        with Image.open(path) as opened:
+            keyed = atlas.remove_background(opened.convert("RGBA"))
+        out = path.with_suffix(".png")
+        keyed.save(out, format="PNG")
+        return out
+    except Exception as exc:  # noqa: BLE001 - cosmetic; fall back to the raw image
+        logger.debug("base draft transparency hardening failed for %s: %s", path, exc)
+        return path
+
+
+def generate_base_drafts(
+    concept: str,
+    *,
+    n: int = 4,
+    style: str = "auto",
+    provider: SpriteProvider | None = None,
+) -> list[Path]:
+    """Generate *n* candidate base looks for *concept*; returns image paths.
+
+    Each draft is hardened to a transparent cutout (see :func:`_harden_transparency`).
+    """
+    prompt = prompts.build_base_prompt(concept, style=style)
+    sprite = provider or imagegen.resolve_provider(require_references=False)
+    raw = imagegen.generate(prompt, n=n, provider=sprite, prefix="pet_base")
+    return [_harden_transparency(p) for p in raw]
+
+
+def hatch_pet(
+    *,
+    base_image: str | Path,
+    slug: str,
+    display_name: str = "",
+    description: str = "",
+    concept: str = "",
+    style: str = "auto",
+    on_progress: ProgressFn | None = None,
+    provider: SpriteProvider | None = None,
+) -> HatchResult:
+    """Turn an approved base image into a full, installed Hermes pet.
+
+    Generates a grounded row strip per state, extracts frames, composes +
+    validates the atlas, and registers it. The idle row falls back to the base
+    look so the pet always renders. Raises :class:`GenerationError` on failure.
+    """
+    base = Path(base_image)
+    if not base.is_file():
+        raise GenerationError(f"base image not found: {base}")
+
+    sprite = provider or imagegen.resolve_provider(require_references=True)
+    progress = on_progress or (lambda *_: None)
+    label = concept or display_name or slug
+
+    frames_by_state: dict[str, list] = {}
+    for state, _row, count in atlas.ROW_SPECS:
+        progress("row", state)
+        row_prompt = prompts.build_row_prompt(state, count, label, style=style)
+        try:
+            strips = imagegen.generate(
+                row_prompt,
+                n=1,
+                reference_images=[base],
+                provider=sprite,
+                prefix=f"pet_row_{state}",
+            )
+            frames_by_state[state] = atlas.extract_strip_frames(strips[0], count, method="auto")
+        except Exception as exc:  # noqa: BLE001 - a single row may fail; keep going
+            logger.warning("pet row '%s' failed: %s", state, exc)
+
+    # Idle is the resting state the renderer falls back to — guarantee it.
+    if not frames_by_state.get("idle"):
+        progress("row", "idle-fallback")
+        frames_by_state["idle"] = [atlas.single_frame(base)]
+
+    progress("compose", "")
+    sheet = atlas.compose_atlas(frames_by_state)
+    validation = atlas.validate_atlas(sheet)
+    if not validation["ok"]:
+        raise GenerationError("; ".join(validation["errors"]) or "atlas validation failed")
+
+    from agent.pet import store
+
+    progress("save", slug)
+    pet = store.register_local_pet(
+        sheet,
+        slug=slug,
+        display_name=display_name or slug,
+        description=description,
+    )
+    return HatchResult(
+        slug=pet.slug,
+        display_name=pet.display_name,
+        spritesheet=pet.spritesheet,
+        states=validation["filled_states"],
+        validation=validation,
+    )
--- a/agent/pet/generate/prompts.py
+++ b/agent/pet/generate/prompts.py
@@ -0,0 +1,74 @@
+"""Prompt builders for pet generation.
+
+Two prompt shapes: a *base* prompt (prompt-only, produces the canonical look the
+user picks between) and per-*state* *row* prompts (grounded on the chosen base,
+produce one horizontal strip of N poses). Prompts stay concise and
+sprite-production oriented; the identity lock and "one transparent row" framing
+matter more than flowery description.
+
+Hermes drives six states (see :data:`agent.pet.generate.atlas.ROW_SPECS`); these
+mirror that set rather than the petdex/Codex nine.
+"""
+
+from __future__ import annotations
+
+# What each Hermes state should depict (kept short — these go straight into the
+# row prompt). Phrased to avoid the common sprite-gen failure modes (detached
+# effects, motion lines, shadows).
+STATE_ACTIONS: dict[str, str] = {
+    "idle": "a calm idle loop: subtle breathing, a tiny blink or gentle bob, no big gestures",
+    "wave": "a friendly greeting: raising a paw/hand/limb to wave, clear up-and-down gesture",
+    "run": "focused active work: leaning in, concentrating, busy 'thinking/processing' energy (NOT foot-running)",
+    "failed": "a sad or deflated reaction: slumped, dejected, small frown — readable but not noisy",
+    "review": "careful inspection: a focused lean, head tilt, studying something intently",
+    "jump": "a happy celebration jump: anticipation, lift off the ground, peak, and land",
+}
+
+_STYLE_HINTS: dict[str, str] = {
+    "auto": "",
+    "pixel": " Render in clean pixel-art style.",
+    "plush": " Render as a soft plush toy.",
+    "clay": " Render as a claymation / soft 3D clay figure.",
+    "sticker": " Render as a glossy die-cut sticker.",
+    "flat-vector": " Render in flat vector mascot style.",
+    "3d-toy": " Render as a glossy 3D toy.",
+    "painterly": " Render in a soft painterly style.",
+}
+
+_BACKGROUND = (
+    "Center one full-body character on a fully transparent background. "
+    "No text, no labels, no shadow, no ground line, no scenery, no frame, no border."
+)
+
+
+def style_hint(style: str | None) -> str:
+    return _STYLE_HINTS.get((style or "auto").strip().lower(), "")
+
+
+def build_base_prompt(concept: str, *, style: str | None = "auto") -> str:
+    """The base look: a single, clean, centered full-body mascot."""
+    concept = (concept or "a cute friendly mascot creature").strip()
+    return (
+        f"A cute, characterful mascot pet: {concept}. "
+        "Compact, whole-body silhouette that reads clearly at small size, "
+        "appealing face, simple consistent palette. "
+        f"{_BACKGROUND}{style_hint(style)}"
+    )
+
+
+def build_row_prompt(state: str, frame_count: int, concept: str, *, style: str | None = "auto") -> str:
+    """A row strip: *frame_count* poses of the SAME character, left→right.
+
+    The attached base image is the identity source of truth; the prompt locks
+    species, palette, face, and props to it.
+    """
+    action = STATE_ACTIONS.get(state, "a simple idle pose")
+    concept = (concept or "the mascot").strip()
+    return (
+        f"Using the attached reference image as the exact same character "
+        f"(same species, face, colors, markings, proportions, and props), "
+        f"draw a single horizontal strip of {frame_count} animation frames showing {action}. "
+        f"The {frame_count} poses must be evenly spaced left to right, each fully separated "
+        "(not overlapping), same size and baseline, forming a smooth loop. "
+        f"Keep the character identical across all frames. {_BACKGROUND}{style_hint(style)}"
+    )
--- a/agent/pet/manifest.py
+++ b/agent/pet/manifest.py
@@ -0,0 +1,128 @@
+"""Fetch the public petdex manifest.
+
+``https://petdex.dev/api/manifest`` 307-redirects to a JSON document on R2:
+
+    {
+      "generatedAt": "...",
+      "total": 2926,
+      "pets": [
+        {"slug": "boba", "displayName": "Boba", "kind": "creature",
+         "submittedBy": "railly",
+         "spritesheetUrl": "https://assets.petdex.dev/.../spritesheet.webp",
+         "petJsonUrl": "https://assets.petdex.dev/.../pet.json",
+         "zipUrl": "https://assets.petdex.dev/.../boba.zip"},
+        ...
+      ]
+    }
+
+Read-only and unauthenticated; no credentials involved.
+"""
+
+from __future__ import annotations
+
+import logging
+import time
+from dataclasses import dataclass
+
+logger = logging.getLogger(__name__)
+
+MANIFEST_URL = "https://petdex.dev/api/manifest"
+
+_DEFAULT_TIMEOUT = 20.0
+
+# In-process cache for the (large, slow, identical-per-call) manifest. The list
+# is a static CDN object that barely changes, yet a single session can ask for
+# it many times — every gallery open, plus a full re-fetch per install/select
+# (``find_entry``). A short TTL collapses those into one network hit without
+# going stale for long. Cleared by :func:`clear_cache` (tests).
+_MANIFEST_TTL = 300.0
+_cache: tuple[float, list[ManifestEntry]] | None = None
+
+
+def clear_cache() -> None:
+    """Drop the cached manifest (forces the next fetch to hit the network).
+
+    Resets the module-level ``_cache`` slot to ``None``, so the next
+    :func:`fetch_manifest` call finds nothing cached regardless of the TTL.
+    """
+    global _cache
+    _cache = None
+
+
+@dataclass(frozen=True)
+class ManifestEntry:
+    """A single pet's row in the manifest."""
+
+    slug: str
+    display_name: str
+    kind: str
+    submitted_by: str
+    spritesheet_url: str
+    pet_json_url: str
+    zip_url: str
+
+    @classmethod
+    def from_dict(cls, data: dict) -> "ManifestEntry":
+        return cls(
+            slug=str(data.get("slug", "")).strip(),
+            display_name=str(data.get("displayName", "") or data.get("slug", "")),
+            kind=str(data.get("kind", "") or "pet"),
+            submitted_by=str(data.get("submittedBy", "") or ""),
+            spritesheet_url=str(data.get("spritesheetUrl", "") or ""),
+            pet_json_url=str(data.get("petJsonUrl", "") or ""),
+            zip_url=str(data.get("zipUrl", "") or ""),
+        )
+
+
+class ManifestError(RuntimeError):
+    """Raised when the manifest can't be fetched or parsed.
+
+    :func:`fetch_manifest` raises it when ``httpx`` is missing, when the
+    request or JSON decoding fails, and when the payload has no ``pets`` list.
+    """
+
+
+def fetch_manifest(*, timeout: float = _DEFAULT_TIMEOUT, force: bool = False) -> list[ManifestEntry]:
+    """Return every approved pet from the public manifest.
+
+    Cached in-process for ``_MANIFEST_TTL`` seconds (pass ``force=True`` to
+    bypass). Follows the 307 redirect to R2.  Raises :class:`ManifestError` on
+    any network/parse failure so callers can surface a clean message.
+    """
+    global _cache
+
+    if not force and _cache is not None and time.monotonic() - _cache[0] < _MANIFEST_TTL:
+        return _cache[1]
+
+    try:
+        import httpx
+    except ImportError as exc:  # pragma: no cover - httpx is a core dep
+        raise ManifestError("httpx is required to fetch the petdex manifest") from exc
+
+    try:
+        resp = httpx.get(
+            MANIFEST_URL,
+            timeout=timeout,
+            follow_redirects=True,
+            headers={"User-Agent": "hermes-agent-petdex"},
+        )
+        resp.raise_for_status()
+        payload = resp.json()
+    except Exception as exc:  # noqa: BLE001 - normalize to one error type
+        raise ManifestError(f"could not fetch petdex manifest: {exc}") from exc
+
+    pets = payload.get("pets") if isinstance(payload, dict) else None
+    if not isinstance(pets, list):
+        raise ManifestError("petdex manifest had no 'pets' array")
+
+    entries: list[ManifestEntry] = []
+    for raw in pets:
+        if not isinstance(raw, dict):
+            continue
+        entry = ManifestEntry.from_dict(raw)
+        if entry.slug and entry.spritesheet_url:
+            entries.append(entry)
+
+    _cache = (time.monotonic(), entries)
+    return entries
+
+
+def find_entry(slug: str, *, timeout: float = _DEFAULT_TIMEOUT) -> ManifestEntry | None:
+    """Return the manifest entry for *slug*, or ``None`` if not listed."""
+    slug = slug.strip().lower()
+    for entry in fetch_manifest(timeout=timeout):
+        if entry.slug.lower() == slug:
+            return entry
+    return None
--- a/agent/pet/render.py
+++ b/agent/pet/render.py
@@ -0,0 +1,618 @@
+"""Decode a pet spritesheet and encode frames for a terminal.
+
+Shared by the base CLI (writes the escape bytes to its own stdout) and the
+TUI (``tui_gateway`` ships the encoded bytes to Ink, which writes them) so the
+decode + capability-detection + protocol-encoding logic exists exactly once.
+
+Supported output modes, in fidelity order:
+
+- ``kitty``   — the kitty graphics protocol (kitty, Ghostty, WezTerm).
+- ``iterm``   — iTerm2 inline images (iTerm2, WezTerm).
+- ``sixel``   — DEC sixel (xterm -ti vt340, foot, mlterm, WezTerm, …).
+- ``unicode`` — 24-bit half-block downscale; works in any truecolor terminal.
+
+Frame decoding requires Pillow (a core Hermes dependency).  If Pillow or the
+spritesheet is unavailable the renderer degrades to ``unicode`` text or an
+empty string rather than raising.
+"""
+
+from __future__ import annotations
+
+import base64
+import io
+import logging
+import os
+import sys
+from functools import lru_cache
+from pathlib import Path
+
+from agent.pet.constants import (
+    DEFAULT_SCALE,
+    FRAME_H,
+    FRAME_W,
+    FRAMES_PER_STATE,
+    PetState,
+    state_row_index,
+)
+
+logger = logging.getLogger(__name__)
+
+# Public render-mode names accepted by ``display.pet.render_mode``.
+RENDER_MODES = ("auto", "kitty", "iterm", "sixel", "unicode", "off")
+
+
+# ─────────────────────────────────────────────────────────────────────────
+# Terminal capability detection
+# ─────────────────────────────────────────────────────────────────────────
+
+def detect_terminal_graphics() -> str:
+    """Best-effort detection of the richest graphics protocol available.
+
+    Env-based (non-blocking — we never issue a DA1/terminal query that could
+    hang a pipe).  Returns one of ``kitty`` / ``iterm`` / ``sixel`` /
+    ``unicode``.  Conservative: unknown terminals get ``unicode``, which works
+    anywhere with truecolor.
+    """
+    term = os.environ.get("TERM", "").lower()
+    term_program = os.environ.get("TERM_PROGRAM", "").lower()
+
+    # The VS Code / Cursor integrated terminal sets TERM_PROGRAM=vscode
+    # authoritatively but does NOT scrub the terminal env vars it inherits when
+    # launched from another emulator (ITERM_SESSION_ID, KITTY_WINDOW_ID, …).
+    # Trusting those leaks emits an image protocol the embedded xterm.js can't
+    # display — you get a blank frame. Inline images there are opt-in
+    # (terminal.integrated.enableImages), so default to half-blocks, which
+    # always render in its truecolor grid. Users who enabled images can pin
+    # display.pet.render_mode explicitly.
+    if term_program == "vscode":
+        return "unicode"
+
+    # kitty graphics protocol
+    if os.environ.get("KITTY_WINDOW_ID") or "kitty" in term or "ghostty" in term:
+        return "kitty"
+    if term_program in {"ghostty"}:
+        return "kitty"
+
+    # WezTerm speaks both kitty and iterm; prefer kitty (richer placement).
+    if term_program == "wezterm" or os.environ.get("WEZTERM_PANE"):
+        return "kitty"
+
+    # iTerm2 inline images
+    if term_program == "iterm.app" or os.environ.get("ITERM_SESSION_ID"):
+        return "iterm"
+
+    # sixel-capable terminals (env heuristics only)
+    if term_program in {"mintty"} or "foot" in term or "mlterm" in term:
+        return "sixel"
+    if "sixel" in term:
+        return "sixel"
+
+    return "unicode"
+
+
+def resolve_mode(configured: str | None, *, stream=None) -> str:
+    """Resolve the effective render mode from config + the environment.
+
+    ``configured`` is ``display.pet.render_mode`` (``auto`` → detect).  Returns
+    ``off`` when not attached to a TTY (no point emitting graphics into a pipe
+    or logfile).
+    """
+    mode = (configured or "auto").strip().lower()
+    if mode not in RENDER_MODES:
+        mode = "auto"
+    if mode == "off":
+        return "off"
+
+    stream = stream or sys.stdout
+    try:
+        if not (hasattr(stream, "isatty") and stream.isatty()):
+            return "off"
+    except (ValueError, OSError):
+        return "off"
+
+    if mode == "auto":
+        return detect_terminal_graphics()
+    return mode
+
+
+# ─────────────────────────────────────────────────────────────────────────
+# Frame decoding
+# ─────────────────────────────────────────────────────────────────────────
+
+def _open_sheet(path: Path):
+    from PIL import Image
+
+    img = Image.open(path)
+    return img.convert("RGBA")
+
+
+# Max alpha at/below which a frame counts as blank padding.  petdex sheets are
+# left-packed: a state with fewer real frames than ``FRAMES_PER_STATE`` fills
+# the trailing columns with fully transparent cells.  Animating into one flashes
+# the pet blank, so we stop the row at the first such gap.
+_BLANK_ALPHA = 8
+
+
+def _frame_is_blank(frame) -> bool:
+    """True if *frame* has no meaningfully opaque pixel (transparent padding)."""
+    return frame.getchannel("A").getextrema()[1] <= _BLANK_ALPHA
+
+
+@lru_cache(maxsize=16)
+def _raw_frames(
+    sheet_path: str,
+    state_value: str,
+    frame_w: int,
+    frame_h: int,
+    frames_per_state: int,
+) -> tuple:
+    """Cropped, padding-trimmed RGBA frames for one state row (unscaled).
+
+    Steps across the row until the first blank column so pets with ragged
+    per-state frame counts never animate into empty padding.  Cached; returns
+    ``()`` on any decode failure.
+    """
+    try:
+        sheet = _open_sheet(Path(sheet_path))
+        cols = max(1, sheet.width // frame_w)
+        rows = max(1, sheet.height // frame_h)
+        row = state_row_index(state_value, rows)
+        top = row * frame_h
+        # Clamp the row to the sheet (some pets ship fewer rows than the 8 the
+        # taxonomy reserves).
+        if top + frame_h > sheet.height:
+            top = max(0, sheet.height - frame_h)
+
+        frames = []
+        for i in range(min(frames_per_state, cols)):
+            left = i * frame_w
+            frame = sheet.crop((left, top, left + frame_w, top + frame_h))
+            if _frame_is_blank(frame):
+                break  # trailing transparent padding — real frames end here
+            frames.append(frame)
+        return tuple(frames)
+    except Exception as exc:  # noqa: BLE001 - cosmetic feature, never fatal
+        logger.debug("pet frame decode failed (%s, %s): %s", sheet_path, state_value, exc)
+        return ()
+
+
+@lru_cache(maxsize=8)
+def _frames_for(
+    sheet_path: str,
+    state_value: str,
+    frame_w: int,
+    frame_h: int,
+    frames_per_state: int,
+    scale_w: int,
+    scale_h: int,
+):
+    """Return padding-trimmed RGBA frames for one state row, scaled.
+
+    Thin scaling layer over :func:`_raw_frames`; both are cached so repeated
+    frame requests during animation are free.
+    """
+    raw = _raw_frames(sheet_path, state_value, frame_w, frame_h, frames_per_state)
+    if not raw or (scale_w, scale_h) == (frame_w, frame_h):
+        return list(raw)
+    from PIL import Image
+
+    return [f.resize((scale_w, scale_h), Image.LANCZOS) for f in raw]
+
+
+def state_frame_counts(
+    sheet_path: str | Path,
+    *,
+    frame_w: int = FRAME_W,
+    frame_h: int = FRAME_H,
+    frames_per_state: int = FRAMES_PER_STATE,
+) -> dict[str, int]:
+    """Map each driven :class:`PetState` → its real (padding-trimmed) frame count.
+
+    The single source of truth for "how many frames does this state actually
+    have?".  The CLI/TUI consume the trimmed frame lists directly; the gateway
+    ships this map to the desktop canvas, which steps its own loop.
+    """
+    return {
+        state.value: len(
+            _raw_frames(str(sheet_path), state.value, frame_w, frame_h, frames_per_state)
+        )
+        for state in PetState
+    }
+
+
+# ─────────────────────────────────────────────────────────────────────────
+# Encoders
+# ─────────────────────────────────────────────────────────────────────────
+
+def _png_bytes(frame) -> bytes:
+    buf = io.BytesIO()
+    frame.save(buf, format="PNG")
+    return buf.getvalue()
+
+
+def _kitty_apc(ctrl: str, data: str) -> str:
+    """Emit a kitty APC escape for *data*, chunked into ≤4096-byte ``m`` pieces."""
+    chunk = 4096
+    if len(data) <= chunk:
+        return f"\x1b_G{ctrl},m=0;{data}\x1b\\"
+    out = [f"\x1b_G{ctrl},m=1;{data[:chunk]}\x1b\\"]
+    rest = data[chunk:]
+    while rest:
+        piece, rest = rest[:chunk], rest[chunk:]
+        out.append(f"\x1b_Gm={1 if rest else 0};{piece}\x1b\\")
+    return "".join(out)
+
+
+def _encode_kitty(frame, *, cell_cols: int | None = None, cell_rows: int | None = None) -> str:
+    """Encode one frame via the kitty graphics protocol (transmit + display).
+
+    ``a=T`` transmits & displays at the cursor; ``c``/``r`` request a display
+    box in terminal cells so successive frames overwrite the same area.
+    """
+    ctrl = "f=100,a=T,q=2"
+    if cell_cols:
+        ctrl += f",c={cell_cols}"
+    if cell_rows:
+        ctrl += f",r={cell_rows}"
+    return _kitty_apc(ctrl, base64.standard_b64encode(_png_bytes(frame)).decode("ascii"))
+
+
+# ─────────────────────────────────────────────────────────────────────────
+# kitty Unicode placeholders
+#
+# Ink (the TUI's React-for-terminal layer) owns the screen and measures every
+# cell's width, so it can't host raw kitty image escapes (no width to count,
+# clobbered on the next repaint). kitty's *Unicode placeholder* protocol is the
+# grid-safe path: transmit the image once (q=2, virtual placement U=1), then the
+# host app prints ordinary-width placeholder cells (U+10EEEE + diacritics) whose
+# foreground color encodes the image id. Ink counts those as width-1 text, so
+# layout stays correct and the terminal paints the image underneath.
+#   https://sw.kovidgoyal.net/kitty/graphics-protocol/#unicode-placeholders
+# ─────────────────────────────────────────────────────────────────────────
+
+_KITTY_PLACEHOLDER = "\U0010eeee"
+
+# Row/column diacritics, in order (index → diacritic). Verbatim from kitty's
+# gen/rowcolumn-diacritics.txt (Unicode 6.0.0, combining class 230). Index i is
+# the diacritic that encodes the number i; we only ever need the row index.
+_ROWCOL_DIACRITICS: tuple[int, ...] = (
+    0x0305, 0x030D, 0x030E, 0x0310, 0x0312, 0x033D, 0x033E, 0x033F, 0x0346, 0x034A,
+    0x034B, 0x034C, 0x0350, 0x0351, 0x0352, 0x0357, 0x035B, 0x0363, 0x0364, 0x0365,
+    0x0366, 0x0367, 0x0368, 0x0369, 0x036A, 0x036B, 0x036C, 0x036D, 0x036E, 0x036F,
+    0x0483, 0x0484, 0x0485, 0x0486, 0x0487, 0x0592, 0x0593, 0x0594, 0x0595, 0x0597,
+    0x0598, 0x0599, 0x059C, 0x059D, 0x059E, 0x059F, 0x05A0, 0x05A1, 0x05A8, 0x05A9,
+    0x05AB, 0x05AC, 0x05AF, 0x05C4, 0x0610, 0x0611, 0x0612, 0x0613, 0x0614, 0x0615,
+    0x0616, 0x0617, 0x0657, 0x0658, 0x0659, 0x065A, 0x065B, 0x065D, 0x065E, 0x06D6,
+    0x06D7, 0x06D8, 0x06D9, 0x06DA, 0x06DB, 0x06DC, 0x06DF, 0x06E0, 0x06E1, 0x06E2,
+    0x06E4, 0x06E7, 0x06E8, 0x06EB, 0x06EC, 0x0730, 0x0732, 0x0733, 0x0735, 0x0736,
+    0x073A, 0x073D, 0x073F, 0x0740, 0x0741, 0x0743, 0x0745, 0x0747, 0x0749, 0x074A,
+    0x07EB, 0x07EC, 0x07ED, 0x07EE, 0x07EF, 0x07F0, 0x07F1, 0x07F3, 0x0816, 0x0817,
+    0x0818, 0x0819, 0x081B, 0x081C, 0x081D, 0x081E, 0x081F, 0x0820, 0x0821, 0x0822,
+    0x0823, 0x0825, 0x0826, 0x0827, 0x0829, 0x082A, 0x082B, 0x082C, 0x082D, 0x0951,
+    0x0953, 0x0954, 0x0F82, 0x0F83, 0x0F86, 0x0F87, 0x135D, 0x135E, 0x135F, 0x17DD,
+    0x193A, 0x1A17, 0x1A75, 0x1A76, 0x1A77, 0x1A78, 0x1A79, 0x1A7A, 0x1A7B, 0x1A7C,
+    0x1B6B, 0x1B6D, 0x1B6E, 0x1B6F, 0x1B70, 0x1B71, 0x1B72, 0x1B73, 0x1CD0, 0x1CD1,
+    0x1CD2, 0x1CDA, 0x1CDB, 0x1CE0, 0x1DC0, 0x1DC1, 0x1DC3, 0x1DC4, 0x1DC5, 0x1DC6,
+    0x1DC7, 0x1DC8, 0x1DC9, 0x1DCB, 0x1DCC, 0x1DD1, 0x1DD2, 0x1DD3, 0x1DD4, 0x1DD5,
+    0x1DD6, 0x1DD7, 0x1DD8, 0x1DD9, 0x1DDA, 0x1DDB, 0x1DDC, 0x1DDD, 0x1DDE, 0x1DDF,
+    0x1DE0, 0x1DE1, 0x1DE2, 0x1DE3, 0x1DE4, 0x1DE5, 0x1DE6, 0x1DFE, 0x20D0, 0x20D1,
+    0x20D4, 0x20D5, 0x20D6, 0x20D7, 0x20DB, 0x20DC, 0x20E1, 0x20E7, 0x20E9, 0x20F0,
+    0x2CEF, 0x2CF0, 0x2CF1, 0x2DE0, 0x2DE1, 0x2DE2, 0x2DE3, 0x2DE4, 0x2DE5, 0x2DE6,
+    0x2DE7, 0x2DE8, 0x2DE9, 0x2DEA, 0x2DEB, 0x2DEC, 0x2DED, 0x2DEE, 0x2DEF, 0x2DF0,
+    0x2DF1, 0x2DF2, 0x2DF3, 0x2DF4, 0x2DF5, 0x2DF6, 0x2DF7, 0x2DF8, 0x2DF9, 0x2DFA,
+    0x2DFB, 0x2DFC, 0x2DFD, 0x2DFE, 0x2DFF, 0xA66F, 0xA67C, 0xA67D, 0xA6F0, 0xA6F1,
+    0xA8E0, 0xA8E1, 0xA8E2, 0xA8E3, 0xA8E4, 0xA8E5, 0xA8E6, 0xA8E7, 0xA8E8, 0xA8E9,
+    0xA8EA, 0xA8EB, 0xA8EC, 0xA8ED, 0xA8EE, 0xA8EF, 0xA8F0, 0xA8F1, 0xAAB0, 0xAAB2,
+    0xAAB3, 0xAAB7, 0xAAB8, 0xAABE, 0xAABF, 0xAAC1, 0xFE20, 0xFE21, 0xFE22, 0xFE23,
+    0xFE24, 0xFE25, 0xFE26, 0x10A0F, 0x10A38, 0x1D185, 0x1D186, 0x1D187, 0x1D188,
+    0x1D189, 0x1D1AA, 0x1D1AB, 0x1D1AC, 0x1D1AD, 0x1D242, 0x1D243, 0x1D244,
+)
+
+
+def kitty_image_id(slug: str) -> int:
+    """Stable per-pet image id in ``[1, 0x7FFF]``.
+
+    The id is encoded in the placeholder's 24-bit foreground color, so it must
+    be non-zero and fit comfortably under ``0xFFFFFF``. A small CRC keeps it
+    deterministic per slug (so re-renders reuse the same terminal-side image)
+    while making collisions between two different pets unlikely.
+    """
+    import zlib
+
+    return (zlib.crc32(slug.encode("utf-8")) % 0x7FFE) + 1
+
+
+def kitty_color_hex(image_id: int) -> str:
+    """Hex foreground color (``#rrggbb``) that encodes *image_id* for kitty."""
+    return "#%06x" % (image_id & 0xFFFFFF)
+
+
+def kitty_placeholder_rows(cols: int, rows: int) -> list[str]:
+    """Build the placeholder text grid for an *rows*×*cols* image.
+
+    Each line is one row of the grid: the first cell carries the row diacritic
+    (column defaults to 0), and the remaining ``cols-1`` bare placeholders let
+    the terminal auto-increment the column. The foreground color (the image id)
+    is applied by the caller / Ink, not embedded here.
+    """
+    cols = max(1, cols)
+    out: list[str] = []
+    for r in range(max(1, rows)):
+        idx = min(r, len(_ROWCOL_DIACRITICS) - 1)
+        first = _KITTY_PLACEHOLDER + chr(_ROWCOL_DIACRITICS[idx])
+        out.append(first + _KITTY_PLACEHOLDER * (cols - 1))
+    return out
+
+
+def _encode_kitty_virtual(frame, *, image_id: int, cols: int, rows: int) -> str:
+    """Transmit a frame as a kitty *virtual* placement for Unicode placeholders.
+
+    ``a=T`` transmits and creates the placement in one shot; ``U=1`` marks it
+    virtual (no on-screen output, cursor untouched); ``q=2`` suppresses the
+    terminal's OK/error replies that would otherwise corrupt the host app's
+    output. Re-sending with the same ``i`` replaces the image, so the static
+    placeholder cells animate underneath.
+    """
+    ctrl = f"a=T,U=1,i={image_id},c={cols},r={rows},f=100,q=2"
+    return _kitty_apc(ctrl, base64.standard_b64encode(_png_bytes(frame)).decode("ascii"))
+
+
+def _encode_iterm(frame, *, cell_cols: int | None = None, cell_rows: int | None = None) -> str:
+    """Encode one frame as an iTerm2 inline image (OSC 1337 File)."""
+    payload = base64.standard_b64encode(_png_bytes(frame)).decode("ascii")
+    size = len(payload)
+    args = [f"inline=1", f"size={size}", "preserveAspectRatio=1"]
+    if cell_cols:
+        args.append(f"width={cell_cols}")
+    if cell_rows:
+        args.append(f"height={cell_rows}")
+    return f"\x1b]1337;File={';'.join(args)}:{payload}\x07"
+
+
+def _encode_sixel(frame) -> str:
+    """Encode one frame as DEC sixel.
+
+    Quantizes to an adaptive palette (≤255 colors) and emits the sixel band
+    stream.  Pillow has no sixel writer, so this is a compact hand-rolled
+    encoder.  Transparent pixels render as background (color register skipped).
+
+    Args:
+        frame: Image with an alpha channel (``getchannel("A")`` is called on
+            it) — presumably the RGBA frames produced by the decoder.
+
+    Returns:
+        The full escape sequence: DCS introducer, raster attributes, color
+        register definitions, one block per 6-pixel-high band, and the string
+        terminator.
+    """
+    from PIL import Image
+
+    rgba = frame
+    # Quantize the color channels only; alpha is sampled separately below so a
+    # transparent pixel is simply never painted in any color pass.
+    pal = rgba.convert("RGB").quantize(colors=255, method=Image.MEDIANCUT)
+    palette = pal.getpalette() or []
+    px = pal.load()
+    alpha = rgba.getchannel("A").load()
+    w, h = pal.size
+
+    # DCS introducer followed by the raster attributes (1:1 aspect, w × h).
+    out = ["\x1bP0;1;0q", '"1;1;%d;%d' % (w, h)]
+    # Color register definitions (sixel uses 0..100 scale).
+    # Only palette indices that actually occur in the image are defined.
+    used = sorted({px[x, y] for y in range(h) for x in range(w)})
+    for idx in used:
+        # Guard each lookup in case the palette list is shorter than expected.
+        r = palette[idx * 3] if idx * 3 < len(palette) else 0
+        g = palette[idx * 3 + 1] if idx * 3 + 1 < len(palette) else 0
+        b = palette[idx * 3 + 2] if idx * 3 + 2 < len(palette) else 0
+        out.append("#%d;2;%d;%d;%d" % (idx, r * 100 // 255, g * 100 // 255, b * 100 // 255))
+
+    # Emit in 6-row bands.
+    for band in range(0, h, 6):
+        # One pass per color: select the register, then paint the whole band
+        # width with that color's bit pattern.
+        for color_idx in used:
+            line = ["#%d" % color_idx]
+            # Run-length state for the character currently being repeated.
+            run_char = None
+            run_len = 0
+
+            def flush():
+                # Append the pending run to ``line`` and reset the run state.
+                nonlocal run_char, run_len
+                if run_char is None:
+                    return
+                if run_len > 3:
+                    # ``!<count><char>`` is the repeat form; it only pays off
+                    # for runs longer than three characters.
+                    line.append("!%d%s" % (run_len, run_char))
+                else:
+                    line.append(run_char * run_len)
+                run_char, run_len = None, 0
+
+            for x in range(w):
+                # Build the 6-bit column: bit N is set when the pixel N rows
+                # into the band is opaque enough and has this color.
+                bits = 0
+                for bit in range(6):
+                    y = band + bit
+                    if y < h and alpha[x, y] > 32 and px[x, y] == color_idx:
+                        bits |= 1 << bit
+                # Sixel data characters are the bit pattern offset by 63 ('?').
+                ch = chr(63 + bits)
+                if ch == run_char:
+                    run_len += 1
+                else:
+                    flush()
+                    run_char, run_len = ch, 1
+            flush()
+            out.append("".join(line) + "$")  # carriage return within band
+        out.append("-")  # next band
+    # String terminator closes the sequence.
+    out.append("\x1b\\")
+    return "".join(out)
+
+
+_HALF_BLOCK = "▀"
+
+# A single half-block cell: top pixel + bottom pixel as (r, g, b, a) tuples.
+Cell = tuple[tuple[int, int, int, int], tuple[int, int, int, int]]
+
+
+def _downscale_cells(frame, *, target_cols: int) -> list[list[Cell]]:
+    """Shrink *frame* into a grid of half-block cells.
+
+    One terminal row carries two pixel rows, so every cell is a
+    ``(top, bottom)`` pair of ``(r, g, b, a)`` pixels.  The grid is the
+    framework-neutral form consumed both by the ANSI encoder (CLI) and by the
+    structured ``cells`` API (Ink).
+
+    Args:
+        frame: Image-like object exposing ``width``, ``height`` and
+            ``resize`` (a PIL image in practice).
+        target_cols: Requested grid width in cells; clamped to at least 4.
+
+    Returns:
+        A list of rows, each a list of ``(top_pixel, bottom_pixel)`` tuples.
+    """
+    from PIL import Image
+
+    cols = max(4, target_cols)
+    height_ratio = frame.height / max(1, frame.width)
+    # Round the pixel-row count to an even number (at least 2) so rows pair up.
+    pixel_rows = max(2, int(round(cols * height_ratio * 0.5)) * 2)
+    shrunk = frame.resize((cols, pixel_rows), Image.LANCZOS).convert("RGBA")
+    pixels = shrunk.load()
+    clear = (0, 0, 0, 0)
+
+    def _pair(x: int, y: int) -> Cell:
+        # Fall back to a transparent pixel if there is no row below.
+        lower = pixels[x, y + 1] if y + 1 < pixel_rows else clear
+        return (pixels[x, y], lower)
+
+    return [[_pair(x, y) for x in range(cols)] for y in range(0, pixel_rows, 2)]
+
+
+def _encode_unicode(frame, *, target_cols: int) -> str:
+    """Downscale to truecolor ANSI half-blocks (one char = 2 vertical pixels).
+
+    Each cell from :func:`_downscale_cells` becomes one character.  A pixel
+    whose alpha is below 32 is treated as transparent:
+
+    - both halves transparent → a plain space on the default background;
+    - only the bottom transparent → upper half block, foreground = top colour,
+      default background;
+    - only the top transparent → lower half block, foreground = bottom colour,
+      default background;
+    - both opaque → upper half block with foreground = top, background = bottom.
+
+    Handling the half-transparent cases separately keeps the sprite outline
+    clean; otherwise the transparent pixel's stored RGB would be painted as an
+    opaque fringe around the pet.
+
+    Returns:
+        Newline-joined rows, each terminated with an SGR reset.
+    """
+    lower_half = "▄"
+    lines: list[str] = []
+    for row in _downscale_cells(frame, target_cols=target_cols):
+        cells: list[str] = []
+        for (tr, tg, tb, ta), (br, bg, bb, ba) in row:
+            top_clear = ta < 32
+            bottom_clear = ba < 32
+            if top_clear and bottom_clear:
+                cells.append("\x1b[0m ")  # fully transparent → blank
+            elif bottom_clear:
+                # Reset first so the lower half shows the terminal background.
+                cells.append(f"\x1b[0m\x1b[38;2;{tr};{tg};{tb}m{_HALF_BLOCK}")
+            elif top_clear:
+                # Reset first so the upper half shows the terminal background.
+                cells.append(f"\x1b[0m\x1b[38;2;{br};{bg};{bb}m{lower_half}")
+            else:
+                cells.append(f"\x1b[38;2;{tr};{tg};{tb}m\x1b[48;2;{br};{bg};{bb}m{_HALF_BLOCK}")
+        lines.append("".join(cells) + "\x1b[0m")
+    return "\n".join(lines)
+
+
+# ─────────────────────────────────────────────────────────────────────────
+# Public renderer
+# ─────────────────────────────────────────────────────────────────────────
+
+class PetRenderer:
+    """Turns one pet's spritesheet into encoded frames keyed by (state, index).
+
+    Create a single instance per pet and call :meth:`frame` from an animation
+    timer.  Frame extraction is delegated to ``_frames_for`` on every call;
+    decoded frames are presumably cached inside that helper (not visible
+    here — confirm).
+    """
+
+    def __init__(
+        self,
+        spritesheet: str | Path,
+        *,
+        mode: str = "unicode",
+        scale: float = DEFAULT_SCALE,
+        unicode_cols: int = 20,
+        frame_w: int = FRAME_W,
+        frame_h: int = FRAME_H,
+        frames_per_state: int = FRAMES_PER_STATE,
+    ) -> None:
+        # Unrecognised modes quietly fall back to the half-block renderer.
+        if mode not in RENDER_MODES:
+            mode = "unicode"
+        self.mode = mode
+        self.spritesheet = str(spritesheet)
+        self.scale = scale
+        self.unicode_cols = unicode_cols
+        self.frame_w = frame_w
+        self.frame_h = frame_h
+        self.frames_per_state = frames_per_state
+
+    @property
+    def available(self) -> bool:
+        """True when rendering is enabled and the spritesheet file exists."""
+        if self.mode == "off":
+            return False
+        return Path(self.spritesheet).is_file()
+
+    def frame_count(self, state: PetState | str) -> int:
+        """Number of frames available for *state*."""
+        frames = self._frames(state)
+        return len(frames)
+
+    def _frames(self, state: PetState | str):
+        """Fetch the frames for *state* at this renderer's scaled size."""
+        key = state.value if isinstance(state, PetState) else str(state)
+        # Scaled pixel size, never collapsing below 1px on either axis.
+        scaled = tuple(max(1, int(side * self.scale)) for side in (self.frame_w, self.frame_h))
+        return _frames_for(
+            self.spritesheet,
+            key,
+            self.frame_w,
+            self.frame_h,
+            self.frames_per_state,
+            *scaled,
+        )
+
+    def cells(self, state: PetState | str, index: int, *, cols: int | None = None) -> list[list[Cell]]:
+        """Return one frame as a framework-neutral half-block cell grid.
+
+        The TUI paints this grid with native Ink colour props rather than raw
+        ANSI.  ``index`` wraps around the frame count; ``cols`` falls back to
+        ``unicode_cols`` when omitted (or zero).  Yields ``[]`` when the state
+        has no frames.
+        """
+        frames = self._frames(state)
+        if frames:
+            picked = frames[index % len(frames)]
+            return _downscale_cells(picked, target_cols=cols or self.unicode_cols)
+        return []
+
+    def _cell_box(self, frame) -> tuple[int, int]:
+        """Terminal cell box for a scaled frame, assuming ~8×16 px per cell.
+
+        Has to agree with the graphics sizing in :meth:`frame`: kitty stretches
+        the image over ``c``×``r`` cells, so the box is derived from the scaled
+        pixel dimensions rather than a native-aspect column count (which would
+        upscale small pets).
+        """
+        width_cells = max(1, frame.width // 8)
+        height_cells = max(1, frame.height // 16)
+        return width_cells, height_cells
+
+    def kitty_payload(self, state: PetState | str, *, image_id: int) -> dict | None:
+        """Assemble the kitty Unicode-placeholder payload for one state.
+
+        The result is ``{cols, rows, placeholder, frames}``: ``frames`` holds
+        one transmit escape per animation frame (all sharing ``image_id``) and
+        ``placeholder`` is the static text grid Ink paints.  Geometry comes
+        from the scaled frame pixels via :meth:`_cell_box`, not from
+        ``unicode_cols`` — kitty upscales to fill ``c``×``r`` cells.  Returns
+        ``None`` when the state has no frames.
+        """
+        frames = self._frames(state)
+        if not frames:
+            return None
+        cols, rows = self._cell_box(frames[0])
+        placeholder = kitty_placeholder_rows(cols, rows)
+        transmits = [
+            _encode_kitty_virtual(f, image_id=image_id, cols=cols, rows=rows) for f in frames
+        ]
+        return {
+            "cols": cols,
+            "rows": rows,
+            "placeholder": placeholder,
+            "frames": transmits,
+        }
+
+    def frame(self, state: PetState | str, index: int) -> str:
+        """Encode one frame as an escape string, or ``""`` if nothing to show.
+
+        ``index`` is reduced modulo the frame count, so a free-running counter
+        is fine.  Encoder failures are logged at debug level and yield ``""``.
+        """
+        if self.mode == "off":
+            return ""
+        frames = self._frames(state)
+        if not frames:
+            return ""
+        picked = frames[index % len(frames)]
+        box_cols, box_rows = self._cell_box(picked)
+
+        try:
+            mode = self.mode
+            if mode == "sixel":
+                return _encode_sixel(picked)
+            if mode == "iterm":
+                return _encode_iterm(picked, cell_cols=box_cols, cell_rows=box_rows)
+            if mode == "kitty":
+                return _encode_kitty(picked, cell_cols=box_cols, cell_rows=box_rows)
+            # Anything else (i.e. "unicode") uses the half-block fallback.
+            return _encode_unicode(picked, target_cols=self.unicode_cols)
+        except Exception as exc:  # noqa: BLE001 - degrade silently
+            logger.debug("pet frame encode failed (mode=%s): %s", self.mode, exc)
+            return ""
+
+
+def build_renderer(
+    spritesheet: str | Path,
+    *,
+    configured_mode: str | None = None,
+    scale: float = DEFAULT_SCALE,
+    unicode_cols: int = 20,
+    stream=None,
+) -> PetRenderer:
+    """Factory shortcut: resolve the render mode, then build a renderer.
+
+    ``configured_mode`` and ``stream`` are handed to ``resolve_mode``; the
+    remaining arguments go straight to :class:`PetRenderer`.
+    """
+    return PetRenderer(
+        spritesheet,
+        mode=resolve_mode(configured_mode, stream=stream),
+        scale=scale,
+        unicode_cols=unicode_cols,
+    )
--- a/agent/pet/state.py
+++ b/agent/pet/state.py
@@ -0,0 +1,81 @@
+"""Map agent activity → a :class:`PetState`.
+
+This is the one place the "what is the agent doing right now?" → "which
+animation row?" decision lives.  Each surface feeds it the signals it already
+tracks:
+
+- CLI    — ``KawaiiSpinner`` waiting/thinking state + tool outcomes.
+- TUI    — gateway ``tool.start/complete`` + ``message.delta/complete`` events.
+- Desktop — the ``$busy``/``$awaitingResponse``/tool-event nanostores
+            (re-implemented in TS, but mirroring this priority order).
+
+Keeping the priority order here (and documenting it) lets the TypeScript
+mirror stay faithful without a second design.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Iterable
+from typing import Any
+
+from agent.pet.constants import PetState
+
+
+def todos_all_done(todos: Iterable[Any] | None) -> bool:
+    """Report whether a non-empty todo list is entirely finished.
+
+    Drives the "celebrate" beat (``JUMP``) when a plan completes, mirroring
+    the TUI's ``isTodoDone`` so every surface shares one definition.
+
+    Args:
+        todos: Items that are either dicts with a ``"status"`` key or objects
+            with a ``status`` attribute; ``None`` counts as empty.
+
+    Returns:
+        ``True`` only when there is at least one item and each item's status
+        is ``"completed"`` or ``"cancelled"``.
+    """
+    finished = ("completed", "cancelled")
+    entries = list(todos or [])
+    for entry in entries:
+        if isinstance(entry, dict):
+            status = entry.get("status")
+        else:
+            status = getattr(entry, "status", None)
+        if status not in finished:
+            return False
+    # An empty list never counts as "all done".
+    return bool(entries)
+
+
+def derive_pet_state(
+    *,
+    busy: bool = False,
+    awaiting_input: bool = False,
+    error: bool = False,
+    celebrate: bool = False,
+    just_completed: bool = False,
+    tool_running: bool = False,
+    reasoning: bool = False,
+) -> PetState:
+    """Pick the animation state for a set of coarse activity flags.
+
+    Only one row can play at a time, so the first truthy flag in this
+    priority ladder wins:
+
+    1. ``error``          → ``FAILED``  (a tool/turn just failed)
+    2. ``celebrate``      → ``JUMP``    (explicit success beat, e.g. todos done)
+    3. ``just_completed`` → ``WAVE``    (turn finished cleanly / greeting)
+    4. ``awaiting_input`` → ``WAITING`` (a clarify/approval prompt is open; it
+       outranks the in-flight signals because the turn is paused on the user
+       even though a tool may technically be mid-call)
+    5. ``tool_running``   → ``RUN``     (a tool is executing)
+    6. ``reasoning``      → ``REVIEW``  (model is thinking / reading)
+    7. ``busy``           → ``RUN``     (turn in flight, unspecified work)
+    8. nothing set        → ``IDLE``
+    """
+    # Ordered highest priority first; keep in sync with the docstring.
+    ladder = (
+        (error, PetState.FAILED),
+        (celebrate, PetState.JUMP),
+        (just_completed, PetState.WAVE),
+        (awaiting_input, PetState.WAITING),
+        (tool_running, PetState.RUN),
+        (reasoning, PetState.REVIEW),
+        (busy, PetState.RUN),
+    )
+    for active, state in ladder:
+        if active:
+            return state
+    return PetState.IDLE
--- a/agent/pet/store.py
+++ b/agent/pet/store.py
@@ -0,0 +1,388 @@
+"""On-disk pet store — install / list / resolve pets.
+
+Pets live under ``get_hermes_home()/pets/<slug>/`` so every profile gets its
+own set (we deliberately do **not** reuse petdex's ``~/.codex/pets`` default —
+that's owned by the petdex npm CLI and isn't profile-aware).  Each installed
+pet directory holds:
+
+    pets/<slug>/
+        pet.json            # {id, displayName, description, spritesheetPath}
+        spritesheet.webp    # (or .png)
+
+The active pet is resolved from the caller-supplied ``display.pet.slug`` config
+value (falling back to the first installed pet), so this module stays free of
+the config loader.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from dataclasses import dataclass
+from pathlib import Path
+
+from hermes_constants import get_hermes_home
+
+logger = logging.getLogger(__name__)
+
+_DOWNLOAD_TIMEOUT = 60.0
+
+
+class PetStoreError(RuntimeError):
+    """Signals that a pet could not be installed, written, or downloaded."""
+
+
+@dataclass(frozen=True)
+class InstalledPet:
+    """Immutable record describing one pet directory on disk."""
+
+    # Directory name under the pets root.
+    slug: str
+    # Human-readable name (falls back to the slug when pet.json has none).
+    display_name: str
+    description: str
+    # The pet's own directory.
+    directory: Path
+    # Resolved spritesheet path; may point at a file that does not exist yet.
+    spritesheet: Path
+
+    @property
+    def exists(self) -> bool:
+        """Whether the spritesheet file is actually present."""
+        sheet = self.spritesheet
+        return sheet.is_file()
+
+
+def pets_dir() -> Path:
+    """Locate the ``pets`` directory under the Hermes home, creating it if needed."""
+    root = get_hermes_home() / "pets"
+    root.mkdir(parents=True, exist_ok=True)
+    return root
+
+
+def _read_pet_json(directory: Path) -> dict:
+    """Load ``pet.json`` from *directory* as a dict, or ``{}`` when unusable.
+
+    Args:
+        directory: The pet's directory.
+
+    Returns:
+        The parsed JSON object.  An empty dict is returned when the file is
+        missing, unreadable, not valid JSON, or valid JSON that is not an
+        object (a list, string, number, ...).
+    """
+    pet_json = directory / "pet.json"
+    if not pet_json.is_file():
+        return {}
+    try:
+        data = json.loads(pet_json.read_text(encoding="utf-8"))
+    except (OSError, ValueError) as exc:
+        logger.debug("unreadable pet.json in %s: %s", directory, exc)
+        return {}
+    # Callers use ``.get()`` on the result, so a non-object document must not
+    # leak through — it would crash the caller instead of falling back to the
+    # default metadata.
+    return data if isinstance(data, dict) else {}
+
+
+def _resolve_spritesheet(directory: Path, meta: dict) -> Path:
+    """Pick the spritesheet file for a pet directory.
+
+    The ``spritesheetPath`` declared in pet.json is tried first, followed by
+    the conventional names (``spritesheet.{webp,png}`` and petdex R2's
+    ``sprite.{webp,png}``).  The first candidate that exists as a file wins.
+
+    Returns:
+        The matching path, or ``directory / "spritesheet.webp"`` when nothing
+        exists, so callers always receive a stable path.
+    """
+    candidates: list[str] = []
+    declared = str(meta.get("spritesheetPath", "") or "").strip()
+    if declared:
+        candidates.append(declared)
+    candidates.extend(("spritesheet.webp", "spritesheet.png", "sprite.webp", "sprite.png"))
+
+    for name in candidates:
+        path = directory / name
+        if path.is_file():
+            return path
+    # Nothing on disk: hand back the default expectation.
+    return directory / "spritesheet.webp"
+
+
+def load_pet(slug: str) -> InstalledPet | None:
+    """Return the :class:`InstalledPet` for *slug*, or ``None`` if absent.
+
+    Args:
+        slug: Directory name of the pet under :func:`pets_dir`; surrounding
+            whitespace is ignored.
+
+    Returns:
+        The pet record (its spritesheet may still be missing — check
+        ``.exists``), or ``None`` when the directory does not exist or the
+        slug is not a plain single directory name.
+    """
+    slug = slug.strip()
+    # A slug must name exactly one child of the pets directory.  Without this
+    # guard an empty slug resolves to the pets root itself, and "..", absolute
+    # paths or embedded separators would resolve outside the store.
+    if not slug or slug in (".", "..") or Path(slug).name != slug:
+        return None
+    directory = pets_dir() / slug
+    if not directory.is_dir():
+        return None
+    meta = _read_pet_json(directory)
+    return InstalledPet(
+        slug=slug,
+        display_name=str(meta.get("displayName", "") or slug),
+        description=str(meta.get("description", "") or ""),
+        directory=directory,
+        spritesheet=_resolve_spritesheet(directory, meta),
+    )
+
+
+def installed_pets() -> list[InstalledPet]:
+    """Return every installed pet (dirs containing a usable spritesheet).
+
+    Entries are visited in sorted name order.  Plain files are skipped, and so
+    are dot-directories: ``.thumbs`` (created by :func:`_thumbs_dir`) lives in
+    the same folder and stores ``<slug>.png`` files, so a cached thumbnail
+    named ``spritesheet.png`` or ``sprite.png`` would otherwise make the cache
+    directory itself look like an installed pet.
+    """
+    out: list[InstalledPet] = []
+    for child in sorted(pets_dir().iterdir()):
+        if child.name.startswith(".") or not child.is_dir():
+            continue
+        pet = load_pet(child.name)
+        if pet and pet.exists:
+            out.append(pet)
+    return out
+
+
+def resolve_active_pet(configured_slug: str | None = None) -> InstalledPet | None:
+    """Decide which pet should be shown.
+
+    Order of preference: the pet named by ``display.pet.slug`` when it is
+    installed with a spritesheet; otherwise the first entry returned by
+    :func:`installed_pets`; otherwise ``None``.
+    """
+    chosen = load_pet(configured_slug.strip()) if configured_slug else None
+    if chosen and chosen.exists:
+        return chosen
+
+    candidates = installed_pets()
+    if candidates:
+        return candidates[0]
+    return None
+
+
+def install_pet(slug: str, *, force: bool = False, timeout: float = _DOWNLOAD_TIMEOUT) -> InstalledPet:
+    """Download *slug* from the manifest into the pets directory.
+
+    Idempotent: a fully-installed pet is returned as-is unless *force*.
+
+    Args:
+        slug: Manifest slug of the pet; surrounding whitespace is ignored.
+        force: Re-download even when the pet is already on disk.
+        timeout: Timeout passed to the manifest lookup and each download.
+
+    Returns:
+        The freshly loaded :class:`InstalledPet`.
+
+    Raises:
+        PetStoreError: The slug is not in the manifest, the spritesheet
+            download failed, or no spritesheet exists after installation.
+        agent.pet.manifest.ManifestError: Propagated from the manifest lookup.
+    """
+    import shutil
+
+    from agent.pet.manifest import find_entry
+
+    slug = slug.strip()
+    existing = load_pet(slug)
+    if existing and existing.exists and not force:
+        return existing
+
+    entry = find_entry(slug, timeout=timeout)
+    if entry is None:
+        raise PetStoreError(f"pet '{slug}' is not in the petdex manifest")
+
+    directory = pets_dir() / slug
+    created_here = not directory.exists()
+    directory.mkdir(parents=True, exist_ok=True)
+
+    # Keep the upstream format: ".png" only when the URL path says so.
+    sprite_ext = ".png" if entry.spritesheet_url.lower().split("?")[0].endswith(".png") else ".webp"
+    sprite_path = directory / f"spritesheet{sprite_ext}"
+
+    try:
+        _download(entry.spritesheet_url, sprite_path, timeout=timeout)
+    except Exception:
+        # Don't leave an empty shell behind for a pet that never installed;
+        # a pre-existing directory (force reinstall) is left untouched.
+        if created_here:
+            shutil.rmtree(directory, ignore_errors=True)
+        raise
+
+    # The sheet may have changed (force reinstall), so drop any cached
+    # thumbnail and let it be regenerated from the new file.
+    try:
+        (_thumbs_dir() / f"{slug}.png").unlink(missing_ok=True)
+    except OSError:
+        pass
+
+    # Fetch the upstream pet.json if present; otherwise synthesize a minimal
+    # one so the local layout is self-describing.
+    meta: dict = {}
+    if entry.pet_json_url:
+        try:
+            meta = _download_json(entry.pet_json_url, timeout=timeout)
+        except Exception as exc:  # noqa: BLE001 - non-fatal, fall back below
+            logger.debug("pet.json fetch failed for %s: %s", slug, exc)
+    if not isinstance(meta, dict) or not meta:
+        meta = {"id": slug, "displayName": entry.display_name, "description": ""}
+    # Always point at the file we just wrote, whatever upstream declared.
+    meta["spritesheetPath"] = sprite_path.name
+    meta.setdefault("id", slug)
+    meta.setdefault("displayName", entry.display_name)
+    (directory / "pet.json").write_text(json.dumps(meta, indent=2), encoding="utf-8")
+
+    pet = load_pet(slug)
+    if pet is None or not pet.exists:
+        raise PetStoreError(f"install of '{slug}' did not produce a spritesheet")
+    return pet
+
+
+def slugify(name: str) -> str:
+    """Turn a display name into a lowercase, hyphen-separated filesystem slug.
+
+    Every run of characters outside ``a-z0-9`` collapses to a single hyphen
+    and leading/trailing hyphens are dropped.  Falls back to ``"pet"`` when
+    nothing usable remains (including an empty or ``None`` name).
+    """
+    lowered = (name or "").strip().lower()
+    hyphenated = re.sub(r"[^a-z0-9]+", "-", lowered)
+    trimmed = hyphenated.strip("-")
+    return trimmed if trimmed else "pet"
+
+
+def unique_slug(name: str) -> str:
+    """Derive a slug from *name* that is not already taken in the pets dir.
+
+    Starts from :func:`slugify` and, while a path with that name exists,
+    tries ``<base>-2``, ``<base>-3``, ... in turn.
+    """
+    base = slugify(name)
+    candidate, suffix = base, 2
+    while True:
+        if not (pets_dir() / candidate).exists():
+            return candidate
+        candidate = f"{base}-{suffix}"
+        suffix += 1
+
+
+def _write_spritesheet(source, dest: Path) -> None:
+    """Persist *source* as the spritesheet file at *dest*.
+
+    Raw ``bytes``/``bytearray`` are written verbatim (no decoding or
+    re-encoding).  A path (``str``/``Path``) is opened with Pillow, and any
+    other object is treated as a PIL image; both are converted to RGBA and
+    saved as lossless WebP.
+    """
+    if isinstance(source, (bytes, bytearray)):
+        dest.write_bytes(bytes(source))
+        return
+
+    from PIL import Image
+
+    if not isinstance(source, (str, Path)):
+        rgba = source.convert("RGBA")
+    else:
+        with Image.open(source) as handle:
+            rgba = handle.convert("RGBA")
+    rgba.save(dest, format="WEBP", lossless=True, quality=100, method=6, exact=True)
+
+
+def register_local_pet(
+    spritesheet,
+    *,
+    slug: str,
+    display_name: str = "",
+    description: str = "",
+) -> InstalledPet:
+    """Store a locally generated pet and return its record.
+
+    *spritesheet* is handed to :func:`_write_spritesheet` (PIL image, raw
+    bytes, or a path).  Once written, the pet shows up in
+    :func:`installed_pets`; and since :func:`install_pet` returns an
+    already-on-disk pet before consulting the manifest, it can be adopted
+    (``pet.select`` / ``/pet <slug>``) without a manifest entry.
+
+    Args:
+        spritesheet: Image data for the sheet.
+        slug: Desired slug; normalised through :func:`slugify`.
+        display_name: Shown name; defaults to the normalised slug.
+        description: Optional free-text description.
+
+    Raises:
+        PetStoreError: Writing the spritesheet failed, or it is missing
+            afterwards.
+    """
+    safe_slug = slugify(slug)
+    home = pets_dir() / safe_slug
+    home.mkdir(parents=True, exist_ok=True)
+    sheet = home / "spritesheet.webp"
+    try:
+        _write_spritesheet(spritesheet, sheet)
+    except Exception as exc:  # noqa: BLE001 - normalize to one error type
+        raise PetStoreError(f"could not write spritesheet for '{safe_slug}': {exc}") from exc
+
+    manifest = json.dumps(
+        {
+            "id": safe_slug,
+            "displayName": display_name or safe_slug,
+            "description": description or "",
+            "spritesheetPath": sheet.name,
+            "createdBy": "generator",
+        },
+        indent=2,
+    )
+    (home / "pet.json").write_text(manifest, encoding="utf-8")
+
+    registered = load_pet(safe_slug)
+    if registered is not None and registered.exists:
+        return registered
+    raise PetStoreError(f"register of generated pet '{safe_slug}' did not produce a spritesheet")
+
+
+# Size in source pixels of the box that thumbnail_png crops from the top-left
+# corner of a spritesheet (clamped to the sheet's actual size).
+_THUMB_FRAME_W = 192
+_THUMB_FRAME_H = 208
+# Output thumbnail width in pixels; the height is derived from the box ratio.
+_THUMB_W = 96  # rendered ~40px; 2x+ keeps it crisp on HiDPI
+
+
+def _thumbs_dir() -> Path:
+    """Return the ``.thumbs`` cache folder inside the pets dir, creating it if needed."""
+    cache_root = pets_dir() / ".thumbs"
+    cache_root.mkdir(parents=True, exist_ok=True)
+    return cache_root
+
+
+def _is_petdex_host(url: str) -> bool:
+    """Check that *url* targets ``petdex.dev`` or one of its subdomains.
+
+    Used to bound server-side fetches (anti-SSRF): only the parsed hostname
+    is compared, case-insensitively, and an unparsable URL is rejected.
+    """
+    from urllib.parse import urlparse
+
+    try:
+        hostname = urlparse(url).hostname
+    except ValueError:
+        return False
+    hostname = (hostname or "").lower()
+    if hostname == "petdex.dev":
+        return True
+    return hostname.endswith(".petdex.dev")
+
+
+def thumbnail_png(slug: str, *, source_url: str = "", timeout: float = 30.0) -> bytes | None:
+    """Return a small idle-frame PNG for *slug*, cached on disk.
+
+    Crops the top-left (idle, frame 0) cell of the spritesheet and downsamples
+    it to a thumbnail. Source preference: an installed spritesheet on disk, else
+    *source_url* — but only when it points at petdex (so the gateway never
+    fetches an arbitrary client-supplied URL).
+
+    Doing this server-side sidesteps the renderer's CSP / R2 hotlink limits that
+    break a direct ``<img src=cdn>`` and lets the result ride the authenticated
+    gateway as a same-origin data URL.
+
+    Args:
+        slug: Pet slug; also the cache file name (``.thumbs/<slug>.png``), so
+            it must be a single plain path component.
+        source_url: Optional remote spritesheet URL used when the pet is not
+            installed locally.
+        timeout: Timeout for the remote fetch.
+
+    Returns:
+        PNG bytes, or ``None`` when the slug is empty/unsafe, there is no
+        usable source, or Pillow/network fails; callers render a placeholder.
+    """
+    slug = slug.strip()
+    # The slug becomes part of a cache path that is both read and written, so
+    # refuse anything that could address a file outside the thumbs directory
+    # ("..", absolute paths, embedded separators).
+    if not slug or slug in (".", "..") or Path(slug).name != slug:
+        return None
+
+    cache = _thumbs_dir() / f"{slug}.png"
+    if cache.is_file():
+        try:
+            cached = cache.read_bytes()
+        except OSError:
+            cached = b""
+        # An empty file (e.g. an interrupted write) is not a valid thumbnail;
+        # fall through and regenerate it.
+        if cached:
+            return cached
+
+    sheet_bytes: bytes | None = None
+    pet = load_pet(slug)
+    if pet and pet.exists:
+        try:
+            sheet_bytes = pet.spritesheet.read_bytes()
+        except OSError:
+            sheet_bytes = None
+
+    if sheet_bytes is None and source_url and _is_petdex_host(source_url):
+        try:
+            import httpx
+
+            resp = httpx.get(
+                source_url,
+                timeout=timeout,
+                follow_redirects=True,
+                headers={"User-Agent": "hermes-agent-petdex"},
+            )
+            resp.raise_for_status()
+            sheet_bytes = resp.content
+        except Exception as exc:  # noqa: BLE001 - cosmetic, degrade to placeholder
+            logger.debug("thumb fetch failed for %s: %s", slug, exc)
+
+    if not sheet_bytes:
+        return None
+
+    try:
+        import io
+
+        from PIL import Image
+
+        with Image.open(io.BytesIO(sheet_bytes)) as im:
+            # Clamp the crop box so sheets smaller than one cell still work.
+            frame = im.convert("RGBA").crop(
+                (0, 0, min(_THUMB_FRAME_W, im.width), min(_THUMB_FRAME_H, im.height))
+            )
+            height = round(_THUMB_W * _THUMB_FRAME_H / _THUMB_FRAME_W)
+            frame = frame.resize((_THUMB_W, height), Image.NEAREST)
+            buf = io.BytesIO()
+            frame.save(buf, format="PNG")
+            data = buf.getvalue()
+    except Exception as exc:  # noqa: BLE001
+        logger.debug("thumb crop failed for %s: %s", slug, exc)
+        return None
+
+    # Caching is best-effort: a read-only store still serves the thumbnail.
+    try:
+        cache.write_bytes(data)
+    except OSError:
+        pass
+    return data
+
+
+def remove_pet(slug: str) -> bool:
+    """Delete an installed pet directory.  Returns True if anything was removed.
+
+    Args:
+        slug: Directory name of the pet; surrounding whitespace is ignored.
+
+    Returns:
+        ``True`` when the pet directory existed and is gone afterwards;
+        ``False`` when it did not exist, could not be removed, or the slug is
+        not a plain single directory name.
+    """
+    import shutil
+
+    slug = slug.strip()
+    # Never let an empty slug resolve to the pets root (which would wipe every
+    # pet), nor ".." / absolute paths / separators reach outside the store.
+    if not slug or slug in (".", "..") or Path(slug).name != slug:
+        return False
+
+    directory = pets_dir() / slug
+    if not directory.is_dir():
+        return False
+
+    # Drop the cached thumbnail too, so a later pet that reuses this slug does
+    # not inherit the removed pet's picture.
+    try:
+        (_thumbs_dir() / f"{slug}.png").unlink(missing_ok=True)
+    except OSError:
+        pass
+
+    shutil.rmtree(directory, ignore_errors=True)
+    return not directory.exists()
+
+
+def _download(url: str, dest: Path, *, timeout: float) -> None:
+    """Stream *url* to *dest* via a sibling ``.part`` file.
+
+    The body is written to ``<dest>.part`` and renamed over *dest* only after
+    the whole response has been received, so *dest* never holds a partial
+    download.  On any failure the temporary file is removed.
+
+    Raises:
+        PetStoreError: The request, the write, or the final rename failed.
+    """
+    import httpx
+
+    tmp = dest.with_suffix(dest.suffix + ".part")
+    try:
+        with httpx.stream(
+            "GET",
+            url,
+            timeout=timeout,
+            follow_redirects=True,
+            headers={"User-Agent": "hermes-agent-petdex"},
+        ) as resp:
+            resp.raise_for_status()
+            with tmp.open("wb") as fh:
+                for chunk in resp.iter_bytes():
+                    fh.write(chunk)
+        tmp.replace(dest)
+    except Exception as exc:  # noqa: BLE001
+        # Don't leave a stale partial file next to the real spritesheet.
+        try:
+            tmp.unlink(missing_ok=True)
+        except OSError:
+            pass
+        raise PetStoreError(f"download failed for {url}: {exc}") from exc
+
+
+def _download_json(url: str, *, timeout: float) -> dict:
+    """GET *url* and return its JSON body when it is an object.
+
+    Returns ``{}`` for any other JSON type.  HTTP and decoding errors are not
+    caught here; they propagate to the caller.
+    """
+    import httpx
+
+    response = httpx.get(
+        url,
+        timeout=timeout,
+        follow_redirects=True,
+        headers={"User-Agent": "hermes-agent-petdex"},
+    )
+    response.raise_for_status()
+    payload = response.json()
+    if not isinstance(payload, dict):
+        return {}
+    return payload
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -8,6 +8,7 @@ import json
 import logging
 import os
 import threading
+import contextvars
 from collections import OrderedDict
 from pathlib import Path

@@ -489,15 +490,41 @@ PLATFORM_HINTS = {
        "files arrive as downloadable documents. You can also include image "
        "URLs in markdown format ![alt](url) and they will be sent as photos."
    ),
+    "whatsapp_cloud": (
+        "You are on a text messaging communication platform, WhatsApp "
+        "(via Meta's official Business Cloud API). Standard markdown "
+        "(**bold**, ~~strike~~, # headers, [links](url)) is auto-converted "
+        "to WhatsApp's native syntax (*bold*, ~strike~, etc.) — feel free "
+        "to write in markdown. Tables are NOT supported — prefer bullet "
+        "lists or labeled key:value pairs. "
+        "You can send media files natively: include MEDIA:/absolute/path/to/file "
+        "in your response. Images (.jpg, .png) become photo attachments, "
+        "videos (.mp4) play inline, audio (.mp3, .ogg) sends as voice/audio "
+        "messages, other files arrive as documents. Image URLs in markdown "
+        "format ![alt](url) also work. "
+        "IMPORTANT: this platform has a 24-hour conversation window — if the "
+        "user hasn't messaged in 24h, free-form replies are refused by Meta "
+        "(error 131047). This rarely matters for live chat, but is worth "
+        "knowing if you're scheduling a delayed message."
+    ),
    "telegram": (
        "You are on a text messaging communication platform, Telegram. "
-        "Standard markdown is automatically converted to Telegram format. "
+        "Standard Markdown is automatically converted to Telegram formatting. "
        "Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
        "`inline code`, ```code blocks```, [links](url), and ## headers. "
-        "Telegram has NO table syntax — prefer bullet lists or labeled "
-        "key: value pairs over pipe tables (any tables you do emit are "
-        "auto-rewritten into row-group bullets, which you can produce "
-        "directly for cleaner output). "
+        "Telegram now supports rich Markdown, so lean into it: whenever it "
+        "makes the answer clearer or easier to scan, actively reach for real "
+        "Markdown tables (pipe `| col | col |` syntax), bullet and numbered "
+        "lists, task lists (`- [ ]` / `- [x]`), headings, nested blockquotes, "
+        "collapsible details, footnotes/references, math/formulas (`$...$`, "
+        "`$$...$$`), underline, subscript/superscript, marked (highlighted) "
+        "text, and anchors. Default to structured formatting over dense "
+        "paragraphs for any comparison, set of steps, key/value summary, or "
+        "tabular data. Prefer real Markdown tables and task lists over "
+        "hand-built bullet substitutes when presenting structured data; these "
+        "degrade gracefully (tables become readable bullet groups) when rich "
+        "rendering is unavailable, but advanced constructs like math and "
+        "collapsible details may render as plain source text in that case. "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. Images "
        "(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
@@ -932,6 +959,52 @@ CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
 CONTEXT_TRUNCATE_TAIL_RATIO = 0.2


+def _get_context_file_max_chars() -> int:
+    """Return the configured context-file truncation limit.
+
+    ``CONTEXT_FILE_MAX_CHARS`` remains the upstream-compatible default and
+    fallback. Users with larger context windows can raise
+    ``context_file_max_chars`` in config.yaml without patching Hermes.
+
+    Returns:
+        The configured value as an ``int`` when it is a real number that is
+        at least 1 after truncation; otherwise ``CONTEXT_FILE_MAX_CHARS``.
+        Any failure while loading or converting the setting also falls back
+        to the default.
+    """
+    try:
+        from hermes_cli.config import load_config
+
+        val = load_config().get("context_file_max_chars")
+        # ``bool`` is a subclass of ``int``: without the explicit exclusion a
+        # YAML ``true`` would be accepted as a limit of 1 character.
+        if isinstance(val, (int, float)) and not isinstance(val, bool):
+            limit = int(val)
+            # Fractions below 1 truncate to 0, which would drop every file.
+            if limit > 0:
+                return limit
+    except Exception as e:
+        logger.debug("Could not read context_file_max_chars from config: %s", e)
+    return CONTEXT_FILE_MAX_CHARS
+
+# Collect truncation warnings so the caller (run_agent) can surface them.
+# A ContextVar (not a module-global list) isolates accumulation per thread /
+# per async task, so concurrent gateway-session prompt builds can't drain or
+# clear each other's pending warnings (cross-session leak). Each build runs in
+# its own context, collects its own warnings, and drains them synchronously.
+_truncation_warnings: "contextvars.ContextVar[Optional[list]]" = contextvars.ContextVar(
+    "context_file_truncation_warnings", default=None
+)
+
+
+def _record_truncation_warning(msg: str) -> None:
+    """Add *msg* to the warning list held by the current context.
+
+    The list is created lazily: the first warning recorded in a context
+    installs a fresh list in the ``_truncation_warnings`` context variable.
+    """
+    bucket = _truncation_warnings.get()
+    if bucket is not None:
+        bucket.append(msg)
+        return
+    _truncation_warnings.set([msg])
+
+
+def drain_truncation_warnings() -> list:
+    """Hand back the warnings gathered in this context and empty the list.
+
+    Returns a new list (possibly empty); the stored list is cleared in place.
+    """
+    pending = _truncation_warnings.get()
+    if pending:
+        snapshot = list(pending)
+        pending.clear()
+        return snapshot
+    return []
+
+
 # =========================================================================
 # Skills prompt cache
 # =========================================================================
@@ -1138,7 +1211,7 @@ def build_skills_system_prompt(
        or get_session_env("HERMES_SESSION_PLATFORM")
        or ""
    )
-    disabled = get_disabled_skill_names()
+    disabled = get_disabled_skill_names(_platform_hint or None)
    cache_key = (
        str(skills_dir.resolve()),
        tuple(str(d) for d in external_dirs),
@@ -1418,13 +1491,13 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -

    lines = [
        "# Nous Subscription",
-        "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browser Use) by default. Modal execution is optional.",
+        "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, OpenAI Whisper STT, and browser automation (Browser Use) by default. Modal execution is optional.",
        "Current capability status:",
    ]
    lines.extend(_status_line(feature) for feature in features.items())
    lines.extend(
        [
-            "When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys.",
+            "When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, OpenAI Whisper, or Browser-Use API keys.",
            "If the user is not subscribed and asks for a capability that Nous subscription would unlock or simplify, suggest Nous subscription as one option alongside direct setup or local alternatives.",
            "Do not mention subscription unless the user asks about it or it directly solves the current missing capability.",
            "Useful commands: hermes setup, hermes setup tools, hermes setup terminal, hermes status.",
@@ -1437,10 +1510,19 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
 # Context files (SOUL.md, AGENTS.md, .cursorrules)
 # =========================================================================

-def _truncate_content(content: str, filename: str, max_chars: int = CONTEXT_FILE_MAX_CHARS) -> str:
+def _truncate_content(content: str, filename: str, max_chars: Optional[int] = None) -> str:
    """Head/tail truncation with a marker in the middle."""
+    if max_chars is None:
+        max_chars = _get_context_file_max_chars()
    if len(content) <= max_chars:
        return content
+    msg = (
+        f"⚠️  Context file {filename} TRUNCATED: "
+        f"{len(content)} chars exceeds limit of {max_chars} — "
+        f"increase context_file_max_chars or trim the file!"
+    )
+    logger.warning(msg)
+    _record_truncation_warning(msg)
    head_chars = int(max_chars * CONTEXT_TRUNCATE_HEAD_RATIO)
    tail_chars = int(max_chars * CONTEXT_TRUNCATE_TAIL_RATIO)
    head = content[:head_chars]
--- a/agent/redact.py
+++ b/agent/redact.py
@@ -104,6 +104,7 @@ _PREFIX_PATTERNS = [
    r"mem0_[A-Za-z0-9]{10,}",           # Mem0 Platform API key
    r"brv_[A-Za-z0-9]{10,}",            # ByteRover API key
    r"xai-[A-Za-z0-9]{30,}",            # xAI (Grok) API key
+    r"ntn_[A-Za-z0-9]{10,}",            # Notion internal integration token
 ]

 # ENV assignment patterns: KEY=value where KEY contains a secret-like name
--- a/agent/skill_commands.py
+++ b/agent/skill_commands.py
@@ -26,6 +26,91 @@ _skill_commands_platform: Optional[str] = None
 _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
 _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")

+# ---------------------------------------------------------------------------
+# Skill-scaffolding markers and the canonical extractor.
+#
+# When a user invokes a /skill (or /bundle), Hermes expands the turn into a
+# model-facing message that embeds the full skill body plus scaffolding. That
+# expanded text is what flows into the agent loop — and into memory providers
+# via MemoryManager. Providers that store or embed the raw user turn (mem0,
+# openviking, hindsight, retaindb, byterover, honcho, supermemory) would
+# otherwise capture the entire skill body instead of what the user actually
+# asked. ``extract_user_instruction_from_skill_message`` recovers just the
+# user's instruction so memory stays clean.
+#
+# These markers MUST stay byte-identical to the builders below
+# (``_build_skill_message`` here, ``build_bundle_invocation_message`` in
+# agent/skill_bundles.py). They are co-located with the single-skill builder
+# on purpose, and the bundle markers are asserted against the bundle builder in
+# tests/openviking_plugin/test_openviking.py::test_skill_markers_match_hermes_scaffolding.
+# ---------------------------------------------------------------------------
+_SKILL_INVOCATION_PREFIX = "[IMPORTANT: The user has invoked the "
+_SINGLE_SKILL_MARKER = "The full skill content is loaded below.]"
+_SINGLE_SKILL_INSTRUCTION = (
+    "The user has provided the following instruction alongside the skill invocation: "
+)
+_RUNTIME_NOTE = "\n\n[Runtime note:"
+_BUNDLE_MARKER = " skill bundle,"
+_BUNDLE_USER_INSTRUCTION = "\nUser instruction: "
+_BUNDLE_FIRST_SKILL_BLOCK = "\n\n[Loaded as part of the "
+
+
+def extract_user_instruction_from_skill_message(content: Any) -> Optional[str]:
+    """Recover the user's instruction from a slash-skill-expanded turn.
+
+    Returns:
+        - The original string unchanged when it is NOT skill scaffolding
+          (a normal user message passes straight through).
+        - The extracted user instruction when the scaffolding carried one.
+        - ``None`` when *content* is not a string, or when it is skill
+          scaffolding without a user instruction (a bare ``/skill`` call or
+          an unrecognised layout). Memory-provider callers should skip the
+          turn in that case — there is no user content worth storing.
+    """
+    if not isinstance(content, str):
+        return None
+    # No invocation prefix means an ordinary user turn: pass it through.
+    if not content.startswith(_SKILL_INVOCATION_PREFIX):
+        return content
+    # Bundle first. NOTE(review): markers match anywhere, body included — confirm.
+    if _BUNDLE_MARKER in content:
+        return _extract_bundle_user_instruction(content)
+    # Not a bundle: fall back to the single-skill layout.
+    if _SINGLE_SKILL_MARKER in content:
+        return _extract_single_skill_user_instruction(content)
+    # Prefix present but no known marker: nothing recoverable.
+    return None
+
+
+def _extract_single_skill_user_instruction(message: str) -> Optional[str]:
+    # The single-skill layout places the user's instruction after the skill
+    # body. Split on the LAST marker so text quoted inside the body is ignored.
+    _, marker, tail = message.rpartition(_SINGLE_SKILL_INSTRUCTION)
+    if not marker:
+        return None
+
+    # A runtime note may trail the instruction; keep only what precedes it.
+    user_text, _, _ = tail.partition(_RUNTIME_NOTE)
+    user_text = user_text.strip()
+    if not user_text:
+        return None
+    return user_text
+
+
+def _extract_bundle_user_instruction(message: str) -> Optional[str]:
+    # The bundle layout places the user's instruction ahead of the loaded
+    # skills. Split on the FIRST marker so later occurrences are ignored.
+    _, marker, tail = message.partition(_BUNDLE_USER_INSTRUCTION)
+    if not marker:
+        return None
+
+    # The instruction ends where the first loaded-skill block begins.
+    user_text, _, _ = tail.partition(_BUNDLE_FIRST_SKILL_BLOCK)
+    user_text = user_text.strip()
+    if not user_text:
+        return None
+    return user_text
+

 def _resolve_skill_commands_platform() -> Optional[str]:
    """Return the current platform scope used for disabled-skill filtering.
--- a/agent/skill_utils.py
+++ b/agent/skill_utils.py
@@ -43,14 +43,20 @@ EXCLUDED_SKILL_DIRS = frozenset(
    )
 )

+# Supporting files live inside a skill package and are loaded explicitly via
+# skill_view(skill, file_path=...). They are not standalone skills and must not
+# be scanned for active SKILL.md/DESCRIPTION.md entries, even if a Curator or
+# archive workflow preserves a complete old skill package under references/.
+SKILL_SUPPORT_DIRS = frozenset(("references", "templates", "assets", "scripts"))
+

 def is_excluded_skill_path(path) -> bool:
-    """True if any component of *path* is in EXCLUDED_SKILL_DIRS.
+    """True if *path* should be skipped by active skill scanners.

-    Use this on every SKILL.md path produced by ``rglob`` to prune
-    dependency, virtualenv, VCS, and cache directories. Centralising the
-    check here keeps every skill-scanning site in sync with the shared
-    exclusion set.
+    Use this on every ``SKILL.md`` path produced by direct ``rglob`` scans to
+    prune dependency, virtualenv, VCS, cache, and progressive-disclosure
+    support-package paths. Centralising the check here keeps every
+    skill-scanning site in sync with the shared exclusion set.

    Accepts a Path or string.
    """
@@ -59,7 +65,36 @@ def is_excluded_skill_path(path) -> bool:
    except AttributeError:
        from pathlib import PurePath
        parts = PurePath(str(path)).parts
-    return any(part in EXCLUDED_SKILL_DIRS for part in parts)
+    return any(part in EXCLUDED_SKILL_DIRS for part in parts) or is_skill_support_path(
+        path
+    )
+
+
+def is_skill_support_path(path) -> bool:
+    """True if *path* lies beneath a support directory of a real skill root.
+
+    A component named ``references``, ``templates``, ``assets``, or
+    ``scripts`` counts as a support directory only when its parent directory
+    contains ``SKILL.md`` on disk. Content below it is progressive-disclosure
+    data, not an active discovery root: a preserved package such as
+    ``some-skill/references/old-skill-package/SKILL.md`` stays documentation
+    unless the caller explicitly loads it via ``file_path``.
+
+    Categories or skill names that merely reuse one of those words, e.g.
+    ``skills/scripts/foo``, remain discoverable because the directory holding
+    their ``scripts`` component has no ``SKILL.md``.
+
+    Accepts a Path or string; relative paths are checked against the cwd.
+    """
+    parts = (path if isinstance(path, Path) else Path(str(path))).parts
+    # Index 0 has no parent that could be a skill root, and the final
+    # component is the leaf (a file or candidate skill directory name), so
+    # only the components strictly between them can be support directories.
+    return any(
+        (Path(*parts[:idx]) / "SKILL.md").exists()
+        for idx in range(1, len(parts) - 1)
+        if parts[idx] in SKILL_SUPPORT_DIRS
+    )


 # ── Lazy YAML loader ─────────────────────────────────────────────────────
@@ -272,27 +307,65 @@ def skill_matches_environment(frontmatter: Dict[str, Any]) -> bool:
 # ── Disabled skills ───────────────────────────────────────────────────────


+_RAW_CONFIG_CACHE: Dict[Tuple[str, int, int], Dict[str, Any]] = {}
+
+
+def _raw_config_cache_clear() -> None:
+    """Test hook — empty the shared raw-config cache in place."""
+    _RAW_CONFIG_CACHE.clear()
+
+
+def _load_raw_config() -> Dict[str, Any]:
+    """Return config.yaml parsed to a dict, cached by (path, mtime_ns, size).
+
+    Kept local so the skill prompt/build path need not import the heavier
+    ``hermes_cli.config``. Yields ``{}`` when the file is missing, unreadable,
+    or not a mapping. The returned dict is the cached object — treat it as read-only.
+    """
+    config_path = get_config_path()
+    if not config_path.exists():
+        return {}
+    try:
+        stat = config_path.stat()
+        cache_key = (str(config_path), stat.st_mtime_ns, stat.st_size)
+    except OSError:
+        cache_key = None
+    # A failed stat() only disables caching; the file is still read below.
+    if cache_key is not None:
+        cached = _RAW_CONFIG_CACHE.get(cache_key)
+        if cached is not None:
+            return cached
+    # Cache miss: parse the file. Failures are logged at debug level, not raised.
+    try:
+        parsed = yaml_load(config_path.read_text(encoding="utf-8"))
+    except Exception as e:
+        logger.debug("Could not read skill config %s: %s", config_path, e)
+        return {}
+    if not isinstance(parsed, dict):
+        return {}
+    # Keep a single entry: snapshots under older cache keys are discarded.
+    if cache_key is not None:
+        _RAW_CONFIG_CACHE.clear()
+        _RAW_CONFIG_CACHE[cache_key] = parsed
+    return parsed
+
+
 def get_disabled_skill_names(platform: str | None = None) -> Set[str]:
    """Read disabled skill names from config.yaml.

    Args:
        platform: Explicit platform name (e.g. ``"telegram"``).  When
            *None*, resolves from ``HERMES_PLATFORM`` or
-            ``HERMES_SESSION_PLATFORM`` env vars.  Falls back to the
-            global disabled list when no platform is determined.
+            ``HERMES_SESSION_PLATFORM`` env vars.  Returns the global
+            disabled list, unioned with the platform-specific list when a
+            platform is resolved (a globally-disabled skill stays disabled
+            on every platform).

    Reads the config file directly (no CLI config imports) to stay
    lightweight.
    """
-    config_path = get_config_path()
-    if not config_path.exists():
-        return set()
-    try:
-        parsed = yaml_load(config_path.read_text(encoding="utf-8"))
-    except Exception as e:
-        logger.debug("Could not read skill config %s: %s", config_path, e)
-        return set()
-    if not isinstance(parsed, dict):
+    parsed = _load_raw_config()
+    if not parsed:
        return set()

    skills_cfg = parsed.get("skills")
@@ -305,13 +378,14 @@ def get_disabled_skill_names(platform: str | None = None) -> Set[str]:
        or os.getenv("HERMES_PLATFORM")
        or get_session_env("HERMES_SESSION_PLATFORM")
    )
+    global_disabled = _normalize_string_set(skills_cfg.get("disabled"))
    if resolved_platform:
        platform_disabled = (skills_cfg.get("platform_disabled") or {}).get(
            resolved_platform
        )
        if platform_disabled is not None:
-            return _normalize_string_set(platform_disabled)
-    return _normalize_string_set(skills_cfg.get("disabled"))
+            return global_disabled | _normalize_string_set(platform_disabled)
+    return global_disabled


 def _normalize_string_set(values) -> Set[str]:
@@ -336,6 +410,7 @@ _EXTERNAL_DIRS_CACHE: Dict[Tuple[str, int], List[Path]] = {}
 def _external_dirs_cache_clear() -> None:
    """Test hook — drop the in-process cache."""
    _EXTERNAL_DIRS_CACHE.clear()
+    _raw_config_cache_clear()


 def get_external_skills_dirs() -> List[Path]:
@@ -368,11 +443,8 @@ def get_external_skills_dirs() -> List[Path]:
            # Return a copy so callers can't mutate the cached list.
            return list(cached)

-    try:
-        parsed = yaml_load(config_path.read_text(encoding="utf-8"))
-    except Exception:
-        return []
-    if not isinstance(parsed, dict):
+    parsed = _load_raw_config()
+    if not parsed:
        return []

    skills_cfg = parsed.get("skills")
@@ -584,15 +656,7 @@ def resolve_skill_config_values(
    current values (or the declared default if the key isn't set).
    Path values are expanded via ``os.path.expanduser``.
    """
-    config_path = get_config_path()
-    config: Dict[str, Any] = {}
-    if config_path.exists():
-        try:
-            parsed = yaml_load(config_path.read_text(encoding="utf-8"))
-            if isinstance(parsed, dict):
-                config = parsed
-        except Exception:
-            pass
+    config = _load_raw_config()

    resolved: Dict[str, Any] = {}
    for var in config_vars:
@@ -632,12 +696,21 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
 def iter_skill_index_files(skills_dir: Path, filename: str):
    """Walk skills_dir yielding sorted paths matching *filename*.

-    Excludes Hermes metadata, VCS, virtualenv/dependency, and cache
-    directories so dependencies cannot register nested skills.
+    Excludes Hermes metadata, VCS, virtualenv/dependency, cache, and skill
+    support directories. Support directories (references/templates/assets/
+    scripts) can contain arbitrary markdown and even archived package
+    ``SKILL.md`` files, but they are progressive-disclosure data loaded through
+    ``skill_view(..., file_path=...)`` rather than active skill roots.
    """
    matches = []
    for root, dirs, files in os.walk(skills_dir, followlinks=True):
-        dirs[:] = [d for d in dirs if d not in EXCLUDED_SKILL_DIRS]
+        has_skill_md = "SKILL.md" in files
+        dirs[:] = [
+            d
+            for d in dirs
+            if d not in EXCLUDED_SKILL_DIRS
+            and not (has_skill_md and d in SKILL_SUPPORT_DIRS)
+        ]
        if filename in files:
            matches.append(Path(root) / filename)
    for path in sorted(matches, key=lambda p: str(p.relative_to(skills_dir))):
--- a/agent/ssl_guard.py
+++ b/agent/ssl_guard.py
@@ -0,0 +1,126 @@
+"""Preventive SSL CA certificate checks for Hermes Agent.
+
+This module catches broken CA bundle paths before OpenAI/httpx turns them into
+opaque ``FileNotFoundError: [Errno 2] No such file or directory`` failures.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import ssl
+from pathlib import Path
+
+from agent.errors import SSLConfigurationError
+
+logger = logging.getLogger(__name__)
+
+# Environment variables that may name a custom CA bundle. Every one that is
+# set to a non-empty value is validated by verify_ca_bundle().
+_CA_BUNDLE_ENV_VARS = (
+    "HERMES_CA_BUNDLE",
+    "SSL_CERT_FILE",
+    "REQUESTS_CA_BUNDLE",
+    "CURL_CA_BUNDLE",
+)
+
+# HERMES_SKIP_SSL_GUARD values (compared case-insensitively) that disable the guard.
+_SKIP_VALUES = {"1", "true", "yes", "on"}
+
+
+def _skip_ssl_guard_enabled() -> bool:
+    """Return True when HERMES_SKIP_SSL_GUARD opts out of the CA bundle checks."""
+    return os.getenv("HERMES_SKIP_SSL_GUARD", "").strip().lower() in _SKIP_VALUES
+
+
+def _repair_hint() -> str:
+    """Return the remediation text appended to every SSL configuration error."""
+    return (
+        "Repair: python -m pip install --force-reinstall certifi openai httpx\n"
+        "If you configured a custom corporate CA bundle, fix or unset the "
+        "broken CA bundle environment variable."
+    )
+
+
+def _ssl_err(message: str) -> SSLConfigurationError:
+    """Create a consistent, user-actionable SSL configuration error."""
+    return SSLConfigurationError(f"{message}\n{_repair_hint()}")
+
+
+def _validate_bundle_path(label: str, value: str, *, require_substantial: bool = False) -> None:
+    """Check that *value* names a loadable CA bundle file.
+
+    Args:
+        label: Source named in error messages (an environment variable name
+            or ``"certifi"``).
+        value: Path to the CA bundle; a leading ``~`` is expanded.
+        require_substantial: When true, also reject files smaller than 1024
+            bytes as likely corrupted.
+
+    Raises:
+        SSLConfigurationError: If the path is missing, is not a file, cannot
+            be inspected, is too small (when requested), cannot be loaded by
+            ``ssl``, or loads without yielding any CA certificate.
+    """
+    try:
+        path = Path(value).expanduser()
+        exists = path.exists()
+        is_file = exists and path.is_file()
+        size = path.stat().st_size if is_file and require_substantial else None
+    except (OSError, RuntimeError) as exc:
+        # Probing can itself fail (permission denied on a parent directory,
+        # an unresolvable ``~``). Report that as a configuration problem too
+        # rather than leaking a raw OSError/RuntimeError at startup.
+        raise _ssl_err(f"{label} CA bundle at {value} cannot be inspected: {exc}") from exc
+    if not exists:
+        raise _ssl_err(f"{label} points to a missing CA bundle: {value}")
+    if not is_file:
+        raise _ssl_err(f"{label} does not point to a CA bundle file: {value}")
+    if size is not None and size < 1024:
+        raise _ssl_err(f"{label} at {value} appears corrupted (too small)")
+    try:
+        ctx = ssl.create_default_context(cafile=str(path))
+    except Exception as exc:
+        raise _ssl_err(f"{label} CA bundle at {value} cannot be loaded: {exc}") from exc
+    if not ctx.get_ca_certs():
+        raise _ssl_err(f"{label} CA bundle at {value} did not load any certificates")
+
+
+def verify_ca_bundle() -> None:
+    """Verify configured and bundled CA certificates are present and loadable.
+
+    Every set variable in ``_CA_BUNDLE_ENV_VARS`` is checked first, then
+    certifi's own bundle. ``HERMES_SKIP_SSL_GUARD`` bypasses all checks.
+
+    Raises:
+        SSLConfigurationError: If an explicit CA-bundle environment variable
+            points at a bad path, or if certifi's bundled ``cacert.pem`` is
+            missing/corrupt.
+    """
+    if _skip_ssl_guard_enabled():
+        logger.debug("SSL CA bundle guard skipped via HERMES_SKIP_SSL_GUARD")
+        return
+
+    for env_var in _CA_BUNDLE_ENV_VARS:
+        value = os.getenv(env_var)
+        if value:
+            _validate_bundle_path(env_var, value)
+
+    try:
+        import certifi
+    except Exception as exc:
+        raise _ssl_err(f"certifi is not importable: {exc}") from exc
+
+    # require_substantial: a cacert.pem under 1024 bytes is treated as corrupted.
+    ca_bundle = str(certifi.where())
+    _validate_bundle_path("certifi", ca_bundle, require_substantial=True)
+
+
+def verify_ca_bundle_with_fallback() -> None:
+    """Backward-compatible wrapper for older call sites.
+
+    The old PR name mentioned a platform fallback, but allowing startup with a
+    broken certifi bundle still leaves httpx/OpenAI and requests call sites
+    failing later. Keep the wrapper name but enforce the same check.
+    """
+    verify_ca_bundle()
--- a/agent/system_prompt.py
+++ b/agent/system_prompt.py
@@ -40,6 +40,7 @@ from agent.prompt_builder import (
    TASK_COMPLETION_GUIDANCE,
    TOOL_USE_ENFORCEMENT_GUIDANCE,
    TOOL_USE_ENFORCEMENT_MODELS,
+    drain_truncation_warnings,
 )
 from agent.runtime_cwd import resolve_context_cwd

@@ -400,7 +401,14 @@ def build_system_prompt(agent: Any, system_message: Optional[str] = None) -> str
    warm across turns.
    """
    parts = build_system_prompt_parts(agent, system_message=system_message)
-    return "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
+    joined = "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
+
+    # Surface context-file truncation warnings through the normal agent status
+    # channel so gateway/CLI users see them in chat instead of only in logs.
+    for warning in drain_truncation_warnings():
+        agent._emit_status(warning)
+
+    return joined


 def invalidate_system_prompt(agent: Any) -> None:
--- a/agent/transports/anthropic.py
+++ b/agent/transports/anthropic.py
@@ -186,10 +186,21 @@ class AnthropicTransport(ProviderTransport):
    def validate_response(self, response: Any) -> bool:
        """Check Anthropic response structure is valid.

-        An empty content list is legitimate when ``stop_reason == "end_turn"``
-        — the model's canonical way of signalling "nothing more to add" after
-        a tool turn that already delivered the user-facing text. Treating it
-        as invalid falsely retries a completed response.
+        An empty content list is legitimate for terminal stop reasons that
+        carry no text payload:
+
+        - ``end_turn`` — the model's canonical "nothing more to add" after a
+          tool turn that already delivered the user-facing text.
+        - ``refusal`` — the model declined to respond (Claude 4.5+). The
+          Messages API returns an empty ``content`` list with this stop
+          reason. Treating it as invalid sends a deterministic refusal into
+          the invalid-response retry loop, which reproduces the refusal on
+          every attempt and surfaces a misleading "rate limited / invalid
+          response" error instead of the refusal. ``normalize_response`` maps
+          ``refusal`` → ``content_filter`` so the agent loop's refusal handler
+          can surface it.
+
+        Treating either as invalid falsely retries a completed response.
        """
        if response is None:
            return False
@@ -197,7 +208,7 @@ class AnthropicTransport(ProviderTransport):
        if not isinstance(content_blocks, list):
            return False
        if not content_blocks:
-            return getattr(response, "stop_reason", None) == "end_turn"
+            return getattr(response, "stop_reason", None) in {"end_turn", "refusal"}
        return True

    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@@ -531,6 +531,7 @@ class ChatCompletionsTransport(ProviderTransport):
                supports_reasoning=params.get("supports_reasoning", False),
                qwen_session_metadata=params.get("qwen_session_metadata"),
                model=model,
+                base_url=params.get("base_url"),
                ollama_num_ctx=params.get("ollama_num_ctx"),
                session_id=params.get("session_id"),
            )
@@ -664,8 +665,42 @@ class ChatCompletionsTransport(ProviderTransport):
        if rd:
            provider_data["reasoning_details"] = rd

+        # OpenAI structured-refusal field. When a model declines, the SDK
+        # populates ``message.refusal`` with the explanation and leaves
+        # ``content`` empty. OpenAI-compatible proxies that front Anthropic /
+        # Bedrock (e.g. Nous Portal) surface a Claude refusal this way — or via
+        # ``finish_reason="content_filter"`` — instead of the native
+        # ``stop_reason="refusal"``. Without capturing it the refusal looks
+        # like an empty response, so the agent loop retries a deterministic
+        # refusal three times and gives up with "no content after retries".
+        # Promote it to content + a ``content_filter`` finish reason so the
+        # loop's refusal handler surfaces it clearly and stops. ``refusal`` is
+        # ``None`` for normal responses, so this is a no-op in the common case.
+        content = msg.content
+        refusal = getattr(msg, "refusal", None)
+        if refusal is None and hasattr(msg, "model_extra"):
+            _msg_extra = getattr(msg, "model_extra", None) or {}
+            if isinstance(_msg_extra, dict):
+                refusal = _msg_extra.get("refusal")
+        if isinstance(refusal, str) and refusal.strip():
+            # Record the refusal explanation regardless — it's useful provider
+            # metadata even when the model also returned a usable payload.
+            provider_data["refusal"] = refusal
+            _has_text = isinstance(content, str) and content.strip()
+            _has_tool_calls = bool(tool_calls)
+            # Only promote to a terminal ``content_filter`` when the refusal is
+            # the *sole* payload — no visible text and no tool calls. A response
+            # that carries real content (or tool calls) alongside a refusal note
+            # is a normal, usable turn: surfacing it as a failed safety refusal
+            # would discard the model's actual work. In the empty-payload case,
+            # adopt the refusal as content so the loop has something to show.
+            if not _has_text and not _has_tool_calls:
+                content = refusal
+                if finish_reason in (None, "stop"):
+                    finish_reason = "content_filter"
+
        return NormalizedResponse(
-            content=msg.content,
+            content=content,
            tool_calls=tool_calls,
            finish_reason=finish_reason,
            reasoning=reasoning,
--- a/agent/transports/codex.py
+++ b/agent/transports/codex.py
@@ -218,22 +218,10 @@ class ResponsesApiTransport(ProviderTransport):
            kwargs.pop("timeout", None)

        if is_codex_backend:
-            prompt_cache_key = kwargs.get("prompt_cache_key")
-            cache_scope_id = str(prompt_cache_key or session_id or "").strip()
-            if cache_scope_id:
-                existing_extra_headers = kwargs.get("extra_headers")
-                merged_extra_headers: Dict[str, str] = {}
-                if isinstance(existing_extra_headers, dict):
-                    merged_extra_headers.update(
-                        {
-                            str(key): str(value)
-                            for key, value in existing_extra_headers.items()
-                            if key and value is not None
-                        }
-                    )
-                merged_extra_headers["session_id"] = cache_scope_id
-                merged_extra_headers["x-client-request-id"] = cache_scope_id
-                kwargs["extra_headers"] = merged_extra_headers
+            # chatgpt.com/backend-api/codex rejects body-level
+            # ``extra_headers`` with HTTP 400. Correlation/cache routing for
+            # this backend must not be sent through the Responses payload.
+            kwargs.pop("extra_headers", None)

        max_tokens = params.get("max_tokens")
        if max_tokens is not None and not is_codex_backend:
--- a/apps/bootstrap-installer/package.json
+++ b/apps/bootstrap-installer/package.json
@@ -16,7 +16,7 @@
  },
  "dependencies": {
    "@nous-research/ui": "0.16.0",
-    "@tailwindcss/vite": "^4.2.1",
+    "@tailwindcss/vite": "^4.2.4",
    "@tailwindcss/typography": "^0.5.19",
    "@tauri-apps/api": "^2.0.0",
    "@tauri-apps/plugin-dialog": "^2.0.0",
@@ -40,8 +40,8 @@
    "@tauri-apps/cli": "^2.0.0",
    "@types/react": "^19.2.14",
    "@types/react-dom": "^19.2.3",
-    "@vitejs/plugin-react": "^5.2.0",
+    "@vitejs/plugin-react": "^6.0.2",
    "typescript": "^6.0.3",
-    "vite": "^7.3.1"
+    "vite": "^8.0.16"
  }
 }
--- a/apps/bootstrap-installer/src-tauri/src/update.rs
+++ b/apps/bootstrap-installer/src-tauri/src/update.rs
@@ -3,8 +3,9 @@
 //! Driven when the installer is launched as `Hermes-Setup.exe --update` (see
 //! `AppMode` in lib.rs). The desktop app hands off to us — it exits, then we:
 //!
-//!   1. wait for the old Hermes desktop process to fully exit (so the venv
-//!      shim is free; otherwise `hermes update` aborts with exit code 2),
+//!   1. wait for the old Hermes desktop process to fully exit (so both the
+//!      venv shim and packaged app.asar are free; otherwise `hermes update`
+//!      or repair bootstrap can race locked files),
 //!   2. run `hermes update --yes --gateway` (Python/repo update; this does NOT
 //!      rebuild apps/desktop by design — see cmd_update in hermes_cli/main.py),
 //!   3. run `hermes desktop --build-only` (the rebuild step update skips),
@@ -38,8 +39,8 @@ use crate::events::{BootstrapEvent, LogStream, StageInfo, StageState};
 /// hermes_cli/main.py (sys.exit(2)). We surface a targeted message for this.
 const UPDATE_EXIT_CONCURRENT: i32 = 2;

-/// How long to wait for the old desktop process to release the venv shim
-/// before giving up and letting `hermes update`'s own guard decide.
+/// How long to wait for the old desktop process to release files under the
+/// install tree before giving up and letting `hermes update`'s own guard decide.
 const DESKTOP_EXIT_WAIT: Duration = Duration::from_secs(20);
 const DESKTOP_EXIT_POLL: Duration = Duration::from_millis(500);

@@ -150,8 +151,10 @@ async fn run_update(app: AppHandle) -> Result<()> {
    // ---- pre-step: wait for the old desktop to die -----------------------
    // The desktop exec'd us then called app.exit(), but process teardown is
    // async on Windows. If it still holds the venv shim, `hermes update`
-    // aborts with exit 2. Give it a bounded window to clear.
-    wait_for_venv_free(&install_root, &app).await;
+    // aborts with exit 2. If it still holds the packaged app.asar,
+    // install.ps1's repair/re-clone path cannot move/remove the install tree.
+    // Give both handles a bounded window to clear.
+    wait_for_install_locks_free(&install_root, &app, "update").await;

    // ---- stage 1: hermes update -----------------------------------------
    // Pass --branch so `hermes update` targets the branch this installer was
@@ -173,8 +176,8 @@ async fn run_update(app: AppHandle) -> Result<()> {
        vec!["update".into(), "--yes".into(), "--gateway".into()];
    // --force skips `hermes update`'s Windows running-exe guard (which would
    // `sys.exit(2)` and dead-end the handoff). By contract the desktop has
-    // already exited and waited for the venv shim to unlock before launching
-    // us, and wait_for_venv_free below force-kills any straggler — so by the
+    // already exited and waited for the install locks to clear before launching
+    // us, and wait_for_install_locks_free below force-kills any straggler — so by the
    // time `hermes update` runs there is no legitimate hermes.exe to protect,
    // and the guard would only produce a false "Hermes is still running" stop.
    update_args.push("--force".into());
@@ -391,48 +394,57 @@ async fn run_update(app: AppHandle) -> Result<()> {
    Ok(())
 }

-/// Poll until the venv shim is no longer locked (Windows) or a bounded timeout
-/// elapses. On non-Windows this is a short fixed grace since file locking
-/// isn't the failure mode there.
-async fn wait_for_venv_free(install_root: &Path, app: &AppHandle) {
-    let shim = venv_hermes(install_root);
+/// Poll until the venv shim AND packaged desktop app bundle are no longer locked
+/// (Windows) or a bounded timeout elapses. On non-Windows this is a short fixed
+/// grace since file locking isn't the failure mode there.
+pub(crate) async fn wait_for_install_locks_free(install_root: &Path, app: &AppHandle, stage: &str) {
+    let lock_targets = install_lock_probe_paths(install_root);
    let deadline = Instant::now() + DESKTOP_EXIT_WAIT;

-    emit_log(app, Some("update"), LogStream::Stdout, "[update] waiting for Hermes to exit…");
+    emit_log(app, Some(stage), LogStream::Stdout, "[handoff] waiting for Hermes to exit…");

    loop {
-        if !is_locked(&shim) {
+        let locked = locked_paths(&lock_targets);
+        if locked.is_empty() {
            return;
        }
        if Instant::now() >= deadline {
-            // Last resort: a backend hermes.exe (or a grandchild it spawned)
-            // is still holding the shim. The desktop should have reaped its
-            // tree before handing off, but SIGTERM races / detached
-            // grandchildren / AV handles can leave a straggler. Rather than
-            // "proceed anyway" straight into uv's "Access is denied", force-kill
-            // every hermes.exe except ourselves, then give the OS a beat to
-            // unload the image.
+            // Last resort: a backend hermes.exe (or the desktop Hermes.exe
+            // itself) is still holding one of the update-sensitive files. The
+            // desktop should have reaped its tree before handing off, but
+            // SIGTERM races / detached grandchildren / AV handles can leave a
+            // straggler. Rather than "proceed anyway" straight into uv's
+            // "Access is denied" or install.ps1's locked app.asar failure,
+            // force-kill every Hermes.exe except ourselves, then give the OS a
+            // beat to unload the image.
            emit_log(
                app,
-                Some("update"),
+                Some(stage),
                LogStream::Stdout,
-                "[update] Hermes still holding the venv shim; force-killing stragglers…",
+                &format!(
+                    "[handoff] Hermes still holding install files ({}); force-killing stragglers…",
+                    format_locked_paths(&locked)
+                ),
            );
            force_kill_other_hermes();
            tokio::time::sleep(Duration::from_millis(800)).await;
-            if !is_locked(&shim) {
+            let locked_after_kill = locked_paths(&lock_targets);
+            if locked_after_kill.is_empty() {
                emit_log(
                    app,
-                    Some("update"),
+                    Some(stage),
                    LogStream::Stdout,
-                    "[update] venv shim freed after force-kill",
+                    "[handoff] install files freed after force-kill",
                );
            } else {
                emit_log(
                    app,
-                    Some("update"),
+                    Some(stage),
                    LogStream::Stdout,
-                    "[update] venv shim still locked; proceeding (--force + quarantine will handle it)",
+                    &format!(
+                        "[handoff] install files still locked ({}); proceeding (--force + quarantine will handle it)",
+                        format_locked_paths(&locked_after_kill)
+                    ),
                );
            }
            return;
@@ -441,13 +453,44 @@ async fn wait_for_venv_free(install_root: &Path, app: &AppHandle) {
    }
 }

+/// Paths to probe for locks: the venv `hermes` shim first, then the desktop app payloads.
+fn install_lock_probe_paths(install_root: &Path) -> Vec<PathBuf> {
+    let shim = std::iter::once(venv_hermes(install_root));
+    shim.chain(desktop_app_payload_paths(install_root)).collect()
+}
+
+/// Packaged `app.asar` candidates under `apps/desktop/release` for the target OS.
+/// Windows and macOS list both the default and the arm64 bundle directory; the
+/// paths are only built here, not checked for existence.
+fn desktop_app_payload_paths(install_root: &Path) -> Vec<PathBuf> {
+    let release = install_root.join("apps").join("desktop").join("release");
+    // `bundle` is the directory below `release`; `inner` is where it keeps app.asar.
+    let asar_in = |bundle: &str, inner: &str| release.join(bundle).join(inner).join("app.asar");
+    if cfg!(target_os = "windows") {
+        vec![asar_in("win-unpacked", "resources"), asar_in("win-arm64-unpacked", "resources")]
+    } else if cfg!(target_os = "macos") {
+        let inner = "Hermes.app/Contents/Resources";
+        vec![asar_in("mac", inner), asar_in("mac-arm64", inner)]
+    } else {
+        vec![asar_in("linux-unpacked", "resources")]
+    }
+}
+
+fn locked_paths(paths: &[PathBuf]) -> Vec<PathBuf> {
+    paths.iter().filter_map(|p| is_locked(p).then(|| p.clone())).collect() // locked subset, in order
+}
+
+fn format_locked_paths(paths: &[PathBuf]) -> String {
+    paths.iter().map(|p| format!("{}", p.display())).collect::<Vec<String>>().join(", ")
+}
+
 /// Force-kill any `hermes.exe` other than this process. Windows-only; a no-op
 /// elsewhere (POSIX has no mandatory-lock contention). We can't selectively
 /// target "the backend" by PID here — the desktop already exited and we never
 /// knew its children — so we kill the whole `hermes.exe` image tree via
 /// taskkill, excluding our own PID.
 ///
-/// Safe w.r.t. our own update child: this runs inside `wait_for_venv_free`,
+/// Safe w.r.t. our own update child: this runs inside the install-lock wait,
 /// which completes BEFORE we spawn `venv\Scripts\hermes.exe update`. At this
 /// point no update-driven hermes.exe exists yet, so the only hermes.exe images
 /// are stragglers from the old desktop — exactly what we want gone. (`/FI PID
@@ -891,6 +934,29 @@ mod tests {
        assert!(!is_locked(Path::new("/nonexistent/does/not/exist/xyz")));
    }

+    #[test]
+    fn lock_probe_paths_include_desktop_app_payload() {
+        let root = Path::new("/x/hermes-agent");
+        let probes = install_lock_probe_paths(root);
+        assert!(
+            probes.iter().any(|p| p == &venv_hermes(root)),
+            "venv shim remains part of the update lock probe"
+        );
+        // File name only: on macOS the parent directory is `Resources`, capital R.
+        assert!(
+            probes.iter().any(|p| p.ends_with("app.asar")),
+            "packaged app.asar must be probed so repair/re-clone waits for the old desktop to exit"
+        );
+    }
+
+    #[test]
+    fn locked_paths_ignores_missing_payloads() {
+        // This root is not expected to exist, so no probe path may be reported as locked.
+        let probes = install_lock_probe_paths(Path::new("/nonexistent/hermes-agent"));
+        let still_locked = locked_paths(&probes);
+        assert!(still_locked.is_empty());
+    }
+
    #[test]
    fn parses_update_branch_from_space_or_equals_args() {
        assert_eq!(
--- a/apps/bootstrap-installer/tsconfig.json
+++ b/apps/bootstrap-installer/tsconfig.json
@@ -1,8 +1,8 @@
 {
  "compilerOptions": {
-    "target": "ES2022",
+    "target": "ES2023",
    "useDefineForClassFields": true,
-    "lib": ["ES2022", "DOM", "DOM.Iterable"],
+    "lib": ["ES2023", "DOM", "DOM.Iterable"],
    "module": "ESNext",
    "skipLibCheck": true,
    "moduleResolution": "bundler",
--- a/apps/desktop/README.md
+++ b/apps/desktop/README.md
@@ -34,7 +34,7 @@ It builds and launches the GUI against your existing install — same config, ke

 ### Prebuilt installers

-Prebuilt installers are built and distributed via [the Hermes Desktop website.](https://hermes-agent.nousresearch.com/desktop).
+Prebuilt installers are built and distributed via [the Hermes Desktop website](https://hermes-agent.nousresearch.com/).

 ---

--- a/apps/desktop/electron/backend-env.cjs
+++ b/apps/desktop/electron/backend-env.cjs
@@ -0,0 +1,112 @@
+const path = require('node:path')
+
+// Match the POSIX fallback surface used by the Python terminal environment.
+// macOS apps launched from Finder/Dock often inherit only /usr/bin:/bin:/usr/sbin:/sbin,
+// which misses Apple Silicon Homebrew and user-installed CLI tools such as codex.
+const POSIX_SANE_PATH_ENTRIES = Object.freeze([
+  '/opt/homebrew/bin',
+  '/opt/homebrew/sbin',
+  '/usr/local/sbin',
+  '/usr/local/bin',
+  '/usr/sbin',
+  '/usr/bin',
+  '/sbin',
+  '/bin'
+])
+
+function delimiterForPlatform(platform = process.platform) {
+  return platform !== 'win32' ? ':' : ';' // PATH-list separator: ';' only on Windows
+}
+
+function pathModuleForPlatform(platform = process.platform) {
+  return platform !== 'win32' ? path.posix : path.win32 // node:path flavor for the target platform
+}
+
+function pathEnvKey(env = process.env, platform = process.platform) {
+  const spelled = platform === 'win32' && Object.keys(env || {}).find(name => name.toUpperCase() === 'PATH')
+  return spelled || 'PATH' // on Windows, reuse the spelling already in env (e.g. `Path`)
+}
+
+function currentPathValue(env = process.env, platform = process.platform) {
+  const value = env ? env[pathEnvKey(env, platform)] : undefined // env may be null
+  return value || ''
+}
+
+function appendUniquePathEntries(entries, { delimiter = path.delimiter } = {}) {
+  // Each truthy entry is an array of segments or a delimiter-joined string;
+  // expand them all into one flat, ordered list of candidate segments.
+  const segments = [...entries].flatMap(entry => {
+    if (!entry) {
+      return []
+    }
+
+    return Array.isArray(entry) ? entry : String(entry).split(delimiter)
+  })
+
+  // A Set keeps insertion order, so the first occurrence of a segment wins and
+  // later repeats are dropped; empty segments are filtered out beforehand.
+  const unique = new Set(segments.filter(Boolean))
+  return [...unique].join(delimiter)
+}
+
+function buildDesktopBackendPath({
+  hermesHome,
+  venvRoot,
+  currentPath = '',
+  platform = process.platform,
+  pathModule = pathModuleForPlatform(platform)
+} = {}) {
+  // Order is precedence: Hermes node, venv executables, inherited PATH, POSIX sane defaults.
+  const onWindows = platform === 'win32'
+  const ordered = [
+    hermesHome ? pathModule.join(hermesHome, 'node', 'bin') : null,
+    venvRoot ? pathModule.join(venvRoot, onWindows ? 'Scripts' : 'bin') : null,
+    currentPath,
+    onWindows ? [] : POSIX_SANE_PATH_ENTRIES
+  ]
+  return appendUniquePathEntries(ordered, { delimiter: delimiterForPlatform(platform) })
+}
+
+function normalizeHermesHomeRoot(hermesHome, { pathModule = pathModuleForPlatform(process.platform) } = {}) {
+  // `<root>/profiles/<name>` is a profile home; report `<root>` for it instead.
+  if (!hermesHome) return hermesHome
+  const home = pathModule.resolve(String(hermesHome))
+  const container = pathModule.dirname(home)
+  const insideProfiles = pathModule.basename(container).toLowerCase() === 'profiles'
+
+  return insideProfiles ? pathModule.dirname(container) : home
+}
+
+function buildDesktopBackendEnv({
+  hermesHome,
+  pythonPathEntries = [],
+  venvRoot,
+  currentEnv = process.env,
+  platform = process.platform,
+  pathModule = pathModuleForPlatform(platform)
+} = {}) {
+  // Returns just two keys: PYTHONPATH and the PATH variable under its existing spelling.
+  const inheritedPythonPath = currentEnv?.PYTHONPATH || ''
+  const pythonPath = appendUniquePathEntries([...pythonPathEntries, inheritedPythonPath], {
+    delimiter: delimiterForPlatform(platform)
+  })
+  const backendPath = buildDesktopBackendPath({
+    hermesHome,
+    venvRoot,
+    currentPath: currentPathValue(currentEnv, platform),
+    platform,
+    pathModule
+  })
+
+  return { PYTHONPATH: pythonPath, [pathEnvKey(currentEnv, platform)]: backendPath }
+}
+
+module.exports = {
+  POSIX_SANE_PATH_ENTRIES,
+  appendUniquePathEntries,
+  buildDesktopBackendEnv,
+  buildDesktopBackendPath,
+  delimiterForPlatform,
+  normalizeHermesHomeRoot,
+  pathEnvKey
+}
--- a/apps/desktop/electron/backend-env.test.cjs
+++ b/apps/desktop/electron/backend-env.test.cjs
@@ -0,0 +1,111 @@
+const test = require('node:test')
+const assert = require('node:assert/strict')
+const path = require('node:path')
+
+const {
+  POSIX_SANE_PATH_ENTRIES,
+  appendUniquePathEntries,
+  buildDesktopBackendEnv,
+  buildDesktopBackendPath,
+  normalizeHermesHomeRoot,
+  pathEnvKey
+} = require('./backend-env.cjs')
+
+test('desktop backend PATH adds Hermes-managed bins and missing POSIX sane entries', () => {
+  const result = buildDesktopBackendPath({
+    hermesHome: '/Users/test/.hermes',
+    venvRoot: '/Users/test/.hermes/hermes-agent/venv',
+    currentPath: '/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin',
+    platform: 'darwin',
+    pathModule: path.posix
+  })
+
+  const entries = result.split(':')
+  assert.equal(entries[0], '/Users/test/.hermes/node/bin')
+  assert.equal(entries[1], '/Users/test/.hermes/hermes-agent/venv/bin')
+  assert.ok(entries.includes('/opt/homebrew/bin'), 'Apple Silicon Homebrew bin is added')
+  assert.ok(entries.includes('/opt/homebrew/sbin'), 'Apple Silicon Homebrew sbin is added')
+  assert.ok(entries.includes('/usr/local/sbin'), 'missing standard sbin is added')
+
+  for (const expected of POSIX_SANE_PATH_ENTRIES) {
+    assert.ok(entries.includes(expected), `${expected} should be present`)
+  }
+})
+
+test('desktop backend PATH preserves first occurrence and avoids duplicates', () => {
+  const result = buildDesktopBackendPath({
+    hermesHome: '/Users/test/.hermes',
+    venvRoot: '/Users/test/.hermes/hermes-agent/venv',
+    currentPath: '/opt/homebrew/bin:/usr/bin:/opt/homebrew/bin:/bin',
+    platform: 'darwin',
+    pathModule: path.posix
+  })
+
+  const entries = result.split(':')
+  assert.equal(entries.filter(entry => entry === '/opt/homebrew/bin').length, 1)
+  assert.ok(
+    entries.indexOf('/opt/homebrew/bin') < entries.indexOf('/opt/homebrew/sbin'),
+    'existing Homebrew bin keeps its precedence over appended missing sane entries'
+  )
+})
+
+test('buildDesktopBackendEnv extends PYTHONPATH and backend PATH together', () => {
+  const env = buildDesktopBackendEnv({
+    hermesHome: '/Users/test/.hermes',
+    pythonPathEntries: ['/repo/hermes-agent'],
+    venvRoot: '/Users/test/.hermes/hermes-agent/venv',
+    currentEnv: {
+      PATH: '/usr/bin:/bin',
+      PYTHONPATH: '/existing/pythonpath'
+    },
+    platform: 'darwin',
+    pathModule: path.posix
+  })
+
+  assert.equal(env.PYTHONPATH, '/repo/hermes-agent:/existing/pythonpath')
+  assert.ok(env.PATH.startsWith('/Users/test/.hermes/node/bin:/Users/test/.hermes/hermes-agent/venv/bin:'))
+  assert.ok(env.PATH.includes('/opt/homebrew/bin'))
+})
+
+test('normalizeHermesHomeRoot maps profile homes back to the global Hermes root', () => {
+  assert.equal(
+    normalizeHermesHomeRoot('/Users/test/.hermes/profiles/oracle', { pathModule: path.posix }),
+    '/Users/test/.hermes'
+  )
+  assert.equal(
+    normalizeHermesHomeRoot('C:\\Users\\test\\AppData\\Local\\hermes\\profiles\\oracle', { pathModule: path.win32 }),
+    'C:\\Users\\test\\AppData\\Local\\hermes'
+  )
+  assert.equal(
+    normalizeHermesHomeRoot('/Users/test/.hermes', { pathModule: path.posix }),
+    '/Users/test/.hermes'
+  )
+})
+
+test('Windows PATH casing and delimiter are preserved without POSIX sane entries', () => {
+  const env = buildDesktopBackendEnv({
+    hermesHome: 'C:\\Users\\test\\AppData\\Local\\hermes',
+    pythonPathEntries: ['C:\\repo\\hermes-agent'],
+    venvRoot: 'C:\\Users\\test\\AppData\\Local\\hermes\\hermes-agent\\venv',
+    currentEnv: {
+      Path: 'C:\\Windows\\System32;C:\\Windows',
+      PYTHONPATH: 'C:\\existing\\pythonpath'
+    },
+    platform: 'win32',
+    pathModule: path.win32
+  })
+
+  assert.equal(pathEnvKey({ Path: 'x' }, 'win32'), 'Path')
+  assert.equal(env.PATH, undefined)
+  assert.ok(env.Path.startsWith('C:\\Users\\test\\AppData\\Local\\hermes\\node\\bin;'))
+  assert.ok(env.Path.includes('\\venv\\Scripts;'))
+  assert.ok(env.Path.includes(';C:\\Windows\\System32;C:\\Windows'))
+  assert.equal(env.Path.includes('/opt/homebrew/bin'), false)
+})
+
+test('appendUniquePathEntries drops empty entries and keeps first occurrence', () => {
+  assert.equal(
+    appendUniquePathEntries([':/a::/b', ['/a', '/c']], { delimiter: ':' }),
+    '/a:/b:/c'
+  )
+})
--- a/apps/desktop/electron/backend-ready.cjs
+++ b/apps/desktop/electron/backend-ready.cjs
@@ -0,0 +1,66 @@
+const _READY_RE = /^HERMES_DASHBOARD_READY port=(\d+)/m
+
+/**
+ * Watch a child process's stdout for the `HERMES_DASHBOARD_READY port=<N>`
+ * line that web_server.py prints after uvicorn binds its socket.
+ *
+ * Returns the parsed port. Rejects if:
+ *   - the child exits before emitting the line
+ *   - the child emits an `error` event
+ *   - no line arrives within the timeout
+ *
+ * A single `cleanup()` tears down every listener (data/exit/error/timeout)
+ * on every terminal path — resolve, reject, or timeout — so repeated
+ * backend spawns don't leak listener slots on the child.
+ */
+function waitForDashboardPort(child, timeoutMs = 45_000) {
+  return new Promise((resolve, reject) => {
+    let pending = ''
+    let settled = false
+
+    // Detach every listener and the timer exactly once; later calls are no-ops.
+    const cleanup = () => {
+      if (settled) return
+      settled = true
+      clearTimeout(timer)
+      child.stdout.off('data', onData)
+      child.off('exit', onExit)
+      child.off('error', onError)
+    }
+
+    // Every failure path tears down first, then rejects.
+    const fail = error => {
+      cleanup()
+      reject(error)
+    }
+
+    // Chunks can end mid-line, so the unterminated tail waits for the next chunk.
+    const onData = chunk => {
+      pending += chunk.toString()
+      const lines = pending.split('\n')
+      pending = lines.pop()
+      const ready = lines.map(line => line.match(_READY_RE)).find(Boolean)
+
+      if (ready) {
+        cleanup()
+        resolve(parseInt(ready[1], 10))
+      }
+    }
+
+    const onExit = (code, signal) =>
+      fail(new Error(`Hermes backend: exited before port announcement (${signal || code})`))
+
+    const onError = err => fail(err)
+
+    const timer = setTimeout(
+      () => fail(new Error(`Timed out waiting for Hermes backend port announcement (${timeoutMs}ms)`)),
+      timeoutMs
+    )
+
+    child.stdout.on('data', onData)
+    child.on('exit', onExit)
+    child.on('error', onError)
+  })
+}
+
+module.exports = { waitForDashboardPort }
--- a/apps/desktop/electron/connection-config.cjs
+++ b/apps/desktop/electron/connection-config.cjs
@@ -166,6 +166,39 @@ function profileRemoteOverride(config, profile) {
  return { url, authMode: normAuthMode(entry.authMode), token: entry.token }
 }

+/**
+ * In global-remote mode one backend serves every Desktop profile, so REST calls
+ * that are scoped by renderer-side `request.profile` must carry that scope as a
+ * query parameter. Local pooled backends and per-profile remote overrides do not
+ * need this: they already run against a backend scoped to the target profile.
+ */
+function pathWithGlobalRemoteProfile(path, profile, opts = {}) {
+  const scope = connectionScopeKey(profile)
+  const applies = scope && opts.globalRemote && !opts.profileRemoteOverride
+
+  if (!applies) {
+    return path
+  }
+
+  const text = String(path || '')
+  let url = null
+
+  try {
+    url = text ? new URL(text, 'http://hermes.local') : null
+  } catch {
+    url = null
+  }
+
+  // Unparseable or empty paths, and paths that already name a profile, pass through as given.
+  if (!url || url.searchParams.has('profile')) {
+    return path
+  }
+
+  url.searchParams.set('profile', scope)
+
+  return `${url.pathname}${url.search}${url.hash}`
+}
+
 function tokenPreview(value) {
  const raw = String(value || '')

@@ -247,6 +280,7 @@ module.exports = {
  cookiesHaveLiveSession,
  normAuthMode,
  normalizeRemoteBaseUrl,
+  pathWithGlobalRemoteProfile,
  profileRemoteOverride,
  resolveAuthMode,
  resolveTestWsUrl,
--- a/apps/desktop/electron/connection-config.test.cjs
+++ b/apps/desktop/electron/connection-config.test.cjs
@@ -24,6 +24,7 @@ const {
  cookiesHaveLiveSession,
  normAuthMode,
  normalizeRemoteBaseUrl,
+  pathWithGlobalRemoteProfile,
  profileRemoteOverride,
  resolveAuthMode,
  resolveTestWsUrl,
@@ -90,6 +91,72 @@ test('profileRemoteOverride tolerates a missing/!object profiles map', () => {
  assert.equal(profileRemoteOverride(null, 'coder'), null)
 })

+// --- pathWithGlobalRemoteProfile ---
+
+test('pathWithGlobalRemoteProfile appends profile in global remote mode', () => {
+  assert.equal(
+    pathWithGlobalRemoteProfile('/api/model/info', 'iris', {
+      globalRemote: true,
+      profileRemoteOverride: false
+    }),
+    '/api/model/info?profile=iris'
+  )
+})
+
+test('pathWithGlobalRemoteProfile preserves existing query params', () => {
+  assert.equal(
+    pathWithGlobalRemoteProfile('/api/model/options?force=1', 'iris', {
+      globalRemote: true,
+      profileRemoteOverride: false
+    }),
+    '/api/model/options?force=1&profile=iris'
+  )
+})
+
+test('pathWithGlobalRemoteProfile does not replace an explicit profile query', () => {
+  assert.equal(
+    pathWithGlobalRemoteProfile('/api/model/info?profile=default', 'iris', {
+      globalRemote: true,
+      profileRemoteOverride: false
+    }),
+    '/api/model/info?profile=default'
+  )
+})
+
+test('pathWithGlobalRemoteProfile skips local and per-profile remote override paths', () => {
+  assert.equal(
+    pathWithGlobalRemoteProfile('/api/model/info', 'iris', {
+      globalRemote: false,
+      profileRemoteOverride: false
+    }),
+    '/api/model/info'
+  )
+  assert.equal(
+    pathWithGlobalRemoteProfile('/api/model/info', 'iris', {
+      globalRemote: true,
+      profileRemoteOverride: true
+    }),
+    '/api/model/info'
+  )
+})
+
+test('pathWithGlobalRemoteProfile skips empty profile/path safely', () => {
+  assert.equal(
+    pathWithGlobalRemoteProfile('/api/model/info', '', {
+      globalRemote: true,
+      profileRemoteOverride: false
+    }),
+    '/api/model/info'
+  )
+  assert.equal(
+    pathWithGlobalRemoteProfile('', 'iris', {
+      globalRemote: true,
+      profileRemoteOverride: false
+    }),
+    ''
+  )
+})
+
 // --- normalizeRemoteBaseUrl ---

 test('normalizeRemoteBaseUrl strips trailing slashes, hash, and query', () => {
--- a/apps/desktop/electron/dashboard-token.cjs
+++ b/apps/desktop/electron/dashboard-token.cjs
@@ -0,0 +1,99 @@
+/**
+ * Helpers for local dashboard session-token discovery.
+ *
+ * The desktop main process can pass HERMES_DASHBOARD_SESSION_TOKEN when it
+ * spawns the local dashboard, but the dashboard is the source of truth for the
+ * token it actually serves to the renderer. If those drift, HTTP readiness
+ * probes still pass while /api/ws rejects the renderer's token.
+ */
+
+const DEFAULT_TOKEN_FETCH_TIMEOUT_MS = 3_000
+
+async function fetchPublicText(url, options = {}) {
+  const { protocol } = new URL(url)
+  if (protocol !== 'http:' && protocol !== 'https:') {
+    throw new Error(`Unsupported Hermes backend URL protocol: ${protocol}`)
+  }
+
+  const timeoutMs = options.timeoutMs ?? DEFAULT_TOKEN_FETCH_TIMEOUT_MS
+  // One signal bounds headers AND body, so map its TimeoutError on both awaits.
+  const onFail = error => {
+    if (error.name !== 'TimeoutError') throw error
+    throw new Error(`Timed out connecting to Hermes backend after ${timeoutMs}ms`)
+  }
+  const res = await fetch(url, { signal: AbortSignal.timeout(timeoutMs) }).catch(onFail)
+  const text = await res.text().catch(onFail)
+
+  if (!res.ok) throw new Error(`${res.status}: ${text || res.statusText}`)
+
+  return text
+}
+
+function extractInjectedDashboardToken(html) {
+  // The capture is the double-quoted literal assigned to the global, escapes included.
+  const found = String(html || '').match(/window\.__HERMES_SESSION_TOKEN__\s*=\s*("(?:\\.|[^"\\])*")/)
+  try {
+    return found ? JSON.parse(found[1]) : null
+  } catch {
+    return null
+  }
+}
+
+function dashboardIndexUrl(baseUrl) {
+  return String(baseUrl || '').replace(/\/+$/, '') + '/' // exactly one trailing slash
+}
+
+async function resolveServedDashboardToken(baseUrl, fallbackToken, options = {}) {
+  // Fetch errors are not caught here; they reject to the caller.
+  const timeoutMs = options.timeoutMs ?? DEFAULT_TOKEN_FETCH_TIMEOUT_MS
+  const html = await (options.fetchText || fetchPublicText)(dashboardIndexUrl(baseUrl), { timeoutMs })
+  const served = extractInjectedDashboardToken(html)
+  if (!served) return fallbackToken
+
+  const canLog = typeof options.rememberLog === 'function'
+  if (canLog && served !== fallbackToken) {
+    options.rememberLog('[boot] dashboard served a different session token; using served token for WebSocket auth')
+  }
+  return served
+}
+
+/**
+ * A served token that differs from our spawn token while our child is DEAD
+ * came from a process we did not spawn (orphan/port squatter that satisfied
+ * the public /api/status readiness probe). With a live child the mismatch is
+ * benign: our own backend regenerated the token because the env pin did not
+ * survive the spawn.
+ */
+function isForeignBackendToken({ servedToken, spawnToken, childAlive }) {
+  return !(childAlive || !servedToken || servedToken === spawnToken) // dead child + differing served token
+}
+
+/**
+ * Resolve the token the backend actually serves, adopting benign drift and
+ * failing loudly on a foreign backend. `childAlive` is a thunk so liveness is
+ * sampled after the fetch, not before.
+ */
+async function adoptServedDashboardToken(baseUrl, spawnToken, { childAlive, label = 'Hermes backend', ...options }) {
+  let servedToken = spawnToken
+  try {
+    servedToken = await resolveServedDashboardToken(baseUrl, spawnToken, options)
+  } catch (error) {
+    options.rememberLog?.(`[boot] could not read served dashboard token (${label}): ${error.message}`)
+  }
+  // childAlive() runs only now, after the fetch settled, so a child that died meanwhile counts as dead.
+  const foreign = isForeignBackendToken({ servedToken, spawnToken, childAlive: childAlive() })
+  if (!foreign) return servedToken
+  throw new Error(
+    `${label} exited and ${dashboardIndexUrl(baseUrl)} is served by a process we did not spawn; refusing its session token.`
+  )
+}
+
+module.exports = {
+  DEFAULT_TOKEN_FETCH_TIMEOUT_MS,
+  adoptServedDashboardToken,
+  dashboardIndexUrl,
+  extractInjectedDashboardToken,
+  fetchPublicText,
+  isForeignBackendToken,
+  resolveServedDashboardToken
+}
--- a/apps/desktop/electron/dashboard-token.test.cjs
+++ b/apps/desktop/electron/dashboard-token.test.cjs
@@ -0,0 +1,142 @@
+/**
+ * Tests for electron/dashboard-token.cjs.
+ *
+ * Run with: node --test electron/dashboard-token.test.cjs
+ * (Wired into npm test:desktop:platforms in package.json.)
+ */
+
+const test = require('node:test')
+const assert = require('node:assert/strict')
+
+const {
+  adoptServedDashboardToken,
+  dashboardIndexUrl,
+  extractInjectedDashboardToken,
+  fetchPublicText,
+  isForeignBackendToken,
+  resolveServedDashboardToken
+} = require('./dashboard-token.cjs')
+
+test('extractInjectedDashboardToken reads the JSON-encoded dashboard token', () => {
+  const html = '<script>window.__HERMES_SESSION_TOKEN__="served-token";window.__HERMES_BASE_PATH__=""</script>'
+  assert.equal(extractInjectedDashboardToken(html), 'served-token')
+})
+
+test('extractInjectedDashboardToken handles escaped token strings', () => {
+  const html = '<script>window.__HERMES_SESSION_TOKEN__="served\\\\token\\"quoted";</script>'
+  assert.equal(extractInjectedDashboardToken(html), 'served\\token"quoted')
+})
+
+test('extractInjectedDashboardToken returns null for missing or malformed values', () => {
+  assert.equal(extractInjectedDashboardToken('<html></html>'), null)
+  assert.equal(extractInjectedDashboardToken('<script>window.__HERMES_SESSION_TOKEN__={bad}</script>'), null)
+})
+
+test('dashboardIndexUrl preserves dashboard path prefixes', () => {
+  assert.equal(dashboardIndexUrl('http://127.0.0.1:9120'), 'http://127.0.0.1:9120/')
+  assert.equal(dashboardIndexUrl('https://host.example/hermes/'), 'https://host.example/hermes/')
+})
+
+test('resolveServedDashboardToken uses the served token and logs when it differs', async () => {
+  const logs = []
+  const token = await resolveServedDashboardToken('http://127.0.0.1:9120', 'spawn-token', {
+    fetchText: async url => {
+      assert.equal(url, 'http://127.0.0.1:9120/')
+      return '<script>window.__HERMES_SESSION_TOKEN__="served-token";</script>'
+    },
+    rememberLog: line => logs.push(line)
+  })
+
+  assert.equal(token, 'served-token')
+  assert.equal(logs.length, 1)
+  assert.match(logs[0], /served a different session token/)
+})
+
+test('resolveServedDashboardToken falls back when the served HTML has no token', async () => {
+  const token = await resolveServedDashboardToken('http://127.0.0.1:9120', 'spawn-token', {
+    fetchText: async () => '<html></html>',
+    rememberLog: () => {
+      throw new Error('should not log when no served token is present')
+    }
+  })
+
+  assert.equal(token, 'spawn-token')
+})
+
+test('resolveServedDashboardToken does not log when served token matches fallback', async () => {
+  const token = await resolveServedDashboardToken('http://127.0.0.1:9120', 'same-token', {
+    fetchText: async () => '<script>window.__HERMES_SESSION_TOKEN__="same-token";</script>',
+    rememberLog: () => {
+      throw new Error('should not log when token already matches')
+    }
+  })
+
+  assert.equal(token, 'same-token')
+})
+
+test('resolveServedDashboardToken propagates fetch errors so callers can fall back explicitly', async () => {
+  await assert.rejects(
+    () =>
+      resolveServedDashboardToken('http://127.0.0.1:9120', 'spawn-token', {
+        fetchText: async () => {
+          throw new Error('boom')
+        }
+      }),
+    /boom/
+  )
+})
+
+test('fetchPublicText rejects unsupported protocols', async () => {
+  await assert.rejects(() => fetchPublicText('file:///tmp/index.html'), /Unsupported Hermes backend URL protocol/)
+})
+
+test('isForeignBackendToken only flags a mismatched token from a dead child', () => {
+  const cases = [
+    [{ servedToken: 'other', spawnToken: 'mine', childAlive: false }, true],
+    // Live child + drift = our backend regenerated the token (env pin lost).
+    [{ servedToken: 'other', spawnToken: 'mine', childAlive: true }, false],
+    [{ servedToken: 'mine', spawnToken: 'mine', childAlive: false }, false],
+    [{ servedToken: 'mine', spawnToken: 'mine', childAlive: true }, false],
+    [{ servedToken: null, spawnToken: 'mine', childAlive: false }, false],
+    [{ servedToken: '', spawnToken: 'mine', childAlive: false }, false]
+  ]
+  for (const [input, expected] of cases) {
+    assert.equal(isForeignBackendToken(input), expected, JSON.stringify(input))
+  }
+})
+
+test('adoptServedDashboardToken adopts drift from a live child', async () => {
+  const token = await adoptServedDashboardToken('http://127.0.0.1:9120', 'spawn-token', {
+    childAlive: () => true,
+    fetchText: async () => '<script>window.__HERMES_SESSION_TOKEN__="served-token";</script>'
+  })
+
+  assert.equal(token, 'served-token')
+})
+
+test('adoptServedDashboardToken refuses a foreign token when our child is dead', async () => {
+  await assert.rejects(
+    () =>
+      adoptServedDashboardToken('http://127.0.0.1:9120', 'spawn-token', {
+        childAlive: () => false,
+        fetchText: async () => '<script>window.__HERMES_SESSION_TOKEN__="squatter-token";</script>',
+        label: 'Hermes backend for profile "work"'
+      }),
+    /profile "work".*process we did not spawn/
+  )
+})
+
+test('adoptServedDashboardToken falls back to the spawn token when the fetch fails', async () => {
+  const logs = []
+  const token = await adoptServedDashboardToken('http://127.0.0.1:9120', 'spawn-token', {
+    childAlive: () => true,
+    fetchText: async () => {
+      throw new Error('boom')
+    },
+    rememberLog: line => logs.push(line)
+  })
+
+  assert.equal(token, 'spawn-token')
+  assert.equal(logs.length, 1)
+  assert.match(logs[0], /could not read served dashboard token \(Hermes backend\): boom/)
+})
--- a/apps/desktop/electron/git-worktrees.cjs
+++ b/apps/desktop/electron/git-worktrees.cjs
@@ -0,0 +1,174 @@
+'use strict'
+
+// Resolve git-worktree relationships for a set of session cwds, reading git's
+// on-disk metadata directly (no `git` spawn per path):
+//
+//   - A normal checkout has a `.git` DIRECTORY at its root → it's the main
+//     worktree; its repo root IS that directory's parent.
+//   - A linked worktree has a `.git` FILE: `gitdir: <repo>/.git/worktrees/<name>`.
+//     That admin dir's `commondir` points back at the shared `<repo>/.git`, whose
+//     parent is the main repo root.
+//
+// Grouping by repoRoot therefore clusters a repo's main checkout with all of its
+// linked worktrees, regardless of how the worktree directories are named. The
+// branch (read from the worktree's own HEAD) gives each worktree a meaningful
+// label.
+
+const fs = require('node:fs')
+const path = require('node:path')
+const { resolveRequestedPathForIpc } = require('./hardening.cjs')
+
+// Walk up from `start` to the nearest ancestor that carries a `.git` entry
+// (file for a linked worktree, dir for the main checkout). Capped so a stray
+// path can't loop forever.
+function findGitHost(start, fsImpl) {
+  for (let dir = start, depth = 0; depth < 64; depth += 1) {
+    const marker = path.join(dir, '.git')
+    let hasMarker
+
+    try {
+      hasMarker = fsImpl.existsSync(marker)
+    } catch {
+      return null
+    }
+
+    if (hasMarker) {
+      return dir
+    }
+
+    const parent = path.dirname(dir)
+    // dirname() of a filesystem root is the root itself: nothing left to climb.
+    if (parent === dir) {
+      return null
+    }
+    dir = parent
+  }
+
+  return null
+}
+
+function readBranch(gitDir, fsImpl) {
+  // Branch name from `<gitDir>/HEAD`, a short id when detached, else null.
+  try {
+    const head = fsImpl.readFileSync(path.join(gitDir, 'HEAD'), 'utf8').trim()
+    const ref = head.match(/^ref:\s*refs\/heads\/(.+)$/)
+
+    if (ref) {
+      return ref[1]
+    }
+    // Detached HEAD is a bare object id: 40 hex (SHA-1) or 64 hex (SHA-256).
+    return /^[0-9a-f]{7,64}$/i.test(head) ? head.slice(0, 8) : null
+  } catch {
+    return null
+  }
+}
+
+// Given the directory that owns the `.git` entry, resolve its worktree identity.
+function resolveFromHost(host, fsImpl) {
+  // Returns null whenever the `.git` entry cannot be stat'ed, read, or parsed.
+  const gitEntry = path.join(host, '.git')
+
+  // Trimmed file contents, or null instead of an exception on any read failure.
+  const tryRead = file => {
+    try {
+      return fsImpl.readFileSync(file, 'utf8').trim()
+    } catch {
+      return null
+    }
+  }
+
+  let entryStat
+
+  try {
+    entryStat = fsImpl.statSync(gitEntry)
+  } catch {
+    return null
+  }
+
+  if (entryStat.isDirectory()) {
+    // Main checkout: the host is both the repo root and its own worktree root.
+    return {
+      repoRoot: host,
+      worktreeRoot: host,
+      isMainWorktree: true,
+      branch: readBranch(gitEntry, fsImpl)
+    }
+  }
+
+  // Linked worktree: `.git` is a file whose `gitdir:` line names the admin dir.
+  const pointer = tryRead(gitEntry)
+  const gitdir = pointer === null ? null : pointer.match(/^gitdir:\s*(.+)$/m)
+
+  if (!gitdir) {
+    return null
+  }
+
+  const adminDir = path.resolve(host, gitdir[1].trim())
+
+  // `commondir` is resolved against the admin dir and names the shared `<repo>/.git`.
+  // Without it, assume the `<repo>/.git/worktrees/<name>` layout and go up two.
+  const commonRel = tryRead(path.join(adminDir, 'commondir'))
+  const commonDir =
+    commonRel === null ? path.dirname(path.dirname(adminDir)) : path.resolve(adminDir, commonRel)
+
+  return {
+    repoRoot: path.dirname(commonDir),
+    worktreeRoot: host,
+    isMainWorktree: false,
+    branch: readBranch(adminDir, fsImpl)
+  }
+}
+
+function resolveWorktree(startPath, fsImpl = fs) {
+  // Resolve `startPath` (a directory, or a file inside one) to the worktree
+  // that contains it. Returns null when the path is rejected by the IPC path
+  // checks, cannot be stat'ed, or has no `.git` entry in any ancestor.
+  let searchFrom
+
+  try {
+    const target = resolveRequestedPathForIpc(startPath, { purpose: 'Worktree lookup' })
+    const targetStat = fsImpl.statSync(target)
+
+    // A file path starts the upward search from its containing directory.
+    if (targetStat.isDirectory()) {
+      searchFrom = target
+    } else {
+      searchFrom = path.dirname(target)
+    }
+  } catch {
+    return null
+  }
+
+  // Climb to the nearest ancestor that carries a `.git` entry.
+  const host = findGitHost(searchFrom, fsImpl)
+
+  if (host) {
+    return resolveFromHost(host, fsImpl)
+  }
+
+  return null
+}
+
+// Batch entry point for the renderer: maps each requested cwd to its worktree
+// info (or null when it isn't inside a git checkout / can't be read). Dedupes by
+// own key, so repeated cwds cost one lookup and "constructor" is not skipped.
+async function worktreesForIpc(cwds, options = {}) {
+  const fsImpl = options.fs || fs
+  const list = Array.isArray(cwds) ? cwds : []
+  const out = {}
+
+  for (const cwd of list) {
+    if (typeof cwd !== 'string' || !cwd.trim() || Object.prototype.hasOwnProperty.call(out, cwd)) {
+      continue
+    }
+
+    out[cwd] = resolveWorktree(cwd, fsImpl)
+  }
+
+  return out
+}
+
+module.exports = {
+  resolveWorktree,
+  worktreesForIpc
+}
--- a/apps/desktop/electron/hardening.cjs
+++ b/apps/desktop/electron/hardening.cjs
@@ -1,4 +1,5 @@
 const fs = require('node:fs')
+const os = require('node:os')
 const path = require('node:path')
 const { fileURLToPath } = require('node:url')

@@ -142,7 +143,14 @@ function rejectUnsafePathSyntax(filePath, purpose = 'File read') {

 function resolveRequestedPathForIpc(filePath, options = {}) {
  const purpose = String(options.purpose || 'File read')
-  const raw = rejectUnsafePathSyntax(filePath, purpose)
+  let raw = rejectUnsafePathSyntax(filePath, purpose)
+
+  // Gateway-reported cwds (config `terminal.cwd`, remote sessions) routinely
+  // arrive as `~/...`. Node's fs has no shell — without expansion the path
+  // resolves under process.cwd() and every read "ENOENT"s forever.
+  if (raw === '~' || raw.startsWith('~/') || raw.startsWith('~\\')) {
+    raw = path.join(os.homedir(), raw.slice(1))
+  }

  if (/^file:/i.test(raw)) {
    let resolvedPath
--- a/apps/desktop/electron/hardening.test.cjs
+++ b/apps/desktop/electron/hardening.test.cjs
@@ -106,6 +106,19 @@ test('resolveRequestedPathForIpc resolves relative paths from the trimmed base d
  )
 })

+test('resolveRequestedPathForIpc expands ~ to the home directory', () => {
+  assert.equal(resolveRequestedPathForIpc('~', { purpose: 'Directory read' }), path.resolve(os.homedir()))
+  assert.equal(
+    resolveRequestedPathForIpc('~/www/project', { purpose: 'Directory read' }),
+    path.resolve(os.homedir(), 'www/project')
+  )
+  // `~user` shorthand is NOT expanded — only the caller's own home.
+  assert.equal(
+    resolveRequestedPathForIpc('~other/secret', { baseDir: os.tmpdir(), purpose: 'Directory read' }),
+    path.resolve(os.tmpdir(), '~other/secret')
+  )
+})
+
 test('resolveReadableFileForIpc validates existence type size and sensitivity', async t => {
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-desktop-hardening-'))
  t.after(() => fs.rmSync(tempDir, { recursive: true, force: true }))
--- a/apps/desktop/electron/main.cjs
+++ b/apps/desktop/electron/main.cjs
--- a/apps/desktop/electron/preload.cjs
+++ b/apps/desktop/electron/preload.cjs
@@ -5,7 +5,34 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
  revalidateConnection: () => ipcRenderer.invoke('hermes:connection:revalidate'),
  touchBackend: profile => ipcRenderer.invoke('hermes:backend:touch', profile),
  getGatewayWsUrl: profile => ipcRenderer.invoke('hermes:gateway:ws-url', profile),
-  openSessionWindow: sessionId => ipcRenderer.invoke('hermes:window:openSession', sessionId),
+  openSessionWindow: (sessionId, opts) => ipcRenderer.invoke('hermes:window:openSession', sessionId, opts),
+  openNewSessionWindow: () => ipcRenderer.invoke('hermes:window:openNewSession'),
+  petOverlay: {
+    // Main renderer → main process: window lifecycle + drag. `request` is
+    // `{ bounds, screen }`; resolves with the screen bounds it actually used.
+    open: request => ipcRenderer.invoke('hermes:pet-overlay:open', request),
+    close: () => ipcRenderer.invoke('hermes:pet-overlay:close'),
+    setBounds: bounds => ipcRenderer.send('hermes:pet-overlay:set-bounds', bounds),
+    setIgnoreMouse: ignore => ipcRenderer.send('hermes:pet-overlay:ignore-mouse', ignore),
+    // Flip the overlay focusable (and focus it) while the composer needs keys.
+    setFocusable: focusable => ipcRenderer.send('hermes:pet-overlay:set-focusable', focusable),
+    // Main renderer → overlay (forwarded by main): push the latest pet state.
+    pushState: payload => ipcRenderer.send('hermes:pet-overlay:state', payload),
+    // Overlay → main renderer (forwarded by main): pop back in / composer submit.
+    control: payload => ipcRenderer.send('hermes:pet-overlay:control', payload),
+    // Overlay subscribes to state pushes.
+    onState: callback => {
+      const listener = (_event, payload) => callback(payload)
+      ipcRenderer.on('hermes:pet-overlay:state', listener)
+      return () => ipcRenderer.removeListener('hermes:pet-overlay:state', listener)
+    },
+    // Main renderer subscribes to overlay control messages.
+    onControl: callback => {
+      const listener = (_event, payload) => callback(payload)
+      ipcRenderer.on('hermes:pet-overlay:control', listener)
+      return () => ipcRenderer.removeListener('hermes:pet-overlay:control', listener)
+    }
+  },
  getBootProgress: () => ipcRenderer.invoke('hermes:boot-progress:get'),
  getConnectionConfig: profile => ipcRenderer.invoke('hermes:connection-config:get', profile),
  saveConnectionConfig: payload => ipcRenderer.invoke('hermes:connection-config:save', payload),
@@ -39,6 +66,8 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
  watchPreviewFile: url => ipcRenderer.invoke('hermes:watchPreviewFile', url),
  stopPreviewFileWatch: id => ipcRenderer.invoke('hermes:stopPreviewFileWatch', id),
  setTitleBarTheme: payload => ipcRenderer.send('hermes:titlebar-theme', payload),
+  setNativeTheme: mode => ipcRenderer.send('hermes:native-theme', mode),
+  setTranslucency: payload => ipcRenderer.send('hermes:translucency', payload),
  setPreviewShortcutActive: active => ipcRenderer.send('hermes:previewShortcutActive', Boolean(active)),
  openExternal: url => ipcRenderer.invoke('hermes:openExternal', url),
  fetchLinkTitle: url => ipcRenderer.invoke('hermes:fetchLinkTitle', url),
@@ -52,6 +81,7 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
  getRecentLogs: () => ipcRenderer.invoke('hermes:logs:recent'),
  readDir: dirPath => ipcRenderer.invoke('hermes:fs:readDir', dirPath),
  gitRoot: startPath => ipcRenderer.invoke('hermes:fs:gitRoot', startPath),
+  worktrees: cwds => ipcRenderer.invoke('hermes:fs:worktrees', cwds),
  terminal: {
    dispose: id => ipcRenderer.invoke('hermes:terminal:dispose', id),
    resize: (id, size) => ipcRenderer.invoke('hermes:terminal:resize', id, size),
@@ -91,6 +121,16 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
    ipcRenderer.on('hermes:window-state-changed', listener)
    return () => ipcRenderer.removeListener('hermes:window-state-changed', listener)
  },
+  onFocusSession: callback => {
+    const listener = (_event, sessionId) => callback(sessionId)
+    ipcRenderer.on('hermes:focus-session', listener)
+    return () => ipcRenderer.removeListener('hermes:focus-session', listener)
+  },
+  onNotificationAction: callback => {
+    const listener = (_event, payload) => callback(payload)
+    ipcRenderer.on('hermes:notification-action', listener)
+    return () => ipcRenderer.removeListener('hermes:notification-action', listener)
+  },
  onPreviewFileChanged: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:preview-file-changed', listener)
--- a/apps/desktop/electron/session-windows.cjs
+++ b/apps/desktop/electron/session-windows.cjs
@@ -5,22 +5,31 @@

 const { pathToFileURL } = require('node:url')

+// Secondary windows open at the minimum usable size — a compact side panel for
+// subagent watch / cmd-click session pop-out, not a second full desktop.
+const SESSION_WINDOW_MIN_WIDTH = 420
+const SESSION_WINDOW_MIN_HEIGHT = 620
+
 // Build the renderer URL for a secondary window. The renderer uses a
 // HashRouter, so the session route lives after the '#'. The `?win=secondary`
 // flag MUST sit in the query string BEFORE the '#': anything after the '#' is
 // treated as the route by HashRouter and would break routeSessionId(). The
 // renderer reads the flag from window.location.search to suppress the install /
-// onboarding overlays and the global session sidebar.
-function buildSessionWindowUrl(sessionId, { devServer, rendererIndexPath } = {}) {
-  const route = `#/${encodeURIComponent(sessionId)}`
+// onboarding overlays and the global session sidebar. `new=1` marks the compact
+// scratch window; `watch=1` marks a spectator window (e.g. a running subagent's
+// session): the renderer resumes it lazily so the gateway never builds an agent
+// just to stream into it.
+function buildSessionWindowUrl(sessionId, { devServer, rendererIndexPath, watch, newSession } = {}) {
+  const query = `?win=secondary${newSession ? '&new=1' : ''}${watch ? '&watch=1' : ''}`
+  const route = newSession ? '#/' : `#/${encodeURIComponent(sessionId)}`

  if (devServer) {
    const base = devServer.endsWith('/') ? devServer.slice(0, -1) : devServer

-    return `${base}/?win=secondary${route}`
+    return `${base}/${query}${route}`
  }

-  return `${pathToFileURL(rendererIndexPath).toString()}?win=secondary${route}`
+  return `${pathToFileURL(rendererIndexPath).toString()}${query}${route}`
 }

 // A small registry keyed by sessionId that guarantees one window per chat:
@@ -83,4 +92,9 @@ function createSessionWindowRegistry() {
  }
 }

-module.exports = { buildSessionWindowUrl, createSessionWindowRegistry }
+module.exports = {
+  buildSessionWindowUrl,
+  createSessionWindowRegistry,
+  SESSION_WINDOW_MIN_HEIGHT,
+  SESSION_WINDOW_MIN_WIDTH
+}
--- a/apps/desktop/electron/session-windows.test.cjs
+++ b/apps/desktop/electron/session-windows.test.cjs
@@ -76,6 +76,18 @@ test('buildSessionWindowUrl builds a packaged file URL with the flag before the
  assert.match(url, /^file:\/\/.*index\.html\?win=secondary#\/abc$/)
 })

+test('buildSessionWindowUrl adds the watch flag for spectator windows, before the hash', () => {
+  const url = buildSessionWindowUrl('abc', { devServer: 'http://localhost:5173', watch: true })
+
+  assert.equal(url, 'http://localhost:5173/?win=secondary&watch=1#/abc')
+})
+
+test('buildSessionWindowUrl routes new-session windows to the draft (#/)', () => {
+  const url = buildSessionWindowUrl(null, { devServer: 'http://localhost:5173', newSession: true })
+
+  assert.equal(url, 'http://localhost:5173/?win=secondary&new=1#/')
+})
+
 test('registry opens one window per session and focuses on re-open', () => {
  const registry = createSessionWindowRegistry()
  let built = 0
--- a/apps/desktop/electron/windows-child-process.test.cjs
+++ b/apps/desktop/electron/windows-child-process.test.cjs
@@ -8,7 +8,7 @@ const path = require('node:path')
 const ELECTRON_DIR = __dirname

 function readElectronFile(name) {
-  return fs.readFileSync(path.join(ELECTRON_DIR, name), 'utf8')
+  return fs.readFileSync(path.join(ELECTRON_DIR, name), 'utf8').replace(/\r\n/g, '\n')
 }

 function requireHiddenChildOptions(source, needle) {
@@ -42,6 +42,9 @@ test('intentional or interactive desktop child processes stay documented', () =>
  const source = readElectronFile('main.cjs')

  assert.match(source, /windowsHide: false/)
+  assert.match(source, /handOffWindowsBootstrapRecovery/)
+  assert.match(source, /'--repair', '--branch'/)
+  assert.match(source, /'--update', '--branch'/)
  assert.match(source, /nodePty\.spawn\(command, args/)
  assert.match(source, /spawn\('cmd\.exe', \['\/c', 'start'/)
 })
--- a/apps/desktop/electron/windows-user-env.cjs
+++ b/apps/desktop/electron/windows-user-env.cjs
@@ -0,0 +1,76 @@
+// windows-user-env.cjs
+//
+// Read a User-scoped environment variable straight from the Windows registry
+// (HKCU\Environment).
+//
+// A GUI app launched from Explorer inherits the environment block captured at
+// login, so a variable set via `setx` AFTER login is invisible in process.env
+// even though a fresh shell — and the Hermes CLI — sees it immediately. The
+// desktop's HERMES_HOME resolution relies on process.env, so that stale-snapshot
+// gap silently sends the backend to the default %LOCALAPPDATA%\hermes. Reading
+// the live registry value closes the gap. See #45471.
+
+const { execFileSync } = require('node:child_process')
+
+// Extract one value from `reg query HKCU\Environment /v <name>` output, e.g.:
+//
+//   HKEY_CURRENT_USER\Environment
+//       HERMES_HOME    REG_SZ    F:\Hermes\data
+//
+// Yields the value text as printed (inner spaces kept; the name is compared
+// case-insensitively), or null if no line for `name` is found.
+function parseRegQueryValue(stdout, name) {
+  if (!stdout || !name) return null
+  const valueLine =
+    /^(\S+)\s+(?:REG_SZ|REG_EXPAND_SZ|REG_MULTI_SZ|REG_DWORD|REG_QWORD|REG_BINARY|REG_NONE)\s+(.*)$/
+  for (const entry of String(stdout).split(/\r?\n/)) {
+    const hit = valueLine.exec(entry.trim())
+    if (hit === null || hit[1].toLowerCase() !== name.toLowerCase()) {
+      continue
+    }
+    return hit[2]
+  }
+  return null
+}
+
+// Substitute %VAR% references using an env map, matching names without regard
+// to case. REG_EXPAND_SZ values are stored unexpanded; a plain REG_SZ path has
+// no references, so it passes through unchanged. Unresolved refs stay verbatim.
+function expandWindowsEnvRefs(value, env = process.env) {
+  if (!value) return value
+  return value.replace(/%([^%]+)%/g, (ref, varName) => {
+    const hit = Object.keys(env).find(candidate => String(varName).toUpperCase() === candidate.toUpperCase())
+    return hit == null || env[hit] == null ? ref : env[hit]
+  })
+}
+
+// Look up a User-scoped env var in HKCU\Environment via `reg query`. Yields null
+// off Windows (no process is spawned), when spawning fails or `reg` exits
+// non-zero (value not present), and when the expanded value is blank.
+function readWindowsUserEnvVar(
+  name,
+  { platform = process.platform, env = process.env, exec = execFileSync } = {}
+) {
+  if (!name || platform !== 'win32') return null
+  const regArgs = ['query', 'HKCU\\Environment', '/v', name]
+  const spawnOptions = { encoding: 'utf8', windowsHide: true, timeout: 5000 }
+  let output
+  try {
+    output = exec('reg', regArgs, spawnOptions)
+  } catch {
+    // Either `reg` is unavailable or it exited 1 because the value is absent;
+    // the caller is expected to fall back.
+    return null
+  }
+  const stored = parseRegQueryValue(output, name)
+  if (stored == null) return null
+  // Expand %VAR% refs, then treat a whitespace-only result as "not set".
+  const value = expandWindowsEnvRefs(stored, env).trim()
+  return value === '' ? null : value
+}
+
+module.exports = {
+  expandWindowsEnvRefs,
+  parseRegQueryValue,
+  readWindowsUserEnvVar
+}
--- a/apps/desktop/electron/windows-user-env.test.cjs
+++ b/apps/desktop/electron/windows-user-env.test.cjs
@@ -0,0 +1,90 @@
+const assert = require('node:assert/strict')
+const { test } = require('node:test')
+
+const {
+  expandWindowsEnvRefs,
+  parseRegQueryValue,
+  readWindowsUserEnvVar
+} = require('./windows-user-env.cjs')
+
+// ── parseRegQueryValue ─────────────────────────────────────────────────────
+
+test('parseRegQueryValue extracts a REG_SZ value', () => {
+  const out = [
+    '',
+    'HKEY_CURRENT_USER\\Environment',
+    '    HERMES_HOME    REG_SZ    F:\\Hermes\\data',
+    ''
+  ].join('\r\n')
+  assert.equal(parseRegQueryValue(out, 'HERMES_HOME'), 'F:\\Hermes\\data')
+})
+
+test('parseRegQueryValue matches the name case-insensitively', () => {
+  const out = 'HKEY_CURRENT_USER\\Environment\r\n    Hermes_Home    REG_EXPAND_SZ    %USERPROFILE%\\h\r\n'
+  assert.equal(parseRegQueryValue(out, 'HERMES_HOME'), '%USERPROFILE%\\h')
+})
+
+test('parseRegQueryValue preserves spaces inside the value', () => {
+  const out = '    HERMES_HOME    REG_SZ    C:\\Program Files\\Hermes\r\n'
+  assert.equal(parseRegQueryValue(out, 'HERMES_HOME'), 'C:\\Program Files\\Hermes')
+})
+
+test('parseRegQueryValue returns null when the value line is absent', () => {
+  const out = 'HKEY_CURRENT_USER\\Environment\r\n    Path    REG_SZ    C:\\x\r\n'
+  assert.equal(parseRegQueryValue(out, 'HERMES_HOME'), null)
+  assert.equal(parseRegQueryValue('', 'HERMES_HOME'), null)
+  assert.equal(parseRegQueryValue('garbage', 'HERMES_HOME'), null)
+})
+
+// ── expandWindowsEnvRefs ───────────────────────────────────────────────────
+
+test('expandWindowsEnvRefs expands %VAR% case-insensitively', () => {
+  assert.equal(
+    expandWindowsEnvRefs('%UserProfile%\\h', { USERPROFILE: 'C:\\Users\\jeff' }),
+    'C:\\Users\\jeff\\h'
+  )
+})
+
+test('expandWindowsEnvRefs leaves literal paths and unknown refs intact', () => {
+  assert.equal(expandWindowsEnvRefs('F:\\Hermes\\data', {}), 'F:\\Hermes\\data')
+  assert.equal(expandWindowsEnvRefs('%NOPE%\\x', {}), '%NOPE%\\x')
+})
+
+// ── readWindowsUserEnvVar ──────────────────────────────────────────────────
+
+test('readWindowsUserEnvVar returns null off Windows without spawning', () => {
+  let spawned = false
+  const exec = () => {
+    spawned = true
+    return ''
+  }
+  assert.equal(readWindowsUserEnvVar('HERMES_HOME', { platform: 'linux', exec }), null)
+  assert.equal(spawned, false)
+})
+
+test('readWindowsUserEnvVar queries HKCU\\Environment and expands the value', () => {
+  const calls = []
+  const exec = (cmd, args) => {
+    calls.push([cmd, args])
+    return 'HKEY_CURRENT_USER\\Environment\r\n    HERMES_HOME    REG_EXPAND_SZ    %DRIVE%\\Hermes\r\n'
+  }
+  const value = readWindowsUserEnvVar('HERMES_HOME', {
+    platform: 'win32',
+    env: { DRIVE: 'F:' },
+    exec
+  })
+  assert.equal(value, 'F:\\Hermes')
+  assert.deepEqual(calls, [['reg', ['query', 'HKCU\\Environment', '/v', 'HERMES_HOME']]])
+})
+
+test('readWindowsUserEnvVar returns null when reg exits non-zero (value missing)', () => {
+  const exec = () => {
+    throw new Error('reg exited 1')
+  }
+  assert.equal(readWindowsUserEnvVar('HERMES_HOME', { platform: 'win32', exec }), null)
+})
+
+test('readWindowsUserEnvVar returns null for an empty value', () => {
+  const exec = () => '    HERMES_HOME    REG_SZ    \r\n'
+  assert.equal(readWindowsUserEnvVar('HERMES_HOME', { platform: 'win32', exec }), null)
+})
--- a/apps/desktop/index.html
+++ b/apps/desktop/index.html
@@ -9,6 +9,28 @@
    <link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png" />
    <link rel="shortcut icon" href="/apple-touch-icon.png" />
    <title>Hermes</title>
+    <script>
+      // Pre-paint the themed background before the app bundle loads. Without
+      // this, the first frame (which is what `ready-to-show` waits for) is the
+      // UA-default white page, and the real theme only lands once the whole
+      // module graph has executed — i.e. the "white flash" on every new
+      // window. applyTheme() in src/themes/context.tsx keeps these keys fresh.
+      try {
+        let bg = localStorage.getItem('hermes-boot-background')
+        let scheme = localStorage.getItem('hermes-boot-color-scheme')
+        if (!bg) {
+          const dark = window.matchMedia('(prefers-color-scheme: dark)').matches
+          bg = dark ? '#111111' : '#f7f7f7'
+          scheme = dark ? 'dark' : 'light'
+        }
+        document.documentElement.style.backgroundColor = bg
+        if (scheme === 'dark' || scheme === 'light') {
+          document.documentElement.style.colorScheme = scheme
+        }
+      } catch {
+        // localStorage unavailable — keep UA defaults.
+      }
+    </script>
  </head>
  <body>
    <div id="root" class="scrollbar-dt"></div>
--- a/apps/desktop/package.json
+++ b/apps/desktop/package.json
@@ -18,7 +18,9 @@
    "profile:main": "wait-on http://127.0.0.1:5174 && cross-env XCURSOR_SIZE=24 HERMES_DESKTOP_DEV_SERVER=http://127.0.0.1:5174 electron --inspect=9229 .",
    "profile:main:cpu": "wait-on http://127.0.0.1:5174 && cross-env XCURSOR_SIZE=24 NODE_OPTIONS=--cpu-prof HERMES_DESKTOP_DEV_SERVER=http://127.0.0.1:5174 electron .",
    "start": "npm run build && electron .",
-    "build": "node scripts/assert-root-install.cjs && node scripts/write-build-stamp.cjs && node scripts/stage-native-deps.cjs && tsc -b && vite build && node scripts/assert-dist-built.cjs",
+    "build": "node scripts/assert-root-install.cjs && node scripts/write-build-stamp.cjs && node scripts/stage-native-deps.cjs && tsc -b && vite build && npm run postbuild",
+    "postbuild": "node scripts/assert-dist-built.cjs",
+    "prebuilder": "node scripts/patch-electron-builder-mac-binary.cjs",
    "builder": "cross-env NODE_OPTIONS=--max-old-space-size=16384 electron-builder",
    "pack": "npm run build && npm run builder -- --dir",
    "dist": "npm run build && npm run builder",
@@ -35,7 +37,7 @@
    "test:desktop:nsis": "node scripts/test-desktop.mjs nsis",
    "test:desktop:existing": "node scripts/test-desktop.mjs existing",
    "test:desktop:fresh": "node scripts/test-desktop.mjs fresh",
-    "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs",
+    "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs electron/windows-user-env.test.cjs",
    "typecheck": "tsc -p . --noEmit",
    "lint": "eslint src/ electron/",
    "lint:fix": "eslint src/ electron/ --fix",
@@ -89,6 +91,7 @@
    "react-router-dom": "^7.17.0",
    "react-shiki": "^0.9.3",
    "remark-math": "^6.0.0",
+    "remend": "^1.3.0",
    "shiki": "^4.0.2",
    "streamdown": "^2.5.0",
    "tailwind-merge": "^3.5.0",
@@ -97,6 +100,7 @@
    "unicode-animations": "^1.0.3",
    "unified": "^11.0.5",
    "unist-util-visit-parents": "^6.0.2",
+    "use-stick-to-bottom": "^1.1.6",
    "vfile": "^6.0.3",
    "web-haptics": "^0.0.6"
  },
@@ -131,6 +135,7 @@
  },
  "build": {
    "electronVersion": "40.9.3",
+    "electronDist": "../../node_modules/electron/dist",
    "appId": "com.nousresearch.hermes",
    "productName": "Hermes",
    "executableName": "Hermes",
--- a/apps/desktop/scripts/patch-electron-builder-mac-binary.cjs
+++ b/apps/desktop/scripts/patch-electron-builder-mac-binary.cjs
@@ -0,0 +1,59 @@
+const fs = require('node:fs')
+const path = require('node:path')
+
+if (process.platform !== 'darwin') {
+  process.exit(0)
+}
+
+const desktopRoot = path.resolve(__dirname, '..')
+const repoRoot = path.resolve(desktopRoot, '..', '..')
+const electronMacPath = path.join(repoRoot, 'node_modules', 'app-builder-lib', 'out', 'electron', 'electronMac.js')
+
+const marker = 'hermes-macos-electron-binary-fallback'
+const needle = `    await Promise.all([
+        doRename(path.join(contentsPath, "MacOS"), electronBranding.productName, appPlist.CFBundleExecutable),
+        (0, builder_util_1.unlinkIfExists)(path.join(appOutDir, "LICENSE")),
+        (0, builder_util_1.unlinkIfExists)(path.join(appOutDir, "LICENSES.chromium.html")),
+    ]);`
+const replacement = `    // ${marker}: electron-builder 26.8.x can sometimes copy
+    // Electron.app without its main MacOS/Electron binary before this rename.
+    // Restore it from the installed Electron runtime so local desktop installs
+    // do not fail with ENOENT during macOS arm64 packaging.
+    const macosDir = path.join(contentsPath, "MacOS");
+    const bundledElectronBinary = path.join(macosDir, electronBranding.productName);
+    if (!fs.existsSync(bundledElectronBinary)) {
+        const candidates = [
+            path.join(packager.info.framework.distMacOsAppName, "Contents", "MacOS", electronBranding.productName),
+            path.join(process.cwd(), "..", "..", "node_modules", "electron", "dist", "Electron.app", "Contents", "MacOS", electronBranding.productName),
+        ];
+        const sourceBinary = candidates.find(candidate => fs.existsSync(candidate));
+        if (sourceBinary == null) {
+            throw new Error("Electron binary missing from packaged app and Electron runtime: " + bundledElectronBinary);
+        }
+        await (0, promises_1.copyFile)(sourceBinary, bundledElectronBinary);
+        await (0, promises_1.chmod)(bundledElectronBinary, 0o755);
+    }
+    await Promise.all([
+        doRename(macosDir, electronBranding.productName, appPlist.CFBundleExecutable),
+        (0, builder_util_1.unlinkIfExists)(path.join(appOutDir, "LICENSE")),
+        (0, builder_util_1.unlinkIfExists)(path.join(appOutDir, "LICENSES.chromium.html")),
+    ]);`
+
+if (fs.existsSync(electronMacPath) === false) {
+  console.warn(`[patch-electron-builder] skipped: ${electronMacPath} not found`)
+  process.exit(0)
+}
+// The marker embedded in the replacement text makes repeated runs a no-op.
+const original = fs.readFileSync(electronMacPath, 'utf8')
+if (original.includes(marker)) {
+  console.log('[patch-electron-builder] macOS Electron binary fallback already applied')
+  process.exit(0)
+}
+// Only the exact upstream statement is patched; any other shape exits 0 untouched.
+if (original.indexOf(needle) === -1) {
+  console.warn('[patch-electron-builder] skipped: expected electronMac.js shape not found')
+  process.exit(0)
+}
+// Rewrite the installed module in place with the fallback block spliced in.
+fs.writeFileSync(electronMacPath, original.replace(needle, replacement))
+console.log('[patch-electron-builder] applied macOS Electron binary fallback')
--- a/apps/desktop/src/app/agents/index.tsx
+++ b/apps/desktop/src/app/agents/index.tsx
@@ -3,8 +3,8 @@ import { type ReactNode, useEffect, useMemo, useState } from 'react'

 import { useElapsedSeconds } from '@/components/chat/activity-timer'
 import { ActivityTimerText } from '@/components/chat/activity-timer-text'
-import { BrailleSpinner } from '@/components/ui/braille-spinner'
 import { FadeText } from '@/components/ui/fade-text'
+import { GlyphSpinner } from '@/components/ui/glyph-spinner'
 import { type Translations, useI18n } from '@/i18n'
 import { AlertCircle, CheckCircle2, Sparkles } from '@/lib/icons'
 import { useEnterAnimation } from '@/lib/use-enter-animation'
@@ -25,7 +25,7 @@ import { OverlayView } from '../overlays/overlay-view'
 function statusGlyph(status: SubagentStatus, a: Translations['agents']): ReactNode {
  if (status === 'running' || status === 'queued') {
    return (
-      <BrailleSpinner
+      <GlyphSpinner
        ariaLabel={a.running}
        className="size-3.5 shrink-0 text-[0.95rem] text-muted-foreground/80"
        spinner="breathe"
@@ -290,7 +290,7 @@ function StreamLine({
      <span className={cn('min-w-0 flex-1 wrap-anywhere', tone, isMono && 'font-mono text-[0.69rem]')}>
        {entry.text}
        {active ? (
-          <BrailleSpinner
+          <GlyphSpinner
            ariaLabel={t.agents.streaming}
            className="ml-1 inline-block size-2.5 align-middle text-muted-foreground/70"
            spinner="breathe"
@@ -372,7 +372,9 @@ function SubagentRow({ node, depth = 0, nowMs }: { node: SubagentNode; depth?: n

      {open && fileLines.length > 0 ? (
        <div className="grid min-w-0 gap-0.5 pl-6">
-          <p className="text-[0.58rem] font-medium tracking-wider text-muted-foreground/60 uppercase">{t.agents.files}</p>
+          <p className="text-[0.58rem] font-medium tracking-wider text-muted-foreground/60 uppercase">
+            {t.agents.files}
+          </p>
          {fileLines.slice(0, 8).map(line => (
            <p className="wrap-break-word font-mono text-[0.67rem] leading-relaxed text-muted-foreground/80" key={line}>
              {line}
--- a/apps/desktop/src/app/artifacts/index.tsx
+++ b/apps/desktop/src/app/artifacts/index.tsx
@@ -18,11 +18,12 @@ import {
 } from '@/components/ui/pagination'
 import { TextTab, TextTabMeta } from '@/components/ui/text-tab'
 import { Tip } from '@/components/ui/tooltip'
-import { getSessionMessages, listSessions } from '@/hermes'
+import { getSessionMessages, listAllProfileSessions } from '@/hermes'
 import { type Translations, useI18n } from '@/i18n'
 import { sessionTitle } from '@/lib/chat-runtime'
 import { ExternalLink, ExternalLinkIcon, hostPathLabel, urlSlugTitleLabel, useLinkTitle } from '@/lib/external-link'
 import { FileImage, FileText, FolderOpen, Link2 } from '@/lib/icons'
+import { mediaExternalUrl } from '@/lib/media'
 import { cn } from '@/lib/utils'
 import { notifyError } from '@/store/notifications'
 import type { SessionInfo, SessionMessage } from '@/types/hermes'
@@ -124,17 +125,12 @@ function artifactKind(value: string): ArtifactKind {
 }

 function artifactHref(value: string): string {
-  if (
-    value.startsWith('http://') ||
-    value.startsWith('https://') ||
-    value.startsWith('file://') ||
-    value.startsWith('data:')
-  ) {
+  if (value.startsWith('http://') || value.startsWith('https://') || value.startsWith('data:')) {
    return value
  }

-  if (value.startsWith('/')) {
-    return `file://${encodeURI(value)}`
+  if (value.startsWith('file://') || value.startsWith('/')) {
+    return mediaExternalUrl(value)
  }

  return value
@@ -388,8 +384,8 @@ export function ArtifactsView({ setStatusbarItemGroup: _setStatusbarItemGroup, .
    setRefreshing(true)

    try {
-      const sessions = (await listSessions(30, 1)).sessions
-      const results = await Promise.allSettled(sessions.map(session => getSessionMessages(session.id)))
+      const sessions = (await listAllProfileSessions(30, 1)).sessions
+      const results = await Promise.allSettled(sessions.map(session => getSessionMessages(session.id, session.profile)))
      const nextArtifacts: ArtifactRecord[] = []

      results.forEach((result, index) => {
--- a/apps/desktop/src/app/chat/composer/completion-drawer.tsx
+++ b/apps/desktop/src/app/chat/composer/completion-drawer.tsx
@@ -2,25 +2,21 @@ import type { Unstable_TriggerAdapter } from '@assistant-ui/core'
 import { ComposerPrimitive } from '@assistant-ui/react'
 import type { ReactNode } from 'react'

-export const COMPLETION_DRAWER_CLASS = [
-  'absolute bottom-[calc(100%+0.375rem)] left-0 z-50',
-  'w-80 max-w-[calc(100vw-2rem)]',
-  'max-h-[min(22rem,calc(100vh-8rem))] overflow-y-auto overscroll-contain',
-  'rounded-xl border border-(--ui-stroke-secondary)',
-  'bg-[color-mix(in_srgb,var(--ui-bg-elevated)_97%,transparent)]',
-  'p-1 text-xs text-popover-foreground shadow-lg',
-  'backdrop-blur-md'
-].join(' ')
+import { composerFusedDockCard } from '@/components/chat/composer-dock'
+import { cn } from '@/lib/utils'

-export const COMPLETION_DRAWER_BELOW_CLASS = [
-  'absolute left-0 top-[calc(100%+0.375rem)] z-50',
-  'w-80 max-w-[calc(100vw-2rem)]',
-  'max-h-[min(22rem,calc(100vh-8rem))] overflow-y-auto overscroll-contain',
-  'rounded-xl border border-(--ui-stroke-secondary)',
-  'bg-[color-mix(in_srgb,var(--ui-bg-elevated)_97%,transparent)]',
-  'p-1 text-xs text-popover-foreground shadow-lg',
-  'backdrop-blur-md'
-].join(' ')
+// Same docked chrome as the queue/status stack, but its own thing: a narrow,
+// left-aligned card (not full width) that fuses to the composer's edge instead
+// of floating above it. `left-1` matches the stack's `mx-1` inset; the negative
+// margin overlaps the seam so the composer's (now-transparent) edge border reads
+// as shared. Fused (opaque) fill — the composer surface swaps to the same fill
+// while a drawer is open, so the two paint as one panel.
+const DRAWER_SHELL =
+  'absolute left-1 z-50 w-80 max-w-[calc(100%-0.5rem)] max-h-[min(22rem,calc(100vh-8rem))] overflow-y-auto overscroll-contain p-1 text-xs text-popover-foreground'
+
+export const COMPLETION_DRAWER_CLASS = cn(DRAWER_SHELL, 'bottom-full -mb-[9px]', composerFusedDockCard('top'))
+
+export const COMPLETION_DRAWER_BELOW_CLASS = cn(DRAWER_SHELL, 'top-full -mt-[9px]', composerFusedDockCard('bottom'))

 export function ComposerCompletionDrawer({
  adapter,
--- a/apps/desktop/src/app/chat/composer/context-menu.tsx
+++ b/apps/desktop/src/app/chat/composer/context-menu.tsx
@@ -11,6 +11,7 @@ import {
  DropdownMenuSeparator,
  DropdownMenuTrigger
 } from '@/components/ui/dropdown-menu'
+import { Kbd } from '@/components/ui/kbd'
 import { useI18n } from '@/i18n'
 import { Clipboard, FileText, FolderOpen, type IconComponent, ImageIcon, Link, MessageSquareText } from '@/lib/icons'
 import { cn } from '@/lib/utils'
@@ -86,7 +87,7 @@ export function ContextMenu({

          <div className="px-2 py-1 text-[0.7rem] text-muted-foreground/80">
            {c.tipPre}
-            <kbd className="rounded bg-muted/70 px-1 py-px font-mono text-[0.65rem]">@</kbd>
+            <Kbd size="sm">@</Kbd>
            {c.tipPost}
          </div>
        </DropdownMenuContent>
--- a/apps/desktop/src/app/chat/composer/controls.tsx
+++ b/apps/desktop/src/app/chat/composer/controls.tsx
@@ -1,5 +1,6 @@
 import { Button } from '@/components/ui/button'
 import { Codicon } from '@/components/ui/codicon'
+import { KbdCombo } from '@/components/ui/kbd'
 import { Tip } from '@/components/ui/tooltip'
 import { useI18n } from '@/i18n'
 import { triggerHaptic } from '@/lib/haptics'
@@ -8,6 +9,7 @@ import { formatCombo } from '@/lib/keybinds/combo'
 import { cn } from '@/lib/utils'

 import type { ConversationStatus } from './hooks/use-voice-conversation'
+import { ModelPill } from './model-pill'
 import type { ChatBarState, VoiceStatus } from './types'

 export const ICON_BTN = 'size-(--composer-control-size) shrink-0 rounded-md'
@@ -63,7 +65,15 @@ export function ComposerControls({
 }) {
  const { t } = useI18n()
  const c = t.composer
-  const steerLabel = `${c.steer} (${formatCombo('mod+enter')})`
+  const steerCombo = formatCombo('mod+enter')
+  const steerLabel = `${c.steer} (${steerCombo})`
+
+  const steerTip = (
+    <span className="inline-flex items-center gap-1.5">
+      {c.steer}
+      <KbdCombo combo="mod+enter" size="sm" variant="inverted" />
+    </span>
+  )

  if (conversation.active) {
    return <ConversationPill {...conversation} disabled={disabled} />
@@ -73,9 +83,11 @@ export function ComposerControls({

  return (
    <div className="ml-auto flex shrink-0 items-center gap-(--composer-control-gap)">
-      <DictationButton disabled={disabled} onToggle={onDictate} state={state.voice} status={voiceStatus} />
-      {canSteer && (
-        <Tip label={steerLabel}>
+      <ModelPill disabled={disabled} model={state.model} />
+      {/* While the agent runs and the user is typing, steer takes over the mic's
+          slot rather than crowding the row with an extra button. */}
+      {canSteer ? (
+        <Tip label={steerTip}>
          <Button
            aria-label={steerLabel}
            className={GHOST_ICON_BTN}
@@ -88,6 +100,8 @@ export function ComposerControls({
            <SteeringWheel size={16} />
          </Button>
        </Tip>
+      ) : (
+        <DictationButton disabled={disabled} onToggle={onDictate} state={state.voice} status={voiceStatus} />
      )}
      {showVoicePrimary ? (
        <Tip label={c.startVoice}>
--- a/apps/desktop/src/app/chat/composer/enter-submit-dom-race.test.tsx
+++ b/apps/desktop/src/app/chat/composer/enter-submit-dom-race.test.tsx
@@ -24,6 +24,7 @@ afterEach(cleanup)
 // state stays stale while the DOM already holds the text.
 function Harness({
  busy = false,
+  disabled = false,
  queued = [],
  onSubmit,
  onQueue,
@@ -31,6 +32,7 @@ function Harness({
  onDrain
 }: {
  busy?: boolean
+  disabled?: boolean
  queued?: readonly string[]
  onSubmit: (text: string) => void
  onQueue: (text: string) => void
@@ -52,6 +54,10 @@ function Harness({
  }

  const submitDraft = () => {
+    if (disabled) {
+      return
+    }
+
    const editor = editorRef.current
    if (editor) {
      const domText = composerPlainText(editor)
@@ -84,6 +90,10 @@ function Harness({
      const editorText = editorRef.current ? composerPlainText(editorRef.current) : draftRef.current
      const hasLivePayload = editorText.trim().length > 0 || attachments.length > 0

+      if (disabled) {
+        return
+      }
+
      if (!busy && !hasLivePayload && queued.length > 0) {
        onDrain()

@@ -186,4 +196,23 @@ describe('composer Enter submit — live DOM vs stale composer state (#39630)',
    expect(onDrain).toHaveBeenCalledTimes(1)
    expect(onSubmit).not.toHaveBeenCalled()
  })
+
+  it('keeps reconnect drafts editable but blocks Enter submit until the gateway returns', async () => {
+    const onSubmit = vi.fn()
+    const onDrain = vi.fn()
+    const { getByTestId } = render(
+      <Harness disabled onCancel={vi.fn()} onDrain={onDrain} onQueue={vi.fn()} onSubmit={onSubmit} queued={['queued-1']} />
+    )
+    const editor = getByTestId('editor')
+
+    await act(async () => {
+      editor.textContent = 'draft while reconnecting'
+      fireEvent.input(editor)
+      fireEvent.keyDown(editor, { key: 'Enter' })
+    })
+
+    expect(editor.textContent).toBe('draft while reconnecting')
+    expect(onDrain).not.toHaveBeenCalled()
+    expect(onSubmit).not.toHaveBeenCalled()
+  })
 })
--- a/apps/desktop/src/app/chat/composer/focus.ts
+++ b/apps/desktop/src/app/chat/composer/focus.ts
@@ -10,6 +10,7 @@
 * steal focus from the composer effect.
 */

 import type { InlineRefInput } from './inline-refs'
+import { RICH_INPUT_SLOT } from './rich-editor'

 export type ComposerTarget = 'edit' | 'main'
@@ -123,3 +124,12 @@ export const focusComposerInput = (el: HTMLElement | null) => {
  window.requestAnimationFrame(focus)
  window.setTimeout(focus, 0)
 }
+
+/** Release focus from the main composer input (status-stack chrome, sidebar, etc.) if it holds it. */
+export const blurComposerInput = () => {
+  const input = document.querySelector<HTMLElement>(`[data-slot="${RICH_INPUT_SLOT}"]`)
+
+  if (input !== null && input === document.activeElement) {
+    input.blur()
+  }
+}
--- a/apps/desktop/src/app/chat/composer/help-hint.tsx
+++ b/apps/desktop/src/app/chat/composer/help-hint.tsx
@@ -1,11 +1,23 @@
 import type { ReactNode } from 'react'

+import { KbdCombo } from '@/components/ui/kbd'
 import { useI18n } from '@/i18n'

 import { COMPLETION_DRAWER_CLASS } from './completion-drawer'

 const COMMON_COMMAND_KEYS = ['/help', '/clear', '/resume', '/details', '/copy', '/quit']
-const HOTKEY_KEYS = ['@', '/', '?', 'Enter', 'Cmd/Ctrl+Shift+K', 'Cmd/Ctrl+/', 'Esc', '↑ / ↓']
+
+/** Stable ids → i18n `hotkeyDescs` keys. Combos resolve mod labels per OS. */
+const COMPOSER_HOTKEY_ROWS = [
+  { id: 'composer.mention', combos: ['@'] },
+  { id: 'composer.slash', combos: ['/'] },
+  { id: 'composer.help', combos: ['?'] },
+  { id: 'composer.sendNewline', combos: ['enter', 'shift+enter'] },
+  { id: 'composer.sendQueued', combos: ['mod+shift+k'] },
+  { id: 'keybinds.openPanel', combos: ['mod+/'] },
+  { id: 'composer.cancel', combos: ['escape'] },
+  { id: 'composer.history', combos: ['up', 'down'] }
+] as const

 export function HelpHint() {
  const { t } = useI18n()
@@ -20,8 +32,8 @@ export function HelpHint() {
      </Section>

      <Section title={c.hotkeys}>
-        {HOTKEY_KEYS.map(key => (
-          <Row description={c.hotkeyDescs[key] ?? ''} key={key} keyLabel={key} />
+        {COMPOSER_HOTKEY_ROWS.map(row => (
+          <HotkeyRow description={c.hotkeyDescs[row.id] ?? ''} combos={[...row.combos]} key={row.id} />
        ))}
      </Section>

@@ -57,3 +69,16 @@ function Row({ description, keyLabel, mono = false }: { description: string; key
    </div>
  )
 }
+
+// One hotkey line: a key chip per combo, then the description. `combos` is
+// read-only so callers can pass an `as const` tuple without copying it.
+function HotkeyRow({ combos, description }: { combos: readonly string[]; description: string }) {
+  return (
+    <div className="flex min-w-0 items-center gap-2 rounded-md px-2.5 py-1 text-xs">
+      <span className="flex shrink-0 items-center gap-1">
+        {combos.map(combo => <KbdCombo combo={combo} key={combo} size="sm" />)}
+      </span>
+      <span className="min-w-0 truncate text-muted-foreground/80">{description}</span>
+    </div>
+  )
+}
--- a/apps/desktop/src/app/chat/composer/index.tsx
+++ b/apps/desktop/src/app/chat/composer/index.tsx
@@ -14,6 +14,7 @@ import {
 } from 'react'

 import { hermesDirectiveFormatter, type SlashChipKind } from '@/components/assistant-ui/directive-text'
+import { composerFill, composerSurfaceGlass } from '@/components/chat/composer-dock'
 import { Button } from '@/components/ui/button'
 import { useMediaQuery } from '@/hooks/use-media-query'
 import { useResizeObserver } from '@/hooks/use-resize-observer'
@@ -42,12 +43,16 @@ import {
 import {
  $queuedPromptsBySession,
  enqueueQueuedPrompt,
+  MAX_AUTO_DRAIN_ATTEMPTS,
+  migrateQueuedPrompts,
  promoteQueuedPrompt,
  type QueuedPromptEntry,
  removeQueuedPrompt,
-  shouldAutoDrainOnSettle,
+  shouldAutoDrain,
  updateQueuedPrompt
 } from '@/store/composer-queue'
+import { $statusItemsBySession } from '@/store/composer-status'
+import { notify } from '@/store/notifications'
 import { $gatewayState, $messages, setSessionPickerOpen } from '@/store/session'
 import { $threadScrolledUp } from '@/store/thread-scroll'
 import { useTheme } from '@/themes'
@@ -80,12 +85,16 @@ import {
 import { QueuePanel } from './queue-panel'
 import {
  composerPlainText,
+  deleteSelectionInEditor,
+  insertPlainTextAtCaret,
+  normalizeComposerEditorDom,
  placeCaretEnd,
  refChipElement,
  renderComposerContents,
  RICH_INPUT_SLOT,
  slashChipElement
 } from './rich-editor'
+import { ComposerStatusStack } from './status-stack'
 import { detectTrigger, extractClipboardImageBlobs, textBeforeCaret, type TriggerState } from './text-utils'
 import { ComposerTriggerPopover } from './trigger-popover'
 import type { ChatBarProps } from './types'
@@ -128,6 +137,12 @@ function slashChipKindForItem(item: Unstable_TriggerItem): SlashChipKind {
  return 'command'
 }

+/** Arg stage: the `/` query already contains a space, i.e. it is past the command name. */
+const slashArgStage = (query: string) => query.indexOf(' ') !== -1
+
+/** Leading `/command` token of a slash query, lower-cased (`personality x` → `/personality`). */
+const slashCommandToken = (query: string) => `/${(query.split(/\s+/, 1)[0] ?? '').toLowerCase()}`
+
 interface QueueEditState {
  attachments: ComposerAttachment[]
  draft: string
@@ -168,8 +183,8 @@ export function ChatBar({
  const draft = useAuiState(s => s.composer.text)
  const attachments = useStore($composerAttachments)
  const queuedPromptsBySession = useStore($queuedPromptsBySession)
+  const statusItemsBySession = useStore($statusItemsBySession)
  const scrolledUp = useStore($threadScrolledUp)
-  const sessionMessages = useStore($messages)
  const activeQueueSessionKey = queueSessionKey || sessionId || null

  const queuedPrompts = useMemo(
@@ -177,15 +192,29 @@ export function ChatBar({
    [activeQueueSessionKey, queuedPromptsBySession]
  )

+  // Status items (subagents, background processes) are keyed by the RUNTIME
+  // session id — gateway events and process.list both speak that id. Only the
+  // queue uses the stored-session fallback key (prompts can queue pre-resume).
+  const statusSessionId = sessionId ?? null
+
+  const statusStackVisible = useMemo(
+    () =>
+      queuedPrompts.length > 0 || (statusSessionId ? (statusItemsBySession[statusSessionId]?.length ?? 0) > 0 : false),
+    [queuedPrompts.length, statusItemsBySession, statusSessionId]
+  )
+
  const composerRef = useRef<HTMLFormElement | null>(null)
  const composerSurfaceRef = useRef<HTMLDivElement | null>(null)
  const editorRef = useRef<HTMLDivElement | null>(null)
  const draftRef = useRef(draft)
-  const previousBusyRef = useRef(busy)
  const pendingDraftPersistRef = useRef<{ scope: string | null; text: string } | null>(null)
  const activeQueueSessionKeyRef = useRef(activeQueueSessionKey)
  activeQueueSessionKeyRef.current = activeQueueSessionKey
+  const prevQueueKeyRef = useRef(activeQueueSessionKey)
  const drainingQueueRef = useRef(false)
+  // Per-entry auto-drain failure counts; bounds retries so a persistent 404
+  // can't spin-loop. Cleared on success; reset naturally on remount/reconnect.
+  const drainFailuresRef = useRef(new Map<string, number>())
  const urlInputRef = useRef<HTMLInputElement | null>(null)

  const [urlOpen, setUrlOpen] = useState(false)
@@ -226,6 +255,8 @@ export function ChatBar({
  const gatewayState = useStore($gatewayState)
  const newSessionPlaceholders = t.composer.newSessionPlaceholders
  const followUpPlaceholders = t.composer.followUpPlaceholders
+  const reconnecting = gatewayState === 'closed' || gatewayState === 'error'
+  const inputDisabled = disabled && !reconnecting

  // Resting placeholder: a starter for brand-new sessions, a continuation for
  // existing ones. Picked once and only re-rolled when we genuinely move to a
@@ -256,11 +287,13 @@ export function ChatBar({
    setRestingPlaceholder(pickPlaceholder(sessionId ? followUpPlaceholders : newSessionPlaceholders))
  }, [followUpPlaceholders, newSessionPlaceholders, sessionId])

-  // When the bar is disabled it's because the gateway isn't open. Distinguish a
-  // cold start ("Starting Hermes...") from a dropped connection we're trying to
-  // restore (e.g. after the Mac slept) so the stuck state reads as recoverable.
+  // When the transport is disabled it's because the gateway isn't open.
+  // Distinguish a cold start ("Starting Hermes...") from a dropped connection
+  // we're trying to restore. During reconnect, keep the textbox editable so a
+  // flaky network doesn't block drafting; only submit/backend actions stay
+  // disabled until the gateway is open again.
  const placeholder = disabled
-    ? gatewayState === 'closed' || gatewayState === 'error'
+    ? reconnecting
      ? t.composer.placeholderReconnecting
      : t.composer.placeholderStarting
    : restingPlaceholder
@@ -302,13 +335,13 @@ export function ChatBar({
  )

  useEffect(() => {
-    if (!disabled) {
+    if (!inputDisabled) {
      focusInput()
    }
-  }, [disabled, focusInput, focusKey, focusRequestId])
+  }, [focusInput, focusKey, focusRequestId, inputDisabled])

  useEffect(() => {
-    if (disabled) {
+    if (inputDisabled) {
      return undefined
    }

@@ -328,7 +361,7 @@ export function ChatBar({
      offFocus()
      offInsert()
    }
-  }, [appendExternalText, disabled])
+  }, [appendExternalText, inputDisabled])

  // Keep draftRef in sync with the assistant-ui composer state for callers
  // that read the latest text outside the React render cycle. We don't push
@@ -507,48 +540,6 @@ export function ChatBar({
    })
  }, [])

-  const handlePaste = (event: ClipboardEvent<HTMLDivElement>) => {
-    const imageBlobs = extractClipboardImageBlobs(event.clipboardData)
-
-    if (imageBlobs.length > 0) {
-      event.preventDefault()
-
-      if (onAttachImageBlob) {
-        triggerHaptic('selection')
-
-        for (const blob of imageBlobs) {
-          void onAttachImageBlob(blob)
-        }
-      }
-
-      return
-    }
-
-    // Trim surrounding whitespace so a copy that dragged along leading/trailing
-    // blank lines (common when selecting from terminals, code blocks, web pages)
-    // doesn't dump multiline padding into the composer. Internal newlines are
-    // preserved — only the edges are cleaned up.
-    const pastedText = event.clipboardData.getData('text').trim()
-
-    if (!pastedText) {
-      event.preventDefault()
-
-      return
-    }
-
-    if (DATA_IMAGE_URL_RE.test(pastedText)) {
-      event.preventDefault()
-
-      return
-    }
-
-    event.preventDefault()
-    document.execCommand('insertText', false, pastedText)
-    const nextDraft = composerPlainText(event.currentTarget)
-    draftRef.current = nextDraft
-    aui.composer().setText(nextDraft)
-  }
-
  const [trigger, setTrigger] = useState<TriggerState | null>(null)
  const [triggerActive, setTriggerActive] = useState(0)
  const [triggerItems, setTriggerItems] = useState<readonly Unstable_TriggerItem[]>([])
@@ -585,7 +576,15 @@ export function ChatBar({
    }

    const before = textBeforeCaret(editor)
-    const detected = detectTrigger(before ?? composerPlainText(editor))
+    const found = detectTrigger(before ?? composerPlainText(editor))
+
+    // The arg-stage popover is only useful for commands with an options screen.
+    // For a no-arg command it would dead-end on "No matches", so drop it — the
+    // directive is already complete.
+    const detected =
+      found?.kind === '/' && slashArgStage(found.query) && !desktopSlashCommandTakesArgs(slashCommandToken(found.query))
+        ? null
+        : found

    setTrigger(detected)

@@ -602,9 +601,7 @@ export function ChatBar({
  // (which drives `hasComposerPayload` → the send button). Shared by the input
  // and compositionend paths so committed IME text reaches state through either.
  const flushEditorToDraft = (editor: HTMLDivElement) => {
-    if (editor.childNodes.length === 1 && editor.firstChild?.nodeName === 'BR') {
-      editor.replaceChildren()
-    }
+    normalizeComposerEditorDom(editor)

    const nextDraft = composerPlainText(editor)

@@ -627,6 +624,46 @@ export function ChatBar({
    flushEditorToDraft(event.currentTarget)
  }

+  const handlePaste = (event: ClipboardEvent<HTMLDivElement>) => {
+    const { clipboardData, currentTarget: editor } = event
+    const pastedImages = extractClipboardImageBlobs(clipboardData)
+
+    // Images win over text: attach each blob when an onAttachImageBlob handler
+    // is provided, and never let the browser paste them inline.
+    if (pastedImages.length > 0) {
+      event.preventDefault()
+
+      if (!onAttachImageBlob) {
+        return
+      }
+
+      triggerHaptic('selection')
+
+      for (const blob of pastedImages) {
+        void onAttachImageBlob(blob)
+      }
+
+      return
+    }
+
+    // Only the edges are trimmed — a copy from a terminal, code block, or web
+    // page often drags leading/trailing blank lines along, and those would
+    // dump multiline padding into the composer. Internal newlines survive.
+    const pastedText = clipboardData.getData('text').trim()
+
+    // Every text outcome below replaces the native paste.
+    event.preventDefault()
+
+    // Nothing left after trimming, or text matching DATA_IMAGE_URL_RE: drop it.
+    if (!pastedText || DATA_IMAGE_URL_RE.test(pastedText)) {
+      return
+    }
+
+    // Insert as plain text at the caret, then flush the editor into the draft.
+    insertPlainTextAtCaret(editor, pastedText)
+    flushEditorToDraft(editor)
+  }
+
  const triggerAdapter: Unstable_TriggerAdapter | null =
    trigger?.kind === '@' ? at.adapter : trigger?.kind === '/' ? slash.adapter : null

@@ -642,6 +679,12 @@ export function ChatBar({

  const triggerLoading = trigger?.kind === '@' ? at.loading : trigger?.kind === '/' ? slash.loading : false

+  // Suppress the "No matches" empty state once a slash command is past its name:
+  // a no-arg command has nothing to offer, and a fully-typed arg commits on
+  // Space/Tab — neither should dead-end on a popover.
+  const argStageEmpty =
+    trigger?.kind === '/' && slashArgStage(trigger.query) && !triggerLoading && !triggerItems.length
+
  const closeTrigger = () => {
    setTrigger(null)
    setTriggerItems([])
@@ -652,6 +695,25 @@ export function ChatBar({
    setTriggerActive(idx => Math.min(idx, Math.max(0, triggerItems.length - 1)))
  }, [triggerItems.length])

+  // Turn the literally-typed `/command arg` into a directive chip when the
+  // completion list is empty because the arg is already fully typed (the backend
+  // completer drops exact matches): a synthetic item carrying the verbatim text.
+  const commitTypedSlashDirective = () => {
+    if (!trigger || trigger.kind !== '/') {
+      return
+    }
+
+    const typed = trigger.query.trimEnd()
+    const raw = `/${typed}`
+
+    replaceTriggerWithChip({
+      id: raw,
+      type: 'slash',
+      label: typed,
+      metadata: { command: slashCommandToken(trigger.query), display: raw, meta: '', group: '', action: '', rawText: raw }
+    })
+  }
+
  const replaceTriggerWithChip = (item: Unstable_TriggerItem) => {
    const editor = editorRef.current

@@ -688,8 +750,7 @@ export function ChatBar({
    // already an arg pick (`/personality alice`), so it commits normally.
    const command = (item.metadata as { command?: string } | undefined)?.command ?? ''

-    const expandsToArgs =
-      trigger.kind === '/' && !serialized.includes(' ') && desktopSlashCommandTakesArgs(command)
+    const expandsToArgs = trigger.kind === '/' && !serialized.includes(' ') && desktopSlashCommandTakesArgs(command)

    const text = starter || serialized.endsWith(' ') ? serialized : `${serialized} `
    const directive = !starter && serialized.match(/^@([^:]+):(.+)$/)
@@ -771,6 +832,18 @@ export function ChatBar({
      return
    }

+    // Non-collapsed Backspace/Delete: native selection-delete is ~O(n²) on large
+    // drafts (Ctrl+A → Delete froze ~1.3s). Collapsed carets fall through.
+    if (
+      (event.key === 'Backspace' || event.key === 'Delete') &&
+      deleteSelectionInEditor(event.currentTarget)
+    ) {
+      event.preventDefault()
+      flushEditorToDraft(event.currentTarget)
+
+      return
+    }
+
    // Cmd/Ctrl+Shift+K drains the next queued message. Plain Cmd/Ctrl+K is
    // reserved for the global command palette.
    if ((event.metaKey || event.ctrlKey) && !event.altKey && event.shiftKey && event.key.toLowerCase() === 'k') {
@@ -800,7 +873,15 @@ export function ChatBar({
        return
      }

-      if (event.key === 'Enter' || event.key === 'Tab') {
+      // Enter / Tab / Space all accept the highlighted item: a no-arg command
+      // commits its directive chip, an arg-taking command expands to its
+      // options step, and an arg option commits the full `/cmd arg` chip. Space
+      // is slash-only (an `@` mention takes a literal space) and gated to a
+      // non-empty query so a bare `/ ` still types a space.
+      const acceptOnSpace = event.key === ' ' && trigger.kind === '/' && Boolean(trigger.query.trim())
+      const accept = event.key === 'Enter' || event.key === 'Tab' || acceptOnSpace
+
+      if (accept) {
        event.preventDefault()
        triggerKeyConsumedRef.current = true
        const item = triggerItems[triggerActive]
@@ -821,6 +902,24 @@ export function ChatBar({
      }
    }

+    // Arg stage with nothing left to suggest — a fully-typed arg the backend
+    // completer no longer echoes (it drops the exact match), e.g.
+    // `/personality creative`. Space/Tab still commit what's typed as a single
+    // directive chip; Enter falls through to submit (send it as-is).
+    if (
+      trigger?.kind === '/' &&
+      !triggerItems.length &&
+      (event.key === ' ' || event.key === 'Tab') &&
+      slashArgStage(trigger.query) &&
+      trigger.query.trim()
+    ) {
+      event.preventDefault()
+      triggerKeyConsumedRef.current = true
+      commitTypedSlashDirective()
+
+      return
+    }
+
    // ArrowUp/ArrowDown navigate, in priority order: the queue (edit entries in
    // place) then sent-message history. The history ring is derived from live
    // session messages each press — single source of truth, no mirror.
@@ -853,7 +952,9 @@ export function ChatBar({
      event.preventDefault()
      triggerKeyConsumedRef.current = true

-      const history = deriveUserHistory(sessionMessages, chatMessageText)
+      // $messages is read imperatively (not subscribed) so the composer
+      // doesn't re-render on every streaming delta flush.
+      const history = deriveUserHistory($messages.get(), chatMessageText)
      const entry = browseBackward(sessionId, currentDraft, history)

      if (entry !== null) {
@@ -878,7 +979,7 @@ export function ChatBar({
        event.preventDefault()
        triggerKeyConsumedRef.current = true

-        const history = deriveUserHistory(sessionMessages, chatMessageText)
+        const history = deriveUserHistory($messages.get(), chatMessageText)
        const result = browseForward(sessionId, history)

        if (result !== null) {
@@ -914,6 +1015,10 @@ export function ChatBar({
      const editorText = editorRef.current ? composerPlainText(editorRef.current) : draftRef.current
      const hasLivePayload = editorText.trim().length > 0 || attachments.length > 0

+      if (disabled) {
+        return
+      }
+
      if (!busy && !hasLivePayload && queuedPrompts.length > 0) {
        void drainNextQueued()

@@ -1113,11 +1218,8 @@ export function ChatBar({
    }
  }

-  const stashAt = (
-    scope: string | null,
-    text = draftRef.current,
-    attachments = $composerAttachments.get()
-  ) => stashSessionDraft(scope, text, attachments)
+  const stashAt = (scope: string | null, text = draftRef.current, attachments = $composerAttachments.get()) =>
+    stashSessionDraft(scope, text, attachments)

  // Per-thread draft swap — the composer's only session coupling. Lifecycle
  // never clears composer state; this effect alone stashes on leave, restores
@@ -1315,6 +1417,7 @@ export function ChatBar({
          return false
        }

+        drainFailuresRef.current.delete(entry.id)
        removeQueuedPrompt(activeQueueSessionKey, entry.id)
        resetBrowseState(sessionId)

@@ -1326,16 +1429,17 @@ export function ChatBar({
    [activeQueueSessionKey, onSubmit, queuedPrompts, sessionId]
  )

-  const drainNextQueued = useCallback(
-    () =>
-      runDrain(entries => {
-        const skip = queueEdit?.entryId
+  const pickDrainHead = useCallback(
+    (entries: QueuedPromptEntry[]) => {
+      const skip = queueEditRef.current?.entryId

-        return skip ? entries.find(e => e.id !== skip) : entries[0]
-      }),
-    [queueEdit, runDrain]
+      return skip ? entries.find(e => e.id !== skip) : entries[0]
+    },
+    [] // reads the edit id off a ref so the lock-holder always sees the latest
  )

+  const drainNextQueued = useCallback(() => runDrain(pickDrainHead), [pickDrainHead, runDrain])
+
  const sendQueuedNow = useCallback(
    (id: string) => {
      if (!activeQueueSessionKey || id === queueEdit?.entryId) {
@@ -1353,30 +1457,76 @@ export function ChatBar({
        return true
      }

+      // A manual send clears the auto-drain backoff so a stuck entry the user
+      // taps gets a fresh attempt (and re-enables auto-retry on success).
+      drainFailuresRef.current.delete(id)
+
      return runDrain(entries => entries.find(e => e.id === id))
    },
    [activeQueueSessionKey, busy, onCancel, queueEdit, runDrain]
  )

-  // Auto-drain on busy → false (turn settled). Queued turns always flow once
-  // the session is idle again — whether the turn finished naturally or the
-  // user interrupted it. Interrupting to reach a queued message is the whole
-  // point of the queue, so we never suppress the drain. To cancel queued
-  // turns, the user deletes them from the panel.
-  useEffect(() => {
-    const wasBusy = previousBusyRef.current
-    previousBusyRef.current = busy
-
-    if (
-      shouldAutoDrainOnSettle({
-        isBusy: busy,
-        queueLength: queuedPrompts.length,
-        wasBusy
-      })
-    ) {
-      void drainNextQueued()
+  // Edge-independent auto-drain: send the head whenever the session is idle and
+  // the queue is non-empty, bounding retries so a thrown/rejected onSubmit (e.g.
+  // a stale-session 404) can't strand the entry permanently nor spin-loop. The
+  // drain lock serializes sends; a remount/reconnect resets the failure counts.
+  const autoDrainNext = useCallback(() => {
+    if (busy || drainingQueueRef.current || !activeQueueSessionKey) {
+      return
    }
-  }, [busy, drainNextQueued, queuedPrompts.length])
+
+    const entry = pickDrainHead(queuedPrompts)
+
+    if (!entry || (drainFailuresRef.current.get(entry.id) ?? 0) >= MAX_AUTO_DRAIN_ATTEMPTS) {
+      return
+    }
+
+    const onFail = () => {
+      const fails = (drainFailuresRef.current.get(entry.id) ?? 0) + 1
+      drainFailuresRef.current.set(entry.id, fails)
+
+      if (fails >= MAX_AUTO_DRAIN_ATTEMPTS) {
+        notify({
+          id: 'composer-queue-stuck',
+          kind: 'error',
+          title: t.composer.queueStuckTitle,
+          message: t.composer.queueStuckBody
+        })
+      }
+    }
+
+    void runDrain(() => entry)
+      .then(sent => {
+        if (!sent) {
+          onFail()
+        }
+      })
+      .catch(onFail)
+  }, [activeQueueSessionKey, busy, pickDrainHead, queuedPrompts, runDrain, t])
+
+  // Re-key on a runtime session-id change. A stable stored id (queueSessionKey)
+  // never churns, so a change there is a real session switch and must NOT
+  // migrate; only the runtime-derived key (queueSessionKey falsy → key is
+  // sessionId) churns on a backend bounce/resume of the same conversation.
+  useEffect(() => {
+    const prev = prevQueueKeyRef.current
+    prevQueueKeyRef.current = activeQueueSessionKey
+
+    if (queueSessionKey || !prev || !activeQueueSessionKey || prev === activeQueueSessionKey) {
+      return
+    }
+
+    migrateQueuedPrompts(prev, activeQueueSessionKey)
+  }, [activeQueueSessionKey, queueSessionKey])
+
+  // Queued turns flow whenever the session is idle — on the busy→false settle
+  // edge, on mount/reconnect, and after a re-key — so a swallowed edge can't
+  // strand them. To cancel queued turns, the user deletes them from the panel.
+  useEffect(() => {
+    if (shouldAutoDrain({ isBusy: busy, queueLength: queuedPrompts.length })) {
+      autoDrainNext()
+    }
+  }, [autoDrainNext, busy, queuedPrompts.length])

  // Queue-edit cleanup: on session swap the scope effect already stashed the
  // edit snapshot; only restore into the composer when still on the same scope.
@@ -1411,6 +1561,10 @@ export function ChatBar({
  }

  const submitDraft = () => {
+    if (disabled) {
+      return
+    }
+
    // Source the text from the DOM editor, not React state. The AUI composer
    // state (`draft`) and the derived `hasComposerPayload` lag the DOM by a
    // render, so on fast typing or IME composition the final keystroke(s) may
@@ -1591,6 +1745,7 @@ export function ChatBar({
  const input = (
    <div className={cn('relative', stacked ? 'w-full' : 'min-w-(--composer-input-inline-min-width) flex-1')}>
      <div
+        aria-disabled={inputDisabled ? true : undefined}
        aria-label={t.composer.message}
        autoCapitalize="off"
        autoCorrect="off"
@@ -1601,7 +1756,7 @@ export function ChatBar({
          stacked && 'pl-3',
          stacked ? 'w-full' : 'min-w-(--composer-input-inline-min-width) flex-1'
        )}
-        contentEditable={!disabled}
+        contentEditable={!inputDisabled}
        data-placeholder={placeholder}
        data-slot={RICH_INPUT_SLOT}
        onBlur={() => window.setTimeout(closeTrigger, 80)}
@@ -1669,6 +1824,7 @@ export function ChatBar({
          className="group/composer absolute bottom-0 left-1/2 z-30 w-[min(var(--composer-width),calc(100%-2rem))] max-w-full -translate-x-1/2 rounded-2xl pt-2 pb-[var(--composer-shell-pad-block-end)]"
          data-drag-active={dragActive ? '' : undefined}
          data-slot="composer-root"
+          data-status-stack={statusStackVisible ? '' : undefined}
          data-thread-scrolled-up={scrolledUp ? '' : undefined}
          onDragEnter={handleDragEnter}
          onDragLeave={handleDragLeave}
@@ -1686,7 +1842,7 @@ export function ChatBar({
          ref={composerRef}
        >
          {showHelpHint && <HelpHint />}
-          {trigger && (
+          {trigger && !argStageEmpty && (
            <ComposerTriggerPopover
              activeIndex={triggerActive}
              items={triggerItems}
@@ -1696,26 +1852,30 @@ export function ChatBar({
              onPick={replaceTriggerWithChip}
            />
          )}
-          {activeQueueSessionKey && queuedPrompts.length > 0 && (
-            // Out of flow so the queue never inflates the composer's measured
-            // height (that drives thread bottom padding → chat resizes on
-            // queue). Overlaps -mb-2 onto the surface's top border for a shared
-            // edge; capped + scrollable. Overlays the chat instead of pushing it.
-            <div className="absolute inset-x-0 bottom-full z-6 -mb-2 max-h-[40vh] overflow-y-auto">
-              <QueuePanel
-                busy={busy}
-                editingId={queueEdit?.entryId ?? null}
-                entries={queuedPrompts}
-                onDelete={id => {
-                  if (removeQueuedPrompt(activeQueueSessionKey, id) && queueEdit?.entryId === id) {
-                    exitQueuedEdit('cancel')
-                  }
-                }}
-                onEdit={beginQueuedEdit}
-                onSendNow={id => void sendQueuedNow(id)}
-              />
-            </div>
-          )}
+          {/* Session-scoped status stack (todos, subagents, background tasks,
+              queue). Out of flow so it never inflates the composer's measured
+              height; it overlays the chat instead of pushing it, and publishes
+              its own --status-stack-measured-height so the thread's clearance
+              accounts for it. Collapses to nothing when every status is empty. */}
+          <ComposerStatusStack
+            queue={
+              activeQueueSessionKey && queuedPrompts.length > 0 ? (
+                <QueuePanel
+                  busy={busy}
+                  editingId={queueEdit?.entryId ?? null}
+                  entries={queuedPrompts}
+                  onDelete={id => {
+                    if (removeQueuedPrompt(activeQueueSessionKey, id) && queueEdit?.entryId === id) {
+                      exitQueuedEdit('cancel')
+                    }
+                  }}
+                  onEdit={beginQueuedEdit}
+                  onSendNow={id => void sendQueuedNow(id)}
+                />
+              ) : null
+            }
+            sessionId={statusSessionId}
+          />
          <div
            className="pointer-events-none absolute inset-0 rounded-[inherit]"
            style={{ background: COMPOSER_FADE_BACKGROUND }}
@@ -1723,9 +1883,8 @@ export function ChatBar({
          <div className="relative w-full rounded-[inherit]">
            <div
              className={cn(
-                'relative z-4 isolate rounded-[inherit] border border-[color-mix(in_srgb,var(--dt-composer-ring)_calc(18%*var(--composer-ring-strength)),var(--dt-input))] transition-[border-color] duration-200 ease-out',
+                'group/composer-surface relative z-4 isolate rounded-[inherit] border border-[color-mix(in_srgb,var(--dt-composer-ring)_calc(18%*var(--composer-ring-strength)),var(--dt-input))] transition-[border-color] duration-200 ease-out focus-within:border-[color-mix(in_srgb,var(--dt-composer-ring)_calc(45%*var(--composer-ring-strength)),transparent)]',
                COMPOSER_DROP_FADE_CLASS,
-                'group-focus-within/composer:border-[color-mix(in_srgb,var(--dt-composer-ring)_calc(45%*var(--composer-ring-strength)),transparent)]',
                'group-has-data-[state=open]/composer:border-t-transparent',
                dragActive && COMPOSER_DROP_ACTIVE_CLASS
              )}
@@ -1736,20 +1895,14 @@ export function ChatBar({
                aria-hidden
                className={cn(
                  'pointer-events-none absolute inset-0 -z-10 rounded-[inherit]',
-                  'bg-[color-mix(in_srgb,var(--dt-card)_72%,transparent)]',
-                  'backdrop-blur-[0.75rem] backdrop-saturate-[1.12]',
-                  '[-webkit-backdrop-filter:blur(0.75rem)_saturate(1.12)]',
-                  'transition-[background-color] duration-150 ease-out',
-                  'group-data-[thread-scrolled-up]/composer:bg-[color-mix(in_srgb,var(--dt-card)_48%,transparent)]',
-                  'group-focus-within/composer:bg-[color-mix(in_srgb,var(--dt-card)_85%,transparent)]'
+                  composerFill,
+                  composerSurfaceGlass
                )}
              />
              <div
                className={cn(
                  'relative z-1 flex min-h-0 w-full flex-col gap-(--composer-row-gap) overflow-hidden rounded-[inherit] px-(--composer-surface-pad-x) py-(--composer-surface-pad-y) transition-opacity duration-200 ease-out',
-                  scrolledUp
-                    ? 'opacity-30 group-hover/composer:opacity-100 group-focus-within/composer:opacity-100'
-                    : 'opacity-100'
+                  scrolledUp ? 'opacity-30 group-hover/composer:opacity-100 group-focus-within/composer-surface:opacity-100' : 'opacity-100'
                )}
                data-slot="composer-fade"
              >
@@ -1824,12 +1977,8 @@ export function ChatBarFallback() {
          aria-hidden
          className={cn(
            'pointer-events-none absolute inset-0 -z-10 rounded-[inherit]',
-            'bg-[color-mix(in_srgb,var(--dt-card)_72%,transparent)]',
-            'backdrop-blur-[0.75rem] backdrop-saturate-[1.12]',
-            '[-webkit-backdrop-filter:blur(0.75rem)_saturate(1.12)]',
-            'transition-[background-color] duration-150 ease-out',
-            'group-data-[thread-scrolled-up]/composer:bg-[color-mix(in_srgb,var(--dt-card)_48%,transparent)]',
-            'group-focus-within/composer:bg-[color-mix(in_srgb,var(--dt-card)_85%,transparent)]'
+            composerFill,
+            composerSurfaceGlass
          )}
        />
      </div>
--- a/apps/desktop/src/app/chat/composer/inline-refs.ts
+++ b/apps/desktop/src/app/chat/composer/inline-refs.ts
@@ -3,7 +3,12 @@ import { contextPath } from '@/lib/chat-runtime'

 import type { DroppedFile } from '../hooks/use-composer-actions'

-import { composerPlainText, escapeHtml, placeCaretEnd, refChipHtml } from './rich-editor'
+import {
+  composerPlainText,
+  normalizeComposerEditorDom,
+  placeCaretEnd,
+  refChipElement
+} from './rich-editor'

 /** A chip to insert: a raw `@kind:value` string, or a typed value + display label. */
 export type InlineRefInput = string | { kind: string; label?: string; value: string }
@@ -89,56 +94,135 @@ export function droppedFileInlineRefs(candidates: DroppedFile[], cwd: string | n
  return candidates.map(candidate => droppedFileInlineRef(candidate, cwd)).filter((ref): ref is string => Boolean(ref))
 }

-export function insertInlineRefsIntoEditor(editor: HTMLDivElement, refs: readonly InlineRefInput[]) {
-  if (!refs.length) {
+/**
+ * Normalizes one chip input to `{ kind, label?, rawValue }`.
+ *
+ * Typed inputs are passed through. Raw strings must have the `@kind:value`
+ * shape; any other string yields `null`.
+ */
+function parseInlineRef(ref: InlineRefInput): { kind: string; label?: string; rawValue: string } | null {
+  if (typeof ref !== 'string') {
+    return { kind: ref.kind, label: ref.label, rawValue: ref.value }
+  }
+
+  const match = ref.match(/^@([^:]+):(.+)$/)
+
+  if (!match) {
    return null
  }

-  const refsHtml = refs
-    .map(ref => {
-      if (typeof ref !== 'string') {
-        return refChipHtml(ref.kind, ref.value, ref.label)
-      }
+  return { kind: match[1] || 'file', rawValue: match[2] || '' }
+}

-      const match = ref.match(/^@([^:]+):(.+)$/)
+/**
+ * Plain text of the editor content on one side of `range`: from the editor
+ * start up to the range start (`'before'`), or from the range end to the
+ * editor end (`'after'`).
+ */
+function plainTextInRange(editor: HTMLDivElement, range: Range, edge: 'after' | 'before') {
+  const slice = range.cloneRange()
+  slice.selectNodeContents(editor)

-      return match ? refChipHtml(match[1], match[2]) : escapeHtml(ref)
-    })
-    .join(' ')
+  if (edge === 'before') {
+    slice.setEnd(range.startContainer, range.startOffset)
+  } else {
+    slice.setStart(range.endContainer, range.endOffset)
+  }
+
+  const container = document.createElement('div')
+  container.appendChild(slice.cloneContents())
+
+  return composerPlainText(container)
+}
+
+/**
+ * Builds a fragment with one chip per ref, single spaces between chips, and an
+ * optional single space before and/or after the whole run.
+ */
+function buildRefFragment(
+  refs: readonly { kind: string; label?: string; rawValue: string }[],
+  { needsBeforeSpace, needsAfterSpace }: { needsAfterSpace: boolean; needsBeforeSpace: boolean }
+) {
+  const fragment = document.createDocumentFragment()
+
+  if (needsBeforeSpace) {
+    fragment.append(document.createTextNode(' '))
+  }
+
+  refs.forEach((ref, index) => {
+    if (index > 0) {
+      fragment.append(document.createTextNode(' '))
+    }
+
+    fragment.append(refChipElement(ref.kind, ref.rawValue, ref.label))
+  })
+
+  if (needsAfterSpace) {
+    fragment.append(document.createTextNode(' '))
+  }
+
+  return fragment
+}
+
+/**
+ * Inserts ref chips into the composer editor and returns its resulting plain
+ * text, or `null` when none of `refs` parses to a chip.
+ *
+ * With a selection inside the editor the chips replace it; otherwise they are
+ * appended at the end. A leading space is added when the preceding text does
+ * not end in whitespace; a trailing space is added unless the following text
+ * starts with whitespace.
+ */
+export function insertInlineRefsIntoEditor(editor: HTMLDivElement, refs: readonly InlineRefInput[]) {
+  const parsed = refs.map(parseInlineRef).filter((ref): ref is NonNullable<typeof ref> => ref !== null)
+
+  if (!parsed.length) {
+    return null
+  }
+
+  // NOTE(review): focus() runs before the selection is read below; confirm it
+  // cannot itself move the caret into the editor and bypass the append fallback.
+  editor.focus({ preventScroll: true })

  const selection = window.getSelection()
-
  const range =
    selection?.rangeCount && editor.contains(selection.getRangeAt(0).commonAncestorContainer)
      ? selection.getRangeAt(0)
      : null

-  editor.focus({ preventScroll: true })
+  if (range && selection) {
+    const beforeText = plainTextInRange(editor, range, 'before')
+    const afterText = plainTextInRange(editor, range, 'after')

-  if (range) {
-    const beforeRange = range.cloneRange()
-    beforeRange.selectNodeContents(editor)
-    beforeRange.setEnd(range.startContainer, range.startOffset)
-    const beforeContainer = document.createElement('div')
-    beforeContainer.appendChild(beforeRange.cloneContents())
-
-    const afterRange = range.cloneRange()
-    afterRange.selectNodeContents(editor)
-    afterRange.setStart(range.endContainer, range.endOffset)
-    const afterContainer = document.createElement('div')
-    afterContainer.appendChild(afterRange.cloneContents())
-
-    const beforeText = composerPlainText(beforeContainer)
-    const afterText = composerPlainText(afterContainer)
-    const needsBeforeSpace = beforeText.length > 0 && !/\s$/.test(beforeText)
-    const needsAfterSpace = afterText.length === 0 || !/^\s/.test(afterText)
-
-    document.execCommand('insertHTML', false, `${needsBeforeSpace ? ' ' : ''}${refsHtml}${needsAfterSpace ? ' ' : ''}`)
+    // Range.insertNode() inserts at the range start and keeps any selected
+    // content, so clear the selection first: the chips then replace it, and the
+    // spacing above (computed from the text outside the selection) stays right.
+    range.deleteContents()
+
+    range.insertNode(
+      buildRefFragment(parsed, {
+        needsAfterSpace: afterText.length === 0 || !/^\s/.test(afterText),
+        needsBeforeSpace: beforeText.length > 0 && !/\s$/.test(beforeText)
+      })
+    )
+    // Park the caret immediately after the inserted chips.
+    range.collapse(false)
+    selection.removeAllRanges()
+    selection.addRange(range)
  } else {
+    // No selection inside the editor: append to the end.
    const current = composerPlainText(editor)
+
+    editor.append(
+      buildRefFragment(parsed, {
+        needsAfterSpace: true,
+        needsBeforeSpace: current.length > 0 && !/\s$/.test(current)
+      })
+    )
    placeCaretEnd(editor)
-    document.execCommand('insertHTML', false, `${current && !/\s$/.test(current) ? ' ' : ''}${refsHtml} `)
  }

+  normalizeComposerEditorDom(editor)
+
  return composerPlainText(editor)
 }
--- a/apps/desktop/src/app/chat/composer/model-pill.tsx
+++ b/apps/desktop/src/app/chat/composer/model-pill.tsx
@@ -0,0 +1,86 @@
+import { useStore } from '@nanostores/react'
+import { useState } from 'react'
+
+import { ModelMenuCloseContext } from '@/app/shell/model-menu-panel'
+import { Button } from '@/components/ui/button'
+import { DropdownMenu, DropdownMenuContent, DropdownMenuTrigger } from '@/components/ui/dropdown-menu'
+import { GlyphSpinner } from '@/components/ui/glyph-spinner'
+import { useI18n } from '@/i18n'
+import { ChevronDown } from '@/lib/icons'
+import { formatModelStatusLabel } from '@/lib/model-status-label'
+import { cn } from '@/lib/utils'
+import {
+  $currentFastMode,
+  $currentModel,
+  $currentProvider,
+  $currentReasoningEffort,
+  setModelPickerOpen
+} from '@/store/session'
+
+import type { ChatBarState } from './types'
+
+const PILL = cn(
+  'h-(--composer-control-size) max-w-40 shrink-0 gap-1 rounded-md px-2 text-xs font-normal',
+  'text-(--ui-text-tertiary) hover:bg-(--chrome-action-hover) hover:text-foreground'
+)
+
+/**
+ * Composer model selector — the relocated status-bar pill. Reuses the live
+ * `model.options` dropdown (`modelMenuContent`) verbatim; falls back to the
+ * full picker when the gateway is closed and no live menu exists.
+ */
+export function ModelPill({ disabled, model }: { disabled: boolean; model: ChatBarState['model'] }) {
+  const copy = useI18n().t.shell.statusbar
+  const currentModel = useStore($currentModel)
+  const currentProvider = useStore($currentProvider)
+  const fastMode = useStore($currentFastMode)
+  const reasoningEffort = useStore($currentReasoningEffort)
+  const [open, setOpen] = useState(false)
+
+  // The model resolves a beat after the gateway/session comes up. Rather than
+  // flash a literal "No model", show a quiet loader (inherits the pill text
+  // color at half opacity) until a model lands.
+  const label = (
+    <>
+      {currentModel.trim() ? (
+        <span className="truncate">{formatModelStatusLabel(currentModel, { fastMode, reasoningEffort })}</span>
+      ) : (
+        <GlyphSpinner className="opacity-50" spinner="braille" />
+      )}
+      <ChevronDown className="size-2.5 shrink-0 opacity-50" />
+    </>
+  )
+
+  const title = currentProvider ? copy.modelTitle(currentProvider, currentModel || copy.modelNone) : copy.switchModel
+
+  if (!model.modelMenuContent) {
+    return (
+      <Button
+        aria-label={copy.openModelPicker}
+        className={PILL}
+        disabled={disabled}
+        onClick={() => setModelPickerOpen(true)}
+        title={copy.openModelPicker}
+        type="button"
+        variant="ghost"
+      >
+        {label}
+      </Button>
+    )
+  }
+
+  return (
+    <DropdownMenu onOpenChange={setOpen} open={open}>
+      <DropdownMenuTrigger asChild>
+        <Button aria-label={title} className={PILL} disabled={disabled} title={title} type="button" variant="ghost">
+          {label}
+        </Button>
+      </DropdownMenuTrigger>
+      <DropdownMenuContent align="end" className="w-64 p-0" side="top" sideOffset={8}>
+        <ModelMenuCloseContext.Provider value={() => setOpen(false)}>
+          {model.modelMenuContent}
+        </ModelMenuCloseContext.Provider>
+      </DropdownMenuContent>
+    </DropdownMenu>
+  )
+}
--- a/apps/desktop/src/app/chat/composer/queue-panel.tsx
+++ b/apps/desktop/src/app/chat/composer/queue-panel.tsx
@@ -1,7 +1,6 @@
-import { useState } from 'react'
-
+import { StatusRow } from '@/components/chat/status-row'
+import { StatusSection } from '@/components/chat/status-section'
 import { Button } from '@/components/ui/button'
-import { DisclosureCaret } from '@/components/ui/disclosure-caret'
 import { Tip } from '@/components/ui/tooltip'
 import { type Translations, useI18n } from '@/i18n'
 import { ArrowUp, Pencil, Trash2 } from '@/lib/icons'
@@ -23,108 +22,84 @@ const entryPreview = (entry: QueuedPromptEntry, c: Translations['composer']) =>
 export function QueuePanel({ busy, editingId, entries, onDelete, onEdit, onSendNow }: QueuePanelProps) {
  const { t } = useI18n()
  const c = t.composer
-  const [collapsed, setCollapsed] = useState(true)

  if (entries.length === 0) {
    return null
  }

  return (
-    <div className="rounded-t-2xl border border-b-0 border-border/65 bg-[color-mix(in_srgb,var(--dt-card)_70%,transparent)] pt-0.5 pb-1 mx-1">
-      <button
-        className="flex w-full items-center gap-1.5 px-2 text-left text-[0.6rem] font-medium text-muted-foreground/92 transition-colors hover:text-foreground/90"
-        onClick={() => setCollapsed(open => !open)}
-        type="button"
-      >
-        <DisclosureCaret className="shrink-0" open={!collapsed} size="1em" />
-        <span className="truncate">{c.queued(entries.length)}</span>
-      </button>
+    <StatusSection label={c.queued(entries.length)}>
+      {entries.map(entry => {
+        const isEditing = editingId === entry.id
+        const attachmentsCount = entry.attachments.length

-      {!collapsed && (
-        <div className="space-y-0.5 px-1 pb-0.5">
-          {entries.map(entry => {
-            const isEditing = editingId === entry.id
-            const attachmentsCount = entry.attachments.length
-            const sendLabel = busy ? c.sendQueuedNext : c.sendQueuedNow
-
-            return (
-              <div
-                className={cn(
-                  'group/queue-row flex items-center gap-1.5 rounded-lg border border-transparent px-1.5 py-0.5',
-                  'transition-colors duration-300 ease-out hover:bg-(--chrome-action-hover) hover:transition-none',
-                  isEditing && 'border-[color-mix(in_srgb,var(--dt-composer-ring)_40%,transparent)] bg-accent/25'
-                )}
-                key={entry.id}
-              >
-                <span
-                  aria-hidden
-                  className="h-3.5 w-3.5 shrink-0 rounded-full border border-foreground/35 bg-transparent"
-                />
-                <div className="min-w-0 flex-1">
-                  <p className="truncate text-[0.73rem] leading-4 text-foreground/92">{entryPreview(entry, c)}</p>
-                  {(attachmentsCount > 0 || isEditing) && (
-                    <div className="mt-0.5 flex items-center gap-1.5 text-[0.64rem] text-muted-foreground/75">
-                      {attachmentsCount > 0 && <span>{c.attachments(attachmentsCount)}</span>}
-                      {isEditing && (
-                        <span className="text-[color-mix(in_srgb,var(--dt-composer-ring)_78%,var(--muted-foreground))]">
-                          {c.editingInComposer}
-                        </span>
-                      )}
-                    </div>
+        return (
+          <StatusRow
+            className={cn(
+              'border border-transparent',
+              isEditing && 'border-[color-mix(in_srgb,var(--dt-composer-ring)_40%,transparent)] bg-accent/25'
+            )}
+            key={entry.id}
+            trailing={
+              <>
+                <Tip label={c.queueEdit}>
+                  <Button
+                    aria-label={c.queueEdit}
+                    className="size-5 rounded-md"
+                    disabled={Boolean(editingId) && !isEditing}
+                    onClick={() => onEdit(entry)}
+                    size="icon-xs"
+                    type="button"
+                    variant="ghost"
+                  >
+                    <Pencil size={11} />
+                  </Button>
+                </Tip>
+                <Tip label={busy ? c.queueSendNext : c.queueSend}>
+                  <Button
+                    aria-label={busy ? c.queueSendNext : c.queueSend}
+                    className="size-5 rounded-md"
+                    disabled={isEditing}
+                    onClick={() => onSendNow(entry.id)}
+                    size="icon-xs"
+                    type="button"
+                    variant="ghost"
+                  >
+                    <ArrowUp size={11} />
+                  </Button>
+                </Tip>
+                <Tip label={c.queueDelete}>
+                  <Button
+                    aria-label={c.queueDelete}
+                    className="size-5 rounded-md"
+                    onClick={() => onDelete(entry.id)}
+                    size="icon-xs"
+                    type="button"
+                    variant="ghost"
+                  >
+                    <Trash2 size={11} />
+                  </Button>
+                </Tip>
+              </>
+            }
+            trailingVisible={isEditing}
+          >
+            <div className="min-w-0 flex-1">
+              <p className="truncate text-[0.73rem] leading-4 text-foreground/92">{entryPreview(entry, c)}</p>
+              {(attachmentsCount > 0 || isEditing) && (
+                <div className="mt-0.5 flex items-center gap-1.5 text-[0.64rem] text-muted-foreground/75">
+                  {attachmentsCount > 0 && <span>{c.attachments(attachmentsCount)}</span>}
+                  {isEditing && (
+                    <span className="text-[color-mix(in_srgb,var(--dt-composer-ring)_78%,var(--muted-foreground))]">
+                      {c.editingInComposer}
+                    </span>
                  )}
                </div>
-                <div
-                  className={cn(
-                    'flex shrink-0 items-center gap-0 transition-opacity',
-                    isEditing
-                      ? 'opacity-100'
-                      : 'opacity-0 group-hover/queue-row:opacity-100 group-focus-within/queue-row:opacity-100'
-                  )}
-                >
-                  <Tip label={c.editQueued}>
-                    <Button
-                      aria-label={c.editQueued}
-                      className="h-5 w-5 rounded-md"
-                      disabled={Boolean(editingId) && !isEditing}
-                      onClick={() => onEdit(entry)}
-                      size="icon-xs"
-                      type="button"
-                      variant="ghost"
-                    >
-                      <Pencil size={11} />
-                    </Button>
-                  </Tip>
-                  <Tip label={sendLabel}>
-                    <Button
-                      aria-label={sendLabel}
-                      className="h-5 w-5 rounded-md"
-                      disabled={isEditing}
-                      onClick={() => onSendNow(entry.id)}
-                      size="icon-xs"
-                      type="button"
-                      variant="ghost"
-                    >
-                      <ArrowUp size={11} />
-                    </Button>
-                  </Tip>
-                  <Tip label={c.deleteQueued}>
-                    <Button
-                      aria-label={c.deleteQueued}
-                      className="h-5 w-5 rounded-md"
-                      onClick={() => onDelete(entry.id)}
-                      size="icon-xs"
-                      type="button"
-                      variant="ghost"
-                    >
-                      <Trash2 size={11} />
-                    </Button>
-                  </Tip>
-                </div>
-              </div>
-            )
-          })}
-        </div>
-      )}
-    </div>
+              )}
+            </div>
+          </StatusRow>
+        )
+      })}
+    </StatusSection>
  )
 }
--- a/apps/desktop/src/app/chat/composer/rich-editor.test.ts
+++ b/apps/desktop/src/app/chat/composer/rich-editor.test.ts
@@ -1,6 +1,25 @@
 import { describe, expect, it } from 'vitest'

-import { composerPlainText, renderComposerContents, RICH_INPUT_SLOT } from './rich-editor'
+import { insertInlineRefsIntoEditor } from './inline-refs'
+import {
+  composerPlainText,
+  deleteSelectionInEditor,
+  insertPlainTextAtCaret,
+  normalizeComposerEditorDom,
+  refChipElement,
+  renderComposerContents,
+  RICH_INPUT_SLOT
+} from './rich-editor'
+
+const caretIn = (editor: HTMLElement) => {
+  const range = document.createRange()
+  const selection = window.getSelection()!
+
+  range.selectNodeContents(editor)
+  range.collapse(false)
+  selection.removeAllRanges()
+  selection.addRange(range)
+}

 describe('renderComposerContents', () => {
  it('renders refs and raw text without interpreting user text as HTML', () => {
@@ -16,3 +35,100 @@ describe('renderComposerContents', () => {
    expect(composerPlainText(editor)).toBe('@file:`<img src=x onerror=alert(1)>` <b>raw</b>')
  })
 })
+
+describe('normalizeComposerEditorDom', () => {
+  it('unwraps a single insertHTML wrapper div so plain text stays one line', () => {
+    const editor = document.createElement('div')
+    editor.dataset.slot = RICH_INPUT_SLOT
+    editor.innerHTML = '<div><span data-ref-text="@file:`src/foo.ts`" contenteditable="false">foo.ts</span> </div>'
+
+    normalizeComposerEditorDom(editor)
+
+    expect(composerPlainText(editor)).toBe('@file:`src/foo.ts` ')
+    expect(editor.querySelector(':scope > div')).toBeNull()
+  })
+
+  it('removes a trailing br after a ref chip', () => {
+    const editor = document.createElement('div')
+    editor.dataset.slot = RICH_INPUT_SLOT
+    editor.append(refChipElement('file', '`src/foo.ts`'), document.createElement('br'))
+
+    normalizeComposerEditorDom(editor)
+
+    expect(composerPlainText(editor)).toBe('@file:`src/foo.ts`')
+    expect(editor.querySelector('br')).toBeNull()
+  })
+})
+
+describe('insertInlineRefsIntoEditor', () => {
+  it('inserts chips without wrapper divs or spurious newlines', () => {
+    const editor = document.createElement('div')
+    editor.dataset.slot = RICH_INPUT_SLOT
+
+    insertInlineRefsIntoEditor(editor, ['@file:`src/foo.ts`'])
+
+    expect(editor.querySelector(':scope > div')).toBeNull()
+    expect(composerPlainText(editor)).toBe('@file:`src/foo.ts` ')
+  })
+})
+
+describe('insertPlainTextAtCaret', () => {
+  it('inserts multiline text as text nodes + br', () => {
+    const editor = document.createElement('div')
+    editor.dataset.slot = RICH_INPUT_SLOT
+    document.body.append(editor)
+    caretIn(editor)
+
+    insertPlainTextAtCaret(editor, 'one\ntwo\nthree')
+
+    expect(editor.querySelectorAll('br').length).toBe(2)
+    expect(composerPlainText(editor)).toBe('one\ntwo\nthree')
+
+    editor.remove()
+  })
+
+  it('replaces the selected span', () => {
+    const editor = document.createElement('div')
+    editor.dataset.slot = RICH_INPUT_SLOT
+    editor.textContent = 'abXYef'
+    document.body.append(editor)
+
+    const text = editor.firstChild!
+    const selection = window.getSelection()!
+    const range = document.createRange()
+
+    range.setStart(text, 2)
+    range.setEnd(text, 4)
+    selection.removeAllRanges()
+    selection.addRange(range)
+
+    insertPlainTextAtCaret(editor, 'cd')
+
+    expect(composerPlainText(editor)).toBe('abcdef')
+
+    editor.remove()
+  })
+})
+
+describe('deleteSelectionInEditor', () => {
+  it('clears a non-collapsed range and leaves a collapsed caret', () => {
+    const editor = document.createElement('div')
+    editor.dataset.slot = RICH_INPUT_SLOT
+    editor.textContent = 'hello world'
+    document.body.append(editor)
+
+    const selection = window.getSelection()!
+    const range = document.createRange()
+
+    range.selectNodeContents(editor)
+    selection.removeAllRanges()
+    selection.addRange(range)
+
+    expect(deleteSelectionInEditor(editor)).toBe(true)
+    expect(composerPlainText(editor)).toBe('')
+    expect(selection.getRangeAt(0).collapsed).toBe(true)
+    expect(deleteSelectionInEditor(editor)).toBe(false)
+
+    editor.remove()
+  })
+})
--- a/Show More
+++ b/Show More