Merge branch 'bb/pets' into bb/pets-gen

Carry forward the overlay/waiting-state updates and resolve the gateway merge conflict. Also tighten the desktop pet-generation flow by cleaning superseded previews, using the draft's source prompt during hatch, and previewing rows from the returned sheet taxonomy.
feat(pets): wire the waiting state across CLI, TUI, and desktop
2026-06-23 18:33:19 +08:00 · 2026-06-17 12:14:37 -05:00 · 2026-06-17 11:55:28 -05:00 · 2026-06-17 11:46:46 -05:00 · 2026-06-17 11:38:39 -05:00 · 2026-06-17 11:29:23 -05:00
559 changed files with 40429 additions and 11963 deletions
--- a/.github/workflows/contributor-check.yml
+++ b/.github/workflows/contributor-check.yml
@@ -1,12 +1,11 @@
 name: Contributor Attribution Check

 on:
-  pull_request:
-    branches: [main]
  # No paths filter — the job must always run so the required check
  # reports a status (path-gated workflows leave checks "pending" forever
  # when no matching files change, which blocks merge).
-
+  pull_request:
+    branches: [main]
 permissions:
  contents: read

--- a/.github/workflows/deploy-site.yml
+++ b/.github/workflows/deploy-site.yml
@@ -11,8 +11,20 @@ on:
      - 'optional-skills/**'
      - '.github/workflows/deploy-site.yml'
  workflow_dispatch:
+    inputs:
+      skills_index_run_id:
+        description: 'Optional Build Skills Index run ID whose skills-index artifact should be deployed'
+        required: false
+        type: string
+      rebuild_skills_index:
+        description: 'Force a fresh multi-source crawl instead of reusing the latest healthy index'
+        required: false
+        default: false
+        type: boolean

 permissions:
+  contents: read
+  actions: read
  pages: write
  id-token: write

@@ -55,26 +67,81 @@ jobs:
      - name: Install PyYAML for skill extraction
        run: pip install pyyaml==6.0.2 httpx==0.28.1

-      - name: Build skills index (unified multi-source catalog)
+      - name: Prepare skills index (unified multi-source catalog)
        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GH_TOKEN: ${{ github.token }}
+          GITHUB_TOKEN: ${{ github.token }}
+          SKILLS_INDEX_RUN_ID: ${{ github.event.inputs.skills_index_run_id || '' }}
+          REBUILD_SKILLS_INDEX: ${{ github.event.inputs.rebuild_skills_index || 'false' }}
        run: |
-          # Rebuild the unified catalog. The file is gitignored, so a fresh
-          # checkout starts without it and we want the freshest crawl in
-          # every deploy.
+          # The unified external catalog is expensive to crawl and can burn
+          # through the repository installation's GitHub API quota when several
+          # docs deploys land close together.  Normal docs deploys therefore
+          # reuse the latest healthy catalog: first the artifact from a
+          # scheduled skills-index run, then the currently live index.  A fresh
+          # crawl runs here only on a manual force rebuild or when neither is usable.
          #
-          # This MUST be fatal. build_skills_index.py runs a health check and
-          # exits non-zero WITHOUT writing the output file when a source
-          # collapses (e.g. a GitHub API rate limit zeroes the github /
-          # claude-marketplace / well-known taps all at once). Letting the
-          # deploy continue would either (a) ship a degenerate index missing
-          # whole hubs — the June 2026 regression where OpenAI/Anthropic/
-          # HuggingFace/NVIDIA tabs vanished — or (b) fall through to a
-          # local-only catalog. Failing here keeps the last good deployment
-          # live (GitHub Pages serves the previous build) instead of
-          # publishing a broken catalog. Re-run the workflow once the
-          # transient rate limit clears.
+          # If we do crawl, a build failure remains fatal. build_skills_index.py runs
+          # the health check BEFORE writing and exits non-zero on source
+          # collapse, keeping the last good Pages deployment live instead of
+          # publishing a degenerate catalog.
+          set -euo pipefail
+          INDEX_PATH="website/static/api/skills-index.json"
+          mkdir -p "$(dirname "$INDEX_PATH")"
+
+          validate_index() {
+            python3 - "$INDEX_PATH" <<'PY'
+          import json
+          import sys
+          from pathlib import Path
+
+          path = Path(sys.argv[1])
+          try:
+              data = json.loads(path.read_text(encoding="utf-8"))
+          except Exception as exc:
+              print(f"invalid skills index JSON: {exc}", file=sys.stderr)
+              sys.exit(1)
+          skills = data.get("skills")
+          if not isinstance(skills, list) or len(skills) < 1500:
+              count = len(skills) if isinstance(skills, list) else "missing"
+              print(f"skills index too small: {count}", file=sys.stderr)
+              sys.exit(1)
+          print(f"skills index ready: {len(skills)} skills")
+          PY
+          }
+
+          if [ "$REBUILD_SKILLS_INDEX" = "true" ]; then
+            python3 scripts/build_skills_index.py
+            validate_index
+            exit 0
+          fi
+
+          if [ -n "$SKILLS_INDEX_RUN_ID" ]; then
+            tmpdir="$(mktemp -d)"
+            echo "Downloading skills-index artifact from run $SKILLS_INDEX_RUN_ID"
+            if gh run download "$SKILLS_INDEX_RUN_ID" --name skills-index --dir "$tmpdir"; then
+              candidate="$(find "$tmpdir" -name skills-index.json -type f | head -n 1 || true)"
+              if [ -n "$candidate" ]; then
+                cp "$candidate" "$INDEX_PATH"
+                if validate_index; then
+                  exit 0
+                fi
+              fi
+            fi
+            echo "::warning::Could not use skills-index artifact from run $SKILLS_INDEX_RUN_ID; trying live index"
+          fi
+
+          echo "Downloading currently live skills index"
+          if curl -fsSL --retry 3 --retry-delay 5 \
+            "https://hermes-agent.nousresearch.com/docs/api/skills-index.json" \
+            -o "$INDEX_PATH" && validate_index; then
+            exit 0
+          fi
+
+          echo "::warning::Live skills index unavailable or unhealthy; falling back to a fresh crawl"
+          rm -f "$INDEX_PATH"
          python3 scripts/build_skills_index.py
+          validate_index

      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py
--- a/.github/workflows/docker-lint.yml
+++ b/.github/workflows/docker-lint.yml
@@ -18,13 +18,12 @@ on:
      - docker/**
      - .hadolint.yaml
      - .github/workflows/docker-lint.yml
+
+  # No paths filter — the job must always run so the required check
+  # reports a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
-    paths:
-      - Dockerfile
-      - docker/**
-      - .hadolint.yaml
-      - .github/workflows/docker-lint.yml

 permissions:
  contents: read
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -11,16 +11,13 @@ on:
      - 'docker/**'
      - '.github/workflows/docker-publish.yml'
      - '.github/actions/hermes-smoke-test/**'
+
+  # No paths filter — the job must always run so the required check
+  # reports a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
-    paths:
-      - '**/*.py'
-      - 'pyproject.toml'
-      - 'uv.lock'
-      - 'Dockerfile'
-      - 'docker/**'
-      - '.github/workflows/docker-publish.yml'
-      - '.github/actions/hermes-smoke-test/**'
+
  release:
    types: [published]

--- a/.github/workflows/docs-site-checks.yml
+++ b/.github/workflows/docs-site-checks.yml
@@ -1,10 +1,12 @@
 name: Docs Site Checks

 on:
+  # No paths filter — the job must always run so the required check
+  # reports a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
  pull_request:
-    paths:
-      - 'website/**'
-      - '.github/workflows/docs-site-checks.yml'
+    branches: [main]
+
  workflow_dispatch:

 permissions:
@@ -14,9 +16,9 @@ jobs:
  docs-site-checks:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

-      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
+      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
        with:
          node-version: 22
          cache: npm
@@ -26,9 +28,9 @@ jobs:
        run: npm ci
        working-directory: website

-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
-          python-version: '3.11'
+          python-version: "3.11"

      - name: Install ascii-guard
        run: python -m pip install ascii-guard==2.3.0 pyyaml==6.0.3
--- a/.github/workflows/history-check.yml
+++ b/.github/workflows/history-check.yml
@@ -14,6 +14,9 @@ name: History Check
 # the PR head and main to be non-empty.

 on:
+  # No paths filter — the job must always run so the required check
+  # reports a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]

@@ -24,9 +27,9 @@ jobs:
  check-common-ancestor:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
-          fetch-depth: 0  # full history both sides for merge-base
+          fetch-depth: 0 # full history both sides for merge-base

      - name: Reject PRs with no common ancestor on main
        run: |
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -15,12 +15,12 @@ on:
      - "**/*.md"
      - "docs/**"
      - "website/**"
+
+  # No paths filter — the job must always run so the required check
+  # reports a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
-    paths-ignore:
-      - "**/*.md"
-      - "docs/**"
-      - "website/**"

 permissions:
  contents: read
@@ -154,7 +154,6 @@ jobs:
              });
            }

-
  ruff-blocking:
    # Enforce the rules in pyproject.toml [tool.ruff.lint.select]. Currently
    # PLW1514 (unspecified-encoding) — catches bare ``open()`` /
--- a/.github/workflows/nix-lockfile-fix.yml
+++ b/.github/workflows/nix-lockfile-fix.yml
@@ -1,255 +0,0 @@
-name: Nix Lockfile Fix
-
-on:
-  push:
-    branches: [main]
-    paths:
-      - 'package-lock.json'
-      - 'package.json'
-      - 'ui-tui/package.json'
-      - 'apps/desktop/package.json'
-  workflow_dispatch:
-    inputs:
-      pr_number:
-        description: 'PR number to fix (leave empty to run on the selected branch)'
-        required: false
-        type: string
-  issue_comment:
-    types: [edited]
-
-permissions:
-  contents: write
-  pull-requests: write
-
-concurrency:
-  group: nix-lockfile-fix-${{ github.event.issue.number || github.event.inputs.pr_number || github.ref }}
-  cancel-in-progress: false
-
-jobs:
-  # ── Auto-fix on main ───────────────────────────────────────────────
-  # Fires when a push to main touches package.json or package-lock.json.
-  # Runs fix-lockfiles and pushes the hash update commit directly to main
-  # so Nix builds never stay broken.
-  #
-  # Safety invariants:
-  #   1. The fix commit only touches nix/*.nix files, which are NOT in
-  #      the paths filter above, so this cannot re-trigger itself.
-  #   2. An explicit file-whitelist check before commit aborts if
-  #      fix-lockfiles ever modifies unexpected files.
-  #   3. Job-level concurrency with cancel-in-progress: true ensures
-  #      back-to-back pushes collapse to the newest; ref: main checkout
-  #      always operates on the latest branch state.
-  #   4. Uses a GitHub App token (not GITHUB_TOKEN) so the fix commit
-  #      triggers downstream nix.yml verification.
-  auto-fix-main:
-    if: github.event_name == 'push'
-    runs-on: ubuntu-latest
-    timeout-minutes: 25
-    concurrency:
-      group: auto-fix-main
-      cancel-in-progress: true
-    steps:
-      - name: Generate GitHub App token
-        id: app-token
-        uses: actions/create-github-app-token@7bfa3a4717ef143a604ee0a99d859b8886a96d00  # v1.9.3
-        with:
-          app-id: ${{ secrets.APP_ID }}
-          private-key: ${{ secrets.APP_PRIVATE_KEY }}
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          ref: main
-          token: ${{ steps.app-token.outputs.token }}
-
-      - uses: ./.github/actions/nix-setup
-        with:
-          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
-
-      - name: Apply lockfile hashes
-        id: apply
-        run: nix run .#fix-lockfiles -- --apply
-
-      - name: Commit & push
-        if: steps.apply.outputs.changed == 'true'
-        shell: bash
-        run: |
-          set -euo pipefail
-
-          # Ensure only nix/lib.nix (home of the single npmDepsHash) was
-          # modified — prevents accidental self-triggering if fix-lockfiles
-          # ever touches package files.
-          unexpected="$(git diff --name-only | grep -Ev '^nix/lib\.nix$' || true)"
-          if [ -n "$unexpected" ]; then
-            echo "::error::Unexpected modified files: $unexpected"
-            exit 1
-          fi
-
-          # Record the base SHA before committing — used to detect package
-          # file changes if we need to rebase after a non-fast-forward push.
-          BASE_SHA="$(git rev-parse HEAD)"
-
-          git config user.name 'github-actions[bot]'
-          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
-          git add nix/lib.nix
-          git commit -m "fix(nix): auto-refresh npm lockfile hashes" \
-            -m "Source: $GITHUB_SHA" \
-            -m "Run: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
-
-          # Retry push with rebase in case main advanced with an unrelated
-          # commit during the nix build. Without this, a non-fast-forward
-          # rejection silently loses the fix. If package files changed during
-          # the rebase, abort — a fresh auto-fix run will handle the new state.
-          for attempt in 1 2 3; do
-            if git push origin HEAD:main; then
-              exit 0
-            fi
-            echo "::warning::Push attempt $attempt failed (non-fast-forward?), rebasing…"
-            git fetch origin main
-
-            # If package files changed between our base and the new main,
-            # our computed hashes are stale. Abort and let the next triggered
-            # run recompute from the correct package-lock state.
-            pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
-              'package-lock.json' 'package.json' \
-              'ui-tui/package.json' 'apps/desktop/package.json' || true)"
-            if [ -n "$pkg_changed" ]; then
-              echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
-              exit 0
-            fi
-
-            git rebase origin/main
-          done
-          echo "::error::Failed to push after 3 rebase attempts"
-          exit 1
-
-  # ── PR fix (manual / checkbox) ─────────────────────────────────────
-  # Existing behavior: run on manual dispatch OR when a task-list
-  # checkbox in the sticky lockfile-check comment flips from [ ] to [x].
-  fix:
-    if: |
-      github.event_name == 'workflow_dispatch' ||
-      (github.event_name == 'issue_comment'
-       && github.event.issue.pull_request != null
-       && contains(github.event.comment.body, '[x] **Apply lockfile fix**')
-       && !contains(github.event.changes.body.from, '[x] **Apply lockfile fix**'))
-    runs-on: ubuntu-latest
-    timeout-minutes: 25
-    steps:
-      - name: Authorize & resolve PR
-        id: resolve
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea  # v7.0.1
-        with:
-          script: |
-            // 1. Verify the actor has write access — applies to both checkbox
-            //    clicks and manual dispatch.
-            const { data: perm } =
-              await github.rest.repos.getCollaboratorPermissionLevel({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                username: context.actor,
-              });
-            if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
-              core.setFailed(
-                `${context.actor} lacks write access (has: ${perm.permission})`
-              );
-              return;
-            }
-
-            // 2. Resolve which ref to check out.
-            let prNumber = '';
-            if (context.eventName === 'issue_comment') {
-              prNumber = String(context.payload.issue.number);
-            } else if (context.eventName === 'workflow_dispatch') {
-              prNumber = context.payload.inputs.pr_number || '';
-            }
-
-            if (!prNumber) {
-              core.setOutput('ref', context.ref.replace(/^refs\/heads\//, ''));
-              core.setOutput('repo', context.repo.repo);
-              core.setOutput('owner', context.repo.owner);
-              core.setOutput('pr', '');
-              return;
-            }
-
-            const { data: pr } = await github.rest.pulls.get({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              pull_number: Number(prNumber),
-            });
-            core.setOutput('ref', pr.head.ref);
-            core.setOutput('repo', pr.head.repo.name);
-            core.setOutput('owner', pr.head.repo.owner.login);
-            core.setOutput('pr', String(pr.number));
-
-      # Wipe the sticky lockfile-check comment to a "running" state as soon
-      # as the job is authorized, so the user sees their click was picked up
-      # before the ~minute of nix build work.
-      - name: Mark sticky as running
-        if: steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### 🔄 Applying lockfile fix…
-
-            Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
-          ref: ${{ steps.resolve.outputs.ref }}
-          token: ${{ secrets.GITHUB_TOKEN }}
-          fetch-depth: 0
-
-      - uses: ./.github/actions/nix-setup
-        with:
-          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
-
-      - name: Apply lockfile hashes
-        id: apply
-        run: nix run .#fix-lockfiles
-
-      - name: Commit & push
-        if: steps.apply.outputs.changed == 'true'
-        shell: bash
-        run: |
-          set -euo pipefail
-          git config user.name 'github-actions[bot]'
-          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
-          git add nix/lib.nix
-          git commit -m "fix(nix): refresh npm lockfile hashes"
-          git push
-
-      - name: Update sticky (applied)
-        if: steps.apply.outputs.changed == 'true' && steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### ✅ Lockfile fix applied
-
-            Pushed a commit refreshing the npm lockfile hashes — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
-
-      - name: Update sticky (already current)
-        if: steps.apply.outputs.changed == 'false' && steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### ✅ Lockfile hashes already current
-
-            Nothing to commit — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
-
-      - name: Update sticky (failed)
-        if: failure() && steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### ❌ Lockfile fix failed
-
-            See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for logs.
--- a/.github/workflows/nix.yml
+++ b/.github/workflows/nix.yml
@@ -1,105 +0,0 @@
-name: Nix
-
-on:
-  push:
-    branches: [main]
-  pull_request:
-
-permissions:
-  contents: read
-  pull-requests: write
-
-concurrency:
-  group: nix-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  nix:
-    strategy:
-      matrix:
-        os: [ubuntu-latest, macos-latest]
-    runs-on: ${{ matrix.os }}
-    timeout-minutes: 30
-    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-      - uses: ./.github/actions/nix-setup
-        with:
-          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
-
-      - name: Resolve head SHA
-        if: github.event_name == 'pull_request'
-        id: sha
-        shell: bash
-        run: |
-          FULL="${{ github.event.pull_request.head.sha || github.sha }}"
-          echo "full=$FULL" >> "$GITHUB_OUTPUT"
-          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
-
-      - name: Check flake
-        id: flake
-        continue-on-error: true
-        run: nix flake check --print-build-logs
-
-      # When the flake check fails, run a targeted diagnostic to see if
-      # the failure is specifically a stale npm lockfile hash in one of the
-      # known npm subpackages (tui / web).  This avoids surfacing a generic
-      # "build failed" message when the fix is a single known command.
-      - name: Diagnose npm lockfile hashes
-        id: hash_check
-        if: steps.flake.outcome == 'failure' && runner.os == 'Linux'
-        continue-on-error: true
-        env:
-          LINK_SHA: ${{ steps.sha.outputs.full }}
-        run: nix run .#fix-lockfiles -- --check
-
-      # If fix-lockfiles itself crashes (infrastructure blip, cache throttle,
-      # etc.) it won't set stale=true/false.  Treat that as a distinct failure
-      # mode rather than silently ignoring it.
-      - name: Fail if hash check crashed without reporting
-        if: steps.hash_check.outcome == 'failure' && steps.hash_check.outputs.stale != 'true' && steps.hash_check.outputs.stale != 'false'
-        run: |
-          echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
-          exit 1
-
-      - name: Post sticky PR comment (stale hashes)
-        if: steps.hash_check.outputs.stale == 'true' && github.event_name == 'pull_request'
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          message: |
-            ### ⚠️ npm lockfile hash out of date
-
-            Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
-
-            The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
-
-            ${{ steps.hash_check.outputs.report }}
-
-            #### Apply the fix
-
-            - [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
-            - Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
-            - Or locally: `nix run .#fix-lockfiles` and commit the diff
-
-      # Clear the sticky comment when either the flake check passed outright (no
-      # hash check needed) or the hash check explicitly returned stale=false
-      # (check failed for a non-hash reason).
-      - name: Clear sticky PR comment (resolved)
-        if: |
-          github.event_name == 'pull_request' &&
-          (steps.hash_check.outputs.stale == 'false' ||
-           steps.flake.outcome == 'success')
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          delete: true
-
-      - name: Final fail if flake check failed
-        if: steps.flake.outcome == 'failure'
-        run: |
-          if [ "${{ steps.hash_check.outputs.stale }}" == "true" ]; then
-            echo "::error::Nix build failed due to stale npm lockfile hash. Run: nix run .#fix-lockfiles"
-          else
-            echo "::error::Nix flake check failed. See logs above."
-          fi
-          exit 1
--- a/.github/workflows/osv-scanner.yml
+++ b/.github/workflows/osv-scanner.yml
@@ -20,29 +20,23 @@ name: OSV-Scanner
 # vulnerabilities in pinned deps that we may need to patch deliberately.

 on:
+  # No paths filter — the job must always run so the required check
+  # reports a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
-    paths:
-      - 'uv.lock'
-      - 'pyproject.toml'
-      - 'package.json'
-      - 'package-lock.json'
-      - 'ui-tui/package.json'
-      - 'website/package.json'
-      - 'website/package-lock.json'
-      - '.github/workflows/osv-scanner.yml'
  push:
    branches: [main]
    paths:
-      - 'uv.lock'
-      - 'pyproject.toml'
-      - 'package.json'
-      - 'package-lock.json'
-      - 'website/package-lock.json'
+      - "uv.lock"
+      - "pyproject.toml"
+      - "package.json"
+      - "package-lock.json"
+      - "website/package-lock.json"
  schedule:
    # Weekly scan against main — catches CVEs published after merge for
    # deps that haven't changed since.
-    - cron: '0 9 * * 1'
+    - cron: "0 9 * * 1"
  workflow_dispatch:

 permissions:
@@ -54,7 +48,7 @@ permissions:
 jobs:
  scan:
    name: Scan lockfiles
-    uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@9a498708959aeaef5ef730655706c5a1df1edbc2  # v2.3.8
+    uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@9a498708959aeaef5ef730655706c5a1df1edbc2 # v2.3.8
    with:
      # Scan explicit lockfiles rather than recursing, so we only look at
      # the three sources of truth and skip vendored / test / worktree dirs.
--- a/.github/workflows/skills-index.yml
+++ b/.github/workflows/skills-index.yml
@@ -53,4 +53,4 @@ jobs:
      - name: Trigger Deploy Site workflow
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: gh workflow run deploy-site.yml --repo ${{ github.repository }}
+        run: gh workflow run deploy-site.yml --repo ${{ github.repository }} -f skills_index_run_id=${{ github.run_id }}
--- a/.github/workflows/supply-chain-audit.yml
+++ b/.github/workflows/supply-chain-audit.yml
@@ -1,11 +1,11 @@
 name: Supply Chain Audit

 on:
-  pull_request:
-    types: [opened, synchronize, reopened]
  # No paths filter — the jobs must always run so required checks
  # report a status (path-gated workflows leave checks "pending" forever
  # when no matching files change, which blocks merge).
+  pull_request:
+    types: [opened, synchronize, reopened]

 permissions:
  pull-requests: write
@@ -29,8 +29,10 @@ jobs:
      scan: ${{ steps.filter.outputs.scan }}
      # True when pyproject.toml changed in this PR
      deps: ${{ steps.filter.outputs.deps }}
+      # True when the curated MCP catalog / bundled MCP manifests changed.
+      mcp_catalog: ${{ steps.filter.outputs.mcp_catalog }}
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 0
      - name: Check for relevant file changes
@@ -54,6 +56,14 @@ jobs:
          else
            echo "deps=false" >> "$GITHUB_OUTPUT"
          fi
+          MCP_CATALOG_FILES=$(git diff --name-only "$BASE"..."$HEAD" -- \
+            'optional-mcps/**' \
+            'hermes_cli/mcp_catalog.py' || true)
+          if [ -n "$MCP_CATALOG_FILES" ]; then
+            echo "mcp_catalog=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "mcp_catalog=false" >> "$GITHUB_OUTPUT"
+          fi

  scan:
    name: Scan PR for critical supply chain risks
@@ -62,7 +72,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 0

@@ -197,7 +207,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 0

@@ -268,3 +278,50 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - run: echo "No pyproject.toml changes, skipping dependency bounds check."
+
+  mcp-catalog-review:
+    name: MCP catalog security review
+    needs: changes
+    if: needs.changes.outputs.mcp_catalog == 'true'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          fetch-depth: 0
+
+      - name: Require explicit MCP catalog review label
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          PR="${{ github.event.pull_request.number }}"
+          LABELS=$(gh pr view "$PR" --json labels --jq '.labels[].name' || true)
+          if echo "$LABELS" | grep -Fxq 'mcp-catalog-reviewed'; then
+            echo "MCP catalog review label present."
+            exit 0
+          fi
+
+          BODY="## ⚠️ MCP catalog security review required
+
+          This PR changes the bundled MCP catalog or MCP catalog installer code. MCP entries can define local commands that users later install into \`mcp_servers\`, so this needs explicit maintainer review before merge.
+
+          A maintainer should verify:
+          - any new/changed \`optional-mcps/**/manifest.yaml\` command and args are expected,
+          - stdio transports do not use shell+egress/exfiltration payloads,
+          - git install refs are pinned and bootstrap commands are minimal,
+          - requested env vars/secrets match the upstream MCP's documented needs.
+
+          After review, add the \`mcp-catalog-reviewed\` label and re-run this check."
+
+          gh pr comment "$PR" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs)"
+          echo "::error::MCP catalog changes require the mcp-catalog-reviewed label."
+          exit 1
+
+  mcp-catalog-review-gate:
+    name: MCP catalog security review
+    needs: changes
+    if: always() && needs.changes.outputs.mcp_catalog != 'true'
+    runs-on: ubuntu-latest
+    steps:
+      - run: echo "No MCP catalog changes, skipping MCP catalog security review."
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -6,11 +6,11 @@ on:
    paths-ignore:
      - "**/*.md"
      - "docs/**"
+  # pull_request deliberately has no paths filter (unlike push above): the
+  # job must always run so the required check reports a status. Path-gated
+  # workflows leave checks "pending" forever when no files match, blocking merge.
  pull_request:
    branches: [main]
-    paths-ignore:
-      - "**/*.md"
-      - "docs/**"

 permissions:
  contents: read
@@ -219,4 +219,4 @@ jobs:
        env:
          OPENROUTER_API_KEY: ""
          OPENAI_API_KEY: ""
-          NOUS_API_KEY: ""
+          NOUS_API_KEY: ""
--- a/.github/workflows/typecheck.yml
+++ b/.github/workflows/typecheck.yml
@@ -4,6 +4,9 @@ name: Typecheck
 on:
  push:
    branches: [main]
+  # No paths filter — the job must always run so the required check
+  # reports a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]

@@ -23,3 +26,20 @@ jobs:
          cache: npm
      - run: npm ci
      - run: npm run --prefix ${{ matrix.package }} typecheck
+
+  # Production build of the desktop renderer. `typecheck` runs `tsc` only,
+  # which does NOT exercise Vite/Rolldown module resolution — so an
+  # unresolvable package export (e.g. a transitive @assistant-ui/tap that no
+  # longer exports "./react-shim") slips past typecheck and only explodes when
+  # users build apps/desktop from source on install/update. Run the real
+  # `vite build` here so that class of break fails in CI instead.
+  desktop-build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
+        with:
+          node-version: 22
+          cache: npm
+      - run: npm ci  # clean install from the lockfile, same step the typecheck job uses
+      - run: npm run --prefix apps/desktop build  # runs the "build" npm script of apps/desktop
--- a/.github/workflows/uv-lockfile-check.yml
+++ b/.github/workflows/uv-lockfile-check.yml
@@ -47,15 +47,15 @@ on:
  push:
    branches: [main]
    paths:
-      - 'pyproject.toml'
-      - 'uv.lock'
-      - '.github/workflows/uv-lockfile-check.yml'
+      - "pyproject.toml"
+      - "uv.lock"
+      - ".github/workflows/uv-lockfile-check.yml"
+
+  # pull_request deliberately has no paths filter (unlike push above): the
+  # job must always run so the required check reports a status. Path-gated
+  # workflows leave checks "pending" forever when no files match, blocking merge.
  pull_request:
    branches: [main]
-    paths:
-      - 'pyproject.toml'
-      - 'uv.lock'
-      - '.github/workflows/uv-lockfile-check.yml'

 permissions:
  contents: read
@@ -71,10 +71,10 @@ jobs:
    timeout-minutes: 5
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Install uv
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5

      # `uv lock --check` re-resolves the project from pyproject.toml and
      # compares the result to uv.lock, exiting non-zero if they disagree.
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -78,7 +78,41 @@ This isn't a quality bar — it's a coupling-and-maintenance decision. Memory pr
 | **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
 | **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |

-### Clone and install
+### Install with the standard installer
+
+For most contributors, the best development bootstrap is the same path users
+take: run the standard installer, then work inside the repository it cloned.
+The installer creates the Hermes venv, wires the `hermes` command, stamps the
+install method for `hermes update`, and clones the full git project into
+`$HERMES_HOME/hermes-agent` (usually `~/.hermes/hermes-agent`). That keeps your
+development environment on the same layout the CLI, updater, lazy dependency
+installer, gateway, and docs assume.
+
+```bash
+curl -fsSL https://hermes-agent.nousresearch.com/install.sh | bash
+cd "${HERMES_HOME:-$HOME/.hermes}/hermes-agent"
+
+# Add dev/test extras on top of the standard install.
+uv pip install -e ".[all,dev]"
+
+# Optional: browser tools / docs site dependencies.
+npm install
+```
+
+After that, create branches and run tests from that checkout:
+
+```bash
+git checkout -b fix/description
+scripts/run_tests.sh
+```
+
+### Manual clone fallback
+
+Use this only if you intentionally do not want Hermes' managed install layout
+(for example, a throwaway clone inside a container or CI job). If you install
+this way, make sure you run the `hermes` entrypoint from this venv; running the
+system `python3 -m hermes_cli.main` can pick up unrelated system Python
+packages.

 ```bash
 git clone https://github.com/NousResearch/hermes-agent.git
@@ -109,15 +143,19 @@ echo "OPENROUTER_API_KEY=***" >> ~/.hermes/.env
 ### Run

 ```bash
-# Symlink for global access
-mkdir -p ~/.local/bin
-ln -sf "$(pwd)/venv/bin/hermes" ~/.local/bin/hermes
-
-# Verify
+# The standard installer already put `hermes` on PATH.
 hermes doctor
 hermes chat -q "Hello"
 ```

+If you used the manual clone fallback, run `./hermes` from the checkout or
+symlink this clone's venv explicitly:
+
+```bash
+mkdir -p ~/.local/bin
+ln -sf "$(pwd)/venv/bin/hermes" ~/.local/bin/hermes
+```
+
 ### Run tests

 ```bash
--- a/README.md
+++ b/README.md
@@ -181,16 +181,20 @@ See `hermes claw migrate --help` for all options, or use the `openclaw-migration

 We welcome contributions! See the [Contributing Guide](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) for development setup, code style, and PR process.

-Quick start for contributors — clone and go with `setup-hermes.sh`:
+Quick start for contributors — use the standard installer, then work from the
+full git checkout it creates at `$HERMES_HOME/hermes-agent` (usually
+`~/.hermes/hermes-agent`). This matches the layout used by `hermes update`, the
+managed venv, lazy dependencies, gateway, and docs tooling.

 ```bash
-git clone https://github.com/NousResearch/hermes-agent.git
-cd hermes-agent
-./setup-hermes.sh     # installs uv, creates venv, installs .[all], symlinks ~/.local/bin/hermes
-./hermes              # auto-detects the venv, no need to `source` first
+curl -fsSL https://hermes-agent.nousresearch.com/install.sh | bash
+cd "${HERMES_HOME:-$HOME/.hermes}/hermes-agent"
+uv pip install -e ".[all,dev]"
+scripts/run_tests.sh
 ```

-Manual path (equivalent to the above):
+Manual clone fallback (for throwaway clones/CI where you intentionally do not
+want the managed install layout):

 ```bash
 curl -LsSf https://astral.sh/uv/install.sh | sh
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -164,16 +164,18 @@ hermes claw migrate --overwrite  # 覆盖已有冲突

 欢迎贡献！请参阅 [贡献指南](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) 了解开发设置、代码风格和 PR 流程。

-贡献者快速开始——克隆并使用 `setup-hermes.sh`：
+贡献者快速开始——使用标准安装器，然后在它创建的完整 git 检出目录中开发：
+`$HERMES_HOME/hermes-agent`（通常是 `~/.hermes/hermes-agent`）。这与
+`hermes update`、托管 venv、延迟依赖、gateway 和文档工具所使用的目录布局一致。

 ```bash
-git clone https://github.com/NousResearch/hermes-agent.git
-cd hermes-agent
-./setup-hermes.sh     # 安装 uv、创建 venv、安装 .[all]、创建符号链接 ~/.local/bin/hermes
-./hermes              # 自动检测 venv，无需先 source
+curl -fsSL https://hermes-agent.nousresearch.com/install.sh | bash
+cd "${HERMES_HOME:-$HOME/.hermes}/hermes-agent"
+uv pip install -e ".[all,dev]"
+scripts/run_tests.sh
 ```

-手动安装（等效于上述命令）：
+手动克隆备用路径（用于一次性克隆 / CI，或你明确不想使用托管安装布局时）：

 ```bash
 curl -LsSf https://astral.sh/uv/install.sh | sh
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@@ -27,7 +27,7 @@ import threading
 import time
 import uuid
 from datetime import datetime
-from typing import Any, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional
 from urllib.parse import urlparse, parse_qs, urlunparse

 from agent.context_compressor import ContextCompressor
@@ -195,6 +195,7 @@ def init_agent(
    status_callback: callable = None,
    notice_callback: callable = None,
    notice_clear_callback: callable = None,
+    event_callback: Optional[Callable[[str, dict], None]] = None,
    max_tokens: int = None,
    reasoning_config: Dict[str, Any] = None,
    service_tier: str = None,
@@ -299,6 +300,7 @@ def init_agent(
    # would mangle the escape sequences.  None = use builtins.print.
    agent._print_fn = None
    agent.background_review_callback = None  # Optional sync callback for gateway delivery
+    agent.memory_notifications = "on"  # Memory update notifications: "off", "on", "verbose"
    agent.skip_context_files = skip_context_files
    agent.load_soul_identity = load_soul_identity
    agent.pass_session_id = pass_session_id
@@ -425,6 +427,7 @@ def init_agent(
    agent.status_callback = status_callback
    agent.notice_callback = notice_callback
    agent.notice_clear_callback = notice_clear_callback
+    agent.event_callback = event_callback
    agent.tool_gen_callback = tool_gen_callback

    
@@ -900,6 +903,9 @@ def init_agent(
        agent.api_key = client_kwargs.get("api_key", "")
        agent.base_url = client_kwargs.get("base_url", agent.base_url)
        try:
+            from agent.ssl_guard import verify_ca_bundle_with_fallback
+
+            verify_ca_bundle_with_fallback()
            agent.client = agent._create_openai_client(client_kwargs, reason="agent_init", shared=True)
            if not agent.quiet_mode:
                print(f"🤖 AI Agent initialized with model: {agent.model}")
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@@ -881,6 +881,8 @@ def try_recover_primary_transport(

 def drop_thinking_only_and_merge_users(
    messages: List[Dict[str, Any]],
+    *,
+    drop_codex_reasoning_items: bool = True,
 ) -> List[Dict[str, Any]]:
    """Drop thinking-only assistant turns; merge any adjacent user messages left behind.

@@ -902,7 +904,13 @@ def drop_thinking_only_and_merge_users(
        return messages

    # Pass 1: drop thinking-only assistant turns.
-    kept = [m for m in messages if not _ra().AIAgent._is_thinking_only_assistant(m)]
+    kept = [
+        m for m in messages
+        if not _ra().AIAgent._is_thinking_only_assistant(
+            m,
+            drop_codex_reasoning_items=drop_codex_reasoning_items,
+        )
+    ]
    dropped = len(messages) - len(kept)
    if dropped == 0:
        return messages
@@ -1209,12 +1217,23 @@ def dump_api_request_debug(

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        dump_file = agent.logs_dir / f"request_dump_{agent.session_id}_{timestamp}.json"
-        atomic_json_write(dump_file, dump_payload, default=str)
+
+        # Redact secrets before persisting/printing. This dump captures the
+        # full request body (system prompt, tool defs, context-embedded
+        # values), and this path fires unconditionally on API errors — so it
+        # otherwise lands any context-embedded secret in cleartext on disk.
+        # Run the serialized dump through the same scrubber used for logs/tool
+        # output, then hand the resulting payload back to the shared atomic
+        # JSON writer so request dumps keep the same write semantics as before.
+        from agent.redact import redact_sensitive_text
+        _serialized = json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str)
+        _redacted_payload = json.loads(redact_sensitive_text(_serialized, force=True))
+        atomic_json_write(dump_file, _redacted_payload, default=str)

        agent._vprint(f"{agent.log_prefix}🧾 Request debug dump written to: {dump_file}")

        if env_var_enabled("HERMES_DUMP_REQUEST_STDOUT"):
-            print(json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str))
+            print(json.dumps(_redacted_payload, ensure_ascii=False, indent=2, default=str))

        return dump_file
    except Exception as dump_error:
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -751,6 +751,9 @@ def build_anthropic_client(
    from httpx import Timeout

    normalized_base_url = _normalize_base_url_text(base_url)
+    if normalized_base_url:
+        import re as _re
+        normalized_base_url = _re.sub(r"/v1/?$", "", normalized_base_url.rstrip("/"))
    _read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
    kwargs = {
        "timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -1144,7 +1144,8 @@ def _endpoint_speaks_anthropic_messages(base_url: str) -> bool:
    normalized = (base_url or "").strip().lower().rstrip("/")
    if not normalized:
        return False
-    if normalized.endswith("/anthropic"):
+    path = urlparse(normalized).path.rstrip("/")
+    if path.endswith("/anthropic") or path.endswith("/anthropic/v1"):
        return True
    hostname = base_url_hostname(normalized)
    if hostname == "api.anthropic.com":
@@ -3078,23 +3079,20 @@ def _try_configured_fallback_chain(
        if not fb_provider or fb_provider.lower() == skip:
            continue
        fb_model = str(entry.get("model", "")).strip() or None
-        fb_base_url = str(entry.get("base_url", "")).strip() or None
-        fb_api_key = str(entry.get("api_key", "")).strip() or None

        label = f"fallback_chain[{i}]({fb_provider})"

        try:
-            fb_client = _resolve_single_provider(
-                fb_provider, fb_model, fb_base_url, fb_api_key)
+            fb_client, resolved_model = _resolve_fallback_entry(entry)
        except Exception:
-            fb_client = None
+            fb_client, resolved_model = None, None

        if fb_client is not None:
            logger.info(
                "Auxiliary %s: %s on %s — configured fallback to %s (%s)",
-                task, reason, failed_provider, label, fb_model or "default",
+                task, reason, failed_provider, label, resolved_model or fb_model or "default",
            )
-            return fb_client, fb_model, label
+            return fb_client, resolved_model or fb_model, label
        tried.append(label)

    if tried:
@@ -3105,6 +3103,103 @@ def _try_configured_fallback_chain(
    return None, None, ""


+def _fallback_entry_api_key(entry: Dict[str, Any]) -> Optional[str]:
+    """Return the entry's inline ``api_key``, else the value of the env var it names."""
+    inline_key = str(entry.get("api_key") or "").strip()
+    if not inline_key:
+        env_name = str(entry.get("key_env") or entry.get("api_key_env") or "").strip()
+        # No env name, an unset variable, and a blank variable all yield None.
+        from_env = os.getenv(env_name, "").strip() if env_name else ""
+        return from_env or None
+    return inline_key
+
+
+def _resolve_fallback_entry(entry: Dict[str, Any]) -> Tuple[Optional[Any], Optional[str]]:
+    """Resolve one fallback entry through the central provider router.
+
+    Returns ``resolve_provider_client``'s ``(client, model)`` pair, or
+    ``(None, None)`` when the entry names no provider.  ``model`` is optional:
+    a model-less entry is forwarded as ``model=None`` for the router to
+    resolve, instead of the entry being silently dropped here.
+    """
+    provider = str(entry.get("provider") or "").strip()
+    if not provider:
+        return None, None
+    model = str(entry.get("model") or "").strip() or None
+    base_url = str(entry.get("base_url") or "").strip() or None
+    api_mode = str(entry.get("api_mode") or entry.get("transport") or "").strip() or None
+    return resolve_provider_client(provider, model=model, explicit_base_url=base_url,
+                                   explicit_api_key=_fallback_entry_api_key(entry), api_mode=api_mode)
+
+
+def _try_main_fallback_chain(
+    task: Optional[str],
+    failed_provider: str = "",
+    reason: str = "error",
+) -> Tuple[Optional[Any], Optional[str], str]:
+    """Try the top-level main-agent fallback chain for an auxiliary call.
+
+    Lets ``provider: auto`` auxiliary tasks honor the user's main fallback
+    policy before Hermes' built-in discovery chain. The chain comes from
+    ``get_fallback_chain`` so modern ``fallback_providers`` and legacy
+    ``fallback_model`` entries participate in the main agent's order.
+    Returns ``(client, model, provider)``, or ``(None, None, "")`` if none resolve.
+    """
+    try:
+        from hermes_cli.config import load_config
+        from hermes_cli.fallback_config import get_fallback_chain
+
+        chain = get_fallback_chain(load_config())
+    except Exception as exc:
+        logger.debug("Auxiliary %s: could not load main fallback chain: %s", task or "call", exc)
+        return None, None, ""
+
+    if not chain:
+        return None, None, ""
+
+    failed_norm = (failed_provider or "").strip().lower()
+    main_norm = (_read_main_provider() or "").strip().lower()
+    skip = {p for p in (failed_norm, main_norm, "auto") if p}  # never retry the failed/main provider or "auto"
+    tried: List[str] = []
+
+    for i, entry in enumerate(chain):
+        if not isinstance(entry, dict):
+            continue
+        fb_provider = str(entry.get("provider") or "").strip()
+        fb_model = str(entry.get("model") or "").strip()
+        if not fb_provider or not fb_model:  # entries missing a provider or a model are ignored (not logged)
+            continue
+        fb_norm = fb_provider.lower()
+        label = f"fallback_providers[{i}]({fb_provider})"
+        if fb_norm in skip:
+            tried.append(f"{label} (skipped)")
+            continue
+        if _is_provider_unhealthy(fb_norm):
+            _log_skip_unhealthy(fb_norm, task)
+            tried.append(f"{label} (unhealthy)")
+            continue
+        try:
+            fb_client, resolved_model = _resolve_fallback_entry(entry)
+        except Exception as exc:  # a resolution error only disqualifies this entry; keep walking the chain
+            logger.debug("Auxiliary %s: main fallback %s failed to resolve: %s", task or "call", label, exc)
+            fb_client, resolved_model = None, None
+        if fb_client is not None:
+            logger.info(
+                "Auxiliary %s: %s on %s — main fallback chain to %s (%s)",
+                task or "call", reason, failed_provider or "auto", label,
+                resolved_model or fb_model,
+            )
+            return fb_client, resolved_model or fb_model, fb_provider  # third item is the provider name, not ``label``
+        tried.append(label)
+
+    if tried:
+        logger.debug(
+            "Auxiliary %s: main fallback chain exhausted (tried: %s)",
+            task or "call", ", ".join(tried),
+        )
+    return None, None, ""
+
+
 def _resolve_single_provider(
    provider: str,
    model: Optional[str] = None,
@@ -3115,16 +3210,19 @@ def _resolve_single_provider(

    Uses the existing provider resolution infrastructure where possible.
    """
-    # Reuse resolve_provider_client which handles provider→client mapping
+    # Reuse resolve_provider_client which handles provider→client mapping.
    client, resolved_model = resolve_provider_client(
        provider=provider,
        model=model,
-        base_url=base_url,
-        api_key=api_key,
+        explicit_base_url=base_url,
+        explicit_api_key=api_key,
    )
    return client

-def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Optional[OpenAI], Optional[str]]:
+def _resolve_auto(
+    main_runtime: Optional[Dict[str, Any]] = None,
+    task: Optional[str] = None,
+) -> Tuple[Optional[OpenAI], Optional[str]]:
    """Full auto-detection chain.

    Priority:
@@ -3222,7 +3320,22 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
                            main_provider, resolved or main_model)
                return client, resolved or main_model

-    # ── Step 2: aggregator / fallback chain ──────────────────────────────
+    # ── Step 2: user-configured fallback policy ─────────────────────────
+    # In auto mode, respect the task-specific fallback chain first, then the
+    # main agent's top-level fallback_providers/fallback_model chain. The
+    # hardcoded provider discovery chain below is only the convenience default
+    # for users who have not declared a fallback policy.
+    if task:
+        fb_client, fb_model, _fb_label = _try_configured_fallback_chain(
+            task, main_provider or "auto", reason="main provider unavailable")
+        if fb_client is not None:
+            return fb_client, fb_model
+    fb_client, fb_model, _fb_label = _try_main_fallback_chain(
+        task, main_provider or "auto", reason="main provider unavailable")
+    if fb_client is not None:
+        return fb_client, fb_model
+
+    # ── Step 3: aggregator / fallback chain ──────────────────────────────
    tried = []
    for label, try_fn in _get_provider_chain():
        if _is_provider_unhealthy(label):
@@ -3343,6 +3456,7 @@ def resolve_provider_client(
    api_mode: str = None,
    main_runtime: Optional[Dict[str, Any]] = None,
    is_vision: bool = False,
+    task: Optional[str] = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Central router: given a provider name and optional model, return a
    configured client with the correct auth, base URL, and API format.
@@ -3463,7 +3577,7 @@ def resolve_provider_client(

    # ── Auto: try all providers in priority order ────────────────────
    if provider == "auto":
-        client, resolved = _resolve_auto(main_runtime=main_runtime)
+        client, resolved = _resolve_auto(main_runtime=main_runtime, task=task)
        if client is None:
            return None, None
        # When auto-detection lands on a non-OpenRouter provider (e.g. a
@@ -4356,11 +4470,16 @@ def _client_cache_key(
    api_mode: Optional[str] = None,
    main_runtime: Optional[Dict[str, Any]] = None,
    is_vision: bool = False,
+    task: Optional[str] = None,
 ) -> tuple:
    runtime = _normalize_main_runtime(main_runtime)
    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
+    # `auto` can now resolve through task-specific or main fallback policy,
+    # so the task participates in the cache key. Non-auto providers keep the
+    # old cache shape because the explicit provider/model tuple is sufficient.
+    task_key = (task or "") if provider == "auto" else ""
    pool_hint = _pool_cache_hint(provider, main_runtime=main_runtime)
-    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision, pool_hint)
+    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision, task_key, pool_hint)


 def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
@@ -4553,6 +4672,7 @@ def _get_cached_client(
    api_mode: str = None,
    main_runtime: Optional[Dict[str, Any]] = None,
    is_vision: bool = False,
+    task: Optional[str] = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Get or create a cached client for the given provider.

@@ -4590,6 +4710,7 @@ def _get_cached_client(
        api_mode=api_mode,
        main_runtime=main_runtime,
        is_vision=is_vision,
+        task=task,
    )
    with _client_cache_lock:
        if cache_key in _client_cache:
@@ -4634,6 +4755,7 @@ def _get_cached_client(
        api_mode=api_mode,
        main_runtime=runtime,
        is_vision=is_vision,
+        task=task,
    )
    if client is not None:
        # For async clients, remember which loop they were created on so we
@@ -5004,7 +5126,7 @@ def _build_call_kwargs(

    # Provider-specific extra_body
    merged_extra = dict(extra_body or {})
-    if provider == "nous" or auxiliary_is_nous:
+    if provider == "nous":
        merged_extra.setdefault("tags", []).extend(_nous_portal_tags())
    if merged_extra:
        kwargs["extra_body"] = merged_extra
@@ -5139,7 +5261,7 @@ def call_llm(
            if not resolved_base_url:
                logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
                            task or "call", resolved_provider)
-                client, final_model = _get_cached_client("auto", main_runtime=main_runtime)
+                client, final_model = _get_cached_client("auto", main_runtime=main_runtime, task=task)
        if client is None:
            raise RuntimeError(
                f"No LLM provider configured for task={task} provider={resolved_provider}. "
@@ -5465,14 +5587,19 @@ def call_llm(

            # Fallback order (#26882, #26803):
            #   1. User-configured fallback_chain (per-task) if set
-            #   2. Main agent model (last-resort safety net)
-            # For auto users (no explicit aux provider), use the full
-            # auto-detection chain instead — its Step 1 IS the main agent
-            # model, so users on `auto` already get main-model fallback.
+            #   2. For auto: top-level main fallback_providers/fallback_model
+            #   3. For auto: built-in auxiliary discovery chain
+            #   4. For explicit aux providers: main agent model safety net
            fb_client, fb_model, fb_label = (None, None, "")
            if is_auto:
-                fb_client, fb_model, fb_label = _try_payment_fallback(
-                    resolved_provider, task, reason=reason)
+                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
+                    task, resolved_provider or "auto", reason=reason)
+                if fb_client is None:
+                    fb_client, fb_model, fb_label = _try_main_fallback_chain(
+                        task, resolved_provider or "auto", reason=reason)
+                if fb_client is None:
+                    fb_client, fb_model, fb_label = _try_payment_fallback(
+                        resolved_provider, task, reason=reason)
            else:
                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
                    task, resolved_provider or "auto", reason=reason)
@@ -5635,7 +5762,7 @@ async def async_call_llm(
            if not resolved_base_url:
                logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
                            task or "call", resolved_provider)
-                client, final_model = _get_cached_client("auto", async_mode=True)
+                client, final_model = _get_cached_client("auto", async_mode=True, main_runtime=main_runtime, task=task)
        if client is None:
            raise RuntimeError(
                f"No LLM provider configured for task={task} provider={resolved_provider}. "
@@ -5903,13 +6030,19 @@ async def async_call_llm(

            # Fallback order (#26882, #26803):
            #   1. User-configured fallback_chain (per-task) if set
-            #   2. Main agent model (last-resort safety net)
-            # Auto users get the full auto-detection chain instead — its
-            # Step 1 IS the main agent model.
+            #   2. For auto: top-level main fallback_providers/fallback_model
+            #   3. For auto: built-in auxiliary discovery chain
+            #   4. For explicit aux providers: main agent model safety net
            fb_client, fb_model, fb_label = (None, None, "")
            if is_auto:
-                fb_client, fb_model, fb_label = _try_payment_fallback(
-                    resolved_provider, task, reason=reason)
+                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
+                    task, resolved_provider or "auto", reason=reason)
+                if fb_client is None:
+                    fb_client, fb_model, fb_label = _try_main_fallback_chain(
+                        task, resolved_provider or "auto", reason=reason)
+                if fb_client is None:
+                    fb_client, fb_model, fb_label = _try_payment_fallback(
+                        resolved_provider, task, reason=reason)
            else:
                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
                    task, resolved_provider or "auto", reason=reason)
--- a/agent/background_review.py
+++ b/agent/background_review.py
@@ -237,18 +237,25 @@ _COMBINED_REVIEW_PROMPT = (
 def summarize_background_review_actions(
    review_messages: List[Dict],
    prior_snapshot: List[Dict],
+    notification_mode: str = "on",
 ) -> List[str]:
    """Build the human-facing action summary for a background review pass.

-    Walks the review agent's session messages and collects "successful tool
-    action" descriptions to surface to the user (e.g. "Memory updated").
-    Tool messages already present in ``prior_snapshot`` are skipped so we
-    don't re-surface stale results from the prior conversation that the
-    review agent inherited via ``conversation_history`` (issue #14944).
+    Walks the review agent's session messages and collects successful memory
+    and skill-management actions to surface to the user. Tool messages already
+    present in ``prior_snapshot`` are skipped so stale inherited results are
+    not re-surfaced as fresh background work (issue #14944).

-    Matching is by ``tool_call_id`` when available, with a content-equality
-    fallback for tool messages that lack one.
+    ``notification_mode`` controls display detail:
+    - ``off``: return no actions.
+    - ``on``: generic "Memory updated"/tool messages.
+    - ``verbose``: include compact content previews from tool-call arguments.
    """
+    mode = str(notification_mode or "on").lower()
+    if mode == "off":
+        return []
+    verbose = mode == "verbose"
+
    existing_tool_call_ids = set()
    existing_tool_contents = set()
    for prior in prior_snapshot or []:
@@ -262,6 +269,42 @@ def summarize_background_review_actions(
            if isinstance(content, str):
                existing_tool_contents.add(content)

+    # Map review-agent tool results back to the calls that produced them.  The
+    # result JSON only says "Entry added"; the call arguments contain action,
+    # target, and content previews.  Restricting to notify_tools also prevents
+    # helper tools from surfacing as memory work just because they succeeded.
+    notify_tools = {"memory", "skill_manage"}
+    all_tool_call_ids: set = set()
+    call_details: dict = {}
+    for msg in review_messages or []:
+        if not isinstance(msg, dict) or msg.get("role") != "assistant":
+            continue
+        for tc in msg.get("tool_calls", []) or []:
+            if not isinstance(tc, dict):
+                continue
+            fn = tc.get("function", {}) or {}
+            fn_name = fn.get("name", "")
+            tcid = tc.get("id")
+            if tcid:
+                all_tool_call_ids.add(tcid)
+            if fn_name not in notify_tools:
+                continue
+            try:
+                args = json.loads(fn.get("arguments", "{}"))
+            except (json.JSONDecodeError, TypeError):
+                args = {}
+            if tcid:
+                call_details[tcid] = {
+                    "tool": fn_name,
+                    "action": args.get("action", "?"),
+                    "target": args.get("target", "memory"),
+                    "content": args.get("content", ""),
+                    "old_text": args.get("old_text", ""),
+                    "name": args.get("name", ""),
+                    "old_string": args.get("old_string", ""),
+                    "new_string": args.get("new_string", ""),
+                }
+
    actions: List[str] = []
    for msg in review_messages or []:
        if not isinstance(msg, dict) or msg.get("role") != "tool":
@@ -273,6 +316,8 @@ def summarize_background_review_actions(
            content_str = msg.get("content")
            if isinstance(content_str, str) and content_str in existing_tool_contents:
                continue
+        if tcid and all_tool_call_ids and tcid not in call_details:
+            continue
        try:
            data = json.loads(msg.get("content", "{}"))
        except (json.JSONDecodeError, TypeError):
@@ -280,19 +325,75 @@ def summarize_background_review_actions(
        if not isinstance(data, dict) or not data.get("success"):
            continue
        message = data.get("message", "")
-        target = data.get("target", "")
-        if "created" in message.lower():
-            actions.append(message)
-        elif "updated" in message.lower():
-            actions.append(message)
-        elif "added" in message.lower() or (target and "add" in message.lower()):
-            label = "Memory" if target == "memory" else "User profile" if target == "user" else target
-            actions.append(f"{label} updated")
-        elif "Entry added" in message:
-            label = "Memory" if target == "memory" else "User profile" if target == "user" else target
-            actions.append(f"{label} updated")
-        elif "removed" in message.lower() or "replaced" in message.lower():
+        detail = call_details.get(tcid, {})
+        target = data.get("target", "") or detail.get("target", "")
+        is_skill = detail.get("tool") == "skill_manage"
+
+        message_lower = message.lower()
+        if not verbose:
+            if "created" in message_lower:
+                actions.append(message)
+                continue
+            if "updated" in message_lower:
+                actions.append(message)
+                continue
+            if is_skill and "patched" in message_lower:
+                actions.append(message)
+                continue
+
+        if is_skill:
+            label = "Skill"
+        elif target:
            label = "Memory" if target == "memory" else "User profile" if target == "user" else target
+        else:
+            continue
+
+        if verbose:
+            action = detail.get("action", "")
+            content = detail.get("content", "")
+            old_text = detail.get("old_text", "")
+            skill_name = detail.get("name", "")
+            max_preview = 120
+            if is_skill:
+                change = data.get("_change", {})
+                old_string = change.get("old", "") or detail.get("old_string", "")
+                new_string = change.get("new", "") or detail.get("new_string", "")
+                description = change.get("description", "")
+                if action == "patch" and (old_string or new_string):
+                    old_preview = old_string[:80].replace("\n", " ") + (
+                        "…" if len(old_string) > 80 else ""
+                    )
+                    new_preview = new_string[:80].replace("\n", " ") + (
+                        "…" if len(new_string) > 80 else ""
+                    )
+                    actions.append(
+                        f"📝 Skill '{skill_name}' patched: "
+                        f"\"{old_preview}\" → \"{new_preview}\""
+                    )
+                elif action == "create" and description:
+                    actions.append(f"📝 Skill '{skill_name}' created: {description}")
+                elif action == "edit" and description:
+                    actions.append(f"📝 Skill '{skill_name}' rewritten: {description}")
+                else:
+                    actions.append(f"📝 {message}" if message else f"Skill {action}")
+            elif action == "add" and content:
+                preview = content[:max_preview] + ("…" if len(content) > max_preview else "")
+                actions.append(f"{label} ➕ {preview}")
+            elif action == "replace" and content:
+                preview = content[:max_preview] + ("…" if len(content) > max_preview else "")
+                actions.append(f"{label} ✏️ {preview}")
+            elif action == "remove" and old_text:
+                preview = old_text[:60] + ("…" if len(old_text) > 60 else "")
+                actions.append(f"{label} ➖ {preview}")
+            else:
+                actions.append(f"{label} updated")
+        elif (
+            "added" in message_lower
+            or "replaced" in message_lower
+            or "removed" in message_lower
+            or (target and "add" in message.lower())
+            or "Entry added" in message
+        ):
            actions.append(f"{label} updated")
    return actions

@@ -522,6 +623,7 @@ def _run_review_in_thread(
        actions = summarize_background_review_actions(
            review_messages,
            messages_snapshot,
+            notification_mode=getattr(agent, "memory_notifications", "on"),
        )

        if actions:
--- a/agent/bedrock_adapter.py
+++ b/agent/bedrock_adapter.py
@@ -58,17 +58,34 @@ _bedrock_runtime_client_cache: Dict[str, Any] = {}
 _bedrock_control_client_cache: Dict[str, Any] = {}


+_MIN_BOTO3_VERSION = (1, 34, 59)
+
+
 def _require_boto3():
-    """Import boto3, raising a clear error if not installed."""
+    """Import boto3, raising a clear error if not installed or too old."""
    try:
        import boto3
-        return boto3
    except ImportError:
        raise ImportError(
            "The 'boto3' package is required for the AWS Bedrock provider. "
            "Install it with: pip install boto3\n"
            "Or install Hermes with Bedrock support: pip install -e '.[bedrock]'"
        )
+    # converse() / converse_stream() were added in boto3 1.34.59.
+    # When Hermes is installed editable into system Python, the system boto3
+    # (e.g. Ubuntu 24.04 ships 1.34.46) may take precedence over the venv
+    # version pinned in pyproject.toml.
+    try:
+        version = tuple(int(x) for x in boto3.__version__.split(".")[:3])
+    except (AttributeError, ValueError):
+        return boto3  # can't parse — don't block on version check
+    if version < _MIN_BOTO3_VERSION:
+        raise RuntimeError(
+            f"boto3 {boto3.__version__} does not support converse_stream "
+            f"(minimum 1.34.59 required). Upgrade with: "
+            f"pip install --upgrade boto3"
+        )
+    return boto3


 def _get_bedrock_runtime_client(region: str):
@@ -935,11 +952,14 @@ def build_converse_kwargs(
    if system_prompt:
        kwargs["system"] = system_prompt

-    if temperature is not None:
-        kwargs["inferenceConfig"]["temperature"] = temperature
+    from agent.anthropic_adapter import _forbids_sampling_params

-    if top_p is not None:
-        kwargs["inferenceConfig"]["topP"] = top_p
+    if not _forbids_sampling_params(model):
+        if temperature is not None:
+            kwargs["inferenceConfig"]["temperature"] = temperature
+
+        if top_p is not None:
+            kwargs["inferenceConfig"]["topP"] = top_p

    if stop_sequences:
        kwargs["inferenceConfig"]["stopSequences"] = stop_sequences
--- a/agent/codex_responses_adapter.py
+++ b/agent/codex_responses_adapter.py
@@ -1081,6 +1081,7 @@ def _normalize_codex_response(
    message_items_raw: List[Dict[str, Any]] = []
    tool_calls: List[Any] = []
    has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
+    saw_streaming_or_item_incomplete = response_status in {"queued", "in_progress"}
    saw_commentary_phase = False
    saw_final_answer_phase = False
    saw_reasoning_item = False
@@ -1095,6 +1096,7 @@ def _normalize_codex_response(

        if item_status in {"queued", "in_progress", "incomplete"}:
            has_incomplete_items = True
+            saw_streaming_or_item_incomplete = True

        if item_type == "message":
            item_phase = getattr(item, "phase", None)
@@ -1252,7 +1254,9 @@ def _normalize_codex_response(
        finish_reason = "tool_calls"
    elif leaked_tool_call_text:
        finish_reason = "incomplete"
-    elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
+    elif saw_streaming_or_item_incomplete:
+        finish_reason = "incomplete"
+    elif (has_incomplete_items or saw_commentary_phase) and not saw_final_answer_phase:
        finish_reason = "incomplete"
    elif (reasoning_items_raw or reasoning_parts or saw_reasoning_item) and not final_text:
        # Response contains only reasoning (encrypted thinking state and/or
--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@@ -40,6 +40,16 @@ from agent.model_metadata import estimate_request_tokens_rough

 logger = logging.getLogger(__name__)

+# Stable marker the gateway matches on to re-tag the auto-compaction lifecycle
+# status as ``kind="compacting"`` (tui_gateway/server.py::_status_update), so
+# drivers like the desktop app can show an explicit "Summarizing…" indicator
+# instead of the transcript appearing to silently reset. Keep the marker phrase
+# intact if you reword COMPACTION_STATUS.
+COMPACTION_STATUS_MARKER = "Compacting context"
+COMPACTION_STATUS = (
+    f"🗜️ {COMPACTION_STATUS_MARKER} — summarizing earlier conversation so I can continue..."
+)
+

 def _compression_lock_holder(agent: Any) -> str:
    """Build a unique holder id for the lock: pid:tid:agent-instance:uuid.
@@ -324,9 +334,7 @@ def compress_context(
        f"{approx_tokens:,}" if approx_tokens else "unknown", agent.model,
        focus_topic,
    )
-    agent._emit_status(
-        "🗜️ Compacting context — summarizing earlier conversation so I can continue..."
-    )
+    agent._emit_status(COMPACTION_STATUS)

    # ── Compression lock ────────────────────────────────────────────────
    # Atomic, state.db-backed lock per session_id.  Without this, two
@@ -595,6 +603,20 @@ def compress_context(
            force=True,
        )

+    # Emit session:compress event so hooks (e.g. MemPalace sync) can ingest
+    # the completed old session before its details are lost.
+    _old_sid_for_event = locals().get("old_session_id")
+    if getattr(agent, "event_callback", None):
+        try:
+            agent.event_callback("session:compress", {
+                "platform": agent.platform or "",
+                "session_id": agent.session_id,
+                "old_session_id": _old_sid_for_event or "",
+                "compression_count": agent.context_compressor.compression_count,
+            })
+        except Exception as e:
+            logger.debug("event_callback error on session:compress: %s", e)
+
    # Keep the post-compression rough estimate for diagnostics, but do not
    # treat it as provider-reported prompt usage. Schema-heavy rough estimates
    # can remain above threshold even after the next real API request fits.
@@ -631,7 +653,11 @@ def compress_context(
    return compressed, new_system_prompt


-def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
+def try_shrink_image_parts_in_messages(
+    api_messages: list,
+    *,
+    max_dimension: int = 8000,
+) -> bool:
    """Re-encode all native image parts at a smaller size to recover from
    image-too-large errors (Anthropic 5 MB, unknown other providers).

@@ -642,7 +668,8 @@ def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
    Strategy: look for ``image_url`` / ``input_image`` parts carrying a
    ``data:image/...;base64,...`` payload.  For each one whose encoded
    size exceeds 4 MB (a safe target that slides under Anthropic's 5 MB
-    ceiling with header overhead), write the base64 to a tempfile, call
+    ceiling with header overhead) or whose longest side exceeds
+    ``max_dimension``, write the base64 to a tempfile, call
    ``vision_tools._resize_image_for_vision`` to produce a smaller data
    URL, and substitute it in place.

@@ -664,10 +691,9 @@ def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
    # after a confirmed provider rejection, so the alternative is failure.
    target_bytes = 4 * 1024 * 1024
    # Anthropic enforces an 8000px per-side dimension cap independently of
-    # the 5 MB byte cap.  A tall screenshot can be well under 5 MB yet far
-    # over 8000px (e.g. 1200×12000 at 0.06 MB).  We check pixel dimensions
-    # even when the byte budget is fine.
-    max_dimension = 8000
+    # the 5 MB byte cap.  In many-image requests, the provider can report a
+    # lower cap (observed: 2000px).  The caller passes that parsed ceiling
+    # when the rejection includes it.
    changed_count = 0
    # Track parts that are over the target but could NOT be shrunk under it.
    # If any survive, retrying is pointless — the same oversized payload will
@@ -684,9 +710,9 @@ def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
        # Check both byte size AND pixel dimensions.
        needs_shrink = len(url) > target_bytes  # over byte budget
        if not needs_shrink:
-            # Even if bytes are fine, check pixel dimensions against
-            # Anthropic's 8000px cap.  A tall image can be tiny in bytes
-            # yet huge in pixels.
+            # Even if bytes are fine, check pixel dimensions against the
+            # provider's reported per-side cap.  A screenshot can be tiny in
+            # bytes yet too large in pixels.
            try:
                import base64 as _b64_dim
                header_d, _, data_d = url.partition(",")
@@ -795,6 +821,8 @@ def try_shrink_image_parts_in_messages(api_messages: list) -> bool:


 __all__ = [
+    "COMPACTION_STATUS",
+    "COMPACTION_STATUS_MARKER",
    "check_compression_model_feasibility",
    "replay_compression_warning",
    "compress_context",
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -71,6 +71,35 @@ logger = logging.getLogger(__name__)
 INTERRUPT_WAITING_FOR_MODEL_PREFIX = "Operation interrupted: waiting for model response ("


+def _image_error_max_dimension(error: Exception) -> Optional[int]:
+    """Extract a provider-reported image dimension ceiling, if present."""
+    parts = []
+    for value in (
+        error,
+        getattr(error, "message", None),
+        getattr(error, "body", None),
+    ):
+        if value:
+            try:
+                parts.append(str(value))
+            except Exception:
+                pass
+    text = " ".join(parts).lower()
+    if "image" not in text or "dimension" not in text or "max allowed size" not in text:
+        return None
+
+    match = re.search(r"max allowed size(?:\s+for [^:]+)?:\s*(\d{3,5})\s*pixels?", text)
+    if not match:
+        return None
+    try:
+        max_dimension = int(match.group(1))
+    except ValueError:
+        return None
+    if 512 <= max_dimension <= 8000:
+        return max_dimension
+    return None
+
+
 def _ollama_context_limit_error(agent: Any, request_tokens: int) -> Optional[str]:
    """Return a user-facing error when Ollama is loaded with too little context."""
    if not getattr(agent, "tools", None):
@@ -271,11 +300,20 @@ def _restore_or_build_system_prompt(agent, system_message, conversation_history)
                agent.session_id, exc,
            )

-    if stored_prompt:
+    if stored_prompt and _stored_prompt_matches_runtime(agent, stored_prompt):
        # Continuing session — reuse the exact system prompt from the
        # previous turn so the Anthropic cache prefix matches.
        agent._cached_system_prompt = stored_prompt
        return
+    if stored_prompt:
+        stored_state = "stale_runtime"
+        logger.info(
+            "Stored system prompt for session %s has stale runtime identity; "
+            "rebuilding for model=%s provider=%s.",
+            agent.session_id,
+            getattr(agent, "model", "") or "",
+            getattr(agent, "provider", "") or "",
+        )

    if conversation_history and stored_state in ("null", "empty"):
        # Continuing session whose stored prompt is unusable.  The
@@ -337,6 +375,30 @@ def _restore_or_build_system_prompt(agent, system_message, conversation_history)
            )


+def _stored_prompt_matches_runtime(agent, prompt: str) -> bool:
+    """Return False when the persisted Model/Provider lines are stale."""
+
+    def line_value(label: str) -> str:
+        prefix = f"{label}:"
+        value = ""
+        for line in prompt.splitlines():
+            if line.startswith(prefix):
+                value = line[len(prefix):].strip()
+        return value
+
+    stored_model = line_value("Model")
+    current_model = str(getattr(agent, "model", "") or "").strip()
+    if stored_model and current_model and stored_model != current_model:
+        return False
+
+    stored_provider = line_value("Provider")
+    current_provider = str(getattr(agent, "provider", "") or "").strip()
+    if stored_provider and current_provider and stored_provider != current_provider:
+        return False
+
+    return True
+
+
 def _get_continuation_prompt(is_partial_stub: bool, dropped_tools: Optional[List[str]] = None) -> str:
    if is_partial_stub and dropped_tools:
        tool_list = ", ".join(dropped_tools[:3])
@@ -368,6 +430,42 @@ def _get_continuation_prompt(is_partial_stub: bool, dropped_tools: Optional[List
        )


+# Shared recovery hint appended to every content-policy refusal message. Both
+# the HTTP-200 refusal path (``finish_reason=content_filter``) and the
+# exception path (a provider moderation error classified as
+# ``content_policy_blocked``) end with the same actionable next steps, so they
+# share one trailer to keep the guidance from drifting between the two sites.
+_CONTENT_POLICY_RECOVERY_HINT = (
+    "Try rephrasing the request, narrowing the context, or "
+    "adding a fallback provider with `hermes fallback add`."
+)
+
+
+def _content_policy_blocked_result(
+    messages: List[Dict],
+    api_call_count: int,
+    *,
+    final_response: str,
+    error_detail: str,
+) -> Dict[str, Any]:
+    """Build the terminal turn result for a content-policy block.
+
+    A content-policy refusal is deterministic for the unchanged prompt, so the
+    turn ends here (no retry). Both the HTTP-200 refusal handler and the
+    exception-path handler return the identical shape — a failed, non-completed
+    turn carrying the user-facing message and a ``content_policy_blocked:``
+    prefixed error — so they funnel through this one builder.
+    """
+    return {
+        "final_response": final_response,
+        "messages": messages,
+        "api_calls": api_call_count,
+        "completed": False,
+        "failed": True,
+        "error": f"content_policy_blocked: {error_detail}",
+    }
+
+
 def run_conversation(
    agent,
    user_message: str,
@@ -707,7 +805,10 @@ def run_conversation(
        # a thinking-only turn. Runs on the per-call copy only — the
        # stored conversation history keeps the reasoning block for the
        # UI transcript and session persistence.
-        api_messages = agent._drop_thinking_only_and_merge_users(api_messages)
+        api_messages = agent._drop_thinking_only_and_merge_users(
+            api_messages,
+            drop_codex_reasoning_items=agent.api_mode != "codex_responses",
+        )

        # Normalize message whitespace and tool-call JSON for consistent
        # prefix matching.  Ensures bit-perfect prefixes across turns,
@@ -1316,6 +1417,106 @@ def run_conversation(
                        )
                        finish_reason = "length"

+                # ── Content-policy refusal (HTTP 200) ──────────────────
+                # The model — or the provider's safety system — returned a
+                # *successful* response whose stop/finish reason is a refusal:
+                # Anthropic ``stop_reason="refusal"`` → ``content_filter``;
+                # OpenAI / portal ``finish_reason="content_filter"`` or a
+                # populated ``message.refusal`` (mapped in the chat_completions
+                # transport); Bedrock ``guardrail_intervened``. The content is
+                # typically empty, so without this branch the response falls
+                # through to the empty-response / invalid-response retry loops
+                # and is mis-surfaced as "rate limited" / "no content after
+                # retries" — burning paid attempts reproducing a deterministic
+                # refusal. Surface it clearly and stop. Mirrors the
+                # exception-based ``content_policy_blocked`` recovery: try a
+                # configured fallback once, otherwise return the refusal.
+                if finish_reason == "content_filter":
+                    _refusal_transport = agent._get_transport()
+                    if agent.api_mode == "anthropic_messages":
+                        _refusal_result = _refusal_transport.normalize_response(
+                            response, strip_tool_prefix=agent._is_anthropic_oauth
+                        )
+                    else:
+                        _refusal_result = _refusal_transport.normalize_response(response)
+                    _refusal_text = (getattr(_refusal_result, "content", None) or "").strip()
+                    # Some refusals carry the explanation only in the reasoning
+                    # channel; fall back to it so the user sees *something*.
+                    if not _refusal_text:
+                        _refusal_text = (agent._extract_reasoning(_refusal_result) or "").strip()
+
+                    agent._invoke_api_request_error_hook(
+                        task_id=effective_task_id,
+                        turn_id=turn_id,
+                        api_request_id=api_request_id,
+                        api_call_count=api_call_count,
+                        api_start_time=api_start_time,
+                        api_kwargs=api_kwargs,
+                        error_type="ContentPolicyBlocked",
+                        error_message=_refusal_text or "model declined to respond (content_filter)",
+                        status_code=None,
+                        retry_count=retry_count,
+                        max_retries=max_retries,
+                        retryable=False,
+                        reason=FailoverReason.content_policy_blocked.value,
+                    )
+
+                    if thinking_spinner:
+                        thinking_spinner.stop("")
+                        thinking_spinner = None
+                    if agent.thinking_callback:
+                        agent.thinking_callback("")
+
+                    # Deterministic for the unchanged prompt — never retry.
+                    # Try a configured fallback once (a different model may not
+                    # refuse); otherwise surface the refusal terminally.
+                    if agent._has_pending_fallback():
+                        agent._buffer_status(
+                            "⚠️ Model declined to respond (safety refusal) — trying fallback..."
+                        )
+                    if agent._try_activate_fallback():
+                        retry_count = 0
+                        compression_attempts = 0
+                        _retry.primary_recovery_attempted = False
+                        continue
+
+                    agent._flush_status_buffer()
+                    _refusal_log = (
+                        _refusal_text[:500] + "..."
+                        if len(_refusal_text) > 500
+                        else _refusal_text
+                    )
+                    logger.warning(
+                        "%sModel declined to respond (finish_reason=content_filter). "
+                        "model=%s provider=%s refusal=%s",
+                        agent.log_prefix, agent.model, agent.provider,
+                        _refusal_log or "(no text)",
+                    )
+                    agent._emit_status(
+                        "⚠️ The model declined to respond to this request (safety refusal)."
+                    )
+
+                    _refusal_detail = (
+                        f"Model's explanation: {_refusal_text}"
+                        if _refusal_text
+                        else "The model returned no explanation."
+                    )
+                    _refusal_response = (
+                        "⚠️  The model declined to respond to this request "
+                        "(safety refusal — not a Hermes/gateway failure).\n\n"
+                        f"{_refusal_detail}\n\n"
+                        f"{_CONTENT_POLICY_RECOVERY_HINT}"
+                    )
+
+                    agent._cleanup_task_resources(effective_task_id)
+                    agent._persist_session(messages, conversation_history)
+                    return _content_policy_blocked_result(
+                        messages,
+                        api_call_count,
+                        final_response=_refusal_response,
+                        error_detail=_refusal_text or "model declined (content_filter)",
+                    )
+
                if finish_reason == "length":
                    if getattr(response, "id", "") == PARTIAL_STREAM_STUB_ID:
                        agent._vprint(
@@ -2067,7 +2268,11 @@ def run_conversation(
                    and not _retry.image_shrink_retry_attempted
                ):
                    _retry.image_shrink_retry_attempted = True
-                    if agent._try_shrink_image_parts_in_messages(api_messages):
+                    image_max_dimension = _image_error_max_dimension(api_error) or 8000
+                    if agent._try_shrink_image_parts_in_messages(
+                        api_messages,
+                        max_dimension=image_max_dimension,
+                    ):
                        agent._vprint(
                            f"{agent.log_prefix}📐 Image(s) exceeded provider size limit — "
                            f"shrank and retrying...",
@@ -3083,20 +3288,17 @@ def run_conversation(
                    if classified.reason == FailoverReason.content_policy_blocked:
                        _summary = agent._summarize_api_error(api_error)
                        _policy_response = (
-                            f"⚠️  The model provider's safety filter blocked this request "
-                            f"(not a Hermes/gateway failure).\n\n"
+                            "⚠️  The model provider's safety filter blocked this request "
+                            "(not a Hermes/gateway failure).\n\n"
                            f"Provider message: {_summary}\n\n"
-                            f"Try rephrasing the request, narrowing the context, or "
-                            f"adding a fallback provider with `hermes fallback add`."
+                            f"{_CONTENT_POLICY_RECOVERY_HINT}"
+                        )
+                        return _content_policy_blocked_result(
+                            messages,
+                            api_call_count,
+                            final_response=_policy_response,
+                            error_detail=_summary,
                        )
-                        return {
-                            "final_response": _policy_response,
-                            "messages": messages,
-                            "api_calls": api_call_count,
-                            "completed": False,
-                            "failed": True,
-                            "error": f"content_policy_blocked: {_summary}",
-                        }
                    return {
                        "final_response": None,
                        "messages": messages,
--- a/agent/copilot_acp_client.py
+++ b/agent/copilot_acp_client.py
@@ -70,16 +70,6 @@ def _resolve_args() -> list[str]:

 def _resolve_home_dir() -> str:
    """Return a stable HOME for child ACP processes."""
-
-    try:
-        from hermes_constants import get_subprocess_home
-
-        profile_home = get_subprocess_home()
-        if profile_home:
-            return profile_home
-    except Exception:
-        pass
-
    home = os.environ.get("HOME", "").strip()
    if home:
        return home
@@ -105,7 +95,10 @@ def _resolve_home_dir() -> str:

 def _build_subprocess_env() -> dict[str, str]:
    env = os.environ.copy()
-    env["HOME"] = _resolve_home_dir()
+    home = _resolve_home_dir()
+    env["HOME"] = home
+    from hermes_constants import apply_subprocess_home_env
+    apply_subprocess_home_env(env)
    return env


--- a/agent/curator_backup.py
+++ b/agent/curator_backup.py
@@ -454,16 +454,16 @@ def _restore_cron_skill_links(snapshot_dir: Path) -> Dict[str, Any]:
        report["attempted"] = True  # we tried but there was nothing to do
        return report

-    # Load and rewrite the live jobs under the scheduler's lock.
+    # Load and rewrite the live jobs under the scheduler's cross-process lock.
    try:
-        from cron.jobs import load_jobs, save_jobs, _jobs_file_lock
+        from cron.jobs import load_jobs, save_jobs, _jobs_lock
    except ImportError as e:
        report["error"] = f"cron module unavailable: {e}"
        return report

    report["attempted"] = True
    try:
-        with _jobs_file_lock:
+        with _jobs_lock():
            live_jobs = load_jobs()
            changed = False

--- a/agent/display.py
+++ b/agent/display.py
@@ -12,6 +12,7 @@ import time
 from dataclasses import dataclass, field
 from difflib import unified_diff
 from pathlib import Path
+from typing import Any

 from utils import safe_json_loads
 from agent.tool_result_classification import file_mutation_result_landed
@@ -168,6 +169,27 @@ def _oneline(text: str) -> str:
    return " ".join(text.split())


+def _truncate_preview(text: str, max_len: int | None) -> str:
+    if max_len and max_len > 0 and len(text) > max_len:
+        if max_len <= 3:
+            return "." * max_len
+        return text[:max_len - 3] + "..."
+    return text
+
+
+def _delegate_task_goal_parts(tasks: Any, *, per_goal_len: int) -> tuple[int, list[str]]:
+    if not isinstance(tasks, list):
+        return 0, []
+    goals: list[str] = []
+    for task in tasks:
+        if not isinstance(task, dict):
+            continue
+        raw_goal = task.get("goal")
+        goal = "?" if raw_goal is None else _oneline(str(raw_goal))
+        goals.append(_truncate_preview(goal or "?", per_goal_len))
+    return len(goals), goals
+
+
 def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -> str | None:
    """Build a short preview of a tool call's primary argument for display.

@@ -191,6 +213,22 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
        "clarify": "question", "skill_manage": "name",
    }

+    # delegate_task: show goal (single) or individual task goals (batch)
+    if tool_name == "delegate_task":
+        tasks = args.get("tasks")
+        if tasks and isinstance(tasks, list):
+            task_count, goals = _delegate_task_goal_parts(tasks, per_goal_len=40)
+            preview = (
+                f"{task_count} tasks: " + " | ".join(goals)
+                if goals else f"{len(tasks)} parallel tasks"
+            )
+            return _truncate_preview(preview, max_len)
+        goal = args.get("goal", "")
+        if goal is None:
+            return None
+        preview = _oneline(str(goal))
+        return _truncate_preview(preview, max_len) if preview else None
+
    if tool_name == "process":
        action = args.get("action", "")
        sid = args.get("session_id", "")
@@ -858,20 +896,6 @@ def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]
    return False, ""


-def _used_free_parallel(result: str | None) -> bool:
-    """True when a web result came from Parallel's free Search MCP.
-
-    Only the keyless Parallel path tags its result with ``provider="parallel"``;
-    the paid REST path and every other provider omit it. Used to label the tool
-    line "Parallel search" / "Parallel fetch" exactly when the free MCP served
-    the call.
-    """
-    if not isinstance(result, str) or '"provider"' not in result:
-        return False
-    data = safe_json_loads(result)
-    return isinstance(data, dict) and str(data.get("provider", "")).lower() == "parallel"
-
-
 def get_cute_tool_message(
    tool_name: str, args: dict, duration: float, result: str | None = None,
 ) -> str:
@@ -909,17 +933,15 @@ def get_cute_tool_message(
        return f"{line}{failure_suffix}"

    if tool_name == "web_search":
-        verb = "Parallel search" if _used_free_parallel(result) else "search"
-        return _wrap(f"┊ 🔍 {verb:<9} {_trunc(args.get('query', ''), 42)}  {dur}")
+        return _wrap(f"┊ 🔍 search    {_trunc(args.get('query', ''), 42)}  {dur}")
    if tool_name == "web_extract":
-        verb = "Parallel fetch" if _used_free_parallel(result) else "fetch"
        urls = args.get("urls", [])
        if urls:
            url = urls[0] if isinstance(urls, list) else str(urls)
            domain = url.replace("https://", "").replace("http://", "").split("/")[0]
            extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
-            return _wrap(f"┊ 📄 {verb:<9} {_trunc(domain, 35)}{extra}  {dur}")
-        return _wrap(f"┊ 📄 {verb:<9} pages  {dur}")
+            return _wrap(f"┊ 📄 fetch     {_trunc(domain, 35)}{extra}  {dur}")
+        return _wrap(f"┊ 📄 fetch     pages  {dur}")
    if tool_name == "terminal":
        return _wrap(f"┊ 💻 $         {_trunc(args.get('command', ''), 42)}  {dur}")
    if tool_name == "process":
@@ -1035,7 +1057,10 @@ def get_cute_tool_message(
    if tool_name == "delegate_task":
        tasks = args.get("tasks")
        if tasks and isinstance(tasks, list):
-            return _wrap(f"┊ 🔀 delegate  {len(tasks)} parallel tasks  {dur}")
+            task_count, goals = _delegate_task_goal_parts(tasks, per_goal_len=30)
+            detail = " | ".join(goals) if goals else "parallel"
+            count_label = task_count or len(tasks)
+            return _wrap(f"┊ 🔀 delegate  {count_label}x: {_trunc(detail, 35)}  {dur}")
        return _wrap(f"┊ 🔀 delegate  {_trunc(args.get('goal', ''), 35)}  {dur}")

    preview = build_tool_preview(tool_name, args) or ""
--- a/agent/errors.py
+++ b/agent/errors.py
@@ -0,0 +1,3 @@
+class SSLConfigurationError(Exception):
+    """Raised when SSL/TLS certificate bundle configuration fails."""
+    pass
--- a/agent/file_safety.py
+++ b/agent/file_safety.py
@@ -46,11 +46,6 @@ def build_write_denied_paths(home: str) -> set[str]:
            # Top-level Anthropic PKCE credential store remains sensitive even
            # when a profile is active; default/non-profile sessions still read it.
            str(hermes_root / ".anthropic_oauth.json"),
-            os.path.join(home, ".bashrc"),
-            os.path.join(home, ".zshrc"),
-            os.path.join(home, ".profile"),
-            os.path.join(home, ".bash_profile"),
-            os.path.join(home, ".zprofile"),
            os.path.join(home, ".netrc"),
            os.path.join(home, ".pgpass"),
            os.path.join(home, ".npmrc"),
@@ -104,12 +99,6 @@ def is_write_denied(path: str) -> bool:
        if resolved.startswith(prefix):
            return True

-    # Hermes control-plane files: block both the ACTIVE profile's view
-    # (hermes_home) AND the global root view. Without the root pass, a
-    # profile-mode session leaves <root>/auth.json + <root>/config.yaml
-    # writable — letting a prompt-injected write_file overwrite the global
-    # files that every profile inherits from (same shape as #15981).
-    control_file_names = ("auth.json", "config.yaml", "webhook_subscriptions.json")
    mcp_tokens_dir_name = "mcp-tokens"

    hermes_dirs = []
@@ -122,12 +111,6 @@ def is_write_denied(path: str) -> bool:
            continue

    for base_real in hermes_dirs:
-        for name in control_file_names:
-            try:
-                if resolved == os.path.realpath(os.path.join(base_real, name)):
-                    return True
-            except Exception:
-                continue
        try:
            mcp_real = os.path.realpath(os.path.join(base_real, mcp_tokens_dir_name))
            if resolved == mcp_real or resolved.startswith(mcp_real + os.sep):
--- a/agent/gemini_native_adapter.py
+++ b/agent/gemini_native_adapter.py
@@ -41,6 +41,16 @@ DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
 GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65535


+def bare_gemini_model_id(model: str) -> str:
+    """Strip Gemini's own provider prefix from an aggregator-style model id."""
+    name = (model or "").strip()
+    lowered = name.lower()
+    for prefix in ("google/", "gemini/"):
+        if lowered.startswith(prefix):
+            return name[len(prefix):].strip() or name
+    return name
+
+
 def is_native_gemini_base_url(base_url: str) -> bool:
    """Return True when the endpoint speaks Gemini's native REST API."""
    normalized = str(base_url or "").strip().rstrip("/").lower()
@@ -914,6 +924,7 @@ class GeminiNativeClient:
            thinking_config=thinking_config,
        )

+        model = bare_gemini_model_id(model)
        if stream:
            return self._stream_completion(model=model, request=request, timeout=timeout)

--- a/agent/memory_manager.py
+++ b/agent/memory_manager.py
@@ -33,6 +33,7 @@ from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Dict, List, Optional

 from agent.memory_provider import MemoryProvider
+from agent.skill_commands import extract_user_instruction_from_skill_message
 from tools.registry import tool_error

 logger = logging.getLogger(__name__)
@@ -430,16 +431,37 @@ class MemoryManager:

    # -- Prefetch / recall ---------------------------------------------------

+    @staticmethod
+    def _strip_skill_scaffolding(text: str) -> Optional[str]:
+        """Return memory-worthy user text, or None to skip the turn.
+
+        When a user invokes a /skill or /bundle, Hermes expands the turn into
+        a model-facing message that embeds the entire skill body. Feeding that
+        verbatim to memory providers pollutes their stores/embeddings with
+        prompt scaffolding instead of what the user actually asked. We recover
+        just the user's instruction here, once, for every provider — so this
+        is fixed for the whole provider fan-out, not per backend.
+
+        - Non-skill messages pass through unchanged.
+        - Skill turns with a user instruction return that instruction.
+        - Bare skill invocations (no instruction) return None → callers skip
+          the turn, since there is no user content worth remembering.
+        """
+        return extract_user_instruction_from_skill_message(text)
+
    def prefetch_all(self, query: str, *, session_id: str = "") -> str:
        """Collect prefetch context from all providers.

        Returns merged context text labeled by provider. Empty providers
        are skipped. Failures in one provider don't block others.
        """
+        clean_query = self._strip_skill_scaffolding(query)
+        if not clean_query:
+            return ""
        parts = []
        for provider in self._providers:
            try:
-                result = provider.prefetch(query, session_id=session_id)
+                result = provider.prefetch(clean_query, session_id=session_id)
                if result and result.strip():
                    parts.append(result)
            except Exception as e:
@@ -460,10 +482,14 @@ class MemoryManager:
        if not providers:
            return

+        clean_query = self._strip_skill_scaffolding(query)
+        if not clean_query:
+            return
+
        def _run() -> None:
            for provider in providers:
                try:
-                    provider.queue_prefetch(query, session_id=session_id)
+                    provider.queue_prefetch(clean_query, session_id=session_id)
                except Exception as e:
                    logger.debug(
                        "Memory provider '%s' queue_prefetch failed (non-fatal): %s",
@@ -515,6 +541,11 @@ class MemoryManager:
        if not providers:
            return

+        clean_user_content = self._strip_skill_scaffolding(user_content)
+        if not clean_user_content:
+            return
+        user_content = clean_user_content
+
        def _run() -> None:
            for provider in providers:
                try:
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -5,6 +5,7 @@ and run_agent.py for pre-flight context checks.
 """

 import ipaddress
+import json
 import logging
 import os
 import re
@@ -16,7 +17,7 @@ from urllib.parse import urlparse
 import requests
 import yaml

-from utils import base_url_host_matches, base_url_hostname
+from utils import atomic_json_write, base_url_host_matches, base_url_hostname

 from hermes_constants import OPENROUTER_MODELS_URL

@@ -111,6 +112,57 @@ _endpoint_model_metadata_cache: Dict[str, Dict[str, Dict[str, Any]]] = {}
 _endpoint_model_metadata_cache_time: Dict[str, float] = {}
 _ENDPOINT_MODEL_CACHE_TTL = 300

+
+def _get_model_metadata_cache_path() -> Path:
+    """Return path to the OpenRouter model metadata disk cache."""
+    from hermes_constants import get_hermes_home
+    return get_hermes_home() / "cache" / "openrouter_model_metadata.json"
+
+
+def _model_metadata_disk_cache_age_seconds() -> Optional[float]:
+    """Return disk-cache age in seconds, or None if freshness is unknown."""
+    try:
+        cache_path = _get_model_metadata_cache_path()
+        if not cache_path.exists():
+            return None
+        age = time.time() - cache_path.stat().st_mtime
+        if age < 0:
+            return None
+        return age
+    except Exception:
+        return None
+
+
+def _load_model_metadata_disk_cache() -> Dict[str, Dict[str, Any]]:
+    """Load processed OpenRouter metadata cache from disk."""
+    try:
+        cache_path = _get_model_metadata_cache_path()
+        with cache_path.open("r", encoding="utf-8") as f:
+            data = json.load(f)
+        if not isinstance(data, dict):
+            return {}
+        return {
+            str(key): value
+            for key, value in data.items()
+            if isinstance(value, dict)
+        }
+    except Exception as e:
+        logger.debug("Failed to load OpenRouter model metadata disk cache: %s", e)
+        return {}
+
+
+def _save_model_metadata_disk_cache(data: Dict[str, Dict[str, Any]]) -> None:
+    """Save processed OpenRouter metadata cache to disk atomically."""
+    try:
+        atomic_json_write(
+            _get_model_metadata_cache_path(),
+            data,
+            indent=0,
+            separators=(",", ":"),
+        )
+    except Exception as e:
+        logger.debug("Failed to save OpenRouter model metadata disk cache: %s", e)
+
 # Descending tiers for context length probing when the model is unknown.
 # We start at 256K (covers GPT-5.x, many current large-context models) and
 # step down on context-length errors until one works.  Tier[0] is also the
@@ -209,7 +261,13 @@ DEFAULT_CONTEXT_LENGTHS = {
    # https://platform.minimax.io/docs/api-reference/text-chat-openai
    "minimax-m3": 1000000,
    "minimax": 204800,
-    # GLM
+    # GLM — GLM-5.2 ships with a 1M context window (verified empirically:
+    # needle-in-a-haystack retrieval at 789K prompt tokens succeeded with
+    # zero errors on api.z.ai/api/coding/paas/v4).  Older GLM models
+    # (5, 5.1, 5-turbo) are ~202K.  Longest-key-first substring matching
+    # ensures "glm-5.2" resolves to 1M while older variants still hit the
+    # generic 202K fallback.
+    "glm-5.2": 1_048_576,
    "glm": 202752,
    # xAI Grok — xAI /v1/models does not return context_length metadata,
    # so these hardcoded fallbacks prevent Hermes from probing-down to
@@ -627,6 +685,15 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
    if not force_refresh and _model_metadata_cache and (time.time() - _model_metadata_cache_time) < _MODEL_CACHE_TTL:
        return _model_metadata_cache

+    if not force_refresh:
+        disk_age = _model_metadata_disk_cache_age_seconds()
+        if disk_age is not None and disk_age < _MODEL_CACHE_TTL:
+            disk_cache = _load_model_metadata_disk_cache()
+            if disk_cache:
+                _model_metadata_cache = disk_cache
+                _model_metadata_cache_time = time.time() - disk_age
+                return _model_metadata_cache
+
    try:
        response = requests.get(OPENROUTER_MODELS_URL, timeout=10, verify=_resolve_requests_verify())
        response.raise_for_status()
@@ -648,12 +715,24 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any

        _model_metadata_cache = cache
        _model_metadata_cache_time = time.time()
+        _save_model_metadata_disk_cache(cache)
        logger.debug("Fetched metadata for %s models from OpenRouter", len(cache))
        return cache

    except Exception as e:
        logger.warning(f"Failed to fetch model metadata from OpenRouter: {e}")
-        return _model_metadata_cache or {}
+        if _model_metadata_cache:
+            return _model_metadata_cache
+        disk_cache = _load_model_metadata_disk_cache()
+        if disk_cache:
+            _model_metadata_cache = disk_cache
+            disk_age = _model_metadata_disk_cache_age_seconds()
+            if disk_age is not None:
+                _model_metadata_cache_time = time.time() - min(disk_age, _MODEL_CACHE_TTL)
+            else:
+                _model_metadata_cache_time = time.time() - _MODEL_CACHE_TTL + 1
+            return _model_metadata_cache
+        return {}


 def fetch_endpoint_model_metadata(
--- a/agent/pet/init.py
+++ b/agent/pet/init.py
@@ -0,0 +1,51 @@
+"""Petdex pet engine — shared core for the CLI, TUI, and desktop surfaces.
+
+Petdex (https://github.com/crafter-station/petdex) is a public gallery of
+animated sprite "pets" for coding agents.  Each pet is a ``pet.json`` plus a
+``spritesheet.{webp,png}`` of 192×208 px cells. Current Codex/petdex sheets use
+an 8-column × 9-row atlas; older Hermes/petdex sheets used an 8-row atlas.
+Hermes infers the row taxonomy from the sheet and maps agent activity onto
+idle/run/review/failed/wave/jump/waiting.
+
+This package is the **single source of truth** for the feature so the base
+CLI (Python) and TUI (Ink, via ``tui_gateway``) never duplicate the hard
+parts:
+
+- :mod:`agent.pet.constants` — frame geometry + the :class:`PetState` enum.
+- :mod:`agent.pet.state`     — map agent activity → a :class:`PetState`.
+- :mod:`agent.pet.manifest`  — fetch the public petdex manifest.
+- :mod:`agent.pet.store`     — install / list / resolve pets on disk
+                               (profile-aware via ``get_hermes_home()``).
+- :mod:`agent.pet.render`    — decode a spritesheet and encode frames for a
+                               terminal (kitty / iTerm2 / sixel graphics
+                               protocols, with a Unicode half-block
+                               fallback).
+
+Rendering in the Electron desktop is necessarily TypeScript (canvas), but it
+reuses the same on-disk store and the same state semantics.
+
+The whole feature is a *display* concern: it adds no model tool, mutates no
+system prompt or toolset, and therefore has zero effect on prompt caching.
+"""
+
+from agent.pet.constants import (
+    DEFAULT_SCALE,
+    FRAME_H,
+    FRAME_W,
+    FRAMES_PER_STATE,
+    LOOP_MS,
+    STATE_ROWS,
+    PetState,
+)
+from agent.pet.state import derive_pet_state
+
+__all__ = [
+    "DEFAULT_SCALE",
+    "FRAME_H",
+    "FRAME_W",
+    "FRAMES_PER_STATE",
+    "LOOP_MS",
+    "STATE_ROWS",
+    "PetState",
+    "derive_pet_state",
+]
--- a/agent/pet/constants.py
+++ b/agent/pet/constants.py
@@ -0,0 +1,167 @@
+"""Pet sprite geometry + animation-state taxonomy.
+
+These values are the common petdex/Codex pet geometry. The real ``pet.json``
+usually only carries ``id``/``displayName``/``description``/``spritesheetPath``;
+row taxonomy is inferred from the atlas shape so Hermes can render both legacy
+8-row sheets and current 9-row Codex sheets.
+"""
+
+from __future__ import annotations
+
+from enum import Enum
+
+# Frame geometry (pixels). Current Codex/petdex spritesheets are 8 columns x 9
+# rows (1536x1872), while older Hermes/petdex sheets used 9 columns x 8 rows
+# (1728x1664). Renderers derive both row taxonomy and real column count from the
+# concrete sheet, so either shape works.
+FRAME_W = 192
+FRAME_H = 208
+
+# Frames consumed per animation state (the petdex web app uses CSS
+# ``steps(6)``).  A sheet may physically contain more columns; we only step
+# through the first ``FRAMES_PER_STATE``.
+FRAMES_PER_STATE = 6
+
+# Full-loop duration for one state, milliseconds (petdex default).
+LOOP_MS = 1100
+
+# Default on-screen scale relative to native frame size.  ``display.pet.scale``
+# is the single master scalar: the desktop canvas multiplies its native pixels
+# by it and every terminal surface derives its half-block/kitty column width
+# from it (see :func:`cols_for_scale`), so one number shrinks all three
+# interfaces together.  (petdex's own clients render at 0.7; we default smaller
+# so the kitty/GUI mascot stays a glanceable corner sprite.  The half-block
+# fallback can't shrink as far — see ``UNICODE_MIN_COLS`` — and clamps to its
+# legibility floor instead.)
+DEFAULT_SCALE = 0.33
+
+# User-settable scale bounds (``/pet scale``, desktop slider).  Floor keeps the
+# pet clickable/visible; ceiling stops a fat-fingered value from filling the
+# screen.  The unicode fallback additionally clamps to ``UNICODE_MIN_COLS``.
+MIN_SCALE = 0.1
+MAX_SCALE = 3.0
+
+
+def clamp_scale(scale: float) -> float:
+    """Clamp *scale* to ``[MIN_SCALE, MAX_SCALE]`` (the single validation point)."""
+    return max(MIN_SCALE, min(MAX_SCALE, scale))
+
+# Terminal cells one native frame spans at ``scale == 1.0``.  A cell is ~8px
+# wide, a frame is ``FRAME_W`` (192) px → 24 cells.  This mirrors the kitty
+# graphics placement (``scaled_px // 8``) so at full scale every renderer agrees.
+BASE_UNICODE_COLS = FRAME_W // 8
+
+# Legibility floor for the half-block fallback.  A half-block cell samples the
+# sprite at only 1 horizontal + 2 vertical taps, so below this width a 192×208
+# pet collapses into an unreadable blob *regardless* of scale.  kitty/GUI draw
+# true pixels and have no such floor — that's why the same ``scale: 0.33`` is
+# crisp there but mush in half-blocks.  ``scale`` shrinks the unicode pet down
+# TO this floor (and grows it above), instead of past it into noise.
+UNICODE_MIN_COLS = 16
+
+
+def cols_for_scale(scale: float) -> int:
+    """Half-block width implied by *scale*, clamped to the legibility floor.
+
+    Above the floor it tracks the kitty cell box (``scaled_px // 8``) so the two
+    renderers converge at larger sizes; below it the floor keeps the sprite
+    readable rather than letting it devolve into a blob.
+    """
+    return max(UNICODE_MIN_COLS, round(BASE_UNICODE_COLS * (scale or DEFAULT_SCALE)))
+
+
+def resolve_cols(scale: float, unicode_cols: int = 0) -> int:
+    """Resolve terminal width: explicit *unicode_cols* override, else from *scale*."""
+    return int(unicode_cols) if unicode_cols and int(unicode_cols) > 0 else cols_for_scale(scale)
+
+
+class PetState(str, Enum):
+    """Animation state a pet can be shown in.
+
+    These are Hermes' activity state names. They are not always identical to the
+    source atlas row names: Codex-format pets use rows like ``jumping`` /
+    ``running`` while the UI keeps the shorter ``jump`` / ``run`` names.
+    """
+
+    IDLE = "idle"
+    WAVE = "wave"
+    RUN = "run"
+    FAILED = "failed"
+    REVIEW = "review"
+    JUMP = "jump"
+    WAITING = "waiting"
+
+
+# Legacy Hermes/petdex row order (top -> bottom) used by the older 8-row,
+# 9-column atlas shape.
+LEGACY_STATE_ROWS: list[str] = [
+    PetState.IDLE.value,
+    PetState.WAVE.value,
+    PetState.RUN.value,
+    PetState.FAILED.value,
+    PetState.REVIEW.value,
+    PetState.JUMP.value,
+    "extra1",
+    "extra2",
+]
+
+# Current Petdex row order (top -> bottom) used by 1536x1872 atlases:
+# 8 columns x 9 rows of 192x208 cells.
+CODEX_STATE_ROWS: list[str] = [
+    PetState.IDLE.value,
+    "running-right",
+    "running-left",
+    "waving",
+    "jumping",
+    PetState.FAILED.value,
+    PetState.WAITING.value,
+    "running",
+    PetState.REVIEW.value,
+]
+
+# Default/fallback for callers without a sheet. Prefer the current 9-row Codex
+# format because generated pets and the public Codex pet contract use it.
+STATE_ROWS: list[str] = CODEX_STATE_ROWS
+
+# Canonical Hermes activity names -> accepted row-name aliases in descending
+# preference. This keeps our internal state names stable (`wave`/`jump`/`run`)
+# while matching Petdex's current `waving`/`jumping`/`running` taxonomy.
+STATE_ALIASES: dict[str, tuple[str, ...]] = {
+    PetState.IDLE.value: (PetState.IDLE.value,),
+    PetState.WAVE.value: (PetState.WAVE.value, "waving"),
+    PetState.JUMP.value: (PetState.JUMP.value, "jumping"),
+    PetState.RUN.value: (PetState.RUN.value, "running"),
+    PetState.FAILED.value: (PetState.FAILED.value,),
+    PetState.REVIEW.value: (PetState.REVIEW.value,),
+    PetState.WAITING.value: (PetState.WAITING.value,),
+}
+
+
+def state_aliases_for(state: "PetState | str") -> tuple[str, ...]:
+    """Return accepted row-name aliases for *state* (always non-empty)."""
+    value = state.value if isinstance(state, PetState) else str(state)
+    aliases = STATE_ALIASES.get(value)
+    return aliases if aliases else (value,)
+
+
+def state_rows_for_grid(row_count: int | None) -> list[str]:
+    """Return the row taxonomy for a spritesheet with *row_count* rows."""
+    try:
+        rows = int(row_count or 0)
+    except (TypeError, ValueError):
+        rows = 0
+
+    if rows >= len(CODEX_STATE_ROWS):
+        return CODEX_STATE_ROWS
+    return LEGACY_STATE_ROWS
+
+
+def state_row_index(state: "PetState | str", row_count: int | None = None) -> int:
+    """Return the spritesheet row index for *state* (clamped, never raises)."""
+    rows = state_rows_for_grid(row_count)
+    for name in state_aliases_for(state):
+        try:
+            return rows.index(name)
+        except ValueError:
+            continue
+    return 0  # fall back to the idle row
--- a/agent/pet/generate/init.py
+++ b/agent/pet/generate/init.py
@@ -0,0 +1,29 @@
+"""Pet generation — base-draft → hatch pipeline.
+
+Public surface used by the gateway RPCs, the CLI ``hermes pets generate``
+command, and tests:
+
+- :func:`generate_base_drafts` / :func:`hatch_pet` — the two-step flow.
+- :class:`HatchResult`, :class:`GenerationError`.
+- :mod:`atlas` — deterministic frame extraction + atlas composition/validation.
+
+Image generation is delegated to the active reference-capable
+:class:`~agent.image_gen_provider.ImageGenProvider` (OpenAI gpt-image-2 or Krea);
+atlas assembly is fully deterministic so it's testable without any API calls.
+"""
+
+from __future__ import annotations
+
+from agent.pet.generate.imagegen import GenerationError
+from agent.pet.generate.orchestrate import (
+    HatchResult,
+    generate_base_drafts,
+    hatch_pet,
+)
+
+__all__ = [
+    "GenerationError",
+    "HatchResult",
+    "generate_base_drafts",
+    "hatch_pet",
+]
--- a/agent/pet/generate/atlas.py
+++ b/agent/pet/generate/atlas.py
@@ -0,0 +1,400 @@
+"""Deterministic spritesheet assembly — generated row strips → Hermes atlas.
+
+Image-generation models are good at *drawing* a row of poses but bad at exact
+grid geometry, so the model never owns the atlas layout: it produces one loose
+horizontal strip per state, and these deterministic ops slice that strip into
+clean, centered, transparent ``192x208`` cells and pack them into the sheet our
+renderer reads.
+
+The atlas is **Hermes-native**, not the petdex/Codex format. Our renderer
+(:mod:`agent.pet.render`) keys frames as ``rows = states, cols = frames``; a
+6-row sheet resolves to :data:`agent.pet.constants.LEGACY_STATE_ROWS`, so we
+emit exactly its first six states — idle, wave, run, failed, review, jump —
+left-packed with trailing transparent cells (which the renderer trims). Sheet
+is ``COLUMNS*192 x ROWS*208`` (1152x1248).
+
+The frame-segmentation, fit-to-cell, and transparency-residue logic is adapted
+from OpenAI's ``hatch-pet`` skill (openai/skills, Apache-2.0).
+"""
+
+from __future__ import annotations
+
+import io
+import logging
+import math
+from pathlib import Path
+
+from agent.pet.constants import FRAME_H, FRAME_W
+
+logger = logging.getLogger(__name__)
+
+CELL_WIDTH = FRAME_W
+CELL_HEIGHT = FRAME_H
+
+# (state, row index, frame count). Order/row indices MUST match the first six
+# entries of ``LEGACY_STATE_ROWS`` (the taxonomy ``state_rows_for_grid`` returns
+# for a 6-row sheet) so the renderer crops the right row for each driven state.
+# Frame counts are the petdex-ish per-state lengths; the renderer trims trailing
+# blank columns, so rows shorter than ``COLUMNS`` just leave the tail transparent.
+ROW_SPECS: list[tuple[str, int, int]] = [
+    ("idle", 0, 6),
+    ("wave", 1, 4),
+    ("run", 2, 6),
+    ("failed", 3, 6),
+    ("review", 4, 6),
+    ("jump", 5, 5),
+]
+
+ROWS = len(ROW_SPECS)
+COLUMNS = max(count for _, _, count in ROW_SPECS)
+ATLAS_WIDTH = COLUMNS * CELL_WIDTH
+ATLAS_HEIGHT = ROWS * CELL_HEIGHT
+
+FRAME_COUNTS: dict[str, int] = {state: count for state, _, count in ROW_SPECS}
+
+# Alpha at/below which a pixel is "background" for component detection.
+_ALPHA_FLOOR = 16
+# Cell padding kept around a fitted sprite so poses never touch the edge.
+_CELL_PAD = 10
+
+
+# ───────────────────────── background removal ─────────────────────────
+
+
+def _color_distance(r: int, g: int, b: int, key: tuple[int, int, int]) -> float:
+    return math.sqrt((r - key[0]) ** 2 + (g - key[1]) ** 2 + (b - key[2]) ** 2)
+
+
+def _has_transparency(image) -> bool:
+    """True if the strip already carries a real alpha background."""
+    extrema = image.getchannel("A").getextrema()
+    # Needs some alpha at/below the floor, and >5% of pixels at/below it.
+    if extrema[0] > _ALPHA_FLOOR:
+        return False
+    hist = image.getchannel("A").histogram()
+    transparent = sum(hist[: _ALPHA_FLOOR + 1])
+    total = image.width * image.height
+    return transparent > total * 0.05
+
+
+def _dominant_corner_color(image) -> tuple[int, int, int]:
+    """Most common non-transparent corner color (pure green if none qualify)."""
+    from collections import Counter
+
+    w, h = image.width, image.height
+    px = image.load()
+    counter: Counter = Counter()
+    for x, y in ((0, 0), (w - 1, 0), (0, h - 1), (w - 1, h - 1)):
+        r, g, b, a = px[x, y]
+        if a > _ALPHA_FLOOR:
+            counter[(r, g, b)] += 1
+    if not counter:
+        return (0, 255, 0)
+    return counter.most_common(1)[0][0]
+
+
+def remove_background(image, *, chroma_key: tuple[int, int, int] | None = None, threshold: float = 110.0):
+    """Return *image* (RGBA) with its flat background keyed out to transparent.
+
+    If the strip already has a transparent background we leave it alone; else we
+    key out *chroma_key* (or the dominant corner color when not given). This
+    handles both providers that emit transparency natively and those that paint
+    a solid backdrop.
+    """
+    rgba = image.convert("RGBA")
+    if _has_transparency(rgba):
+        return rgba
+
+    key = chroma_key or _dominant_corner_color(rgba)
+    px = rgba.load()
+    for y in range(rgba.height):
+        for x in range(rgba.width):
+            r, g, b, a = px[x, y]
+            if a > _ALPHA_FLOOR and _color_distance(r, g, b, key) <= threshold:
+                px[x, y] = (0, 0, 0, 0)
+    return rgba
+
+
+# ───────────────────────── frame extraction ─────────────────────────
+
+
+def _fit_to_cell(image):
+    """Crop to content, scale to fit a padded cell, and center on transparent."""
+    from PIL import Image
+
+    target = Image.new("RGBA", (CELL_WIDTH, CELL_HEIGHT), (0, 0, 0, 0))
+    bbox = image.getbbox()
+    if bbox is None:
+        return target
+
+    sprite = image.crop(bbox)
+    max_w = CELL_WIDTH - _CELL_PAD
+    max_h = CELL_HEIGHT - _CELL_PAD
+    scale = min(max_w / sprite.width, max_h / sprite.height, 1.0)
+    if scale != 1.0:
+        sprite = sprite.resize(
+            (max(1, round(sprite.width * scale)), max(1, round(sprite.height * scale))),
+            Image.Resampling.LANCZOS,
+        )
+    left = (CELL_WIDTH - sprite.width) // 2
+    top = (CELL_HEIGHT - sprite.height) // 2
+    target.alpha_composite(sprite, (left, top))
+    return target
+
+
+def _connected_components(image) -> list[dict]:
+    """Flood-fill the alpha mask into connected blobs (4-connectivity)."""
+    alpha = image.getchannel("A")
+    w, h = image.size
+    data = alpha.tobytes()
+    visited = bytearray(w * h)
+    out: list[dict] = []
+
+    for start, a in enumerate(data):
+        if a <= _ALPHA_FLOOR or visited[start]:
+            continue
+        stack = [start]
+        visited[start] = 1
+        pixels: list[int] = []
+        min_x = w
+        min_y = h
+        max_x = 0
+        max_y = 0
+        while stack:
+            cur = stack.pop()
+            pixels.append(cur)
+            x = cur % w
+            y = cur // w
+            min_x = min(min_x, x)
+            min_y = min(min_y, y)
+            max_x = max(max_x, x)
+            max_y = max(max_y, y)
+            for nb, ok in (
+                (cur - 1, x > 0),
+                (cur + 1, x + 1 < w),
+                (cur - w, y > 0),
+                (cur + w, y + 1 < h),
+            ):
+                if ok and not visited[nb] and data[nb] > _ALPHA_FLOOR:
+                    visited[nb] = 1
+                    stack.append(nb)
+        out.append(
+            {
+                "pixels": pixels,
+                "area": len(pixels),
+                "bbox": (min_x, min_y, max_x + 1, max_y + 1),
+                "center_x": (min_x + max_x + 1) / 2,
+            }
+        )
+    return out
+
+
+def _group_image(source, components: list[dict], padding: int = 4):
+    from PIL import Image
+
+    w, h = source.size
+    min_x = max(0, min(c["bbox"][0] for c in components) - padding)
+    min_y = max(0, min(c["bbox"][1] for c in components) - padding)
+    max_x = min(w, max(c["bbox"][2] for c in components) + padding)
+    max_y = min(h, max(c["bbox"][3] for c in components) + padding)
+
+    out = Image.new("RGBA", (max_x - min_x, max_y - min_y), (0, 0, 0, 0))
+    src_px = source.load()
+    out_px = out.load()
+    for c in components:
+        for idx in c["pixels"]:
+            x = idx % w
+            y = idx // w
+            out_px[x - min_x, y - min_y] = src_px[x, y]
+    return out
+
+
+def _component_frames(strip, frame_count: int) -> list | None:
+    """Segment a strip into *frame_count* sprites by connected components.
+
+    Picks the ``frame_count`` largest blobs as seeds (left→right), attaches
+    smaller blobs to the nearest seed, and returns one fitted cell per group.
+    Returns ``None`` when it can't find enough distinct sprites (caller falls
+    back to equal slicing).
+    """
+    components = _connected_components(strip)
+    if not components:
+        return None
+
+    largest = max(c["area"] for c in components)
+    seed_threshold = max(120, largest * 0.20)
+    seeds = [c for c in components if c["area"] >= seed_threshold]
+    if len(seeds) < frame_count:
+        seeds = sorted(components, key=lambda c: c["area"], reverse=True)[:frame_count]
+    if len(seeds) < frame_count:
+        return None
+
+    seeds = sorted(
+        sorted(seeds, key=lambda c: c["area"], reverse=True)[:frame_count],
+        key=lambda c: c["center_x"],
+    )
+    seed_ids = {id(s) for s in seeds}
+    groups: list[list[dict]] = [[s] for s in seeds]
+    noise_threshold = max(12, largest * 0.002)
+    for c in components:
+        if id(c) in seed_ids or c["area"] < noise_threshold:
+            continue
+        nearest = min(range(len(seeds)), key=lambda i: abs(seeds[i]["center_x"] - c["center_x"]))
+        groups[nearest].append(c)
+
+    return [_fit_to_cell(_group_image(strip, g)) for g in groups]
+
+
+def _slot_frames(strip, frame_count: int) -> list:
+    """Equal-width fallback: cut the strip into *frame_count* column cells."""
+    step = strip.width / frame_count
+    edges = [round(k * step) for k in range(frame_count + 1)]
+    cells = []
+    for lo, hi in zip(edges, edges[1:]):
+        box = (lo, 0, hi, strip.height)
+        cells.append(_fit_to_cell(strip.crop(box)))
+    return cells
+
+
+def extract_strip_frames(
+    strip,
+    frame_count: int,
+    *,
+    chroma_key: tuple[int, int, int] | None = None,
+    method: str = "auto",
+) -> list:
+    """Convert one generated row strip into *frame_count* clean 192x208 cells.
+
+    *strip* may be a PIL image or a path to one. After the background is keyed
+    out, ``auto`` and ``components`` locate sprites by connected components;
+    ``auto`` (and any other method) falls back to equal-width slots, whereas
+    ``components`` raises ``ValueError`` if segmentation fails.
+    """
+    from PIL import Image
+
+    if not isinstance(strip, (str, Path)):
+        rgba = strip.convert("RGBA")
+    else:
+        with Image.open(strip) as handle:
+            rgba = handle.convert("RGBA")
+    keyed = remove_background(rgba, chroma_key=chroma_key)
+
+    if method not in ("auto", "components"):
+        return _slot_frames(keyed, frame_count)
+    segmented = _component_frames(keyed, frame_count)
+    if segmented is None and method == "components":
+        raise ValueError(f"could not segment {frame_count} sprites from strip")
+    return _slot_frames(keyed, frame_count) if segmented is None else segmented
+
+
+# ───────────────────────── atlas composition ─────────────────────────
+
+
+def single_frame(image):
+    """One fitted 192x208 cell from a standalone image (e.g. the base look).
+
+    Accepts a path or PIL image (any mode); the idle fallback when that row failed.
+    """
+    from PIL import Image
+
+    if isinstance(image, (str, Path)):
+        with Image.open(image) as opened:
+            image = opened.convert("RGBA")
+    rgba = image if image.mode == "RGBA" else image.convert("RGBA")
+    return _fit_to_cell(remove_background(rgba))
+
+
+def _clear_transparent_rgb(image):
+    """Blank out the RGB of every alpha-0 pixel so no colored halo remains."""
+    from PIL import Image
+
+    rgba = image.convert("RGBA")
+    raw = bytearray(rgba.tobytes())
+    for alpha_at in range(3, len(raw), 4):
+        if not raw[alpha_at]:
+            raw[alpha_at - 3 : alpha_at] = b"\x00\x00\x00"
+    return Image.frombytes("RGBA", rgba.size, bytes(raw))
+
+
+def compose_atlas(frames_by_state: dict[str, list]):
+    """Lay per-state frame lists onto the Hermes atlas (RGBA, residue-cleared).
+
+    A state that is absent or short simply leaves its trailing cells
+    transparent, and frames past a state's spec count are ignored.
+    """
+    from PIL import Image
+
+    sheet = Image.new("RGBA", (ATLAS_WIDTH, ATLAS_HEIGHT), (0, 0, 0, 0))
+    cell_size = (CELL_WIDTH, CELL_HEIGHT)
+    for state, row, count in ROW_SPECS:
+        usable = (frames_by_state.get(state) or [])[:count]
+        for col, frame in enumerate(usable):
+            tile = frame.convert("RGBA")
+            tile = tile if tile.size == cell_size else _fit_to_cell(tile)
+            sheet.alpha_composite(tile, (col * CELL_WIDTH, row * CELL_HEIGHT))
+    return _clear_transparent_rgb(sheet)
+
+
+def atlas_to_webp_bytes(atlas) -> bytes:
+    """Serialize an atlas image as lossless WebP (the on-disk pet format)."""
+    sink = io.BytesIO()
+    atlas.save(sink, format="WEBP", exact=True, method=6, quality=100, lossless=True)
+    return sink.getvalue()
+
+
+def validate_atlas(atlas) -> dict:
+    """Check geometry, per-state occupancy, and transparency invariants.
+
+    Returns ``{ok, width, height, errors, warnings, filled_states}``. Errors are
+    blockers (wrong size, no state with any pixels, RGB residue on transparent
+    pixels); warnings are soft (one state row blank — likely a dropped row).
+    """
+    from PIL import Image
+
+    if isinstance(atlas, (str, Path)):
+        with Image.open(atlas) as opened:
+            atlas = opened.convert("RGBA")
+    else:
+        atlas = atlas.convert("RGBA")
+
+    errors: list[str] = []
+    warnings: list[str] = []
+
+    if atlas.size != (ATLAS_WIDTH, ATLAS_HEIGHT):
+        errors.append(f"expected {ATLAS_WIDTH}x{ATLAS_HEIGHT}, got {atlas.width}x{atlas.height}")
+        return {"ok": False, "width": atlas.width, "height": atlas.height, "errors": errors, "warnings": warnings, "filled_states": []}
+
+    filled_states: list[str] = []
+    for state, row, count in ROW_SPECS:
+        row_pixels = 0  # non-transparent pixels summed over this state's cells
+        for col in range(count):
+            left = col * CELL_WIDTH
+            top = row * CELL_HEIGHT
+            cell = atlas.crop((left, top, left + CELL_WIDTH, top + CELL_HEIGHT))
+            nonblank = sum(cell.getchannel("A").histogram()[1:])  # alpha > 0
+            row_pixels += nonblank
+        if row_pixels > 0:
+            filled_states.append(state)
+        else:
+            warnings.append(f"state '{state}' has no frames")
+
+    if not filled_states:
+        errors.append("atlas is empty — no state produced any frames")
+
+    # Transparent pixels must carry zero RGB (no halo residue).
+    data = atlas.tobytes()
+    residue = 0
+    for i in range(0, len(data), 4):
+        if data[i + 3] == 0 and (data[i] or data[i + 1] or data[i + 2]):
+            residue += 1
+    if residue:
+        errors.append(f"{residue} transparent pixels retain RGB residue")
+
+    return {
+        "ok": not errors,
+        "width": atlas.width,
+        "height": atlas.height,
+        "errors": errors,
+        "warnings": warnings,
+        "filled_states": filled_states,
+    }
--- a/agent/pet/generate/imagegen.py
+++ b/agent/pet/generate/imagegen.py
@@ -0,0 +1,168 @@
+"""Thin image-generation layer for pet sprites.
+
+Wraps the active :class:`~agent.image_gen_provider.ImageGenProvider` with the
+two things sprite generation needs that the agent-facing ``image_generate`` tool
+doesn't expose: **N variants** (loop) and **reference-image grounding** (so each
+animation row stays the same character as the chosen base).
+
+Reference grounding only works on providers that support it — currently OpenAI
+``gpt-image-2`` (image edits) and Krea (style references). We resolve to one of
+those and surface a clear, actionable error otherwise rather than silently
+producing an ungrounded, drifting pet.
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+# Providers that can ground generation on a reference image.
+_REF_CAPABLE = ("openai", "openai-codex", "krea")
+
+
+class GenerationError(RuntimeError):
+    """Raised when sprite generation fails (no provider, API error, IO, bad atlas)."""
+
+
+@dataclass(frozen=True)
+class SpriteProvider:
+    """Resolved provider plus whether it can take reference images."""
+
+    name: str  # provider name as reported by the registry / ``_REF_CAPABLE``
+    provider: object  # the provider instance whose ``generate()`` is called
+    supports_references: bool  # True only for providers listed in ``_REF_CAPABLE``
+
+
+def _discover() -> None:
+    """Trigger plugin discovery; any failure is debug-logged and swallowed."""
+    try:
+        from hermes_cli.plugins import _ensure_plugins_discovered as run_discovery
+        run_discovery()
+    except Exception as exc:  # noqa: BLE001 - discovery is best-effort
+        logger.debug("image-gen plugin discovery failed: %s", exc)
+
+
+def resolve_provider(*, require_references: bool = True) -> SpriteProvider:
+    """Choose the image provider used for sprite generation.
+
+    Order: the configured provider when reference-capable and available, else
+    the first available ``_REF_CAPABLE`` provider. Only with *require_references*
+    off may the configured provider be used without reference support.
+    """
+    _discover()
+    from agent.image_gen_registry import get_active_provider, get_provider
+
+    try:
+        configured = get_active_provider()
+    except Exception:  # noqa: BLE001
+        configured = None
+
+    # 1) The configured provider, when it can ground on reference images.
+    if configured is not None:
+        configured_name = getattr(configured, "name", "")
+        if configured_name in _REF_CAPABLE and configured.is_available():
+            return SpriteProvider(configured_name, configured, True)
+
+    # 2) Otherwise the first reference-capable provider that is available.
+    for ref_name in _REF_CAPABLE:
+        candidate = get_provider(ref_name)
+        if candidate is None or not candidate.is_available():
+            continue
+        return SpriteProvider(ref_name, candidate, True)
+
+    # 3) Prompt-only callers may accept the configured provider as-is.
+    if not require_references and configured is not None and configured.is_available():
+        return SpriteProvider(getattr(configured, "name", "unknown"), configured, False)
+
+    raise GenerationError(
+        "Pet generation needs a reference-capable image backend. "
+        "Run `hermes tools` → Image Generation → OpenAI (gpt-image-2) and add an "
+        "OpenAI API key (or configure Krea)."
+    )
+
+
+def _save_local(image_ref: str, *, prefix: str) -> Path:
+    """Map *image_ref* to a local path, downloading it first when it is a URL."""
+    if not image_ref.startswith(("http://", "https://")):
+        return Path(image_ref)
+    from agent.image_gen_provider import save_url_image
+
+    return Path(save_url_image(image_ref, prefix=prefix))
+
+
+def _rejected_background(error: str) -> bool:
+    """Report whether a provider error targets the ``background`` parameter.
+
+    Transparent output is a per-model capability (some gpt-image tiers refuse
+    ``background=transparent``). Spotting that specific refusal lets the caller
+    retry without the flag instead of failing the whole pet; the chroma-key
+    pass makes the result transparent either way.
+    """
+    text = (error or "").lower()
+    return "background" in text and any(k in text for k in ("not supported", "transparent"))
+
+
+def generate(
+    prompt: str,
+    *,
+    n: int = 1,
+    reference_images: list[Path] | None = None,
+    provider: SpriteProvider | None = None,
+    prefix: str = "pet_gen",
+) -> list[Path]:
+    """Generate *n* square sprite images and return their local paths.
+
+    *reference_images* grounds the output on a base image (required for rows).
+    A transparent background is requested first; models that reject the flag
+    are retried without it (the chroma-key pass cleans up downstream). Failed
+    variants are skipped; :class:`GenerationError` is raised if none succeed.
+    """
+    sprite = provider or resolve_provider(require_references=bool(reference_images))
+    if reference_images and not sprite.supports_references:
+        raise GenerationError(
+            f"image backend '{sprite.name}' cannot use reference images; "
+            "configure OpenAI gpt-image-2 or Krea for pet generation"
+        )
+
+    refs = [str(p) for p in (reference_images or [])]
+
+    def _run(extra: dict) -> tuple[Path | None, str]:
+        kwargs: dict = {"aspect_ratio": "square", **extra}
+        if refs:
+            kwargs["reference_images"] = refs
+        try:
+            result = sprite.provider.generate(prompt, **kwargs)
+        except Exception as exc:  # noqa: BLE001 - normalize provider crashes
+            logger.debug("provider.generate crashed: %s", exc)
+            return None, str(exc)
+        if not isinstance(result, dict):
+            return None, "no result"
+        if not result.get("success"):
+            return None, str(result.get("error") or "unknown error")  # never None/non-str
+        image_ref = result.get("image")
+        if not image_ref:
+            return None, "provider returned no image"
+        try:
+            return _save_local(str(image_ref), prefix=prefix), ""
+        except Exception as exc:  # noqa: BLE001
+            return None, f"could not save generated image: {exc}"
+
+    out: list[Path] = []
+    last_error, allow_transparent = "", True
+    for _ in range(max(1, n)):
+        path, err = _run({"background": "transparent"} if allow_transparent else {})
+        # Flag rejected → retry now and drop it for every remaining variant.
+        if path is None and allow_transparent and _rejected_background(err):
+            allow_transparent = False
+            path, err = _run({})
+        if path is not None:
+            out.append(path)
+        else:
+            last_error = err
+
+    if not out:
+        raise GenerationError(last_error or "image generation produced no output")
+    return out
--- a/agent/pet/generate/orchestrate.py
+++ b/agent/pet/generate/orchestrate.py
@@ -0,0 +1,149 @@
+"""Pet generation orchestration — the base-draft → hatch flow.
+
+Two steps, mirroring the UX across every surface:
+
+1. :func:`generate_base_drafts` — a handful of prompt-only "what should this pet
+   look like" variants. Cheap; the user picks one (or retries for a fresh set).
+2. :func:`hatch_pet` — takes the chosen base and generates one grounded row
+   strip per Hermes state, slices each into frames, composes the atlas, validates
+   it, and writes the pet into the store.
+
+Splitting it this way bounds cost (4 cheap base calls per round; the ~6 row
+calls happen once, on the pet you actually keep) and gives each UI a natural
+preview/loading point.
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Callable
+
+from agent.pet.generate import atlas, imagegen, prompts
+from agent.pet.generate.imagegen import GenerationError, SpriteProvider
+
+logger = logging.getLogger(__name__)
+
+# (event, detail) — e.g. ("row", "idle"), ("compose", ""), ("save", "<slug>").
+ProgressFn = Callable[[str, str], None]
+
+
+@dataclass(frozen=True)
+class HatchResult:
+    """Outcome of a successful :func:`hatch_pet`."""
+
+    slug: str  # slug of the pet as returned by the store
+    display_name: str  # display name of the registered pet
+    spritesheet: Path  # spritesheet path reported by the store
+    states: list[str]  # the validation's ``filled_states``
+    validation: dict  # full :func:`atlas.validate_atlas` result
+
+
+def _harden_transparency(path: Path) -> Path:
+    """Strip any solid backdrop the provider painted and store an RGBA PNG.
+
+    Every call asks for ``background=transparent``, yet image models honor it
+    inconsistently — some still paint a flat (often near-white) backdrop. The
+    same chroma-key pass used by the row extractor runs here, so each base
+    draft offered to the user (and the reference the rows are grounded on) is a
+    clean cutout. Best-effort: on any failure the original path is returned.
+    """
+    from PIL import Image
+
+    try:
+        with Image.open(path) as raw:
+            cutout = atlas.remove_background(raw.convert("RGBA"))
+        png_path = path.with_suffix(".png")
+        cutout.save(png_path, format="PNG")
+    except Exception as exc:  # noqa: BLE001 - cosmetic; fall back to the raw image
+        logger.debug("base draft transparency hardening failed for %s: %s", path, exc)
+        return path
+    return png_path
+
+
+def generate_base_drafts(
+    concept: str,
+    *,
+    n: int = 4,
+    style: str = "auto",
+    provider: SpriteProvider | None = None,
+) -> list[Path]:
+    """Produce *n* candidate base looks for *concept* and return their paths.
+
+    Every draft goes through :func:`_harden_transparency` before it is returned.
+    """
+    base_prompt = prompts.build_base_prompt(concept, style=style)
+    backend = provider or imagegen.resolve_provider(require_references=False)
+    drafts = imagegen.generate(base_prompt, n=n, provider=backend, prefix="pet_base")
+    return list(map(_harden_transparency, drafts))
+
+
+def hatch_pet(
+    *,
+    base_image: str | Path,
+    slug: str,
+    display_name: str = "",
+    description: str = "",
+    concept: str = "",
+    style: str = "auto",
+    on_progress: ProgressFn | None = None,
+    provider: SpriteProvider | None = None,
+) -> HatchResult:
+    """Hatch an approved base image into a complete, installed Hermes pet.
+
+    For each state a grounded row strip is generated and cut into frames; the
+    atlas is then composed, validated, and registered in the store. A missing
+    idle row is replaced by the base look. Raises :class:`GenerationError`.
+    """
+    base = Path(base_image)
+    if not base.is_file():
+        raise GenerationError(f"base image not found: {base}")
+
+    backend = provider or imagegen.resolve_provider(require_references=True)
+    notify = on_progress if on_progress else (lambda *_: None)
+    subject = concept or display_name or slug
+
+    frames_by_state: dict[str, list] = {}
+    for state, _row, count in atlas.ROW_SPECS:
+        notify("row", state)
+        row_prompt = prompts.build_row_prompt(state, count, subject, style=style)
+        request = dict(
+            n=1, reference_images=[base], provider=backend, prefix=f"pet_row_{state}"
+        )
+        try:
+            strips = imagegen.generate(row_prompt, **request)
+            frames = atlas.extract_strip_frames(strips[0], count, method="auto")
+        except Exception as exc:  # noqa: BLE001 - a single row may fail; keep going
+            logger.warning("pet row '%s' failed: %s", state, exc)
+        else:
+            frames_by_state[state] = frames
+
+    # The renderer rests on idle, so that row must never be empty.
+    if not frames_by_state.get("idle"):
+        notify("row", "idle-fallback")
+        frames_by_state["idle"] = [atlas.single_frame(base)]
+
+    notify("compose", "")
+    sheet = atlas.compose_atlas(frames_by_state)
+    report = atlas.validate_atlas(sheet)
+    if not report["ok"]:
+        reasons = "; ".join(report["errors"])
+        raise GenerationError(reasons or "atlas validation failed")
+
+    from agent.pet import store
+
+    notify("save", slug)
+    pet = store.register_local_pet(
+        sheet,
+        slug=slug,
+        display_name=display_name or slug,
+        description=description,
+    )
+    return HatchResult(
+        slug=pet.slug,
+        display_name=pet.display_name,
+        spritesheet=pet.spritesheet,
+        states=report["filled_states"],
+        validation=report,
+    )
--- a/agent/pet/generate/prompts.py
+++ b/agent/pet/generate/prompts.py
@@ -0,0 +1,74 @@
+"""Prompt builders for pet generation.
+
+Two prompt shapes: a *base* prompt (prompt-only, produces the canonical look the
+user picks between) and per-*state* *row* prompts (grounded on the chosen base,
+produce one horizontal strip of N poses). Prompts stay concise and
+sprite-production oriented; the identity lock and "one transparent row" framing
+matter more than flowery description.
+
+Hermes drives six states (see :data:`agent.pet.generate.atlas.ROW_SPECS`); these
+mirror that set rather than the petdex/Codex nine.
+"""
+
+from __future__ import annotations
+
+# What each Hermes state should depict (kept short — these go straight into the
+# row prompt). Phrased to avoid the common sprite-gen failure modes (detached
+# effects, motion lines, shadows).
+STATE_ACTIONS: dict[str, str] = {
+    "idle": "a calm idle loop: subtle breathing, a tiny blink or gentle bob, no big gestures",
+    "wave": "a friendly greeting: raising a paw/hand/limb to wave, clear up-and-down gesture",
+    "run": "focused active work: leaning in, concentrating, busy 'thinking/processing' energy (NOT foot-running)",
+    "failed": "a sad or deflated reaction: slumped, dejected, small frown — readable but not noisy",
+    "review": "careful inspection: a focused lean, head tilt, studying something intently",
+    "jump": "a happy celebration jump: anticipation, lift off the ground, peak, and land",
+}
+
+_STYLE_HINTS: dict[str, str] = {  # style key → sentence appended to a prompt
+    "auto": "",  # no style sentence at all
+    "pixel": " Render in clean pixel-art style.",
+    "plush": " Render as a soft plush toy.",
+    "clay": " Render as a claymation / soft 3D clay figure.",
+    "sticker": " Render as a glossy die-cut sticker.",
+    "flat-vector": " Render in flat vector mascot style.",
+    "3d-toy": " Render as a glossy 3D toy.",
+    "painterly": " Render in a soft painterly style.",
+}
+
+_BACKGROUND = (  # shared tail of both the base prompt and every row prompt
+    "Center one full-body character on a fully transparent background. "
+    "No text, no labels, no shadow, no ground line, no scenery, no frame, no border."
+)
+
+
+def style_hint(style: str | None) -> str:  # None/unknown style → "" (no hint)
+    return _STYLE_HINTS.get((style or "auto").strip().lower(), "")
+
+
+def build_base_prompt(concept: str, *, style: str | None = "auto") -> str:
+    """Prompt for the base look: one clean, centered, full-body mascot."""
+    subject = (concept or "a cute friendly mascot creature").strip()
+    return "".join((
+        f"A cute, characterful mascot pet: {subject}. ",
+        "Compact, whole-body silhouette that reads clearly at small size, ",
+        "appealing face, simple consistent palette. ",
+        f"{_BACKGROUND}{style_hint(style)}",
+    ))
+
+
+def build_row_prompt(state: str, frame_count: int, concept: str, *, style: str | None = "auto") -> str:
+    """A row strip: *frame_count* poses of the SAME character, left→right.
+
+    The attached base image is the identity source of truth; *concept* (blank →
+    "the mascot") names it so the prompt locks species, palette, face, props.
+    """
+    action = STATE_ACTIONS.get(state, "a simple idle pose")
+    concept = (concept or "").strip() or "the mascot"
+    return (
+        f"Using the attached reference image of {concept} as the exact same character "
+        f"(same species, face, colors, markings, proportions, and props), "
+        f"draw a single horizontal strip of {frame_count} animation frames showing {action}. "
+        f"The {frame_count} poses must be evenly spaced left to right, each fully separated "
+        "(not overlapping), same size and baseline, forming a smooth loop. "
+        f"Keep the character identical across all frames. {_BACKGROUND}{style_hint(style)}"
+    )
--- a/agent/pet/manifest.py
+++ b/agent/pet/manifest.py
@@ -0,0 +1,128 @@
+"""Fetch the public petdex manifest.
+
+``https://petdex.dev/api/manifest`` 307-redirects to a JSON document on R2:
+
+    {
+      "generatedAt": "...",
+      "total": 2926,
+      "pets": [
+        {"slug": "boba", "displayName": "Boba", "kind": "creature",
+         "submittedBy": "railly",
+         "spritesheetUrl": "https://assets.petdex.dev/.../spritesheet.webp",
+         "petJsonUrl": "https://assets.petdex.dev/.../pet.json",
+         "zipUrl": "https://assets.petdex.dev/.../boba.zip"},
+        ...
+      ]
+    }
+
+Read-only and unauthenticated; no credentials involved.
+"""
+
+from __future__ import annotations
+
+import logging
+import time
+from dataclasses import dataclass
+
+logger = logging.getLogger(__name__)
+
+MANIFEST_URL = "https://petdex.dev/api/manifest"
+
+_DEFAULT_TIMEOUT = 20.0  # seconds; default request timeout handed to httpx
+
+# In-process cache for the (large, slow, identical-per-call) manifest. The list
+# is a static CDN object that barely changes, yet a single session can ask for
+# it many times — every gallery open, plus a full re-fetch per install/select
+# (``find_entry``). A short TTL collapses those into one network hit without
+# going stale for long. Cleared by :func:`clear_cache` (tests).
+_MANIFEST_TTL = 300.0  # seconds a cached manifest stays fresh
+_cache: tuple[float, list[ManifestEntry]] | None = None  # (monotonic stamp, entries)
+
+
+def clear_cache() -> None:
+    """Drop the cached manifest so the next :func:`fetch_manifest` hits the network."""
+    global _cache
+    _cache = None
+
+
+@dataclass(frozen=True)
+class ManifestEntry:
+    """A single pet's row in the manifest (missing/null fields become defaults)."""
+
+    slug: str
+    display_name: str
+    kind: str
+    submitted_by: str
+    spritesheet_url: str
+    pet_json_url: str
+    zip_url: str
+
+    @classmethod
+    def from_dict(cls, data: dict) -> "ManifestEntry":
+        return cls(
+            slug=str(data.get("slug") or "").strip(),  # null → "", never "None"
+            display_name=str(data.get("displayName") or data.get("slug") or ""),
+            kind=str(data.get("kind", "") or "pet"),
+            submitted_by=str(data.get("submittedBy", "") or ""),
+            spritesheet_url=str(data.get("spritesheetUrl", "") or ""),
+            pet_json_url=str(data.get("petJsonUrl", "") or ""),
+            zip_url=str(data.get("zipUrl", "") or ""),
+        )
+
+
+class ManifestError(RuntimeError):
+    """Raised by :func:`fetch_manifest` when the manifest can't be fetched or parsed."""
+
+
+def fetch_manifest(*, timeout: float = _DEFAULT_TIMEOUT, force: bool = False) -> list[ManifestEntry]:
+    """Return every approved pet from the public manifest.
+
+    Cached in-process for ``_MANIFEST_TTL`` seconds (``force=True`` bypasses);
+    each call gets its own list, so callers can't corrupt the cache. Follows the
+    307 redirect to R2. Raises :class:`ManifestError` on network/parse failure.
+    """
+    global _cache
+
+    if not force and _cache is not None and time.monotonic() - _cache[0] < _MANIFEST_TTL:
+        return list(_cache[1])
+
+    try:
+        import httpx
+    except ImportError as exc:  # pragma: no cover - httpx is a core dep
+        raise ManifestError("httpx is required to fetch the petdex manifest") from exc
+
+    try:
+        resp = httpx.get(
+            MANIFEST_URL,
+            timeout=timeout,
+            follow_redirects=True,
+            headers={"User-Agent": "hermes-agent-petdex"},
+        )
+        resp.raise_for_status()
+        payload = resp.json()
+    except Exception as exc:  # noqa: BLE001 - normalize to one error type
+        raise ManifestError(f"could not fetch petdex manifest: {exc}") from exc
+
+    pets = payload.get("pets") if isinstance(payload, dict) else None
+    if not isinstance(pets, list):
+        raise ManifestError("petdex manifest had no 'pets' array")
+
+    entries: list[ManifestEntry] = []
+    for raw in pets:
+        if not isinstance(raw, dict):
+            continue  # skip malformed rows instead of failing the whole fetch
+        entry = ManifestEntry.from_dict(raw)
+        if entry.slug and entry.spritesheet_url:
+            entries.append(entry)
+
+    _cache = (time.monotonic(), entries)
+    return list(entries)
+
+
+def find_entry(slug: str, *, timeout: float = _DEFAULT_TIMEOUT) -> ManifestEntry | None:
+    """Case-insensitive lookup of *slug* in the manifest; ``None`` if not listed."""
+    wanted = slug.strip().lower()
+    candidates = fetch_manifest(timeout=timeout)
+    return next(
+        (entry for entry in candidates if entry.slug.lower() == wanted), None
+    )
--- a/agent/pet/render.py
+++ b/agent/pet/render.py
@@ -0,0 +1,618 @@
+"""Decode a pet spritesheet and encode frames for a terminal.
+
+Shared by the base CLI (writes the escape bytes to its own stdout) and the
+TUI (``tui_gateway`` ships the encoded bytes to Ink, which writes them) so the
+decode + capability-detection + protocol-encoding logic exists exactly once.
+
+Supported output modes, in fidelity order:
+
+- ``kitty``   — the kitty graphics protocol (kitty, Ghostty, WezTerm).
+- ``iterm``   — iTerm2 inline images (iTerm2, WezTerm).
+- ``sixel``   — DEC sixel (xterm -ti vt340, foot, mlterm, WezTerm, …).
+- ``unicode`` — 24-bit half-block downscale; works in any truecolor terminal.
+
+Frame decoding requires Pillow (a core Hermes dependency).  If Pillow or the
+spritesheet is unavailable the renderer degrades to ``unicode`` text or an
+empty string rather than raising.
+"""
+
+from __future__ import annotations
+
+import base64
+import io
+import logging
+import os
+import sys
+from functools import lru_cache
+from pathlib import Path
+
+from agent.pet.constants import (
+    DEFAULT_SCALE,
+    FRAME_H,
+    FRAME_W,
+    FRAMES_PER_STATE,
+    PetState,
+    state_row_index,
+)
+
+logger = logging.getLogger(__name__)
+
+# Public render-mode names accepted by ``display.pet.render_mode``.
+RENDER_MODES = ("auto", "kitty", "iterm", "sixel", "unicode", "off")
+
+
+# ─────────────────────────────────────────────────────────────────────────
+# Terminal capability detection
+# ─────────────────────────────────────────────────────────────────────────
+
+def detect_terminal_graphics() -> str:
+    """Guess the richest graphics protocol the terminal supports, from env only.
+
+    No DA1/terminal query is issued (one could hang on a pipe), so detection
+    never blocks.  Result is ``kitty`` / ``iterm`` / ``sixel`` / ``unicode``;
+    anything unrecognized conservatively gets ``unicode``, which works in any
+    truecolor terminal.
+    """
+    env = os.environ
+    term = env.get("TERM", "").lower()
+    program = env.get("TERM_PROGRAM", "").lower()
+
+    # VS Code / Cursor: TERM_PROGRAM=vscode is authoritative, yet the
+    # integrated terminal keeps env vars inherited from the emulator that
+    # launched it (ITERM_SESSION_ID, KITTY_WINDOW_ID, …). Trusting those would
+    # emit an image protocol the embedded xterm.js can't display — a blank
+    # frame. Inline images there are opt-in
+    # (terminal.integrated.enableImages), so use half-blocks, which always
+    # render; users who enabled images can pin display.pet.render_mode.
+    if program == "vscode":
+        return "unicode"
+
+    # kitty graphics protocol (kitty, Ghostty). WezTerm speaks both kitty and
+    # iterm; kitty is preferred for its richer placement.
+    speaks_kitty = (
+        bool(env.get("KITTY_WINDOW_ID"))
+        or "kitty" in term
+        or "ghostty" in term
+        or program in ("ghostty", "wezterm")
+        or bool(env.get("WEZTERM_PANE"))
+    )
+    if speaks_kitty:
+        return "kitty"
+
+    # iTerm2 inline images
+    if program == "iterm.app" or env.get("ITERM_SESSION_ID"):
+        return "iterm"
+
+    # sixel-capable terminals (env heuristics only)
+    if program == "mintty" or any(tag in term for tag in ("foot", "mlterm", "sixel")):
+        return "sixel"
+
+    return "unicode"
+
+
+def resolve_mode(configured: str | None, *, stream=None) -> str:
+    """Work out the effective render mode from config plus the environment.
+
+    ``configured`` is ``display.pet.render_mode``; unknown values count as
+    ``auto`` (→ detect).  ``off`` is returned whenever the stream (default
+    ``sys.stdout``) is not a TTY — graphics in a pipe or logfile are pointless.
+    """
+    requested = (configured or "auto").strip().lower()
+    if requested == "off":
+        return "off"
+    if requested not in RENDER_MODES:
+        requested = "auto"
+
+    # Only a real TTY gets graphics; a closed/odd stream counts as "not a TTY".
+    target = stream or sys.stdout
+    try:
+        attached = hasattr(target, "isatty") and target.isatty()
+    except (ValueError, OSError):
+        attached = False
+    if not attached:
+        return "off"
+
+    return detect_terminal_graphics() if requested == "auto" else requested
+
+
+# ─────────────────────────────────────────────────────────────────────────
+# Frame decoding
+# ─────────────────────────────────────────────────────────────────────────
+
+def _open_sheet(path: Path):
+    from PIL import Image
+
+    with Image.open(path) as img:  # release the file handle once decoded
+        return img.convert("RGBA")  # convert() loads pixels into a new image
+
+
+# Max alpha at/below which a frame counts as blank padding.  petdex sheets are
+# left-packed: a state with fewer real frames than ``FRAMES_PER_STATE`` fills
+# the trailing columns with fully transparent cells.  Animating into one flashes
+# the pet blank, so we stop the row at the first such gap.
+_BLANK_ALPHA = 8
+
+
+def _frame_is_blank(frame) -> bool:
+    """Whether *frame*'s strongest alpha is at/below ``_BLANK_ALPHA`` (padding)."""
+    return max(frame.getchannel("A").getextrema()) <= _BLANK_ALPHA
+
+
+@lru_cache(maxsize=16)
+def _raw_frames(
+    sheet_path: str,
+    state_value: str,
+    frame_w: int,
+    frame_h: int,
+    frames_per_state: int,
+) -> tuple:
+    """Unscaled RGBA frames of one state row, cut off at the first blank cell.
+
+    Walks the row column by column and stops at the first blank frame, so pets
+    whose states have ragged frame counts never animate into empty padding.
+    Results are cached; any decode failure yields ``()``.
+    """
+    try:
+        sheet = _open_sheet(Path(sheet_path))
+        n_cols = max(1, sheet.width // frame_w)
+        n_rows = max(1, sheet.height // frame_h)
+        y0 = state_row_index(state_value, n_rows) * frame_h
+        if y0 + frame_h > sheet.height:
+            # Pets may ship fewer rows than the 8 the taxonomy reserves, so
+            # pin the crop window to the bottom of the sheet.
+            y0 = max(0, sheet.height - frame_h)
+
+        collected = []
+        for col in range(min(frames_per_state, n_cols)):
+            x0 = col * frame_w
+            cell = sheet.crop((x0, y0, x0 + frame_w, y0 + frame_h))
+            if _frame_is_blank(cell):
+                # First transparent column marks the end of the real frames.
+                break
+            collected.append(cell)
+        return tuple(collected)
+    except Exception as exc:  # noqa: BLE001 - cosmetic feature, never fatal
+        logger.debug("pet frame decode failed (%s, %s): %s", sheet_path, state_value, exc)
+        return ()
+
+
+@lru_cache(maxsize=8)
+def _frames_for(
+    sheet_path: str,
+    state_value: str,
+    frame_w: int,
+    frame_h: int,
+    frames_per_state: int,
+    scale_w: int,
+    scale_h: int,
+):
+    """Return one state row's frames resized to ``scale_w`` × ``scale_h``.
+
+    A scaling layer on top of :func:`_raw_frames`.  When there are no frames,
+    or the requested size already equals the native frame size, the raw frames
+    are returned as a list without resampling.  The result is memoized, so the
+    same list object is handed to every caller with matching arguments —
+    treat it as read-only.
+    """
+    native = _raw_frames(sheet_path, state_value, frame_w, frame_h, frames_per_state)
+    wants_resize = bool(native) and (scale_w, scale_h) != (frame_w, frame_h)
+    if not wants_resize:
+        return list(native)
+
+    from PIL import Image
+
+    target = (scale_w, scale_h)
+    return [cell.resize(target, Image.LANCZOS) for cell in native]
+
+
+def state_frame_counts(
+    sheet_path: str | Path,
+    *,
+    frame_w: int = FRAME_W,
+    frame_h: int = FRAME_H,
+    frames_per_state: int = FRAMES_PER_STATE,
+) -> dict[str, int]:
+    """Count the real (padding-trimmed) frames of every :class:`PetState` row.
+
+    Returns ``{state.value: frame_count}`` with one entry per ``PetState``
+    member, computed through :func:`_raw_frames`.  It is intended as the single
+    answer to "how many frames does this state actually have?" for consumers
+    that step their own animation loop.
+    """
+    path_key = str(sheet_path)
+    counts: dict[str, int] = {}
+    for state in PetState:
+        trimmed = _raw_frames(path_key, state.value, frame_w, frame_h, frames_per_state)
+        counts[state.value] = len(trimmed)
+    return counts
+
+
+# ─────────────────────────────────────────────────────────────────────────
+# Encoders
+# ─────────────────────────────────────────────────────────────────────────
+
+def _png_bytes(frame) -> bytes:
+    """Serialize *frame* as PNG and return the encoded bytes."""
+    with io.BytesIO() as sink:
+        frame.save(sink, format="PNG")
+        return sink.getvalue()
+
+
+def _kitty_apc(ctrl: str, data: str) -> str:
+    """Wrap *data* in kitty graphics APC escapes, split into ≤4096-char chunks.
+
+    A payload that fits in one chunk is sent as a single escape with ``m=0``.
+    Otherwise the first escape carries *ctrl* and ``m=1``; each continuation
+    carries only the ``m`` key, which is ``0`` on the final chunk.
+    """
+    limit = 4096
+    if len(data) <= limit:
+        return f"\x1b_G{ctrl},m=0;{data}\x1b\\"
+
+    pieces = [data[start:start + limit] for start in range(0, len(data), limit)]
+    final = len(pieces) - 1
+    escapes = []
+    for pos, piece in enumerate(pieces):
+        more = 0 if pos == final else 1
+        head = f"{ctrl},m={more}" if pos == 0 else f"m={more}"
+        escapes.append(f"\x1b_G{head};{piece}\x1b\\")
+    return "".join(escapes)
+
+
+def _encode_kitty(frame, *, cell_cols: int | None = None, cell_rows: int | None = None) -> str:
+    """Encode one frame with the kitty graphics protocol (transmit + display).
+
+    ``a=T`` transmits and displays at the cursor.  When given, ``c``/``r``
+    request a display box in terminal cells so successive frames cover the
+    same area; a falsy value omits the corresponding key.
+    """
+    options = ["f=100", "a=T", "q=2"]
+    if cell_cols:
+        options.append(f"c={cell_cols}")
+    if cell_rows:
+        options.append(f"r={cell_rows}")
+    payload = base64.standard_b64encode(_png_bytes(frame)).decode("ascii")
+    return _kitty_apc(",".join(options), payload)
+
+
+# ─────────────────────────────────────────────────────────────────────────
+# kitty Unicode placeholders
+#
+# Ink (the TUI's React-for-terminal layer) owns the screen and measures every
+# cell's width, so it can't host raw kitty image escapes (no width to count,
+# clobbered on the next repaint). kitty's *Unicode placeholder* protocol is the
+# grid-safe path: transmit the image once (q=2, virtual placement U=1), then the
+# host app prints ordinary-width placeholder cells (U+10EEEE + diacritics) whose
+# foreground color encodes the image id. Ink counts those as width-1 text, so
+# layout stays correct and the terminal paints the image underneath.
+#   https://sw.kovidgoyal.net/kitty/graphics-protocol/#unicode-placeholders
+# ─────────────────────────────────────────────────────────────────────────
+
+# The placeholder code point (U+10EEEE) printed once per image cell.
+_KITTY_PLACEHOLDER = "\U0010eeee"
+
+# Row/column diacritics, in order (index → diacritic). Verbatim from kitty's
+# gen/rowcolumn-diacritics.txt (Unicode 6.0.0, combining class 230). Index i is
+# the diacritic that encodes the number i; we only ever need the row index.
+# The table holds 297 code points, so indices 0..296 are addressable.
+_ROWCOL_DIACRITICS: tuple[int, ...] = (
+    0x0305, 0x030D, 0x030E, 0x0310, 0x0312, 0x033D, 0x033E, 0x033F, 0x0346, 0x034A,
+    0x034B, 0x034C, 0x0350, 0x0351, 0x0352, 0x0357, 0x035B, 0x0363, 0x0364, 0x0365,
+    0x0366, 0x0367, 0x0368, 0x0369, 0x036A, 0x036B, 0x036C, 0x036D, 0x036E, 0x036F,
+    0x0483, 0x0484, 0x0485, 0x0486, 0x0487, 0x0592, 0x0593, 0x0594, 0x0595, 0x0597,
+    0x0598, 0x0599, 0x059C, 0x059D, 0x059E, 0x059F, 0x05A0, 0x05A1, 0x05A8, 0x05A9,
+    0x05AB, 0x05AC, 0x05AF, 0x05C4, 0x0610, 0x0611, 0x0612, 0x0613, 0x0614, 0x0615,
+    0x0616, 0x0617, 0x0657, 0x0658, 0x0659, 0x065A, 0x065B, 0x065D, 0x065E, 0x06D6,
+    0x06D7, 0x06D8, 0x06D9, 0x06DA, 0x06DB, 0x06DC, 0x06DF, 0x06E0, 0x06E1, 0x06E2,
+    0x06E4, 0x06E7, 0x06E8, 0x06EB, 0x06EC, 0x0730, 0x0732, 0x0733, 0x0735, 0x0736,
+    0x073A, 0x073D, 0x073F, 0x0740, 0x0741, 0x0743, 0x0745, 0x0747, 0x0749, 0x074A,
+    0x07EB, 0x07EC, 0x07ED, 0x07EE, 0x07EF, 0x07F0, 0x07F1, 0x07F3, 0x0816, 0x0817,
+    0x0818, 0x0819, 0x081B, 0x081C, 0x081D, 0x081E, 0x081F, 0x0820, 0x0821, 0x0822,
+    0x0823, 0x0825, 0x0826, 0x0827, 0x0829, 0x082A, 0x082B, 0x082C, 0x082D, 0x0951,
+    0x0953, 0x0954, 0x0F82, 0x0F83, 0x0F86, 0x0F87, 0x135D, 0x135E, 0x135F, 0x17DD,
+    0x193A, 0x1A17, 0x1A75, 0x1A76, 0x1A77, 0x1A78, 0x1A79, 0x1A7A, 0x1A7B, 0x1A7C,
+    0x1B6B, 0x1B6D, 0x1B6E, 0x1B6F, 0x1B70, 0x1B71, 0x1B72, 0x1B73, 0x1CD0, 0x1CD1,
+    0x1CD2, 0x1CDA, 0x1CDB, 0x1CE0, 0x1DC0, 0x1DC1, 0x1DC3, 0x1DC4, 0x1DC5, 0x1DC6,
+    0x1DC7, 0x1DC8, 0x1DC9, 0x1DCB, 0x1DCC, 0x1DD1, 0x1DD2, 0x1DD3, 0x1DD4, 0x1DD5,
+    0x1DD6, 0x1DD7, 0x1DD8, 0x1DD9, 0x1DDA, 0x1DDB, 0x1DDC, 0x1DDD, 0x1DDE, 0x1DDF,
+    0x1DE0, 0x1DE1, 0x1DE2, 0x1DE3, 0x1DE4, 0x1DE5, 0x1DE6, 0x1DFE, 0x20D0, 0x20D1,
+    0x20D4, 0x20D5, 0x20D6, 0x20D7, 0x20DB, 0x20DC, 0x20E1, 0x20E7, 0x20E9, 0x20F0,
+    0x2CEF, 0x2CF0, 0x2CF1, 0x2DE0, 0x2DE1, 0x2DE2, 0x2DE3, 0x2DE4, 0x2DE5, 0x2DE6,
+    0x2DE7, 0x2DE8, 0x2DE9, 0x2DEA, 0x2DEB, 0x2DEC, 0x2DED, 0x2DEE, 0x2DEF, 0x2DF0,
+    0x2DF1, 0x2DF2, 0x2DF3, 0x2DF4, 0x2DF5, 0x2DF6, 0x2DF7, 0x2DF8, 0x2DF9, 0x2DFA,
+    0x2DFB, 0x2DFC, 0x2DFD, 0x2DFE, 0x2DFF, 0xA66F, 0xA67C, 0xA67D, 0xA6F0, 0xA6F1,
+    0xA8E0, 0xA8E1, 0xA8E2, 0xA8E3, 0xA8E4, 0xA8E5, 0xA8E6, 0xA8E7, 0xA8E8, 0xA8E9,
+    0xA8EA, 0xA8EB, 0xA8EC, 0xA8ED, 0xA8EE, 0xA8EF, 0xA8F0, 0xA8F1, 0xAAB0, 0xAAB2,
+    0xAAB3, 0xAAB7, 0xAAB8, 0xAABE, 0xAABF, 0xAAC1, 0xFE20, 0xFE21, 0xFE22, 0xFE23,
+    0xFE24, 0xFE25, 0xFE26, 0x10A0F, 0x10A38, 0x1D185, 0x1D186, 0x1D187, 0x1D188,
+    0x1D189, 0x1D1AA, 0x1D1AB, 0x1D1AC, 0x1D1AD, 0x1D242, 0x1D243, 0x1D244,
+)
+
+
+def kitty_image_id(slug: str) -> int:
+    """Derive a stable per-pet kitty image id in ``[1, 0x7FFE]``.
+
+    The id travels in the placeholder's 24-bit foreground color, so it has to
+    be non-zero and stay well below ``0xFFFFFF``.  It is the CRC-32 of the
+    UTF-8 slug reduced modulo ``0x7FFE`` and shifted up by one: the same slug
+    always maps to the same id (re-renders reuse the terminal-side image),
+    while two different pets are unlikely to collide.
+    """
+    import zlib
+
+    checksum = zlib.crc32(slug.encode("utf-8"))
+    return checksum % 0x7FFE + 1
+
+
+def kitty_color_hex(image_id: int) -> str:
+    """Render the low 24 bits of *image_id* as the ``#rrggbb`` color kitty decodes."""
+    low_24_bits = image_id & 0xFFFFFF
+    return f"#{low_24_bits:06x}"
+
+
+def kitty_placeholder_rows(cols: int, rows: int) -> list[str]:
+    """Build the placeholder text lines for a *rows* × *cols* cell image.
+
+    Every line starts with a placeholder carrying that row's diacritic (the
+    column defaults to 0) followed by ``cols - 1`` bare placeholders, which
+    the terminal numbers by auto-incrementing the column.  Both dimensions are
+    raised to at least 1, and rows past the end of the diacritic table reuse
+    its last entry.  The foreground color (the image id) is applied by the
+    caller, not embedded here.
+    """
+    width = max(1, cols)
+    height = max(1, rows)
+    last = len(_ROWCOL_DIACRITICS) - 1
+    tail = _KITTY_PLACEHOLDER * (width - 1)
+    return [
+        _KITTY_PLACEHOLDER + chr(_ROWCOL_DIACRITICS[min(r, last)]) + tail
+        for r in range(height)
+    ]
+
+
+def _encode_kitty_virtual(frame, *, image_id: int, cols: int, rows: int) -> str:
+    """Transmit *frame* as a kitty virtual placement for Unicode placeholders.
+
+    Control keys: ``a=T`` transmits and creates the placement in one step,
+    ``U=1`` makes it virtual (nothing drawn, cursor untouched), and ``q=2``
+    silences the terminal's OK/error replies so they cannot corrupt the host
+    app's output.  Sending again with the same ``i`` replaces the image, which
+    is how the static placeholder cells animate.
+    """
+    fields = ("a=T", "U=1", f"i={image_id}", f"c={cols}", f"r={rows}", "f=100", "q=2")
+    encoded = base64.standard_b64encode(_png_bytes(frame)).decode("ascii")
+    return _kitty_apc(",".join(fields), encoded)
+
+
+def _encode_iterm(frame, *, cell_cols: int | None = None, cell_rows: int | None = None) -> str:
+    """Encode one frame as an iTerm2 inline image (OSC 1337 ``File=``).
+
+    ``size`` advertises the length of the PNG file itself in bytes, not the
+    length of its base64 text (which is roughly a third larger).  ``width`` /
+    ``height`` are given in terminal cells and are omitted when the
+    corresponding argument is falsy.
+    """
+    raw = _png_bytes(frame)
+    args = ["inline=1", f"size={len(raw)}", "preserveAspectRatio=1"]
+    if cell_cols:
+        args.append(f"width={cell_cols}")
+    if cell_rows:
+        args.append(f"height={cell_rows}")
+    payload = base64.standard_b64encode(raw).decode("ascii")
+    return f"\x1b]1337;File={';'.join(args)}:{payload}\x07"
+
+
+def _encode_sixel(frame) -> str:
+    """Encode one frame as DEC sixel.
+
+    Quantizes to an adaptive palette (≤255 colors) and emits the sixel band
+    stream.  Pillow has no sixel writer, so this is a compact hand-rolled
+    encoder.  Transparent pixels render as background (color register skipped).
+
+    *frame* must provide an ``"A"`` channel (it is read via ``getchannel``).
+    Returns the complete escape sequence, from the DCS introducer to the
+    string terminator.
+    """
+    from PIL import Image
+
+    rgba = frame
+    # Colors come from the RGB view; alpha is kept separately and consulted per
+    # pixel below so transparent pixels are simply never drawn.
+    pal = rgba.convert("RGB").quantize(colors=255, method=Image.MEDIANCUT)
+    palette = pal.getpalette() or []
+    px = pal.load()
+    alpha = rgba.getchannel("A").load()
+    w, h = pal.size
+
+    # Sequence introducer followed by the raster attributes carrying w and h.
+    out = ["\x1bP0;1;0q", '"1;1;%d;%d' % (w, h)]
+    # Color register definitions (sixel uses 0..100 scale).
+    # Only palette indices that actually occur in the image get a register.
+    used = sorted({px[x, y] for y in range(h) for x in range(w)})
+    for idx in used:
+        # Bounds-checked lookups: a short palette falls back to 0 per channel.
+        r = palette[idx * 3] if idx * 3 < len(palette) else 0
+        g = palette[idx * 3 + 1] if idx * 3 + 1 < len(palette) else 0
+        b = palette[idx * 3 + 2] if idx * 3 + 2 < len(palette) else 0
+        out.append("#%d;2;%d;%d;%d" % (idx, r * 100 // 255, g * 100 // 255, b * 100 // 255))
+
+    # Emit in 6-row bands.
+    for band in range(0, h, 6):
+        # One pass over the band per color: select the register, then one
+        # sixel character per column.
+        for color_idx in used:
+            line = ["#%d" % color_idx]
+            run_char = None
+            run_len = 0
+
+            def flush():
+                # Write out the pending run; runs longer than 3 use the
+                # ``!<count><char>`` repeat form, shorter ones are spelled out.
+                nonlocal run_char, run_len
+                if run_char is None:
+                    return
+                if run_len > 3:
+                    line.append("!%d%s" % (run_len, run_char))
+                else:
+                    line.append(run_char * run_len)
+                run_char, run_len = None, 0
+
+            for x in range(w):
+                # Bit k is set when row ``band + k`` of this column exists, is
+                # opaque enough (alpha > 32) and has the current color.
+                bits = 0
+                for bit in range(6):
+                    y = band + bit
+                    if y < h and alpha[x, y] > 32 and px[x, y] == color_idx:
+                        bits |= 1 << bit
+                # Sixel data characters start at "?" (63) for an empty column.
+                ch = chr(63 + bits)
+                if ch == run_char:
+                    run_len += 1
+                else:
+                    flush()
+                    run_char, run_len = ch, 1
+            flush()
+            out.append("".join(line) + "$")  # carriage return within band
+        out.append("-")  # next band
+    out.append("\x1b\\")
+    return "".join(out)
+
+
+# Upper-half block glyph: its foreground paints the top pixel of a cell and its
+# background paints the bottom pixel.
+_HALF_BLOCK = "▀"
+
+# A single half-block cell: top pixel + bottom pixel as (r, g, b, a) tuples.
+Cell = tuple[tuple[int, int, int, int], tuple[int, int, int, int]]
+
+
+def _downscale_cells(frame, *, target_cols: int) -> list[list[Cell]]:
+    """Shrink *frame* into a grid of half-block cells.
+
+    The frame is resampled to ``target_cols`` pixels wide (never fewer than 4)
+    and to an even pixel height that follows the frame's aspect ratio.  Each
+    returned row stands for one terminal line and holds
+    ``((tr,tg,tb,ta),(br,bg,bb,ba))`` pairs: the pixel on top and the pixel
+    directly below it.  This framework-neutral form is shared by the ANSI
+    encoder (CLI) and the structured ``cells`` API (Ink).
+    """
+    from PIL import Image
+
+    width = max(4, target_cols)
+    ratio = frame.height / max(1, frame.width)
+    height = max(2, int(round(width * ratio * 0.5)) * 2)
+    small = frame.resize((width, height), Image.LANCZOS).convert("RGBA")
+    pixels = small.load()
+    clear = (0, 0, 0, 0)
+
+    return [
+        [
+            (pixels[x, y], pixels[x, y + 1] if y + 1 < height else clear)
+            for x in range(width)
+        ]
+        for y in range(0, height, 2)
+    ]
+
+
+def _encode_unicode(frame, *, target_cols: int) -> str:
+    """Downscale to truecolor ANSI half-blocks (one char = 2 vertical pixels).
+
+    A pixel counts as transparent when its alpha is below 32.  Per cell:
+
+    - both halves transparent → a plain space on the default background;
+    - only the top transparent → a lower-half block coloured with the bottom
+      pixel, leaving the top half as default background;
+    - only the bottom transparent → an upper-half block coloured with the top
+      pixel, leaving the bottom half as default background;
+    - both opaque → upper-half block, foreground = top, background = bottom.
+
+    Painting a transparent half with its stored RGB would draw a stray dark
+    fringe along the sprite's outline, so those halves are left unpainted.
+    Returns the rows joined with newlines; every row ends with a reset.
+    """
+    lower_half = "\u2584"  # ▄ — draws only the bottom pixel of a cell
+    lines: list[str] = []
+    for row in _downscale_cells(frame, target_cols=target_cols):
+        cells: list[str] = []
+        for (tr, tg, tb, ta), (br, bg, bb, ba) in row:
+            top_clear = ta < 32
+            bottom_clear = ba < 32
+            if top_clear and bottom_clear:
+                cells.append("\x1b[0m ")  # fully transparent → blank
+            elif top_clear:
+                # Reset first so a background left by the previous cell does
+                # not bleed into the transparent half.
+                cells.append(f"\x1b[0m\x1b[38;2;{br};{bg};{bb}m{lower_half}")
+            elif bottom_clear:
+                cells.append(f"\x1b[0m\x1b[38;2;{tr};{tg};{tb}m{_HALF_BLOCK}")
+            else:
+                cells.append(f"\x1b[38;2;{tr};{tg};{tb}m\x1b[48;2;{br};{bg};{bb}m{_HALF_BLOCK}")
+        lines.append("".join(cells) + "\x1b[0m")
+    return "\n".join(lines)
+
+
+# ─────────────────────────────────────────────────────────────────────────
+# Public renderer
+# ─────────────────────────────────────────────────────────────────────────
+
+class PetRenderer:
+    """Per-pet frame source: decodes one spritesheet and encodes frames on demand.
+
+    Build one instance per pet and call :meth:`frame` from an animation timer.
+    Decoding goes through the cached :func:`_frames_for`, so repeated requests
+    for the same state do not re-read the sheet.
+    """
+
+    def __init__(
+        self,
+        spritesheet: str | Path,
+        *,
+        mode: str = "unicode",
+        scale: float = DEFAULT_SCALE,
+        unicode_cols: int = 20,
+        frame_w: int = FRAME_W,
+        frame_h: int = FRAME_H,
+        frames_per_state: int = FRAMES_PER_STATE,
+    ) -> None:
+        # A mode outside RENDER_MODES degrades to half-blocks instead of raising.
+        self.mode = mode if mode in RENDER_MODES else "unicode"
+        self.spritesheet = str(spritesheet)
+        self.scale, self.unicode_cols = scale, unicode_cols
+        self.frame_w, self.frame_h = frame_w, frame_h
+        self.frames_per_state = frames_per_state
+
+    @property
+    def available(self) -> bool:
+        """True when rendering is enabled and the spritesheet file exists."""
+        if self.mode == "off":
+            return False
+        return Path(self.spritesheet).is_file()
+
+    def frame_count(self, state: PetState | str) -> int:
+        """Number of padding-trimmed frames available for *state*."""
+        return len(self._frames(state))
+
+    def _frames(self, state: PetState | str):
+        """Scaled frames for *state* (a ``PetState`` or its string value)."""
+        key = str(state) if not isinstance(state, PetState) else state.value
+        # Truncate each scaled side to whole pixels, but never below one.
+        scaled = tuple(max(1, int(side * self.scale)) for side in (self.frame_w, self.frame_h))
+        return _frames_for(
+            self.spritesheet,
+            key,
+            self.frame_w,
+            self.frame_h,
+            self.frames_per_state,
+            *scaled,
+        )
+
+    def cells(self, state: PetState | str, index: int, *, cols: int | None = None) -> list[list[Cell]]:
+        """Return one frame as a framework-neutral half-block cell grid.
+
+        Meant for the TUI, which paints the grid with native Ink color props
+        rather than raw ANSI.  *index* wraps around the frame count; a falsy
+        *cols* falls back to ``unicode_cols``.  Returns ``[]`` when the state
+        has no frames.
+        """
+        decoded = self._frames(state)
+        if not decoded:
+            return []
+        width = cols or self.unicode_cols
+        return _downscale_cells(decoded[index % len(decoded)], target_cols=width)
+
+    def _cell_box(self, frame) -> tuple[int, int]:
+        """Terminal cell box ``(cols, rows)`` for a scaled frame (~8×16 px per cell).
+
+        Has to agree with the sizing :meth:`frame` passes to the graphics
+        encoders: kitty stretches the image to fill ``c``×``r`` cells, so the
+        box follows the scaled pixel size rather than a native-aspect column
+        count (which would upscale small pets).
+        """
+        cols = max(1, frame.width // 8)
+        rows = max(1, frame.height // 16)
+        return cols, rows
+
+    def kitty_payload(self, state: PetState | str, *, image_id: int) -> dict | None:
+        """Build the kitty Unicode-placeholder payload for one state.
+
+        The result is ``{cols, rows, placeholder, frames}``: ``placeholder`` is
+        the static text grid the host paints, and ``frames`` holds one
+        transmit escape per animation frame, all sharing *image_id*.  The
+        geometry comes from the first scaled frame via :meth:`_cell_box`, not
+        from ``unicode_cols``.  Returns ``None`` when the state has no frames.
+        """
+        decoded = self._frames(state)
+        if not decoded:
+            return None
+        cols, rows = self._cell_box(decoded[0])
+        grid = kitty_placeholder_rows(cols, rows)
+        transmits = [
+            _encode_kitty_virtual(cell, image_id=image_id, cols=cols, rows=rows)
+            for cell in decoded
+        ]
+        return {"cols": cols, "rows": rows, "placeholder": grid, "frames": transmits}
+
+    def frame(self, state: PetState | str, index: int) -> str:
+        """Return the encoded escape string for one frame, or ``""``.
+
+        *index* is reduced modulo the frame count, so callers may pass a
+        free-running counter.  An ``"off"`` mode, a state without frames, or an
+        encoder failure (logged at debug level) all yield ``""``.
+        """
+        if self.mode == "off":
+            return ""
+        decoded = self._frames(state)
+        if not decoded:
+            return ""
+        current = decoded[index % len(decoded)]
+        box_cols, box_rows = self._cell_box(current)
+
+        try:
+            mode = self.mode
+            if mode == "kitty":
+                encoded = _encode_kitty(current, cell_cols=box_cols, cell_rows=box_rows)
+            elif mode == "iterm":
+                encoded = _encode_iterm(current, cell_cols=box_cols, cell_rows=box_rows)
+            elif mode == "sixel":
+                encoded = _encode_sixel(current)
+            else:
+                encoded = _encode_unicode(current, target_cols=self.unicode_cols)
+            return encoded
+        except Exception as exc:  # noqa: BLE001 - degrade silently
+            logger.debug("pet frame encode failed (mode=%s): %s", self.mode, exc)
+            return ""
+
+
+def build_renderer(
+    spritesheet: str | Path,
+    *,
+    configured_mode: str | None = None,
+    scale: float = DEFAULT_SCALE,
+    unicode_cols: int = 20,
+    stream=None,
+) -> PetRenderer:
+    """Factory shortcut: resolve the render mode, then build a :class:`PetRenderer`.
+
+    *configured_mode* and *stream* are passed to ``resolve_mode``; the frame
+    geometry keeps the renderer's defaults.
+    """
+    return PetRenderer(
+        spritesheet,
+        mode=resolve_mode(configured_mode, stream=stream),
+        scale=scale,
+        unicode_cols=unicode_cols,
+    )
--- a/agent/pet/state.py
+++ b/agent/pet/state.py
@@ -0,0 +1,81 @@
+"""Map agent activity → a :class:`PetState`.
+
+This is the one place the "what is the agent doing right now?" → "which
+animation row?" decision lives.  Each surface feeds it the signals it already
+tracks:
+
+- CLI    — ``KawaiiSpinner`` waiting/thinking state + tool outcomes.
+- TUI    — gateway ``tool.start/complete`` + ``message.delta/complete`` events.
+- Desktop — the ``$busy``/``$awaitingResponse``/tool-event nanostores
+            (re-implemented in TS, but mirroring this priority order).
+
+Keeping the priority order here (and documenting it) lets the TypeScript
+mirror stay faithful without a second design.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Iterable
+from typing import Any
+
+from agent.pet.constants import PetState
+
+
+def todos_all_done(todos: Iterable[Any] | None) -> bool:
+    """Return True only for a non-empty todo list that is entirely finished.
+
+    "Finished" means a status of ``"completed"`` or ``"cancelled"``.  This is
+    the trigger for the celebrate beat (``JUMP``) when a plan wraps up, and it
+    mirrors the TUI's ``isTodoDone`` so every surface agrees.  Items may be
+    dicts (``{"status": ...}``) or objects exposing a ``status`` attribute; an
+    item with neither counts as unfinished.
+    """
+    entries = list(todos or [])
+    if not entries:
+        return False
+
+    finished = ("completed", "cancelled")
+    for entry in entries:
+        if isinstance(entry, dict):
+            status = entry.get("status")
+        else:
+            status = getattr(entry, "status", None)
+        if status not in finished:
+            return False
+    return True
+
+
+def derive_pet_state(
+    *,
+    busy: bool = False,
+    awaiting_input: bool = False,
+    error: bool = False,
+    celebrate: bool = False,
+    just_completed: bool = False,
+    tool_running: bool = False,
+    reasoning: bool = False,
+) -> PetState:
+    """Pick the animation state for the current coarse activity signals.
+
+    Only one row can play at a time, so the first truthy signal in this
+    ladder (most salient first) decides:
+
+    1. ``error``          → ``FAILED``  (a tool/turn just failed)
+    2. ``celebrate``      → ``JUMP``    (explicit success beat, e.g. todos done)
+    3. ``just_completed`` → ``WAVE``    (turn finished cleanly / greeting)
+    4. ``awaiting_input`` → ``WAITING`` (a clarify/approval prompt is open; it
+       outranks the in-flight signals below because the turn is paused on the
+       user even though a tool is technically mid-call)
+    5. ``tool_running``   → ``RUN``     (a tool is executing)
+    6. ``reasoning``      → ``REVIEW``  (model is thinking / reading)
+    7. ``busy``           → ``RUN``     (turn in flight, unspecified work)
+    8. nothing set        → ``IDLE``
+    """
+    ladder = (
+        (error, PetState.FAILED),
+        (celebrate, PetState.JUMP),
+        (just_completed, PetState.WAVE),
+        (awaiting_input, PetState.WAITING),
+        (tool_running, PetState.RUN),
+        (reasoning, PetState.REVIEW),
+        (busy, PetState.RUN),
+    )
+    for active, state in ladder:
+        if active:
+            return state
+    return PetState.IDLE
--- a/agent/pet/store.py
+++ b/agent/pet/store.py
@@ -0,0 +1,388 @@
+"""On-disk pet store — install / list / resolve pets.
+
+Pets live under ``get_hermes_home()/pets/<slug>/`` so every profile gets its
+own set (we deliberately do **not** reuse petdex's ``~/.codex/pets`` default —
+that's owned by the petdex npm CLI and isn't profile-aware).  Each installed
+pet directory holds:
+
+    pets/<slug>/
+        pet.json            # {id, displayName, description, spritesheetPath}
+        spritesheet.webp    # (or .png)
+
+The active pet is resolved from the caller-supplied ``display.pet.slug`` config
+value (falling back to the first installed pet), so this module stays free of
+the config loader.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from dataclasses import dataclass
+from pathlib import Path
+
+from hermes_constants import get_hermes_home
+
+logger = logging.getLogger(__name__)
+
+# Default ``timeout`` of ``install_pet``; it is forwarded unchanged to the
+# manifest lookup and to the download helpers.
+_DOWNLOAD_TIMEOUT = 60.0
+
+
+class PetStoreError(RuntimeError):
+    """Store-level failure: a pet could not be installed, written or downloaded."""
+
+
+@dataclass(frozen=True)
+class InstalledPet:
+    """Immutable description of one pet directory in the store."""
+
+    # Directory name under the pets folder; doubles as the pet's identifier.
+    slug: str
+    # Human-readable name and blurb taken from pet.json.
+    display_name: str
+    description: str
+    # The pet's own directory and the spritesheet path resolved inside it.
+    directory: Path
+    spritesheet: Path
+
+    @property
+    def exists(self) -> bool:
+        """True when the resolved spritesheet is present as a regular file."""
+        return self.spritesheet.is_file()
+
+
+def pets_dir() -> Path:
+    """Return ``<hermes home>/pets``, creating it (and parents) if needed."""
+    root = get_hermes_home() / "pets"
+    root.mkdir(parents=True, exist_ok=True)
+    return root
+
+
+def _read_pet_json(directory: Path) -> dict:
+    """Return the parsed ``pet.json`` of *directory*, or ``{}`` if unusable.
+
+    "Unusable" covers a missing file, an unreadable or undecodable file,
+    invalid JSON, and valid JSON whose top level is not an object (a list,
+    string, number, ...).  Callers use ``.get`` on the result, so anything but
+    a dict is normalised to ``{}`` here instead of crashing them later.
+    Problems are logged at debug level and never raised.
+    """
+    pet_json = directory / "pet.json"
+    if not pet_json.is_file():
+        return {}
+    try:
+        data = json.loads(pet_json.read_text(encoding="utf-8"))
+    except (OSError, ValueError) as exc:
+        # ValueError also covers JSONDecodeError and UnicodeDecodeError.
+        logger.debug("unreadable pet.json in %s: %s", directory, exc)
+        return {}
+    if not isinstance(data, dict):
+        logger.debug("ignoring non-object pet.json in %s (%s)", directory, type(data).__name__)
+        return {}
+    return data
+
+
+def _resolve_spritesheet(directory: Path, meta: dict) -> Path:
+    """Locate the spritesheet file that belongs to a pet directory.
+
+    The ``spritesheetPath`` declared in pet.json is tried first; after that
+    the conventional names are probed in order (``spritesheet.{webp,png}``,
+    then petdex R2's ``sprite.{webp,png}``).  If none of them exists, the
+    default ``spritesheet.webp`` path is returned anyway so callers always get
+    a stable location.
+    """
+    declared = str(meta.get("spritesheetPath", "") or "").strip()
+    probe_order = [declared] if declared else []
+    probe_order += ["spritesheet.webp", "spritesheet.png", "sprite.webp", "sprite.png"]
+    for name in probe_order:
+        path = directory / name
+        if path.is_file():
+            return path
+    return directory / "spritesheet.webp"
+
+
+def load_pet(slug: str) -> InstalledPet | None:
+    """Return the :class:`InstalledPet` for *slug*, or ``None`` if absent.
+
+    A slug must name exactly one child of the pets directory.  Empty slugs,
+    ``.``/``..`` and anything containing a path separator or NUL are treated
+    as "not installed"; otherwise an empty slug would resolve to the pets
+    directory itself and ``..`` would escape it.
+
+    The pet is described from its ``pet.json`` (falling back to the slug for
+    the display name); the spritesheet path is resolved but not required to
+    exist — check ``.exists`` on the result.
+    """
+    slug = slug.strip()
+    if not slug or slug in (".", "..") or any(ch in slug for ch in ("/", "\\", "\x00")):
+        return None
+    directory = pets_dir() / slug
+    if not directory.is_dir():
+        return None
+    meta = _read_pet_json(directory)
+    return InstalledPet(
+        slug=slug,
+        display_name=str(meta.get("displayName", "") or slug),
+        description=str(meta.get("description", "") or ""),
+        directory=directory,
+        spritesheet=_resolve_spritesheet(directory, meta),
+    )
+
+
+def installed_pets() -> list[InstalledPet]:
+    """List every pet whose directory holds a usable spritesheet, sorted by path."""
+    candidates = (
+        load_pet(child.name)
+        for child in sorted(pets_dir().iterdir())
+        if child.is_dir()
+    )
+    return [pet for pet in candidates if pet and pet.exists]
+
+
+def resolve_active_pet(configured_slug: str | None = None) -> InstalledPet | None:
+    """Decide which pet should be shown.
+
+    The configured slug (``display.pet.slug``) wins when that pet is installed
+    with a spritesheet.  Otherwise the first entry of :func:`installed_pets`
+    is used, and ``None`` is returned when nothing is installed.
+    """
+    if configured_slug:
+        chosen = load_pet(configured_slug.strip())
+        if chosen is not None and chosen.exists:
+            return chosen
+    return next(iter(installed_pets()), None)
+
+
+def install_pet(slug: str, *, force: bool = False, timeout: float = _DOWNLOAD_TIMEOUT) -> InstalledPet:
+    """Fetch *slug* from the petdex manifest into the pets directory.
+
+    The call is idempotent: a pet that is already fully on disk is returned
+    untouched unless *force* is set.  Otherwise the spritesheet is downloaded,
+    and a ``pet.json`` is written from the upstream file when one can be
+    fetched, or synthesized from the manifest entry when it cannot.
+
+    Raises :class:`PetStoreError` when the slug is unknown to the manifest, the
+    download fails, or no spritesheet ends up on disk; manifest lookups may
+    raise :class:`~agent.pet.manifest.ManifestError`.
+    """
+    from agent.pet.manifest import find_entry
+
+    slug = slug.strip()
+    current = load_pet(slug)
+    if not force and current and current.exists:
+        return current
+
+    entry = find_entry(slug, timeout=timeout)
+    if entry is None:
+        raise PetStoreError(f"pet '{slug}' is not in the petdex manifest")
+
+    pet_dir = pets_dir() / slug
+    pet_dir.mkdir(parents=True, exist_ok=True)
+
+    # Keep PNG sheets as PNG; everything else is stored under the WebP name.
+    url_path = entry.spritesheet_url.lower().split("?")[0]
+    suffix = ".png" if url_path.endswith(".png") else ".webp"
+    sheet_file = pet_dir / f"spritesheet{suffix}"
+    _download(entry.spritesheet_url, sheet_file, timeout=timeout)
+
+    # Prefer the upstream pet.json; fall back to a minimal synthesized one so
+    # the local layout always describes itself.
+    meta: dict = {}
+    if entry.pet_json_url:
+        try:
+            meta = _download_json(entry.pet_json_url, timeout=timeout)
+        except Exception as exc:  # noqa: BLE001 - non-fatal, fall back below
+            logger.debug("pet.json fetch failed for %s: %s", slug, exc)
+    if not isinstance(meta, dict) or not meta:
+        meta = {"id": slug, "displayName": entry.display_name, "description": ""}
+    # The local filename always wins over whatever upstream declared.
+    meta["spritesheetPath"] = sheet_file.name
+    meta.setdefault("id", slug)
+    meta.setdefault("displayName", entry.display_name)
+    manifest_text = json.dumps(meta, indent=2)
+    (pet_dir / "pet.json").write_text(manifest_text, encoding="utf-8")
+
+    installed = load_pet(slug)
+    if installed is None or not installed.exists:
+        raise PetStoreError(f"install of '{slug}' did not produce a spritesheet")
+    return installed
+
+
+def slugify(name: str) -> str:
+    """Turn a display name into a filesystem slug.
+
+    The name is trimmed and lower-cased, every run of characters outside
+    ``a-z0-9`` becomes a single hyphen, and leading/trailing hyphens are
+    dropped.  A name with nothing left (or a falsy name) yields ``"pet"``.
+    """
+    lowered = (name or "").strip().lower()
+    collapsed = re.sub(r"[^a-z0-9]+", "-", lowered)
+    return collapsed.strip("-") or "pet"
+
+
+def unique_slug(name: str) -> str:
+    """Return a :func:`slugify` slug that no existing pet directory uses.
+
+    The plain slug is tried first, then ``<slug>-2``, ``<slug>-3``, ... until
+    a name is found that does not exist under the pets directory.
+    """
+    root = slugify(name)
+    attempt, candidate = 1, root
+    while (pets_dir() / candidate).exists():
+        attempt += 1
+        candidate = f"{root}-{attempt}"
+    return candidate
+
+
+def _write_spritesheet(source, dest: Path) -> None:
+    """Store *source* as the spritesheet file *dest*.
+
+    Raw ``bytes``/``bytearray`` are written verbatim, with no re-encoding.  A
+    path (``str``/``Path``) is opened with Pillow, and any other object is
+    assumed to be a PIL image; both are converted to RGBA and saved as a
+    lossless WebP.
+    """
+    if isinstance(source, (bytes, bytearray)):
+        dest.write_bytes(bytes(source))
+        return
+
+    from PIL import Image
+
+    if not isinstance(source, (str, Path)):
+        rgba = source.convert("RGBA")
+    else:
+        with Image.open(source) as opened:
+            rgba = opened.convert("RGBA")
+    rgba.save(dest, format="WEBP", lossless=True, quality=100, method=6, exact=True)
+
+
+def register_local_pet(
+    spritesheet,
+    *,
+    slug: str,
+    display_name: str = "",
+    description: str = "",
+) -> InstalledPet:
+    """Store a locally generated pet and return its :class:`InstalledPet`.
+
+    *spritesheet* may be a PIL image, raw WebP/PNG bytes, or a path.  The slug
+    is normalised with :func:`slugify`, the sheet is written as
+    ``spritesheet.webp`` and a ``pet.json`` marked ``createdBy: generator`` is
+    placed next to it.  The pet then shows up in :func:`installed_pets`, and
+    because :func:`install_pet` returns an on-disk pet before consulting the
+    manifest, it can be adopted (``pet.select`` / ``/pet <slug>``) without a
+    manifest entry.
+
+    Raises :class:`PetStoreError` when the sheet cannot be written or is
+    missing afterwards.
+    """
+    slug = slugify(slug)
+    pet_dir = pets_dir() / slug
+    pet_dir.mkdir(parents=True, exist_ok=True)
+    sheet_file = pet_dir / "spritesheet.webp"
+    try:
+        _write_spritesheet(spritesheet, sheet_file)
+    except Exception as exc:  # noqa: BLE001 - normalize to one error type
+        raise PetStoreError(f"could not write spritesheet for '{slug}': {exc}") from exc
+
+    manifest = dict(
+        id=slug,
+        displayName=display_name or slug,
+        description=description or "",
+        spritesheetPath=sheet_file.name,
+        createdBy="generator",
+    )
+    manifest_text = json.dumps(manifest, indent=2)
+    (pet_dir / "pet.json").write_text(manifest_text, encoding="utf-8")
+
+    registered = load_pet(slug)
+    if registered is None or not registered.exists:
+        raise PetStoreError(f"register of generated pet '{slug}' did not produce a spritesheet")
+    return registered
+
+
+# Pixel size of the top-left sheet cell that thumbnails are cropped from.
+_THUMB_FRAME_W = 192
+_THUMB_FRAME_H = 208
+# Thumbnail width in pixels; the height is derived from the frame aspect ratio.
+_THUMB_W = 96  # rendered ~40px; 2x+ keeps it crisp on HiDPI
+
+
+def _thumbs_dir() -> Path:
+    """Return the ``.thumbs`` cache folder inside the pets directory, creating it."""
+    cache_root = pets_dir() / ".thumbs"
+    cache_root.mkdir(parents=True, exist_ok=True)
+    return cache_root
+
+
+def _is_petdex_host(url: str) -> bool:
+    """Tell whether *url* points at ``petdex.dev`` or one of its subdomains.
+
+    Used to bound server-side fetches (anti-SSRF).  Only the hostname is
+    compared, case-insensitively; a URL that cannot be parsed is rejected.
+    """
+    from urllib.parse import urlparse
+
+    try:
+        hostname = urlparse(url).hostname
+    except ValueError:
+        return False
+    hostname = (hostname or "").lower()
+    if hostname == "petdex.dev":
+        return True
+    return hostname.endswith(".petdex.dev")
+
+
+def thumbnail_png(slug: str, *, source_url: str = "", timeout: float = 30.0) -> bytes | None:
+    """Return a small idle-frame PNG for *slug*, cached on disk.
+
+    Crops the top-left (idle, frame 0) cell of the spritesheet and downsamples
+    it to a thumbnail. Source preference: an installed spritesheet on disk, else
+    *source_url* — but only when it points at petdex (so the gateway never
+    fetches an arbitrary client-supplied URL). Returns ``None`` when there's no
+    usable source or Pillow/network fails; callers render a placeholder.
+
+    The slug becomes part of the cache filename, so it must be a single path
+    component: empty slugs, ``.``/``..`` and anything with a path separator or
+    NUL yield ``None`` rather than reading or writing outside ``.thumbs``.
+
+    Doing this server-side sidesteps the renderer's CSP / R2 hotlink limits that
+    break a direct ``<img src=cdn>`` and lets the result ride the authenticated
+    gateway as a same-origin data URL.
+    """
+    slug = slug.strip()
+    if not slug or slug in (".", "..") or any(ch in slug for ch in ("/", "\\", "\x00")):
+        return None
+
+    cache = _thumbs_dir() / f"{slug}.png"
+    if cache.is_file():
+        try:
+            return cache.read_bytes()
+        except OSError:
+            pass  # unreadable cache entry: rebuild it below
+
+    sheet_bytes: bytes | None = None
+    pet = load_pet(slug)
+    if pet and pet.exists:
+        try:
+            sheet_bytes = pet.spritesheet.read_bytes()
+        except OSError:
+            sheet_bytes = None
+
+    if sheet_bytes is None and source_url and _is_petdex_host(source_url):
+        # NOTE(review): only the initial URL is host-checked; redirects are
+        # followed without re-validating their target.
+        try:
+            import httpx
+
+            resp = httpx.get(
+                source_url,
+                timeout=timeout,
+                follow_redirects=True,
+                headers={"User-Agent": "hermes-agent-petdex"},
+            )
+            resp.raise_for_status()
+            sheet_bytes = resp.content
+        except Exception as exc:  # noqa: BLE001 - cosmetic, degrade to placeholder
+            logger.debug("thumb fetch failed for %s: %s", slug, exc)
+
+    if not sheet_bytes:
+        return None
+
+    try:
+        import io
+
+        from PIL import Image
+
+        with Image.open(io.BytesIO(sheet_bytes)) as im:
+            # Clamp the crop box so sheets smaller than one frame still work.
+            frame = im.convert("RGBA").crop(
+                (0, 0, min(_THUMB_FRAME_W, im.width), min(_THUMB_FRAME_H, im.height))
+            )
+            height = round(_THUMB_W * _THUMB_FRAME_H / _THUMB_FRAME_W)
+            frame = frame.resize((_THUMB_W, height), Image.NEAREST)
+            buf = io.BytesIO()
+            frame.save(buf, format="PNG")
+            data = buf.getvalue()
+    except Exception as exc:  # noqa: BLE001
+        logger.debug("thumb crop failed for %s: %s", slug, exc)
+        return None
+
+    # Caching is best-effort: a failed write still returns the fresh bytes.
+    try:
+        cache.write_bytes(data)
+    except OSError:
+        pass
+    return data
+
+
+def remove_pet(slug: str) -> bool:
+    """Delete an installed pet directory.  Returns True if anything was removed.
+
+    Only a slug naming exactly one child of the pets directory is accepted.
+    Empty slugs, ``.``/``..`` and anything with a path separator or NUL return
+    ``False`` without touching the disk — an empty slug would otherwise point
+    at the pets directory itself (wiping every pet) and ``..`` at its parent.
+    """
+    import shutil
+
+    slug = slug.strip()
+    if not slug or slug in (".", "..") or any(ch in slug for ch in ("/", "\\", "\x00")):
+        return False
+
+    directory = pets_dir() / slug
+    if not directory.is_dir():
+        return False
+    shutil.rmtree(directory, ignore_errors=True)
+    # rmtree errors are ignored above, so report success from the final state.
+    return not directory.exists()
+
+
+def _download(url: str, dest: Path, *, timeout: float) -> None:
+    """Stream *url* to *dest*, replacing *dest* only after a complete transfer.
+
+    The body is written to ``<dest>.part`` and renamed over *dest* at the end,
+    so an interrupted download never leaves a truncated file at *dest*.  On
+    any failure the partial file is removed and :class:`PetStoreError` is
+    raised, chained to the underlying error.
+    """
+    import httpx
+
+    tmp = None
+    try:
+        tmp = dest.with_suffix(dest.suffix + ".part")
+        with httpx.stream(
+            "GET",
+            url,
+            timeout=timeout,
+            follow_redirects=True,
+            headers={"User-Agent": "hermes-agent-petdex"},
+        ) as resp:
+            resp.raise_for_status()
+            with tmp.open("wb") as fh:
+                for chunk in resp.iter_bytes():
+                    fh.write(chunk)
+            tmp.replace(dest)
+    except Exception as exc:  # noqa: BLE001
+        # Do not leave a stale partial download next to the real file.
+        if tmp is not None:
+            try:
+                tmp.unlink(missing_ok=True)
+            except OSError:
+                pass
+        raise PetStoreError(f"download failed for {url}: {exc}") from exc
+
+
+def _download_json(url: str, *, timeout: float) -> dict:
+    """GET *url* and return its JSON body when that body is an object.
+
+    Any other JSON value yields ``{}``.  HTTP errors (via
+    ``raise_for_status``) and decode errors propagate to the caller.
+    """
+    import httpx
+
+    response = httpx.get(
+        url,
+        timeout=timeout,
+        follow_redirects=True,
+        headers={"User-Agent": "hermes-agent-petdex"},
+    )
+    response.raise_for_status()
+    payload = response.json()
+    if isinstance(payload, dict):
+        return payload
+    return {}
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -8,6 +8,7 @@ import json
 import logging
 import os
 import threading
+import contextvars
 from collections import OrderedDict
 from pathlib import Path

@@ -511,13 +512,19 @@ PLATFORM_HINTS = {
        "Standard Markdown is automatically converted to Telegram formatting. "
        "Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
        "`inline code`, ```code blocks```, [links](url), and ## headers. "
-        "Telegram supports rich Markdown, so when it improves clarity you may "
-        "use headings, tables (pipe `| col | col |` syntax), task lists "
-        "(`- [ ]` / `- [x]`), nested blockquotes, collapsible details, "
-        "footnotes/references, math/formulas (`$...$`, `$$...$$`), underline, "
-        "subscript/superscript, marked (highlighted) text, and anchors. Prefer "
-        "real Markdown tables and task lists over hand-built bullet substitutes "
-        "when presenting structured data. "
+        "Telegram now supports rich Markdown, so lean into it: whenever it "
+        "makes the answer clearer or easier to scan, actively reach for real "
+        "Markdown tables (pipe `| col | col |` syntax), bullet and numbered "
+        "lists, task lists (`- [ ]` / `- [x]`), headings, nested blockquotes, "
+        "collapsible details, footnotes/references, math/formulas (`$...$`, "
+        "`$$...$$`), underline, subscript/superscript, marked (highlighted) "
+        "text, and anchors. Default to structured formatting over dense "
+        "paragraphs for any comparison, set of steps, key/value summary, or "
+        "tabular data. Prefer real Markdown tables and task lists over "
+        "hand-built bullet substitutes when presenting structured data; these "
+        "degrade gracefully (tables become readable bullet groups) when rich "
+        "rendering is unavailable, but advanced constructs like math and "
+        "collapsible details may render as plain source text in that case. "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. Images "
        "(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
@@ -952,6 +959,52 @@ CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
 CONTEXT_TRUNCATE_TAIL_RATIO = 0.2


+def _get_context_file_max_chars() -> int:
+    """Return the configured context-file truncation limit.
+
+    ``context_file_max_chars`` from config.yaml wins when it is a real number
+    >= 1; anything else (unset, bool, fractional, unreadable config) falls back
+    to the upstream-compatible ``CONTEXT_FILE_MAX_CHARS``.
+    """
+    try:
+        from hermes_cli.config import load_config
+        val = load_config().get("context_file_max_chars")
+        # bool is an int subclass, and a value below 1 would truncate to a 0-char limit.
+        if isinstance(val, (int, float)) and not isinstance(val, bool) and val >= 1:
+            return int(val)
+    except Exception as e:
+        logger.debug("Could not read context_file_max_chars from config: %s", e)
+    return CONTEXT_FILE_MAX_CHARS
+
+# Collect truncation warnings so the caller (run_agent) can surface them.
+# A ContextVar (not a module-global list) isolates accumulation per thread /
+# per async task, so concurrent gateway-session prompt builds can't drain or
+# clear each other's pending warnings (cross-session leak). Each build runs in
+# its own context, collects its own warnings, and drains them synchronously.
+_truncation_warnings: "contextvars.ContextVar[Optional[list]]" = contextvars.ContextVar(
+    "context_file_truncation_warnings", default=None
+)
+
+
+def _record_truncation_warning(msg: str) -> None:
+    """Queue *msg* on this context's warning list, creating the list on first use."""
+    pending = _truncation_warnings.get()
+    if pending is not None:
+        pending.append(msg)
+        return
+    _truncation_warnings.set([msg])
+
+
+def drain_truncation_warnings() -> list:
+    """Hand back this context's queued truncation warnings and empty the queue."""
+    pending = _truncation_warnings.get()
+    if pending is None or len(pending) == 0:
+        return []
+    snapshot = pending[:]
+    del pending[:]
+    return snapshot
+
+
 # =========================================================================
 # Skills prompt cache
 # =========================================================================
@@ -1158,7 +1211,7 @@ def build_skills_system_prompt(
        or get_session_env("HERMES_SESSION_PLATFORM")
        or ""
    )
-    disabled = get_disabled_skill_names()
+    disabled = get_disabled_skill_names(_platform_hint or None)
    cache_key = (
        str(skills_dir.resolve()),
        tuple(str(d) for d in external_dirs),
@@ -1457,10 +1510,19 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
 # Context files (SOUL.md, AGENTS.md, .cursorrules)
 # =========================================================================

-def _truncate_content(content: str, filename: str, max_chars: int = CONTEXT_FILE_MAX_CHARS) -> str:
+def _truncate_content(content: str, filename: str, max_chars: Optional[int] = None) -> str:
    """Head/tail truncation with a marker in the middle."""
+    if max_chars is None:
+        max_chars = _get_context_file_max_chars()
    if len(content) <= max_chars:
        return content
+    msg = (
+        f"⚠️  Context file {filename} TRUNCATED: "
+        f"{len(content)} chars exceeds limit of {max_chars} — "
+        f"increase context_file_max_chars or trim the file!"
+    )
+    logger.warning(msg)
+    _record_truncation_warning(msg)
    head_chars = int(max_chars * CONTEXT_TRUNCATE_HEAD_RATIO)
    tail_chars = int(max_chars * CONTEXT_TRUNCATE_TAIL_RATIO)
    head = content[:head_chars]
--- a/agent/redact.py
+++ b/agent/redact.py
@@ -104,6 +104,7 @@ _PREFIX_PATTERNS = [
    r"mem0_[A-Za-z0-9]{10,}",           # Mem0 Platform API key
    r"brv_[A-Za-z0-9]{10,}",            # ByteRover API key
    r"xai-[A-Za-z0-9]{30,}",            # xAI (Grok) API key
+    r"ntn_[A-Za-z0-9]{10,}",            # Notion internal integration token
 ]

 # ENV assignment patterns: KEY=value where KEY contains a secret-like name
--- a/agent/skill_commands.py
+++ b/agent/skill_commands.py
@@ -26,6 +26,91 @@ _skill_commands_platform: Optional[str] = None
 _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
 _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")

+# ---------------------------------------------------------------------------
+# Skill-scaffolding markers and the canonical extractor.
+#
+# When a user invokes a /skill (or /bundle), Hermes expands the turn into a
+# model-facing message that embeds the full skill body plus scaffolding. That
+# expanded text is what flows into the agent loop — and into memory providers
+# via MemoryManager. Providers that store or embed the raw user turn (mem0,
+# openviking, hindsight, retaindb, byterover, honcho, supermemory) would
+# otherwise capture the entire skill body instead of what the user actually
+# asked. ``extract_user_instruction_from_skill_message`` recovers just the
+# user's instruction so memory stays clean.
+#
+# These markers MUST stay byte-identical to the builders below
+# (``_build_skill_message`` here, ``build_bundle_invocation_message`` in
+# agent/skill_bundles.py). They are co-located with the single-skill builder
+# on purpose, and the bundle markers are asserted against the bundle builder in
+# tests/openviking_plugin/test_openviking.py::test_skill_markers_match_hermes_scaffolding.
+# ---------------------------------------------------------------------------
+_SKILL_INVOCATION_PREFIX = "[IMPORTANT: The user has invoked the "
+_SINGLE_SKILL_MARKER = "The full skill content is loaded below.]"
+_SINGLE_SKILL_INSTRUCTION = (
+    "The user has provided the following instruction alongside the skill invocation: "
+)
+_RUNTIME_NOTE = "\n\n[Runtime note:"
+_BUNDLE_MARKER = " skill bundle,"
+_BUNDLE_USER_INSTRUCTION = "\nUser instruction: "
+_BUNDLE_FIRST_SKILL_BLOCK = "\n\n[Loaded as part of the "
+
+
+def extract_user_instruction_from_skill_message(content: Any) -> Optional[str]:
+    """Recover the user's instruction from a slash-skill-expanded turn.
+
+    Returns:
+        - The original string unchanged when it is NOT skill scaffolding
+          (a normal user message passes straight through).
+        - The extracted user instruction when the scaffolding carried one.
+        - ``None`` when the content is skill scaffolding with no user
+          instruction (i.e. a bare ``/skill`` invocation) or is not a string.
+          Callers that feed memory providers should skip the turn in that
+          case — there is no user content worth storing.
+    """
+    if not isinstance(content, str):
+        return None
+    if not content.startswith(_SKILL_INVOCATION_PREFIX):
+        return content
+
+    # A skill body may quote either marker, so the earliest occurrence decides the format.
+    bundle_idx = content.find(_BUNDLE_MARKER)
+    single_idx = content.find(_SINGLE_SKILL_MARKER)
+    if bundle_idx >= 0 and (single_idx < 0 or bundle_idx < single_idx):
+        return _extract_bundle_user_instruction(content)
+    if single_idx >= 0:
+        return _extract_single_skill_user_instruction(content)
+    return None
+
+
+def _extract_single_skill_user_instruction(message: str) -> Optional[str]:
+    # The single-skill builder places the user's instruction after the skill
+    # body, hence the right-most match: the body itself may quote this text.
+    _head, found, tail = message.rpartition(_SINGLE_SKILL_INSTRUCTION)
+    if not found:
+        return None
+
+    # Anything from the first runtime note onwards is scaffolding, not user text.
+    instruction, _sep, _note = tail.partition(_RUNTIME_NOTE)
+    cleaned = instruction.strip()
+    if not cleaned:
+        return None
+    return cleaned
+
+
+def _extract_bundle_user_instruction(message: str) -> Optional[str]:
+    # The bundle builder emits the user's instruction ahead of the loaded
+    # skills, so the left-most marker is the user-provided one.
+    _head, found, tail = message.partition(_BUNDLE_USER_INSTRUCTION)
+    if not found:
+        return None
+
+    # Cut at the first loaded-skill block; everything after it is skill content.
+    instruction, _sep, _skills = tail.partition(_BUNDLE_FIRST_SKILL_BLOCK)
+    cleaned = instruction.strip()
+    if not cleaned:
+        return None
+    return cleaned
+

 def _resolve_skill_commands_platform() -> Optional[str]:
    """Return the current platform scope used for disabled-skill filtering.
--- a/agent/skill_utils.py
+++ b/agent/skill_utils.py
@@ -43,14 +43,20 @@ EXCLUDED_SKILL_DIRS = frozenset(
    )
 )

+# Supporting files live inside a skill package and are loaded explicitly via
+# skill_view(skill, file_path=...). They are not standalone skills and must not
+# be scanned for active SKILL.md/DESCRIPTION.md entries, even if a Curator or
+# archive workflow preserves a complete old skill package under references/.
+SKILL_SUPPORT_DIRS = frozenset(("references", "templates", "assets", "scripts"))
+

 def is_excluded_skill_path(path) -> bool:
-    """True if any component of *path* is in EXCLUDED_SKILL_DIRS.
+    """True if *path* should be skipped by active skill scanners.

-    Use this on every SKILL.md path produced by ``rglob`` to prune
-    dependency, virtualenv, VCS, and cache directories. Centralising the
-    check here keeps every skill-scanning site in sync with the shared
-    exclusion set.
+    Use this on every ``SKILL.md`` path produced by direct ``rglob`` scans to
+    prune dependency, virtualenv, VCS, cache, and progressive-disclosure
+    support-package paths. Centralising the check here keeps every
+    skill-scanning site in sync with the shared exclusion set.

    Accepts a Path or string.
    """
@@ -59,7 +65,36 @@ def is_excluded_skill_path(path) -> bool:
    except AttributeError:
        from pathlib import PurePath
        parts = PurePath(str(path)).parts
-    return any(part in EXCLUDED_SKILL_DIRS for part in parts)
+    return any(part in EXCLUDED_SKILL_DIRS for part in parts) or is_skill_support_path(
+        path
+    )
+
+
+def is_skill_support_path(path) -> bool:
+    """True if *path* lies beneath a support directory of a real skill root.
+
+    A component named ``references``, ``templates``, ``assets``, or ``scripts``
+    only counts as a support directory when its parent directory contains
+    ``SKILL.md``. Everything below such a directory is progressive-disclosure
+    data, not a standalone skill: for example
+    ``some-skill/references/old-skill-package/SKILL.md`` is documentation
+    unless the caller explicitly loads it via ``file_path``.
+
+    Categories or skill names that merely share one of those names (such as
+    ``skills/scripts/foo``) stay discoverable, because their parent directory
+    holds no ``SKILL.md``.
+
+    Accepts a Path or string; the check reads the filesystem.
+    """
+    candidate = path if isinstance(path, Path) else Path(str(path))
+    components = candidate.parts
+    # The leaf is the file or candidate skill directory itself, and the first
+    # component has no parent to be a skill root, so only positions 1..len-2 count.
+    return any(
+        (Path(*components[:pos]) / "SKILL.md").exists()
+        for pos in range(1, len(components) - 1)
+        if components[pos] in SKILL_SUPPORT_DIRS
+    )


 # ── Lazy YAML loader ─────────────────────────────────────────────────────
@@ -272,27 +307,65 @@ def skill_matches_environment(frontmatter: Dict[str, Any]) -> bool:
 # ── Disabled skills ───────────────────────────────────────────────────────


+_RAW_CONFIG_CACHE: Dict[Tuple[str, int, int], Dict[str, Any]] = {}
+
+
+def _raw_config_cache_clear() -> None:
+    """Test hook — empty the raw config cache so the next read re-parses config.yaml."""
+    _RAW_CONFIG_CACHE.clear()
+
+
+def _load_raw_config() -> Dict[str, Any]:
+    """Return the parsed config.yaml mapping, reusing the last parse when unchanged.
+
+    Parses are cached under ``(path, mtime_ns, size)``. ``hermes_cli.config`` is
+    deliberately not imported, keeping the skill prompt/build path lightweight.
+    A missing, unreadable, or non-mapping config yields ``{}`` and is not cached.
+    The cached dict itself is returned, so callers should treat it as read-only.
+    """
+    config_path = get_config_path()
+    if not config_path.exists():
+        return {}
+    cache_key = None
+    try:
+        info = config_path.stat()
+    except OSError:
+        pass  # cannot stat: parse below without touching the cache
+    else:
+        cache_key = (str(config_path), info.st_mtime_ns, info.st_size)
+        hit = _RAW_CONFIG_CACHE.get(cache_key)
+        if hit is not None:
+            return hit
+
+    try:
+        parsed = yaml_load(config_path.read_text(encoding="utf-8"))
+    except Exception as e:
+        logger.debug("Could not read skill config %s: %s", config_path, e)
+        return {}
+    if not isinstance(parsed, dict):
+        return {}
+    if cache_key is not None:
+        _RAW_CONFIG_CACHE.clear()
+        _RAW_CONFIG_CACHE[cache_key] = parsed
+    return parsed
+
+
 def get_disabled_skill_names(platform: str | None = None) -> Set[str]:
    """Read disabled skill names from config.yaml.

    Args:
        platform: Explicit platform name (e.g. ``"telegram"``).  When
            *None*, resolves from ``HERMES_PLATFORM`` or
-            ``HERMES_SESSION_PLATFORM`` env vars.  Falls back to the
-            global disabled list when no platform is determined.
+            ``HERMES_SESSION_PLATFORM`` env vars.  Returns the global
+            disabled list, unioned with the platform-specific list when a
+            platform is resolved (a globally-disabled skill stays disabled
+            on every platform).

    Reads the config file directly (no CLI config imports) to stay
    lightweight.
    """
-    config_path = get_config_path()
-    if not config_path.exists():
-        return set()
-    try:
-        parsed = yaml_load(config_path.read_text(encoding="utf-8"))
-    except Exception as e:
-        logger.debug("Could not read skill config %s: %s", config_path, e)
-        return set()
-    if not isinstance(parsed, dict):
+    parsed = _load_raw_config()
+    if not parsed:
        return set()

    skills_cfg = parsed.get("skills")
@@ -305,13 +378,14 @@ def get_disabled_skill_names(platform: str | None = None) -> Set[str]:
        or os.getenv("HERMES_PLATFORM")
        or get_session_env("HERMES_SESSION_PLATFORM")
    )
+    global_disabled = _normalize_string_set(skills_cfg.get("disabled"))
    if resolved_platform:
        platform_disabled = (skills_cfg.get("platform_disabled") or {}).get(
            resolved_platform
        )
        if platform_disabled is not None:
-            return _normalize_string_set(platform_disabled)
-    return _normalize_string_set(skills_cfg.get("disabled"))
+            return global_disabled | _normalize_string_set(platform_disabled)
+    return global_disabled


 def _normalize_string_set(values) -> Set[str]:
@@ -336,6 +410,7 @@ _EXTERNAL_DIRS_CACHE: Dict[Tuple[str, int], List[Path]] = {}
 def _external_dirs_cache_clear() -> None:
    """Test hook — drop the in-process cache."""
    _EXTERNAL_DIRS_CACHE.clear()
+    _raw_config_cache_clear()


 def get_external_skills_dirs() -> List[Path]:
@@ -368,11 +443,8 @@ def get_external_skills_dirs() -> List[Path]:
            # Return a copy so callers can't mutate the cached list.
            return list(cached)

-    try:
-        parsed = yaml_load(config_path.read_text(encoding="utf-8"))
-    except Exception:
-        return []
-    if not isinstance(parsed, dict):
+    parsed = _load_raw_config()
+    if not parsed:
        return []

    skills_cfg = parsed.get("skills")
@@ -584,15 +656,7 @@ def resolve_skill_config_values(
    current values (or the declared default if the key isn't set).
    Path values are expanded via ``os.path.expanduser``.
    """
-    config_path = get_config_path()
-    config: Dict[str, Any] = {}
-    if config_path.exists():
-        try:
-            parsed = yaml_load(config_path.read_text(encoding="utf-8"))
-            if isinstance(parsed, dict):
-                config = parsed
-        except Exception:
-            pass
+    config = _load_raw_config()

    resolved: Dict[str, Any] = {}
    for var in config_vars:
@@ -632,12 +696,21 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
 def iter_skill_index_files(skills_dir: Path, filename: str):
    """Walk skills_dir yielding sorted paths matching *filename*.

-    Excludes Hermes metadata, VCS, virtualenv/dependency, and cache
-    directories so dependencies cannot register nested skills.
+    Excludes Hermes metadata, VCS, virtualenv/dependency, cache, and skill
+    support directories. Support directories (references/templates/assets/
+    scripts) can contain arbitrary markdown and even archived package
+    ``SKILL.md`` files, but they are progressive-disclosure data loaded through
+    ``skill_view(..., file_path=...)`` rather than active skill roots.
    """
    matches = []
    for root, dirs, files in os.walk(skills_dir, followlinks=True):
-        dirs[:] = [d for d in dirs if d not in EXCLUDED_SKILL_DIRS]
+        has_skill_md = "SKILL.md" in files
+        dirs[:] = [
+            d
+            for d in dirs
+            if d not in EXCLUDED_SKILL_DIRS
+            and not (has_skill_md and d in SKILL_SUPPORT_DIRS)
+        ]
        if filename in files:
            matches.append(Path(root) / filename)
    for path in sorted(matches, key=lambda p: str(p.relative_to(skills_dir))):
--- a/agent/ssl_guard.py
+++ b/agent/ssl_guard.py
@@ -0,0 +1,120 @@
+"""Preventive SSL CA certificate checks for Hermes Agent.
+
+This module catches broken CA bundle paths before OpenAI/httpx turns them into
+opaque ``FileNotFoundError: [Errno 2] No such file or directory`` failures.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import ssl
+from pathlib import Path
+
+from agent.errors import SSLConfigurationError
+
+logger = logging.getLogger(__name__)
+
+# Environment variables that may name a CA bundle; each one that is set is validated.
+_CA_BUNDLE_ENV_VARS = (
+    "HERMES_CA_BUNDLE",
+    "SSL_CERT_FILE",
+    "REQUESTS_CA_BUNDLE",
+    "CURL_CA_BUNDLE",
+)
+
+# HERMES_SKIP_SSL_GUARD values (compared case-insensitively) that disable the guard.
+_SKIP_VALUES = {"1", "true", "yes", "on"}
+
+
+def _skip_ssl_guard_enabled() -> bool:
+    """True when HERMES_SKIP_SSL_GUARD is set to one of ``_SKIP_VALUES``."""
+    return os.getenv("HERMES_SKIP_SSL_GUARD", "").strip().lower() in _SKIP_VALUES
+
+
+def _repair_hint() -> str:
+    """Return the remediation text appended to every SSL configuration error."""
+    return (
+        "Repair: python -m pip install --force-reinstall certifi openai httpx\n"
+        "If you configured a custom corporate CA bundle, fix or unset the "
+        "broken CA bundle environment variable."
+    )
+
+
+def _ssl_err(message: str) -> SSLConfigurationError:
+    """Create a consistent, user-actionable SSL configuration error."""
+    return SSLConfigurationError(f"{message}\n{_repair_hint()}")
+
+
+def _validate_bundle_path(label: str, value: str, *, require_substantial: bool = False) -> None:
+    """Check that *value* names a CA bundle file that ``ssl`` can load.
+
+    Args:
+        label: Name used in error messages (an env var name or ``"certifi"``).
+        value: Path to the bundle; ``~`` is expanded before it is checked.
+        require_substantial: Also reject files smaller than 1024 bytes.
+
+    Raises:
+        SSLConfigurationError: If the path is missing, is not a regular file,
+            cannot be inspected or loaded, is too small, or yields no
+            certificates.
+    """
+    path = Path(value).expanduser()
+    try:
+        exists = path.exists()
+        is_file = exists and path.is_file()
+        size = path.stat().st_size if is_file else 0
+    except OSError as exc:
+        # e.g. permission denied on a parent directory: report it like any other
+        # broken bundle instead of letting a raw OSError escape the guard.
+        raise _ssl_err(f"{label} CA bundle at {value} cannot be inspected: {exc}") from exc
+    if not exists:
+        raise _ssl_err(f"{label} points to a missing CA bundle: {value}")
+    if not is_file:
+        raise _ssl_err(f"{label} does not point to a CA bundle file: {value}")
+    if require_substantial and size < 1024:
+        raise _ssl_err(f"{label} at {value} appears corrupted (too small)")
+    try:
+        ctx = ssl.create_default_context(cafile=str(path))
+    except Exception as exc:
+        raise _ssl_err(f"{label} CA bundle at {value} cannot be loaded: {exc}") from exc
+    if not ctx.get_ca_certs():
+        raise _ssl_err(f"{label} CA bundle at {value} did not load any certificates")
+
+
+def verify_ca_bundle() -> None:
+    """Verify configured and bundled CA certificates are present and loadable.
+
+    Does nothing when ``HERMES_SKIP_SSL_GUARD`` is enabled.
+
+    Raises:
+        SSLConfigurationError: If an explicit CA-bundle environment variable
+            points at a bad path, if certifi cannot be imported, or if
+            certifi's bundled ``cacert.pem`` is missing/corrupt.
+    """
+    if _skip_ssl_guard_enabled():
+        logger.debug("SSL CA bundle guard skipped via HERMES_SKIP_SSL_GUARD")
+        return
+
+    for env_var in _CA_BUNDLE_ENV_VARS:
+        value = os.getenv(env_var)
+        if value:
+            _validate_bundle_path(env_var, value)
+
+    try:
+        import certifi
+    except Exception as exc:
+        raise _ssl_err(f"certifi is not importable: {exc}") from exc
+
+    ca_bundle = str(certifi.where())
+    _validate_bundle_path("certifi", ca_bundle, require_substantial=True)
+
+
+def verify_ca_bundle_with_fallback() -> None:
+    """Backward-compatible wrapper for older call sites.
+
+    The old PR name mentioned a platform fallback, but allowing startup with a
+    broken certifi bundle still leaves httpx/OpenAI and requests call sites
+    failing later. Keep the wrapper name but enforce the same check.
+    """
+    verify_ca_bundle()
--- a/agent/system_prompt.py
+++ b/agent/system_prompt.py
@@ -40,6 +40,7 @@ from agent.prompt_builder import (
    TASK_COMPLETION_GUIDANCE,
    TOOL_USE_ENFORCEMENT_GUIDANCE,
    TOOL_USE_ENFORCEMENT_MODELS,
+    drain_truncation_warnings,
 )
 from agent.runtime_cwd import resolve_context_cwd

@@ -400,7 +401,14 @@ def build_system_prompt(agent: Any, system_message: Optional[str] = None) -> str
    warm across turns.
    """
    parts = build_system_prompt_parts(agent, system_message=system_message)
-    return "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
+    joined = "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
+
+    # Surface context-file truncation warnings through the normal agent status
+    # channel so gateway/CLI users see them in chat instead of only in logs.
+    for warning in drain_truncation_warnings():
+        agent._emit_status(warning)
+
+    return joined


 def invalidate_system_prompt(agent: Any) -> None:
--- a/agent/transports/anthropic.py
+++ b/agent/transports/anthropic.py
@@ -186,10 +186,21 @@ class AnthropicTransport(ProviderTransport):
    def validate_response(self, response: Any) -> bool:
        """Check Anthropic response structure is valid.

-        An empty content list is legitimate when ``stop_reason == "end_turn"``
-        — the model's canonical way of signalling "nothing more to add" after
-        a tool turn that already delivered the user-facing text. Treating it
-        as invalid falsely retries a completed response.
+        An empty content list is legitimate for terminal stop reasons that
+        carry no text payload:
+
+        - ``end_turn`` — the model's canonical "nothing more to add" after a
+          tool turn that already delivered the user-facing text.
+        - ``refusal`` — the model declined to respond (Claude 4.5+). The
+          Messages API returns an empty ``content`` list with this stop
+          reason. Treating it as invalid sends a deterministic refusal into
+          the invalid-response retry loop, which reproduces the refusal on
+          every attempt and surfaces a misleading "rate limited / invalid
+          response" error instead of the refusal. ``normalize_response`` maps
+          ``refusal`` → ``content_filter`` so the agent loop's refusal handler
+          can surface it.
+
+        Treating either as invalid falsely retries a completed response.
        """
        if response is None:
            return False
@@ -197,7 +208,7 @@ class AnthropicTransport(ProviderTransport):
        if not isinstance(content_blocks, list):
            return False
        if not content_blocks:
-            return getattr(response, "stop_reason", None) == "end_turn"
+            return getattr(response, "stop_reason", None) in {"end_turn", "refusal"}
        return True

    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@@ -531,6 +531,7 @@ class ChatCompletionsTransport(ProviderTransport):
                supports_reasoning=params.get("supports_reasoning", False),
                qwen_session_metadata=params.get("qwen_session_metadata"),
                model=model,
+                base_url=params.get("base_url"),
                ollama_num_ctx=params.get("ollama_num_ctx"),
                session_id=params.get("session_id"),
            )
@@ -664,8 +665,42 @@ class ChatCompletionsTransport(ProviderTransport):
        if rd:
            provider_data["reasoning_details"] = rd

+        # OpenAI structured-refusal field. When a model declines, the SDK
+        # populates ``message.refusal`` with the explanation and leaves
+        # ``content`` empty. OpenAI-compatible proxies that front Anthropic /
+        # Bedrock (e.g. Nous Portal) surface a Claude refusal this way — or via
+        # ``finish_reason="content_filter"`` — instead of the native
+        # ``stop_reason="refusal"``. Without capturing it the refusal looks
+        # like an empty response, so the agent loop retries a deterministic
+        # refusal three times and gives up with "no content after retries".
+        # Promote it to content + a ``content_filter`` finish reason so the
+        # loop's refusal handler surfaces it clearly and stops. ``refusal`` is
+        # ``None`` for normal responses, so this is a no-op in the common case.
+        content = msg.content
+        refusal = getattr(msg, "refusal", None)
+        if refusal is None and hasattr(msg, "model_extra"):
+            _msg_extra = getattr(msg, "model_extra", None) or {}
+            if isinstance(_msg_extra, dict):
+                refusal = _msg_extra.get("refusal")
+        if isinstance(refusal, str) and refusal.strip():
+            # Record the refusal explanation regardless — it's useful provider
+            # metadata even when the model also returned a usable payload.
+            provider_data["refusal"] = refusal
+            _has_text = isinstance(content, str) and content.strip()
+            _has_tool_calls = bool(tool_calls)
+            # Only promote to a terminal ``content_filter`` when the refusal is
+            # the *sole* payload — no visible text and no tool calls. A response
+            # that carries real content (or tool calls) alongside a refusal note
+            # is a normal, usable turn: surfacing it as a failed safety refusal
+            # would discard the model's actual work. In the empty-payload case,
+            # adopt the refusal as content so the loop has something to show.
+            if not _has_text and not _has_tool_calls:
+                content = refusal
+                if finish_reason in (None, "stop"):
+                    finish_reason = "content_filter"
+
        return NormalizedResponse(
-            content=msg.content,
+            content=content,
            tool_calls=tool_calls,
            finish_reason=finish_reason,
            reasoning=reasoning,
--- a/agent/transports/codex.py
+++ b/agent/transports/codex.py
@@ -218,22 +218,10 @@ class ResponsesApiTransport(ProviderTransport):
            kwargs.pop("timeout", None)

        if is_codex_backend:
-            prompt_cache_key = kwargs.get("prompt_cache_key")
-            cache_scope_id = str(prompt_cache_key or session_id or "").strip()
-            if cache_scope_id:
-                existing_extra_headers = kwargs.get("extra_headers")
-                merged_extra_headers: Dict[str, str] = {}
-                if isinstance(existing_extra_headers, dict):
-                    merged_extra_headers.update(
-                        {
-                            str(key): str(value)
-                            for key, value in existing_extra_headers.items()
-                            if key and value is not None
-                        }
-                    )
-                merged_extra_headers["session_id"] = cache_scope_id
-                merged_extra_headers["x-client-request-id"] = cache_scope_id
-                kwargs["extra_headers"] = merged_extra_headers
+            # chatgpt.com/backend-api/codex rejects body-level
+            # ``extra_headers`` with HTTP 400. Correlation/cache routing for
+            # this backend must not be sent through the Responses payload.
+            kwargs.pop("extra_headers", None)

        max_tokens = params.get("max_tokens")
        if max_tokens is not None and not is_codex_backend:
--- a/apps/bootstrap-installer/package.json
+++ b/apps/bootstrap-installer/package.json
@@ -16,7 +16,7 @@
  },
  "dependencies": {
    "@nous-research/ui": "0.16.0",
-    "@tailwindcss/vite": "^4.2.1",
+    "@tailwindcss/vite": "^4.2.4",
    "@tailwindcss/typography": "^0.5.19",
    "@tauri-apps/api": "^2.0.0",
    "@tauri-apps/plugin-dialog": "^2.0.0",
@@ -40,8 +40,8 @@
    "@tauri-apps/cli": "^2.0.0",
    "@types/react": "^19.2.14",
    "@types/react-dom": "^19.2.3",
-    "@vitejs/plugin-react": "^5.2.0",
+    "@vitejs/plugin-react": "^6.0.2",
    "typescript": "^6.0.3",
-    "vite": "^7.3.1"
+    "vite": "^8.0.16"
  }
 }
--- a/apps/bootstrap-installer/tsconfig.json
+++ b/apps/bootstrap-installer/tsconfig.json
@@ -1,8 +1,8 @@
 {
  "compilerOptions": {
-    "target": "ES2022",
+    "target": "ES2023",
    "useDefineForClassFields": true,
-    "lib": ["ES2022", "DOM", "DOM.Iterable"],
+    "lib": ["ES2023", "DOM", "DOM.Iterable"],
    "module": "ESNext",
    "skipLibCheck": true,
    "moduleResolution": "bundler",
--- a/apps/desktop/README.md
+++ b/apps/desktop/README.md
@@ -34,7 +34,7 @@ It builds and launches the GUI against your existing install — same config, ke

 ### Prebuilt installers

-Prebuilt installers are built and distributed via [the Hermes Desktop website.](https://hermes-agent.nousresearch.com/desktop).
+Prebuilt installers are built and distributed via [the Hermes Desktop website](https://hermes-agent.nousresearch.com/).

 ---

--- a/apps/desktop/electron/backend-env.cjs
+++ b/apps/desktop/electron/backend-env.cjs
@@ -67,6 +67,16 @@ function buildDesktopBackendPath({
  )
 }

+// Map a per-profile home (<root>/profiles/<name>) back to <root>; any other
+// non-empty path is returned resolved, and falsy input is returned unchanged.
+function normalizeHermesHomeRoot(hermesHome, { pathModule = pathModuleForPlatform(process.platform) } = {}) {
+  if (!hermesHome) return hermesHome
+  const resolved = pathModule.resolve(String(hermesHome))
+  const profilesDir = pathModule.dirname(resolved)
+  const insideProfiles = pathModule.basename(profilesDir).toLowerCase() === 'profiles'
+  return insideProfiles ? pathModule.dirname(profilesDir) : resolved
+}
+
 function buildDesktopBackendEnv({
  hermesHome,
  pythonPathEntries = [],
@@ -97,5 +107,6 @@ module.exports = {
  buildDesktopBackendEnv,
  buildDesktopBackendPath,
  delimiterForPlatform,
+  normalizeHermesHomeRoot,
  pathEnvKey
 }
--- a/apps/desktop/electron/backend-env.test.cjs
+++ b/apps/desktop/electron/backend-env.test.cjs
@@ -7,6 +7,7 @@ const {
  appendUniquePathEntries,
  buildDesktopBackendEnv,
  buildDesktopBackendPath,
+  normalizeHermesHomeRoot,
  pathEnvKey
 } = require('./backend-env.cjs')

@@ -66,6 +67,21 @@ test('buildDesktopBackendEnv extends PYTHONPATH and backend PATH together', () =
  assert.ok(env.PATH.includes('/opt/homebrew/bin'))
 })

+test('normalizeHermesHomeRoot maps profile homes back to the global Hermes root', () => {
+  assert.equal(
+    normalizeHermesHomeRoot('/Users/test/.hermes/profiles/oracle', { pathModule: path.posix }),
+    '/Users/test/.hermes'
+  )
+  assert.equal(
+    normalizeHermesHomeRoot('C:\\Users\\test\\AppData\\Local\\hermes\\profiles\\oracle', { pathModule: path.win32 }),
+    'C:\\Users\\test\\AppData\\Local\\hermes'
+  )
+  assert.equal(
+    normalizeHermesHomeRoot('/Users/test/.hermes', { pathModule: path.posix }),
+    '/Users/test/.hermes'
+  )
+})
+
 test('Windows PATH casing and delimiter are preserved without POSIX sane entries', () => {
  const env = buildDesktopBackendEnv({
    hermesHome: 'C:\\Users\\test\\AppData\\Local\\hermes',
--- a/apps/desktop/electron/connection-config.cjs
+++ b/apps/desktop/electron/connection-config.cjs
@@ -166,6 +166,39 @@ function profileRemoteOverride(config, profile) {
  return { url, authMode: normAuthMode(entry.authMode), token: entry.token }
 }

+/**
+ * Scope a REST path to a Desktop profile when one global remote backend serves
+ * every profile: returns `path` plus a `profile=<scope>` query parameter.
+ * `path` is returned unchanged when `connectionScopeKey(profile)` is empty, when
+ * `opts.globalRemote` is off, when `opts.profileRemoteOverride` is set (that
+ * backend is already profile-scoped), when `path` is empty or unparseable, when
+ * it already carries a `profile` query, or when it is not a same-origin path.
+ */
+function pathWithGlobalRemoteProfile(path, profile, opts = {}) {
+  const scopedProfile = connectionScopeKey(profile)
+  const rawPath = String(path || '')
+  if (!scopedProfile || !opts.globalRemote || opts.profileRemoteOverride || !rawPath) {
+    return path
+  }
+
+  // Placeholder origin: only the path/query/hash are kept from the parsed URL.
+  const placeholderOrigin = 'http://hermes.local'
+  let parsed
+  try {
+    parsed = new URL(rawPath, placeholderOrigin)
+  } catch {
+    return path
+  }
+
+  // An absolute or protocol-relative input resolves to another origin;
+  // re-serializing it below would silently drop its host, so leave it alone.
+  if (parsed.origin !== placeholderOrigin || parsed.searchParams.has('profile')) {
+    return path
+  }
+  parsed.searchParams.set('profile', scopedProfile)
+  return `${parsed.pathname}${parsed.search}${parsed.hash}`
+}
+
 function tokenPreview(value) {
  const raw = String(value || '')

@@ -247,6 +280,7 @@ module.exports = {
  cookiesHaveLiveSession,
  normAuthMode,
  normalizeRemoteBaseUrl,
+  pathWithGlobalRemoteProfile,
  profileRemoteOverride,
  resolveAuthMode,
  resolveTestWsUrl,
--- a/apps/desktop/electron/connection-config.test.cjs
+++ b/apps/desktop/electron/connection-config.test.cjs
@@ -24,6 +24,7 @@ const {
  cookiesHaveLiveSession,
  normAuthMode,
  normalizeRemoteBaseUrl,
+  pathWithGlobalRemoteProfile,
  profileRemoteOverride,
  resolveAuthMode,
  resolveTestWsUrl,
@@ -90,6 +91,72 @@ test('profileRemoteOverride tolerates a missing/!object profiles map', () => {
  assert.equal(profileRemoteOverride(null, 'coder'), null)
 })

+// --- pathWithGlobalRemoteProfile ---
+
+test('pathWithGlobalRemoteProfile appends profile in global remote mode', () => {
+  assert.equal(
+    pathWithGlobalRemoteProfile('/api/model/info', 'iris', {
+      globalRemote: true,
+      profileRemoteOverride: false
+    }),
+    '/api/model/info?profile=iris'
+  )
+})
+
+test('pathWithGlobalRemoteProfile preserves existing query params', () => {
+  assert.equal(
+    pathWithGlobalRemoteProfile('/api/model/options?force=1', 'iris', {
+      globalRemote: true,
+      profileRemoteOverride: false
+    }),
+    '/api/model/options?force=1&profile=iris'
+  )
+})
+
+test('pathWithGlobalRemoteProfile does not replace an explicit profile query', () => {
+  assert.equal(
+    pathWithGlobalRemoteProfile('/api/model/info?profile=default', 'iris', {
+      globalRemote: true,
+      profileRemoteOverride: false
+    }),
+    '/api/model/info?profile=default'
+  )
+})
+
+test('pathWithGlobalRemoteProfile skips local and per-profile remote override paths', () => {
+  assert.equal(
+    pathWithGlobalRemoteProfile('/api/model/info', 'iris', {
+      globalRemote: false,
+      profileRemoteOverride: false
+    }),
+    '/api/model/info'
+  )
+  assert.equal(
+    pathWithGlobalRemoteProfile('/api/model/info', 'iris', {
+      globalRemote: true,
+      profileRemoteOverride: true
+    }),
+    '/api/model/info'
+  )
+})
+
+test('pathWithGlobalRemoteProfile skips empty profile/path safely', () => {
+  assert.equal(
+    pathWithGlobalRemoteProfile('/api/model/info', '', {
+      globalRemote: true,
+      profileRemoteOverride: false
+    }),
+    '/api/model/info'
+  )
+  assert.equal(
+    pathWithGlobalRemoteProfile('', 'iris', {
+      globalRemote: true,
+      profileRemoteOverride: false
+    }),
+    ''
+  )
+})
+
 // --- normalizeRemoteBaseUrl ---

 test('normalizeRemoteBaseUrl strips trailing slashes, hash, and query', () => {
--- a/apps/desktop/electron/main.cjs
+++ b/apps/desktop/electron/main.cjs
@@ -38,7 +38,8 @@ const { adoptServedDashboardToken } = require('./dashboard-token.cjs')
 const { waitForDashboardPort } = require('./backend-ready.cjs')
 const { serializeJsonBody, setJsonRequestHeaders } = require('./oauth-net-request.cjs')
 const { fetchMarketplaceThemes, searchMarketplaceThemes } = require('./vscode-marketplace.cjs')
-const { buildDesktopBackendEnv } = require('./backend-env.cjs')
+const { buildDesktopBackendEnv, normalizeHermesHomeRoot } = require('./backend-env.cjs')
+const { readWindowsUserEnvVar } = require('./windows-user-env.cjs')
 const { readDirForIpc } = require('./fs-read-dir.cjs')
 const { gitRootForIpc } = require('./git-root.cjs')
 const { worktreesForIpc } = require('./git-worktrees.cjs')
@@ -62,6 +63,7 @@ const {
  cookiesHaveLiveSession,
  normAuthMode,
  normalizeRemoteBaseUrl,
+  pathWithGlobalRemoteProfile,
  profileRemoteOverride,
  resolveAuthMode,
  resolveTestWsUrl,
@@ -240,8 +242,18 @@ if (INSTALL_STAMP) {
 // HERMES_HOME beneath the throwaway userData dir so a fresh-install run never
 // touches the user's real ~/.hermes / %LOCALAPPDATA%\hermes.
 function resolveHermesHome() {
-  if (process.env.HERMES_HOME) return path.resolve(process.env.HERMES_HOME)
+  if (process.env.HERMES_HOME) return normalizeHermesHomeRoot(process.env.HERMES_HOME)
  if (USER_DATA_OVERRIDE) return path.join(path.resolve(USER_DATA_OVERRIDE), 'hermes-home')
+  if (IS_WINDOWS) {
+    // A GUI app launched from Explorer inherits the environment block captured
+    // at login, so a HERMES_HOME set via `setx` AFTER login is invisible in
+    // process.env even though the CLI (a fresh shell) sees it. Without this the
+    // backend silently falls back to %LOCALAPPDATA%\hermes and reports "No
+    // inference provider configured" despite a valid configured home (#45471).
+    // Consult the live User-scoped registry value before the default below.
+    const fromRegistry = readWindowsUserEnvVar('HERMES_HOME')
+    if (fromRegistry) return normalizeHermesHomeRoot(fromRegistry)
+  }
  if (IS_WINDOWS && process.env.LOCALAPPDATA) {
    const localappdata = path.join(process.env.LOCALAPPDATA, 'hermes')
    const legacy = path.join(app.getPath('home'), '.hermes')
@@ -5072,65 +5084,211 @@ function focusWindow(win) {
  win.focus()
 }

+function spawnSecondaryWindow({ sessionId, watch, newSession } = {}) {
+  const icon = getAppIconPath()
+  const win = new BrowserWindow({
+    width: SESSION_WINDOW_MIN_WIDTH,
+    height: SESSION_WINDOW_MIN_HEIGHT,
+    minWidth: SESSION_WINDOW_MIN_WIDTH,
+    minHeight: SESSION_WINDOW_MIN_HEIGHT,
+    title: 'Hermes',
+    titleBarStyle: 'hidden',
+    titleBarOverlay: getTitleBarOverlayOptions(),
+    trafficLightPosition: IS_MAC ? WINDOW_BUTTON_POSITION : undefined,
+    vibrancy: IS_MAC ? 'sidebar' : undefined,
+    opacity: windowOpacity(),
+    icon,
+    // Don't show until the renderer's first themed paint is ready. macOS
+    // `vibrancy` ignores `backgroundColor` and paints a translucent OS
+    // material (which follows the OS appearance, not the app theme), so a
+    // dark-themed app on a light-mode Mac flashes white until the renderer
+    // covers it. ready-to-show fires after the boot-time paint in
+    // themes/context.tsx, so the window appears already themed.
+    show: false,
+    backgroundColor: getWindowBackgroundColor(),
+    webPreferences: {
+      preload: path.join(__dirname, 'preload.cjs'),
+      contextIsolation: true,
+      webviewTag: true,
+      sandbox: true,
+      nodeIntegration: false,
+      devTools: true
+    }
+  })
+
+  if (IS_MAC) {
+    win.setWindowButtonPosition?.(WINDOW_BUTTON_POSITION)
+  }
+
+  win.once('ready-to-show', () => {
+    if (!win.isDestroyed()) win.show()
+  })
+
+  win.on('will-enter-full-screen', () => sendWindowStateChanged(true))
+  win.on('enter-full-screen', () => sendWindowStateChanged(true))
+  win.on('will-leave-full-screen', () => sendWindowStateChanged(false))
+  win.on('leave-full-screen', () => sendWindowStateChanged(false))
+
+  wireCommonWindowHandlers(win)
+
+  win.loadURL(
+    buildSessionWindowUrl(sessionId, {
+      devServer: DEV_SERVER,
+      rendererIndexPath: DEV_SERVER ? undefined : resolveRendererIndex(),
+      watch,
+      newSession
+    })
+  )
+
+  return win
+}
+
 // Open (or focus) a standalone window for a single chat session.
 function createSessionWindow(sessionId, { watch = false } = {}) {
-  return sessionWindows.openOrFocus(sessionId, () => {
-    const icon = getAppIconPath()
-    const win = new BrowserWindow({
-      width: SESSION_WINDOW_MIN_WIDTH,
-      height: SESSION_WINDOW_MIN_HEIGHT,
-      minWidth: SESSION_WINDOW_MIN_WIDTH,
-      minHeight: SESSION_WINDOW_MIN_HEIGHT,
-      title: 'Hermes',
-      titleBarStyle: 'hidden',
-      titleBarOverlay: getTitleBarOverlayOptions(),
-      trafficLightPosition: IS_MAC ? WINDOW_BUTTON_POSITION : undefined,
-      vibrancy: IS_MAC ? 'sidebar' : undefined,
-      opacity: windowOpacity(),
-      icon,
-      // Don't show until the renderer's first themed paint is ready. macOS
-      // `vibrancy` ignores `backgroundColor` and paints a translucent OS
-      // material (which follows the OS appearance, not the app theme), so a
-      // dark-themed app on a light-mode Mac flashes white until the renderer
-      // covers it. ready-to-show fires after the boot-time paint in
-      // themes/context.tsx, so the window appears already themed.
-      show: false,
-      backgroundColor: getWindowBackgroundColor(),
-      webPreferences: {
-        preload: path.join(__dirname, 'preload.cjs'),
-        contextIsolation: true,
-        webviewTag: true,
-        sandbox: true,
-        nodeIntegration: false,
-        devTools: true
-      }
-    })
+  return sessionWindows.openOrFocus(sessionId, () => spawnSecondaryWindow({ sessionId, watch }))
+}

-    if (IS_MAC) {
-      win.setWindowButtonPosition?.(WINDOW_BUTTON_POSITION)
+// Open a fresh compact window on the new-session draft (#/). Not registry-keyed:
+// like ⌘N in a browser, every press opens a new window — and a draft window that
+// later converts to a real session must not get refocused as if it were blank.
+function createNewSessionWindow() {
+  return spawnSecondaryWindow({ newSession: true })
+}
+
+// The pet overlay: a single transparent, frameless, always-on-top window that
+// hosts ONLY the floating mascot. Shift-clicking the in-window pet "pops it out"
+// here so it can leave the app's bounds and stay visible while Hermes is
+// minimized (Codex-style task-completion glance). It carries no gateway
+// connection of its own — the main renderer is the single source of truth and
+// pushes pet state over IPC (hermes:pet-overlay:state); the overlay just renders
+// it. Control flows back (pop-in, composer submit) via hermes:pet-overlay:control.
+let petOverlayWindow = null
+
+function petOverlayUrl() {
+  // The overlay flag goes in the query string, ahead of the '#/' hash route.
+  const overlaySuffix = '?win=overlay#/'
+  if (!DEV_SERVER) return `${pathToFileURL(resolveRendererIndex()).toString()}${overlaySuffix}`
+
+  return `${DEV_SERVER.endsWith('/') ? DEV_SERVER.slice(0, -1) : DEV_SERVER}/${overlaySuffix}`
+}
+
+function spawnPetOverlayWindow(bounds) {
+  const win = new BrowserWindow({
+    width: Math.max(80, Math.round(bounds?.width || 220)),
+    height: Math.max(80, Math.round(bounds?.height || 220)),
+    x: Number.isFinite(bounds?.x) ? Math.round(bounds.x) : undefined,
+    y: Number.isFinite(bounds?.y) ? Math.round(bounds.y) : undefined,
+    frame: false,
+    transparent: true,
+    resizable: false,
+    movable: true,
+    minimizable: false,
+    maximizable: false,
+    fullscreenable: false,
+    // Windows/Linux need this so the helper window does not get its own
+    // taskbar/alt-tab entry. On macOS, cmd-tab is app-level and this can make
+    // the whole app look like it vanished when the only newly-created visible
+    // window is a frameless overlay. Use NSPanel + Mission Control hiding below
+    // instead, leaving the main Hermes app as the Dock/cmd-tab anchor.
+    skipTaskbar: !IS_MAC,
+    hasShadow: false,
+    alwaysOnTop: true,
+    // macOS panels are non-activating helper windows and can float over full
+    // screen spaces without becoming the app's main switcher window.
+    type: IS_MAC ? 'panel' : undefined,
+    hiddenInMissionControl: IS_MAC,
+    // Non-activating: the overlay must never become the app's key/main window,
+    // or it (a frameless, taskbar-skipping panel) becomes the app's switcher
+    // anchor and the Hermes icon drops out of cmd/alt-tab — especially when the
+    // main window is minimized. We flip this on only while the composer needs
+    // the keyboard (see hermes:pet-overlay:set-focusable).
+    focusable: false,
+    show: false,
+    // Fully transparent — the renderer paints only the sprite + bubble.
+    backgroundColor: '#00000000',
+    webPreferences: {
+      preload: path.join(__dirname, 'preload.cjs'),
+      contextIsolation: true,
+      sandbox: true,
+      nodeIntegration: false,
+      devTools: true,
+      // Keep the sprite animating + bubble updating while the main window is
+      // minimized/blurred — the whole point of the overlay.
+      backgroundThrottling: false
+    }
+  })
+
+  // Float above other apps and follow the user across desktops so the pet is
+  // always reachable. `floating` + `type: panel` is the macOS NSPanel path; the
+  // more aggressive `screen-saver` level can interfere with normal app/window
+  // switching semantics.
+  win.setAlwaysOnTop(true, IS_MAC ? 'floating' : 'screen-saver')
+  win.setHiddenInMissionControl?.(true)
+  try {
+    // Electron docs: macOS may transform process type on each
+    // setVisibleOnAllWorkspaces() call unless skipTransformProcessType=true,
+    // which briefly hides the Dock/cmd-tab presence. Keep Hermes in the normal
+    // ForegroundApplication class so shift-clicking the pet never drops the app
+    // out of app switchers.
+    win.setVisibleOnAllWorkspaces(
+      true,
+      IS_MAC ? { visibleOnFullScreen: true, skipTransformProcessType: true } : undefined
+    )
+  } catch {
+    // Not supported everywhere — best effort.
+  }
+
+  wireCommonWindowHandlers(win)
+
+  win.once('ready-to-show', () => {
+    if (!win.isDestroyed()) win.showInactive()
+  })
+
+  win.on('closed', () => {
+    if (petOverlayWindow === win) {
+      petOverlayWindow = null
    }

-    win.once('ready-to-show', () => {
-      if (!win.isDestroyed()) win.show()
-    })
-
-    win.on('will-enter-full-screen', () => sendWindowStateChanged(true))
-    win.on('enter-full-screen', () => sendWindowStateChanged(true))
-    win.on('will-leave-full-screen', () => sendWindowStateChanged(false))
-    win.on('leave-full-screen', () => sendWindowStateChanged(false))
-
-    wireCommonWindowHandlers(win)
-
-    win.loadURL(
-      buildSessionWindowUrl(sessionId, {
-        devServer: DEV_SERVER,
-        rendererIndexPath: DEV_SERVER ? undefined : resolveRendererIndex(),
-        watch
-      })
-    )
-
-    return win
+    // If the overlay went away on its own (e.g. ⌘W), tell the main renderer to
+    // pop the pet back in so it doesn't stay hidden. Harmless echo when we're
+    // the ones who closed it (popInPet already cleared the active flag).
+    if (mainWindow && !mainWindow.isDestroyed()) {
+      mainWindow.webContents.send('hermes:pet-overlay:control', { type: 'pop-in' })
+    }
  })
+
+  win.loadURL(petOverlayUrl())
+
+  return win
+}
+
+// Show the pet overlay, reusing (and optionally repositioning) the live window or spawning one.
+function openPetOverlay(bounds) {
+  if (petOverlayWindow && !petOverlayWindow.isDestroyed()) {
+    // Reposition only with fully numeric bounds; Math.round(undefined) is NaN.
+    if (bounds && ['x', 'y', 'width', 'height'].every(key => Number.isFinite(bounds[key]))) {
+      petOverlayWindow.setBounds({
+        x: Math.round(bounds.x),
+        y: Math.round(bounds.y),
+        width: Math.max(80, Math.round(bounds.width)),
+        height: Math.max(80, Math.round(bounds.height))
+      })
+    }
+    petOverlayWindow.showInactive()
+
+    return petOverlayWindow
+  }
+
+  petOverlayWindow = spawnPetOverlayWindow(bounds)
+  return petOverlayWindow
+}
+
+function closePetOverlay() {
+  if (petOverlayWindow && !petOverlayWindow.isDestroyed()) {
+    petOverlayWindow.close()
+  }
+
+  petOverlayWindow = null
 }

 function createWindow() {
@@ -5202,6 +5360,11 @@ function createWindow() {
  mainWindow.on('will-leave-full-screen', () => sendWindowStateChanged(false))
  mainWindow.on('leave-full-screen', () => sendWindowStateChanged(false))

+  // The overlay rides the main window — closing the app's primary window must
+  // tear it down too (otherwise it strands as an orphan that blocks
+  // window-all-closed from quitting on Windows/Linux).
+  mainWindow.on('closed', () => closePetOverlay())
+
  wireCommonWindowHandlers(mainWindow)

  mainWindow.webContents.on('render-process-gone', (_event, details) => {
@@ -5317,6 +5480,121 @@ ipcMain.handle('hermes:window:openSession', async (_event, sessionId, opts) => {

  return { ok: true }
 })
+ipcMain.handle('hermes:window:openNewSession', async () => {
+  createNewSessionWindow()
+
+  return { ok: true }
+})
+
+// --- Pet overlay (pop-out mascot) -----------------------------------------
+// `request` is `{ bounds, screen }`. A fresh pop-out passes viewport-space
+// bounds (screen=false): convert to screen space by adding the main window's
+// content origin so the pet lands where it sat in-window. A remembered/dragged
+// spot passes screen-space bounds (screen=true) and is used as-is. We return the
+// resolved screen bounds so the renderer can persist exactly where it opened.
+ipcMain.handle('hermes:pet-overlay:open', async (_event, request) => {
+  const bounds = request && request.bounds ? request.bounds : request
+  const isScreen = Boolean(request && request.screen)
+  let screenBounds = bounds
+
+  try {
+    if (bounds && !isScreen && mainWindow && !mainWindow.isDestroyed()) {
+      const content = mainWindow.getContentBounds()
+      screenBounds = {
+        x: content.x + (bounds.x || 0),
+        y: content.y + (bounds.y || 0),
+        width: bounds.width,
+        height: bounds.height
+      }
+    }
+  } catch {
+    // Fall back to raw bounds if the window geometry is unavailable.
+  }
+
+  openPetOverlay(screenBounds)
+
+  return { ok: true, bounds: screenBounds }
+})
+ipcMain.handle('hermes:pet-overlay:close', async () => {
+  closePetOverlay()
+
+  return { ok: true }
+})
+// Drag: the overlay reports a new absolute screen rect (it already knows the
+// pointer's screen coords) and we move the window. Non-numeric rects are dropped.
+ipcMain.on('hermes:pet-overlay:set-bounds', (_event, bounds) => {
+  if (!petOverlayWindow || petOverlayWindow.isDestroyed() || !bounds) {
+    return
+  }
+  if (![bounds.x, bounds.y, bounds.width, bounds.height].every(Number.isFinite)) return
+  petOverlayWindow.setBounds({
+    x: Math.round(bounds.x),
+    y: Math.round(bounds.y),
+    width: Math.max(80, Math.round(bounds.width)),
+    height: Math.max(80, Math.round(bounds.height))
+  })
+})
+// Click-through: the overlay window is a full rectangle but only the pet pixels
+// should be interactive. The renderer toggles this as the cursor enters/leaves
+// the sprite so transparent margins pass clicks to whatever is behind.
+ipcMain.on('hermes:pet-overlay:ignore-mouse', (_event, ignore) => {
+  if (petOverlayWindow && !petOverlayWindow.isDestroyed()) {
+    petOverlayWindow.setIgnoreMouseEvents(Boolean(ignore), { forward: true })
+  }
+})
+// The overlay is a non-activating panel (focusable:false) so it never steals
+// the app's cmd/alt-tab anchor from the main window. But the pop-up composer
+// needs the keyboard, so the renderer asks us to flip it focusable + focus it
+// while the composer is open, then back to non-activating when it closes.
+ipcMain.on('hermes:pet-overlay:set-focusable', (_event, focusable) => {
+  if (!petOverlayWindow || petOverlayWindow.isDestroyed()) {
+    return
+  }
+
+  petOverlayWindow.setFocusable(Boolean(focusable))
+  if (focusable) {
+    petOverlayWindow.focus()
+  }
+})
+// Main renderer → overlay: forward the latest pet state for the overlay to render.
+ipcMain.on('hermes:pet-overlay:state', (_event, payload) => {
+  if (petOverlayWindow && !petOverlayWindow.isDestroyed()) {
+    petOverlayWindow.webContents.send('hermes:pet-overlay:state', payload)
+  }
+})
+// Overlay → main renderer: handles 'toggle-app' itself and forwards every other control message.
+ipcMain.on('hermes:pet-overlay:control', (_event, payload) => {
+  if (!mainWindow || mainWindow.isDestroyed()) {
+    return
+  }
+
+  // Double-click toggles the app window: hide it away if it's up front, bring it
+  // back if it's minimized/buried. Pure window control — nothing for the
+  // renderer to do, so don't forward it.
+  if (payload && payload.type === 'toggle-app') {
+    if (mainWindow.isMinimized() || !mainWindow.isVisible()) {
+      // Un-minimize before showing, mirroring the open-app branch below.
+      if (mainWindow.isMinimized()) mainWindow.restore()
+      mainWindow.show()
+      mainWindow.focus()
+    } else {
+      mainWindow.minimize()
+    }
+    return
+  }
+
+  // The mail icon means "take me to the app": raise the main window (it may be
+  // minimized or buried) before the renderer navigates to the latest thread.
+  if (payload && payload.type === 'open-app') {
+    if (mainWindow.isMinimized()) {
+      mainWindow.restore()
+    }
+    mainWindow.show()
+    mainWindow.focus()
+  }
+
+  mainWindow.webContents.send('hermes:pet-overlay:control', payload)
+})
 ipcMain.handle('hermes:bootstrap:reset', async () => {
  // Renderer's "Reload and retry" path. Clear the latched failure and
  // reset connection state so the next startHermes() call restarts the
@@ -5586,9 +5864,14 @@ ipcMain.handle('hermes:api', async (_event, request) => {

  await prepareProfileDeleteRequest(request)

-  const connection = await ensureBackend(request?.profile)
+  const profile = request?.profile
+  const connection = await ensureBackend(profile)
  const timeoutMs = resolveTimeoutMs(request?.timeoutMs, DEFAULT_FETCH_TIMEOUT_MS)
-  const url = `${connection.baseUrl}${request.path}`
+  const requestPath = pathWithGlobalRemoteProfile(request.path, profile, {
+    globalRemote: globalRemoteActive(),
+    profileRemoteOverride: profileHasRemoteOverride(profile)
+  })
+  const url = `${connection.baseUrl}${requestPath}`
  // OAuth gateways authenticate REST via the HttpOnly session cookie held in
  // the OAuth partition — route through Electron's net stack bound to that
  // session so the cookie attaches automatically. Token/local modes keep using
@@ -5609,11 +5892,30 @@ ipcMain.handle('hermes:api', async (_event, request) => {

 ipcMain.handle('hermes:notify', (_event, payload) => {
  if (!Notification.isSupported()) return false
-  new Notification({
+  // Action buttons render only on signed macOS builds; elsewhere they're dropped
+  // and the body click still works.
+  const actions = Array.isArray(payload?.actions) ? payload.actions : []
+  const notification = new Notification({
    title: payload?.title || 'Hermes',
    body: payload?.body || '',
-    silent: Boolean(payload?.silent)
-  }).show()
+    silent: Boolean(payload?.silent),
+    actions: actions.map(action => ({ type: 'button', text: String(action?.text || '') }))
+  })
+  notification.on('click', () => {
+    if (!mainWindow || mainWindow.isDestroyed()) return
+    focusWindow(mainWindow)
+    if (payload?.sessionId) {
+      mainWindow.webContents.send('hermes:focus-session', payload.sessionId)
+    }
+  })
+  notification.on('action', (_actionEvent, index) => {
+    if (!mainWindow || mainWindow.isDestroyed()) return
+    const action = actions[index]
+    if (action?.id) {
+      mainWindow.webContents.send('hermes:notification-action', { sessionId: payload?.sessionId, actionId: action.id })
+    }
+  })
+  notification.show()
  return true
 })

@@ -6497,6 +6799,10 @@ function configureSpellChecker() {
 }

 app.on('before-quit', () => {
+  // The always-on-top overlay isn't a "real" app window; close it so a stray
+  // pet can't keep the process alive or float over a quit app.
+  closePetOverlay()
+
  // Quitting mid-install should stop the installer, not orphan it.
  if (bootstrapAbortController) {
    try {
--- a/apps/desktop/electron/preload.cjs
+++ b/apps/desktop/electron/preload.cjs
@@ -6,6 +6,33 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
  touchBackend: profile => ipcRenderer.invoke('hermes:backend:touch', profile),
  getGatewayWsUrl: profile => ipcRenderer.invoke('hermes:gateway:ws-url', profile),
  openSessionWindow: (sessionId, opts) => ipcRenderer.invoke('hermes:window:openSession', sessionId, opts),
+  openNewSessionWindow: () => ipcRenderer.invoke('hermes:window:openNewSession'),
+  petOverlay: {
+    // Main renderer → main process: window lifecycle + drag. `request` is
+    // `{ bounds, screen }`; resolves with the screen bounds it actually used.
+    open: request => ipcRenderer.invoke('hermes:pet-overlay:open', request),
+    close: () => ipcRenderer.invoke('hermes:pet-overlay:close'),
+    setBounds: bounds => ipcRenderer.send('hermes:pet-overlay:set-bounds', bounds),
+    setIgnoreMouse: ignore => ipcRenderer.send('hermes:pet-overlay:ignore-mouse', ignore),
+    // Flip the overlay focusable (and focus it) while the composer needs keys.
+    setFocusable: focusable => ipcRenderer.send('hermes:pet-overlay:set-focusable', focusable),
+    // Main renderer → overlay (forwarded by main): push the latest pet state.
+    pushState: payload => ipcRenderer.send('hermes:pet-overlay:state', payload),
+    // Overlay → main renderer (forwarded by main): pop back in / composer submit.
+    control: payload => ipcRenderer.send('hermes:pet-overlay:control', payload),
+    // Overlay subscribes to state pushes.
+    onState: callback => {
+      const listener = (_event, payload) => callback(payload)
+      ipcRenderer.on('hermes:pet-overlay:state', listener)
+      return () => ipcRenderer.removeListener('hermes:pet-overlay:state', listener)
+    },
+    // Main renderer subscribes to overlay control messages.
+    onControl: callback => {
+      const listener = (_event, payload) => callback(payload)
+      ipcRenderer.on('hermes:pet-overlay:control', listener)
+      return () => ipcRenderer.removeListener('hermes:pet-overlay:control', listener)
+    }
+  },
  getBootProgress: () => ipcRenderer.invoke('hermes:boot-progress:get'),
  getConnectionConfig: profile => ipcRenderer.invoke('hermes:connection-config:get', profile),
  saveConnectionConfig: payload => ipcRenderer.invoke('hermes:connection-config:save', payload),
@@ -94,6 +121,16 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
    ipcRenderer.on('hermes:window-state-changed', listener)
    return () => ipcRenderer.removeListener('hermes:window-state-changed', listener)
  },
+  onFocusSession: callback => {
+    const listener = (_event, sessionId) => callback(sessionId)
+    ipcRenderer.on('hermes:focus-session', listener)
+    return () => ipcRenderer.removeListener('hermes:focus-session', listener)
+  },
+  onNotificationAction: callback => {
+    const listener = (_event, payload) => callback(payload)
+    ipcRenderer.on('hermes:notification-action', listener)
+    return () => ipcRenderer.removeListener('hermes:notification-action', listener)
+  },
  onPreviewFileChanged: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:preview-file-changed', listener)
--- a/apps/desktop/electron/session-windows.cjs
+++ b/apps/desktop/electron/session-windows.cjs
@@ -15,12 +15,13 @@ const SESSION_WINDOW_MIN_HEIGHT = 620
 // flag MUST sit in the query string BEFORE the '#': anything after the '#' is
 // treated as the route by HashRouter and would break routeSessionId(). The
 // renderer reads the flag from window.location.search to suppress the install /
-// onboarding overlays and the global session sidebar. `watch=1` marks a
-// spectator window (e.g. a running subagent's session): the renderer resumes
-// it lazily so the gateway never builds an agent just to stream into it.
-function buildSessionWindowUrl(sessionId, { devServer, rendererIndexPath, watch } = {}) {
-  const query = `?win=secondary${watch ? '&watch=1' : ''}`
-  const route = `#/${encodeURIComponent(sessionId)}`
+// onboarding overlays and the global session sidebar. `new=1` marks the compact
+// scratch window; `watch=1` marks a spectator window (e.g. a running subagent's
+// session): the renderer resumes it lazily so the gateway never builds an agent
+// just to stream into it.
+function buildSessionWindowUrl(sessionId, { devServer, rendererIndexPath, watch, newSession } = {}) {
+  const query = `?win=secondary${newSession ? '&new=1' : ''}${watch ? '&watch=1' : ''}`
+  const route = newSession ? '#/' : `#/${encodeURIComponent(sessionId)}`

  if (devServer) {
    const base = devServer.endsWith('/') ? devServer.slice(0, -1) : devServer
--- a/apps/desktop/electron/session-windows.test.cjs
+++ b/apps/desktop/electron/session-windows.test.cjs
@@ -82,6 +82,12 @@ test('buildSessionWindowUrl adds the watch flag for spectator windows, before th
  assert.equal(url, 'http://localhost:5173/?win=secondary&watch=1#/abc')
 })

+test('buildSessionWindowUrl routes new-session windows to the draft (#/)', () => {
+  const url = buildSessionWindowUrl(null, { devServer: 'http://localhost:5173', newSession: true })
+
+  assert.equal(url, 'http://localhost:5173/?win=secondary&new=1#/')
+})
+
 test('registry opens one window per session and focuses on re-open', () => {
  const registry = createSessionWindowRegistry()
  let built = 0
--- a/apps/desktop/electron/windows-user-env.cjs
+++ b/apps/desktop/electron/windows-user-env.cjs
@@ -0,0 +1,76 @@
+// windows-user-env.cjs
+//
+// Read a User-scoped environment variable straight from the Windows registry
+// (HKCU\Environment).
+//
+// A GUI app launched from Explorer inherits the environment block captured at
+// login, so a variable set via `setx` AFTER login is invisible in process.env
+// even though a fresh shell — and the Hermes CLI — sees it immediately. The
+// desktop's HERMES_HOME resolution relies on process.env, so that stale-snapshot
+// gap silently sends the backend to the default %LOCALAPPDATA%\hermes. Reading
+// the live registry value closes the gap. See #45471.
+
+const { execFileSync } = require('node:child_process')
+
+// Parse the output of `reg query HKCU\Environment /v <name>`, which looks like:
+//
+//   HKEY_CURRENT_USER\Environment
+//       HERMES_HOME    REG_SZ    F:\Hermes\data
+//
+// Returns the value text with inner spaces preserved (name match ignores case),
+// or null when no line for `name` matches, which includes an empty value.
+function parseRegQueryValue(stdout, name) {
+  if (!stdout || !name) return null
+  const typePattern =
+    /^(\S+)\s+(?:REG_SZ|REG_EXPAND_SZ|REG_MULTI_SZ|REG_DWORD|REG_QWORD|REG_BINARY|REG_NONE)\s+(.*)$/
+  for (const rawLine of String(stdout).split(/\r?\n/)) {
+    const line = rawLine.trim()
+    const match = line.match(typePattern)
+    if (match && match[1].toLowerCase() === name.toLowerCase()) {
+      return match[2]
+    }
+  }
+  return null
+}
+
+// Expand %VAR% references against an env map, looking names up without regard
+// to case. REG_EXPAND_SZ values store unexpanded references; plain REG_SZ paths
+// like F:\... have none and pass through. Unknown references are left verbatim.
+function expandWindowsEnvRefs(value, env = process.env) {
+  if (!value) return value
+  return value.replace(/%([^%]+)%/g, (whole, name) => {
+    const key = Object.keys(env).find(k => k.toUpperCase() === String(name).toUpperCase())
+    return key != null && env[key] != null ? env[key] : whole
+  })
+}
+
+// Read a User-scoped env var from HKCU\Environment. Null off-Windows (no spawn),
+// when `reg` throws (missing, non-zero exit, 5 s timeout), or on an empty value.
+// NOTE(review): `reg` may emit the console code page, not utf8; check non-ASCII paths.
+function readWindowsUserEnvVar(
+  name,
+  { platform = process.platform, env = process.env, exec = execFileSync } = {}
+) {
+  if (platform !== 'win32' || !name) return null
+  let stdout
+  try {
+    stdout = exec('reg', ['query', 'HKCU\\Environment', '/v', name], {
+      encoding: 'utf8',
+      windowsHide: true,
+      timeout: 5000
+    })
+  } catch {
+    // `reg` missing, timed out, or value absent (reg exits 1) — caller falls back.
+    return null
+  }
+  const raw = parseRegQueryValue(stdout, name)
+  if (raw == null) return null
+  const expanded = expandWindowsEnvRefs(raw, env).trim()
+  return expanded || null
+}
+
+module.exports = {
+  expandWindowsEnvRefs,
+  parseRegQueryValue,
+  readWindowsUserEnvVar
+}
--- a/apps/desktop/electron/windows-user-env.test.cjs
+++ b/apps/desktop/electron/windows-user-env.test.cjs
@@ -0,0 +1,90 @@
+const assert = require('node:assert/strict')
+const { test } = require('node:test')
+
+const {
+  expandWindowsEnvRefs,
+  parseRegQueryValue,
+  readWindowsUserEnvVar
+} = require('./windows-user-env.cjs')
+
+// ── parseRegQueryValue ─────────────────────────────────────────────────────
+
+test('parseRegQueryValue extracts a REG_SZ value', () => {
+  const out = [
+    '',
+    'HKEY_CURRENT_USER\\Environment',
+    '    HERMES_HOME    REG_SZ    F:\\Hermes\\data',
+    ''
+  ].join('\r\n')
+  assert.equal(parseRegQueryValue(out, 'HERMES_HOME'), 'F:\\Hermes\\data')
+})
+
+test('parseRegQueryValue matches the name case-insensitively', () => {
+  const out = 'HKEY_CURRENT_USER\\Environment\r\n    Hermes_Home    REG_EXPAND_SZ    %USERPROFILE%\\h\r\n'
+  assert.equal(parseRegQueryValue(out, 'HERMES_HOME'), '%USERPROFILE%\\h')
+})
+
+test('parseRegQueryValue preserves spaces inside the value', () => {
+  const out = '    HERMES_HOME    REG_SZ    C:\\Program Files\\Hermes\r\n'
+  assert.equal(parseRegQueryValue(out, 'HERMES_HOME'), 'C:\\Program Files\\Hermes')
+})
+
+test('parseRegQueryValue returns null when the value line is absent', () => {
+  const out = 'HKEY_CURRENT_USER\\Environment\r\n    Path    REG_SZ    C:\\x\r\n'
+  assert.equal(parseRegQueryValue(out, 'HERMES_HOME'), null)
+  assert.equal(parseRegQueryValue('', 'HERMES_HOME'), null)
+  assert.equal(parseRegQueryValue('garbage', 'HERMES_HOME'), null)
+})
+
+// ── expandWindowsEnvRefs ───────────────────────────────────────────────────
+
+test('expandWindowsEnvRefs expands %VAR% case-insensitively', () => {
+  assert.equal(
+    expandWindowsEnvRefs('%UserProfile%\\h', { USERPROFILE: 'C:\\Users\\jeff' }),
+    'C:\\Users\\jeff\\h'
+  )
+})
+
+test('expandWindowsEnvRefs leaves literal paths and unknown refs intact', () => {
+  assert.equal(expandWindowsEnvRefs('F:\\Hermes\\data', {}), 'F:\\Hermes\\data')
+  assert.equal(expandWindowsEnvRefs('%NOPE%\\x', {}), '%NOPE%\\x')
+})
+
+// ── readWindowsUserEnvVar ──────────────────────────────────────────────────
+
+test('readWindowsUserEnvVar returns null off Windows without spawning', () => {
+  let spawned = false
+  const exec = () => {
+    spawned = true
+    return ''
+  }
+  assert.equal(readWindowsUserEnvVar('HERMES_HOME', { platform: 'linux', exec }), null)
+  assert.equal(spawned, false)
+})
+
+test('readWindowsUserEnvVar queries HKCU\\Environment and expands the value', () => {
+  const calls = []
+  const exec = (cmd, args) => {
+    calls.push([cmd, args])
+    return 'HKEY_CURRENT_USER\\Environment\r\n    HERMES_HOME    REG_EXPAND_SZ    %DRIVE%\\Hermes\r\n'
+  }
+  const value = readWindowsUserEnvVar('HERMES_HOME', {
+    platform: 'win32',
+    env: { DRIVE: 'F:' },
+    exec
+  })
+  assert.equal(value, 'F:\\Hermes')
+  assert.deepEqual(calls, [['reg', ['query', 'HKCU\\Environment', '/v', 'HERMES_HOME']]])
+})
+
+test('readWindowsUserEnvVar returns null when reg exits non-zero (value missing)', () => {
+  const exec = () => {
+    throw new Error('reg exited 1')
+  }
+  assert.equal(readWindowsUserEnvVar('HERMES_HOME', { platform: 'win32', exec }), null)
+})
+
+test('readWindowsUserEnvVar returns null for an empty value', () => {
+  const exec = () => '    HERMES_HOME    REG_SZ    \r\n'
+  assert.equal(readWindowsUserEnvVar('HERMES_HOME', { platform: 'win32', exec }), null)
+})
--- a/apps/desktop/package.json
+++ b/apps/desktop/package.json
@@ -20,6 +20,7 @@
    "start": "npm run build && electron .",
    "build": "node scripts/assert-root-install.cjs && node scripts/write-build-stamp.cjs && node scripts/stage-native-deps.cjs && tsc -b && vite build && npm run postbuild",
    "postbuild": "node scripts/assert-dist-built.cjs",
+    "prebuilder": "node scripts/patch-electron-builder-mac-binary.cjs",
    "builder": "cross-env NODE_OPTIONS=--max-old-space-size=16384 electron-builder",
    "pack": "npm run build && npm run builder -- --dir",
    "dist": "npm run build && npm run builder",
@@ -36,7 +37,7 @@
    "test:desktop:nsis": "node scripts/test-desktop.mjs nsis",
    "test:desktop:existing": "node scripts/test-desktop.mjs existing",
    "test:desktop:fresh": "node scripts/test-desktop.mjs fresh",
-    "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs",
+    "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs electron/windows-user-env.test.cjs",
    "typecheck": "tsc -p . --noEmit",
    "lint": "eslint src/ electron/",
    "lint:fix": "eslint src/ electron/ --fix",
@@ -134,6 +135,7 @@
  },
  "build": {
    "electronVersion": "40.9.3",
+    "electronDist": "../../node_modules/electron/dist",
    "appId": "com.nousresearch.hermes",
    "productName": "Hermes",
    "executableName": "Hermes",
--- a/apps/desktop/scripts/patch-electron-builder-mac-binary.cjs
+++ b/apps/desktop/scripts/patch-electron-builder-mac-binary.cjs
@@ -0,0 +1,59 @@
+const fs = require('node:fs')
+const path = require('node:path')
+
+if (process.platform !== 'darwin') {
+  process.exit(0)
+}
+
+const desktopRoot = path.resolve(__dirname, '..')
+const repoRoot = path.resolve(desktopRoot, '..', '..')
+const electronMacPath = path.join(repoRoot, 'node_modules', 'app-builder-lib', 'out', 'electron', 'electronMac.js')
+
+const marker = 'hermes-macos-electron-binary-fallback'
+const needle = `    await Promise.all([
+        doRename(path.join(contentsPath, "MacOS"), electronBranding.productName, appPlist.CFBundleExecutable),
+        (0, builder_util_1.unlinkIfExists)(path.join(appOutDir, "LICENSE")),
+        (0, builder_util_1.unlinkIfExists)(path.join(appOutDir, "LICENSES.chromium.html")),
+    ]);`
+const replacement = `    // ${marker}: electron-builder 26.8.x can sometimes copy
+    // Electron.app without its main MacOS/Electron binary before this rename.
+    // Restore it from the installed Electron runtime so local desktop installs
+    // do not fail with ENOENT during macOS arm64 packaging.
+    const macosDir = path.join(contentsPath, "MacOS");
+    const bundledElectronBinary = path.join(macosDir, electronBranding.productName);
+    if (!fs.existsSync(bundledElectronBinary)) {
+        const candidates = [
+            path.join(packager.info.framework.distMacOsAppName, "Contents", "MacOS", electronBranding.productName),
+            path.join(process.cwd(), "..", "..", "node_modules", "electron", "dist", "Electron.app", "Contents", "MacOS", electronBranding.productName),
+        ];
+        const sourceBinary = candidates.find(candidate => fs.existsSync(candidate));
+        if (sourceBinary == null) {
+            throw new Error("Electron binary missing from packaged app and Electron runtime: " + bundledElectronBinary);
+        }
+        await (0, promises_1.copyFile)(sourceBinary, bundledElectronBinary);
+        await (0, promises_1.chmod)(bundledElectronBinary, 0o755);
+    }
+    await Promise.all([
+        doRename(macosDir, electronBranding.productName, appPlist.CFBundleExecutable),
+        (0, builder_util_1.unlinkIfExists)(path.join(appOutDir, "LICENSE")),
+        (0, builder_util_1.unlinkIfExists)(path.join(appOutDir, "LICENSES.chromium.html")),
+    ]);`
+
+if (!fs.existsSync(electronMacPath)) {
+  console.warn(`[patch-electron-builder] skipped: ${electronMacPath} not found`)
+  process.exit(0)
+}
+
+const source = fs.readFileSync(electronMacPath, 'utf8')
+if (source.includes(marker)) {
+  console.log('[patch-electron-builder] macOS Electron binary fallback already applied')
+  process.exit(0)
+}
+
+if (!source.includes(needle)) {
+  console.warn('[patch-electron-builder] skipped: expected electronMac.js shape not found')
+  process.exit(0)
+}
+
+fs.writeFileSync(electronMacPath, source.replace(needle, replacement))
+console.log('[patch-electron-builder] applied macOS Electron binary fallback')
--- a/apps/desktop/src/app/artifacts/index.tsx
+++ b/apps/desktop/src/app/artifacts/index.tsx
@@ -23,6 +23,7 @@ import { type Translations, useI18n } from '@/i18n'
 import { sessionTitle } from '@/lib/chat-runtime'
 import { ExternalLink, ExternalLinkIcon, hostPathLabel, urlSlugTitleLabel, useLinkTitle } from '@/lib/external-link'
 import { FileImage, FileText, FolderOpen, Link2 } from '@/lib/icons'
+import { mediaExternalUrl } from '@/lib/media'
 import { cn } from '@/lib/utils'
 import { notifyError } from '@/store/notifications'
 import type { SessionInfo, SessionMessage } from '@/types/hermes'
@@ -124,17 +125,12 @@ function artifactKind(value: string): ArtifactKind {
 }

 function artifactHref(value: string): string {
-  if (
-    value.startsWith('http://') ||
-    value.startsWith('https://') ||
-    value.startsWith('file://') ||
-    value.startsWith('data:')
-  ) {
+  if (value.startsWith('http://') || value.startsWith('https://') || value.startsWith('data:')) {
    return value
  }

-  if (value.startsWith('/')) {
-    return `file://${encodeURI(value)}`
+  if (value.startsWith('file://') || value.startsWith('/')) {
+    return mediaExternalUrl(value)
  }

  return value
--- a/apps/desktop/src/app/chat/composer/controls.tsx
+++ b/apps/desktop/src/app/chat/composer/controls.tsx
@@ -9,6 +9,7 @@ import { formatCombo } from '@/lib/keybinds/combo'
 import { cn } from '@/lib/utils'

 import type { ConversationStatus } from './hooks/use-voice-conversation'
+import { ModelPill } from './model-pill'
 import type { ChatBarState, VoiceStatus } from './types'

 export const ICON_BTN = 'size-(--composer-control-size) shrink-0 rounded-md'
@@ -66,6 +67,7 @@ export function ComposerControls({
  const c = t.composer
  const steerCombo = formatCombo('mod+enter')
  const steerLabel = `${c.steer} (${steerCombo})`
+
  const steerTip = (
    <span className="inline-flex items-center gap-1.5">
      {c.steer}
@@ -81,8 +83,10 @@ export function ComposerControls({

  return (
    <div className="ml-auto flex shrink-0 items-center gap-(--composer-control-gap)">
-      <DictationButton disabled={disabled} onToggle={onDictate} state={state.voice} status={voiceStatus} />
-      {canSteer && (
+      <ModelPill disabled={disabled} model={state.model} />
+      {/* While the agent runs and the user is typing, steer takes over the mic's
+          slot rather than crowding the row with an extra button. */}
+      {canSteer ? (
        <Tip label={steerTip}>
          <Button
            aria-label={steerLabel}
@@ -96,6 +100,8 @@ export function ComposerControls({
            <SteeringWheel size={16} />
          </Button>
        </Tip>
+      ) : (
+        <DictationButton disabled={disabled} onToggle={onDictate} state={state.voice} status={voiceStatus} />
      )}
      {showVoicePrimary ? (
        <Tip label={c.startVoice}>
--- a/apps/desktop/src/app/chat/composer/index.tsx
+++ b/apps/desktop/src/app/chat/composer/index.tsx
@@ -85,6 +85,8 @@ import {
 import { QueuePanel } from './queue-panel'
 import {
  composerPlainText,
+  deleteSelectionInEditor,
+  insertPlainTextAtCaret,
  normalizeComposerEditorDom,
  placeCaretEnd,
  refChipElement,
@@ -135,6 +137,12 @@ function slashChipKindForItem(item: Unstable_TriggerItem): SlashChipKind {
  return 'command'
 }

+/** A `/` query is at its arg stage once it's past the command name. */
+const slashArgStage = (query: string) => query.includes(' ')
+
+/** The `/command` token of a slash query (`personality x` → `/personality`). */
+const slashCommandToken = (query: string) => `/${query.split(/\s+/, 1)[0]?.toLowerCase() ?? ''}`
+
 interface QueueEditState {
  attachments: ComposerAttachment[]
  draft: string
@@ -532,48 +540,6 @@ export function ChatBar({
    })
  }, [])

-  const handlePaste = (event: ClipboardEvent<HTMLDivElement>) => {
-    const imageBlobs = extractClipboardImageBlobs(event.clipboardData)
-
-    if (imageBlobs.length > 0) {
-      event.preventDefault()
-
-      if (onAttachImageBlob) {
-        triggerHaptic('selection')
-
-        for (const blob of imageBlobs) {
-          void onAttachImageBlob(blob)
-        }
-      }
-
-      return
-    }
-
-    // Trim surrounding whitespace so a copy that dragged along leading/trailing
-    // blank lines (common when selecting from terminals, code blocks, web pages)
-    // doesn't dump multiline padding into the composer. Internal newlines are
-    // preserved — only the edges are cleaned up.
-    const pastedText = event.clipboardData.getData('text').trim()
-
-    if (!pastedText) {
-      event.preventDefault()
-
-      return
-    }
-
-    if (DATA_IMAGE_URL_RE.test(pastedText)) {
-      event.preventDefault()
-
-      return
-    }
-
-    event.preventDefault()
-    document.execCommand('insertText', false, pastedText)
-    const nextDraft = composerPlainText(event.currentTarget)
-    draftRef.current = nextDraft
-    aui.composer().setText(nextDraft)
-  }
-
  const [trigger, setTrigger] = useState<TriggerState | null>(null)
  const [triggerActive, setTriggerActive] = useState(0)
  const [triggerItems, setTriggerItems] = useState<readonly Unstable_TriggerItem[]>([])
@@ -610,7 +576,15 @@ export function ChatBar({
    }

    const before = textBeforeCaret(editor)
-    const detected = detectTrigger(before ?? composerPlainText(editor))
+    const found = detectTrigger(before ?? composerPlainText(editor))
+
+    // The arg-stage popover is only useful for commands with an options screen.
+    // For a no-arg command it would dead-end on "No matches", so drop it — the
+    // directive is already complete.
+    const detected =
+      found?.kind === '/' && slashArgStage(found.query) && !desktopSlashCommandTakesArgs(slashCommandToken(found.query))
+        ? null
+        : found

    setTrigger(detected)

@@ -650,6 +624,46 @@ export function ChatBar({
    flushEditorToDraft(event.currentTarget)
  }

+  const handlePaste = (event: ClipboardEvent<HTMLDivElement>) => {
+    const imageBlobs = extractClipboardImageBlobs(event.clipboardData)
+
+    if (imageBlobs.length > 0) {
+      event.preventDefault()
+
+      if (onAttachImageBlob) {
+        triggerHaptic('selection')
+
+        for (const blob of imageBlobs) {
+          void onAttachImageBlob(blob)
+        }
+      }
+
+      return
+    }
+
+    // Trim surrounding whitespace so a copy that dragged along leading/trailing
+    // blank lines (common when selecting from terminals, code blocks, web pages)
+    // doesn't dump multiline padding into the composer. Internal newlines are
+    // preserved — only the edges are cleaned up.
+    const pastedText = event.clipboardData.getData('text').trim()
+
+    if (!pastedText) {
+      event.preventDefault()
+
+      return
+    }
+
+    if (DATA_IMAGE_URL_RE.test(pastedText)) {
+      event.preventDefault()
+
+      return
+    }
+
+    event.preventDefault()
+    insertPlainTextAtCaret(event.currentTarget, pastedText)
+    flushEditorToDraft(event.currentTarget)
+  }
+
  const triggerAdapter: Unstable_TriggerAdapter | null =
    trigger?.kind === '@' ? at.adapter : trigger?.kind === '/' ? slash.adapter : null

@@ -665,6 +679,12 @@ export function ChatBar({

  const triggerLoading = trigger?.kind === '@' ? at.loading : trigger?.kind === '/' ? slash.loading : false

+  // Suppress the "No matches" empty state once a slash command is past its name:
+  // a no-arg command has nothing to offer, and a fully-typed arg commits on
+  // Space/Tab — neither should dead-end on a popover.
+  const argStageEmpty =
+    trigger?.kind === '/' && slashArgStage(trigger.query) && !triggerLoading && !triggerItems.length
+
  const closeTrigger = () => {
    setTrigger(null)
    setTriggerItems([])
@@ -675,6 +695,25 @@ export function ChatBar({
    setTriggerActive(idx => Math.min(idx, Math.max(0, triggerItems.length - 1)))
  }, [triggerItems.length])

+  // Commit the literally-typed `/command arg` as a directive chip — used when
+  // the completion list is empty because the arg is already fully typed (the
+  // backend completer drops exact matches). Reuses the chip path via a
+  // synthetic item whose serialized form is the verbatim text.
+  const commitTypedSlashDirective = () => {
+    if (trigger?.kind !== '/') {
+      return
+    }
+
+    const text = `/${trigger.query.trimEnd()}`
+
+    replaceTriggerWithChip({
+      id: text,
+      type: 'slash',
+      label: text.slice(1),
+      metadata: { command: slashCommandToken(trigger.query), display: text, meta: '', group: '', action: '', rawText: text }
+    })
+  }
+
  const replaceTriggerWithChip = (item: Unstable_TriggerItem) => {
    const editor = editorRef.current

@@ -793,6 +832,18 @@ export function ChatBar({
      return
    }

+    // Non-collapsed Backspace/Delete: native selection-delete is ~O(n²) on large
+    // drafts (Ctrl+A → Delete froze ~1.3s). Collapsed carets fall through.
+    if (
+      (event.key === 'Backspace' || event.key === 'Delete') &&
+      deleteSelectionInEditor(event.currentTarget)
+    ) {
+      event.preventDefault()
+      flushEditorToDraft(event.currentTarget)
+
+      return
+    }
+
    // Cmd/Ctrl+Shift+K drains the next queued message. Plain Cmd/Ctrl+K is
    // reserved for the global command palette.
    if ((event.metaKey || event.ctrlKey) && !event.altKey && event.shiftKey && event.key.toLowerCase() === 'k') {
@@ -822,7 +873,15 @@ export function ChatBar({
        return
      }

-      if (event.key === 'Enter' || event.key === 'Tab') {
+      // Enter / Tab / Space all accept the highlighted item: a no-arg command
+      // commits its directive chip, an arg-taking command expands to its
+      // options step, and an arg option commits the full `/cmd arg` chip. Space
+      // is slash-only (an `@` mention takes a literal space) and gated to a
+      // non-empty query so a bare `/ ` still types a space.
+      const acceptOnSpace = event.key === ' ' && trigger.kind === '/' && Boolean(trigger.query.trim())
+      const accept = event.key === 'Enter' || event.key === 'Tab' || acceptOnSpace
+
+      if (accept) {
        event.preventDefault()
        triggerKeyConsumedRef.current = true
        const item = triggerItems[triggerActive]
@@ -843,6 +902,24 @@ export function ChatBar({
      }
    }

+    // Arg stage with nothing left to suggest — a fully-typed arg the backend
+    // completer no longer echoes (it drops the exact match), e.g.
+    // `/personality creative`. Space/Tab still commit what's typed as a single
+    // directive chip; Enter falls through to submit (send it as-is).
+    if (
+      trigger?.kind === '/' &&
+      !triggerItems.length &&
+      (event.key === ' ' || event.key === 'Tab') &&
+      slashArgStage(trigger.query) &&
+      trigger.query.trim()
+    ) {
+      event.preventDefault()
+      triggerKeyConsumedRef.current = true
+      commitTypedSlashDirective()
+
+      return
+    }
+
    // ArrowUp/ArrowDown navigate, in priority order: the queue (edit entries in
    // place) then sent-message history. The history ring is derived from live
    // session messages each press — single source of truth, no mirror.
@@ -1765,7 +1842,7 @@ export function ChatBar({
          ref={composerRef}
        >
          {showHelpHint && <HelpHint />}
-          {trigger && (
+          {trigger && !argStageEmpty && (
            <ComposerTriggerPopover
              activeIndex={triggerActive}
              items={triggerItems}
--- a/apps/desktop/src/app/chat/composer/model-pill.tsx
+++ b/apps/desktop/src/app/chat/composer/model-pill.tsx
@@ -0,0 +1,86 @@
+import { useStore } from '@nanostores/react'
+import { useState } from 'react'
+
+import { ModelMenuCloseContext } from '@/app/shell/model-menu-panel'
+import { Button } from '@/components/ui/button'
+import { DropdownMenu, DropdownMenuContent, DropdownMenuTrigger } from '@/components/ui/dropdown-menu'
+import { GlyphSpinner } from '@/components/ui/glyph-spinner'
+import { useI18n } from '@/i18n'
+import { ChevronDown } from '@/lib/icons'
+import { formatModelStatusLabel } from '@/lib/model-status-label'
+import { cn } from '@/lib/utils'
+import {
+  $currentFastMode,
+  $currentModel,
+  $currentProvider,
+  $currentReasoningEffort,
+  setModelPickerOpen
+} from '@/store/session'
+
+import type { ChatBarState } from './types'
+
+const PILL = cn(
+  'h-(--composer-control-size) max-w-40 shrink-0 gap-1 rounded-md px-2 text-xs font-normal',
+  'text-(--ui-text-tertiary) hover:bg-(--chrome-action-hover) hover:text-foreground'
+)
+
+/**
+ * Composer model selector — the relocated status-bar pill. With a live
+ * `model.modelMenuContent` it renders that menu in a dropdown and hands it a
+ * close callback; without one the pill opens the full model picker instead.
+ */
+export function ModelPill({ disabled, model }: { disabled: boolean; model: ChatBarState['model'] }) {
+  const copy = useI18n().t.shell.statusbar
+  const currentModel = useStore($currentModel)
+  const currentProvider = useStore($currentProvider)
+  const fastMode = useStore($currentFastMode)
+  const reasoningEffort = useStore($currentReasoningEffort)
+  const [open, setOpen] = useState(false)
+
+  // The model resolves a beat after the gateway/session comes up. Rather than
+  // flash a literal "No model", show a quiet loader (inherits the pill text
+  // color at half opacity) until a model lands.
+  const label = (
+    <>
+      {currentModel.trim() ? (
+        <span className="truncate">{formatModelStatusLabel(currentModel, { fastMode, reasoningEffort })}</span>
+      ) : (
+        <GlyphSpinner className="opacity-50" spinner="braille" />
+      )}
+      <ChevronDown className="size-2.5 shrink-0 opacity-50" />
+    </>
+  )
+
+  const title = currentProvider ? copy.modelTitle(currentProvider, currentModel || copy.modelNone) : copy.switchModel
+
+  if (!model.modelMenuContent) {
+    return (
+      <Button
+        aria-label={copy.openModelPicker}
+        className={PILL}
+        disabled={disabled}
+        onClick={() => setModelPickerOpen(true)}
+        title={copy.openModelPicker}
+        type="button"
+        variant="ghost"
+      >
+        {label}
+      </Button>
+    )
+  }
+
+  return (
+    <DropdownMenu onOpenChange={setOpen} open={open}>
+      <DropdownMenuTrigger asChild>
+        <Button aria-label={title} className={PILL} disabled={disabled} title={title} type="button" variant="ghost">
+          {label}
+        </Button>
+      </DropdownMenuTrigger>
+      <DropdownMenuContent align="end" className="w-64 p-0" side="top" sideOffset={8}>
+        <ModelMenuCloseContext.Provider value={() => setOpen(false)}>
+          {model.modelMenuContent}
+        </ModelMenuCloseContext.Provider>
+      </DropdownMenuContent>
+    </DropdownMenu>
+  )
+}
--- a/apps/desktop/src/app/chat/composer/rich-editor.test.ts
+++ b/apps/desktop/src/app/chat/composer/rich-editor.test.ts
@@ -3,12 +3,24 @@ import { describe, expect, it } from 'vitest'
 import { insertInlineRefsIntoEditor } from './inline-refs'
 import {
  composerPlainText,
+  deleteSelectionInEditor,
+  insertPlainTextAtCaret,
  normalizeComposerEditorDom,
  refChipElement,
  renderComposerContents,
  RICH_INPUT_SLOT
 } from './rich-editor'

+const caretIn = (editor: HTMLElement) => {
+  const range = document.createRange()
+  const selection = window.getSelection()!
+
+  range.selectNodeContents(editor)
+  range.collapse(false)
+  selection.removeAllRanges()
+  selection.addRange(range)
+}
+
 describe('renderComposerContents', () => {
  it('renders refs and raw text without interpreting user text as HTML', () => {
    const editor = document.createElement('div')
@@ -59,3 +71,64 @@ describe('insertInlineRefsIntoEditor', () => {
    expect(composerPlainText(editor)).toBe('@file:`src/foo.ts` ')
  })
 })
+
+describe('insertPlainTextAtCaret', () => {
+  it('inserts multiline text as text nodes + br', () => {
+    const editor = document.createElement('div')
+    editor.dataset.slot = RICH_INPUT_SLOT
+    document.body.append(editor)
+    caretIn(editor)
+
+    insertPlainTextAtCaret(editor, 'one\ntwo\nthree')
+
+    expect(editor.querySelectorAll('br').length).toBe(2)
+    expect(composerPlainText(editor)).toBe('one\ntwo\nthree')
+
+    editor.remove()
+  })
+
+  it('replaces the selected span', () => {
+    const editor = document.createElement('div')
+    editor.dataset.slot = RICH_INPUT_SLOT
+    editor.textContent = 'abXYef'
+    document.body.append(editor)
+
+    const text = editor.firstChild!
+    const selection = window.getSelection()!
+    const range = document.createRange()
+
+    range.setStart(text, 2)
+    range.setEnd(text, 4)
+    selection.removeAllRanges()
+    selection.addRange(range)
+
+    insertPlainTextAtCaret(editor, 'cd')
+
+    expect(composerPlainText(editor)).toBe('abcdef')
+
+    editor.remove()
+  })
+})
+
+describe('deleteSelectionInEditor', () => {
+  it('clears a non-collapsed range and leaves a collapsed caret', () => {
+    const editor = document.createElement('div')
+    editor.dataset.slot = RICH_INPUT_SLOT
+    editor.textContent = 'hello world'
+    document.body.append(editor)
+
+    const selection = window.getSelection()!
+    const range = document.createRange()
+
+    range.selectNodeContents(editor)
+    selection.removeAllRanges()
+    selection.addRange(range)
+
+    expect(deleteSelectionInEditor(editor)).toBe(true)
+    expect(composerPlainText(editor)).toBe('')
+    expect(selection.getRangeAt(0).collapsed).toBe(true)
+    expect(deleteSelectionInEditor(editor)).toBe(false)
+
+    editor.remove()
+  })
+})
--- a/apps/desktop/src/app/chat/composer/rich-editor.ts
+++ b/apps/desktop/src/app/chat/composer/rich-editor.ts
@@ -132,6 +132,63 @@ export function renderComposerContents(target: HTMLElement, text: string) {
  appendComposerContents(target, text)
 }

+/** The window selection plus its first range when that range lies inside `editor`; else null. */
+function composerSelectionRange(editor: HTMLElement) {
+  const selection = window.getSelection()
+  const range = selection?.rangeCount ? selection.getRangeAt(0) : null
+
+  if (!selection || !range || !editor.contains(range.commonAncestorContainer)) {
+    return null
+  }
+
+  return { range, selection }
+}
+
+/** Insert plain text at the caret, replacing any in-editor selection (appends
+ *  when there is none). Pastes use this over `execCommand('insertText')`, whose
+ *  Chromium pipeline is ~O(n²) on big blobs. NOTE(review): confirm undo still works. */
+export function insertPlainTextAtCaret(editor: HTMLElement, text: string) {
+  const hit = composerSelectionRange(editor)
+  const fragment = document.createDocumentFragment()
+
+  appendTextWithBreaks(fragment, text)
+
+  const tail = fragment.lastChild
+
+  if (hit) {
+    hit.range.deleteContents()
+    hit.range.insertNode(fragment)
+  } else {
+    editor.append(fragment)
+  }
+
+  if (tail) {
+    const caret = document.createRange()
+    caret.setStartAfter(tail)
+    caret.collapse(true)
+    const selection = hit?.selection ?? window.getSelection()
+    selection?.removeAllRanges()
+    selection?.addRange(caret)
+  }
+}
+
+/** Delete a non-collapsed in-editor selection and collapse the caret to its start;
+ *  returns true. Collapsed carets return false so Opt/Cmd+Backspace stays native. */
+export function deleteSelectionInEditor(editor: HTMLElement) {
+  const hit = composerSelectionRange(editor)
+
+  if (!hit || hit.range.collapsed) {
+    return false
+  }
+
+  hit.range.deleteContents()
+  hit.range.collapse(true)
+  hit.selection.removeAllRanges()
+  hit.selection.addRange(hit.range)
+
+  return true
+}
+
 /** Serialize a draft string into chip-HTML for the contenteditable surface. */
 export function composerHtml(text: string) {
  let cursor = 0
--- a/apps/desktop/src/app/chat/composer/types.ts
+++ b/apps/desktop/src/app/chat/composer/types.ts
@@ -1,3 +1,5 @@
+import type { ReactNode } from 'react'
+
 import type { HermesGateway } from '@/hermes'
 import type { ComposerAttachment } from '@/store/composer'

@@ -22,6 +24,8 @@ export interface ChatBarState {
    canSwitch: boolean
    loading?: boolean
    quickModels?: QuickModelOption[]
+    /** Reused status-bar dropdown (built with gateway + selectModel upstream). */
+    modelMenuContent?: ReactNode
  }
  tools: { enabled: boolean; label: string; suggestions?: ContextSuggestion[] }
  voice: { enabled: boolean; active: boolean }
--- a/apps/desktop/src/app/chat/index.tsx
+++ b/apps/desktop/src/app/chat/index.tsx
@@ -42,6 +42,7 @@ import {
  $sessions,
  sessionPinId
 } from '@/store/session'
+import { isSecondaryWindow } from '@/store/windows'
 import type { ModelOptionsResponse } from '@/types/hermes'

 import { routeSessionId } from '../routes'
@@ -61,6 +62,7 @@ import { threadLoadingState } from './thread-loading'

 interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
  gateway: HermesGateway | null
+  modelMenuContent?: React.ReactNode
  onToggleSelectedPin: () => void
  onDeleteSelectedSession: () => void
  onCancel: () => Promise<void> | void
@@ -119,10 +121,10 @@ function ChatHeader({
      ? pinnedSessionIds.includes(selectedSessionId)
      : false

-  // A brand-new session has no session to pin/delete/rename, so the header is
-  // just a dead "New session" label + chevron. Drop it (and its border)
-  // entirely until there's a real session to act on.
-  if (!selectedSessionId && !activeSessionId && !isRoutedSessionView) {
+  // Secondary windows (new-session scratch, subagent watch, cmd-click pop-out)
+  // are compact side panels — they drop the session-actions header + border
+  // entirely. A brand-new draft has nothing to pin/delete/rename either.
+  if (isSecondaryWindow() || (!selectedSessionId && !activeSessionId && !isRoutedSessionView)) {
    return null
  }

@@ -249,6 +251,7 @@ function ChatRuntimeBoundary({
 export function ChatView({
  className,
  gateway,
+  modelMenuContent,
  onToggleSelectedPin,
  onDeleteSelectedSession,
  onCancel,
@@ -302,7 +305,10 @@ export function ChatView({
  // waiting for the resume effect (which paints a frame later) to clear them.
  const routeSessionMismatch = isRoutedSessionView && routedSessionId !== selectedSessionId

-  const showIntro = freshDraftReady && !isRoutedSessionView && !selectedSessionId && !activeSessionId && messagesEmpty
+  // The compact new-session pop-out skips the wordmark/tagline intro — it's a
+  // scratch window, not the full-height empty state.
+  const showIntro =
+    !isSecondaryWindow() && freshDraftReady && !isRoutedSessionView && !selectedSessionId && !activeSessionId && messagesEmpty

  // Session is still loading if the route references a session we haven't
  // resumed yet. Once `activeSessionId` is set (runtime has resumed), the
@@ -342,6 +348,7 @@ export function ChatView({
        provider: currentProvider,
        canSwitch: gatewayOpen,
        loading: !gatewayOpen || (!currentModel && !currentProvider),
+        modelMenuContent,
        quickModels
      },
      tools: {
@@ -354,7 +361,7 @@ export function ChatView({
        active: false
      }
    }),
-    [contextSuggestions, currentModel, currentProvider, gatewayOpen, quickModels]
+    [contextSuggestions, currentModel, currentProvider, gatewayOpen, modelMenuContent, quickModels]
  )

  // Drop files anywhere in the conversation area, not just on the composer
--- a/apps/desktop/src/app/chat/scroll-to-bottom-button.test.tsx
+++ b/apps/desktop/src/app/chat/scroll-to-bottom-button.test.tsx
@@ -0,0 +1,67 @@
+import { cleanup, fireEvent, render, screen } from '@testing-library/react'
+import { afterEach, describe, expect, it, vi } from 'vitest'
+
+import { clearAllPrompts, setApprovalRequest } from '@/store/prompts'
+import { $activeSessionId } from '@/store/session'
+import { onScrollToBottomRequest, resetThreadScroll, setThreadAtBottom } from '@/store/thread-scroll'
+
+import { ScrollToBottomButton } from './scroll-to-bottom-button'
+
+/** Test fixture: make `sess-1` the active session and register an approval
+ *  request addressed to that same session. */
+function pendingApproval() {
+  $activeSessionId.set('sess-1')
+  setApprovalRequest({ command: 'rm -rf /tmp/x', description: 'dangerous command', sessionId: 'sess-1' })
+}
+
+// Unmount rendered trees and reset the stores these tests touch (prompts,
+// thread scroll, active session) so state set by one case can't reach the next.
+afterEach(() => {
+  cleanup()
+  clearAllPrompts()
+  resetThreadScroll()
+  $activeSessionId.set(null)
+})
+
+// `getByRole('button')` excludes aria-hidden nodes, so "queryByRole null" is the
+// control's hidden (parked-at-bottom) state.
+describe('ScrollToBottomButton', () => {
+  it('stays hidden while parked at the bottom', () => {
+    render(<ScrollToBottomButton />)
+
+    expect(screen.queryByRole('button')).toBeNull()
+  })
+
+  it('is a plain jump-to-bottom control when scrolled up with no approval', () => {
+    setThreadAtBottom(false)
+    render(<ScrollToBottomButton />)
+
+    expect(screen.getByRole('button', { name: 'Scroll to bottom' })).toBeTruthy()
+    expect(screen.queryByText('Approval needed')).toBeNull()
+  })
+
+  it('morphs into the approval pill when scrolled up with a pending approval', () => {
+    pendingApproval()
+    setThreadAtBottom(false)
+    render(<ScrollToBottomButton />)
+
+    expect(screen.getByRole('button', { name: 'Approval needed' })).toBeTruthy()
+    expect(screen.getByText('Approval needed')).toBeTruthy()
+  })
+
+  it('does not morph while a pending approval is still in view (at bottom)', () => {
+    pendingApproval()
+    render(<ScrollToBottomButton />)
+
+    // Parked at bottom → control hidden, so it can't claim "approval needed".
+    expect(screen.queryByRole('button')).toBeNull()
+  })
+
+  it('re-arms sticky-bottom on click', () => {
+    const handler = vi.fn()
+    const stop = onScrollToBottomRequest(handler)
+
+    // Unsubscribe in `finally`: if the button lookup or the assertion throws,
+    // the listener must not stay registered for later tests.
+    try {
+      setThreadAtBottom(false)
+      render(<ScrollToBottomButton />)
+
+      fireEvent.click(screen.getByRole('button'))
+
+      expect(handler).toHaveBeenCalledTimes(1)
+    } finally {
+      stop()
+    }
+  })
+})
--- a/apps/desktop/src/app/chat/scroll-to-bottom-button.tsx
+++ b/apps/desktop/src/app/chat/scroll-to-bottom-button.tsx
@@ -5,6 +5,7 @@ import { Codicon } from '@/components/ui/codicon'
 import { useI18n } from '@/i18n'
 import { triggerHaptic } from '@/lib/haptics'
 import { cn } from '@/lib/utils'
+import { $approvalRequest } from '@/store/prompts'
 import { $threadJumpButtonVisible, requestScrollToBottom } from '@/store/thread-scroll'

 /**
@@ -15,6 +16,13 @@ import { $threadJumpButtonVisible, requestScrollToBottom } from '@/store/thread-
 * / background cards. Visible only while the user has scrolled meaningfully
 * away from the bottom; clicking re-arms sticky-bottom and pins the viewport.
 *
+ * When the turn is BLOCKED on an approval, this same control morphs into an
+ * "Approval needed" pill — the only response surface is the inline Run/Reject
+ * bar on the parked tool row, which is always the bottom-most content, so the
+ * existing scroll-to-bottom action lands the user right on it. One control, no
+ * collision, no second scroll path (native scrollIntoView would scroll
+ * overflow:hidden ancestors that can't scroll back and wreck the layout).
+ *
 * Enter/exit motion lives in styles.css under `.thread-jump-button` — a
 * directional scale (contract in from 1.1, contract out to 0.9) keyed off
 * `data-state`. `idle` (never-shown) stays silent so it can't flash on mount;
@@ -23,6 +31,11 @@ import { $threadJumpButtonVisible, requestScrollToBottom } from '@/store/thread-
 export function ScrollToBottomButton() {
  const { t } = useI18n()
  const visible = useStore($threadJumpButtonVisible)
+  const request = useStore($approvalRequest)
+  // Scrolled away while an approval is pending → the inline Run/Reject bar is
+  // below the fold. Relabel so the user knows the session needs them, not just
+  // that there's more to read.
+  const approval = visible && Boolean(request)
  const hasShownRef = useRef(false)

  if (visible) {
@@ -30,15 +43,17 @@ export function ScrollToBottomButton() {
  }

  const state = visible ? 'in' : hasShownRef.current ? 'out' : 'idle'
+  const label = approval ? t.assistant.approval.jumpToApproval : t.assistant.thread.scrollToBottom

  return (
    <button
      aria-hidden={!visible}
-      aria-label={t.assistant.thread.scrollToBottom}
+      aria-label={label}
      className={cn(
-        'thread-jump-button absolute left-1/2 z-20 grid size-8 place-items-center rounded-full',
-        'border border-border/65 bg-(--composer-fill) text-muted-foreground hover:text-foreground',
-        'backdrop-blur-[0.75rem] [-webkit-backdrop-filter:blur(0.75rem)]',
+        'thread-jump-button absolute left-1/2 z-20 grid place-items-center backdrop-blur-[0.75rem] [-webkit-backdrop-filter:blur(0.75rem)]',
+        approval
+          ? 'h-8 grid-flow-col gap-1.5 rounded-full border border-primary/40 bg-(--composer-fill) px-3 text-primary hover:bg-primary/10'
+          : 'size-8 rounded-full border border-border/65 bg-(--composer-fill) text-muted-foreground hover:text-foreground',
        !visible && 'pointer-events-none'
      )}
      data-state={state}
@@ -52,7 +67,8 @@ export function ScrollToBottomButton() {
      tabIndex={visible ? 0 : -1}
      type="button"
    >
-      <Codicon name="arrow-down" size="1rem" />
+      <Codicon name="arrow-down" size={approval ? '0.875rem' : '1rem'} />
+      {approval && <span className="text-xs font-medium">{label}</span>}
    </button>
  )
 }
--- a/apps/desktop/src/app/chat/sidebar/profile-switcher.tsx
+++ b/apps/desktop/src/app/chat/sidebar/profile-switcher.tsx
@@ -284,6 +284,7 @@ export function ProfileRail() {
          selectProfile(name)
        }}
        open={createOpen}
+        profiles={profiles}
      />

      <RenameProfileDialog
--- a/apps/desktop/src/app/chat/sidebar/session-actions-menu.tsx
+++ b/apps/desktop/src/app/chat/sidebar/session-actions-menu.tsx
@@ -4,7 +4,7 @@ import { useEffect, useRef, useState } from 'react'
 import { Button } from '@/components/ui/button'
 import { Codicon } from '@/components/ui/codicon'
 import { ContextMenu, ContextMenuContent, ContextMenuItem, ContextMenuTrigger } from '@/components/ui/context-menu'
-import { writeClipboardText } from '@/components/ui/copy-button'
+import { CopyButton } from '@/components/ui/copy-button'
 import {
  Dialog,
  DialogContent,
@@ -49,26 +49,17 @@ function useSessionActions({ sessionId, title, pinned = false, profile, onPin, o
  const r = t.sidebar.row
  const [renameOpen, setRenameOpen] = useState(false)

+  const pinItem: ItemSpec = {
+    disabled: !onPin,
+    icon: 'pin',
+    label: pinned ? r.unpin : r.pin,
+    onSelect: () => {
+      triggerHaptic('selection')
+      onPin?.()
+    }
+  }
+
  const items: ItemSpec[] = [
-    {
-      disabled: !onPin,
-      icon: 'pin',
-      label: pinned ? r.unpin : r.pin,
-      onSelect: () => {
-        triggerHaptic('selection')
-        onPin?.()
-      }
-    },
-    {
-      disabled: !sessionId,
-      icon: 'copy',
-      label: r.copyId,
-      onSelect: event => {
-        event.preventDefault()
-        triggerHaptic('selection')
-        void writeClipboardText(sessionId).catch(err => notifyError(err, r.copyIdFailed))
-      }
-    },
    ...(canOpenSessionWindow()
      ? [
          {
@@ -122,13 +113,28 @@ function useSessionActions({ sessionId, title, pinned = false, profile, onPin, o
    }
  ]

-  const renderItems = (Item: MenuItem) =>
-    items.map(({ className, disabled, icon, label, onSelect, variant }) => (
-      <Item className={className} disabled={disabled} key={label} onSelect={onSelect} variant={variant}>
-        <Codicon name={icon} size="0.875rem" />
-        <span>{label}</span>
-      </Item>
-    ))
+  const renderMenuItem = (Item: MenuItem, { className, disabled, icon, label, onSelect, variant }: ItemSpec) => (
+    <Item className={className} disabled={disabled} key={label} onSelect={onSelect} variant={variant}>
+      <Codicon name={icon} size="0.875rem" />
+      <span>{label}</span>
+    </Item>
+  )
+
+  const renderItems = (Item: MenuItem) => (
+    <>
+      {renderMenuItem(Item, pinItem)}
+      <CopyButton
+        appearance={Item === DropdownMenuItem ? 'menu-item' : 'context-menu-item'}
+        disabled={!sessionId}
+        errorMessage={r.copyIdFailed}
+        key={r.copyId}
+        label={r.copyId}
+        onCopyError={err => notifyError(err, r.copyIdFailed)}
+        text={sessionId}
+      />
+      {items.map(spec => renderMenuItem(Item, spec))}
+    </>
+  )

  const renameDialog = (
    <RenameSessionDialog
--- a/apps/desktop/src/app/command-palette/index.tsx
+++ b/apps/desktop/src/app/command-palette/index.tsx
@@ -5,6 +5,7 @@ import { useCallback, useEffect, useMemo, useState } from 'react'
 import { useNavigate } from 'react-router-dom'

 import { HUD_HEADING, HUD_ITEM, HUD_POSITION, HUD_SURFACE, HUD_TEXT } from '@/app/floating-hud'
+import { useGatewayRequest } from '@/app/gateway/hooks/use-gateway-request'
 import { setTerminalTakeover } from '@/app/right-sidebar/store'
 import { Command, CommandEmpty, CommandGroup, CommandInput, CommandItem, CommandList } from '@/components/ui/command'
 import { KbdCombo } from '@/components/ui/kbd'
@@ -20,6 +21,7 @@ import {
  Clock,
  Cpu,
  Download,
+  Egg,
  Globe,
  type IconComponent,
  Info,
@@ -29,6 +31,7 @@ import {
  Moon,
  Package,
  Palette,
+  PawPrint,
  Plus,
  Settings,
  Settings2,
@@ -39,8 +42,9 @@ import {
  Zap
 } from '@/lib/icons'
 import { cn } from '@/lib/utils'
-import { $commandPaletteOpen, closeCommandPalette, setCommandPaletteOpen } from '@/store/command-palette'
+import { $commandPaletteOpen, $commandPalettePage, closeCommandPalette, setCommandPaletteOpen } from '@/store/command-palette'
 import { $bindings } from '@/store/keybinds'
+import { $petGenStatus, cleanupPetGen, generateDrafts } from '@/store/pet-generate'
 import { luminance } from '@/themes/color'
 import { type ThemeMode, useTheme } from '@/themes/context'
 import { isUserTheme, resolveTheme } from '@/themes/user-themes'
@@ -62,6 +66,8 @@ import { fieldCopyForSchemaKey } from '../settings/field-copy'
 import { prettyName } from '../settings/helpers'

 import { MarketplaceThemePage } from './marketplace-theme-page'
+import { PetGeneratePage } from './pet-generate-page'
+import { PetInlineToggle, PetPalettePage } from './pet-palette-page'

 interface PaletteItem {
  /** Keybind action id — its live combo renders as a hotkey hint. */
@@ -87,7 +93,7 @@ interface PaletteGroup {

 // Nested page → its parent, so Back / Esc step up one level instead of closing
 // the palette. Pages absent here go straight back to the root list.
-const PAGE_PARENTS: Record<string, string> = { 'install-theme': 'theme' }
+const PAGE_PARENTS: Record<string, string> = { 'generate-pet': 'pets', 'install-theme': 'theme' }

 /** A nested page reachable from a root item via `to`. */
 interface PalettePage {
@@ -205,8 +211,10 @@ function themeSupportsMode(name: string, target: 'light' | 'dark'): boolean {
 export function CommandPalette() {
  const { t } = useI18n()
  const open = useStore($commandPaletteOpen)
+  const pendingPage = useStore($commandPalettePage)
  const bindings = useStore($bindings)
  const navigate = useNavigate()
+  const { requestGateway } = useGatewayRequest()
  const { availableThemes, resolvedMode, setMode, setTheme, themeName } = useTheme()
  const [search, setSearch] = useState('')
  const [page, setPage] = useState<string | null>(null)
@@ -242,13 +250,23 @@ export function CommandPalette() {
  const sessions = useMemo(() => (sessionsQuery.data?.sessions ?? []).map(toSessionEntry), [sessionsQuery.data])
  const archivedSessions = useMemo(() => (archivedQuery.data?.sessions ?? []).map(toSessionEntry), [archivedQuery.data])

-  // Reset the query/sub-page on close so it reopens clean.
+  // Reset the query/sub-page on close so it reopens clean. Cleanup also deletes
+  // a hatched-but-unadopted preview pet so it doesn't linger in the gallery.
  useEffect(() => {
    if (!open) {
      setSearch('')
      setPage(null)
+      cleanupPetGen(requestGateway)
    }
-  }, [open])
+  }, [open, requestGateway])
+
+  // Deep-link into a nested page (e.g. `/pet list` → pets picker).
+  useEffect(() => {
+    if (open && pendingPage) {
+      setPage(pendingPage)
+      $commandPalettePage.set(null)
+    }
+  }, [open, pendingPage])

  const go = useCallback((path: string) => () => navigate(path), [navigate])

@@ -382,6 +400,20 @@ export function CommandPalette() {
            keywords: ['appearance', 'color mode', 'brightness', 'dark', 'light', 'system'],
            label: cc.changeColorMode,
            to: 'color-mode'
+          },
+          {
+            icon: PawPrint,
+            id: 'appearance-pets',
+            keywords: ['pet', 'petdex', 'mascot', 'pets', '/pet', 'paw'],
+            label: cc.pets.title,
+            to: 'pets'
+          },
+          {
+            icon: Egg,
+            id: 'appearance-generate-pet',
+            keywords: ['pet', 'generate', 'create', 'make', 'new pet', 'mascot', 'hatch', 'ai'],
+            label: cc.generatePet.title,
+            to: 'generate-pet'
          }
        ]
      },
@@ -550,6 +582,18 @@ export function CommandPalette() {
          }
        ]
      },
+      // Server-driven page: browse petdex gallery, adopt/switch, toggle off.
+      pets: {
+        title: t.commandCenter.pets.title,
+        placeholder: t.commandCenter.pets.placeholder,
+        groups: []
+      },
+      // Server-driven page: describe → draft variants → hatch a custom pet.
+      'generate-pet': {
+        title: t.commandCenter.generatePet.title,
+        placeholder: t.commandCenter.generatePet.placeholder,
+        groups: []
+      },
      // Server-driven page: items come from the Marketplace, rendered by
      // <MarketplaceThemePage> (loader + live search + per-row install).
      'install-theme': {
@@ -620,49 +664,77 @@ export function CommandPalette() {
                  event.preventDefault()
                  event.stopPropagation()
                  goBack()
+
+                  return
+                }
+
+                // On the generate page, Enter (re)generates from the typed
+                // concept — cmdk has no item to select there, so each Enter,
+                // including a retype after drafts already exist, starts a fresh
+                // round. The page's own Retry/Hatch buttons cover the rest.
+                if (page === 'generate-pet' && event.key === 'Enter' && search.trim()) {
+                  const genStatus = $petGenStatus.get()
+
+                  if (
+                    genStatus !== 'generating' &&
+                    genStatus !== 'hatching' &&
+                    genStatus !== 'preview' &&
+                    genStatus !== 'adopting'
+                  ) {
+                    event.preventDefault()
+                    void generateDrafts(requestGateway, { prompt: search })
+                  }
                }
              }}
              onValueChange={setSearch}
              placeholder={placeholder}
+              right={page === 'pets' ? <PetInlineToggle /> : undefined}
              value={search}
            />
            <CommandList className="dt-portal-scrollbar max-h-[min(20rem,56vh)]">
-              {page === 'install-theme' ? (
+              {/* Server-driven pages render their own list; the rest show groups. */}
+              {page === 'generate-pet' ? (
+                <PetGeneratePage search={search} />
+              ) : page === 'pets' ? (
+                <PetPalettePage onGenerate={() => { setSearch(''); setPage('generate-pet') }} search={search} />
+              ) : page === 'install-theme' ? (
                <MarketplaceThemePage onPickTheme={setTheme} search={search} />
              ) : (
-                <CommandEmpty>{t.commandCenter.noResults}</CommandEmpty>
-              )}
-              {visibleGroups.map((group, index) => (
-                <CommandGroup
-                  className={HUD_HEADING}
-                  heading={group.heading}
-                  key={group.heading ?? `palette-group-${index}`}
-                >
-                  {group.items.map(item => {
-                    const Icon = item.icon
-                    const combo = item.action ? bindings[item.action]?.[0] : undefined
+                <>
+                  <CommandEmpty>{t.commandCenter.noResults}</CommandEmpty>
+                  {visibleGroups.map((group, index) => (
+                    <CommandGroup
+                      className={HUD_HEADING}
+                      heading={group.heading}
+                      key={group.heading ?? `palette-group-${index}`}
+                    >
+                      {group.items.map(item => {
+                        const Icon = item.icon
+                        const combo = item.action ? bindings[item.action]?.[0] : undefined

-                    return (
-                      <CommandItem
-                        className={cn(HUD_ITEM, HUD_TEXT)}
-                        key={item.id}
-                        keywords={item.keywords}
-                        onSelect={() => handleSelect(item)}
-                        value={`${item.label} ${item.keywords?.join(' ') ?? ''} ${item.id}`}
-                      >
-                        <Icon className="size-3.5 shrink-0 text-muted-foreground" />
-                        <span className="truncate">{item.label}</span>
-                        {combo && <KbdCombo className="ml-auto opacity-55" combo={combo} size="sm" />}
-                        {item.to && (
-                          <ChevronRight
-                            className={cn('size-3.5 shrink-0 text-muted-foreground/70', !combo && 'ml-auto')}
-                          />
-                        )}
-                      </CommandItem>
-                    )
-                  })}
-                </CommandGroup>
-              ))}
+                        return (
+                          <CommandItem
+                            className={cn(HUD_ITEM, HUD_TEXT)}
+                            key={item.id}
+                            keywords={item.keywords}
+                            onSelect={() => handleSelect(item)}
+                            value={`${item.label} ${item.keywords?.join(' ') ?? ''} ${item.id}`}
+                          >
+                            <Icon className="size-3.5 shrink-0 text-muted-foreground" />
+                            <span className="truncate">{item.label}</span>
+                            {combo && <KbdCombo className="ml-auto opacity-55" combo={combo} size="sm" />}
+                            {item.to && (
+                              <ChevronRight
+                                className={cn('size-3.5 shrink-0 text-muted-foreground/70', !combo && 'ml-auto')}
+                              />
+                            )}
+                          </CommandItem>
+                        )
+                      })}
+                    </CommandGroup>
+                  ))}
+                </>
+              )}
            </CommandList>
          </Command>
        </DialogPrimitive.Content>
--- a/apps/desktop/src/app/command-palette/pet-generate-page.tsx
+++ b/apps/desktop/src/app/command-palette/pet-generate-page.tsx
@@ -0,0 +1,303 @@
+/**
+ * Cmd-K → Pets → "Generate" page — describe a pet, pick a draft, hatch it.
+ *
+ * A thin view over the `pet-generate` store. The palette search box doubles as
+ * the concept prompt; this page renders the variant grid, the selection, the
+ * retry/hatch actions, and the loading states. The store owns the two-step
+ * `pet.generate` → `pet.hatch` flow.
+ */
+
+import { useStore } from '@nanostores/react'
+import { useEffect, useState } from 'react'
+
+import { useGatewayRequest } from '@/app/gateway/hooks/use-gateway-request'
+import { PetSprite } from '@/components/pet/pet-sprite'
+import { useI18n } from '@/i18n'
+import { triggerHaptic } from '@/lib/haptics'
+import { Check, Egg, Loader2, PawPrint, RefreshCw } from '@/lib/icons'
+import { cn } from '@/lib/utils'
+import { closeCommandPalette } from '@/store/command-palette'
+import { type PetInfo } from '@/store/pet'
+import {
+  $petGenDrafts,
+  $petGenError,
+  $petGenPreview,
+  $petGenSelected,
+  $petGenStatus,
+  adoptHatched,
+  discardHatched,
+  generateDrafts,
+  hatchSelected
+} from '@/store/pet-generate'
+
+// Number of placeholder cells rendered in the grid while a generation round is
+// in flight.
+const VARIANT_COUNT = 4
+
+// Fixed render scale for the preview so it's a predictable size regardless of
+// the user's configured `display.pet.scale`.
+const PREVIEW_SCALE = 0.7
+
+// Fallback row order if a backend doesn't return `stateRows`.
+const PREVIEW_ROWS = ['idle', 'waving', 'running-right', 'running-left', 'running', 'review', 'jumping', 'failed']
+// How long the hatch preview stays on one row before advancing to the next.
+const PREVIEW_STATE_MS = 1500
+
+// Row name → `framesByState` key to try when the row name itself has no entry
+// there (several row spellings share one frame-count key).
+const ROW_TO_FRAME_KEY: Record<string, string> = {
+  idle: 'idle',
+  wave: 'wave',
+  waving: 'wave',
+  jump: 'jump',
+  jumping: 'jump',
+  run: 'run',
+  running: 'run',
+  'running-right': 'run',
+  'running-left': 'run',
+  failed: 'failed',
+  review: 'review',
+  waiting: 'waiting'
+}
+
+/** Frame count for sheet row `row`: the exact `framesByState` entry wins, then
+ *  the entry under the row's `ROW_TO_FRAME_KEY` alias, then `framesPerState`,
+ *  and finally 0 when none of those is set. */
+function frameCountForRow(pet: PetInfo, row: string): number {
+  const { framesByState, framesPerState } = pet
+  const exact = framesByState?.[row]
+
+  if (exact != null) {
+    return exact
+  }
+
+  const alias = ROW_TO_FRAME_KEY[row]
+  const aliased = alias ? framesByState?.[alias] : undefined
+
+  return aliased ?? framesPerState ?? 0
+}
+
+interface PetGeneratePageProps {
+  /** Current palette search text; its trimmed value is used as the concept prompt. */
+  search: string
+}
+
+/**
+ * Generate-a-pet pane. Renders the draft grid, the name field, and the
+ * retry/hatch actions on top of the `pet-generate` store, and swaps to a status
+ * line or the hatch preview depending on `$petGenStatus`.
+ *
+ * @param search Palette search text; its trimmed value is the concept prompt.
+ */
+export function PetGeneratePage({ search }: PetGeneratePageProps) {
+  const { t } = useI18n()
+  const copy = t.commandCenter.generatePet
+  const { requestGateway } = useGatewayRequest()
+
+  const status = useStore($petGenStatus)
+  const error = useStore($petGenError)
+  const drafts = useStore($petGenDrafts)
+  const selected = useStore($petGenSelected)
+  const preview = useStore($petGenPreview)
+  const [name, setName] = useState('')
+
+  const prompt = search.trim()
+  const busy = status === 'generating' || status === 'hatching'
+  // Single source of truth for "may hatch": drives both the Hatch button's
+  // disabled state and the name field's Enter shortcut.
+  const canHatch = !busy && selected !== null
+
+  const generate = () => {
+    if (prompt) {
+      void generateDrafts(requestGateway, { prompt })
+    }
+  }
+
+  const hatch = () => {
+    // Enter in the name field lands here too, so re-check rather than rely on
+    // the button's `disabled` attribute.
+    if (!canHatch) {
+      return
+    }
+
+    // An empty name falls back to the prompt text.
+    void hatchSelected(requestGateway, { name: name.trim() || prompt, prompt })
+  }
+
+  const adopt = () => {
+    void adoptHatched(requestGateway).then(out => {
+      if (out.ok) {
+        triggerHaptic('crisp')
+        closeCommandPalette()
+      }
+    })
+  }
+
+  if (status === 'stale') {
+    return <Status text={copy.staleBackend} tone="error" />
+  }
+
+  // Hatching is slow (several grounded image generations) — own the whole pane.
+  if (status === 'hatching') {
+    return <Status icon={<Loader2 className="size-4 animate-spin" />} text={copy.hatching} />
+  }
+
+  // Preview: play every animation row before the user commits.
+  if ((status === 'preview' || status === 'adopting') && preview) {
+    return (
+      <HatchPreview
+        adopting={status === 'adopting'}
+        error={error}
+        onAdopt={adopt}
+        onDiscard={() => void discardHatched(requestGateway)}
+        pet={preview}
+      />
+    )
+  }
+
+  const hasDrafts = drafts.length > 0
+  const generating = status === 'generating'
+  // While generating, show spinner placeholders instead of the (old) drafts.
+  const cells = generating ? Array.from({ length: VARIANT_COUNT }, (_, i) => ({ index: i, dataUri: '' })) : drafts
+
+  return (
+    <div className="flex flex-col gap-2 p-2">
+      {error && <p className="px-1 text-[0.6875rem] text-(--ui-red)">{error}</p>}
+
+      {!hasDrafts && !generating && (
+        <p className="px-1 py-1 text-xs text-muted-foreground">{prompt ? copy.readyHint : copy.promptHint}</p>
+      )}
+
+      {(hasDrafts || generating) && (
+        <div className="grid grid-cols-2 gap-2">
+          {cells.map((draft, i) => {
+            const isSelected = !generating && selected === draft.index
+
+            return (
+              <button
+                className={cn(
+                  'relative flex aspect-square items-center justify-center overflow-hidden rounded-lg border bg-(--ui-bg-quinary) transition-colors',
+                  isSelected
+                    ? 'border-(--ui-accent) ring-2 ring-(--ui-accent)/40'
+                    : 'border-(--ui-stroke-tertiary) hover:border-foreground/40'
+                )}
+                disabled={generating || busy}
+                key={generating ? i : draft.index}
+                onClick={() => $petGenSelected.set(draft.index)}
+                onMouseDown={event => event.preventDefault()}
+                type="button"
+              >
+                {generating ? (
+                  <Loader2 className="size-5 animate-spin text-muted-foreground" />
+                ) : (
+                  <img alt="" className="size-full object-contain" draggable={false} src={draft.dataUri} />
+                )}
+                {isSelected && (
+                  <span className="absolute right-1 top-1 rounded-full bg-(--ui-accent) p-0.5 text-(--ui-base)">
+                    <Check className="size-3" />
+                  </span>
+                )}
+              </button>
+            )
+          })}
+        </div>
+      )}
+
+      {hasDrafts ? (
+        <div className="flex flex-col gap-2">
+          <input
+            className="w-full rounded-md border border-(--ui-stroke-tertiary) bg-transparent px-2 py-1.5 text-xs outline-none placeholder:text-muted-foreground focus:border-foreground/40"
+            onChange={event => setName(event.target.value)}
+            onKeyDown={event => {
+              if (event.key === 'Enter') {
+                event.preventDefault()
+                hatch()
+              }
+            }}
+            placeholder={copy.namePlaceholder}
+            value={name}
+          />
+          <div className="flex gap-2">
+            <button
+              className="flex flex-1 items-center justify-center gap-1.5 rounded-md border border-border px-2 py-1.5 text-xs font-medium transition-colors hover:bg-(--chrome-action-hover) disabled:opacity-50"
+              disabled={busy || !prompt}
+              onClick={generate}
+              onMouseDown={event => event.preventDefault()}
+              type="button"
+            >
+              <RefreshCw className="size-3.5" />
+              {copy.retry}
+            </button>
+            <button
+              className="flex flex-1 items-center justify-center gap-1.5 rounded-md bg-primary px-2 py-1.5 text-xs font-medium text-primary-foreground transition-opacity hover:opacity-90 disabled:opacity-50"
+              disabled={!canHatch}
+              onClick={hatch}
+              onMouseDown={event => event.preventDefault()}
+              type="button"
+            >
+              <PawPrint className="size-3.5" />
+              {copy.hatch}
+            </button>
+          </div>
+        </div>
+      ) : (
+        <button
+          className="flex items-center justify-center gap-1.5 rounded-md bg-primary px-2 py-2 text-xs font-medium text-primary-foreground transition-opacity hover:opacity-90 disabled:opacity-50"
+          disabled={busy || !prompt}
+          onClick={generate}
+          onMouseDown={event => event.preventDefault()}
+          type="button"
+        >
+          {generating ? <Loader2 className="size-3.5 animate-spin" /> : <Egg className="size-3.5" />}
+          {generating ? copy.generating : copy.generate}
+        </button>
+      )}
+    </div>
+  )
+}
+
+interface HatchPreviewProps {
+  /** The hatched pet to render in the preview sprite. */
+  pet: PetInfo
+  /** True while adoption is in flight: disables both buttons and shows a spinner on Adopt. */
+  adopting: boolean
+  /** Error text shown above the buttons, if any. */
+  error: string | null
+  /** Called when the Adopt button is clicked. */
+  onAdopt: () => void
+  /** Called when the Start-over button is clicked. */
+  onDiscard: () => void
+}
+
+/**
+ * Preview of a hatched pet: cycles the sprite through its animation rows on a
+ * timer and offers Start over / Adopt buttons.
+ */
+function HatchPreview({ pet, adopting, error, onAdopt, onDiscard }: HatchPreviewProps) {
+  const { t } = useI18n()
+  const copy = t.commandCenter.generatePet
+  const [stateIndex, setStateIndex] = useState(0)
+  // Prefer the rows the pet reports (`stateRows`), else the fallback order; in
+  // both cases drop rows that resolve to zero frames.
+  const previewRows = (pet.stateRows?.length ? pet.stateRows : PREVIEW_ROWS).filter(row => frameCountForRow(pet, row) > 0)
+  // Guarantee at least one row so the `%` arithmetic below stays well-defined.
+  const rows = previewRows.length > 0 ? previewRows : ['idle']
+  const activeRow = rows[stateIndex % rows.length] ?? 'idle'
+
+  // Cycle through the animation rows so the preview showcases all frames.
+  useEffect(() => {
+    const id = setInterval(() => {
+      setStateIndex(i => (i + 1) % rows.length)
+    }, PREVIEW_STATE_MS)
+
+    return () => clearInterval(id)
+  }, [rows.length])
+
+  // Restart from the first row whenever a different pet (by slug) is shown.
+  useEffect(() => {
+    setStateIndex(0)
+  }, [pet.slug])
+
+  // Same pet, but rendered at the fixed preview scale instead of its own.
+  const previewInfo: PetInfo = { ...pet, scale: PREVIEW_SCALE }
+
+  return (
+    <div className="flex flex-col items-center gap-2 p-2">
+      <div className="flex min-h-[9rem] w-full items-center justify-center rounded-lg border border-(--ui-stroke-tertiary) bg-(--ui-bg-quinary) py-2">
+        <PetSprite info={previewInfo} rowOverride={activeRow} />
+      </div>
+
+      {pet.displayName && <p className="text-xs font-medium text-foreground">{pet.displayName}</p>}
+
+      {error && <p className="px-1 text-[0.6875rem] text-(--ui-red)">{error}</p>}
+
+      <div className="flex w-full gap-2">
+        <button
+          className="flex flex-1 items-center justify-center gap-1.5 rounded-md border border-border px-2 py-1.5 text-xs font-medium transition-colors hover:bg-(--chrome-action-hover) disabled:opacity-50"
+          disabled={adopting}
+          onClick={onDiscard}
+          onMouseDown={event => event.preventDefault()}
+          type="button"
+        >
+          <RefreshCw className="size-3.5" />
+          {copy.startOver}
+        </button>
+        <button
+          className="flex flex-1 items-center justify-center gap-1.5 rounded-md bg-primary px-2 py-1.5 text-xs font-medium text-primary-foreground transition-opacity hover:opacity-90 disabled:opacity-50"
+          disabled={adopting}
+          onClick={onAdopt}
+          onMouseDown={event => event.preventDefault()}
+          type="button"
+        >
+          {adopting ? <Loader2 className="size-3.5 animate-spin" /> : <PawPrint className="size-3.5" />}
+          {copy.adopt}
+        </button>
+      </div>
+    </div>
+  )
+}
+
+/** Centered one-line status row: optional leading icon plus text, drawn in the
+ *  error color when `tone` is `'error'` and in the muted color otherwise. */
+function Status({ icon, text, tone }: { icon?: React.ReactNode; text: string; tone?: 'error' }) {
+  const toneClass = tone === 'error' ? 'text-(--ui-red)' : 'text-muted-foreground'
+
+  return (
+    <div className={cn('flex items-center justify-center gap-2 px-2 py-6 text-xs', toneClass)}>
+      {icon}
+      {text}
+    </div>
+  )
+}
--- a/apps/desktop/src/app/command-palette/pet-palette-page.tsx
+++ b/apps/desktop/src/app/command-palette/pet-palette-page.tsx
@@ -0,0 +1,205 @@
+/**
+ * Cmd-K "Pets…" page — browse the petdex gallery, adopt/switch, toggle off.
+ *
+ * A thin view over the `pet-gallery` store: it subscribes to the shared atoms
+ * and calls the store's actions. The store owns fetching, caching, the thumb
+ * cache, and optimistic mutations, so reopening this page is instant and a
+ * toggle never re-pulls the network gallery.
+ */
+
+import { useStore } from '@nanostores/react'
+import { useEffect, useMemo } from 'react'
+
+import { HUD_ITEM, HUD_TEXT } from '@/app/floating-hud'
+import { useGatewayRequest } from '@/app/gateway/hooks/use-gateway-request'
+import { PetThumb } from '@/components/pet/pet-thumb'
+import { useI18n } from '@/i18n'
+import { triggerHaptic } from '@/lib/haptics'
+import { Check, Egg, Loader2, PawPrint } from '@/lib/icons'
+import { cn } from '@/lib/utils'
+import {
+  $petBusy,
+  $petGallery,
+  $petGalleryError,
+  $petGalleryStatus,
+  adoptPet,
+  loadPetGallery,
+  loadPetThumb,
+  rankedGalleryPets,
+  setPetEnabled
+} from '@/store/pet-gallery'
+
+interface PetPalettePageProps { // Props accepted by PetPalettePage.
+  search: string // Query text; forwarded unchanged to `rankedGalleryPets` together with the gallery.
+  /** Navigate to the "generate a pet" page; when provided, a generate row is rendered above the pet list. */
+  onGenerate?: () => void
+}
+
+export function PetPalettePage({ search, onGenerate }: PetPalettePageProps) { // Pet list view: requests the gallery on mount, renders up to 50 rows, adopts a pet on row click.
+  const { t } = useI18n()
+  const copy = t.commandCenter.pets // localized strings for this page
+  const { requestGateway } = useGatewayRequest()
+
+  const gallery = useStore($petGallery)
+  const status = useStore($petGalleryStatus)
+  const error = useStore($petGalleryError)
+  const busy = useStore($petBusy) // compared with pet.slug below to find the busy row; any truthy value disables the other rows
+
+  useEffect(() => {
+    void loadPetGallery(requestGateway) // fire-and-forget; re-runs only if requestGateway changes identity
+  }, [requestGateway])
+
+  const enabled = gallery?.enabled ?? false // no gallery yet => treat pets as off
+  const active = gallery?.active ?? '' // slug of the active pet ('' when there is no gallery)
+
+  const shown = useMemo(() => rankedGalleryPets(gallery, search).slice(0, 50), [gallery, search]) // cap the rendered list at 50 rows
+
+  const adopt = (slug: string) => {
+    void adoptPet(requestGateway, slug, copy.adoptFailed).then(ok => ok && triggerHaptic('crisp')) // haptic only when adoptPet resolves truthy
+  }
+
+  if (status === 'loading' && !gallery) { // spinner only while there is no gallery to show yet
+    return <Status icon={<Loader2 className="size-3.5 animate-spin" />} text={copy.loading} />
+  }
+
+  if (status === 'stale') { // 'stale' replaces the whole page with the staleBackend message, even if a gallery is present
+    return <Status text={copy.staleBackend} tone="error" />
+  }
+
+  if (!gallery?.pets.length && error) { // no pets to list and an error is set: show the error instead of the list
+    return <Status text={error} tone="error" />
+  }
+
+  const mutating = Boolean(busy)
+
+  return ( // NOTE(review): the listbox below also contains a non-option button and a <p>; confirm this is acceptable for assistive tech
+    <div role="listbox">
+      {onGenerate && ( // optional "generate a pet" row, rendered above the pet rows
+        <button
+          className={cn(
+            'flex w-full items-center gap-2 rounded-md text-left text-foreground transition-colors hover:bg-(--chrome-action-hover)',
+            HUD_ITEM,
+            HUD_TEXT
+          )}
+          onClick={onGenerate}
+          onMouseDown={event => event.preventDefault()} // suppress the default mousedown action so the click does not move focus
+          type="button"
+        >
+          <span className="flex size-8 shrink-0 items-center justify-center rounded-md bg-(--chrome-action-hover)">
+            <Egg className="size-4" />
+          </span>
+          <span className="font-medium">{t.commandCenter.generatePet.title}</span>
+        </button>
+      )}
+
+      {error && <p className="px-2 pb-1 pt-1.5 text-[0.6875rem] text-(--ui-red)">{error}</p>}
+
+      {shown.length === 0 ? ( // nothing to list: show the empty-state text
+        <Status text={copy.empty} />
+      ) : (
+        shown.map(pet => {
+          const isActive = enabled && pet.slug === active // the active pet is only highlighted while pets are enabled
+          const isBusy = busy === pet.slug // this row is the one currently being mutated
+
+          return (
+            <button
+              className={cn(
+                'flex w-full items-center gap-2 rounded-md text-left transition-colors hover:bg-(--chrome-action-hover) disabled:opacity-60',
+                HUD_ITEM,
+                HUD_TEXT,
+                isActive && 'bg-(--chrome-action-hover)/70'
+              )}
+              disabled={mutating && !isBusy} // every row except the busy one is disabled while `busy` is truthy
+              key={pet.slug}
+              onClick={() => adopt(pet.slug)}
+              onMouseDown={event => event.preventDefault()}
+              role="option" // NOTE(review): no aria-selected is set, so the active row is only indicated visually
+              type="button"
+            >
+              <PetThumb
+                alt={pet.displayName}
+                load={(slug, url) => loadPetThumb(requestGateway, slug, url)} // thumb loading is delegated to the store's loadPetThumb
+                size={32}
+                slug={pet.slug}
+                url={pet.spritesheetUrl}
+              />
+              <span className="flex min-w-0 flex-col">
+                <span className="truncate font-medium">{pet.displayName}</span>
+                <span className="truncate text-[0.6875rem] text-muted-foreground/80">
+                  {pet.slug}
+                  {pet.installed ? ` · ${copy.installed}` : ''}
+                </span>
+              </span>
+              <span className="ml-auto flex shrink-0 items-center text-[0.6875rem] text-muted-foreground">
+                {isBusy ? ( // spinner on the busy row, check mark on the active row, otherwise nothing
+                  <Loader2 className="size-3 animate-spin" />
+                ) : isActive ? (
+                  <Check className="size-3.5 text-foreground" />
+                ) : null}
+              </span>
+            </button>
+          )
+        })
+      )}
+    </div>
+  )
+}
+
+/**
+ * Single on/off toggle, rendered inline on the palette's search row (see
+ * `CommandInput`'s `right` slot). The paw lights up when pets are on. Reads the
+ * same shared gallery atoms, so it stays in sync with the list below.
+ */
+export function PetInlineToggle() {
+  const { t } = useI18n()
+  const copy = t.commandCenter.pets // localized strings shared with the pets page
+  const { requestGateway } = useGatewayRequest()
+  const gallery = useStore($petGallery)
+  const busy = useStore($petBusy)
+
+  if (!gallery) { // render nothing until the gallery atom holds a value (this component never loads it itself)
+    return null
+  }
+
+  const enabled = gallery.enabled
+
+  const toggle = () => {
+    void setPetEnabled(requestGateway, !enabled, { // request the opposite of the current state
+      noneAvailable: copy.noneAvailable,
+      fallback: copy.toggleFailed
+    }).then(ok => ok && triggerHaptic('crisp')) // haptic only when setPetEnabled resolves truthy
+  }
+
+  return (
+    <button
+      aria-label={enabled ? copy.turnOff : copy.turnOn} // label names the action the click will perform
+      aria-pressed={enabled}
+      className={cn(
+        'flex shrink-0 items-center justify-center rounded-md p-1.5 transition-colors disabled:opacity-50',
+        enabled ? 'bg-(--chrome-action-hover) text-foreground' : 'text-muted-foreground hover:bg-(--chrome-action-hover)/60'
+      )}
+      disabled={Boolean(busy)} // disabled whenever $petBusy is truthy (the spinner below replaces the paw in that state)
+      onClick={toggle}
+      // Don't steal focus from the search input on click.
+      onMouseDown={event => event.preventDefault()}
+      title={enabled ? copy.turnOff : copy.turnOn}
+      type="button"
+    >
+      {busy ? <Loader2 className="size-4 animate-spin" /> : <PawPrint className="size-4" />}
+    </button>
+  )
+}
+
+function Status({ icon, text, tone }: { icon?: React.ReactNode; text: string; tone?: 'error' }) { // Centered status row: optional `icon` node followed by `text`.
+  return (
+    <div
+      className={cn(
+        'flex items-center justify-center gap-2 px-2 py-6 text-xs',
+        tone === 'error' ? 'text-(--ui-red)' : 'text-muted-foreground' // red when tone is 'error', muted when tone is omitted
+      )}
+    >
+      {icon}
+      {text}
+    </div>
+  )
+}
--- a/apps/desktop/src/app/desktop-controller.tsx
+++ b/apps/desktop/src/app/desktop-controller.tsx
@@ -37,6 +37,9 @@ import {
  SIDEBAR_SESSIONS_PAGE_SIZE,
  unpinSession
 } from '../store/layout'
+import { respondToApprovalAction } from '../store/native-notifications'
+import { setPetActivity } from '../store/pet'
+import { setPetOverlayOpenAppHandler, setPetOverlaySubmitHandler } from '../store/pet-overlay'
 import { $filePreviewTarget, $previewTarget, closeActiveRightRailTab } from '../store/preview'
 import {
  $activeGatewayProfile,
@@ -48,6 +51,7 @@ import {
 } from '../store/profile'
 import {
  $activeSessionId,
+  $attentionSessionIds,
  $currentCwd,
  $freshDraftReady,
  $gatewayState,
@@ -76,6 +80,7 @@ import {
  setSessionsLoading,
  setSessionsTotal
 } from '../store/session'
+import { onSessionsChanged } from '../store/session-sync'
 import { clearSessionTodos, setSessionTodos, todoListActive } from '../store/todos'
 import { openUpdatesWindow, startUpdatePoller, stopUpdatePoller } from '../store/updates'
 import { isSecondaryWindow } from '../store/windows'
@@ -269,6 +274,26 @@ export function DesktopController() {
    }
  }, [])

+  // Notification click: the main process already focused the window; jump to its session.
+  useEffect(() => {
+    const unsubscribe = window.hermesDesktop?.onFocusSession?.(sessionId => {
+      if (sessionId) {
+        navigate(sessionRoute(sessionId))
+      }
+    })
+
+    return () => unsubscribe?.()
+  }, [navigate])
+
+  // Notification action button (Approve/Reject) — resolve in place, no navigation.
+  useEffect(() => {
+    const unsubscribe = window.hermesDesktop?.onNotificationAction?.(({ actionId, sessionId }) => {
+      void respondToApprovalAction(sessionId ?? null, actionId)
+    })
+
+    return () => unsubscribe?.()
+  }, [])
+
  // hermes:// deep links (e.g. a docs "Send to App" button for an automation blueprint).
  // Build the equivalent /blueprint slash command from the payload and drop
  // it into the composer — the user reviews/edits, then sends; the agent (or
@@ -443,6 +468,17 @@ export function DesktopController() {
    void refreshSessions()
  }, [refreshSessions])

+  // Another window mutated the shared session list (e.g. a chat started in the
+  // pop-out). Re-pull so the sidebar reflects it. Pop-outs have no sidebar, so
+  // only real windows bother.
+  useEffect(() => {
+    if (isSecondaryWindow()) {
+      return
+    }
+
+    return onSessionsChanged(() => void refreshSessions().catch(() => undefined))
+  }, [refreshSessions])
+
  // ALL-profiles view pages one profile at a time: fetch that profile's next
  // page and merge it in place, leaving every other profile's rows untouched.
  const loadMoreSessionsForProfile = useCallback(async (profile: string) => {
@@ -678,7 +714,9 @@ export function DesktopController() {
    }

    lastGatewayProfileRef.current = activeGatewayProfile
-    void refreshCurrentModel()
+    // Force: the new profile has its own default, so reseed even if the composer
+    // already shows the previous profile's model.
+    void refreshCurrentModel(true)
    void refreshActiveProfile()
  }, [activeGatewayProfile, refreshCurrentModel])

@@ -751,6 +789,53 @@ export function DesktopController() {
    updateSessionState
  })

+  // The popped-out pet drives two actions back into the app: send a prompt, and
+  // open the most recent thread. Both are registered ONCE through refs that track
+  // the latest callbacks — re-registering on every `submitText`/`resumeSession`
+  // identity change left a brief window where the handler was nulled (cleanup
+  // before re-register), which could drop a submit fired from the overlay (e.g.
+  // creating a session from the new-session screen). The ref form keeps a stable,
+  // always-current handler. Primary window only — it owns the overlay.
+  const submitTextRef = useRef(submitText)
+  submitTextRef.current = submitText
+  const resumeSessionRef = useRef(resumeSession)
+  resumeSessionRef.current = resumeSession
+
+  useEffect(() => {
+    if (isSecondaryWindow()) {
+      return
+    }
+
+    setPetOverlaySubmitHandler(text => void submitTextRef.current(text))
+    // Mail icon: $sessions is ordered most-recent-first; the pet is global (not
+    // per session) so "most recent" is the right target. main.cjs already raised
+    // the window before forwarding this.
+    setPetOverlayOpenAppHandler(() => {
+      const recent = $sessions.get()[0]
+
+      if (recent?.id) {
+        void resumeSessionRef.current(recent.id)
+      }
+    })
+
+    return () => {
+      setPetOverlaySubmitHandler(null)
+      setPetOverlayOpenAppHandler(null)
+    }
+  }, [])
+
+  // Mirror "a session is blocked on the user" (clarify/approval) into the pet's
+  // awaitingInput flag so it shows the `waiting` pose. Lives on $petActivity so
+  // it rides the same atom the pop-out overlay mirrors — no session list needed
+  // there. Every window keeps its own in-window pet in sync.
+  useEffect(() => {
+    const sync = () => setPetActivity({ awaitingInput: $attentionSessionIds.get().length > 0 })
+
+    sync()
+
+    return $attentionSessionIds.listen(sync)
+  }, [])
+
  useGatewayBoot({
    handleGatewayEvent: handleDesktopGatewayEvent,
    onConnectionReady: c => {
@@ -826,7 +911,6 @@ export function DesktopController() {
    gatewayLogLines,
    gatewayState,
    inferenceStatus,
-    modelMenuContent,
    openAgents,
    freshDraftReady,
    openCommandCenterSection,
@@ -948,6 +1032,7 @@ export function DesktopController() {
    <ChatView
      gateway={gatewayRef.current}
      maxVoiceRecordingSeconds={voiceMaxRecordingSeconds}
+      modelMenuContent={modelMenuContent}
      onAddContextRef={composer.addContextRefAttachment}
      onAddUrl={url => composer.addContextRefAttachment(`@url:${formatRefValue(url)}`, url)}
      onAttachDroppedItems={composer.attachDroppedItems}
--- a/apps/desktop/src/app/hooks/use-keybinds.ts
+++ b/apps/desktop/src/app/hooks/use-keybinds.ts
@@ -37,6 +37,7 @@ import {
  switcherActive,
  switcherJustClosed
 } from '@/store/session-switcher'
+import { openNewSessionInNewWindow } from '@/store/windows'
 import { useTheme } from '@/themes/context'

 import { requestComposerFocus } from '../chat/composer/focus'
@@ -132,6 +133,7 @@ export function useKeybinds(deps: KeybindRuntimeDeps): void {
      deps.startFreshSession()
      window.dispatchEvent(new CustomEvent('hermes:new-session-shortcut'))
    },
+    'session.newWindow': () => void openNewSessionInNewWindow(),
    'session.next': () => stepSession(1),
    'session.prev': () => stepSession(-1),
    ...sessionSlotHandlers,
--- a/apps/desktop/src/app/messaging/index.tsx
+++ b/apps/desktop/src/app/messaging/index.tsx
@@ -527,7 +527,7 @@ const PLATFORM_INTRO: Record<string, string> = {
  wecom_callback:
    'Set up a WeCom self-built app, expose its callback URL, and provide the corp ID, secret, agent ID, and AES key.',
  weixin:
-    'Sign in to the WeChat Official Account platform, copy the AppID and Token, and point the message callback URL at Hermes.',
+    'Run `hermes gateway setup`, select Weixin, then scan and confirm the QR code with a personal WeChat account. Hermes connects through Tencent\'s iLink Bot API and saves the credentials.',
  qqbot: 'Register an app on the QQ Open Platform (q.qq.com) and copy the App ID and Client Secret.',
  api_server:
    'Expose Hermes as an OpenAI-compatible API. Set an auth key, then point Open WebUI / LobeChat / etc. at the host:port.',
--- a/apps/desktop/src/app/pet-overlay/overlay-root.tsx
+++ b/apps/desktop/src/app/pet-overlay/overlay-root.tsx
@@ -0,0 +1,38 @@
+import { StrictMode } from 'react'
+import { createRoot } from 'react-dom/client'
+
+import { ErrorBoundary } from '@/components/error-boundary'
+import { ThemeProvider } from '@/themes/context'
+
+import { PetOverlayApp } from './pet-overlay-app'
+
+/**
+ * Boot the pet-overlay window. Loaded by the same bundle as the main app but
+ * via `?win=overlay`, so it shares CSS/atoms while mounting a minimal, transparent
+ * surface (no app shell, no gateway, no I18n — the bubble strings are inline).
+ *
+ * The index.html boot script paints an OPAQUE themed background to avoid a flash
+ * in normal windows; the overlay must be see-through, so we force every host
+ * layer transparent with a late style tag whose rule is marked `!important`.
+ */
+export function mountPetOverlay(): void {
+  const style = document.createElement('style')
+  style.textContent = 'html,body,#root{background:transparent !important;}'
+  document.head.appendChild(style) // appended to <head> before rendering
+
+  const root = document.getElementById('root')
+
+  if (!root) { // no mount point: bail out silently (the style tag above stays injected)
+    return
+  }
+
+  createRoot(root).render( // tree: StrictMode > ErrorBoundary("pet-overlay") > ThemeProvider > PetOverlayApp
+    <StrictMode>
+      <ErrorBoundary label="pet-overlay">
+        <ThemeProvider>
+          <PetOverlayApp />
+        </ThemeProvider>
+      </ErrorBoundary>
+    </StrictMode>
+  )
+}
--- a/apps/desktop/src/app/pet-overlay/pet-overlay-app.tsx
+++ b/apps/desktop/src/app/pet-overlay/pet-overlay-app.tsx
@@ -0,0 +1,345 @@
+import { useStore } from '@nanostores/react'
+import { useEffect, useRef, useState } from 'react'
+
+import { PetBubble } from '@/components/pet/pet-bubble'
+import { PetSprite } from '@/components/pet/pet-sprite'
+import { Mail } from '@/lib/icons'
+import { $petActivity, $petInfo, setPetInfo } from '@/store/pet'
+import { setAwaitingResponse, setBusy } from '@/store/session'
+
+/**
+ * The pop-out overlay's only view: a transparent, draggable mascot with a mini
+ * composer.
+ *
+ * This runs in a separate, gateway-less BrowserWindow (`?win=overlay`). It is a
+ * pure puppet — the main renderer pushes the live pet state over IPC and we
+ * mirror it into the same atoms the in-window pet reads, so `PetSprite` /
+ * `PetBubble` render identically with zero extra logic.
+ *
+ * The window is a full rectangle but mostly transparent; we toggle OS-level
+ * mouse click-through so only the sprite (or the open composer) is interactive
+ * and the empty margins pass clicks through to whatever is behind.
+ *
+ * Gestures on the pet: drag to move it anywhere on screen (even outside the
+ * app), shift-click to pop it back into the window, single-click to open a small
+ * composer, double-click to toggle the app window (minimize ↔ restore). A mail
+ * icon (shown only when a turn finished while you were away) raises the app on
+ * the most recent thread.
+ */
+
+// A press that travels at most this many pixels counts as a click, not a drag.
+const CLICK_SLOP_PX = 3
+// A second click within this window is a double-click (sends 'toggle-app') and
+// cancels the deferred single-click composer toggle, so a double never flashes it.
+const DOUBLE_CLICK_MS = 250
+
+interface DragState { // Snapshot taken on pointer-down; lives in dragRef until pointer-up.
+  startX: number // pointer screenX at press (used to measure travel)
+  startY: number // pointer screenY at press
+  offX: number // pointer screenX minus window.screenX at press
+  offY: number // pointer screenY minus window.screenY at press
+  width: number // window.outerWidth at press; re-sent unchanged with every bounds update
+  height: number // window.outerHeight at press; re-sent unchanged with every bounds update
+  moved: boolean // set once travel exceeds CLICK_SLOP_PX; decides drag vs click on release
+}
+
+export function PetOverlayApp() { // Overlay window view: renders PetBubble + PetSprite, an optional composer input and an optional mail button.
+  const info = useStore($petInfo)
+  const [composerOpen, setComposerOpen] = useState(false)
+  const [draft, setDraft] = useState('') // text currently typed into the composer input
+  // Mirrored from the main renderer: a finish landed while you were away.
+  const [unread, setUnread] = useState(false)
+
+  const dragRef = useRef<DragState | null>(null) // non-null only between pointer-down and pointer-up on the pet
+  const petRef = useRef<HTMLDivElement | null>(null) // pet wrapper; its bounding box is the click-through hit area
+  const inputRef = useRef<HTMLInputElement | null>(null)
+  const ignoreRef = useRef(true) // last click-through value, so setIgnore only calls the bridge on a change
+  const composerOpenRef = useRef(false) // mirror of composerOpen, readable from the mousemove listener registered once
+  const clickTimerRef = useRef<ReturnType<typeof setTimeout> | undefined>(undefined) // pending deferred single-click, if any
+
+  const setIgnore = (ignore: boolean) => {
+    if (ignoreRef.current !== ignore) { // skip redundant bridge calls
+      ignoreRef.current = ignore
+      window.hermesDesktop?.petOverlay?.setIgnoreMouse(ignore)
+    }
+  }
+
+  // Mirror pushed state into the shared atoms so PetSprite/PetBubble just work.
+  useEffect(() => {
+    const off = window.hermesDesktop?.petOverlay?.onState(payload => {
+      setPetInfo(payload.info)
+      $petActivity.set(payload.activity ?? {}) // missing activity resets the atom to an empty object
+      setBusy(Boolean(payload.busy))
+      setAwaitingResponse(Boolean(payload.awaiting))
+      setUnread(Boolean(payload.unread))
+    })
+
+    // Tell the main renderer we're mounted so it pushes the current frame (the
+    // subscribe-time pushes during open() can land before this view exists).
+    window.hermesDesktop?.petOverlay?.control({ type: 'ready' })
+
+    return off // effect cleanup is whatever onState returned (undefined when the bridge is absent)
+  }, [])
+
+  // Click-through: make only the sprite (or an open composer) interactive. With
+  // ignore+forward, the renderer still receives mousemove so we can re-enable
+  // hit-testing the moment the cursor returns to the pet.
+  useEffect(() => {
+    setIgnore(true) // NOTE(review): ignoreRef starts as true, so this call is a no-op and never reaches setIgnoreMouse — confirm the window is created already ignoring the mouse
+
+    const onMove = (ev: MouseEvent) => {
+      if (dragRef.current || composerOpenRef.current) { // mid-drag or composer open: keep the whole window interactive
+        setIgnore(false)
+
+        return
+      }
+
+      const el = petRef.current
+
+      if (!el) { // pet not rendered (component returned null): leave the current state untouched
+        return
+      }
+
+      const r = el.getBoundingClientRect()
+      const over = ev.clientX >= r.left && ev.clientX <= r.right && ev.clientY >= r.top && ev.clientY <= r.bottom
+      setIgnore(!over) // interactive only while the cursor is inside the pet wrapper's bounding box
+    }
+
+    window.addEventListener('mousemove', onMove)
+
+    return () => {
+      window.removeEventListener('mousemove', onMove)
+      clearTimeout(clickTimerRef.current) // drop any pending deferred single-click on unmount
+    }
+  }, [])
+
+  // The whole window must stay interactive while the composer is open (so the
+  // input keeps focus); focus it on open. The overlay is a non-activating panel
+  // (so it never steals the app's cmd/alt-tab anchor) — flip it focusable while
+  // the composer needs the keyboard, then back to non-activating when it closes.
+  useEffect(() => {
+    composerOpenRef.current = composerOpen
+
+    window.hermesDesktop?.petOverlay?.setFocusable(composerOpen)
+
+    if (composerOpen) { // on close nothing is done here; click-through is only recomputed by the next mousemove
+      setIgnore(false)
+      // The OS window has to become key first (setFocusable + focus happen in
+      // the main process), so focus the input on the next frame.
+      requestAnimationFrame(() => inputRef.current?.focus())
+    }
+  }, [composerOpen])
+
+  const onPetPointerDown = (e: React.PointerEvent) => {
+    if (e.button !== 0) { // primary button only
+      return
+    }
+
+    ;(e.target as Element).setPointerCapture?.(e.pointerId) // capture the pointer on the pressed element for the rest of the gesture
+    dragRef.current = {
+      height: window.outerHeight,
+      moved: false,
+      offX: e.screenX - window.screenX,
+      offY: e.screenY - window.screenY,
+      startX: e.screenX,
+      startY: e.screenY,
+      width: window.outerWidth
+    }
+  }
+
+  const onPetPointerMove = (e: React.PointerEvent) => {
+    const drag = dragRef.current
+
+    if (!drag) { // no press in progress: plain hover, nothing to move
+      return
+    }
+
+    if (Math.hypot(e.screenX - drag.startX, e.screenY - drag.startY) > CLICK_SLOP_PX) {
+      drag.moved = true // latched: stays true even if the pointer returns to the start point
+    }
+
+    window.hermesDesktop?.petOverlay?.setBounds({ // sent on every move while pressed, including moves still below the click slop
+      height: drag.height,
+      width: drag.width,
+      x: e.screenX - drag.offX,
+      y: e.screenY - drag.offY
+    })
+  }
+
+  const onPetPointerUp = (e: React.PointerEvent) => {
+    const drag = dragRef.current
+    dragRef.current = null // the gesture ends here whatever branch follows
+    ;(e.target as Element).releasePointerCapture?.(e.pointerId)
+
+    if (!drag) {
+      return
+    }
+
+    if (drag.moved) {
+      // A drag cancels any deferred single-click so the composer can't pop open
+      // after you reposition the pet.
+      clearTimeout(clickTimerRef.current)
+      clickTimerRef.current = undefined
+
+      // Remember the spot on the desktop (screen coords) so the pet reopens here
+      // next time / after a restart.
+      window.hermesDesktop?.petOverlay?.control({
+        bounds: { height: drag.height, width: drag.width, x: e.screenX - drag.offX, y: e.screenY - drag.offY },
+        type: 'bounds'
+      })
+
+      return
+    }
+
+    // Shift-click always pops the pet back in (no double-click ambiguity).
+    if (e.shiftKey) { // NOTE(review): a pending clickTimerRef is not cleared here, so a deferred single-click can still toggle the composer afterwards
+      window.hermesDesktop?.petOverlay?.control({ type: 'pop-in' })
+
+      return
+    }
+
+    // Double-click toggles the app window (minimize ↔ restore); defer the
+    // single-click composer toggle so a double never flashes the composer open.
+    if (clickTimerRef.current) { // a timer is pending, so this release is the second click
+      clearTimeout(clickTimerRef.current)
+      clickTimerRef.current = undefined
+      window.hermesDesktop?.petOverlay?.control({ type: 'toggle-app' })
+
+      return
+    }
+
+    clickTimerRef.current = setTimeout(() => {
+      clickTimerRef.current = undefined
+      setComposerOpen(open => !open) // single click toggles: opens the composer, or closes it if already open
+    }, DOUBLE_CLICK_MS)
+  }
+
+  const send = () => {
+    const text = draft.trim()
+
+    if (text) { // whitespace-only drafts are not submitted
+      window.hermesDesktop?.petOverlay?.control({ text, type: 'submit' })
+    }
+
+    setDraft('') // the draft is cleared and the composer closed even when nothing was submitted
+    setComposerOpen(false)
+  }
+
+  const openApp = () => {
+    // Hide the icon immediately; the main renderer also clears the source flag.
+    setUnread(false)
+    window.hermesDesktop?.petOverlay?.control({ type: 'open-app' })
+  }
+
+  if (!info.enabled || !info.spritesheetBase64) { // nothing to draw unless the pet is enabled and sprite data is present
+    return null
+  }
+
+  return (
+    <div
+      onPointerDown={e => {
+        // Click on the transparent backdrop (not the pet/composer) dismisses
+        // the composer.
+        if (composerOpen && e.target === e.currentTarget) { // target === currentTarget means the press landed on this backdrop div itself
+          setComposerOpen(false)
+        }
+      }}
+      style={{
+        alignItems: 'center',
+        background: 'transparent',
+        display: 'flex',
+        flexDirection: 'column',
+        height: '100vh',
+        justifyContent: 'flex-end', // content is stacked at the bottom of the window
+        paddingBottom: 24,
+        userSelect: 'none',
+        width: '100vw'
+      }}
+    >
+      {composerOpen && ( // the input exists in the DOM only while the composer is open
+        <input
+          onChange={e => setDraft(e.target.value)}
+          onKeyDown={e => {
+            if (e.key === 'Enter' && !e.shiftKey) { // plain Enter submits; Shift+Enter is left to the default handling
+              e.preventDefault()
+              send()
+            } else if (e.key === 'Escape') { // Escape closes the composer but keeps the draft text
+              setComposerOpen(false)
+            }
+          }}
+          placeholder="Message…"
+          ref={inputRef}
+          style={{
+            background: 'var(--ui-bg-elevated)',
+            border: '1px solid var(--ui-stroke-secondary)',
+            borderRadius: 2,
+            boxShadow: '0 6px 18px rgba(0,0,0,0.28)',
+            color: 'var(--foreground)',
+            fontSize: 12,
+            marginBottom: 8,
+            outline: 'none',
+            padding: '4px 8px',
+            width: 184
+          }}
+          value={draft}
+        />
+      )}
+
+      <div
+        onPointerDown={onPetPointerDown}
+        onPointerMove={onPetPointerMove}
+        onPointerUp={onPetPointerUp} // NOTE(review): there is no onPointerCancel; dragRef is only cleared in onPetPointerUp, so a cancelled gesture would leave it set
+        ref={petRef}
+        style={{
+          alignItems: 'center',
+          cursor: 'grab',
+          display: 'flex',
+          flexDirection: 'column',
+          position: 'relative',
+          touchAction: 'none'
+        }}
+      >
+        <div style={{ marginBottom: 4 }}>
+          <PetBubble />
+        </div>
+        <div style={{ lineHeight: 0, position: 'relative' }}>
+          <PetSprite info={info} />
+
+          {/* Mail icon: only when a finish landed while you were away. Jumps to
+              the app's most recent thread. Anchored to the sprite (kept inside
+              its box so the overlay's click-through hit-test still catches it);
+              stopPropagation keeps a click from starting a window drag. */}
+          {unread && (
+            <button
+              aria-label="Open in Hermes"
+              onClick={openApp}
+              onPointerDown={e => e.stopPropagation()} // keep the press from reaching the pet wrapper's drag handlers
+              onPointerUp={e => e.stopPropagation()} // keep the release from being treated as a click on the pet
+              style={{
+                alignItems: 'center',
+                background: 'var(--ui-bg-elevated)',
+                border: '1px solid var(--ui-stroke-secondary)',
+                borderRadius: 999,
+                boxShadow: '0 4px 14px rgba(0,0,0,0.22)',
+                color: 'var(--foreground)',
+                cursor: 'pointer',
+                display: 'inline-flex',
+                height: 24,
+                justifyContent: 'center',
+                padding: 0,
+                position: 'absolute', // pinned to the top-right corner of the sprite's box
+                right: 0,
+                top: 0,
+                width: 24
+              }}
+              title="Open in Hermes"
+              type="button"
+            >
+              <Mail style={{ height: 13, width: 13 }} />
+            </button>
+          )}
+        </div>
+      </div>
+    </div>
+  )
+}
--- a/apps/desktop/src/app/profiles/create-profile-dialog.tsx
+++ b/apps/desktop/src/app/profiles/create-profile-dialog.tsx
@@ -2,14 +2,15 @@ import { useEffect, useState } from 'react'

 import { ActionStatus } from '@/components/ui/action-status'
 import { Button } from '@/components/ui/button'
-import { Checkbox } from '@/components/ui/checkbox'
 import { Dialog, DialogContent, DialogDescription, DialogFooter, DialogHeader, DialogTitle } from '@/components/ui/dialog'
 import { Input } from '@/components/ui/input'
+import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select'
 import { Textarea } from '@/components/ui/textarea'
 import { createProfile, updateProfileSoul } from '@/hermes'
 import { useI18n } from '@/i18n'
 import { AlertTriangle } from '@/lib/icons'
 import { cn } from '@/lib/utils'
+import type { ProfileInfo } from '@/types/hermes'

 const PROFILE_NAME_RE = /^[a-z0-9][a-z0-9_-]{0,63}$/

@@ -23,16 +24,18 @@ export function isValidProfileName(name: string): boolean {
 export function CreateProfileDialog({
  onClose,
  onCreated,
-  open
+  open,
+  profiles = []
 }: {
  onClose: () => void
  onCreated?: (name: string) => Promise<void> | void
  open: boolean
+  profiles?: ProfileInfo[]
 }) {
  const { t } = useI18n()
  const p = t.profiles
  const [name, setName] = useState('')
-  const [cloneFromDefault, setCloneFromDefault] = useState(true)
+  const [cloneFrom, setCloneFrom] = useState<null | string>('default')
  const [soul, setSoul] = useState('')
  const [status, setStatus] = useState<'done' | 'idle' | 'saving'>('idle')
  const [error, setError] = useState<null | string>(null)
@@ -43,7 +46,7 @@ export function CreateProfileDialog({
    }

    setName('')
-    setCloneFromDefault(true)
+    setCloneFrom('default')
    setSoul('')
    setError(null)
    setStatus('idle')
@@ -66,7 +69,7 @@ export function CreateProfileDialog({
    setError(null)

    try {
-      await createProfile({ name: trimmed, clone_from_default: cloneFromDefault })
+      await createProfile({ name: trimmed, clone_from: cloneFrom })

      if (soul.trim()) {
        await updateProfileSoul(trimmed, soul)
@@ -107,17 +110,25 @@ export function CreateProfileDialog({
            </p>
          </div>

-          <label className="flex cursor-pointer select-none items-start gap-2.5 px-0.5 py-1">
-            <Checkbox
-              checked={cloneFromDefault}
-              className="mt-0.5 shrink-0"
-              onCheckedChange={checked => setCloneFromDefault(checked === true)}
-            />
-            <span className="grid gap-0.5 leading-snug">
-              <span className="text-sm font-medium">{p.cloneFromDefault}</span>
-              <span className="text-xs text-muted-foreground">{p.cloneFromDefaultDesc}</span>
-            </span>
-          </label>
+          <div className="grid gap-1.5">
+            <label className="text-xs font-medium" htmlFor="new-profile-clone-from">
+              {p.cloneFrom}
+            </label>
+            <Select onValueChange={value => setCloneFrom(value === '__none__' ? null : value)} value={cloneFrom ?? '__none__'}>
+              <SelectTrigger className="h-9 rounded-md" id="new-profile-clone-from">
+                <SelectValue />
+              </SelectTrigger>
+              <SelectContent>
+                <SelectItem value="__none__">{p.cloneFromNone}</SelectItem>
+                {profiles.map(profile => (
+                  <SelectItem key={profile.name} value={profile.name}>
+                    {profile.name}
+                  </SelectItem>
+                ))}
+              </SelectContent>
+            </Select>
+            <p className="text-xs text-muted-foreground">{p.cloneFromDesc}</p>
+          </div>

          <div className="grid gap-1.5">
            <label className="text-xs font-medium" htmlFor="new-profile-soul">
@@ -127,7 +138,7 @@ export function CreateProfileDialog({
              className="min-h-28 font-mono text-xs leading-5"
              id="new-profile-soul"
              onChange={event => setSoul(event.target.value)}
-              placeholder={p.soulPlaceholder(cloneFromDefault ? p.soulPlaceholderCloned : p.soulPlaceholderEmpty)}
+              placeholder={p.soulPlaceholder(cloneFrom ? p.soulPlaceholderCloned : p.soulPlaceholderEmpty)}
              value={soul}
            />
          </div>
--- a/apps/desktop/src/app/profiles/index.tsx
+++ b/apps/desktop/src/app/profiles/index.tsx
@@ -12,6 +12,7 @@ import {
  DialogTitle
 } from '@/components/ui/dialog'
 import { Input } from '@/components/ui/input'
+import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select'
 import { Textarea } from '@/components/ui/textarea'
 import {
  createProfile,
@@ -82,14 +83,14 @@ export function ProfilesView({ onClose }: ProfilesViewProps) {
  }, [profiles, selectedName])

  const handleCreate = useCallback(
-    async (name: string, cloneFromDefault: boolean) => {
+    async (name: string, cloneFrom: null | string) => {
      const trimmed = name.trim()

      if (!isValidProfileName(trimmed)) {
        throw new Error(p.nameHint)
      }

-      await createProfile({ name: trimmed, clone_from_default: cloneFromDefault })
+      await createProfile({ name: trimmed, clone_from: cloneFrom })
      notify({ kind: 'success', title: p.created, message: trimmed })
      setSelectedName(trimmed)
      await refresh()
@@ -180,8 +181,9 @@ export function ProfilesView({ onClose }: ProfilesViewProps) {

      <CreateProfileDialog
          onClose={() => setCreateOpen(false)}
-          onCreate={async (name, cloneFromDefault) => handleCreate(name, cloneFromDefault)}
+          onCreate={async (name, cloneFrom) => handleCreate(name, cloneFrom)}
          open={createOpen}
+          profiles={profiles ?? []}
        />

        <Dialog onOpenChange={open => !open && !deleting && setPendingDelete(null)} open={pendingDelete !== null}>
@@ -453,16 +455,18 @@ function SoulEditor({ profileName }: { profileName: string }) {
 function CreateProfileDialog({
  onClose,
  onCreate,
-  open
+  open,
+  profiles
 }: {
  onClose: () => void
-  onCreate: (name: string, cloneFromDefault: boolean) => Promise<void>
+  onCreate: (name: string, cloneFrom: null | string) => Promise<void>
  open: boolean
+  profiles: ProfileInfo[]
 }) {
  const { t } = useI18n()
  const p = t.profiles
  const [name, setName] = useState('')
-  const [cloneFromDefault, setCloneFromDefault] = useState(true)
+  const [cloneFrom, setCloneFrom] = useState<null | string>('default')
  const [saving, setSaving] = useState(false)
  const [error, setError] = useState<null | string>(null)

@@ -472,7 +476,7 @@ function CreateProfileDialog({
    }

    setName('')
-    setCloneFromDefault(true)
+    setCloneFrom('default')
    setError(null)
    setSaving(false)
  }, [open])
@@ -493,7 +497,7 @@ function CreateProfileDialog({
    setError(null)

    try {
-      await onCreate(trimmed, cloneFromDefault)
+      await onCreate(trimmed, cloneFrom)
      onClose()
    } catch (err) {
      setError(err instanceof Error ? err.message : p.failedCreate)
@@ -528,18 +532,25 @@ function CreateProfileDialog({
            </p>
          </div>

-          <label className="flex cursor-pointer items-center gap-2 rounded-md border border-border/40 bg-background/50 px-3 py-2 text-sm">
-            <input
-              checked={cloneFromDefault}
-              className="size-4 accent-primary"
-              onChange={event => setCloneFromDefault(event.target.checked)}
-              type="checkbox"
-            />
-            <span>
-              <span className="font-medium">{p.cloneFromDefault}</span>
-              <span className="ml-2 text-xs text-muted-foreground">{p.cloneFromDefaultDesc}</span>
-            </span>
-          </label>
+          <div className="grid gap-1.5">
+            <label className="text-xs font-medium" htmlFor="new-profile-clone-from">
+              {p.cloneFrom}
+            </label>
+            <Select onValueChange={value => setCloneFrom(value === '__none__' ? null : value)} value={cloneFrom ?? '__none__'}>
+              <SelectTrigger className="h-9 rounded-md" id="new-profile-clone-from">
+                <SelectValue />
+              </SelectTrigger>
+              <SelectContent>
+                <SelectItem value="__none__">{p.cloneFromNone}</SelectItem>
+                {profiles.map(profile => (
+                  <SelectItem key={profile.name} value={profile.name}>
+                    {profile.name}
+                  </SelectItem>
+                ))}
+              </SelectContent>
+            </Select>
+            <p className="text-xs text-muted-foreground">{p.cloneFromDesc}</p>
+          </div>

          {error && (
            <div className="flex items-start gap-2 rounded-md border border-destructive/30 bg-destructive/10 px-3 py-2 text-xs text-destructive">
--- a/apps/desktop/src/app/right-sidebar/index.test.tsx
+++ b/apps/desktop/src/app/right-sidebar/index.test.tsx
@@ -0,0 +1,75 @@
+import { cleanup, fireEvent, render, screen, waitFor } from '@testing-library/react'
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
+
+import type { HermesReadDirResult } from '@/global'
+import { $connection, setCurrentCwd } from '@/store/session'
+
+import { resetProjectTreeState } from './files/use-project-tree'
+
+import { RightSidebarPane } from './index'
+
+const readDir = vi.fn<(path: string) => Promise<HermesReadDirResult>>()
+const selectPaths = vi.fn()
+
+function ok(entries: { name: string; path: string; isDirectory: boolean }[]): HermesReadDirResult {
+  return { entries } // wrap the listing in the result object the readDir mock resolves with
+}
+
+function installBridge() {
+  // The fake bridge carries only readDir and selectPaths, so the window is
+  // typed as just that slice rather than the full desktop bridge.
+  type BridgeHost = { hermesDesktop: { readDir: typeof readDir; selectPaths: typeof selectPaths } }
+
+  const host = window as unknown as BridgeHost
+  const bridge = { readDir, selectPaths }
+
+  host.hermesDesktop = bridge
+}
+
+describe('RightSidebarPane', () => {
+  beforeEach(() => {
+    $connection.set(null)
+    resetProjectTreeState()
+    setCurrentCwd('/repo') // every test starts with /repo as the working directory
+    readDir.mockReset()
+    selectPaths.mockReset()
+    readDir.mockResolvedValue(ok([{ name: 'README.md', path: '/repo/README.md', isDirectory: false }]))
+    selectPaths.mockResolvedValue(['/repo-next']) // the mocked folder picker resolves to /repo-next
+    installBridge()
+  })
+
+  afterEach(() => {
+    cleanup()
+    $connection.set(null)
+    setCurrentCwd('')
+    resetProjectTreeState()
+    delete (window as unknown as { hermesDesktop?: unknown }).hermesDesktop // remove the fake bridge again
+  })
+
+  it('refreshes the current tree without opening the folder picker', async () => {
+    const onChangeCwd = vi.fn()
+
+    render(<RightSidebarPane onActivateFile={vi.fn()} onActivateFolder={vi.fn()} onChangeCwd={onChangeCwd} />)
+
+    await waitFor(() => expect(screen.getByRole('button', { name: 'Refresh tree' }).hasAttribute('disabled')).toBe(false))
+
+    readDir.mockClear() // drop earlier calls so only the click below can satisfy the assertion
+
+    fireEvent.click(screen.getByRole('button', { name: 'Refresh tree' }))
+
+    await waitFor(() => expect(readDir).toHaveBeenCalledWith('/repo'))
+    expect(selectPaths).not.toHaveBeenCalled() // refreshing must not open the folder picker
+
+    fireEvent.click(screen.getByRole('button', { name: 'Open folder' }))
+
+    await waitFor(() =>
+      expect(selectPaths).toHaveBeenCalledWith({
+        defaultPath: '/repo',
+        directories: true,
+        multiple: false,
+        title: 'Change working directory'
+      })
+    )
+    await waitFor(() => expect(onChangeCwd).toHaveBeenCalledWith('/repo-next')) // picked folder is forwarded
+  })
+})
--- a/apps/desktop/src/app/right-sidebar/index.tsx
+++ b/apps/desktop/src/app/right-sidebar/index.tsx
@@ -126,12 +126,12 @@ interface FilesystemTabProps extends FileTreeBodyProps {
  onRefresh: () => void
 }

-// Sidebar palette + hover-reveal: refresh tracks label hover; collapse-all
-// stays visible while any folder is expanded.
+// Sidebar palette + hover-reveal: header actions stay reachable while moving
+// from the project label to the action buttons.
 const HEADER_ACTION_CLASS =
  'text-sidebar-foreground/70 hover:bg-sidebar-accent! hover:text-sidebar-accent-foreground! focus-visible:ring-sidebar-ring'

-const HEADER_ACTION_LABEL_REVEAL = `${HEADER_ACTION_CLASS} pointer-events-none opacity-0 transition-opacity focus-visible:pointer-events-auto focus-visible:opacity-100 peer-focus-visible/project-label:pointer-events-auto peer-focus-visible/project-label:opacity-100 peer-hover/project-label:pointer-events-auto peer-hover/project-label:opacity-100`
+const HEADER_ACTION_LABEL_REVEAL = `${HEADER_ACTION_CLASS} pointer-events-none opacity-0 transition-opacity focus-visible:pointer-events-auto focus-visible:opacity-100 group-focus-within/project-header:pointer-events-auto group-focus-within/project-header:opacity-100 group-hover/project-header:pointer-events-auto group-hover/project-header:opacity-100`

 function FilesystemTab({
  canCollapse,
@@ -158,7 +158,7 @@ function FilesystemTab({
  return (
    <div className="flex min-h-0 flex-1 flex-col">
      <RightSidebarSectionHeader>
-        <div className="peer/project-label flex min-w-0 flex-1">
+        <div className="flex min-w-0 flex-1">
          <button
            className="flex w-full min-w-0 items-center rounded-md text-left hover:text-(--ui-text-secondary)"
            onClick={() => void onChangeFolder()}
@@ -216,7 +216,7 @@ function FilesystemTab({
 }

 export function RightSidebarSectionHeader({ children }: { children: ReactNode }) {
-  return <div className="flex h-7 shrink-0 items-center px-2.5">{children}</div>
+  return <div className="group/project-header flex h-7 shrink-0 items-center px-2.5">{children}</div>
 }

 interface FileTreeBodyProps {
--- a/apps/desktop/src/app/right-sidebar/store.ts
+++ b/apps/desktop/src/app/right-sidebar/store.ts
@@ -9,3 +9,22 @@ export const $terminalTakeover = atom(storedBoolean(TAKEOVER_KEY, false))
 $terminalTakeover.subscribe(active => persistBoolean(TAKEOVER_KEY, active))

 export const setTerminalTakeover = (active: boolean) => $terminalTakeover.set(active)
+
+/** A command queued to run in the embedded terminal. The terminal pane flushes
+ *  (and clears) it once its session is live, so a value set before the pane
+ *  mounts still runs. Cleared after flush so a later remount can't replay it. */
+export const $terminalInjection = atom<null | string>(null)
+
+/** Open the terminal pane and run a command in it. Used to disconnect external
+ *  (CLI-managed) providers, which Hermes can't clear via the API — the user
+ *  sees exactly what runs instead of Hermes silently deleting their creds. */
+export const runInTerminal = (command: string) => {
+  const queued = command.trim()
+
+  // Blank input has nothing to run: leave the pane and the queue untouched.
+  if (queued.length > 0) {
+    // Reveal the terminal pane first, then queue the command for its session.
+    setTerminalTakeover(true)
+    $terminalInjection.set(queued)
+  }
+}
--- a/apps/desktop/src/app/right-sidebar/terminal/use-terminal-session.ts
+++ b/apps/desktop/src/app/right-sidebar/terminal/use-terminal-session.ts
@@ -10,6 +10,8 @@ import { triggerHaptic } from '@/lib/haptics'
 import { $filePreviewTarget, $previewTarget } from '@/store/preview'
 import { useTheme } from '@/themes/context'

+import { $terminalInjection } from '../store'
+
 import { makeTerminalReader, setActiveTerminalReader } from './buffer'
 import {
  isAddSelectionShortcut,
@@ -675,6 +677,28 @@ export function useTerminalSession({ cwd, onAddSelectionToChat }: UseTerminalSes
    return () => cancelAnimationFrame(raf)
  }, [activeTheme, themeName])

+  // Flush a queued command (e.g. a provider-disconnect) into the live session.
+  // Only active while open; the subscribe fires immediately, so a command set
+  // before this pane mounted runs as soon as the session is ready. Clearing the
+  // atom after writing stops a later remount from replaying a stale command.
+  useEffect(() => {
+    if (status !== 'open') {
+      return
+    }
+
+    return $terminalInjection.subscribe(command => {
+      const id = sessionIdRef.current
+
+      if (!command || !id) {
+        return
+      }
+
+      void window.hermesDesktop?.terminal?.write(id, `${command}\r`)
+      $terminalInjection.set(null)
+      termRef.current?.focus()
+    })
+  }, [status])
+
  return {
    addSelectionToChat,
    hostRef,
--- a/apps/desktop/src/app/session/hooks/use-message-stream.ts
+++ b/apps/desktop/src/app/session/hooks/use-message-stream.ts
@@ -2,6 +2,7 @@ import type { QueryClient } from '@tanstack/react-query'
 import { type MutableRefObject, useCallback, useEffect, useRef } from 'react'

 import { readActiveTerminal } from '@/app/right-sidebar/terminal/buffer'
+import { translateNow } from '@/i18n'
 import {
  appendAssistantTextPart,
  appendReasoningPart,
@@ -15,6 +16,7 @@ import {
  upsertToolPart
 } from '@/lib/chat-messages'
 import { coerceGatewayText, coerceThinkingText, normalizePersonalityValue } from '@/lib/chat-runtime'
+import { playCompletionSound } from '@/lib/completion-sound'
 import { gatewayEventRequiresSessionId } from '@/lib/gateway-events'
 import {
  dedupeGeneratedImageEchoesInParts,
@@ -25,10 +27,13 @@ import { triggerHaptic } from '@/lib/haptics'
 import { isProviderSetupErrorMessage } from '@/lib/provider-setup-errors'
 import { parseTodos } from '@/lib/todos'
 import { setClarifyRequest } from '@/store/clarify'
+import { setSessionCompacting } from '@/store/compaction'
 import { refreshBackgroundProcesses } from '@/store/composer-status'
 import { $gateway } from '@/store/gateway'
+import { dispatchNativeNotification } from '@/store/native-notifications'
 import { notify } from '@/store/notifications'
 import { requestDesktopOnboarding } from '@/store/onboarding'
+import { flashPetActivity, markPetUnread, setPetActivity } from '@/store/pet'
 import { clearAllPrompts, setApprovalRequest, setSecretRequest, setSudoRequest } from '@/store/prompts'
 import {
  setCurrentBranch,
@@ -43,6 +48,7 @@ import {
  setTurnStartedAt,
  setYoloActive
 } from '@/store/session'
+import { broadcastSessionsChanged } from '@/store/session-sync'
 import { clearSessionSubagents, pruneDelegateFallbackSubagents, upsertSubagent } from '@/store/subagents'
 import { setSessionTodos } from '@/store/todos'
 import { recordToolDiff } from '@/store/tool-diffs'
@@ -330,6 +336,8 @@ export function useMessageStream({
  const flushHandleRef = useRef<number | null>(null)
  const lastFlushAtRef = useRef<number>(0)
  const nativeSubagentSessionsRef = useRef<Set<string>>(new Set())
+  // Turns that auto-compacted: skip post-turn hydrate so live scrollback survives.
+  const compactedTurnRef = useRef<Set<string>>(new Set())

  const flushQueuedDeltas = useCallback(
    (sessionId?: string) => {
@@ -635,19 +643,26 @@ export function useMessageStream({
      })

      void refreshSessions().catch(() => undefined)
+      // Sync the freshly-titled row to other windows (e.g. main, when the turn
+      // ran in the pop-out).
+      broadcastSessionsChanged()
+
+      if (compactedTurnRef.current.delete(sessionId)) {
+        shouldHydrate = false
+      }

      if (shouldHydrate) {
        void hydrateFromStoredSession(3, completedState.storedSessionId, sessionId)
      }

-      if (document.hidden && sessionId === activeSessionIdRef.current) {
-        void window.hermesDesktop?.notify({
-          title: 'Hermes finished',
-          body: text.slice(0, 140) || 'The response is ready.'
-        })
-      }
+      dispatchNativeNotification({
+        body: text.slice(0, 140) || translateNow('notifications.native.turnDoneBody'),
+        kind: 'turnDone',
+        sessionId,
+        title: translateNow('notifications.native.turnDoneTitle')
+      })
    },
-    [activeSessionIdRef, hydrateFromStoredSession, refreshSessions, updateSessionState]
+    [hydrateFromStoredSession, refreshSessions, updateSessionState]
  )

  const failAssistantMessage = useCallback(
@@ -822,6 +837,8 @@ export function useMessageStream({

        flushQueuedDeltas(sessionId)
        clearSessionSubagents(sessionId)
+        setSessionCompacting(sessionId, false)
+        compactedTurnRef.current.delete(sessionId)
        nativeSubagentSessionsRef.current.delete(sessionId)

        if (isActiveEvent) {
@@ -853,10 +870,18 @@ export function useMessageStream({
        if (sessionId) {
          appendReasoningDelta(sessionId, coerceThinkingText(payload?.text))
        }
+
+        if (isActiveEvent) {
+          setPetActivity({ reasoning: true })
+        }
      } else if (event.type === 'reasoning.available') {
        if (sessionId) {
          appendReasoningDelta(sessionId, coerceThinkingText(payload?.text), true)
        }
+
+        if (isActiveEvent) {
+          setPetActivity({ reasoning: true })
+        }
      } else if (event.type === 'message.complete') {
        if (!sessionId) {
          return
@@ -867,18 +892,31 @@ export function useMessageStream({
        // session so a background turn finishing can't wipe the active chat's
        // prompt, and vice versa.
        clearAllPrompts(sessionId)
+        setSessionCompacting(sessionId, false)

        flushQueuedDeltas(sessionId)

-        if (isActiveEvent) {
-          triggerHaptic('streamDone')
-        }
+        playCompletionSound()

        const finalText = coerceGatewayText(payload?.text) || coerceGatewayText(payload?.rendered)
        completeAssistantMessage(sessionId, finalText)

        if (isActiveEvent) {
          setTurnStartedAt(null)
+
+          // Pet beat: a finished turn always celebrates — go straight to the
+          // jump, never linger on the run/reason pose. One atom update (clears
+          // toolRunning/reasoning AND sets celebrate together) so no stray "run"
+          // frame leaks to the sprite — including the popped-out overlay, which
+          // mirrors each activity change. The jump runs ~2 loops, then settles.
+          flashPetActivity({ celebrate: true, reasoning: false, toolRunning: false }, 2200)
+
+          // Light up the pet's mail icon if the user wasn't looking when the turn
+          // finished — a glanceable "new message" hint on the popped-out overlay.
+          // Cleared when they open the app via the mail icon or refocus the window.
+          if (typeof document !== 'undefined' && !document.hasFocus()) {
+            markPetUnread()
+          }
        }

        if (payload?.usage) {
@@ -891,10 +929,19 @@ export function useMessageStream({

        flushQueuedDeltas(sessionId)
        upsertToolCall(sessionId, toTodoPayload(payload) ?? payload, 'running', event.type)
+
+        if (isActiveEvent) {
+          setPetActivity({ reasoning: false, toolRunning: true })
+        }
      } else if (event.type === 'tool.complete') {
        if (sessionId) {
          flushQueuedDeltas(sessionId)
          upsertToolCall(sessionId, toTodoPayload(payload) ?? payload, 'complete', event.type)
+
+          if (isActiveEvent) {
+            setPetActivity({ toolRunning: false })
+          }
+
          // A pending clarify blocks the turn, so the first tool.complete after
          // one is the clarify resolving — drop the "needs input" flag here so
          // the sidebar indicator clears as soon as it's answered, not only at
@@ -903,10 +950,7 @@ export function useMessageStream({

          // terminal/process tool calls are the only things that spawn or reap
          // background processes — sync the composer status stack right after.
-          if (
-            !sessionInterrupted(sessionId) &&
-            (payload?.name === 'terminal' || payload?.name === 'process')
-          ) {
+          if (!sessionInterrupted(sessionId) && (payload?.name === 'terminal' || payload?.name === 'process')) {
            void refreshBackgroundProcesses(sessionId)
          }
        }
@@ -958,6 +1002,13 @@ export function useMessageStream({
          if (sessionId) {
            updateSessionState(sessionId, state => ({ ...state, needsInput: true }))
          }
+
+          dispatchNativeNotification({
+            body: question,
+            kind: 'input',
+            sessionId,
+            title: translateNow('notifications.native.inputTitle')
+          })
        }
      } else if (event.type === 'approval.request') {
        // Dangerous-command / execute_code approval. The Python side is blocked
@@ -966,17 +1017,31 @@ export function useMessageStream({
        // Park it per-session (like clarify) so a *background* profile's turn can
        // raise it and wait — the sidebar flags "needs input" and the inline bar
        // surfaces once the user focuses that chat.
+        const command = typeof payload?.command === 'string' ? payload.command : ''
+        const description = typeof payload?.description === 'string' ? payload.description : 'dangerous command'
+
        setApprovalRequest({
          // false only when a tirith warning forbids it; backend omits the field otherwise.
          allowPermanent: payload?.allow_permanent !== false,
-          command: typeof payload?.command === 'string' ? payload.command : '',
-          description: typeof payload?.description === 'string' ? payload.description : 'dangerous command',
+          command,
+          description,
          sessionId: sessionId ?? null
        })

        if (sessionId) {
          updateSessionState(sessionId, state => ({ ...state, needsInput: true }))
        }
+
+        dispatchNativeNotification({
+          actions: [
+            { id: 'approve', text: translateNow('notifications.native.approveAction') },
+            { id: 'reject', text: translateNow('notifications.native.rejectAction') }
+          ],
+          body: command || description,
+          kind: 'approval',
+          sessionId,
+          title: translateNow('notifications.native.approvalTitle')
+        })
      } else if (event.type === 'sudo.request') {
        // Sudo password capture (tools/terminal_tool.py). Blocked on
        // sudo.respond {request_id, password}.
@@ -988,6 +1053,13 @@ export function useMessageStream({
          if (sessionId) {
            updateSessionState(sessionId, state => ({ ...state, needsInput: true }))
          }
+
+          dispatchNativeNotification({
+            body: translateNow('notifications.native.inputBody'),
+            kind: 'input',
+            sessionId,
+            title: translateNow('notifications.native.inputTitle')
+          })
        }
      } else if (event.type === 'secret.request') {
        // Skill credential capture (tools/skills_tool.py). Blocked on
@@ -995,16 +1067,26 @@ export function useMessageStream({
        const requestId = typeof payload?.request_id === 'string' ? payload.request_id : ''

        if (requestId) {
+          const envVar = typeof payload?.env_var === 'string' ? payload.env_var : ''
+          const promptText = typeof payload?.prompt === 'string' ? payload.prompt : ''
+
          setSecretRequest({
            requestId,
-            envVar: typeof payload?.env_var === 'string' ? payload.env_var : '',
-            prompt: typeof payload?.prompt === 'string' ? payload.prompt : '',
+            envVar,
+            prompt: promptText,
            sessionId: sessionId ?? null
          })

          if (sessionId) {
            updateSessionState(sessionId, state => ({ ...state, needsInput: true }))
          }
+
+          dispatchNativeNotification({
+            body: promptText || envVar || translateNow('notifications.native.inputBody'),
+            kind: 'input',
+            sessionId,
+            title: translateNow('notifications.native.inputTitle')
+          })
        }
      } else if (event.type === 'terminal.read.request') {
        // read_terminal tool: serialize the renderer's xterm buffer and answer
@@ -1022,9 +1104,12 @@ export function useMessageStream({
          })
        }
      } else if (event.type === 'status.update') {
-        // The gateway's notification poller announces background process
-        // completions / watch matches here — re-sync the status stack.
-        if (sessionId && payload?.kind === 'process') {
+        if (sessionId && payload?.kind === 'compacting') {
+          setSessionCompacting(sessionId, true)
+          compactedTurnRef.current.add(sessionId)
+        } else if (sessionId && payload?.kind === 'process') {
+          // The gateway's notification poller announces background process
+          // completions / watch matches here — re-sync the status stack.
          void refreshBackgroundProcesses(sessionId)
        }
      } else if (event.type === 'error') {
@@ -1036,8 +1121,22 @@ export function useMessageStream({
        // the failed turn (same intent as the message.complete clear).
        if (sessionId) {
          clearAllPrompts(sessionId)
+          setSessionCompacting(sessionId, false)
+          compactedTurnRef.current.delete(sessionId)
        }

+        if (isActiveEvent) {
+          setPetActivity({ reasoning: false, toolRunning: false })
+          flashPetActivity({ error: true })
+        }
+
+        dispatchNativeNotification({
+          body: errorMessage,
+          kind: 'turnError',
+          sessionId,
+          title: translateNow('notifications.native.turnErrorTitle')
+        })
+
        if (looksLikeProviderSetup) {
          requestDesktopOnboarding(errorMessage)
        } else if (isActiveEvent) {
--- a/apps/desktop/src/app/session/hooks/use-model-controls.test.tsx
+++ b/apps/desktop/src/app/session/hooks/use-model-controls.test.tsx
@@ -1,5 +1,5 @@
-import { renderHook } from '@testing-library/react'
 import { QueryClient } from '@tanstack/react-query'
+import { cleanup, render, renderHook } from '@testing-library/react'
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'

 import { getGlobalModelInfo } from '@/hermes'
@@ -13,12 +13,51 @@ import {

 import { useModelControls } from './use-model-controls'

+const setGlobalModel = vi.fn()
+const notifyError = vi.fn()
+
 vi.mock('@/hermes', () => ({
  getGlobalModelInfo: vi.fn(),
-  setGlobalModel: vi.fn()
+  setGlobalModel: (...args: Parameters<typeof setGlobalModel>) => setGlobalModel(...args)
 }))

-describe('useModelControls.refreshCurrentModel', () => {
+vi.mock('@/i18n', () => ({
+  useI18n: () => ({
+    t: {
+      desktop: {
+        modelSwitchFailed: 'Model switch failed'
+      }
+    }
+  })
+}))
+
+vi.mock('@/store/notifications', () => ({
+  notifyError: (...args: Parameters<typeof notifyError>) => notifyError(...args)
+}))
+
+type Controls = ReturnType<typeof useModelControls>
+
+// Test-only component: calls useModelControls during render and passes the
+// hook's return value to the test through onReady. onReady runs on every
+// render, so the test always ends up holding the most recent controls.
+function Harness(props: {
+  activeSessionId: string | null
+  onReady: (controls: Controls) => void
+  requestGateway: <T = unknown>(method: string, params?: Record<string, unknown>) => Promise<T>
+}) {
+  const { activeSessionId, onReady, requestGateway } = props
+
+  // Built inline, so each render hands the hook a brand-new QueryClient.
+  const queryClient = new QueryClient()
+  const controls = useModelControls({ activeSessionId, queryClient, requestGateway })
+
+  // Publish during render rather than in an effect.
+  onReady(controls)
+
+  return null
+}
+
+describe('useModelControls', () => {
  beforeEach(() => {
    $activeSessionId.set(null)
    setCurrentModel('')
@@ -26,6 +65,7 @@ describe('useModelControls.refreshCurrentModel', () => {
  })

  afterEach(() => {
+    cleanup()
    vi.restoreAllMocks()
    $activeSessionId.set(null)
    setCurrentModel('')
@@ -74,4 +114,85 @@ describe('useModelControls.refreshCurrentModel', () => {
    expect($currentModel.get()).toBe('deepseek/deepseek-v4-pro')
    expect($currentProvider.get()).toBe('deepseek')
  })
+
+  it('routes active-session picker changes through config.set with an explicit provider', async () => {
+    const requestGateway = vi.fn(async () => ({ key: 'model', value: 'claude-sonnet-4.6' }) as never)
+    let controls!: Controls
+
+    render(
+      <Harness
+        activeSessionId="session-1"
+        onReady={value => (controls = value)}
+        requestGateway={requestGateway}
+      />
+    )
+
+    await expect(
+      controls.selectModel({
+        model: 'claude-sonnet-4.6',
+        provider: 'anthropic'
+      })
+    ).resolves.toBe(true)
+
+    expect(requestGateway).toHaveBeenCalledWith('config.set', {
+      session_id: 'session-1',
+      key: 'model',
+      value: 'claude-sonnet-4.6 --provider anthropic'
+    })
+    expect(requestGateway).not.toHaveBeenCalledWith('slash.exec', expect.anything())
+  })
+
+  it('stores a no-session pick as UI state with no gateway or global write', async () => {
+    const requestGateway = vi.fn()
+    let controls!: Controls
+
+    render(
+      <Harness
+        activeSessionId={null}
+        onReady={value => (controls = value)}
+        requestGateway={requestGateway}
+      />
+    )
+
+    await expect(
+      controls.selectModel({
+        model: 'claude-sonnet-4.6',
+        provider: 'anthropic'
+      })
+    ).resolves.toBe(true)
+
+    // The pick is plain UI state; session.create ships it later. Nothing touches
+    // the gateway or the profile default here.
+    expect($currentModel.get()).toBe('claude-sonnet-4.6')
+    expect($currentProvider.get()).toBe('anthropic')
+    expect(requestGateway).not.toHaveBeenCalled()
+    expect(setGlobalModel).not.toHaveBeenCalled()
+  })
+
+  it('seeds an empty composer model from global but never clobbers a pick', async () => {
+    vi.mocked(getGlobalModelInfo).mockResolvedValue({ model: 'openai/gpt-5.5', provider: 'openai-codex' })
+
+    const { result } = renderHook(() =>
+      useModelControls({
+        activeSessionId: null,
+        queryClient: new QueryClient(),
+        requestGateway: vi.fn()
+      })
+    )
+
+    // Empty → seeds the default.
+    await result.current.refreshCurrentModel()
+    expect($currentModel.get()).toBe('openai/gpt-5.5')
+
+    // A user pick must survive the lifecycle refreshes that fire on boot / fresh
+    // draft / session events.
+    setCurrentModel('anthropic/claude-sonnet-4.6')
+    setCurrentProvider('anthropic')
+    await result.current.refreshCurrentModel()
+    expect($currentModel.get()).toBe('anthropic/claude-sonnet-4.6')
+
+    // A profile swap forces a reseed to the new profile's default.
+    await result.current.refreshCurrentModel(true)
+    expect($currentModel.get()).toBe('openai/gpt-5.5')
+  })
 })
--- a/apps/desktop/src/app/session/hooks/use-model-controls.ts
+++ b/apps/desktop/src/app/session/hooks/use-model-controls.ts
@@ -1,7 +1,7 @@
 import { type QueryClient } from '@tanstack/react-query'
 import { useCallback } from 'react'

-import { getGlobalModelInfo, setGlobalModel } from '@/hermes'
+import { getGlobalModelInfo } from '@/hermes'
 import { useI18n } from '@/i18n'
 import { notifyError } from '@/store/notifications'
 import {
@@ -15,7 +15,6 @@ import type { ModelOptionsResponse } from '@/types/hermes'

 interface ModelSelection {
  model: string
-  persistGlobal: boolean
  provider: string
 }

@@ -28,6 +27,7 @@ interface ModelControlsOptions {
 export function useModelControls({ activeSessionId, queryClient, requestGateway }: ModelControlsOptions) {
  const { t } = useI18n()
  const copy = t.desktop
+
  const updateModelOptionsCache = useCallback(
    (provider: string, model: string, includeGlobal: boolean) => {
      const patch = (prev: ModelOptionsResponse | undefined) => ({ ...(prev ?? {}), provider, model })
@@ -41,14 +41,24 @@ export function useModelControls({ activeSessionId, queryClient, requestGateway
    [activeSessionId, queryClient]
  )

-  const refreshCurrentModel = useCallback(async () => {
+  // Seed the composer's model state from the profile default. `force` reseeds
+  // for a profile swap (the new profile has its own default); otherwise this
+  // only fills an EMPTY selection so a user's pick (plain UI state in
+  // $currentModel) survives the lifecycle refreshes that fire on boot / fresh
+  // draft / session events. A live session owns the footer, so skip entirely.
+  const refreshCurrentModel = useCallback(async (force = false) => {
    try {
+      if ($activeSessionId.get()) {
+        return
+      }
+
+      if (!force && $currentModel.get()) {
+        return
+      }
+
      const result = await getGlobalModelInfo()

-      // A resumed/live session owns the footer model state. Global config
-      // refreshes (gateway boot, profile swap, settings save) must not clobber
-      // the active chat's runtime model/provider in the status bar.
-      if ($activeSessionId.get()) {
+      if ($activeSessionId.get() || (!force && $currentModel.get())) {
        return
      }

@@ -64,12 +74,14 @@ export function useModelControls({ activeSessionId, queryClient, requestGateway
    }
  }, [])

-  // Returns whether the switch succeeded so callers can await it before
-  // applying follow-up changes (e.g. editing a model's reasoning/fast must land
-  // on the right active model — bail rather than write to the previous one).
+  // Returns whether the switch succeeded so callers can await it before applying
+  // follow-up changes. The composer model is plain UI state: with no live
+  // session it's just stored (and shipped on the next session.create); with one
+  // it's scoped to that session via config.set. It NEVER writes the profile
+  // default — that lives in Settings → Model — so picking a model here can't
+  // silently mutate global config.
  const selectModel = useCallback(
    async (selection: ModelSelection): Promise<boolean> => {
-      const includeGlobal = selection.persistGlobal || !activeSessionId
      // Snapshot for rollback: the switch is applied optimistically, so a
      // failure must restore the prior model/provider (store + query cache)
      // rather than leave the UI showing a model the backend never selected.
@@ -78,41 +90,34 @@ export function useModelControls({ activeSessionId, queryClient, requestGateway

      setCurrentModel(selection.model)
      setCurrentProvider(selection.provider)
-      updateModelOptionsCache(selection.provider, selection.model, includeGlobal)
+      updateModelOptionsCache(selection.provider, selection.model, !activeSessionId)
+
+      // No live session yet: the pick is pure UI state. session.create reads
+      // $currentModel/$currentProvider and applies it as that session's override.
+      if (!activeSessionId) {
+        return true
+      }

      try {
-        if (activeSessionId) {
-          await requestGateway('slash.exec', {
-            session_id: activeSessionId,
-            command: `/model ${selection.model} --provider ${selection.provider}${selection.persistGlobal ? ' --global' : ''}`
-          })
+        await requestGateway('config.set', {
+          session_id: activeSessionId,
+          key: 'model',
+          value: `${selection.model} --provider ${selection.provider}`
+        })

-          if (selection.persistGlobal) {
-            void refreshCurrentModel()
-          }
-
-          void queryClient.invalidateQueries({
-            queryKey: selection.persistGlobal ? ['model-options'] : ['model-options', activeSessionId]
-          })
-
-          return true
-        }
-
-        await setGlobalModel(selection.provider, selection.model)
-        void refreshCurrentModel()
-        void queryClient.invalidateQueries({ queryKey: ['model-options'] })
+        void queryClient.invalidateQueries({ queryKey: ['model-options', activeSessionId] })

        return true
      } catch (err) {
        setCurrentModel(prevModel)
        setCurrentProvider(prevProvider)
-        updateModelOptionsCache(prevProvider, prevModel, includeGlobal)
+        updateModelOptionsCache(prevProvider, prevModel, !activeSessionId)
        notifyError(err, copy.modelSwitchFailed)

        return false
      }
    },
-    [activeSessionId, copy.modelSwitchFailed, queryClient, refreshCurrentModel, requestGateway, updateModelOptionsCache]
+    [activeSessionId, copy.modelSwitchFailed, queryClient, requestGateway, updateModelOptionsCache]
  )

  return { refreshCurrentModel, selectModel, updateModelOptionsCache }
--- a/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
+++ b/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
@@ -27,6 +27,7 @@ import { triggerHaptic } from '@/lib/haptics'
 import { setMutableRef } from '@/lib/mutable-ref'
 import { isProviderSetupErrorMessage } from '@/lib/provider-setup-errors'
 import { setSessionYolo } from '@/lib/yolo-session'
+import { openCommandPalettePage } from '@/store/command-palette'
 import {
  $composerAttachments,
  clearComposerAttachments,
@@ -38,6 +39,7 @@ import {
 import { resetSessionBackground } from '@/store/composer-status'
 import { clearNotifications, notify, notifyError } from '@/store/notifications'
 import { requestDesktopOnboarding } from '@/store/onboarding'
+import { setPetScale } from '@/store/pet-gallery'
 import { $activeGatewayProfile, $newChatProfile, ensureGatewayProfile, normalizeProfileKey } from '@/store/profile'
 import {
  $busy,
@@ -57,6 +59,7 @@ import { clearSessionSubagents } from '@/store/subagents'
 import { clearSessionTodos } from '@/store/todos'

 import type {
+  BrowserManageResponse,
  ClientSessionState,
  FileAttachResponse,
  HandoffFailResponse,
@@ -1141,6 +1144,110 @@ export function usePromptActions({
          } catch (err) {
            renderSlashOutput(`error: ${err instanceof Error ? err.message : String(err)}`)
          }
+        },
+        pet: async ctx => {
+          const [sub = '', rawValue = ''] = ctx.arg.trim().split(/\s+/)
+          const lower = sub.toLowerCase()
+
+          if (lower === 'list' || lower === 'gallery' || lower === 'browse' || lower === 'all') {
+            openCommandPalettePage('pets') // gallery aliases open the palette's pets page
+
+            return
+          }
+
+          // `/pet scale <n>` resizes the floating pet locally (instant) and persists via the
+          // store — no slash-worker round-trip. Only a finite, positive factor is applied.
+          if (lower === 'scale') {
+            const value = Number(rawValue)
+
+            if (!rawValue || !Number.isFinite(value) || value <= 0) {
+              const resolved = await withSlashOutput(ctx)
+              resolved?.render('usage: /pet scale <factor>  (e.g. /pet scale 0.5)')
+
+              return
+            }
+
+            setPetScale(requestGateway, value)
+
+            return
+          }
+
+          await runExec(ctx) // every other subcommand takes the generic exec path
+        },
+        // /browser [connect [url]|disconnect|status] manages the live CDP connection on
+        // the gateway host via the browser.manage RPC, mirroring the TUI. Bare /browser
+        // means status; connect without a url targets http://127.0.0.1:9222. The RPC
+        // mutates BROWSER_CDP_URL (and may launch Chrome) in the gateway process, so it
+        // is refused for remote connections — a remote gateway would act on the wrong host.
+        browser: async ctx => {
+          const resolved = await withSlashOutput(ctx)
+
+          if (!resolved) {
+            return
+          }
+
+          const { render: renderSlashOutput, sessionId } = resolved
+
+          if ($connection.get()?.mode === 'remote') {
+            renderSlashOutput(
+              '/browser manages a Chromium-family browser on the gateway host — only available when connected to a local gateway.'
+            )
+
+            return
+          }
+
+          const [rawAction = 'status', ...rest] = ctx.arg.trim().split(/\s+/).filter(Boolean)
+          const cmdAction = rawAction.toLowerCase()
+
+          if (!['connect', 'disconnect', 'status'].includes(cmdAction)) {
+            renderSlashOutput(
+              'usage: /browser [connect|disconnect|status] [url] · persistent: set browser.cdp_url in config.yaml'
+            )
+
+            return
+          }
+
+          const url = cmdAction === 'connect' ? rest.join(' ').trim() || 'http://127.0.0.1:9222' : undefined
+
+          if (url) {
+            renderSlashOutput(`checking Chromium-family browser remote debugging at ${url}...`)
+          }
+
+          try {
+            const result = await requestGateway<BrowserManageResponse>('browser.manage', {
+              action: cmdAction,
+              session_id: sessionId,
+              ...(url && { url })
+            })
+
+            // Without a streamed session subscription, the gateway bundles its
+            // progress lines into `messages` — flush them inline.
+            result?.messages?.forEach(message => renderSlashOutput(message))
+
+            if (cmdAction === 'status') {
+              renderSlashOutput(
+                result?.connected
+                  ? `browser connected: ${result.url || '(url unavailable)'}`
+                  : 'browser not connected (try /browser connect <url> or set browser.cdp_url in config.yaml)'
+              )
+
+              return
+            }
+
+            if (cmdAction === 'disconnect') {
+              renderSlashOutput('browser disconnected')
+
+              return
+            }
+
+            if (result?.connected) { // only connect reaches here
+              renderSlashOutput('Browser connected to live Chromium-family browser via CDP')
+              renderSlashOutput(`Endpoint: ${result.url || '(url unavailable)'}`)
+              renderSlashOutput('next browser tool call will use this CDP endpoint')
+            }
+          } catch (err) {
+            renderSlashOutput(`error: ${err instanceof Error ? err.message : String(err)}`)
+          }
        }
      }

@@ -1283,6 +1390,7 @@ export function usePromptActions({

  const cancelRun = useCallback(async () => {
    const sessionId = activeSessionId || activeSessionIdRef.current
+
    const releaseBusy = () => {
      setMutableRef(busyRef, false)
      setBusy(false)
--- a/Show More
+++ b/Show More