change(tooling): update node to 26 everywhere, keep node version managed

2026-06-22 10:00:56 +08:00 · 2026-06-12 13:44:29 -04:00
394 changed files with 5471 additions and 22577 deletions
--- a/.github/workflows/build-windows-installer.yml
+++ b/.github/workflows/build-windows-installer.yml
@@ -48,7 +48,7 @@ jobs:
      - name: Setup Node.js
        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
-          node-version: 22
+          node-version: 26
          cache: npm

      - name: Install npm dependencies
--- a/.github/workflows/deploy-site.yml
+++ b/.github/workflows/deploy-site.yml
@@ -11,20 +11,8 @@ on:
      - 'optional-skills/**'
      - '.github/workflows/deploy-site.yml'
  workflow_dispatch:
-    inputs:
-      skills_index_run_id:
-        description: 'Optional Build Skills Index run ID whose skills-index artifact should be deployed'
-        required: false
-        type: string
-      rebuild_skills_index:
-        description: 'Force a fresh multi-source crawl instead of reusing the latest healthy index'
-        required: false
-        default: false
-        type: boolean

 permissions:
-  contents: read
-  actions: read
  pages: write
  id-token: write

@@ -56,7 +44,7 @@ jobs:

      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
-          node-version: 22
+          node-version: 26
          cache: npm
          cache-dependency-path: website/package-lock.json

@@ -67,81 +55,26 @@ jobs:
      - name: Install PyYAML for skill extraction
        run: pip install pyyaml==6.0.2 httpx==0.28.1

-      - name: Prepare skills index (unified multi-source catalog)
+      - name: Build skills index (unified multi-source catalog)
        env:
-          GH_TOKEN: ${{ github.token }}
-          GITHUB_TOKEN: ${{ github.token }}
-          SKILLS_INDEX_RUN_ID: ${{ github.event.inputs.skills_index_run_id || '' }}
-          REBUILD_SKILLS_INDEX: ${{ github.event.inputs.rebuild_skills_index || 'false' }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
-          # The unified external catalog is expensive to crawl and can burn
-          # through the repository installation's GitHub API quota when several
-          # docs deploys land close together.  Normal docs deploys therefore
-          # reuse the latest healthy catalog: first the artifact from a
-          # scheduled skills-index run, then the currently live index.  Only a
-          # manual force rebuild does a fresh crawl here.
+          # Rebuild the unified catalog. The file is gitignored, so a fresh
+          # checkout starts without it and we want the freshest crawl in
+          # every deploy.
          #
-          # If we do crawl, the build remains fatal. build_skills_index.py runs
-          # the health check BEFORE writing and exits non-zero on source
-          # collapse, keeping the last good Pages deployment live instead of
-          # publishing a degenerate catalog.
-          set -euo pipefail
-          INDEX_PATH="website/static/api/skills-index.json"
-          mkdir -p "$(dirname "$INDEX_PATH")"
-
-          validate_index() {
-            python3 - "$INDEX_PATH" <<'PY'
-          import json
-          import sys
-          from pathlib import Path
-
-          path = Path(sys.argv[1])
-          try:
-              data = json.loads(path.read_text(encoding="utf-8"))
-          except Exception as exc:
-              print(f"invalid skills index JSON: {exc}", file=sys.stderr)
-              sys.exit(1)
-          skills = data.get("skills")
-          if not isinstance(skills, list) or len(skills) < 1500:
-              count = len(skills) if isinstance(skills, list) else "missing"
-              print(f"skills index too small: {count}", file=sys.stderr)
-              sys.exit(1)
-          print(f"skills index ready: {len(skills)} skills")
-          PY
-          }
-
-          if [ "$REBUILD_SKILLS_INDEX" = "true" ]; then
-            python3 scripts/build_skills_index.py
-            validate_index
-            exit 0
-          fi
-
-          if [ -n "$SKILLS_INDEX_RUN_ID" ]; then
-            tmpdir="$(mktemp -d)"
-            echo "Downloading skills-index artifact from run $SKILLS_INDEX_RUN_ID"
-            if gh run download "$SKILLS_INDEX_RUN_ID" --name skills-index --dir "$tmpdir"; then
-              candidate="$(find "$tmpdir" -name skills-index.json -type f | head -n 1 || true)"
-              if [ -n "$candidate" ]; then
-                cp "$candidate" "$INDEX_PATH"
-                if validate_index; then
-                  exit 0
-                fi
-              fi
-            fi
-            echo "::warning::Could not use skills-index artifact from run $SKILLS_INDEX_RUN_ID; trying live index"
-          fi
-
-          echo "Downloading currently live skills index"
-          if curl -fsSL --retry 3 --retry-delay 5 \
-            "https://hermes-agent.nousresearch.com/docs/api/skills-index.json" \
-            -o "$INDEX_PATH" && validate_index; then
-            exit 0
-          fi
-
-          echo "::warning::Live skills index unavailable or unhealthy; falling back to a fresh crawl"
-          rm -f "$INDEX_PATH"
+          # This MUST be fatal. build_skills_index.py runs a health check and
+          # exits non-zero WITHOUT writing the output file when a source
+          # collapses (e.g. a GitHub API rate limit zeroes the github /
+          # claude-marketplace / well-known taps all at once). Letting the
+          # deploy continue would either (a) ship a degenerate index missing
+          # whole hubs — the June 2026 regression where OpenAI/Anthropic/
+          # HuggingFace/NVIDIA tabs vanished — or (b) fall through to a
+          # local-only catalog. Failing here keeps the last good deployment
+          # live (GitHub Pages serves the previous build) instead of
+          # publishing a broken catalog. Re-run the workflow once the
+          # transient rate limit clears.
          python3 scripts/build_skills_index.py
-          validate_index

      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -90,7 +90,7 @@ jobs:
      # (see `_SKIP_PARTS` in scripts/run_tests_parallel.py) because each
      # shard would otherwise reach the session-scoped ``built_image``
      # fixture in ``tests/docker/conftest.py`` and start a 3-7min
-      # ``docker build`` — guaranteed to
+      # ``docker build`` under a 180s pytest-timeout cap — guaranteed to
      # die in fixture setup.
      #
      # Piggybacking here avoids a second image build: the smoke test
@@ -114,7 +114,7 @@ jobs:
        run: |
          uv venv .venv --python 3.11
          source .venv/bin/activate
-          # ``dev`` extra pulls in pytest, pytest-asyncio —
+          # ``dev`` extra pulls in pytest, pytest-asyncio, pytest-timeout —
          # everything tests/docker/ needs.  We deliberately avoid ``all``
          # here because the docker tests only drive the container via
          # subprocess and don't import hermes_agent's optional deps.
--- a/.github/workflows/docs-site-checks.yml
+++ b/.github/workflows/docs-site-checks.yml
@@ -18,7 +18,7 @@ jobs:

      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
-          node-version: 22
+          node-version: 26
          cache: npm
          cache-dependency-path: website/package-lock.json

--- a/.github/workflows/skills-index.yml
+++ b/.github/workflows/skills-index.yml
@@ -53,4 +53,4 @@ jobs:
      - name: Trigger Deploy Site workflow
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: gh workflow run deploy-site.yml --repo ${{ github.repository }} -f skills_index_run_id=${{ github.run_id }}
+        run: gh workflow run deploy-site.yml --repo ${{ github.repository }}
--- a/.github/workflows/supply-chain-audit.yml
+++ b/.github/workflows/supply-chain-audit.yml
@@ -29,8 +29,6 @@ jobs:
      scan: ${{ steps.filter.outputs.scan }}
      # True when pyproject.toml changed in this PR
      deps: ${{ steps.filter.outputs.deps }}
-      # True when the curated MCP catalog / bundled MCP manifests changed.
-      mcp_catalog: ${{ steps.filter.outputs.mcp_catalog }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
@@ -56,14 +54,6 @@ jobs:
          else
            echo "deps=false" >> "$GITHUB_OUTPUT"
          fi
-          MCP_CATALOG_FILES=$(git diff --name-only "$BASE"..."$HEAD" -- \
-            'optional-mcps/**' \
-            'hermes_cli/mcp_catalog.py' || true)
-          if [ -n "$MCP_CATALOG_FILES" ]; then
-            echo "mcp_catalog=true" >> "$GITHUB_OUTPUT"
-          else
-            echo "mcp_catalog=false" >> "$GITHUB_OUTPUT"
-          fi

  scan:
    name: Scan PR for critical supply chain risks
@@ -278,50 +268,3 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - run: echo "No pyproject.toml changes, skipping dependency bounds check."
-
-  mcp-catalog-review:
-    name: MCP catalog security review
-    needs: changes
-    if: needs.changes.outputs.mcp_catalog == 'true'
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          fetch-depth: 0
-
-      - name: Require explicit MCP catalog review label
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          set -euo pipefail
-          PR="${{ github.event.pull_request.number }}"
-          LABELS=$(gh pr view "$PR" --json labels --jq '.labels[].name' || true)
-          if echo "$LABELS" | grep -Fxq 'mcp-catalog-reviewed'; then
-            echo "MCP catalog review label present."
-            exit 0
-          fi
-
-          BODY="## ⚠️ MCP catalog security review required
-
-          This PR changes the bundled MCP catalog or MCP catalog installer code. MCP entries can define local commands that users later install into \`mcp_servers\`, so this needs explicit maintainer review before merge.
-
-          A maintainer should verify:
-          - any new/changed \`optional-mcps/**/manifest.yaml\` command and args are expected,
-          - stdio transports do not use shell+egress/exfiltration payloads,
-          - git install refs are pinned and bootstrap commands are minimal,
-          - requested env vars/secrets match the upstream MCP's documented needs.
-
-          After review, add the \`mcp-catalog-reviewed\` label and re-run this check."
-
-          gh pr comment "$PR" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs)"
-          echo "::error::MCP catalog changes require the mcp-catalog-reviewed label."
-          exit 1
-
-  mcp-catalog-review-gate:
-    name: MCP catalog security review
-    needs: changes
-    if: always() && needs.changes.outputs.mcp_catalog != 'true'
-    runs-on: ubuntu-latest
-    steps:
-      - run: echo "No MCP catalog changes, skipping MCP catalog security review."
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -4,13 +4,13 @@ on:
  push:
    branches: [main]
    paths-ignore:
-      - "**/*.md"
-      - "docs/**"
+      - '**/*.md'
+      - 'docs/**'
  pull_request:
    branches: [main]
    paths-ignore:
-      - "**/*.md"
-      - "docs/**"
+      - '**/*.md'
+      - 'docs/**'

 permissions:
  contents: read
@@ -30,17 +30,13 @@ jobs:
        slice: [1, 2, 3, 4, 5, 6]
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      - name: Restore duration cache
-        uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
+        uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae  # v5.0.5
        with:
          path: test_durations.json
-          # main always writes a new suffix, but jobs pick the latest one with the same prefix
-          # quote from https://docs.github.com/en/actions/reference/workflows-and-actions/dependency-caching#cache-hits-and-misses
-          # If you provide restore-keys, the cache action sequentially searches for any caches that match the list of restore-keys.
-          # If there are no exact matches, the action searches for partial matches of the restore keys.
-          # When the action finds a partial match, the most recent cache is restored to the path directory.
+          # Single stable key for main and PRs. NOTE(review): caches are immutable per key, so a save cannot overwrite an existing entry — confirm main can refresh it.
          key: test-durations

      - name: Install ripgrep (prebuilt binary)
@@ -58,7 +54,7 @@ jobs:
          rg --version

      - name: Install uv
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5
        with:
          # Persist uv's download/wheel cache (~/.cache/uv) across runs.
          # Keyed on the dependency manifests, so the cache is reused until
@@ -119,7 +115,7 @@ jobs:
          NOUS_API_KEY: ""

      - name: Upload per-slice durations
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
        with:
          name: test-durations-slice-${{ matrix.slice }}
          path: test_durations.json
@@ -129,11 +125,11 @@ jobs:
  # (including PRs) get balanced slicing.
  save-durations:
    needs: test
-    if: needs.test.result == 'success' && github.ref == 'refs/heads/main'
+    if: always() && github.ref == 'refs/heads/main'
    runs-on: ubuntu-latest
    steps:
      - name: Download all slice durations
-        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
        with:
          pattern: test-durations-slice-*
          path: durations
@@ -153,17 +149,17 @@ jobs:
          "

      - name: Save merged duration cache
-        uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
+        uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae  # v5.0.5
        with:
          path: test_durations.json
-          key: test-durations-${{ github.run_id }}
+          key: test-durations

  e2e:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      - name: Install ripgrep (prebuilt binary)
        run: |
@@ -180,7 +176,7 @@ jobs:
          rg --version

      - name: Install uv
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5
        with:
          # Persist uv's download/wheel cache (~/.cache/uv) across runs.
          # Keyed on the dependency manifests, so the cache is reused until
--- a/.github/workflows/typecheck.yml
+++ b/.github/workflows/typecheck.yml
@@ -10,16 +10,15 @@ on:
 jobs:
  typecheck:
    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        package:
-          [ui-tui, web, apps/bootstrap-installer, apps/desktop, apps/shared]
-      fail-fast: false # report all failures, not just the first one
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
        with:
-          node-version: 22
+          node-version: 26
          cache: npm
      - run: npm ci
-      - run: npm run --prefix ${{ matrix.package }} typecheck
+      - run: npm run --prefix ui-tui typecheck
+      - run: npm run --prefix web typecheck
+      - run: npm run --prefix apps/bootstrap-installer typecheck
+      - run: npm run --prefix apps/desktop typecheck
+      - run: npm run --prefix apps/shared typecheck
--- a/.github/workflows/upload_to_pypi.yml
+++ b/.github/workflows/upload_to_pypi.yml
@@ -53,7 +53,7 @@ jobs:
      - name: Set up Node.js
        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
-          node-version: '22'
+          node-version: '26'

      - name: Build web dashboard
        run: cd web && npm ci && npm run build
--- a/.nvmrc
+++ b/.nvmrc
@@ -0,0 +1 @@
+26.3.0
--- a/18
+++ b/18
@@ -1,12 +1,12 @@
 FROM ghcr.io/astral-sh/uv:0.11.6-python3.13-trixie@sha256:b3c543b6c4f23a5f2df22866bd7857e5d304b67a564f4feab6ac22044dde719b AS uv_source
-# Node 22 LTS source stage. Debian trixie's bundled nodejs is pinned to 20.x
+# Node 26 source stage. Debian trixie's bundled nodejs is pinned to 20.x
 # which reached EOL in April 2026 — we copy node + npm + corepack from the
-# upstream node:22 image instead so we can stay on a supported LTS without
-# waiting for Debian 14 (forky, ~mid-2027).  Bookworm-based slim image used
+# upstream node:26 image instead so we can stay on a supported Node release
+# without waiting for Debian 15+. Bookworm-based slim image used
 # so the produced binary links against glibc 2.36, which runs cleanly on
 # our Debian 13 (trixie, glibc 2.41) runtime.  Bumping to a new Node major
 # is a one-line ARG change; see #4977.
-FROM node:22-bookworm-slim@sha256:7af03b14a13c8cdd38e45058fd957bf00a72bbe17feac43b1c15a689c029c732 AS node_source
+FROM node:26-bookworm-slim@sha256:3fe807a03a4436e7bc76b7e84e6861899cd75c9028ae99bc00581940141ae150 AS node_source
 FROM debian:13.4

 # Disable Python stdout buffering to ensure logs are printed immediately
@@ -90,17 +90,15 @@ RUN useradd -u 10000 -m -d /opt/data hermes

 COPY --chmod=0755 --from=uv_source /usr/local/bin/uv /usr/local/bin/uvx /usr/local/bin/

-# Node 22 LTS: copy the node binary plus the bundled npm + corepack JS
-# installs from the upstream image.  npm and npx are recreated as symlinks
+# Node 26: copy the node binary plus the bundled npm JS
+# install from the upstream image. npm and npx are recreated as symlinks
 # because they're symlinks in the source image (and need to live on PATH).
 # See node_source stage at the top of the file for the version-bump
 # rationale (#4977).
 COPY --chmod=0755 --from=node_source /usr/local/bin/node /usr/local/bin/
 COPY --from=node_source /usr/local/lib/node_modules/npm /usr/local/lib/node_modules/npm
-COPY --from=node_source /usr/local/lib/node_modules/corepack /usr/local/lib/node_modules/corepack
 RUN ln -sf /usr/local/lib/node_modules/npm/bin/npm-cli.js /usr/local/bin/npm && \
-    ln -sf /usr/local/lib/node_modules/npm/bin/npx-cli.js /usr/local/bin/npx && \
-    ln -sf /usr/local/lib/node_modules/corepack/dist/corepack.js /usr/local/bin/corepack
+    ln -sf /usr/local/lib/node_modules/npm/bin/npx-cli.js /usr/local/bin/npx

 WORKDIR /opt/hermes

@@ -119,7 +117,7 @@ COPY ui-tui/packages/hermes-ink/ ui-tui/packages/hermes-ink/

 # `npm_config_install_links=false` forces npm to install `file:` deps as
 # symlinks instead of copies.  This is the default since npm 10+, which is
-# what the image ships now (via the node:22 source stage).  We set it
+# what the image ships now (via the node:26 source stage).  We set it
 # explicitly anyway as defense-in-depth: the previous Debian-bundled npm
 # 9.x defaulted to install-as-copy, which produced a hidden
 # node_modules/.package-lock.json that permanently disagreed with the root
--- a/acp_adapter/server.py
+++ b/acp_adapter/server.py
@@ -824,7 +824,6 @@ class HermesACPAgent(acp.Agent):

        try:
            from model_tools import get_tool_definitions
-            from agent.memory_manager import inject_memory_provider_tools

            enabled_toolsets = _expand_acp_enabled_toolsets(
                getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"],
@@ -840,7 +839,6 @@ class HermesACPAgent(acp.Agent):
            state.agent.valid_tool_names = {
                tool["function"]["name"] for tool in state.agent.tools or []
            }
-            inject_memory_provider_tools(state.agent)
            invalidate = getattr(state.agent, "_invalidate_system_prompt", None)
            if callable(invalidate):
                invalidate()
@@ -1781,25 +1779,10 @@ class HermesACPAgent(acp.Agent):
    def _cmd_tools(self, args: str, state: SessionState) -> str:
        try:
            from model_tools import get_tool_definitions
-            from types import SimpleNamespace
-            from agent.memory_manager import inject_memory_provider_tools
-
            toolsets = _expand_acp_enabled_toolsets(
                getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
            )
            tools = get_tool_definitions(enabled_toolsets=toolsets, quiet_mode=True)
-            tool_view = SimpleNamespace(
-                tools=list(tools or []),
-                valid_tool_names={
-                    tool.get("function", {}).get("name")
-                    for tool in tools or []
-                    if isinstance(tool, dict)
-                },
-                enabled_toolsets=toolsets,
-                _memory_manager=getattr(state.agent, "_memory_manager", None),
-            )
-            inject_memory_provider_tools(tool_view)
-            tools = tool_view.tools
            if not tools:
                return "No tools available."
            lines = [f"Available tools ({len(tools)}):"]
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@@ -900,9 +900,6 @@ def init_agent(
        agent.api_key = client_kwargs.get("api_key", "")
        agent.base_url = client_kwargs.get("base_url", agent.base_url)
        try:
-            from agent.ssl_guard import verify_ca_bundle_with_fallback
-
-            verify_ca_bundle_with_fallback()
            agent.client = agent._create_openai_client(client_kwargs, reason="agent_init", shared=True)
            if not agent.quiet_mode:
                print(f"🤖 AI Agent initialized with model: {agent.model}")
@@ -1196,8 +1193,38 @@ def init_agent(
            _ra().logger.warning("Memory provider plugin init failed: %s", _mpe)
            agent._memory_manager = None

-    from agent.memory_manager import inject_memory_provider_tools as _inject_memory_provider_tools
-    _inject_memory_provider_tools(agent)
+    # Inject memory provider tool schemas into the tool surface.
+    # Skip tools whose names already exist (plugins may register the
+    # same tools via ctx.register_tool(), which lands in agent.tools
+    # through _ra().get_tool_definitions()).  Duplicate function names cause
+    # 400 errors on providers that enforce unique names (e.g. Xiaomi
+    # MiMo via Nous Portal).
+    #
+    # Respect the platform's enabled_toolsets configuration (#5544):
+    #   enabled_toolsets is None        → no filter, inject (backward compat)
+    #   "memory" in enabled_toolsets    → user opted in, inject
+    #   otherwise (incl. [])            → user excluded memory, skip injection
+    #
+    # Without this gate, `platform_toolsets: telegram: []` still leaks memory
+    # provider tools (fact_store, etc.) into the tool surface — a 10x latency
+    # penalty on local models and a frequent trigger of tool-call loops.
+    if agent._memory_manager and agent.tools is not None and (
+        agent.enabled_toolsets is None or "memory" in agent.enabled_toolsets
+    ):
+        _existing_tool_names = {
+            t.get("function", {}).get("name")
+            for t in agent.tools
+            if isinstance(t, dict)
+        }
+        for _schema in agent._memory_manager.get_all_tool_schemas():
+            _tname = _schema.get("name", "")
+            if _tname and _tname in _existing_tool_names:
+                continue  # already registered via plugin path
+            _wrapped = {"type": "function", "function": _schema}
+            agent.tools.append(_wrapped)
+            if _tname:
+                agent.valid_tool_names.add(_tname)
+                _existing_tool_names.add(_tname)

    # Skills config: nudge interval for skill creation reminders
    agent._skill_nudge_interval = 10
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@@ -445,45 +445,6 @@ def repair_message_sequence(agent, messages: List[Dict]) -> int:
    return repairs


-def repair_message_sequence_with_cursor(agent, messages: List[Dict]) -> int:
-    """Run :func:`repair_message_sequence` and keep the SessionDB flush
-    cursor consistent with the compacted list (#44837).
-
-    ``repair_message_sequence`` merges/drops messages in place, shrinking
-    the list. ``_last_flushed_db_idx`` (the DB-write cursor) indexes into
-    that list, so after compaction it can point past the new end — the
-    turn-end flush would then skip the assistant/tool chain entirely — or
-    past unflushed messages shifted to lower indexes.
-
-    Repair preserves object identity for surviving messages, so counting
-    the survivors from the previously-flushed prefix gives the exact new
-    cursor even when messages are dropped/merged at indexes *before* the
-    cursor — a plain ``min()`` clamp would silently skip that many
-    unflushed rows. Falls back to the clamp when no prefix snapshot is
-    available.
-
-    Returns the number of repairs made (same as ``repair_message_sequence``).
-    """
-    pre_repair_flushed_ids = None
-    flush_cursor = getattr(agent, "_last_flushed_db_idx", None)
-    if isinstance(flush_cursor, int) and flush_cursor > 0:
-        pre_repair_flushed_ids = {id(m) for m in messages[:flush_cursor]}
-
-    repairs = repair_message_sequence(agent, messages)
-
-    if repairs > 0 and hasattr(agent, "_last_flushed_db_idx"):
-        if pre_repair_flushed_ids is not None:
-            agent._last_flushed_db_idx = sum(
-                1 for m in messages if id(m) in pre_repair_flushed_ids
-            )
-        else:
-            agent._last_flushed_db_idx = min(
-                agent._last_flushed_db_idx, len(messages)
-            )
-
-    return repairs
-
-

 def strip_think_blocks(agent, content: str) -> str:
    """Remove reasoning/thinking blocks from content, returning only visible text.
@@ -618,33 +579,12 @@ def recover_with_credential_pool(
    current_provider = (getattr(agent, "provider", "") or "").strip().lower()
    pool_provider = (getattr(pool, "provider", "") or "").strip().lower()
    if current_provider and pool_provider and current_provider != pool_provider:
-        # Custom endpoints use two naming conventions for the SAME provider:
-        # the agent carries the generic ``custom`` label while the pool is
-        # keyed ``custom:<name>`` (see CUSTOM_POOL_PREFIX). A literal string
-        # compare treats them as a mismatch and skips recovery for every
-        # custom-provider user — 401s/429s then burn the full retry cycle
-        # with no rotation or refresh. Accept the pair as matching only when
-        # the agent's CURRENT base_url actually resolves to this pool key,
-        # so a fallback provider (or a different custom endpoint) still
-        # triggers the guard.
-        _custom_match = False
-        if current_provider == "custom" and pool_provider.startswith("custom:"):
-            try:
-                from agent.credential_pool import get_custom_provider_pool_key
-                _agent_base = (getattr(agent, "base_url", "") or "").strip()
-                _custom_match = bool(_agent_base) and (
-                    (get_custom_provider_pool_key(_agent_base) or "").strip().lower()
-                    == pool_provider
-                )
-            except Exception:
-                _custom_match = False
-        if not _custom_match:
-            _ra().logger.warning(
-                "Credential pool provider mismatch: pool=%s, agent=%s — "
-                "skipping pool mutation to avoid cross-provider contamination",
-                pool_provider, current_provider,
-            )
-            return False, has_retried_429
+        _ra().logger.warning(
+            "Credential pool provider mismatch: pool=%s, agent=%s — "
+            "skipping pool mutation to avoid cross-provider contamination",
+            pool_provider, current_provider,
+        )
+        return False, has_retried_429

    effective_reason = classified_reason
    if effective_reason is None:
@@ -881,8 +821,6 @@ def try_recover_primary_transport(

 def drop_thinking_only_and_merge_users(
    messages: List[Dict[str, Any]],
-    *,
-    drop_codex_reasoning_items: bool = True,
 ) -> List[Dict[str, Any]]:
    """Drop thinking-only assistant turns; merge any adjacent user messages left behind.

@@ -904,13 +842,7 @@ def drop_thinking_only_and_merge_users(
        return messages

    # Pass 1: drop thinking-only assistant turns.
-    kept = [
-        m for m in messages
-        if not _ra().AIAgent._is_thinking_only_assistant(
-            m,
-            drop_codex_reasoning_items=drop_codex_reasoning_items,
-        )
-    ]
+    kept = [m for m in messages if not _ra().AIAgent._is_thinking_only_assistant(m)]
    dropped = len(messages) - len(kept)
    if dropped == 0:
        return messages
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -751,9 +751,6 @@ def build_anthropic_client(
    from httpx import Timeout

    normalized_base_url = _normalize_base_url_text(base_url)
-    if normalized_base_url:
-        import re as _re
-        normalized_base_url = _re.sub(r"/v1/?$", "", normalized_base_url.rstrip("/"))
    _read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
    kwargs = {
        "timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -1144,8 +1144,7 @@ def _endpoint_speaks_anthropic_messages(base_url: str) -> bool:
    normalized = (base_url or "").strip().lower().rstrip("/")
    if not normalized:
        return False
-    path = urlparse(normalized).path.rstrip("/")
-    if path.endswith("/anthropic") or path.endswith("/anthropic/v1"):
+    if normalized.endswith("/anthropic"):
        return True
    hostname = base_url_hostname(normalized)
    if hostname == "api.anthropic.com":
@@ -3191,7 +3190,7 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
    if (main_provider and main_model
            and main_provider not in {"auto", ""}):
        resolved_provider = main_provider
-        explicit_base_url = runtime_base_url or None
+        explicit_base_url = None
        explicit_api_key = None
        if runtime_base_url and (main_provider == "custom" or main_provider.startswith("custom:")):
            resolved_provider = "custom"
@@ -5005,7 +5004,7 @@ def _build_call_kwargs(

    # Provider-specific extra_body
    merged_extra = dict(extra_body or {})
-    if provider == "nous":
+    if provider == "nous" or auxiliary_is_nous:
        merged_extra.setdefault("tags", []).extend(_nous_portal_tags())
    if merged_extra:
        kwargs["extra_body"] = merged_extra
--- a/agent/bedrock_adapter.py
+++ b/agent/bedrock_adapter.py
@@ -935,14 +935,11 @@ def build_converse_kwargs(
    if system_prompt:
        kwargs["system"] = system_prompt

-    from agent.anthropic_adapter import _forbids_sampling_params
+    if temperature is not None:
+        kwargs["inferenceConfig"]["temperature"] = temperature

-    if not _forbids_sampling_params(model):
-        if temperature is not None:
-            kwargs["inferenceConfig"]["temperature"] = temperature
-
-        if top_p is not None:
-            kwargs["inferenceConfig"]["topP"] = top_p
+    if top_p is not None:
+        kwargs["inferenceConfig"]["topP"] = top_p

    if stop_sequences:
        kwargs["inferenceConfig"]["stopSequences"] = stop_sequences
--- a/agent/codex_responses_adapter.py
+++ b/agent/codex_responses_adapter.py
@@ -1081,7 +1081,6 @@ def _normalize_codex_response(
    message_items_raw: List[Dict[str, Any]] = []
    tool_calls: List[Any] = []
    has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
-    saw_streaming_or_item_incomplete = response_status in {"queued", "in_progress"}
    saw_commentary_phase = False
    saw_final_answer_phase = False
    saw_reasoning_item = False
@@ -1096,7 +1095,6 @@ def _normalize_codex_response(

        if item_status in {"queued", "in_progress", "incomplete"}:
            has_incomplete_items = True
-            saw_streaming_or_item_incomplete = True

        if item_type == "message":
            item_phase = getattr(item, "phase", None)
@@ -1254,9 +1252,7 @@ def _normalize_codex_response(
        finish_reason = "tool_calls"
    elif leaked_tool_call_text:
        finish_reason = "incomplete"
-    elif saw_streaming_or_item_incomplete:
-        finish_reason = "incomplete"
-    elif (has_incomplete_items or saw_commentary_phase) and not saw_final_answer_phase:
+    elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
        finish_reason = "incomplete"
    elif (reasoning_items_raw or reasoning_parts or saw_reasoning_item) and not final_text:
        # Response contains only reasoning (encrypted thinking state and/or
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -69,31 +69,6 @@ SUMMARY_PREFIX = (
 )
 LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"

-# Metadata key added to context compression summary messages so that frontends
-# (CLI, Desktop, gateway, TUI) can distinguish them from real assistant/user
-# messages and filter or render them appropriately without content-prefix
-# heuristics. See https://github.com/NousResearch/hermes-agent/issues/38389
-#
-# Underscore-prefixed ON PURPOSE: the wire sanitizers
-# (agent/transports/chat_completions.py convert_messages and the summary-path
-# mirror in agent/chat_completion_helpers.py) strip every top-level message
-# key starting with "_" before the request leaves the process. Strict
-# OpenAI-compatible gateways (Fireworks, Mistral, Moonshot/Kimi, opencode-go)
-# reject payloads carrying unknown keys with "Extra inputs are not permitted",
-# poisoning every subsequent request in the session — a bare key like
-# "is_compressed_summary" would reach the wire and trip exactly that.
-COMPRESSED_SUMMARY_METADATA_KEY = "_compressed_summary"
-
-# Appended to every standalone summary message (and to the merged-into-tail
-# prefix) so the model has an unambiguous "summary ends here" boundary.
-# Without it, weak models read the verbatim "## Active Task" quote as fresh
-# user input (#11475, #14521) or regurgitate an assistant-role summary as
-# their own output (#33256).
-_SUMMARY_END_MARKER = (
-    "--- END OF CONTEXT SUMMARY — "
-    "respond to the message below, not the summary above ---"
-)
-
 # Handoff prefixes that shipped in earlier releases. A summary persisted under
 # one of these can be inherited into a resumed lineage (#35344); when it is
 # re-normalized on re-compaction we must strip the OLD prefix too, otherwise the
@@ -171,11 +146,6 @@ _FALLBACK_TURN_MAX_CHARS = 700
 _AUTO_FOCUS_MAX_TURNS = 3
 _AUTO_FOCUS_TURN_MAX_CHARS = 260
 _AUTO_FOCUS_MAX_CHARS = 700
-# Keep a short run of recent messages verbatim even when the token budget is
-# already exhausted.  The public ``protect_last_n`` default is intentionally
-# high for small/light tails, but using all 20 as a hard floor here would bring
-# back the old large-tool-output case where nothing can be compacted.
-_MAX_TAIL_MESSAGE_FLOOR = 8


 _PATH_MENTION_RE = re.compile(r"(?:/|~/?|[A-Za-z]:\\)[^\s`'\")\]}<>]+")
@@ -1646,13 +1616,7 @@ This compaction should PRIORITISE preserving all information related to the focu
        text = (summary or "").strip()
        for prefix in (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX, *_HISTORICAL_SUMMARY_PREFIXES):
            if text.startswith(prefix):
-                text = text[len(prefix):].lstrip()
-                break
-        # Strip the trailing end marker too — a rehydrated handoff body that
-        # keeps it would leak the boundary directive into the iterative-update
-        # summarizer prompt (and the marker is re-appended on insertion anyway).
-        if text.endswith(_SUMMARY_END_MARKER):
-            text = text[: -len(_SUMMARY_END_MARKER)].rstrip()
+                return text[len(prefix):].lstrip()
        return text

    @classmethod
@@ -1668,19 +1632,6 @@ This compaction should PRIORITISE preserving all information related to the focu
            return True
        return any(text.startswith(p) for p in _HISTORICAL_SUMMARY_PREFIXES)

-    @staticmethod
-    def _has_compressed_summary_metadata(message: Any) -> bool:
-        """Return True if *message* carries the compressed-summary flag.
-
-        Callers (frontends, CLI, gateway) can use this to distinguish context
-        compaction summaries from real assistant or user messages without
-        relying on content-prefix heuristics.  The flag is in-process only —
-        the wire sanitizers strip underscore-prefixed keys before API calls.
-        """
-        if not isinstance(message, dict):
-            return False
-        return bool(message.get(COMPRESSED_SUMMARY_METADATA_KEY))
-
    @classmethod
    def _derive_auto_focus_topic(
        cls,
@@ -1866,105 +1817,6 @@ This compaction should PRIORITISE preserving all information related to the focu
                return i
        return -1

-    def _find_last_assistant_message_idx(
-        self, messages: List[Dict[str, Any]], head_end: int
-    ) -> int:
-        """Return the index of the last user-visible assistant reply at or
-        after *head_end*, or -1.
-
-        A "user-visible reply" is an assistant message with non-empty
-        textual content — i.e. one that the WebUI / TUI / SessionsPage
-        rendered as a bubble the operator could read. We deliberately
-        skip assistant messages that contain only ``tool_calls`` (and
-        no text), because those render as small "calling tool X"
-        indicators and aren't what the reporter means by "the output
-        of the last message you sent" (#29824).
-
-        Falling back to the most recent assistant message of ANY kind
-        only kicks in when no content-bearing assistant message exists
-        in the compressible region — typically a fresh session that
-        just started a multi-step tool sequence with no prior reply
-        to anchor. In that case the agent fix is a no-op and the
-        existing user-message anchor carries the load.
-        """
-        last_any = -1
-        for i in range(len(messages) - 1, head_end - 1, -1):
-            msg = messages[i]
-            if msg.get("role") != "assistant":
-                continue
-            if last_any < 0:
-                last_any = i
-            content = msg.get("content")
-            if isinstance(content, str) and content.strip():
-                return i
-            if isinstance(content, list):
-                # Multimodal / Anthropic-style content: look for any
-                # text block with non-empty text.
-                for part in content:
-                    if isinstance(part, dict):
-                        text = part.get("text") or part.get("content")
-                        if isinstance(text, str) and text.strip():
-                            return i
-        return last_any
-
-    def _ensure_last_assistant_message_in_tail(
-        self,
-        messages: List[Dict[str, Any]],
-        cut_idx: int,
-        head_end: int,
-    ) -> int:
-        """Guarantee the most recent assistant message is in the protected tail.
-
-        WebUI / TUI / SessionsPage bug (#29824). Without this anchor,
-        ``_find_tail_cut_by_tokens`` can leave the user's most recent
-        visible assistant response inside the compressed middle region —
-        especially when the conversation has a single oversized tool
-        result or a long stretch of tool-call/result pairs after the
-        last assistant reply. The summariser then rolls that reply up
-        into the single ``[CONTEXT COMPACTION — REFERENCE ONLY]`` block
-        persisted as ``role="user"`` or ``role="assistant"``. From the
-        operator's perspective the WebUI session viewer
-        (``web/src/pages/SessionsPage.tsx``) and the TUI chat panel
-        both suddenly show the opaque "Context compaction" block in the
-        slot where they were just reading the assistant's actual reply:
-
-            User:       "i cant see the output of the last message you
-                         sent, i did see it previously, however now see
-                         'context compaction'"
-
-        Mirror of ``_ensure_last_user_message_in_tail`` but anchors on
-        the last assistant-role message. Re-runs the tool-group
-        alignment so we don't split a ``tool_call`` / ``tool_result``
-        group that immediately precedes the anchored message — orphaned
-        tool messages would otherwise be removed by
-        ``_sanitize_tool_pairs`` and trigger the same data-loss symptom
-        we're trying to prevent.
-        """
-        last_asst_idx = self._find_last_assistant_message_idx(messages, head_end)
-        if last_asst_idx < 0:
-            # No assistant message in the compressible region — nothing
-            # to anchor (single-turn pre-reply state, etc.).
-            return cut_idx
-        if last_asst_idx >= cut_idx:
-            # Already in the tail — the token-budget walk did the right
-            # thing on its own.
-            return cut_idx
-        # Pull cut_idx back to the assistant message, then re-align so
-        # we don't split a tool group that immediately precedes it
-        # (e.g. an ``assistant(tool_calls)`` → ``tool(result)`` →
-        # ``assistant(final reply)`` sequence would otherwise leave the
-        # ``tool`` orphan when cut lands at the final reply).
-        new_cut = self._align_boundary_backward(messages, last_asst_idx)
-        if not self.quiet_mode:
-            logger.debug(
-                "Anchoring tail cut to last assistant message at index %d "
-                "(was %d, aligned to %d) to keep the previously-visible "
-                "reply out of the compaction summary (#29824)",
-                last_asst_idx, cut_idx, new_cut,
-            )
-        # Safety: never go back into the head region.
-        return max(new_cut, head_end + 1)
-
    def _ensure_last_user_message_in_tail(
        self,
        messages: List[Dict[str, Any]],
@@ -2023,12 +1875,11 @@ This compaction should PRIORITISE preserving all information related to the focu
        derived from ``summary_target_ratio * context_length``, so it
        scales automatically with the model's context window.

-        Token budget is the primary criterion.  A bounded message-count floor
-        keeps a short run of recent turns verbatim even when the budget is
-        exhausted, but the budget is allowed to exceed by up to 1.5x to avoid
-        cutting inside an oversized message (tool output, file read, etc.). If
-        even that floor exceeds 1.5x the budget, the cut is placed right after
-        the head so compression still runs.
+        Token budget is the primary criterion.  A hard minimum of 3 messages
+        is always protected, but the tail may exceed the budget by up to
+        1.5x to avoid cutting inside an oversized message (tool output, file
+        read, etc.).  If even the minimum 3 messages exceed 1.5x the budget,
+        the cut is placed right after the head so compression still runs.

        Never cuts inside a tool_call/result group.  Always ensures the most
        recent user message is in the tail (see ``_ensure_last_user_message_in_tail``).
@@ -2036,19 +1887,8 @@ This compaction should PRIORITISE preserving all information related to the focu
        if token_budget is None:
            token_budget = self.tail_token_budget
        n = len(messages)
-        # Hard minimum: always keep a bounded recent-message floor in the tail.
-        # ``protect_last_n`` remains a minimum up to the cap; the cap avoids
-        # preserving a whole run of bulky tool outputs on every compaction.
-        available_tail = max(0, n - head_end - 1)
-        min_tail_floor = max(3, min(self.protect_last_n, _MAX_TAIL_MESSAGE_FLOOR))
-        # Leave at least two non-head messages available to summarize on short
-        # transcripts; otherwise compression can replace a tiny middle with a
-        # summary and save no messages at all.
-        compressible_tail_cap = max(3, available_tail - 2)
-        min_tail = (
-            min(min_tail_floor, compressible_tail_cap, available_tail)
-            if available_tail > 1 else 0
-        )
+        # Hard minimum: always keep at least 3 messages in the tail
+        min_tail = min(3, n - head_end - 1) if n - head_end > 1 else 0
        soft_ceiling = int(token_budget * 1.5)
        accumulated = 0
        cut_idx = n  # start from beyond the end
@@ -2120,13 +1960,6 @@ This compaction should PRIORITISE preserving all information related to the focu
        # active task is never lost to compression (fixes #10896).
        cut_idx = self._ensure_last_user_message_in_tail(messages, cut_idx, head_end)

-        # Ensure the most recent assistant message is always in the tail
-        # so the previously-visible reply isn't silently rolled into the
-        # ``[CONTEXT COMPACTION — REFERENCE ONLY]`` block (fixes #29824).
-        # Each anchor only walks ``cut_idx`` backward, so chaining them is
-        # monotonic — the tail can only grow, never shrink.
-        cut_idx = self._ensure_last_assistant_message_in_tail(messages, cut_idx, head_end)
-
        return max(cut_idx, head_end + 1)

    # ------------------------------------------------------------------
@@ -2360,33 +2193,32 @@ This compaction should PRIORITISE preserving all information related to the focu

        # When the summary lands as a standalone role="user" message,
        # weak models read the verbatim "## Active Task" quote of a past
-        # user request as fresh input (#11475, #14521).
-        # When it lands as role="assistant", models may regurgitate the
-        # summary text as their own output (#33256). In both cases, append
-        # the explicit end marker so the model has a clear "summary ends
-        # here, respond to the message below" signal.
-        if not _merge_summary_into_tail:
-            summary = summary + "\n\n" + _SUMMARY_END_MARKER
+        # user request as fresh input (#11475, #14521). Append the explicit
+        # end marker — the same one used in the merge-into-tail path — so
+        # the model has a clear "summary above, not new input" signal.
+        if not _merge_summary_into_tail and summary_role == "user":
+            summary = (
+                summary
+                + "\n\n--- END OF CONTEXT SUMMARY — "
+                "respond to the message below, not the summary above ---"
+            )

        if not _merge_summary_into_tail:
-            compressed.append({
-                "role": summary_role,
-                "content": summary,
-                COMPRESSED_SUMMARY_METADATA_KEY: True,
-            })
+            compressed.append({"role": summary_role, "content": summary})

        for i in range(compress_end, n_messages):
            msg = messages[i].copy()
            if _merge_summary_into_tail and i == compress_end:
-                merged_prefix = summary + "\n\n" + _SUMMARY_END_MARKER + "\n\n"
+                merged_prefix = (
+                    summary
+                    + "\n\n--- END OF CONTEXT SUMMARY — "
+                    "respond to the message below, not the summary above ---\n\n"
+                )
                msg["content"] = _append_text_to_content(
                    msg.get("content"),
                    merged_prefix,
                    prepend=True,
                )
-                # Mark the merged message so frontends can identify it as
-                # containing a compression summary prefix.
-                msg[COMPRESSED_SUMMARY_METADATA_KEY] = True
                _merge_summary_into_tail = False
            compressed.append(msg)

--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@@ -40,16 +40,6 @@ from agent.model_metadata import estimate_request_tokens_rough

 logger = logging.getLogger(__name__)

-# Stable marker the gateway matches on to re-tag the auto-compaction lifecycle
-# status as ``kind="compacting"`` (tui_gateway/server.py::_status_update), so
-# drivers like the desktop app can show an explicit "Summarizing…" indicator
-# instead of the transcript appearing to silently reset. Keep the marker phrase
-# intact if you reword COMPACTION_STATUS.
-COMPACTION_STATUS_MARKER = "Compacting context"
-COMPACTION_STATUS = (
-    f"🗜️ {COMPACTION_STATUS_MARKER} — summarizing earlier conversation so I can continue..."
-)
-

 def _compression_lock_holder(agent: Any) -> str:
    """Build a unique holder id for the lock: pid:tid:agent-instance:uuid.
@@ -334,7 +324,9 @@ def compress_context(
        f"{approx_tokens:,}" if approx_tokens else "unknown", agent.model,
        focus_topic,
    )
-    agent._emit_status(COMPACTION_STATUS)
+    agent._emit_status(
+        "🗜️ Compacting context — summarizing earlier conversation so I can continue..."
+    )

    # ── Compression lock ────────────────────────────────────────────────
    # Atomic, state.db-backed lock per session_id.  Without this, two
@@ -639,11 +631,7 @@ def compress_context(
    return compressed, new_system_prompt


-def try_shrink_image_parts_in_messages(
-    api_messages: list,
-    *,
-    max_dimension: int = 8000,
-) -> bool:
+def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
    """Re-encode all native image parts at a smaller size to recover from
    image-too-large errors (Anthropic 5 MB, unknown other providers).

@@ -654,8 +642,7 @@ def try_shrink_image_parts_in_messages(
    Strategy: look for ``image_url`` / ``input_image`` parts carrying a
    ``data:image/...;base64,...`` payload.  For each one whose encoded
    size exceeds 4 MB (a safe target that slides under Anthropic's 5 MB
-    ceiling with header overhead) or whose longest side exceeds
-    ``max_dimension``, write the base64 to a tempfile, call
+    ceiling with header overhead), write the base64 to a tempfile, call
    ``vision_tools._resize_image_for_vision`` to produce a smaller data
    URL, and substitute it in place.

@@ -677,9 +664,10 @@ def try_shrink_image_parts_in_messages(
    # after a confirmed provider rejection, so the alternative is failure.
    target_bytes = 4 * 1024 * 1024
    # Anthropic enforces an 8000px per-side dimension cap independently of
-    # the 5 MB byte cap.  In many-image requests, the provider can report a
-    # lower cap (observed: 2000px).  The caller passes that parsed ceiling
-    # when the rejection includes it.
+    # the 5 MB byte cap.  A tall screenshot can be well under 5 MB yet far
+    # over 8000px (e.g. 1200×12000 at 0.06 MB).  We check pixel dimensions
+    # even when the byte budget is fine.
+    max_dimension = 8000
    changed_count = 0
    # Track parts that are over the target but could NOT be shrunk under it.
    # If any survive, retrying is pointless — the same oversized payload will
@@ -696,9 +684,9 @@ def try_shrink_image_parts_in_messages(
        # Check both byte size AND pixel dimensions.
        needs_shrink = len(url) > target_bytes  # over byte budget
        if not needs_shrink:
-            # Even if bytes are fine, check pixel dimensions against the
-            # provider's reported per-side cap.  A screenshot can be tiny in
-            # bytes yet too large in pixels.
+            # Even if bytes are fine, check pixel dimensions against
+            # Anthropic's 8000px cap.  A tall image can be tiny in bytes
+            # yet huge in pixels.
            try:
                import base64 as _b64_dim
                header_d, _, data_d = url.partition(",")
@@ -807,8 +795,6 @@ def try_shrink_image_parts_in_messages(


 __all__ = [
-    "COMPACTION_STATUS",
-    "COMPACTION_STATUS_MARKER",
    "check_compression_model_feasibility",
    "replay_compression_warning",
    "compress_context",
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -71,35 +71,6 @@ logger = logging.getLogger(__name__)
 INTERRUPT_WAITING_FOR_MODEL_PREFIX = "Operation interrupted: waiting for model response ("


-def _image_error_max_dimension(error: Exception) -> Optional[int]:
-    """Extract a provider-reported image dimension ceiling, if present."""
-    parts = []
-    for value in (
-        error,
-        getattr(error, "message", None),
-        getattr(error, "body", None),
-    ):
-        if value:
-            try:
-                parts.append(str(value))
-            except Exception:
-                pass
-    text = " ".join(parts).lower()
-    if "image" not in text or "dimension" not in text or "max allowed size" not in text:
-        return None
-
-    match = re.search(r"max allowed size(?:\s+for [^:]+)?:\s*(\d{3,5})\s*pixels?", text)
-    if not match:
-        return None
-    try:
-        max_dimension = int(match.group(1))
-    except ValueError:
-        return None
-    if 512 <= max_dimension <= 8000:
-        return max_dimension
-    return None
-
-
 def _ollama_context_limit_error(agent: Any, request_tokens: int) -> Optional[str]:
    """Return a user-facing error when Ollama is loaded with too little context."""
    if not getattr(agent, "tools", None):
@@ -397,42 +368,6 @@ def _get_continuation_prompt(is_partial_stub: bool, dropped_tools: Optional[List
        )


-# Shared recovery hint appended to every content-policy refusal message. Both
-# the HTTP-200 refusal path (``finish_reason=content_filter``) and the
-# exception path (a provider moderation error classified as
-# ``content_policy_blocked``) end with the same actionable next steps, so they
-# share one trailer to keep the guidance from drifting between the two sites.
-_CONTENT_POLICY_RECOVERY_HINT = (
-    "Try rephrasing the request, narrowing the context, or "
-    "adding a fallback provider with `hermes fallback add`."
-)
-
-
-def _content_policy_blocked_result(
-    messages: List[Dict],
-    api_call_count: int,
-    *,
-    final_response: str,
-    error_detail: str,
-) -> Dict[str, Any]:
-    """Build the terminal turn result for a content-policy block.
-
-    A content-policy refusal is deterministic for the unchanged prompt, so the
-    turn ends here (no retry). Both the HTTP-200 refusal handler and the
-    exception-path handler return the identical shape — a failed, non-completed
-    turn carrying the user-facing message and a ``content_policy_blocked:``
-    prefixed error — so they funnel through this one builder.
-    """
-    return {
-        "final_response": final_response,
-        "messages": messages,
-        "api_calls": api_call_count,
-        "completed": False,
-        "failed": True,
-        "error": f"content_policy_blocked: {error_detail}",
-    }
-
-
 def run_conversation(
    agent,
    user_message: str,
@@ -660,11 +595,7 @@ def run_conversation(
        # landed after an orphan tool result). Most providers return
        # empty content on malformed sequences, which would otherwise
        # retrigger the empty-retry loop indefinitely.
-        # repair_message_sequence_with_cursor also recomputes the SessionDB
-        # flush cursor (_last_flushed_db_idx) when repair compacts the list,
-        # so the turn-end flush doesn't skip the assistant/tool chain (#44837).
-        from agent.agent_runtime_helpers import repair_message_sequence_with_cursor
-        repaired_seq = repair_message_sequence_with_cursor(agent, messages)
+        repaired_seq = agent._repair_message_sequence(messages)
        if repaired_seq > 0:
            request_logger.info(
                "Repaired %s message-alternation violations before request (session=%s)",
@@ -772,10 +703,7 @@ def run_conversation(
        # a thinking-only turn. Runs on the per-call copy only — the
        # stored conversation history keeps the reasoning block for the
        # UI transcript and session persistence.
-        api_messages = agent._drop_thinking_only_and_merge_users(
-            api_messages,
-            drop_codex_reasoning_items=agent.api_mode != "codex_responses",
-        )
+        api_messages = agent._drop_thinking_only_and_merge_users(api_messages)

        # Normalize message whitespace and tool-call JSON for consistent
        # prefix matching.  Ensures bit-perfect prefixes across turns,
@@ -1384,106 +1312,6 @@ def run_conversation(
                        )
                        finish_reason = "length"

-                # ── Content-policy refusal (HTTP 200) ──────────────────
-                # The model — or the provider's safety system — returned a
-                # *successful* response whose stop/finish reason is a refusal:
-                # Anthropic ``stop_reason="refusal"`` → ``content_filter``;
-                # OpenAI / portal ``finish_reason="content_filter"`` or a
-                # populated ``message.refusal`` (mapped in the chat_completions
-                # transport); Bedrock ``guardrail_intervened``. The content is
-                # typically empty, so without this branch the response falls
-                # through to the empty-response / invalid-response retry loops
-                # and is mis-surfaced as "rate limited" / "no content after
-                # retries" — burning paid attempts reproducing a deterministic
-                # refusal. Surface it clearly and stop. Mirrors the
-                # exception-based ``content_policy_blocked`` recovery: try a
-                # configured fallback once, otherwise return the refusal.
-                if finish_reason == "content_filter":
-                    _refusal_transport = agent._get_transport()
-                    if agent.api_mode == "anthropic_messages":
-                        _refusal_result = _refusal_transport.normalize_response(
-                            response, strip_tool_prefix=agent._is_anthropic_oauth
-                        )
-                    else:
-                        _refusal_result = _refusal_transport.normalize_response(response)
-                    _refusal_text = (getattr(_refusal_result, "content", None) or "").strip()
-                    # Some refusals carry the explanation only in the reasoning
-                    # channel; fall back to it so the user sees *something*.
-                    if not _refusal_text:
-                        _refusal_text = (agent._extract_reasoning(_refusal_result) or "").strip()
-
-                    agent._invoke_api_request_error_hook(
-                        task_id=effective_task_id,
-                        turn_id=turn_id,
-                        api_request_id=api_request_id,
-                        api_call_count=api_call_count,
-                        api_start_time=api_start_time,
-                        api_kwargs=api_kwargs,
-                        error_type="ContentPolicyBlocked",
-                        error_message=_refusal_text or "model declined to respond (content_filter)",
-                        status_code=None,
-                        retry_count=retry_count,
-                        max_retries=max_retries,
-                        retryable=False,
-                        reason=FailoverReason.content_policy_blocked.value,
-                    )
-
-                    if thinking_spinner:
-                        thinking_spinner.stop("")
-                        thinking_spinner = None
-                    if agent.thinking_callback:
-                        agent.thinking_callback("")
-
-                    # Deterministic for the unchanged prompt — never retry.
-                    # Try a configured fallback once (a different model may not
-                    # refuse); otherwise surface the refusal terminally.
-                    if agent._has_pending_fallback():
-                        agent._buffer_status(
-                            "⚠️ Model declined to respond (safety refusal) — trying fallback..."
-                        )
-                    if agent._try_activate_fallback():
-                        retry_count = 0
-                        compression_attempts = 0
-                        _retry.primary_recovery_attempted = False
-                        continue
-
-                    agent._flush_status_buffer()
-                    _refusal_log = (
-                        _refusal_text[:500] + "..."
-                        if len(_refusal_text) > 500
-                        else _refusal_text
-                    )
-                    logger.warning(
-                        "%sModel declined to respond (finish_reason=content_filter). "
-                        "model=%s provider=%s refusal=%s",
-                        agent.log_prefix, agent.model, agent.provider,
-                        _refusal_log or "(no text)",
-                    )
-                    agent._emit_status(
-                        "⚠️ The model declined to respond to this request (safety refusal)."
-                    )
-
-                    _refusal_detail = (
-                        f"Model's explanation: {_refusal_text}"
-                        if _refusal_text
-                        else "The model returned no explanation."
-                    )
-                    _refusal_response = (
-                        "⚠️  The model declined to respond to this request "
-                        "(safety refusal — not a Hermes/gateway failure).\n\n"
-                        f"{_refusal_detail}\n\n"
-                        f"{_CONTENT_POLICY_RECOVERY_HINT}"
-                    )
-
-                    agent._cleanup_task_resources(effective_task_id)
-                    agent._persist_session(messages, conversation_history)
-                    return _content_policy_blocked_result(
-                        messages,
-                        api_call_count,
-                        final_response=_refusal_response,
-                        error_detail=_refusal_text or "model declined (content_filter)",
-                    )
-
                if finish_reason == "length":
                    if getattr(response, "id", "") == PARTIAL_STREAM_STUB_ID:
                        agent._vprint(
@@ -2235,11 +2063,7 @@ def run_conversation(
                    and not _retry.image_shrink_retry_attempted
                ):
                    _retry.image_shrink_retry_attempted = True
-                    image_max_dimension = _image_error_max_dimension(api_error) or 8000
-                    if agent._try_shrink_image_parts_in_messages(
-                        api_messages,
-                        max_dimension=image_max_dimension,
-                    ):
+                    if agent._try_shrink_image_parts_in_messages(api_messages):
                        agent._vprint(
                            f"{agent.log_prefix}📐 Image(s) exceeded provider size limit — "
                            f"shrank and retrying...",
@@ -2807,13 +2631,10 @@ def run_conversation(
                    except Exception:
                        pass
                    if _genuine_nous_rate_limit:
-                        # Re-enter the loop exactly once so the
-                        # top-of-loop Nous guard handles fallback or
-                        # bails cleanly. (Setting retry_count to
-                        # max_retries would make the while condition
-                        # false immediately and the guard would never
-                        # run -- no fallback, generic exhaustion error.)
-                        retry_count = max(0, max_retries - 1)
+                        # Skip straight to max_retries -- the
+                        # top-of-loop guard will handle fallback or
+                        # bail cleanly.
+                        retry_count = max_retries
                        continue
                    # Upstream capacity 429: fall through to normal
                    # retry logic.  A different model (or the same
@@ -3255,17 +3076,20 @@ def run_conversation(
                    if classified.reason == FailoverReason.content_policy_blocked:
                        _summary = agent._summarize_api_error(api_error)
                        _policy_response = (
-                            "⚠️  The model provider's safety filter blocked this request "
-                            "(not a Hermes/gateway failure).\n\n"
+                            "⚠️  The model provider's safety filter blocked this request "
+                            "(not a Hermes/gateway failure).\n\n"
                            f"Provider message: {_summary}\n\n"
-                            f"{_CONTENT_POLICY_RECOVERY_HINT}"
-                        )
-                        return _content_policy_blocked_result(
-                            messages,
-                            api_call_count,
-                            final_response=_policy_response,
-                            error_detail=_summary,
+                            "Try rephrasing the request, narrowing the context, or "
+                            "adding a fallback provider with `hermes fallback add`."
                        )
+                        return {
+                            "final_response": _policy_response,
+                            "messages": messages,
+                            "api_calls": api_call_count,
+                            "completed": False,
+                            "failed": True,
+                            "error": f"content_policy_blocked: {_summary}",
+                        }
                    return {
                        "final_response": None,
                        "messages": messages,
--- a/agent/copilot_acp_client.py
+++ b/agent/copilot_acp_client.py
@@ -70,6 +70,16 @@ def _resolve_args() -> list[str]:

 def _resolve_home_dir() -> str:
    """Return a stable HOME for child ACP processes."""
+
+    try:
+        from hermes_constants import get_subprocess_home
+
+        profile_home = get_subprocess_home()
+        if profile_home:
+            return profile_home
+    except Exception:
+        pass
+
    home = os.environ.get("HOME", "").strip()
    if home:
        return home
@@ -95,10 +105,7 @@ def _resolve_home_dir() -> str:

 def _build_subprocess_env() -> dict[str, str]:
    env = os.environ.copy()
-    home = _resolve_home_dir()
-    env["HOME"] = home
-    from hermes_constants import apply_subprocess_home_env
-    apply_subprocess_home_env(env)
+    env["HOME"] = _resolve_home_dir()
    return env


--- a/agent/display.py
+++ b/agent/display.py
@@ -858,6 +858,20 @@ def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]
    return False, ""


+def _used_free_parallel(result: str | None) -> bool:
+    """Report whether a web tool result was served by Parallel's free Search MCP.
+
+    The keyless Parallel path is the only one that stamps ``provider="parallel"``
+    on its result; the paid REST path and all other providers leave it out. The
+    tool line is labelled "Parallel search" / "Parallel fetch" only in that case.
+    """
+    if isinstance(result, str) and '"provider"' in result:
+        payload = safe_json_loads(result)
+        if isinstance(payload, dict):
+            return str(payload.get("provider", "")).lower() == "parallel"
+    return False
+
+
 def get_cute_tool_message(
    tool_name: str, args: dict, duration: float, result: str | None = None,
 ) -> str:
@@ -895,15 +909,17 @@ def get_cute_tool_message(
        return f"{line}{failure_suffix}"

    if tool_name == "web_search":
-        return _wrap(f"┊ 🔍 search    {_trunc(args.get('query', ''), 42)}  {dur}")
+        verb = "Parallel search" if _used_free_parallel(result) else "search"
+        return _wrap(f"┊ 🔍 {verb:<9} {_trunc(args.get('query', ''), 42)}  {dur}")
    if tool_name == "web_extract":
+        verb = "Parallel fetch" if _used_free_parallel(result) else "fetch"
        urls = args.get("urls", [])
        if urls:
            url = urls[0] if isinstance(urls, list) else str(urls)
            domain = url.replace("https://", "").replace("http://", "").split("/")[0]
            extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
-            return _wrap(f"┊ 📄 fetch     {_trunc(domain, 35)}{extra}  {dur}")
-        return _wrap(f"┊ 📄 fetch     pages  {dur}")
+            return _wrap(f"┊ 📄 {verb:<9} {_trunc(domain, 35)}{extra}  {dur}")
+        return _wrap(f"┊ 📄 {verb:<9} pages  {dur}")
    if tool_name == "terminal":
        return _wrap(f"┊ 💻 $         {_trunc(args.get('command', ''), 42)}  {dur}")
    if tool_name == "process":
--- a/agent/errors.py
+++ b/agent/errors.py
@@ -1,3 +0,0 @@
-class SSLConfigurationError(Exception):
-    """Raised when SSL/TLS certificate bundle configuration fails."""
-    pass
--- a/agent/file_safety.py
+++ b/agent/file_safety.py
@@ -46,6 +46,11 @@ def build_write_denied_paths(home: str) -> set[str]:
            # Top-level Anthropic PKCE credential store remains sensitive even
            # when a profile is active; default/non-profile sessions still read it.
            str(hermes_root / ".anthropic_oauth.json"),
+            os.path.join(home, ".bashrc"),
+            os.path.join(home, ".zshrc"),
+            os.path.join(home, ".profile"),
+            os.path.join(home, ".bash_profile"),
+            os.path.join(home, ".zprofile"),
            os.path.join(home, ".netrc"),
            os.path.join(home, ".pgpass"),
            os.path.join(home, ".npmrc"),
@@ -99,6 +104,12 @@ def is_write_denied(path: str) -> bool:
        if resolved.startswith(prefix):
            return True

+    # Hermes control-plane files: block both the ACTIVE profile's view
+    # (hermes_home) AND the global root view. Without the root pass, a
+    # profile-mode session leaves <root>/auth.json + <root>/config.yaml
+    # writable — letting a prompt-injected write_file overwrite the global
+    # files that every profile inherits from (same shape as #15981).
+    control_file_names = ("auth.json", "config.yaml", "webhook_subscriptions.json")
    mcp_tokens_dir_name = "mcp-tokens"

    hermes_dirs = []
@@ -111,6 +122,12 @@ def is_write_denied(path: str) -> bool:
            continue

    for base_real in hermes_dirs:
+        for name in control_file_names:
+            try:
+                if resolved == os.path.realpath(os.path.join(base_real, name)):
+                    return True
+            except Exception:
+                continue
        try:
            mcp_real = os.path.realpath(os.path.join(base_real, mcp_tokens_dir_name))
            if resolved == mcp_real or resolved.startswith(mcp_real + os.sep):
--- a/agent/gemini_native_adapter.py
+++ b/agent/gemini_native_adapter.py
@@ -41,16 +41,6 @@ DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
 GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65535


-def bare_gemini_model_id(model: str) -> str:
-    """Strip Gemini's own provider prefix from an aggregator-style model id."""
-    name = (model or "").strip()
-    lowered = name.lower()
-    for prefix in ("google/", "gemini/"):
-        if lowered.startswith(prefix):
-            return name[len(prefix):].strip() or name
-    return name
-
-
 def is_native_gemini_base_url(base_url: str) -> bool:
    """Return True when the endpoint speaks Gemini's native REST API."""
    normalized = str(base_url or "").strip().rstrip("/").lower()
@@ -340,7 +330,7 @@ def _build_gemini_contents(messages: List[Dict[str, Any]]) -> tuple[List[Dict[st
    system_instruction = None
    joined_system = "\n".join(part for part in system_text_parts if part).strip()
    if joined_system:
-        system_instruction = {"role": "system", "parts": [{"text": joined_system}]}
+        system_instruction = {"parts": [{"text": joined_system}]}
    return contents, system_instruction


@@ -924,7 +914,6 @@ class GeminiNativeClient:
            thinking_config=thinking_config,
        )

-        model = bare_gemini_model_id(model)
        if stream:
            return self._stream_completion(model=model, request=request, timeout=timeout)

--- a/agent/memory_manager.py
+++ b/agent/memory_manager.py
@@ -44,66 +44,6 @@ logger = logging.getLogger(__name__)
 _SYNC_DRAIN_TIMEOUT_S = 5.0


-def memory_provider_tools_enabled(enabled_toolsets: Optional[List[str]]) -> bool:
-    """Return whether external memory-provider tools should be exposed."""
-    if enabled_toolsets is None:
-        return True
-    if not enabled_toolsets:
-        return False
-    if "memory" in enabled_toolsets:
-        return True
-
-    try:
-        from toolsets import resolve_toolset
-
-        return any("memory" in resolve_toolset(name) for name in enabled_toolsets)
-    except Exception:
-        logger.debug("Failed to resolve enabled toolsets for memory-provider tools", exc_info=True)
-        return False
-
-
-def inject_memory_provider_tools(agent: Any) -> int:
-    """Append external memory-provider tool schemas to an agent tool surface."""
-    memory_manager = getattr(agent, "_memory_manager", None)
-    tools = getattr(agent, "tools", None)
-    if not memory_manager or tools is None:
-        return 0
-
-    existing_tool_names = {
-        tool.get("function", {}).get("name")
-        for tool in tools
-        if isinstance(tool, dict)
-    }
-    if (
-        "memory" not in existing_tool_names
-        and not memory_provider_tools_enabled(getattr(agent, "enabled_toolsets", None))
-    ):
-        return 0
-
-    get_schemas = getattr(memory_manager, "get_all_tool_schemas", None)
-    if not callable(get_schemas):
-        return 0
-
-    valid_tool_names = getattr(agent, "valid_tool_names", None)
-    if valid_tool_names is None:
-        valid_tool_names = set()
-        agent.valid_tool_names = valid_tool_names
-
-    added = 0
-    for schema in get_schemas():
-        if not isinstance(schema, dict):
-            continue
-        tool_name = schema.get("name", "")
-        if not tool_name or tool_name in existing_tool_names:
-            continue
-        tools.append({"type": "function", "function": schema})
-        valid_tool_names.add(tool_name)
-        existing_tool_names.add(tool_name)
-        added += 1
-
-    return added
-
-
 # ---------------------------------------------------------------------------
 # Context fencing helpers
 # ---------------------------------------------------------------------------
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -5,7 +5,6 @@ and run_agent.py for pre-flight context checks.
 """

 import ipaddress
-import json
 import logging
 import os
 import re
@@ -17,7 +16,7 @@ from urllib.parse import urlparse
 import requests
 import yaml

-from utils import atomic_json_write, base_url_host_matches, base_url_hostname
+from utils import base_url_host_matches, base_url_hostname

 from hermes_constants import OPENROUTER_MODELS_URL

@@ -112,57 +111,6 @@ _endpoint_model_metadata_cache: Dict[str, Dict[str, Dict[str, Any]]] = {}
 _endpoint_model_metadata_cache_time: Dict[str, float] = {}
 _ENDPOINT_MODEL_CACHE_TTL = 300

-
-def _get_model_metadata_cache_path() -> Path:
-    """Return path to the OpenRouter model metadata disk cache."""
-    from hermes_constants import get_hermes_home
-    return get_hermes_home() / "cache" / "openrouter_model_metadata.json"
-
-
-def _model_metadata_disk_cache_age_seconds() -> Optional[float]:
-    """Return disk-cache age in seconds, or None if freshness is unknown."""
-    try:
-        cache_path = _get_model_metadata_cache_path()
-        if not cache_path.exists():
-            return None
-        age = time.time() - cache_path.stat().st_mtime
-        if age < 0:
-            return None
-        return age
-    except Exception:
-        return None
-
-
-def _load_model_metadata_disk_cache() -> Dict[str, Dict[str, Any]]:
-    """Load processed OpenRouter metadata cache from disk."""
-    try:
-        cache_path = _get_model_metadata_cache_path()
-        with cache_path.open("r", encoding="utf-8") as f:
-            data = json.load(f)
-        if not isinstance(data, dict):
-            return {}
-        return {
-            str(key): value
-            for key, value in data.items()
-            if isinstance(value, dict)
-        }
-    except Exception as e:
-        logger.debug("Failed to load OpenRouter model metadata disk cache: %s", e)
-        return {}
-
-
-def _save_model_metadata_disk_cache(data: Dict[str, Dict[str, Any]]) -> None:
-    """Save processed OpenRouter metadata cache to disk atomically."""
-    try:
-        atomic_json_write(
-            _get_model_metadata_cache_path(),
-            data,
-            indent=0,
-            separators=(",", ":"),
-        )
-    except Exception as e:
-        logger.debug("Failed to save OpenRouter model metadata disk cache: %s", e)
-
 # Descending tiers for context length probing when the model is unknown.
 # We start at 256K (covers GPT-5.x, many current large-context models) and
 # step down on context-length errors until one works.  Tier[0] is also the
@@ -261,13 +209,7 @@ DEFAULT_CONTEXT_LENGTHS = {
    # https://platform.minimax.io/docs/api-reference/text-chat-openai
    "minimax-m3": 1000000,
    "minimax": 204800,
-    # GLM — GLM-5.2 ships with a 1M context window (verified empirically:
-    # needle-in-a-haystack retrieval at 789K prompt tokens succeeded with
-    # zero errors on api.z.ai/api/coding/paas/v4).  Older GLM models
-    # (5, 5.1, 5-turbo) are ~202K.  Longest-key-first substring matching
-    # ensures "glm-5.2" resolves to 1M while older variants still hit the
-    # generic 202K fallback.
-    "glm-5.2": 1_048_576,
+    # GLM
    "glm": 202752,
    # xAI Grok — xAI /v1/models does not return context_length metadata,
    # so these hardcoded fallbacks prevent Hermes from probing-down to
@@ -685,15 +627,6 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
    if not force_refresh and _model_metadata_cache and (time.time() - _model_metadata_cache_time) < _MODEL_CACHE_TTL:
        return _model_metadata_cache

-    if not force_refresh:
-        disk_age = _model_metadata_disk_cache_age_seconds()
-        if disk_age is not None and disk_age < _MODEL_CACHE_TTL:
-            disk_cache = _load_model_metadata_disk_cache()
-            if disk_cache:
-                _model_metadata_cache = disk_cache
-                _model_metadata_cache_time = time.time() - disk_age
-                return _model_metadata_cache
-
    try:
        response = requests.get(OPENROUTER_MODELS_URL, timeout=10, verify=_resolve_requests_verify())
        response.raise_for_status()
@@ -715,24 +648,12 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any

        _model_metadata_cache = cache
        _model_metadata_cache_time = time.time()
-        _save_model_metadata_disk_cache(cache)
        logger.debug("Fetched metadata for %s models from OpenRouter", len(cache))
        return cache

    except Exception as e:
        logger.warning(f"Failed to fetch model metadata from OpenRouter: {e}")
-        if _model_metadata_cache:
-            return _model_metadata_cache
-        disk_cache = _load_model_metadata_disk_cache()
-        if disk_cache:
-            _model_metadata_cache = disk_cache
-            disk_age = _model_metadata_disk_cache_age_seconds()
-            if disk_age is not None:
-                _model_metadata_cache_time = time.time() - min(disk_age, _MODEL_CACHE_TTL)
-            else:
-                _model_metadata_cache_time = time.time() - _MODEL_CACHE_TTL + 1
-            return _model_metadata_cache
-        return {}
+        return _model_metadata_cache or {}


 def fetch_endpoint_model_metadata(
--- a/agent/moonshot_schema.py
+++ b/agent/moonshot_schema.py
@@ -135,14 +135,7 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:

 def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
    """Infer a reasonable ``type`` if this schema node has none."""
-    node_type = node.get("type")
-    if isinstance(node_type, list):
-        concrete = next(
-            (t for t in node_type if isinstance(t, str) and t not in {"", "null"}),
-            "string",
-        )
-        return {**node, "type": concrete}
-    if "type" in node and node_type not in {None, ""}:
+    if "type" in node and node["type"] not in (None, ""):  # tuple: list-valued types are unhashable
        return node

    # Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum``
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -508,22 +508,13 @@ PLATFORM_HINTS = {
    ),
    "telegram": (
        "You are on a text messaging communication platform, Telegram. "
-        "Standard Markdown is automatically converted to Telegram formatting. "
+        "Standard markdown is automatically converted to Telegram format. "
        "Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
        "`inline code`, ```code blocks```, [links](url), and ## headers. "
-        "Telegram now supports rich Markdown, so lean into it: whenever it "
-        "makes the answer clearer or easier to scan, actively reach for real "
-        "Markdown tables (pipe `| col | col |` syntax), bullet and numbered "
-        "lists, task lists (`- [ ]` / `- [x]`), headings, nested blockquotes, "
-        "collapsible details, footnotes/references, math/formulas (`$...$`, "
-        "`$$...$$`), underline, subscript/superscript, marked (highlighted) "
-        "text, and anchors. Default to structured formatting over dense "
-        "paragraphs for any comparison, set of steps, key/value summary, or "
-        "tabular data. Prefer real Markdown tables and task lists over "
-        "hand-built bullet substitutes when presenting structured data; these "
-        "degrade gracefully (tables become readable bullet groups) when rich "
-        "rendering is unavailable, but advanced constructs like math and "
-        "collapsible details may render as plain source text in that case. "
+        "Telegram has NO table syntax — prefer bullet lists or labeled "
+        "key: value pairs over pipe tables (any tables you do emit are "
+        "auto-rewritten into row-group bullets, which you can produce "
+        "directly for cleaner output). "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. Images "
        "(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
@@ -1164,7 +1155,7 @@ def build_skills_system_prompt(
        or get_session_env("HERMES_SESSION_PLATFORM")
        or ""
    )
-    disabled = get_disabled_skill_names(_platform_hint or None)
+    disabled = get_disabled_skill_names()
    cache_key = (
        str(skills_dir.resolve()),
        tuple(str(d) for d in external_dirs),
--- a/agent/skill_utils.py
+++ b/agent/skill_utils.py
@@ -272,65 +272,27 @@ def skill_matches_environment(frontmatter: Dict[str, Any]) -> bool:
 # ── Disabled skills ───────────────────────────────────────────────────────


-_RAW_CONFIG_CACHE: Dict[Tuple[str, int, int], Dict[str, Any]] = {}
-
-
-def _raw_config_cache_clear() -> None:
-    """Test hook — drop the shared raw config cache."""
-    _RAW_CONFIG_CACHE.clear()
-
-
-def _load_raw_config() -> Dict[str, Any]:
-    """Read config.yaml with a shared mtime+size keyed cache.
-
-    This module intentionally avoids importing ``hermes_cli.config`` on the
-    skill prompt/build path. A tiny local cache gives the same repeated-read
-    win without pulling the heavier CLI config stack into startup.
-    """
-    config_path = get_config_path()
-    if not config_path.exists():
-        return {}
-    try:
-        stat = config_path.stat()
-        cache_key = (str(config_path), stat.st_mtime_ns, stat.st_size)
-    except OSError:
-        cache_key = None
-
-    if cache_key is not None:
-        cached = _RAW_CONFIG_CACHE.get(cache_key)
-        if cached is not None:
-            return cached
-
-    try:
-        parsed = yaml_load(config_path.read_text(encoding="utf-8"))
-    except Exception as e:
-        logger.debug("Could not read skill config %s: %s", config_path, e)
-        return {}
-    if not isinstance(parsed, dict):
-        return {}
-
-    if cache_key is not None:
-        _RAW_CONFIG_CACHE.clear()
-        _RAW_CONFIG_CACHE[cache_key] = parsed
-    return parsed
-
-
 def get_disabled_skill_names(platform: str | None = None) -> Set[str]:
    """Read disabled skill names from config.yaml.

    Args:
        platform: Explicit platform name (e.g. ``"telegram"``).  When
            *None*, resolves from ``HERMES_PLATFORM`` or
-            ``HERMES_SESSION_PLATFORM`` env vars.  Returns the global
-            disabled list, unioned with the platform-specific list when a
-            platform is resolved (a globally-disabled skill stays disabled
-            on every platform).
+            ``HERMES_SESSION_PLATFORM`` env vars.  Falls back to the
+            global disabled list when no platform is determined.

    Reads the config file directly (no CLI config imports) to stay
    lightweight.
    """
-    parsed = _load_raw_config()
-    if not parsed:
+    config_path = get_config_path()
+    if not config_path.exists():
+        return set()
+    try:
+        parsed = yaml_load(config_path.read_text(encoding="utf-8"))
+    except Exception as e:
+        logger.debug("Could not read skill config %s: %s", config_path, e)
+        return set()
+    if not isinstance(parsed, dict):
        return set()

    skills_cfg = parsed.get("skills")
@@ -343,14 +305,13 @@ def get_disabled_skill_names(platform: str | None = None) -> Set[str]:
        or os.getenv("HERMES_PLATFORM")
        or get_session_env("HERMES_SESSION_PLATFORM")
    )
-    global_disabled = _normalize_string_set(skills_cfg.get("disabled"))
    if resolved_platform:
        platform_disabled = (skills_cfg.get("platform_disabled") or {}).get(
            resolved_platform
        )
        if platform_disabled is not None:
-            return global_disabled | _normalize_string_set(platform_disabled)
-    return global_disabled
+            return _normalize_string_set(platform_disabled)
+    return _normalize_string_set(skills_cfg.get("disabled"))


 def _normalize_string_set(values) -> Set[str]:
@@ -375,7 +336,6 @@ _EXTERNAL_DIRS_CACHE: Dict[Tuple[str, int], List[Path]] = {}
 def _external_dirs_cache_clear() -> None:
    """Test hook — drop the in-process cache."""
    _EXTERNAL_DIRS_CACHE.clear()
-    _raw_config_cache_clear()


 def get_external_skills_dirs() -> List[Path]:
@@ -408,8 +368,11 @@ def get_external_skills_dirs() -> List[Path]:
            # Return a copy so callers can't mutate the cached list.
            return list(cached)

-    parsed = _load_raw_config()
-    if not parsed:
+    try:
+        parsed = yaml_load(config_path.read_text(encoding="utf-8"))
+    except Exception:
+        return []
+    if not isinstance(parsed, dict):
        return []

    skills_cfg = parsed.get("skills")
@@ -621,7 +584,15 @@ def resolve_skill_config_values(
    current values (or the declared default if the key isn't set).
    Path values are expanded via ``os.path.expanduser``.
    """
-    config = _load_raw_config()
+    config_path = get_config_path()
+    config: Dict[str, Any] = {}
+    if config_path.exists():
+        try:
+            parsed = yaml_load(config_path.read_text(encoding="utf-8"))
+            if isinstance(parsed, dict):
+                config = parsed
+        except Exception:
+            pass

    resolved: Dict[str, Any] = {}
    for var in config_vars:
--- a/agent/ssl_guard.py
+++ b/agent/ssl_guard.py
@@ -1,94 +0,0 @@
-"""Preventive SSL CA certificate checks for Hermes Agent.
-
-This module catches broken CA bundle paths before OpenAI/httpx turns them into
-opaque ``FileNotFoundError: [Errno 2] No such file or directory`` failures.
-"""
-
-from __future__ import annotations
-
-import logging
-import os
-import ssl
-from pathlib import Path
-
-from agent.errors import SSLConfigurationError
-
-logger = logging.getLogger(__name__)
-
-_CA_BUNDLE_ENV_VARS = (
-    "HERMES_CA_BUNDLE",
-    "SSL_CERT_FILE",
-    "REQUESTS_CA_BUNDLE",
-    "CURL_CA_BUNDLE",
-)
-
-_SKIP_VALUES = {"1", "true", "yes", "on"}
-
-
-def _skip_ssl_guard_enabled() -> bool:
-    return os.getenv("HERMES_SKIP_SSL_GUARD", "").strip().lower() in _SKIP_VALUES
-
-
-def _repair_hint() -> str:
-    return (
-        "Repair: python -m pip install --force-reinstall certifi openai httpx\n"
-        "If you configured a custom corporate CA bundle, fix or unset the "
-        "broken CA bundle environment variable."
-    )
-
-
-def _ssl_err(message: str) -> SSLConfigurationError:
-    """Create a consistent, user-actionable SSL configuration error."""
-    return SSLConfigurationError(f"{message}\n{_repair_hint()}")
-
-
-def _validate_bundle_path(label: str, value: str, *, require_substantial: bool = False) -> None:
-    path = Path(value).expanduser()
-    if not path.exists():
-        raise _ssl_err(f"{label} points to a missing CA bundle: {value}")
-    if not path.is_file():
-        raise _ssl_err(f"{label} does not point to a CA bundle file: {value}")
-    if require_substantial and path.stat().st_size < 1024:
-        raise _ssl_err(f"{label} at {value} appears corrupted (too small)")
-    try:
-        ctx = ssl.create_default_context(cafile=str(path))
-    except Exception as exc:
-        raise _ssl_err(f"{label} CA bundle at {value} cannot be loaded: {exc}") from exc
-    if not ctx.get_ca_certs():
-        raise _ssl_err(f"{label} CA bundle at {value} did not load any certificates")
-
-
-def verify_ca_bundle() -> None:
-    """Verify configured and bundled CA certificates are present and loadable.
-
-    Raises:
-        SSLConfigurationError: If an explicit CA-bundle environment variable
-            points at a bad path, or if certifi's bundled ``cacert.pem`` is
-            missing/corrupt.
-    """
-    if _skip_ssl_guard_enabled():
-        logger.debug("SSL CA bundle guard skipped via HERMES_SKIP_SSL_GUARD")
-        return
-
-    for env_var in _CA_BUNDLE_ENV_VARS:
-        value = os.getenv(env_var)
-        if value:
-            _validate_bundle_path(env_var, value)
-
-    try:
-        import certifi
-    except Exception as exc:
-        raise _ssl_err(f"certifi is not importable: {exc}") from exc
-
-    ca_bundle = str(certifi.where())
-    _validate_bundle_path("certifi", ca_bundle, require_substantial=True)
-
-
-def verify_ca_bundle_with_fallback() -> None:
-    """Backward-compatible wrapper for older call sites.
-
-    The old PR name mentioned a platform fallback, but allowing startup with a
-    broken certifi bundle still leaves httpx/OpenAI and requests call sites
-    failing later. Keep the wrapper name but enforce the same check.
-    """
-    verify_ca_bundle()
--- a/agent/transports/anthropic.py
+++ b/agent/transports/anthropic.py
@@ -186,21 +186,10 @@ class AnthropicTransport(ProviderTransport):
    def validate_response(self, response: Any) -> bool:
        """Check Anthropic response structure is valid.

-        An empty content list is legitimate for terminal stop reasons that
-        carry no text payload:
-
-        - ``end_turn`` — the model's canonical "nothing more to add" after a
-          tool turn that already delivered the user-facing text.
-        - ``refusal`` — the model declined to respond (Claude 4.5+). The
-          Messages API returns an empty ``content`` list with this stop
-          reason. Treating it as invalid sends a deterministic refusal into
-          the invalid-response retry loop, which reproduces the refusal on
-          every attempt and surfaces a misleading "rate limited / invalid
-          response" error instead of the refusal. ``normalize_response`` maps
-          ``refusal`` → ``content_filter`` so the agent loop's refusal handler
-          can surface it.
-
-        Treating either as invalid falsely retries a completed response.
+        An empty content list is legitimate when ``stop_reason == "end_turn"``
+        — the model's canonical way of signalling "nothing more to add" after
+        a tool turn that already delivered the user-facing text. Treating it
+        as invalid falsely retries a completed response.
        """
        if response is None:
            return False
@@ -208,7 +197,7 @@ class AnthropicTransport(ProviderTransport):
        if not isinstance(content_blocks, list):
            return False
        if not content_blocks:
-            return getattr(response, "stop_reason", None) in {"end_turn", "refusal"}
+            return getattr(response, "stop_reason", None) == "end_turn"
        return True

    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@@ -664,42 +664,8 @@ class ChatCompletionsTransport(ProviderTransport):
        if rd:
            provider_data["reasoning_details"] = rd

-        # OpenAI structured-refusal field. When a model declines, the SDK
-        # populates ``message.refusal`` with the explanation and leaves
-        # ``content`` empty. OpenAI-compatible proxies that front Anthropic /
-        # Bedrock (e.g. Nous Portal) surface a Claude refusal this way — or via
-        # ``finish_reason="content_filter"`` — instead of the native
-        # ``stop_reason="refusal"``. Without capturing it the refusal looks
-        # like an empty response, so the agent loop retries a deterministic
-        # refusal three times and gives up with "no content after retries".
-        # Promote it to content + a ``content_filter`` finish reason so the
-        # loop's refusal handler surfaces it clearly and stops. ``refusal`` is
-        # ``None`` for normal responses, so this is a no-op in the common case.
-        content = msg.content
-        refusal = getattr(msg, "refusal", None)
-        if refusal is None and hasattr(msg, "model_extra"):
-            _msg_extra = getattr(msg, "model_extra", None) or {}
-            if isinstance(_msg_extra, dict):
-                refusal = _msg_extra.get("refusal")
-        if isinstance(refusal, str) and refusal.strip():
-            # Record the refusal explanation regardless — it's useful provider
-            # metadata even when the model also returned a usable payload.
-            provider_data["refusal"] = refusal
-            _has_text = isinstance(content, str) and content.strip()
-            _has_tool_calls = bool(tool_calls)
-            # Only promote to a terminal ``content_filter`` when the refusal is
-            # the *sole* payload — no visible text and no tool calls. A response
-            # that carries real content (or tool calls) alongside a refusal note
-            # is a normal, usable turn: surfacing it as a failed safety refusal
-            # would discard the model's actual work. In the empty-payload case,
-            # adopt the refusal as content so the loop has something to show.
-            if not _has_text and not _has_tool_calls:
-                content = refusal
-                if finish_reason in (None, "stop"):
-                    finish_reason = "content_filter"
-
        return NormalizedResponse(
-            content=content,
+            content=msg.content,
            tool_calls=tool_calls,
            finish_reason=finish_reason,
            reasoning=reasoning,
--- a/agent/transports/codex.py
+++ b/agent/transports/codex.py
@@ -218,10 +218,22 @@ class ResponsesApiTransport(ProviderTransport):
            kwargs.pop("timeout", None)

        if is_codex_backend:
-            # chatgpt.com/backend-api/codex rejects body-level
-            # ``extra_headers`` with HTTP 400. Correlation/cache routing for
-            # this backend must not be sent through the Responses payload.
-            kwargs.pop("extra_headers", None)
+            prompt_cache_key = kwargs.get("prompt_cache_key")
+            cache_scope_id = str(prompt_cache_key or session_id or "").strip()
+            if cache_scope_id:
+                existing_extra_headers = kwargs.get("extra_headers")
+                merged_extra_headers: Dict[str, str] = {}
+                if isinstance(existing_extra_headers, dict):
+                    merged_extra_headers.update(
+                        {
+                            str(key): str(value)
+                            for key, value in existing_extra_headers.items()
+                            if key and value is not None
+                        }
+                    )
+                merged_extra_headers["session_id"] = cache_scope_id
+                merged_extra_headers["x-client-request-id"] = cache_scope_id
+                kwargs["extra_headers"] = merged_extra_headers

        max_tokens = params.get("max_tokens")
        if max_tokens is not None and not is_codex_backend:
--- a/apps/bootstrap-installer/src-tauri/src/update.rs
+++ b/apps/bootstrap-installer/src-tauri/src/update.rs
@@ -3,9 +3,8 @@
 //! Driven when the installer is launched as `Hermes-Setup.exe --update` (see
 //! `AppMode` in lib.rs). The desktop app hands off to us — it exits, then we:
 //!
-//!   1. wait for the old Hermes desktop process to fully exit (so both the
-//!      venv shim and packaged app.asar are free; otherwise `hermes update`
-//!      or repair bootstrap can race locked files),
+//!   1. wait for the old Hermes desktop process to fully exit (so the venv
+//!      shim is free; otherwise `hermes update` aborts with exit code 2),
 //!   2. run `hermes update --yes --gateway` (Python/repo update; this does NOT
 //!      rebuild apps/desktop by design — see cmd_update in hermes_cli/main.py),
 //!   3. run `hermes desktop --build-only` (the rebuild step update skips),
@@ -39,8 +38,8 @@ use crate::events::{BootstrapEvent, LogStream, StageInfo, StageState};
 /// hermes_cli/main.py (sys.exit(2)). We surface a targeted message for this.
 const UPDATE_EXIT_CONCURRENT: i32 = 2;

-/// How long to wait for the old desktop process to release files under the
-/// install tree before giving up and letting `hermes update`'s own guard decide.
+/// How long to wait for the old desktop process to release the venv shim
+/// before giving up and letting `hermes update`'s own guard decide.
 const DESKTOP_EXIT_WAIT: Duration = Duration::from_secs(20);
 const DESKTOP_EXIT_POLL: Duration = Duration::from_millis(500);

@@ -151,10 +150,8 @@ async fn run_update(app: AppHandle) -> Result<()> {
    // ---- pre-step: wait for the old desktop to die -----------------------
    // The desktop exec'd us then called app.exit(), but process teardown is
    // async on Windows. If it still holds the venv shim, `hermes update`
-    // aborts with exit 2. If it still holds the packaged app.asar,
-    // install.ps1's repair/re-clone path cannot move/remove the install tree.
-    // Give both handles a bounded window to clear.
-    wait_for_install_locks_free(&install_root, &app, "update").await;
+    // aborts with exit 2. Give it a bounded window to clear.
+    wait_for_venv_free(&install_root, &app).await;

    // ---- stage 1: hermes update -----------------------------------------
    // Pass --branch so `hermes update` targets the branch this installer was
@@ -176,8 +173,8 @@ async fn run_update(app: AppHandle) -> Result<()> {
        vec!["update".into(), "--yes".into(), "--gateway".into()];
    // --force skips `hermes update`'s Windows running-exe guard (which would
    // `sys.exit(2)` and dead-end the handoff). By contract the desktop has
-    // already exited and waited for the install locks to clear before launching
-    // us, and wait_for_install_locks_free below force-kills any straggler — so by the
+    // already exited and waited for the venv shim to unlock before launching
+    // us, and the wait_for_venv_free pre-step force-kills any straggler — so by the
    // time `hermes update` runs there is no legitimate hermes.exe to protect,
    // and the guard would only produce a false "Hermes is still running" stop.
    update_args.push("--force".into());
@@ -394,57 +391,48 @@ async fn run_update(app: AppHandle) -> Result<()> {
    Ok(())
 }

-/// Poll until the venv shim AND packaged desktop app bundle are no longer locked
-/// (Windows) or a bounded timeout elapses. On non-Windows this is a short fixed
-/// grace since file locking isn't the failure mode there.
-pub(crate) async fn wait_for_install_locks_free(install_root: &Path, app: &AppHandle, stage: &str) {
-    let lock_targets = install_lock_probe_paths(install_root);
+/// Poll until the venv shim is no longer locked (Windows) or a bounded timeout
+/// elapses. On non-Windows this is a short fixed grace since file locking
+/// isn't the failure mode there.
+async fn wait_for_venv_free(install_root: &Path, app: &AppHandle) {
+    let shim = venv_hermes(install_root);
    let deadline = Instant::now() + DESKTOP_EXIT_WAIT;

-    emit_log(app, Some(stage), LogStream::Stdout, "[handoff] waiting for Hermes to exit…");
+    emit_log(app, Some("update"), LogStream::Stdout, "[update] waiting for Hermes to exit…");

    loop {
-        let locked = locked_paths(&lock_targets);
-        if locked.is_empty() {
+        if !is_locked(&shim) {
            return;
        }
        if Instant::now() >= deadline {
-            // Last resort: a backend hermes.exe (or the desktop Hermes.exe
-            // itself) is still holding one of the update-sensitive files. The
-            // desktop should have reaped its tree before handing off, but
-            // SIGTERM races / detached grandchildren / AV handles can leave a
-            // straggler. Rather than "proceed anyway" straight into uv's
-            // "Access is denied" or install.ps1's locked app.asar failure,
-            // force-kill every Hermes.exe except ourselves, then give the OS a
-            // beat to unload the image.
+            // Last resort: a backend hermes.exe (or a grandchild it spawned)
+            // is still holding the shim. The desktop should have reaped its
+            // tree before handing off, but SIGTERM races / detached
+            // grandchildren / AV handles can leave a straggler. Rather than
+            // "proceed anyway" straight into uv's "Access is denied", force-kill
+            // every hermes.exe except ourselves, then give the OS a beat to
+            // unload the image.
            emit_log(
                app,
-                Some(stage),
+                Some("update"),
                LogStream::Stdout,
-                &format!(
-                    "[handoff] Hermes still holding install files ({}); force-killing stragglers…",
-                    format_locked_paths(&locked)
-                ),
+                "[update] Hermes still holding the venv shim; force-killing stragglers…",
            );
            force_kill_other_hermes();
            tokio::time::sleep(Duration::from_millis(800)).await;
-            let locked_after_kill = locked_paths(&lock_targets);
-            if locked_after_kill.is_empty() {
+            if !is_locked(&shim) {
                emit_log(
                    app,
-                    Some(stage),
+                    Some("update"),
                    LogStream::Stdout,
-                    "[handoff] install files freed after force-kill",
+                    "[update] venv shim freed after force-kill",
                );
            } else {
                emit_log(
                    app,
-                    Some(stage),
+                    Some("update"),
                    LogStream::Stdout,
-                    &format!(
-                        "[handoff] install files still locked ({}); proceeding (--force + quarantine will handle it)",
-                        format_locked_paths(&locked_after_kill)
-                    ),
+                    "[update] venv shim still locked; proceeding (--force + quarantine will handle it)",
                );
            }
            return;
@@ -453,44 +441,13 @@ pub(crate) async fn wait_for_install_locks_free(install_root: &Path, app: &AppHa
    }
 }

-fn install_lock_probe_paths(install_root: &Path) -> Vec<PathBuf> {
-    let mut paths = vec![venv_hermes(install_root)];
-    paths.extend(desktop_app_payload_paths(install_root));
-    paths
-}
-
-fn desktop_app_payload_paths(install_root: &Path) -> Vec<PathBuf> {
-    let release = install_root.join("apps").join("desktop").join("release");
-    if cfg!(target_os = "windows") {
-        vec![
-            release.join("win-unpacked").join("resources").join("app.asar"),
-            release.join("win-arm64-unpacked").join("resources").join("app.asar"),
-        ]
-    } else if cfg!(target_os = "macos") {
-        vec![
-            release.join("mac").join("Hermes.app").join("Contents").join("Resources").join("app.asar"),
-            release.join("mac-arm64").join("Hermes.app").join("Contents").join("Resources").join("app.asar"),
-        ]
-    } else {
-        vec![release.join("linux-unpacked").join("resources").join("app.asar")]
-    }
-}
-
-fn locked_paths(paths: &[PathBuf]) -> Vec<PathBuf> {
-    paths.iter().filter(|p| is_locked(p)).cloned().collect()
-}
-
-fn format_locked_paths(paths: &[PathBuf]) -> String {
-    paths.iter().map(|p| p.display().to_string()).collect::<Vec<_>>().join(", ")
-}
-
 /// Force-kill any `hermes.exe` other than this process. Windows-only; a no-op
 /// elsewhere (POSIX has no mandatory-lock contention). We can't selectively
 /// target "the backend" by PID here — the desktop already exited and we never
 /// knew its children — so we kill the whole `hermes.exe` image tree via
 /// taskkill, excluding our own PID.
 ///
-/// Safe w.r.t. our own update child: this runs inside the install-lock wait,
+/// Safe w.r.t. our own update child: this runs inside `wait_for_venv_free`,
 /// which completes BEFORE we spawn `venv\Scripts\hermes.exe update`. At this
 /// point no update-driven hermes.exe exists yet, so the only hermes.exe images
 /// are stragglers from the old desktop — exactly what we want gone. (`/FI PID
@@ -934,29 +891,6 @@ mod tests {
        assert!(!is_locked(Path::new("/nonexistent/does/not/exist/xyz")));
    }

-    #[test]
-    fn lock_probe_paths_include_desktop_app_payload() {
-        let root = Path::new("/x/hermes-agent");
-        let probes = install_lock_probe_paths(root);
-
-        assert!(
-            probes.iter().any(|p| p == &venv_hermes(root)),
-            "venv shim remains part of the update lock probe"
-        );
-        assert!(
-            probes.iter().any(|p| p.ends_with(Path::new("resources/app.asar"))),
-            "packaged app.asar must be probed so repair/re-clone waits for the old desktop to exit"
-        );
-    }
-
-    #[test]
-    fn locked_paths_ignores_missing_payloads() {
-        let root = Path::new("/nonexistent/hermes-agent");
-        let probes = install_lock_probe_paths(root);
-
-        assert!(locked_paths(&probes).is_empty());
-    }
-
    #[test]
    fn parses_update_branch_from_space_or_equals_args() {
        assert_eq!(
--- a/apps/desktop/electron/backend-env.cjs
+++ b/apps/desktop/electron/backend-env.cjs
@@ -67,16 +67,6 @@ function buildDesktopBackendPath({
  )
 }

-function normalizeHermesHomeRoot(hermesHome, { pathModule = pathModuleForPlatform(process.platform) } = {}) {
-  if (!hermesHome) return hermesHome
-  const resolved = pathModule.resolve(String(hermesHome))
-  const parent = pathModule.dirname(resolved)
-  if (pathModule.basename(parent).toLowerCase() === 'profiles') {
-    return pathModule.dirname(parent)
-  }
-  return resolved
-}
-
 function buildDesktopBackendEnv({
  hermesHome,
  pythonPathEntries = [],
@@ -107,6 +97,5 @@ module.exports = {
  buildDesktopBackendEnv,
  buildDesktopBackendPath,
  delimiterForPlatform,
-  normalizeHermesHomeRoot,
  pathEnvKey
 }
--- a/apps/desktop/electron/backend-env.test.cjs
+++ b/apps/desktop/electron/backend-env.test.cjs
@@ -7,7 +7,6 @@ const {
  appendUniquePathEntries,
  buildDesktopBackendEnv,
  buildDesktopBackendPath,
-  normalizeHermesHomeRoot,
  pathEnvKey
 } = require('./backend-env.cjs')

@@ -67,21 +66,6 @@ test('buildDesktopBackendEnv extends PYTHONPATH and backend PATH together', () =
  assert.ok(env.PATH.includes('/opt/homebrew/bin'))
 })

-test('normalizeHermesHomeRoot maps profile homes back to the global Hermes root', () => {
-  assert.equal(
-    normalizeHermesHomeRoot('/Users/test/.hermes/profiles/oracle', { pathModule: path.posix }),
-    '/Users/test/.hermes'
-  )
-  assert.equal(
-    normalizeHermesHomeRoot('C:\\Users\\test\\AppData\\Local\\hermes\\profiles\\oracle', { pathModule: path.win32 }),
-    'C:\\Users\\test\\AppData\\Local\\hermes'
-  )
-  assert.equal(
-    normalizeHermesHomeRoot('/Users/test/.hermes', { pathModule: path.posix }),
-    '/Users/test/.hermes'
-  )
-})
-
 test('Windows PATH casing and delimiter are preserved without POSIX sane entries', () => {
  const env = buildDesktopBackendEnv({
    hermesHome: 'C:\\Users\\test\\AppData\\Local\\hermes',
--- a/apps/desktop/electron/backend-ready.cjs
+++ b/apps/desktop/electron/backend-ready.cjs
@@ -1,66 +0,0 @@
-const _READY_RE = /^HERMES_DASHBOARD_READY port=(\d+)/m
-
-/**
- * Watch a child process's stdout for the `HERMES_DASHBOARD_READY port=<N>`
- * line that web_server.py prints after uvicorn binds its socket.
- *
- * Returns the parsed port. Rejects if:
- *   - the child exits before emitting the line
- *   - the child emits an `error` event
- *   - no line arrives within the timeout
- *
- * A single `cleanup()` tears down every listener (data/exit/error/timeout)
- * on every terminal path — resolve, reject, or timeout — so repeated
- * backend spawns don't leak listener slots on the child.
- */
-function waitForDashboardPort(child, timeoutMs = 45_000) {
-  return new Promise((resolve, reject) => {
-    let buf = ''
-    let done = false
-
-    function cleanup() {
-      if (done) return
-      done = true
-      clearTimeout(timer)
-      child.stdout.off('data', onData)
-      child.off('exit', onExit)
-      child.off('error', onError)
-    }
-
-    function onData(chunk) {
-      buf += chunk.toString()
-      let nl
-      while ((nl = buf.indexOf('\n')) !== -1) {
-        const line = buf.slice(0, nl)
-        buf = buf.slice(nl + 1)
-        const m = line.match(_READY_RE)
-        if (m) {
-          cleanup()
-          resolve(parseInt(m[1], 10))
-          return
-        }
-      }
-    }
-
-    function onExit(code, signal) {
-      cleanup()
-      reject(new Error(`Hermes backend: exited before port announcement (${signal || code})`))
-    }
-
-    function onError(err) {
-      cleanup()
-      reject(err)
-    }
-
-    const timer = setTimeout(() => {
-      cleanup()
-      reject(new Error(`Timed out waiting for Hermes backend port announcement (${timeoutMs}ms)`))
-    }, timeoutMs)
-
-    child.stdout.on('data', onData)
-    child.on('exit', onExit)
-    child.on('error', onError)
-  })
-}
-
-module.exports = { waitForDashboardPort }
--- a/apps/desktop/electron/git-worktrees.cjs
+++ b/apps/desktop/electron/git-worktrees.cjs
@@ -1,174 +0,0 @@
-'use strict'
-
-// Resolve git-worktree relationships for a set of session cwds, reading git's
-// on-disk metadata directly (no `git` spawn per path):
-//
-//   - A normal checkout has a `.git` DIRECTORY at its root → it's the main
-//     worktree; its repo root IS that directory's parent.
-//   - A linked worktree has a `.git` FILE: `gitdir: <repo>/.git/worktrees/<name>`.
-//     That admin dir's `commondir` points back at the shared `<repo>/.git`, whose
-//     parent is the main repo root.
-//
-// Grouping by repoRoot therefore clusters a repo's main checkout with all of its
-// linked worktrees, regardless of how the worktree directories are named. The
-// branch (read from the worktree's own HEAD) gives each worktree a meaningful
-// label.
-
-const fs = require('node:fs')
-const path = require('node:path')
-const { resolveRequestedPathForIpc } = require('./hardening.cjs')
-
-// Walk up from `start` to the nearest ancestor that carries a `.git` entry
-// (file for a linked worktree, dir for the main checkout). Capped so a stray
-// path can't loop forever.
-function findGitHost(start, fsImpl) {
-  let dir = start
-
-  for (let i = 0; i < 64; i += 1) {
-    const dotgit = path.join(dir, '.git')
-
-    try {
-      if (fsImpl.existsSync(dotgit)) {
-        return dir
-      }
-    } catch {
-      return null
-    }
-
-    const parent = path.dirname(dir)
-
-    if (parent === dir) {
-      return null
-    }
-
-    dir = parent
-  }
-
-  return null
-}
-
-function readBranch(gitDir, fsImpl) {
-  try {
-    const head = fsImpl.readFileSync(path.join(gitDir, 'HEAD'), 'utf8').trim()
-    const ref = head.match(/^ref:\s*refs\/heads\/(.+)$/)
-
-    if (ref) {
-      return ref[1]
-    }
-
-    // Detached HEAD: surface a short sha so the worktree still gets a label.
-    return /^[0-9a-f]{7,40}$/i.test(head) ? head.slice(0, 8) : null
-  } catch {
-    return null
-  }
-}
-
-// Given the directory that owns the `.git` entry, resolve its worktree identity.
-function resolveFromHost(host, fsImpl) {
-  const dotgit = path.join(host, '.git')
-  let stat
-
-  try {
-    stat = fsImpl.statSync(dotgit)
-  } catch {
-    return null
-  }
-
-  if (stat.isDirectory()) {
-    return {
-      repoRoot: host,
-      worktreeRoot: host,
-      isMainWorktree: true,
-      branch: readBranch(dotgit, fsImpl)
-    }
-  }
-
-  // Linked worktree: `.git` is a file pointing at the admin dir.
-  let contents
-
-  try {
-    contents = fsImpl.readFileSync(dotgit, 'utf8').trim()
-  } catch {
-    return null
-  }
-
-  const match = contents.match(/^gitdir:\s*(.+)$/m)
-
-  if (!match) {
-    return null
-  }
-
-  const adminDir = path.resolve(host, match[1].trim())
-
-  // `commondir` resolves to the shared `<repo>/.git`; fall back to walking two
-  // levels up from `<repo>/.git/worktrees/<name>` if it's missing.
-  let commonDir
-
-  try {
-    const rel = fsImpl.readFileSync(path.join(adminDir, 'commondir'), 'utf8').trim()
-    commonDir = path.resolve(adminDir, rel)
-  } catch {
-    commonDir = path.dirname(path.dirname(adminDir))
-  }
-
-  return {
-    repoRoot: path.dirname(commonDir),
-    worktreeRoot: host,
-    isMainWorktree: false,
-    branch: readBranch(adminDir, fsImpl)
-  }
-}
-
-function resolveWorktree(startPath, fsImpl = fs) {
-  let resolved
-
-  try {
-    resolved = resolveRequestedPathForIpc(startPath, { purpose: 'Worktree lookup' })
-  } catch {
-    return null
-  }
-
-  let start = resolved
-
-  try {
-    const stat = fsImpl.statSync(resolved)
-
-    if (!stat.isDirectory()) {
-      start = path.dirname(resolved)
-    }
-  } catch {
-    return null
-  }
-
-  const host = findGitHost(start, fsImpl)
-
-  if (!host) {
-    return null
-  }
-
-  return resolveFromHost(host, fsImpl)
-}
-
-// Batch entry point for the renderer: maps each requested cwd to its worktree
-// info (or null when it isn't inside a git checkout / can't be read). Dedupes so
-// many sessions sharing a cwd cost one lookup.
-async function worktreesForIpc(cwds, options = {}) {
-  const fsImpl = options.fs || fs
-  const list = Array.isArray(cwds) ? cwds : []
-  const out = {}
-
-  for (const cwd of list) {
-    if (typeof cwd !== 'string' || !cwd.trim() || cwd in out) {
-      continue
-    }
-
-    out[cwd] = resolveWorktree(cwd, fsImpl)
-  }
-
-  return out
-}
-
-module.exports = {
-  resolveWorktree,
-  worktreesForIpc
-}
--- a/apps/desktop/electron/main.cjs
+++ b/apps/desktop/electron/main.cjs
@@ -35,13 +35,12 @@ const {
 const { canImportHermesCli, verifyHermesCli } = require('./backend-probes.cjs')
 const { probeGatewayWebSocket } = require('./gateway-ws-probe.cjs')
 const { adoptServedDashboardToken } = require('./dashboard-token.cjs')
-const { waitForDashboardPort } = require('./backend-ready.cjs')
+const { PortPool } = require('./port-pool.cjs')
 const { serializeJsonBody, setJsonRequestHeaders } = require('./oauth-net-request.cjs')
 const { fetchMarketplaceThemes, searchMarketplaceThemes } = require('./vscode-marketplace.cjs')
-const { buildDesktopBackendEnv, normalizeHermesHomeRoot } = require('./backend-env.cjs')
+const { buildDesktopBackendEnv } = require('./backend-env.cjs')
 const { readDirForIpc } = require('./fs-read-dir.cjs')
 const { gitRootForIpc } = require('./git-root.cjs')
-const { worktreesForIpc } = require('./git-worktrees.cjs')
 const { OFFICIAL_REPO_HTTPS_URL, isOfficialSshRemote } = require('./update-remote.cjs')
 const {
  buildPosixCleanupScript,
@@ -112,6 +111,12 @@ if (USER_DATA_OVERRIDE) {
  app.setPath('userData', resolvedUserData)
 }

+const PORT_FLOOR = 9120 // inclusive lowest localhost port handed to a backend
+const PORT_CEILING = 9199 // inclusive highest localhost port handed to a backend
+// In-process port reservations that close the pickPort() TOCTOU window where
+// two concurrent backend spawns could be handed the same port. See
+// port-pool.cjs for the full rationale.
+const portPool = new PortPool(PORT_FLOOR, PORT_CEILING)
 const DEV_SERVER = process.env.HERMES_DESKTOP_DEV_SERVER
 const IS_PACKAGED = app.isPackaged
 const IS_MAC = process.platform === 'darwin'
@@ -240,7 +245,7 @@ if (INSTALL_STAMP) {
 // HERMES_HOME beneath the throwaway userData dir so a fresh-install run never
 // touches the user's real ~/.hermes / %LOCALAPPDATA%\hermes.
 function resolveHermesHome() {
-  if (process.env.HERMES_HOME) return normalizeHermesHomeRoot(process.env.HERMES_HOME)
+  if (process.env.HERMES_HOME) return path.resolve(process.env.HERMES_HOME)
  if (USER_DATA_OVERRIDE) return path.join(path.resolve(USER_DATA_OVERRIDE), 'hermes-home')
  if (IS_WINDOWS && process.env.LOCALAPPDATA) {
    const localappdata = path.join(process.env.LOCALAPPDATA, 'hermes')
@@ -1835,44 +1840,6 @@ async function applyUpdates(opts = {}) {
  }
 }

-async function handOffWindowsBootstrapRecovery(reason) {
-  if (!IS_WINDOWS || !IS_PACKAGED) return false
-
-  const updater = resolveUpdaterBinary()
-  if (!updater) return false
-
-  const updateRoot = resolveUpdateRoot()
-  const { branch: configuredBranch } = readDesktopUpdateConfig()
-  const branch = directoryExists(path.join(updateRoot, '.git'))
-    ? await resolveHealedBranch(updateRoot, configuredBranch || DEFAULT_UPDATE_BRANCH)
-    : configuredBranch || DEFAULT_UPDATE_BRANCH
-  const venvBin = path.join(updateRoot, 'venv', IS_WINDOWS ? 'Scripts' : 'bin')
-  const venvHermes = path.join(venvBin, IS_WINDOWS ? 'hermes.exe' : 'hermes')
-  const updaterArgs = fileExists(venvHermes) ? ['--update', '--branch', branch] : ['--repair', '--branch', branch]
-
-  await releaseBackendLockForUpdate(updateRoot)
-
-  const child = spawn(updater, updaterArgs, {
-    cwd: HERMES_HOME,
-    env: {
-      ...process.env,
-      HERMES_HOME,
-      PATH: [path.join(HERMES_HOME, 'node', 'bin'), venvBin, process.env.PATH].filter(Boolean).join(path.delimiter)
-    },
-    detached: true,
-    stdio: 'ignore',
-    windowsHide: false
-  })
-  child.unref()
-
-  rememberLog(`[bootstrap] handed off ${reason} recovery to updater: ${updater} ${updaterArgs.join(' ')}; exiting desktop to release app.asar`)
-  setTimeout(() => {
-    app.quit()
-  }, 600)
-
-  return true
-}
-
 // Resolve the hermes CLI to drive an in-app update: prefer the venv shim in
 // the install we're updating, fall back to `hermes` on PATH.
 function resolveHermesCliBinary(updateRoot) {
@@ -2470,14 +2437,6 @@ async function ensureRuntime(backend) {
  if (backend.kind === 'bootstrap-needed') {
    rememberLog('[bootstrap] no Hermes install found; starting first-launch bootstrap')

-    if (await handOffWindowsBootstrapRecovery('bootstrap-needed')) {
-      const handoffError = new Error('Hermes recovery was handed off to Hermes Setup. The desktop will restart when recovery completes.')
-      handoffError.isBootstrapFailure = true
-      handoffError.bootstrapHandedOff = true
-      bootstrapFailure = handoffError
-      throw handoffError
-    }
-
    // Eagerly flip the bootstrap UI state to 'active' so the renderer
    // shows the install overlay BEFORE the runner finishes fetching the
    // manifest (which on slow networks can take tens of seconds and would
@@ -2607,6 +2566,24 @@ async function ensureRuntime(backend) {
  return backend
 }

+function isPortAvailable(port) { // Promise<boolean>: true iff a throwaway server can bind 127.0.0.1:port; never rejects
+  return new Promise(resolve => {
+    const server = net.createServer()
+    server.once('error', () => resolve(false)) // any listen error is reported as "not available"
+    server.once('listening', () => {
+      server.close(() => resolve(true)) // close the probe socket first, then report the port as free
+    })
+    server.listen(port, '127.0.0.1') // loopback only, matching the '--host 127.0.0.1' passed to the backend
+  })
+}
+
+async function pickPort() { // reserves a port via portPool using isPortAvailable as the probe; throws if none is obtained
+  const port = await portPool.reserve(isPortAvailable)
+  if (port === null) { // a null result is treated as "no port could be reserved" (reserve() lives in port-pool.cjs)
+    throw new Error(`No free localhost port in ${PORT_FLOOR}-${PORT_CEILING}`)
+  }
+  return port // still reserved in portPool; callers release it via portPool.release(port)
+}

 function fetchJson(url, token, options = {}) {
  return new Promise((resolve, reject) => {
@@ -4684,14 +4661,25 @@ async function spawnPoolBackend(profile, entry) {
    }
  }

+  const port = await pickPort()
  const token = crypto.randomBytes(32).toString('base64url')
  // --profile wins over the inherited HERMES_HOME env (see _apply_profile_override
  // step 3 in hermes_cli/main.py), so the child re-homes to this profile.
-  // --port 0: the OS assigns an ephemeral port; the child announces it on stdout.
-  const dashboardArgs = ['--profile', profile, 'dashboard', '--no-open', '--host', '127.0.0.1', '--port', '0']
-  const backend = await ensureRuntime(resolveHermesBackend(dashboardArgs))
-  const hermesCwd = resolveHermesCwd()
-  const webDist = resolveWebDist()
+  const dashboardArgs = ['--profile', profile, 'dashboard', '--no-open', '--host', '127.0.0.1', '--port', String(port)]
+  let backend
+  let hermesCwd
+  let webDist
+  try {
+    backend = await ensureRuntime(resolveHermesBackend(dashboardArgs))
+    hermesCwd = resolveHermesCwd()
+    webDist = resolveWebDist()
+  } catch (error) {
+    // These run before the child exists / its exit handler is attached, so a
+    // throw here would otherwise leak the reservation and slowly exhaust the
+    // 9120-9199 range across switch cycles in one app session.
+    portPool.release(port)
+    throw error
+  }

  rememberLog(`Starting Hermes backend for profile "${profile}" via ${backend.label}`)

@@ -4719,6 +4707,7 @@ async function spawnPoolBackend(profile, entry) {
    })
  )
  entry.process = child
+  entry.port = port
  entry.token = token

  child.stdout.on('data', rememberLog)
@@ -4732,11 +4721,13 @@ async function spawnPoolBackend(profile, entry) {
  child.once('error', error => {
    rememberLog(`Hermes backend for profile "${profile}" failed to start: ${error.message}`)
    backendPool.delete(profile)
+    portPool.release(port)
    rejectStart?.(error)
  })
  child.once('exit', (code, signal) => {
    rememberLog(`Hermes backend for profile "${profile}" exited (${signal || code})`)
    backendPool.delete(profile)
+    portPool.release(port)
    if (!ready) {
      rejectStart?.(
        new Error(`Hermes backend for profile "${profile}" exited before it became ready (${signal || code}).`)
@@ -4744,10 +4735,6 @@ async function spawnPoolBackend(profile, entry) {
    }
  })

-  // Discover the ephemeral port the child bound to
-  const port = await Promise.race([waitForDashboardPort(child), startFailed])
-  entry.port = port
-
  const baseUrl = `http://127.0.0.1:${port}`
  await Promise.race([waitForHermes(baseUrl, token), startFailed])
  ready = true
@@ -4775,6 +4762,7 @@ function stopPoolBackend(profile) {
  const entry = backendPool.get(profile)
  if (!entry) return
  backendPool.delete(profile)
+  if (entry.port) portPool.release(entry.port)
  if (entry.process && !entry.process.killed) {
    try {
      entry.process.kill('SIGTERM')
@@ -4860,6 +4848,11 @@ async function startHermes() {
  }
  if (connectionPromise) return connectionPromise

+  // Hoisted so the outer .catch can release a port reserved by pickPort() when
+  // a throw (e.g. ensureRuntime failing) happens before the child's exit
+  // handler is attached. Stays null on the remote path (no port picked).
+  let reservedPort = null
+
  connectionPromise = (async () => {
    await advanceBootProgress('backend.resolve', 'Resolving Hermes backend', 8)
    // Resolve for the desktop's primary profile so a per-profile remote
@@ -4887,9 +4880,11 @@ async function startHermes() {
      }
    }

+    await advanceBootProgress('backend.port', 'Finding an open local port', 16)
+    const port = await pickPort()
+    reservedPort = port
    const token = crypto.randomBytes(32).toString('base64url')
-    // --port 0: the OS assigns an ephemeral port; the child announces it on stdout.
-    const dashboardArgs = ['dashboard', '--no-open', '--host', '127.0.0.1', '--port', '0']
+    const dashboardArgs = ['dashboard', '--no-open', '--host', '127.0.0.1', '--port', String(port)]
    // Pin the desktop's chosen profile via the global --profile flag. This is
    // deterministic (it wins over the sticky ~/.hermes/active_profile file) and
    // resolves HERMES_HOME the same way `hermes -p <name>` does on the CLI. An
@@ -4956,6 +4951,7 @@ async function startHermes() {
      )
      hermesProcess = null
      connectionPromise = null
+      portPool.release(port)
      sendBackendExit({ code: null, signal: null, error: error.message })
      rejectBackendStart?.(error)
    })
@@ -4963,6 +4959,7 @@ async function startHermes() {
      rememberLog(`Hermes backend exited (${signal || code})`)
      hermesProcess = null
      connectionPromise = null
+      portPool.release(port)
      sendBackendExit({ code, signal })
      if (!backendReady) {
        const message = `Hermes backend exited before it became ready (${signal || code}).`
@@ -4983,10 +4980,6 @@ async function startHermes() {
      }
    })

-    await advanceBootProgress('backend.port', 'Waiting for Hermes backend to launch', 86)
-    // Discover the ephemeral port the child bound to
-    const port = await Promise.race([waitForDashboardPort(hermesProcess), backendStartFailed])
-
    const baseUrl = `http://127.0.0.1:${port}`
    await advanceBootProgress('backend.wait', 'Waiting for Hermes backend to become ready', 90)
    await Promise.race([waitForHermes(baseUrl, token), backendStartFailed])
@@ -5026,6 +5019,7 @@ async function startHermes() {
      { allowDecrease: true }
    )
    connectionPromise = null
+    portPool.release(reservedPort)
    throw error
  })

@@ -5609,30 +5603,11 @@ ipcMain.handle('hermes:api', async (_event, request) => {

 ipcMain.handle('hermes:notify', (_event, payload) => {
  if (!Notification.isSupported()) return false
-  // Action buttons render only on signed macOS builds; elsewhere they're dropped
-  // and the body click still works.
-  const actions = Array.isArray(payload?.actions) ? payload.actions : []
-  const notification = new Notification({
+  new Notification({
    title: payload?.title || 'Hermes',
    body: payload?.body || '',
-    silent: Boolean(payload?.silent),
-    actions: actions.map(action => ({ type: 'button', text: String(action?.text || '') }))
-  })
-  notification.on('click', () => {
-    if (!mainWindow || mainWindow.isDestroyed()) return
-    focusWindow(mainWindow)
-    if (payload?.sessionId) {
-      mainWindow.webContents.send('hermes:focus-session', payload.sessionId)
-    }
-  })
-  notification.on('action', (_actionEvent, index) => {
-    if (!mainWindow || mainWindow.isDestroyed()) return
-    const action = actions[index]
-    if (action?.id) {
-      mainWindow.webContents.send('hermes:notification-action', { sessionId: payload?.sessionId, actionId: action.id })
-    }
-  })
-  notification.show()
+    silent: Boolean(payload?.silent)
+  }).show()
  return true
 })

@@ -6020,8 +5995,6 @@ ipcMain.handle('hermes:fs:readDir', async (_event, dirPath) => readDirForIpc(dir

 ipcMain.handle('hermes:fs:gitRoot', async (_event, startPath) => gitRootForIpc(startPath))

-ipcMain.handle('hermes:fs:worktrees', async (_event, cwds) => worktreesForIpc(cwds))
-
 ipcMain.handle('hermes:terminal:start', async (event, payload = {}) => {
  if (!nodePty) {
    throw new Error('PTY support is unavailable. Reinstall desktop dependencies and restart Hermes.')
--- a/apps/desktop/electron/port-pool.cjs
+++ b/apps/desktop/electron/port-pool.cjs
@@ -0,0 +1,87 @@
+'use strict'
+
+/**
+ * In-process port reservation pool for the desktop backend launcher.
+ *
+ * pickPort() probes a localhost port with a throwaway server and closes it
+ * before the real bind happens in a separate Python child. Between that probe
+ * and the child's bind there is a TOCTOU window: a second concurrent spawn
+ * (the primary backend racing a pool backend) can be handed the SAME port, and
+ * one then dies with EADDRINUSE ("address already in use" -> "Object has been
+ * destroyed" boot loop). Reserving the chosen port in THIS process until the
+ * child exits closes that window.
+ *
+ * The OS bind remains the source of truth; this only deconflicts racers inside
+ * this process — it can't stop a foreign squatter, which the probe + the
+ * EADDRINUSE self-heal still cover.
+ *
+ * The pool is dependency-injected (the availability probe is passed in) and
+ * free of Electron/Node socket I/O, so it is unit-tested without real sockets
+ * (see port-pool.test.cjs).
+ */
+class PortPool {
+  /**
+   * @param {number} floor   inclusive lowest port to hand out
+   * @param {number} ceiling inclusive highest port to hand out
+   */
+  constructor(floor, ceiling) {
+    this.floor = floor
+    this.ceiling = ceiling
+    this._reserved = new Set()
+  }
+
+  /** @returns {boolean} whether `port` is currently reserved in-process. */
+  has(port) {
+    return this._reserved.has(port)
+  }
+
+  /** Release a previously reserved port. No-op if it was not reserved. */
+  release(port) {
+    this._reserved.delete(port)
+  }
+
+  /** Drop all reservations. */
+  clear() {
+    this._reserved.clear()
+  }
+
+  /** @returns {number} count of currently reserved ports. */
+  get size() {
+    return this._reserved.size
+  }
+
+  /**
+   * Reserve and return the lowest port in [floor, ceiling] that is neither
+   * already reserved in-process nor rejected by `isAvailable(port)`, or null
+   * if no port could be reserved. `isAvailable` may be sync (boolean) or
+   * async (Promise<boolean>); it is awaited either way.
+   *
+   * The candidate is claimed BEFORE the probe is awaited and handed back if
+   * the probe rejects it (or throws). Claiming only after the await would let
+   * two overlapping reserve() calls both pass the `has` check, both probe the
+   * same free port, and both return it: the very race this pool exists to
+   * close. While a probe is in flight, overlapping callers skip that port, so
+   * a caller can see null even though a claimed port is later handed back.
+   *
+   * @param {(port: number) => boolean | Promise<boolean>} isAvailable
+   * @returns {Promise<number|null>}
+   */
+  async reserve(isAvailable) {
+    for (let port = this.floor; port <= this.ceiling; port += 1) {
+      if (this._reserved.has(port)) continue
+      // Claim first: every await below is a point where another reserve() runs.
+      this._reserved.add(port)
+      let available = false
+      try {
+        available = await isAvailable(port)
+      } finally {
+        // Rejected or probe threw: hand the claim back so the port is not leaked.
+        if (!available) this._reserved.delete(port)
+      }
+      if (available) return port
+    }
+    return null
+  }
+}
+
+module.exports = { PortPool }
--- a/apps/desktop/electron/port-pool.test.cjs
+++ b/apps/desktop/electron/port-pool.test.cjs
@@ -0,0 +1,77 @@
+/**
+ * Tests for electron/port-pool.cjs.
+ *
+ * Run with: node --test electron/port-pool.test.cjs
+ *
+ * PortPool is the in-process reservation that closes the pickPort() TOCTOU
+ * window. These cover selection order, skipping reserved/unavailable ports,
+ * release/reuse, exhaustion, and async probes — without real sockets.
+ */
+
+const test = require('node:test')
+const assert = require('node:assert/strict')
+
+const { PortPool } = require('./port-pool.cjs')
+
+const allFree = () => true
+
+test('reserve returns the lowest free port and reserves it', async () => {
+  const pool = new PortPool(9120, 9199)
+  const port = await pool.reserve(allFree)
+  assert.equal(port, 9120)
+  assert.ok(pool.has(9120))
+  assert.equal(pool.size, 1)
+})
+
+test('reserve skips ports already reserved in-process', async () => {
+  const pool = new PortPool(9120, 9199)
+  const first = await pool.reserve(allFree)
+  const second = await pool.reserve(allFree)
+  assert.equal(first, 9120)
+  assert.equal(second, 9121)
+})
+
+test('reserve skips ports the probe rejects', async () => {
+  const pool = new PortPool(9120, 9199)
+  const busy = new Set([9120, 9121])
+  const port = await pool.reserve(p => !busy.has(p))
+  assert.equal(port, 9122)
+})
+
+test('reserve returns null when every port is taken', async () => {
+  const pool = new PortPool(9120, 9121)
+  await pool.reserve(allFree)
+  await pool.reserve(allFree)
+  assert.equal(await pool.reserve(allFree), null)
+})
+
+test('release frees a reserved port for reuse', async () => {
+  const pool = new PortPool(9120, 9120)
+  assert.equal(await pool.reserve(allFree), 9120)
+  assert.equal(await pool.reserve(allFree), null) // exhausted
+  pool.release(9120)
+  assert.ok(!pool.has(9120))
+  assert.equal(await pool.reserve(allFree), 9120) // reusable
+})
+
+test('release is a no-op for an unreserved port', () => {
+  const pool = new PortPool(9120, 9199)
+  pool.release(9120)
+  assert.equal(pool.size, 0)
+})
+
+test('reserve awaits an async probe', async () => {
+  const pool = new PortPool(9120, 9199)
+  const busy = new Set([9120])
+  const port = await pool.reserve(p => Promise.resolve(!busy.has(p)))
+  assert.equal(port, 9121)
+})
+
+test('clear drops all reservations', async () => {
+  const pool = new PortPool(9120, 9199)
+  await pool.reserve(allFree)
+  await pool.reserve(allFree)
+  assert.equal(pool.size, 2)
+  pool.clear()
+  assert.equal(pool.size, 0)
+})
--- a/apps/desktop/electron/preload.cjs
+++ b/apps/desktop/electron/preload.cjs
@@ -54,7 +54,6 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
  getRecentLogs: () => ipcRenderer.invoke('hermes:logs:recent'),
  readDir: dirPath => ipcRenderer.invoke('hermes:fs:readDir', dirPath),
  gitRoot: startPath => ipcRenderer.invoke('hermes:fs:gitRoot', startPath),
-  worktrees: cwds => ipcRenderer.invoke('hermes:fs:worktrees', cwds),
  terminal: {
    dispose: id => ipcRenderer.invoke('hermes:terminal:dispose', id),
    resize: (id, size) => ipcRenderer.invoke('hermes:terminal:resize', id, size),
@@ -94,16 +93,6 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
    ipcRenderer.on('hermes:window-state-changed', listener)
    return () => ipcRenderer.removeListener('hermes:window-state-changed', listener)
  },
-  onFocusSession: callback => {
-    const listener = (_event, sessionId) => callback(sessionId)
-    ipcRenderer.on('hermes:focus-session', listener)
-    return () => ipcRenderer.removeListener('hermes:focus-session', listener)
-  },
-  onNotificationAction: callback => {
-    const listener = (_event, payload) => callback(payload)
-    ipcRenderer.on('hermes:notification-action', listener)
-    return () => ipcRenderer.removeListener('hermes:notification-action', listener)
-  },
  onPreviewFileChanged: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:preview-file-changed', listener)
--- a/apps/desktop/electron/windows-child-process.test.cjs
+++ b/apps/desktop/electron/windows-child-process.test.cjs
@@ -42,9 +42,6 @@ test('intentional or interactive desktop child processes stay documented', () =>
  const source = readElectronFile('main.cjs')

  assert.match(source, /windowsHide: false/)
-  assert.match(source, /handOffWindowsBootstrapRecovery/)
-  assert.match(source, /'--repair', '--branch'/)
-  assert.match(source, /'--update', '--branch'/)
  assert.match(source, /nodePty\.spawn\(command, args/)
  assert.match(source, /spawn\('cmd\.exe', \['\/c', 'start'/)
 })
--- a/apps/desktop/package.json
+++ b/apps/desktop/package.json
@@ -8,7 +8,7 @@
  "type": "module",
  "main": "electron/main.cjs",
  "engines": {
-    "node": "^20.19.0 || >=22.12.0"
+    "node": ">=26.0.0"
  },
  "scripts": {
    "dev": "concurrently -k \"npm:dev:renderer\" \"npm:dev:electron\"",
@@ -36,7 +36,7 @@
    "test:desktop:nsis": "node scripts/test-desktop.mjs nsis",
    "test:desktop:existing": "node scripts/test-desktop.mjs existing",
    "test:desktop:fresh": "node scripts/test-desktop.mjs fresh",
-    "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs",
+    "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/port-pool.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs",
    "typecheck": "tsc -p . --noEmit",
    "lint": "eslint src/ electron/",
    "lint:fix": "eslint src/ electron/ --fix",
@@ -90,7 +90,6 @@
    "react-router-dom": "^7.17.0",
    "react-shiki": "^0.9.3",
    "remark-math": "^6.0.0",
-    "remend": "^1.3.0",
    "shiki": "^4.0.2",
    "streamdown": "^2.5.0",
    "tailwind-merge": "^3.5.0",
@@ -99,7 +98,6 @@
    "unicode-animations": "^1.0.3",
    "unified": "^11.0.5",
    "unist-util-visit-parents": "^6.0.2",
-    "use-stick-to-bottom": "^1.1.6",
    "vfile": "^6.0.3",
    "web-haptics": "^0.0.6"
  },
--- a/apps/desktop/src/app/chat/composer/enter-submit-dom-race.test.tsx
+++ b/apps/desktop/src/app/chat/composer/enter-submit-dom-race.test.tsx
@@ -24,7 +24,6 @@ afterEach(cleanup)
 // state stays stale while the DOM already holds the text.
 function Harness({
  busy = false,
-  disabled = false,
  queued = [],
  onSubmit,
  onQueue,
@@ -32,7 +31,6 @@ function Harness({
  onDrain
 }: {
  busy?: boolean
-  disabled?: boolean
  queued?: readonly string[]
  onSubmit: (text: string) => void
  onQueue: (text: string) => void
@@ -54,10 +52,6 @@ function Harness({
  }

  const submitDraft = () => {
-    if (disabled) {
-      return
-    }
-
    const editor = editorRef.current
    if (editor) {
      const domText = composerPlainText(editor)
@@ -90,10 +84,6 @@ function Harness({
      const editorText = editorRef.current ? composerPlainText(editorRef.current) : draftRef.current
      const hasLivePayload = editorText.trim().length > 0 || attachments.length > 0

-      if (disabled) {
-        return
-      }
-
      if (!busy && !hasLivePayload && queued.length > 0) {
        onDrain()

@@ -196,23 +186,4 @@ describe('composer Enter submit — live DOM vs stale composer state (#39630)',
    expect(onDrain).toHaveBeenCalledTimes(1)
    expect(onSubmit).not.toHaveBeenCalled()
  })
-
-  it('keeps reconnect drafts editable but blocks Enter submit until the gateway returns', async () => {
-    const onSubmit = vi.fn()
-    const onDrain = vi.fn()
-    const { getByTestId } = render(
-      <Harness disabled onCancel={vi.fn()} onDrain={onDrain} onQueue={vi.fn()} onSubmit={onSubmit} queued={['queued-1']} />
-    )
-    const editor = getByTestId('editor')
-
-    await act(async () => {
-      editor.textContent = 'draft while reconnecting'
-      fireEvent.input(editor)
-      fireEvent.keyDown(editor, { key: 'Enter' })
-    })
-
-    expect(editor.textContent).toBe('draft while reconnecting')
-    expect(onDrain).not.toHaveBeenCalled()
-    expect(onSubmit).not.toHaveBeenCalled()
-  })
 })
--- a/apps/desktop/src/app/chat/composer/index.tsx
+++ b/apps/desktop/src/app/chat/composer/index.tsx
@@ -43,16 +43,13 @@ import {
 import {
  $queuedPromptsBySession,
  enqueueQueuedPrompt,
-  MAX_AUTO_DRAIN_ATTEMPTS,
-  migrateQueuedPrompts,
  promoteQueuedPrompt,
  type QueuedPromptEntry,
  removeQueuedPrompt,
-  shouldAutoDrain,
+  shouldAutoDrainOnSettle,
  updateQueuedPrompt
 } from '@/store/composer-queue'
 import { $statusItemsBySession } from '@/store/composer-status'
-import { notify } from '@/store/notifications'
 import { $gatewayState, $messages, setSessionPickerOpen } from '@/store/session'
 import { $threadScrolledUp } from '@/store/thread-scroll'
 import { useTheme } from '@/themes'
@@ -85,8 +82,6 @@ import {
 import { QueuePanel } from './queue-panel'
 import {
  composerPlainText,
-  deleteSelectionInEditor,
-  insertPlainTextAtCaret,
  normalizeComposerEditorDom,
  placeCaretEnd,
  refChipElement,
@@ -137,12 +132,6 @@ function slashChipKindForItem(item: Unstable_TriggerItem): SlashChipKind {
  return 'command'
 }

-/** A `/` query is at its arg stage once it's past the command name. */
-const slashArgStage = (query: string) => query.includes(' ')
-
-/** The `/command` token of a slash query (`personality x` → `/personality`). */
-const slashCommandToken = (query: string) => `/${query.split(/\s+/, 1)[0]?.toLowerCase() ?? ''}`
-
 interface QueueEditState {
  attachments: ComposerAttachment[]
  draft: string
@@ -185,6 +174,7 @@ export function ChatBar({
  const queuedPromptsBySession = useStore($queuedPromptsBySession)
  const statusItemsBySession = useStore($statusItemsBySession)
  const scrolledUp = useStore($threadScrolledUp)
+  const sessionMessages = useStore($messages)
  const activeQueueSessionKey = queueSessionKey || sessionId || null

  const queuedPrompts = useMemo(
@@ -207,14 +197,11 @@ export function ChatBar({
  const composerSurfaceRef = useRef<HTMLDivElement | null>(null)
  const editorRef = useRef<HTMLDivElement | null>(null)
  const draftRef = useRef(draft)
+  const previousBusyRef = useRef(busy)
  const pendingDraftPersistRef = useRef<{ scope: string | null; text: string } | null>(null)
  const activeQueueSessionKeyRef = useRef(activeQueueSessionKey)
  activeQueueSessionKeyRef.current = activeQueueSessionKey
-  const prevQueueKeyRef = useRef(activeQueueSessionKey)
  const drainingQueueRef = useRef(false)
-  // Per-entry auto-drain failure counts; bounds retries so a persistent 404
-  // can't spin-loop. Cleared on success; reset naturally on remount/reconnect.
-  const drainFailuresRef = useRef(new Map<string, number>())
  const urlInputRef = useRef<HTMLInputElement | null>(null)

  const [urlOpen, setUrlOpen] = useState(false)
@@ -255,8 +242,6 @@ export function ChatBar({
  const gatewayState = useStore($gatewayState)
  const newSessionPlaceholders = t.composer.newSessionPlaceholders
  const followUpPlaceholders = t.composer.followUpPlaceholders
-  const reconnecting = gatewayState === 'closed' || gatewayState === 'error'
-  const inputDisabled = disabled && !reconnecting

  // Resting placeholder: a starter for brand-new sessions, a continuation for
  // existing ones. Picked once and only re-rolled when we genuinely move to a
@@ -287,13 +272,11 @@ export function ChatBar({
    setRestingPlaceholder(pickPlaceholder(sessionId ? followUpPlaceholders : newSessionPlaceholders))
  }, [followUpPlaceholders, newSessionPlaceholders, sessionId])

-  // When the transport is disabled it's because the gateway isn't open.
-  // Distinguish a cold start ("Starting Hermes...") from a dropped connection
-  // we're trying to restore. During reconnect, keep the textbox editable so a
-  // flaky network doesn't block drafting; only submit/backend actions stay
-  // disabled until the gateway is open again.
+  // When the bar is disabled it's because the gateway isn't open. Distinguish a
+  // cold start ("Starting Hermes...") from a dropped connection we're trying to
+  // restore (e.g. after the Mac slept) so the stuck state reads as recoverable.
  const placeholder = disabled
-    ? reconnecting
+    ? gatewayState === 'closed' || gatewayState === 'error'
      ? t.composer.placeholderReconnecting
      : t.composer.placeholderStarting
    : restingPlaceholder
@@ -335,13 +318,13 @@ export function ChatBar({
  )

  useEffect(() => {
-    if (!inputDisabled) {
+    if (!disabled) {
      focusInput()
    }
-  }, [focusInput, focusKey, focusRequestId, inputDisabled])
+  }, [disabled, focusInput, focusKey, focusRequestId])

  useEffect(() => {
-    if (inputDisabled) {
+    if (disabled) {
      return undefined
    }

@@ -361,7 +344,7 @@ export function ChatBar({
      offFocus()
      offInsert()
    }
-  }, [appendExternalText, inputDisabled])
+  }, [appendExternalText, disabled])

  // Keep draftRef in sync with the assistant-ui composer state for callers
  // that read the latest text outside the React render cycle. We don't push
@@ -540,6 +523,48 @@ export function ChatBar({
    })
  }, [])

+  const handlePaste = (event: ClipboardEvent<HTMLDivElement>) => {
+    const imageBlobs = extractClipboardImageBlobs(event.clipboardData)
+
+    if (imageBlobs.length > 0) {
+      event.preventDefault()
+
+      if (onAttachImageBlob) {
+        triggerHaptic('selection')
+
+        for (const blob of imageBlobs) {
+          void onAttachImageBlob(blob)
+        }
+      }
+
+      return
+    }
+
+    // Trim surrounding whitespace so a copy that dragged along leading/trailing
+    // blank lines (common when selecting from terminals, code blocks, web pages)
+    // doesn't dump multiline padding into the composer. Internal newlines are
+    // preserved — only the edges are cleaned up.
+    const pastedText = event.clipboardData.getData('text').trim()
+
+    if (!pastedText) {
+      event.preventDefault()
+
+      return
+    }
+
+    if (DATA_IMAGE_URL_RE.test(pastedText)) {
+      event.preventDefault()
+
+      return
+    }
+
+    event.preventDefault()
+    document.execCommand('insertText', false, pastedText)
+    const nextDraft = composerPlainText(event.currentTarget)
+    draftRef.current = nextDraft
+    aui.composer().setText(nextDraft)
+  }
+
  const [trigger, setTrigger] = useState<TriggerState | null>(null)
  const [triggerActive, setTriggerActive] = useState(0)
  const [triggerItems, setTriggerItems] = useState<readonly Unstable_TriggerItem[]>([])
@@ -576,15 +601,7 @@ export function ChatBar({
    }

    const before = textBeforeCaret(editor)
-    const found = detectTrigger(before ?? composerPlainText(editor))
-
-    // The arg-stage popover is only useful for commands with an options screen.
-    // For a no-arg command it would dead-end on "No matches", so drop it — the
-    // directive is already complete.
-    const detected =
-      found?.kind === '/' && slashArgStage(found.query) && !desktopSlashCommandTakesArgs(slashCommandToken(found.query))
-        ? null
-        : found
+    const detected = detectTrigger(before ?? composerPlainText(editor))

    setTrigger(detected)

@@ -624,46 +641,6 @@ export function ChatBar({
    flushEditorToDraft(event.currentTarget)
  }

-  const handlePaste = (event: ClipboardEvent<HTMLDivElement>) => {
-    const imageBlobs = extractClipboardImageBlobs(event.clipboardData)
-
-    if (imageBlobs.length > 0) {
-      event.preventDefault()
-
-      if (onAttachImageBlob) {
-        triggerHaptic('selection')
-
-        for (const blob of imageBlobs) {
-          void onAttachImageBlob(blob)
-        }
-      }
-
-      return
-    }
-
-    // Trim surrounding whitespace so a copy that dragged along leading/trailing
-    // blank lines (common when selecting from terminals, code blocks, web pages)
-    // doesn't dump multiline padding into the composer. Internal newlines are
-    // preserved — only the edges are cleaned up.
-    const pastedText = event.clipboardData.getData('text').trim()
-
-    if (!pastedText) {
-      event.preventDefault()
-
-      return
-    }
-
-    if (DATA_IMAGE_URL_RE.test(pastedText)) {
-      event.preventDefault()
-
-      return
-    }
-
-    event.preventDefault()
-    insertPlainTextAtCaret(event.currentTarget, pastedText)
-    flushEditorToDraft(event.currentTarget)
-  }
-
  const triggerAdapter: Unstable_TriggerAdapter | null =
    trigger?.kind === '@' ? at.adapter : trigger?.kind === '/' ? slash.adapter : null

@@ -679,12 +656,6 @@ export function ChatBar({

  const triggerLoading = trigger?.kind === '@' ? at.loading : trigger?.kind === '/' ? slash.loading : false

-  // Suppress the "No matches" empty state once a slash command is past its name:
-  // a no-arg command has nothing to offer, and a fully-typed arg commits on
-  // Space/Tab — neither should dead-end on a popover.
-  const argStageEmpty =
-    trigger?.kind === '/' && slashArgStage(trigger.query) && !triggerLoading && !triggerItems.length
-
  const closeTrigger = () => {
    setTrigger(null)
    setTriggerItems([])
@@ -695,25 +666,6 @@ export function ChatBar({
    setTriggerActive(idx => Math.min(idx, Math.max(0, triggerItems.length - 1)))
  }, [triggerItems.length])

-  // Commit the literally-typed `/command arg` as a directive chip — used when
-  // the completion list is empty because the arg is already fully typed (the
-  // backend completer drops exact matches). Reuses the chip path via a
-  // synthetic item whose serialized form is the verbatim text.
-  const commitTypedSlashDirective = () => {
-    if (trigger?.kind !== '/') {
-      return
-    }
-
-    const text = `/${trigger.query.trimEnd()}`
-
-    replaceTriggerWithChip({
-      id: text,
-      type: 'slash',
-      label: text.slice(1),
-      metadata: { command: slashCommandToken(trigger.query), display: text, meta: '', group: '', action: '', rawText: text }
-    })
-  }
-
  const replaceTriggerWithChip = (item: Unstable_TriggerItem) => {
    const editor = editorRef.current

@@ -832,18 +784,6 @@ export function ChatBar({
      return
    }

-    // Non-collapsed Backspace/Delete: native selection-delete is ~O(n²) on large
-    // drafts (Ctrl+A → Delete froze ~1.3s). Collapsed carets fall through.
-    if (
-      (event.key === 'Backspace' || event.key === 'Delete') &&
-      deleteSelectionInEditor(event.currentTarget)
-    ) {
-      event.preventDefault()
-      flushEditorToDraft(event.currentTarget)
-
-      return
-    }
-
    // Cmd/Ctrl+Shift+K drains the next queued message. Plain Cmd/Ctrl+K is
    // reserved for the global command palette.
    if ((event.metaKey || event.ctrlKey) && !event.altKey && event.shiftKey && event.key.toLowerCase() === 'k') {
@@ -873,15 +813,7 @@ export function ChatBar({
        return
      }

-      // Enter / Tab / Space all accept the highlighted item: a no-arg command
-      // commits its directive chip, an arg-taking command expands to its
-      // options step, and an arg option commits the full `/cmd arg` chip. Space
-      // is slash-only (an `@` mention takes a literal space) and gated to a
-      // non-empty query so a bare `/ ` still types a space.
-      const acceptOnSpace = event.key === ' ' && trigger.kind === '/' && Boolean(trigger.query.trim())
-      const accept = event.key === 'Enter' || event.key === 'Tab' || acceptOnSpace
-
-      if (accept) {
+      if (event.key === 'Enter' || event.key === 'Tab') {
        event.preventDefault()
        triggerKeyConsumedRef.current = true
        const item = triggerItems[triggerActive]
@@ -902,24 +834,6 @@ export function ChatBar({
      }
    }

-    // Arg stage with nothing left to suggest — a fully-typed arg the backend
-    // completer no longer echoes (it drops the exact match), e.g.
-    // `/personality creative`. Space/Tab still commit what's typed as a single
-    // directive chip; Enter falls through to submit (send it as-is).
-    if (
-      trigger?.kind === '/' &&
-      !triggerItems.length &&
-      (event.key === ' ' || event.key === 'Tab') &&
-      slashArgStage(trigger.query) &&
-      trigger.query.trim()
-    ) {
-      event.preventDefault()
-      triggerKeyConsumedRef.current = true
-      commitTypedSlashDirective()
-
-      return
-    }
-
    // ArrowUp/ArrowDown navigate, in priority order: the queue (edit entries in
    // place) then sent-message history. The history ring is derived from live
    // session messages each press — single source of truth, no mirror.
@@ -952,9 +866,7 @@ export function ChatBar({
      event.preventDefault()
      triggerKeyConsumedRef.current = true

-      // $messages is read imperatively (not subscribed) so the composer
-      // doesn't re-render on every streaming delta flush.
-      const history = deriveUserHistory($messages.get(), chatMessageText)
+      const history = deriveUserHistory(sessionMessages, chatMessageText)
      const entry = browseBackward(sessionId, currentDraft, history)

      if (entry !== null) {
@@ -979,7 +891,7 @@ export function ChatBar({
        event.preventDefault()
        triggerKeyConsumedRef.current = true

-        const history = deriveUserHistory($messages.get(), chatMessageText)
+        const history = deriveUserHistory(sessionMessages, chatMessageText)
        const result = browseForward(sessionId, history)

        if (result !== null) {
@@ -1015,10 +927,6 @@ export function ChatBar({
      const editorText = editorRef.current ? composerPlainText(editorRef.current) : draftRef.current
      const hasLivePayload = editorText.trim().length > 0 || attachments.length > 0

-      if (disabled) {
-        return
-      }
-
      if (!busy && !hasLivePayload && queuedPrompts.length > 0) {
        void drainNextQueued()

@@ -1417,7 +1325,6 @@ export function ChatBar({
          return false
        }

-        drainFailuresRef.current.delete(entry.id)
        removeQueuedPrompt(activeQueueSessionKey, entry.id)
        resetBrowseState(sessionId)

@@ -1429,17 +1336,16 @@ export function ChatBar({
    [activeQueueSessionKey, onSubmit, queuedPrompts, sessionId]
  )

-  const pickDrainHead = useCallback(
-    (entries: QueuedPromptEntry[]) => {
-      const skip = queueEditRef.current?.entryId
+  const drainNextQueued = useCallback(
+    () =>
+      runDrain(entries => {
+        const skip = queueEdit?.entryId

-      return skip ? entries.find(e => e.id !== skip) : entries[0]
-    },
-    [] // reads the edit id off a ref so the lock-holder always sees the latest
+        return skip ? entries.find(e => e.id !== skip) : entries[0]
+      }),
+    [queueEdit, runDrain]
  )

-  const drainNextQueued = useCallback(() => runDrain(pickDrainHead), [pickDrainHead, runDrain])
-
  const sendQueuedNow = useCallback(
    (id: string) => {
      if (!activeQueueSessionKey || id === queueEdit?.entryId) {
@@ -1457,76 +1363,30 @@ export function ChatBar({
        return true
      }

-      // A manual send clears the auto-drain backoff so a stuck entry the user
-      // taps gets a fresh attempt (and re-enables auto-retry on success).
-      drainFailuresRef.current.delete(id)
-
      return runDrain(entries => entries.find(e => e.id === id))
    },
    [activeQueueSessionKey, busy, onCancel, queueEdit, runDrain]
  )

-  // Edge-independent auto-drain: send the head whenever the session is idle and
-  // the queue is non-empty, bounding retries so a thrown/rejected onSubmit (e.g.
-  // a stale-session 404) can't strand the entry permanently nor spin-loop. The
-  // drain lock serializes sends; a remount/reconnect resets the failure counts.
-  const autoDrainNext = useCallback(() => {
-    if (busy || drainingQueueRef.current || !activeQueueSessionKey) {
-      return
-    }
+  // Auto-drain on busy → false (turn settled). Queued turns always flow once
+  // the session is idle again — whether the turn finished naturally or the
+  // user interrupted it. Interrupting to reach a queued message is the whole
+  // point of the queue, so we never suppress the drain. To cancel queued
+  // turns, the user deletes them from the panel.
+  useEffect(() => {
+    const wasBusy = previousBusyRef.current
+    previousBusyRef.current = busy

-    const entry = pickDrainHead(queuedPrompts)
-
-    if (!entry || (drainFailuresRef.current.get(entry.id) ?? 0) >= MAX_AUTO_DRAIN_ATTEMPTS) {
-      return
-    }
-
-    const onFail = () => {
-      const fails = (drainFailuresRef.current.get(entry.id) ?? 0) + 1
-      drainFailuresRef.current.set(entry.id, fails)
-
-      if (fails >= MAX_AUTO_DRAIN_ATTEMPTS) {
-        notify({
-          id: 'composer-queue-stuck',
-          kind: 'error',
-          title: t.composer.queueStuckTitle,
-          message: t.composer.queueStuckBody
-        })
-      }
-    }
-
-    void runDrain(() => entry)
-      .then(sent => {
-        if (!sent) {
-          onFail()
-        }
+    if (
+      shouldAutoDrainOnSettle({
+        isBusy: busy,
+        queueLength: queuedPrompts.length,
+        wasBusy
      })
-      .catch(onFail)
-  }, [activeQueueSessionKey, busy, pickDrainHead, queuedPrompts, runDrain, t])
-
-  // Re-key on a runtime session-id change. A stable stored id (queueSessionKey)
-  // never churns, so a change there is a real session switch and must NOT
-  // migrate; only the runtime-derived key (queueSessionKey falsy → key is
-  // sessionId) churns on a backend bounce/resume of the same conversation.
-  useEffect(() => {
-    const prev = prevQueueKeyRef.current
-    prevQueueKeyRef.current = activeQueueSessionKey
-
-    if (queueSessionKey || !prev || !activeQueueSessionKey || prev === activeQueueSessionKey) {
-      return
+    ) {
+      void drainNextQueued()
    }
-
-    migrateQueuedPrompts(prev, activeQueueSessionKey)
-  }, [activeQueueSessionKey, queueSessionKey])
-
-  // Queued turns flow whenever the session is idle — on the busy→false settle
-  // edge, on mount/reconnect, and after a re-key — so a swallowed edge can't
-  // strand them. To cancel queued turns, the user deletes them from the panel.
-  useEffect(() => {
-    if (shouldAutoDrain({ isBusy: busy, queueLength: queuedPrompts.length })) {
-      autoDrainNext()
-    }
-  }, [autoDrainNext, busy, queuedPrompts.length])
+  }, [busy, drainNextQueued, queuedPrompts.length])

  // Queue-edit cleanup: on session swap the scope effect already stashed the
  // edit snapshot; only restore into the composer when still on the same scope.
@@ -1561,10 +1421,6 @@ export function ChatBar({
  }

  const submitDraft = () => {
-    if (disabled) {
-      return
-    }
-
    // Source the text from the DOM editor, not React state. The AUI composer
    // state (`draft`) and the derived `hasComposerPayload` lag the DOM by a
    // render, so on fast typing or IME composition the final keystroke(s) may
@@ -1745,7 +1601,6 @@ export function ChatBar({
  const input = (
    <div className={cn('relative', stacked ? 'w-full' : 'min-w-(--composer-input-inline-min-width) flex-1')}>
      <div
-        aria-disabled={inputDisabled ? true : undefined}
        aria-label={t.composer.message}
        autoCapitalize="off"
        autoCorrect="off"
@@ -1756,7 +1611,7 @@ export function ChatBar({
          stacked && 'pl-3',
          stacked ? 'w-full' : 'min-w-(--composer-input-inline-min-width) flex-1'
        )}
-        contentEditable={!inputDisabled}
+        contentEditable={!disabled}
        data-placeholder={placeholder}
        data-slot={RICH_INPUT_SLOT}
        onBlur={() => window.setTimeout(closeTrigger, 80)}
@@ -1842,7 +1697,7 @@ export function ChatBar({
          ref={composerRef}
        >
          {showHelpHint && <HelpHint />}
-          {trigger && !argStageEmpty && (
+          {trigger && (
            <ComposerTriggerPopover
              activeIndex={triggerActive}
              items={triggerItems}
@@ -1886,6 +1741,7 @@ export function ChatBar({
                'group/composer-surface relative z-4 isolate rounded-[inherit] border border-[color-mix(in_srgb,var(--dt-composer-ring)_calc(18%*var(--composer-ring-strength)),var(--dt-input))] transition-[border-color] duration-200 ease-out focus-within:border-[color-mix(in_srgb,var(--dt-composer-ring)_calc(45%*var(--composer-ring-strength)),transparent)]',
                COMPOSER_DROP_FADE_CLASS,
                'group-has-data-[state=open]/composer:border-t-transparent',
+                'group-data-[status-stack]/composer:border-t-transparent',
                dragActive && COMPOSER_DROP_ACTIVE_CLASS
              )}
              data-slot="composer-surface"
--- a/apps/desktop/src/app/chat/composer/queue-panel.tsx
+++ b/apps/desktop/src/app/chat/composer/queue-panel.tsx
@@ -1,9 +1,7 @@
 import { StatusRow } from '@/components/chat/status-row'
 import { StatusSection } from '@/components/chat/status-section'
 import { Button } from '@/components/ui/button'
-import { Tip } from '@/components/ui/tooltip'
 import { type Translations, useI18n } from '@/i18n'
-import { ArrowUp, Pencil, Trash2 } from '@/lib/icons'
 import { cn } from '@/lib/utils'
 import type { QueuedPromptEntry } from '@/store/composer-queue'

@@ -40,46 +38,32 @@ export function QueuePanel({ busy, editingId, entries, onDelete, onEdit, onSendN
              isEditing && 'border-[color-mix(in_srgb,var(--dt-composer-ring)_40%,transparent)] bg-accent/25'
            )}
            key={entry.id}
+            leading={
+              <span aria-hidden className="size-3.5 shrink-0 rounded-full border border-foreground/35 bg-transparent" />
+            }
            trailing={
              <>
-                <Tip label={c.queueEdit}>
-                  <Button
-                    aria-label={c.queueEdit}
-                    className="size-5 rounded-md"
-                    disabled={Boolean(editingId) && !isEditing}
-                    onClick={() => onEdit(entry)}
-                    size="icon-xs"
-                    type="button"
-                    variant="ghost"
-                  >
-                    <Pencil size={11} />
-                  </Button>
-                </Tip>
-                <Tip label={busy ? c.queueSendNext : c.queueSend}>
-                  <Button
-                    aria-label={busy ? c.queueSendNext : c.queueSend}
-                    className="size-5 rounded-md"
-                    disabled={isEditing}
-                    onClick={() => onSendNow(entry.id)}
-                    size="icon-xs"
-                    type="button"
-                    variant="ghost"
-                  >
-                    <ArrowUp size={11} />
-                  </Button>
-                </Tip>
-                <Tip label={c.queueDelete}>
-                  <Button
-                    aria-label={c.queueDelete}
-                    className="size-5 rounded-md"
-                    onClick={() => onDelete(entry.id)}
-                    size="icon-xs"
-                    type="button"
-                    variant="ghost"
-                  >
-                    <Trash2 size={11} />
-                  </Button>
-                </Tip>
+                <Button
+                  disabled={Boolean(editingId) && !isEditing}
+                  onClick={() => onEdit(entry)}
+                  size="micro"
+                  type="button"
+                  variant="text"
+                >
+                  {c.queueEdit}
+                </Button>
+                <Button
+                  disabled={isEditing}
+                  onClick={() => onSendNow(entry.id)}
+                  size="micro"
+                  type="button"
+                  variant="secondary"
+                >
+                  {busy ? c.queueSendNext : c.queueSend}
+                </Button>
+                <Button onClick={() => onDelete(entry.id)} size="micro" type="button" variant="text">
+                  {c.queueDelete}
+                </Button>
              </>
            }
            trailingVisible={isEditing}
--- a/apps/desktop/src/app/chat/composer/rich-editor.test.ts
+++ b/apps/desktop/src/app/chat/composer/rich-editor.test.ts
@@ -3,24 +3,12 @@ import { describe, expect, it } from 'vitest'
 import { insertInlineRefsIntoEditor } from './inline-refs'
 import {
  composerPlainText,
-  deleteSelectionInEditor,
-  insertPlainTextAtCaret,
  normalizeComposerEditorDom,
  refChipElement,
  renderComposerContents,
  RICH_INPUT_SLOT
 } from './rich-editor'

-const caretIn = (editor: HTMLElement) => {
-  const range = document.createRange()
-  const selection = window.getSelection()!
-
-  range.selectNodeContents(editor)
-  range.collapse(false)
-  selection.removeAllRanges()
-  selection.addRange(range)
-}
-
 describe('renderComposerContents', () => {
  it('renders refs and raw text without interpreting user text as HTML', () => {
    const editor = document.createElement('div')
@@ -71,64 +59,3 @@ describe('insertInlineRefsIntoEditor', () => {
    expect(composerPlainText(editor)).toBe('@file:`src/foo.ts` ')
  })
 })
-
-describe('insertPlainTextAtCaret', () => {
-  it('inserts multiline text as text nodes + br', () => {
-    const editor = document.createElement('div')
-    editor.dataset.slot = RICH_INPUT_SLOT
-    document.body.append(editor)
-    caretIn(editor)
-
-    insertPlainTextAtCaret(editor, 'one\ntwo\nthree')
-
-    expect(editor.querySelectorAll('br').length).toBe(2)
-    expect(composerPlainText(editor)).toBe('one\ntwo\nthree')
-
-    editor.remove()
-  })
-
-  it('replaces the selected span', () => {
-    const editor = document.createElement('div')
-    editor.dataset.slot = RICH_INPUT_SLOT
-    editor.textContent = 'abXYef'
-    document.body.append(editor)
-
-    const text = editor.firstChild!
-    const selection = window.getSelection()!
-    const range = document.createRange()
-
-    range.setStart(text, 2)
-    range.setEnd(text, 4)
-    selection.removeAllRanges()
-    selection.addRange(range)
-
-    insertPlainTextAtCaret(editor, 'cd')
-
-    expect(composerPlainText(editor)).toBe('abcdef')
-
-    editor.remove()
-  })
-})
-
-describe('deleteSelectionInEditor', () => {
-  it('clears a non-collapsed range and leaves a collapsed caret', () => {
-    const editor = document.createElement('div')
-    editor.dataset.slot = RICH_INPUT_SLOT
-    editor.textContent = 'hello world'
-    document.body.append(editor)
-
-    const selection = window.getSelection()!
-    const range = document.createRange()
-
-    range.selectNodeContents(editor)
-    selection.removeAllRanges()
-    selection.addRange(range)
-
-    expect(deleteSelectionInEditor(editor)).toBe(true)
-    expect(composerPlainText(editor)).toBe('')
-    expect(selection.getRangeAt(0).collapsed).toBe(true)
-    expect(deleteSelectionInEditor(editor)).toBe(false)
-
-    editor.remove()
-  })
-})
--- a/apps/desktop/src/app/chat/composer/rich-editor.ts
+++ b/apps/desktop/src/app/chat/composer/rich-editor.ts
@@ -132,63 +132,6 @@ export function renderComposerContents(target: HTMLElement, text: string) {
  appendComposerContents(target, text)
 }

-/** Caret range when the selection lives inside `editor`; else null. */
-function composerSelectionRange(editor: HTMLElement) {
-  const selection = window.getSelection()
-  const range = selection?.rangeCount ? selection.getRangeAt(0) : null
-
-  if (!selection || !range || !editor.contains(range.commonAncestorContainer)) {
-    return null
-  }
-
-  return { range, selection }
-}
-
-/** Insert plain text at the caret (replacing any selection). Pastes use this
- *  instead of `execCommand('insertText')` — Chromium's editing pipeline is
- *  ~O(n²) on large multiline blobs. */
-export function insertPlainTextAtCaret(editor: HTMLElement, text: string) {
-  const hit = composerSelectionRange(editor)
-  const fragment = document.createDocumentFragment()
-
-  appendTextWithBreaks(fragment, text)
-
-  const tail = fragment.lastChild
-
-  if (hit) {
-    hit.range.deleteContents()
-    hit.range.insertNode(fragment)
-  } else {
-    editor.append(fragment)
-  }
-
-  if (tail) {
-    const caret = document.createRange()
-    caret.setStartAfter(tail)
-    caret.collapse(true)
-    const selection = hit?.selection ?? window.getSelection()
-    selection?.removeAllRanges()
-    selection?.addRange(caret)
-  }
-}
-
-/** Remove a non-collapsed selection in-editor. Skips collapsed carets so word/
- *  line delete (Opt/Cmd+Backspace) stays native. Returns whether anything ran. */
-export function deleteSelectionInEditor(editor: HTMLElement) {
-  const hit = composerSelectionRange(editor)
-
-  if (!hit || hit.range.collapsed) {
-    return false
-  }
-
-  hit.range.deleteContents()
-  hit.range.collapse(true)
-  hit.selection.removeAllRanges()
-  hit.selection.addRange(hit.range)
-
-  return true
-}
-
 /** Serialize a draft string into chip-HTML for the contenteditable surface. */
 export function composerHtml(text: string) {
  let cursor = 0
--- a/apps/desktop/src/app/chat/composer/status-stack/index.tsx
+++ b/apps/desktop/src/app/chat/composer/status-stack/index.tsx
@@ -170,22 +170,14 @@ export function ComposerStatusStack({ queue, sessionId }: ComposerStatusStackPro

  return (
    <div
-      // Sits above the composer (bottom-full), nudged down by the shell's 0.5rem
-      // top pad (pt-2 on composer-root) plus 1px so its bottom edge overlaps the
-      // composer surface's top border. z BELOW the surface (z-4) so the surface's
-      // top border paints over our transparent bottom border — one seam, no
-      // double line.
-      className="absolute inset-x-0 bottom-full z-3 max-h-[40vh] translate-y-[calc(0.5rem+1px)] overflow-y-auto"
+      className="absolute inset-x-0 bottom-full z-6 -mb-[9px] max-h-[40vh] overflow-y-auto"
      onPointerDownCapture={() => blurComposerInput()}
      ref={stackRef}
    >
      {/* The card paints the shared --composer-fill (rest / scrolled / focused
          all match the composer surface by construction); on scroll we only
-          ghost the CONTENT — element opacity on the card would kill the blur.
-          Rounded top, square bottom; the bottom border is TRANSPARENT — the
-          composer surface's visible top border (which sits at a higher z) is the
-          single shared seam, so the two read as one fused capsule. */}
-      <div className={cn(composerDockCard('top'), 'mx-2 rounded-b-none border-b border-b-transparent pt-0.5 pb-1')}>
+          ghost the CONTENT — element opacity on the card would kill the blur. */}
+      <div className={cn(composerDockCard('top'), 'mx-1 pt-0.5 pb-1')}>
        <div
          className={cn(
            'transition-opacity duration-200 ease-out',
--- a/apps/desktop/src/app/chat/index.tsx
+++ b/apps/desktop/src/app/chat/index.tsx
@@ -35,9 +35,7 @@ import {
  $gatewayState,
  $introPersonality,
  $introSeed,
-  $lastVisibleMessageIsUser,
  $messages,
-  $messagesEmpty,
  $selectedStoredSessionId,
  $sessions,
  sessionPinId
@@ -55,9 +53,8 @@ import { droppedFileInlineRefs, type SessionDragPayload, sessionInlineRef } from
 import type { ChatBarState } from './composer/types'
 import { type DroppedFile, partitionDroppedFiles } from './hooks/use-composer-actions'
 import { useFileDropZone } from './hooks/use-file-drop-zone'
-import { ScrollToBottomButton } from './scroll-to-bottom-button'
 import { SessionActionsMenu } from './sidebar/session-actions-menu'
-import { threadLoadingState } from './thread-loading'
+import { lastVisibleMessageIsUser, threadLoadingState } from './thread-loading'

 interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
  gateway: HermesGateway | null
@@ -128,13 +125,7 @@ function ChatHeader({

  return (
    <header className={cn(titlebarHeaderBaseClass, isRoutedSessionView && titlebarHeaderShadowClass)}>
-      <div
-        className={titlebarHeaderTitleClass}
-        style={{
-          maxWidth:
-            'calc(100vw - var(--titlebar-content-inset,0px) - var(--titlebar-tools-right) - var(--titlebar-tools-width) - 1.5rem)'
-        }}
-      >
+      <div className={titlebarHeaderTitleClass}>
        <SessionActionsMenu
          align="start"
          onDelete={selectedSessionId ? onDeleteSelectedSession : undefined}
@@ -145,7 +136,7 @@ function ChatHeader({
          title={title}
        >
          <Button
-            className="pointer-events-auto flex h-6 min-w-0 max-w-full gap-1 overflow-hidden border border-transparent bg-transparent px-2 py-0 text-(--ui-text-secondary) hover:border-(--ui-stroke-tertiary) hover:bg-(--ui-control-hover-background) hover:text-foreground data-[state=open]:border-(--ui-stroke-tertiary) data-[state=open]:bg-(--ui-control-active-background) [-webkit-app-region:no-drag]"
+            className="pointer-events-auto flex h-6 w-full min-w-0 max-w-full gap-1 overflow-hidden border border-transparent bg-transparent px-2 py-0 text-(--ui-text-secondary) hover:border-(--ui-stroke-tertiary) hover:bg-(--ui-control-hover-background) hover:text-foreground data-[state=open]:border-(--ui-stroke-tertiary) data-[state=open]:bg-(--ui-control-active-background) [-webkit-app-region:no-drag]"
            type="button"
            variant="ghost"
          >
@@ -158,42 +149,105 @@ function ChatHeader({
  )
 }

-interface ChatRuntimeBoundaryProps {
-  busy: boolean
-  children: React.ReactNode
-  onCancel: () => Promise<void> | void
-  onEdit: (message: AppendMessage) => Promise<void>
-  onReload: (parentId: string | null) => Promise<void>
-  onThreadMessagesChange: (messages: readonly ThreadMessage[]) => void
-  /** Route points at an unloaded session — render empty until resume swaps in
-   *  the new transcript, so the previous session's messages don't linger. */
-  suppressMessages: boolean
-}
-
-const NO_MESSAGES: ChatMessage[] = []
-
-/**
- * Owns the $messages subscription and the assistant-ui external-store runtime.
- *
- * Isolated from ChatView so the per-token delta flush (which replaces the
- * $messages atom ~30×/s during streaming) only re-renders this component and
- * the runtime provider. The children (Thread, ChatBar) are created by
- * ChatView, whose render output is stable across flushes — so React bails out
- * of re-rendering them by element identity and the stream's render cost stays
- * confined to the streaming message's own subtree.
- */
-function ChatRuntimeBoundary({
-  busy,
-  children,
+export function ChatView({
+  className,
+  gateway,
+  onToggleSelectedPin,
+  onDeleteSelectedSession,
  onCancel,
+  onAddContextRef,
+  onAddUrl,
+  onAttachImageBlob,
+  onAttachDroppedItems,
+  onBranchInNewChat,
+  maxVoiceRecordingSeconds,
+  onPasteClipboardImage,
+  onPickFiles,
+  onPickFolders,
+  onPickImages,
+  onRemoveAttachment,
+  onSteer,
+  onSubmit,
+  onThreadMessagesChange,
  onEdit,
  onReload,
-  onThreadMessagesChange,
-  suppressMessages
-}: ChatRuntimeBoundaryProps) {
-  const storeMessages = useStore($messages)
-  const messages = suppressMessages ? NO_MESSAGES : storeMessages
+  onRestoreToMessage,
+  onTranscribeAudio
+}: ChatViewProps) {
+  const location = useLocation()
+  const activeSessionId = useStore($activeSessionId)
+  const awaitingResponse = useStore($awaitingResponse)
+  const busy = useStore($busy)
+  const contextSuggestions = useStore($contextSuggestions)
+  const currentCwd = useStore($currentCwd)
+  const currentModel = useStore($currentModel)
+  const currentProvider = useStore($currentProvider)
+  const freshDraftReady = useStore($freshDraftReady)
+  const gatewayState = useStore($gatewayState)
+  const gatewaySwapTarget = useStore($gatewaySwapTarget)
+  const gatewayOpen = gatewayState === 'open'
+  const introPersonality = useStore($introPersonality)
+  const introSeed = useStore($introSeed)
+  const messages = useStore($messages)
+  const selectedSessionId = useStore($selectedStoredSessionId)
  const runtimeMessageCacheRef = useRef(new WeakMap<ChatMessage, ThreadMessage>())
+  const isRoutedSessionView = Boolean(routeSessionId(location.pathname))
+
+  const showIntro =
+    freshDraftReady && !isRoutedSessionView && !selectedSessionId && !activeSessionId && messages.length === 0
+
+  // Session is still loading if the route references a session we haven't
+  // resumed yet. Once `activeSessionId` is set (runtime has resumed), the
+  // session exists — even if it has zero messages (a brand-new routed
+  // session). The flicker where `busy` flips true briefly during hydrate
+  // is handled by `threadLoadingState`'s last-visible-user gate.
+  const loadingSession = isRoutedSessionView && messages.length === 0 && !activeSessionId
+  const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse, lastVisibleMessageIsUser(messages))
+  const showChatBar = !loadingSession
+  const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new')
+
+  const modelOptionsQuery = useQuery<ModelOptionsResponse>({
+    queryKey: ['model-options', activeSessionId || 'global'],
+    queryFn: () => {
+      if (!activeSessionId) {
+        return getGlobalModelOptions()
+      }
+
+      if (!gateway) {
+        throw new Error('Hermes gateway unavailable')
+      }
+
+      return gateway.request<ModelOptionsResponse>('model.options', { session_id: activeSessionId })
+    },
+    enabled: gatewayOpen
+  })
+
+  const quickModels = useMemo(
+    () => quickModelOptions(modelOptionsQuery.data, currentProvider, currentModel),
+    [currentModel, currentProvider, modelOptionsQuery.data]
+  )
+
+  const chatBarState = useMemo<ChatBarState>(
+    () => ({
+      model: {
+        model: currentModel,
+        provider: currentProvider,
+        canSwitch: gatewayOpen,
+        loading: !gatewayOpen || (!currentModel && !currentProvider),
+        quickModels
+      },
+      tools: {
+        enabled: true,
+        label: 'Add context',
+        suggestions: contextSuggestions
+      },
+      voice: {
+        enabled: true,
+        active: false
+      }
+    }),
+    [contextSuggestions, currentModel, currentProvider, gatewayOpen, quickModels]
+  )

  const runtimeMessageRepository = useMemo(() => {
    const items: { message: ThreadMessage; parentId: string | null }[] = []
@@ -243,120 +297,6 @@ function ChatRuntimeBoundary({
    onReload
  })

-  return <AssistantRuntimeProvider runtime={runtime}>{children}</AssistantRuntimeProvider>
-}
-
-export function ChatView({
-  className,
-  gateway,
-  onToggleSelectedPin,
-  onDeleteSelectedSession,
-  onCancel,
-  onAddContextRef,
-  onAddUrl,
-  onAttachImageBlob,
-  onAttachDroppedItems,
-  onBranchInNewChat,
-  maxVoiceRecordingSeconds,
-  onPasteClipboardImage,
-  onPickFiles,
-  onPickFolders,
-  onPickImages,
-  onRemoveAttachment,
-  onSteer,
-  onSubmit,
-  onThreadMessagesChange,
-  onEdit,
-  onReload,
-  onRestoreToMessage,
-  onTranscribeAudio
-}: ChatViewProps) {
-  const location = useLocation()
-  const activeSessionId = useStore($activeSessionId)
-  const awaitingResponse = useStore($awaitingResponse)
-  const busy = useStore($busy)
-  const contextSuggestions = useStore($contextSuggestions)
-  const currentCwd = useStore($currentCwd)
-  const currentModel = useStore($currentModel)
-  const currentProvider = useStore($currentProvider)
-  const freshDraftReady = useStore($freshDraftReady)
-  const gatewayState = useStore($gatewayState)
-  const gatewaySwapTarget = useStore($gatewaySwapTarget)
-  const gatewayOpen = gatewayState === 'open'
-  const introPersonality = useStore($introPersonality)
-  const introSeed = useStore($introSeed)
-  // PERF: ChatView must not subscribe to $messages — the atom is replaced on
-  // every streaming delta flush (~30×/s) and a subscription here re-renders
-  // the entire chat shell (header, chat bar, thread wrapper) per token. The
-  // runtime that DOES need the messages lives in ChatRuntimeBoundary below;
-  // this component only needs streaming-stable derivations.
-  const messagesEmpty = useStore($messagesEmpty)
-  const lastVisibleIsUser = useStore($lastVisibleMessageIsUser)
-  const selectedSessionId = useStore($selectedStoredSessionId)
-  const routedSessionId = routeSessionId(location.pathname)
-  const isRoutedSessionView = Boolean(routedSessionId)
-
-  // The URL points at a session the store hasn't loaded yet (sidebar / cmd-K /
-  // direct nav). Derived in render so the swap reads instantly: the same frame
-  // the id changes we drop the old transcript and show the loader, instead of
-  // waiting for the resume effect (which paints a frame later) to clear them.
-  const routeSessionMismatch = isRoutedSessionView && routedSessionId !== selectedSessionId
-
-  const showIntro = freshDraftReady && !isRoutedSessionView && !selectedSessionId && !activeSessionId && messagesEmpty
-
-  // Session is still loading if the route references a session we haven't
-  // resumed yet. Once `activeSessionId` is set (runtime has resumed), the
-  // session exists — even if it has zero messages (a brand-new routed
-  // session). The flicker where `busy` flips true briefly during hydrate
-  // is handled by `threadLoadingState`'s last-visible-user gate.
-  const loadingSession = isRoutedSessionView && (routeSessionMismatch || (messagesEmpty && !activeSessionId))
-  const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse, lastVisibleIsUser)
-  const showChatBar = !loadingSession
-  const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new')
-
-  const modelOptionsQuery = useQuery<ModelOptionsResponse>({
-    queryKey: ['model-options', activeSessionId || 'global'],
-    queryFn: () => {
-      if (!activeSessionId) {
-        return getGlobalModelOptions()
-      }
-
-      if (!gateway) {
-        throw new Error('Hermes gateway unavailable')
-      }
-
-      return gateway.request<ModelOptionsResponse>('model.options', { session_id: activeSessionId })
-    },
-    enabled: gatewayOpen
-  })
-
-  const quickModels = useMemo(
-    () => quickModelOptions(modelOptionsQuery.data, currentProvider, currentModel),
-    [currentModel, currentProvider, modelOptionsQuery.data]
-  )
-
-  const chatBarState = useMemo<ChatBarState>(
-    () => ({
-      model: {
-        model: currentModel,
-        provider: currentProvider,
-        canSwitch: gatewayOpen,
-        loading: !gatewayOpen || (!currentModel && !currentProvider),
-        quickModels
-      },
-      tools: {
-        enabled: true,
-        label: 'Add context',
-        suggestions: contextSuggestions
-      },
-      voice: {
-        enabled: true,
-        active: false
-      }
-    }),
-    [contextSuggestions, currentModel, currentProvider, gatewayOpen, quickModels]
-  )
-
  // Drop files anywhere in the conversation area, not just on the composer
  // input. In-app drags (project tree / gutter) carry workspace-relative paths
  // the gateway resolves directly, so they stay inline `@file:` refs. OS/Finder
@@ -409,14 +349,7 @@ export function ChatView({
        className="relative min-h-0 max-w-full flex-1 overflow-hidden bg-(--ui-chat-surface-background) contain-[layout_paint]"
        {...dropHandlers}
      >
-        <ChatRuntimeBoundary
-          busy={busy}
-          onCancel={onCancel}
-          onEdit={onEdit}
-          onReload={onReload}
-          onThreadMessagesChange={onThreadMessagesChange}
-          suppressMessages={routeSessionMismatch}
-        >
+        <AssistantRuntimeProvider runtime={runtime}>
          <Thread
            clampToComposer={showChatBar}
            cwd={currentCwd}
@@ -451,14 +384,13 @@ export function ChatView({
                onSteer={onSteer}
                onSubmit={onSubmit}
                onTranscribeAudio={onTranscribeAudio}
-                queueSessionKey={selectedSessionId}
+                queueSessionKey={selectedSessionId || activeSessionId}
                sessionId={activeSessionId}
                state={chatBarState}
              />
            </Suspense>
          )}
-        </ChatRuntimeBoundary>
-        {showChatBar && <ScrollToBottomButton />}
+        </AssistantRuntimeProvider>
        <ChatDropOverlay kind={dragKind} />
        <ChatSwapOverlay profile={gatewaySwapTarget} />
      </div>
--- a/apps/desktop/src/app/chat/scroll-to-bottom-button.test.tsx
+++ b/apps/desktop/src/app/chat/scroll-to-bottom-button.test.tsx
@@ -1,67 +0,0 @@
-import { cleanup, fireEvent, render, screen } from '@testing-library/react'
-import { afterEach, describe, expect, it, vi } from 'vitest'
-
-import { clearAllPrompts, setApprovalRequest } from '@/store/prompts'
-import { $activeSessionId } from '@/store/session'
-import { onScrollToBottomRequest, resetThreadScroll, setThreadAtBottom } from '@/store/thread-scroll'
-
-import { ScrollToBottomButton } from './scroll-to-bottom-button'
-
-function pendingApproval() {
-  $activeSessionId.set('sess-1')
-  setApprovalRequest({ command: 'rm -rf /tmp/x', description: 'dangerous command', sessionId: 'sess-1' })
-}
-
-afterEach(() => {
-  cleanup()
-  clearAllPrompts()
-  resetThreadScroll()
-  $activeSessionId.set(null)
-})
-
-// `getByRole('button')` excludes aria-hidden nodes, so "queryByRole null" is the
-// control's hidden (parked-at-bottom) state.
-describe('ScrollToBottomButton', () => {
-  it('stays hidden while parked at the bottom', () => {
-    render(<ScrollToBottomButton />)
-
-    expect(screen.queryByRole('button')).toBeNull()
-  })
-
-  it('is a plain jump-to-bottom control when scrolled up with no approval', () => {
-    setThreadAtBottom(false)
-    render(<ScrollToBottomButton />)
-
-    expect(screen.getByRole('button', { name: 'Scroll to bottom' })).toBeTruthy()
-    expect(screen.queryByText('Approval needed')).toBeNull()
-  })
-
-  it('morphs into the approval pill when scrolled up with a pending approval', () => {
-    pendingApproval()
-    setThreadAtBottom(false)
-    render(<ScrollToBottomButton />)
-
-    expect(screen.getByRole('button', { name: 'Approval needed' })).toBeTruthy()
-    expect(screen.getByText('Approval needed')).toBeTruthy()
-  })
-
-  it('does not morph while a pending approval is still in view (at bottom)', () => {
-    pendingApproval()
-    render(<ScrollToBottomButton />)
-
-    // Parked at bottom → control hidden, so it can't claim "approval needed".
-    expect(screen.queryByRole('button')).toBeNull()
-  })
-
-  it('re-arms sticky-bottom on click', () => {
-    const handler = vi.fn()
-    const stop = onScrollToBottomRequest(handler)
-    setThreadAtBottom(false)
-    render(<ScrollToBottomButton />)
-
-    fireEvent.click(screen.getByRole('button'))
-
-    expect(handler).toHaveBeenCalledTimes(1)
-    stop()
-  })
-})
--- a/apps/desktop/src/app/chat/scroll-to-bottom-button.tsx
+++ b/apps/desktop/src/app/chat/scroll-to-bottom-button.tsx
@@ -1,74 +0,0 @@
-import { useStore } from '@nanostores/react'
-import { useRef } from 'react'
-
-import { Codicon } from '@/components/ui/codicon'
-import { useI18n } from '@/i18n'
-import { triggerHaptic } from '@/lib/haptics'
-import { cn } from '@/lib/utils'
-import { $approvalRequest } from '@/store/prompts'
-import { $threadJumpButtonVisible, requestScrollToBottom } from '@/store/thread-scroll'
-
-/**
- * Floating "jump to bottom" control. Sits centered just above the composer,
- * clearing the out-of-flow status stack via the same measured-height CSS vars
- * the thread's bottom clearance uses (`--composer-measured-height` +
- * `--status-stack-measured-height`), so it never overlaps the queue / subagent
- * / background cards. Visible only while the user has scrolled meaningfully
- * away from the bottom; clicking re-arms sticky-bottom and pins the viewport.
- *
- * When the turn is BLOCKED on an approval, this same control morphs into an
- * "Approval needed" pill — the only response surface is the inline Run/Reject
- * bar on the parked tool row, which is always the bottom-most content, so the
- * existing scroll-to-bottom action lands the user right on it. One control, no
- * collision, no second scroll path (native scrollIntoView would scroll
- * overflow:hidden ancestors that can't scroll back and wreck the layout).
- *
- * Enter/exit motion lives in styles.css under `.thread-jump-button` — a
- * directional scale (contract in from 1.1, contract out to 0.9) keyed off
- * `data-state`. `idle` (never-shown) stays silent so it can't flash on mount;
- * `in`/`out` only swap once it has actually appeared.
- */
-export function ScrollToBottomButton() {
-  const { t } = useI18n()
-  const visible = useStore($threadJumpButtonVisible)
-  const request = useStore($approvalRequest)
-  // Scrolled away while an approval is pending → the inline Run/Reject bar is
-  // below the fold. Relabel so the user knows the session needs them, not just
-  // that there's more to read.
-  const approval = visible && Boolean(request)
-  const hasShownRef = useRef(false)
-
-  if (visible) {
-    hasShownRef.current = true
-  }
-
-  const state = visible ? 'in' : hasShownRef.current ? 'out' : 'idle'
-  const label = approval ? t.assistant.approval.jumpToApproval : t.assistant.thread.scrollToBottom
-
-  return (
-    <button
-      aria-hidden={!visible}
-      aria-label={label}
-      className={cn(
-        'thread-jump-button absolute left-1/2 z-20 grid place-items-center backdrop-blur-[0.75rem] [-webkit-backdrop-filter:blur(0.75rem)]',
-        approval
-          ? 'h-8 grid-flow-col gap-1.5 rounded-full border border-primary/40 bg-(--composer-fill) px-3 text-primary hover:bg-primary/10'
-          : 'size-8 rounded-full border border-border/65 bg-(--composer-fill) text-muted-foreground hover:text-foreground',
-        !visible && 'pointer-events-none'
-      )}
-      data-state={state}
-      onClick={() => {
-        triggerHaptic('selection')
-        requestScrollToBottom()
-      }}
-      style={{
-        bottom: 'calc(var(--composer-measured-height) + var(--status-stack-measured-height) + 0.625rem)'
-      }}
-      tabIndex={visible ? 0 : -1}
-      type="button"
-    >
-      <Codicon name="arrow-down" size={approval ? '0.875rem' : '1rem'} />
-      {approval && <span className="text-xs font-medium">{label}</span>}
-    </button>
-  )
-}
--- a/apps/desktop/src/app/chat/sidebar/index.tsx
+++ b/apps/desktop/src/app/chat/sidebar/index.tsx
--- a/apps/desktop/src/app/chat/sidebar/order.test.ts
+++ b/apps/desktop/src/app/chat/sidebar/order.test.ts
@@ -1,21 +0,0 @@
-import { describe, expect, it } from 'vitest'
-
-import { resolveManualSessionOrderIds } from './order'
-
-describe('resolveManualSessionOrderIds', () => {
-  it('clears legacy auto-seeded order until the user manually reorders sessions', () => {
-    expect(resolveManualSessionOrderIds(['newest', 'older'], ['older', 'newest'], false)).toEqual([])
-  })
-
-  it('keeps a manual order and surfaces newly seen sessions first', () => {
-    expect(resolveManualSessionOrderIds(['newest', 'older', 'oldest'], ['oldest', 'older'], true)).toEqual([
-      'newest',
-      'oldest',
-      'older'
-    ])
-  })
-
-  it('clears manual order when none of the saved ids still exist', () => {
-    expect(resolveManualSessionOrderIds(['newest'], ['gone'], true)).toEqual([])
-  })
-})
--- a/apps/desktop/src/app/chat/sidebar/order.ts
+++ b/apps/desktop/src/app/chat/sidebar/order.ts
@@ -1,17 +0,0 @@
-export function resolveManualSessionOrderIds(currentIds: string[], orderIds: string[], manual: boolean): string[] {
-  if (!manual || !currentIds.length || !orderIds.length) {
-    return []
-  }
-
-  const current = new Set(currentIds)
-  const retained = orderIds.filter(id => current.has(id))
-
-  if (!retained.length) {
-    return []
-  }
-
-  const retainedSet = new Set(retained)
-  const fresh = currentIds.filter(id => !retainedSet.has(id))
-
-  return [...fresh, ...retained]
-}
--- a/apps/desktop/src/app/chat/sidebar/profile-switcher.tsx
+++ b/apps/desktop/src/app/chat/sidebar/profile-switcher.tsx
@@ -284,7 +284,6 @@ export function ProfileRail() {
          selectProfile(name)
        }}
        open={createOpen}
-        profiles={profiles}
      />

      <RenameProfileDialog
@@ -468,10 +467,6 @@ function ProfileSquare({ active, color, label, onDelete, onRecolor, onRename, on
          aria-label={p.actionsFor(label)}
          className="w-40"
          collisionPadding={{ bottom: 44, left: 8, right: 8, top: 8 }}
-          // Menu close refocuses the trigger — which doubles as the popover
-          // anchor — so the picker reads it as focus-outside and dies on open.
-          // Suppress the refocus and the picker survives.
-          onCloseAutoFocus={event => event.preventDefault()}
        >
          <ContextMenuItem onSelect={() => setPickerOpen(true)}>
            <Codicon name="symbol-color" size="0.875rem" />
--- a/apps/desktop/src/app/chat/sidebar/session-actions-menu.tsx
+++ b/apps/desktop/src/app/chat/sidebar/session-actions-menu.tsx
@@ -4,7 +4,7 @@ import { useEffect, useRef, useState } from 'react'
 import { Button } from '@/components/ui/button'
 import { Codicon } from '@/components/ui/codicon'
 import { ContextMenu, ContextMenuContent, ContextMenuItem, ContextMenuTrigger } from '@/components/ui/context-menu'
-import { CopyButton } from '@/components/ui/copy-button'
+import { writeClipboardText } from '@/components/ui/copy-button'
 import {
  Dialog,
  DialogContent,
@@ -49,17 +49,26 @@ function useSessionActions({ sessionId, title, pinned = false, profile, onPin, o
  const r = t.sidebar.row
  const [renameOpen, setRenameOpen] = useState(false)

-  const pinItem: ItemSpec = {
-    disabled: !onPin,
-    icon: 'pin',
-    label: pinned ? r.unpin : r.pin,
-    onSelect: () => {
-      triggerHaptic('selection')
-      onPin?.()
-    }
-  }
-
  const items: ItemSpec[] = [
+    {
+      disabled: !onPin,
+      icon: 'pin',
+      label: pinned ? r.unpin : r.pin,
+      onSelect: () => {
+        triggerHaptic('selection')
+        onPin?.()
+      }
+    },
+    {
+      disabled: !sessionId,
+      icon: 'copy',
+      label: r.copyId,
+      onSelect: event => {
+        event.preventDefault()
+        triggerHaptic('selection')
+        void writeClipboardText(sessionId).catch(err => notifyError(err, r.copyIdFailed))
+      }
+    },
    ...(canOpenSessionWindow()
      ? [
          {
@@ -113,28 +122,13 @@ function useSessionActions({ sessionId, title, pinned = false, profile, onPin, o
    }
  ]

-  const renderMenuItem = (Item: MenuItem, { className, disabled, icon, label, onSelect, variant }: ItemSpec) => (
-    <Item className={className} disabled={disabled} key={label} onSelect={onSelect} variant={variant}>
-      <Codicon name={icon} size="0.875rem" />
-      <span>{label}</span>
-    </Item>
-  )
-
-  const renderItems = (Item: MenuItem) => (
-    <>
-      {renderMenuItem(Item, pinItem)}
-      <CopyButton
-        appearance={Item === DropdownMenuItem ? 'menu-item' : 'context-menu-item'}
-        disabled={!sessionId}
-        errorMessage={r.copyIdFailed}
-        key={r.copyId}
-        label={r.copyId}
-        onCopyError={err => notifyError(err, r.copyIdFailed)}
-        text={sessionId}
-      />
-      {items.map(spec => renderMenuItem(Item, spec))}
-    </>
-  )
+  const renderItems = (Item: MenuItem) =>
+    items.map(({ className, disabled, icon, label, onSelect, variant }) => (
+      <Item className={className} disabled={disabled} key={label} onSelect={onSelect} variant={variant}>
+        <Codicon name={icon} size="0.875rem" />
+        <span>{label}</span>
+      </Item>
+    ))

  const renameDialog = (
    <RenameSessionDialog
--- a/apps/desktop/src/app/chat/sidebar/session-row.tsx
+++ b/apps/desktop/src/app/chat/sidebar/session-row.tsx
@@ -96,9 +96,7 @@ export function SidebarSessionRow({
          'group relative grid min-h-[1.625rem] cursor-pointer grid-cols-[minmax(0,1fr)_1.375rem] items-center rounded-md transition-colors duration-100 ease-out hover:bg-(--ui-row-hover-background) hover:transition-none',
          isSelected && 'bg-(--ui-row-active-background)',
          isWorking && 'text-foreground',
-          // Opaque surface while lifted so the dragged row erases what's under
-          // it (translucency let the rows below bleed through).
-          dragging && 'z-10 cursor-grabbing bg-(--ui-sidebar-surface-background)',
+          dragging && 'z-10 cursor-grabbing opacity-60 shadow-sm',
          className
        )}
        data-working={isWorking ? 'true' : undefined}
--- a/apps/desktop/src/app/chat/sidebar/virtual-session-list.tsx
+++ b/apps/desktop/src/app/chat/sidebar/virtual-session-list.tsx
@@ -1,7 +1,7 @@
-import { useSortable } from '@dnd-kit/sortable'
+import { SortableContext, useSortable, verticalListSortingStrategy } from '@dnd-kit/sortable'
 import { CSS } from '@dnd-kit/utilities'
 import { useVirtualizer } from '@tanstack/react-virtual'
-import { type FC, useCallback, useRef } from 'react'
+import { type FC, useCallback, useMemo, useRef } from 'react'

 import type { SessionInfo } from '@/hermes'
 import { cn } from '@/lib/utils'
@@ -48,6 +48,7 @@ export const VirtualSessionList: FC<VirtualSessionListProps> = ({
  workingSessionIdSet
 }) => {
  const scrollerRef = useRef<HTMLDivElement | null>(null)
+  const ids = useMemo(() => sessions.map(s => s.id), [sessions])

  const virtualizer = useVirtualizer({
    count: sessions.length,
@@ -100,16 +101,21 @@ export const VirtualSessionList: FC<VirtualSessionListProps> = ({
    )
  })

-  // When sortable, the caller wraps this in a ReorderableList that owns the
-  // DndContext + SortableContext (keyed on the same ids); the virtualized rows
-  // just consume that context via useSortable.
-  return (
+  const list = (
    <div className={cn('relative min-h-0 flex-1 overflow-x-hidden overflow-y-auto overscroll-contain', className)} ref={scrollerRef}>
      <div className="grid gap-px" style={{ paddingBottom: `${paddingBottom}px`, paddingTop: `${paddingTop}px` }}>
        {rows}
      </div>
    </div>
  )
+
+  return sortable ? (
+    <SortableContext items={ids} strategy={verticalListSortingStrategy}>
+      {list}
+    </SortableContext>
+  ) : (
+    list
+  )
 }

 interface VirtualSortableRowProps {
--- a/apps/desktop/src/app/chat/sidebar/workspace-groups.test.ts
+++ b/apps/desktop/src/app/chat/sidebar/workspace-groups.test.ts
@@ -1,149 +0,0 @@
-import { describe, expect, it } from 'vitest'
-
-import type { HermesWorktreeInfo } from '@/global'
-import type { SessionInfo } from '@/types/hermes'
-
-import { uniqueCwds, workspaceGroupsFor, workspaceTreeFor, type WorktreeResolver } from './workspace-groups'
-
-let nextId = 0
-
-function makeSession(cwd: null | string, overrides: Partial<SessionInfo> = {}): SessionInfo {
-  return {
-    archived: false,
-    cwd,
-    ended_at: null,
-    id: `s${nextId++}`,
-    input_tokens: 0,
-    is_active: false,
-    last_active: 1_000,
-    message_count: 1,
-    model: 'claude',
-    output_tokens: 0,
-    preview: null,
-    source: 'cli',
-    started_at: 1_000,
-    title: null,
-    tool_call_count: 0,
-    ...overrides
-  }
-}
-
-const labels = (sessions: SessionInfo[]) => workspaceGroupsFor(sessions, 'No workspace').map(g => g.label)
-
-describe('workspaceGroupsFor', () => {
-  it('groups by full cwd, not by basename — same-named folders are separate groups', () => {
-    const groups = workspaceGroupsFor(
-      [makeSession('/a/hermes-agent/apps/desktop'), makeSession('/a/hermes-agent-wt-rtl/apps/desktop')],
-      'No workspace'
-    )
-
-    expect(groups).toHaveLength(2)
-  })
-
-  it('disambiguates colliding basenames by walking up the path', () => {
-    expect(
-      labels([makeSession('/a/hermes-agent/apps/desktop'), makeSession('/a/hermes-agent-wt-rtl/apps/desktop')])
-    ).toEqual(['hermes-agent/apps/desktop', 'hermes-agent-wt-rtl/apps/desktop'])
-  })
-
-  it('leaves a unique basename as its short label', () => {
-    expect(labels([makeSession('/a/hermes-agent/apps/desktop'), makeSession('/b/heval-py')])).toEqual([
-      'desktop',
-      'heval-py'
-    ])
-  })
-
-  it('grows the prefix past one segment when the parent also collides', () => {
-    expect(labels([makeSession('/x/proj/apps/desktop'), makeSession('/y/proj/apps/desktop')])).toEqual([
-      'x/proj/apps/desktop',
-      'y/proj/apps/desktop'
-    ])
-  })
-
-  it('keeps the synthetic no-workspace group untouched even if a real group shares its label', () => {
-    const groups = workspaceGroupsFor([makeSession(null), makeSession('/a/No workspace')], 'No workspace')
-    const noWorkspace = groups.find(g => g.path === null)
-
-    expect(noWorkspace?.label).toBe('No workspace')
-  })
-})
-
-const info = (over: Partial<HermesWorktreeInfo> & Pick<HermesWorktreeInfo, 'repoRoot' | 'worktreeRoot'>): HermesWorktreeInfo => ({
-  branch: null,
-  isMainWorktree: false,
-  ...over
-})
-
-describe('workspaceTreeFor', () => {
-  it('heuristic nests `<repo>-wt-<branch>` under its sibling repo', () => {
-    const tree = workspaceTreeFor(
-      [makeSession('/www/hermes-agent'), makeSession('/www/hermes-agent-wt-rtl')],
-      'No workspace'
-    )
-
-    expect(tree).toHaveLength(1)
-    expect(tree[0].label).toBe('hermes-agent')
-    expect(tree[0].groups.map(g => g.label).sort()).toEqual(['hermes-agent', 'rtl'])
-  })
-
-  it('git metadata is authoritative — worktrees group by repoRoot regardless of directory naming', () => {
-    const resolver: WorktreeResolver = cwd => {
-      if (cwd === '/www/hermes-agent') {
-        return info({ repoRoot: '/www/hermes-agent', worktreeRoot: '/www/hermes-agent', isMainWorktree: true, branch: 'main' })
-      }
-
-      if (cwd === '/elsewhere/ha-rtl') {
-        return info({ repoRoot: '/www/hermes-agent', worktreeRoot: '/elsewhere/ha-rtl', branch: 'rtl' })
-      }
-
-      return null
-    }
-
-    const tree = workspaceTreeFor(
-      [makeSession('/www/hermes-agent'), makeSession('/elsewhere/ha-rtl')],
-      'No workspace',
-      resolver
-    )
-
-    expect(tree).toHaveLength(1)
-    expect(tree[0].label).toBe('hermes-agent')
-    // The main checkout labels by directory (its branch is transient — using it
-    // would misattribute old sessions to the currently checked-out branch);
-    // linked worktrees label by branch.
-    expect(tree[0].groups.map(g => g.label)).toEqual(['hermes-agent', 'rtl'])
-  })
-
-  it('a standalone directory is its own parent (always parent → worktree → sessions)', () => {
-    const tree = workspaceTreeFor([makeSession('/www/heval-node')], 'No workspace')
-
-    expect(tree).toHaveLength(1)
-    expect(tree[0].label).toBe('heval-node')
-    expect(tree[0].groups).toHaveLength(1)
-    expect(tree[0].groups[0].label).toBe('heval-node')
-  })
-
-  it('aggregates session counts across a repo’s worktrees', () => {
-    const tree = workspaceTreeFor(
-      [makeSession('/www/ha'), makeSession('/www/ha-wt-x'), makeSession('/www/ha-wt-x')],
-      'No workspace'
-    )
-
-    const parent = tree.find(p => p.label === 'ha')
-
-    expect(parent?.sessionCount).toBe(3)
-  })
-
-  it('no-workspace sessions form their own parent', () => {
-    const tree = workspaceTreeFor([makeSession(null)], 'No workspace')
-
-    expect(tree).toHaveLength(1)
-    expect(tree[0].label).toBe('No workspace')
-    expect(tree[0].path).toBeNull()
-  })
-})
-
-describe('uniqueCwds', () => {
-  it('dedupes and drops empty/whitespace cwds', () => {
-    expect(uniqueCwds([makeSession('/a'), makeSession('/a'), makeSession(null), makeSession('   ')])).toEqual(['/a'])
-  })
-})
--- a/apps/desktop/src/app/chat/sidebar/workspace-groups.ts
+++ b/apps/desktop/src/app/chat/sidebar/workspace-groups.ts
@@ -1,326 +0,0 @@
-import type { HermesWorktreeInfo } from '@/global'
-import type { SessionInfo } from '@/hermes'
-
-export interface SidebarSessionGroup {
-  id: string
-  label: string
-  path: null | string
-  sessions: SessionInfo[]
-  // Profile color for the ALL-profiles view; absent for workspace groups.
-  color?: null | string
-  loadingMore?: boolean
-  mode?: 'profile' | 'source' | 'workspace'
-  onLoadMore?: () => void
-  sourceId?: string
-  totalCount?: number
-}
-
-const NO_WORKSPACE_ID = '__no_workspace__'
-
-/** Path split into segments, ignoring trailing slashes and mixed separators. */
-const segments = (path: string): string[] => path.replace(/[/\\]+$/, '').split(/[/\\]/).filter(Boolean)
-
-/** Last path segment. */
-export const baseName = (path: string): string | undefined => segments(path).pop()
-
-/** The segments above the basename. */
-const parentSegments = (path: string): string[] => segments(path).slice(0, -1)
-
-interface Labelable {
-  id: string
-  label: string
-  path: null | string
-}
-
-/**
- * Disambiguate groups whose basename collides (worktrees all end in the same
- * `apps/desktop`, sibling repos share a folder name, etc.) by walking up the
- * path and prepending parent segments until each colliding label is unique —
- * e.g. `hermes-agent/desktop` vs `hermes-agent-wt-rtl/desktop`. Groups with a
- * unique basename keep their short label untouched.
- */
-function disambiguateLabels(groups: Labelable[]): void {
-  const byLabel = new Map<string, Labelable[]>()
-
-  for (const group of groups) {
-    const bucket = byLabel.get(group.label)
-
-    if (bucket) {
-      bucket.push(group)
-    } else {
-      byLabel.set(group.label, [group])
-    }
-  }
-
-  for (const bucket of byLabel.values()) {
-    if (bucket.length < 2) {
-      continue
-    }
-
-    // Only groups backed by a real path can grow a prefix; the synthetic
-    // "No workspace" group has no path and stays as-is.
-    const pathed = bucket.filter(group => group.path)
-
-    if (pathed.length < 2) {
-      continue
-    }
-
-    const parents = new Map(pathed.map(group => [group.id, parentSegments(group.path!)]))
-    let depth = 1
-
-    // Grow the prefix one parent segment at a time until every label in the
-    // bucket is distinct, or we run out of parent segments to add.
-    while (depth <= Math.max(...pathed.map(g => parents.get(g.id)!.length))) {
-      const labels = new Map<string, number>()
-
-      for (const group of pathed) {
-        const segs = parents.get(group.id)!
-        const prefix = segs.slice(-depth).join('/')
-        const base = baseName(group.path!) ?? group.path!
-        group.label = prefix ? `${prefix}/${base}` : base
-        labels.set(group.label, (labels.get(group.label) ?? 0) + 1)
-      }
-
-      if ([...labels.values()].every(count => count === 1)) {
-        break
-      }
-
-      depth += 1
-    }
-  }
-}
-
-export function workspaceGroupsFor(
-  sessions: SessionInfo[],
-  noWorkspaceLabel: string,
-  options: { preserveSessionOrder?: boolean } = {}
-): SidebarSessionGroup[] {
-  const groups = new Map<string, SidebarSessionGroup>()
-
-  for (const session of sessions) {
-    const path = session.cwd?.trim() || ''
-    const id = path || NO_WORKSPACE_ID
-    const label = baseName(path) || path || noWorkspaceLabel
-
-    const group = groups.get(id) ?? { id, label, path: path || null, sessions: [] }
-    group.sessions.push(session)
-    groups.set(id, group)
-  }
-
-  if (!options.preserveSessionOrder) {
-    // Groups keep recency order (Map insertion = first-seen in the recency-sorted
-    // input, so an active project floats up), but rows *within* a group sort by
-    // creation time so they don't reshuffle every time a message lands — keeps
-    // muscle memory intact.
-    for (const group of groups.values()) {
-      group.sessions.sort((a, b) => b.started_at - a.started_at)
-    }
-  }
-
-  const result = [...groups.values()]
-  disambiguateLabels(result)
-
-  return result
-}
-
-/**
- * A worktree's main repo and all its linked worktrees collapse into ONE parent
- * (keyed by the repo root); each worktree is a child group; sessions hang off
- * the worktree they ran in. `parent → worktree → sessions`.
- */
-export interface SidebarWorkspaceTree {
-  id: string
-  label: string
-  path: null | string
-  groups: SidebarSessionGroup[]
-  sessionCount: number
-}
-
-/** Resolves a session cwd to git-worktree identity (from the local fs probe). */
-export type WorktreeResolver = (cwd: string) => HermesWorktreeInfo | null | undefined
-
-interface WorkspacePlacement {
-  parentKey: string
-  parentLabel: string
-  parentPath: string
-  worktreeKey: string
-  worktreeLabel: string
-  worktreePath: string
-}
-
-/** Replace a path's final segment, preserving its prefix + separators. */
-const withBaseName = (path: string, name: string): string =>
-  path.replace(/[/\\]+$/, '').replace(/[^/\\]+$/, name)
-
-/**
- * Path-only fallback for when git metadata is unavailable (remote backends,
- * unreadable paths). Mirrors the git layout: a `<repo>-wt-<branch>` directory
- * nests under its sibling `<repo>`; any other directory is its own repo root.
- */
-function placeByHeuristic(path: string): WorkspacePlacement | null {
-  const base = baseName(path)
-
-  if (!base) {
-    return null
-  }
-
-  const worktreeMatch = base.match(/^(.+)-wt-(.+)$/)
-
-  if (worktreeMatch) {
-    const repo = worktreeMatch[1]
-    const repoPath = withBaseName(path, repo)
-
-    return {
-      parentKey: repoPath,
-      parentLabel: repo,
-      parentPath: repoPath,
-      worktreeKey: path,
-      worktreeLabel: worktreeMatch[2],
-      worktreePath: path
-    }
-  }
-
-  return {
-    parentKey: path,
-    parentLabel: base,
-    parentPath: path,
-    worktreeKey: path,
-    worktreeLabel: base,
-    worktreePath: path
-  }
-}
-
-function placeWorkspace(path: string, resolver?: WorktreeResolver): WorkspacePlacement | null {
-  const info = resolver?.(path)
-
-  if (info?.repoRoot && info.worktreeRoot) {
-    const dirLabel = baseName(info.worktreeRoot) || info.worktreeRoot
-
-    return {
-      parentKey: info.repoRoot,
-      parentLabel: baseName(info.repoRoot) ?? info.repoRoot,
-      parentPath: info.repoRoot,
-      worktreeKey: info.worktreeRoot,
-      // The main checkout's branch is transient — it changes as you work, so a
-      // branch label would misattribute every past session to whatever branch
-      // is checked out *now*. Label it by directory. Linked worktrees are
-      // per-branch by construction, so branch is the clearest label there.
-      worktreeLabel: info.isMainWorktree ? dirLabel : info.branch || dirLabel,
-      worktreePath: info.worktreeRoot
-    }
-  }
-
-  return placeByHeuristic(path)
-}
-
-/** Unique, non-empty session cwds — the batch to probe for worktree info. */
-export function uniqueCwds(sessions: SessionInfo[]): string[] {
-  const seen = new Set<string>()
-
-  for (const session of sessions) {
-    const path = session.cwd?.trim()
-
-    if (path) {
-      seen.add(path)
-    }
-  }
-
-  return [...seen]
-}
-
-/**
- * Build the `parent → worktree → sessions` tree. Parents keep recency order
- * (first-seen in the recency-sorted input); worktree groups within a parent do
- * too, while rows inside a worktree sort by creation time (stable muscle memory,
- * matching `workspaceGroupsFor`).
- */
-export function workspaceTreeFor(
-  sessions: SessionInfo[],
-  noWorkspaceLabel: string,
-  resolver?: WorktreeResolver,
-  options: { preserveSessionOrder?: boolean } = {}
-): SidebarWorkspaceTree[] {
-  interface WorktreeEntry {
-    group: SidebarSessionGroup
-    parentKey: string
-    parentLabel: string
-    parentPath: string
-  }
-
-  const worktrees = new Map<string, WorktreeEntry>()
-  const noWorkspace: SessionInfo[] = []
-
-  for (const session of sessions) {
-    const path = session.cwd?.trim() || ''
-
-    if (!path) {
-      noWorkspace.push(session)
-
-      continue
-    }
-
-    const placement = placeWorkspace(path, resolver)
-
-    if (!placement) {
-      noWorkspace.push(session)
-
-      continue
-    }
-
-    let entry = worktrees.get(placement.worktreeKey)
-
-    if (!entry) {
-      entry = {
-        group: { id: placement.worktreeKey, label: placement.worktreeLabel, path: placement.worktreePath, sessions: [] },
-        parentKey: placement.parentKey,
-        parentLabel: placement.parentLabel,
-        parentPath: placement.parentPath
-      }
-      worktrees.set(placement.worktreeKey, entry)
-    }
-
-    entry.group.sessions.push(session)
-  }
-
-  if (!options.preserveSessionOrder) {
-    for (const entry of worktrees.values()) {
-      entry.group.sessions.sort((a, b) => b.started_at - a.started_at)
-    }
-  }
-
-  const parents = new Map<string, SidebarWorkspaceTree>()
-
-  for (const entry of worktrees.values()) {
-    let parent = parents.get(entry.parentKey)
-
-    if (!parent) {
-      parent = { id: entry.parentKey, label: entry.parentLabel, path: entry.parentPath, groups: [], sessionCount: 0 }
-      parents.set(entry.parentKey, parent)
-    }
-
-    parent.groups.push(entry.group)
-    parent.sessionCount += entry.group.sessions.length
-  }
-
-  const result = [...parents.values()]
-
-  if (noWorkspace.length) {
-    result.push({
-      id: NO_WORKSPACE_ID,
-      label: noWorkspaceLabel,
-      path: null,
-      groups: [{ id: NO_WORKSPACE_ID, label: noWorkspaceLabel, path: null, sessions: noWorkspace }],
-      sessionCount: noWorkspace.length
-    })
-  }
-
-  // Parents that collide on basename grow a path prefix; worktree labels that
-  // collide inside a parent do the same.
-  disambiguateLabels(result)
-
-  for (const parent of result) {
-    disambiguateLabels(parent.groups)
-  }
-
-  return result
-}
--- a/apps/desktop/src/app/chat/thread-loading.ts
+++ b/apps/desktop/src/app/chat/thread-loading.ts
@@ -3,14 +3,9 @@ import type { ChatMessage } from '@/lib/chat-messages'
 export type ThreadLoadingState = 'response' | 'session'

 export function lastVisibleMessageIsUser(messages: ChatMessage[]): boolean {
-  // Allocation-free reverse scan — runs in a hot $messages computed.
-  for (let i = messages.length - 1; i >= 0; i -= 1) {
-    if (!messages[i].hidden) {
-      return messages[i].role === 'user'
-    }
-  }
+  const lastVisible = [...messages].reverse().find(message => !message.hidden)

-  return false
+  return lastVisible?.role === 'user'
 }

 export function threadLoadingState(
--- a/apps/desktop/src/app/command-palette/index.tsx
+++ b/apps/desktop/src/app/command-palette/index.tsx
@@ -118,10 +118,6 @@ const paletteFilter = (value: string, search: string, keywords?: string[]): numb
  return needle.split(/\s+/).every(term => haystack.includes(term)) ? 1 : 0
 }

-// Hermes session ids: <YYYYMMDD>_<HHMMSS>_<6 hex>. Used to offer a direct
-// "Go to session ‹id›" jump for ids that aren't in the recent-200 list.
-const SESSION_ID_RE = /^\d{8}_\d{6}_[a-f0-9]{6}$/
-
 type SessionRow = Awaited<ReturnType<typeof listAllProfileSessions>>['sessions'][number]

 const toSessionEntry = (session: SessionRow): SessionEntry => ({
@@ -417,24 +413,6 @@ export function CommandPalette() {

    const result: PaletteGroup[] = []

-    // Paste a raw session id → jump straight to it, even if it predates the
-    // recent-200 window the lists below are built from.
-    const directId = search.trim()
-
-    if (SESSION_ID_RE.test(directId)) {
-      result.push({
-        items: [
-          {
-            icon: MessageCircle,
-            id: `goto-${directId}`,
-            keywords: ['session', 'id', 'go to', directId],
-            label: `${t.commandCenter.goToSession} ${directId}`,
-            run: go(sessionRoute(directId))
-          }
-        ]
-      })
-    }
-
    if (sessions.length > 0) {
      result.push({
        heading: t.commandCenter.sections.sessions,
--- a/apps/desktop/src/app/desktop-controller.tsx
+++ b/apps/desktop/src/app/desktop-controller.tsx
@@ -37,7 +37,6 @@ import {
  SIDEBAR_SESSIONS_PAGE_SIZE,
  unpinSession
 } from '../store/layout'
-import { respondToApprovalAction } from '../store/native-notifications'
 import { $filePreviewTarget, $previewTarget, closeActiveRightRailTab } from '../store/preview'
 import {
  $activeGatewayProfile,
@@ -270,26 +269,6 @@ export function DesktopController() {
    }
  }, [])

-  // Notification click: the main process already focused the window; jump to its session.
-  useEffect(() => {
-    const unsubscribe = window.hermesDesktop?.onFocusSession?.(sessionId => {
-      if (sessionId) {
-        navigate(sessionRoute(sessionId))
-      }
-    })
-
-    return () => unsubscribe?.()
-  }, [navigate])
-
-  // Notification action button (Approve/Reject) — resolve in place, no navigation.
-  useEffect(() => {
-    const unsubscribe = window.hermesDesktop?.onNotificationAction?.(({ actionId, sessionId }) => {
-      void respondToApprovalAction(sessionId ?? null, actionId)
-    })
-
-    return () => unsubscribe?.()
-  }, [])
-
  // hermes:// deep links (e.g. a docs "Send to App" button for an automation blueprint).
  // Build the equivalent /blueprint slash command from the payload and drop
  // it into the composer — the user reviews/edits, then sends; the agent (or
--- a/apps/desktop/src/app/messaging/index.tsx
+++ b/apps/desktop/src/app/messaging/index.tsx
@@ -527,7 +527,7 @@ const PLATFORM_INTRO: Record<string, string> = {
  wecom_callback:
    'Set up a WeCom self-built app, expose its callback URL, and provide the corp ID, secret, agent ID, and AES key.',
  weixin:
-    'Run `hermes gateway setup`, select Weixin, then scan and confirm the QR code with a personal WeChat account. Hermes connects through Tencent\'s iLink Bot API and saves the credentials.',
+    'Sign in to the WeChat Official Account platform, copy the AppID and Token, and point the message callback URL at Hermes.',
  qqbot: 'Register an app on the QQ Open Platform (q.qq.com) and copy the App ID and Client Secret.',
  api_server:
    'Expose Hermes as an OpenAI-compatible API. Set an auth key, then point Open WebUI / LobeChat / etc. at the host:port.',
--- a/apps/desktop/src/app/profiles/create-profile-dialog.tsx
+++ b/apps/desktop/src/app/profiles/create-profile-dialog.tsx
@@ -2,15 +2,14 @@ import { useEffect, useState } from 'react'

 import { ActionStatus } from '@/components/ui/action-status'
 import { Button } from '@/components/ui/button'
+import { Checkbox } from '@/components/ui/checkbox'
 import { Dialog, DialogContent, DialogDescription, DialogFooter, DialogHeader, DialogTitle } from '@/components/ui/dialog'
 import { Input } from '@/components/ui/input'
-import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select'
 import { Textarea } from '@/components/ui/textarea'
 import { createProfile, updateProfileSoul } from '@/hermes'
 import { useI18n } from '@/i18n'
 import { AlertTriangle } from '@/lib/icons'
 import { cn } from '@/lib/utils'
-import type { ProfileInfo } from '@/types/hermes'

 const PROFILE_NAME_RE = /^[a-z0-9][a-z0-9_-]{0,63}$/

@@ -24,18 +23,16 @@ export function isValidProfileName(name: string): boolean {
 export function CreateProfileDialog({
  onClose,
  onCreated,
-  open,
-  profiles = []
+  open
 }: {
  onClose: () => void
  onCreated?: (name: string) => Promise<void> | void
  open: boolean
-  profiles?: ProfileInfo[]
 }) {
  const { t } = useI18n()
  const p = t.profiles
  const [name, setName] = useState('')
-  const [cloneFrom, setCloneFrom] = useState<null | string>('default')
+  const [cloneFromDefault, setCloneFromDefault] = useState(true)
  const [soul, setSoul] = useState('')
  const [status, setStatus] = useState<'done' | 'idle' | 'saving'>('idle')
  const [error, setError] = useState<null | string>(null)
@@ -46,7 +43,7 @@ export function CreateProfileDialog({
    }

    setName('')
-    setCloneFrom('default')
+    setCloneFromDefault(true)
    setSoul('')
    setError(null)
    setStatus('idle')
@@ -69,7 +66,7 @@ export function CreateProfileDialog({
    setError(null)

    try {
-      await createProfile({ name: trimmed, clone_from: cloneFrom })
+      await createProfile({ name: trimmed, clone_from_default: cloneFromDefault })

      if (soul.trim()) {
        await updateProfileSoul(trimmed, soul)
@@ -110,25 +107,17 @@ export function CreateProfileDialog({
            </p>
          </div>

-          <div className="grid gap-1.5">
-            <label className="text-xs font-medium" htmlFor="new-profile-clone-from">
-              {p.cloneFrom}
-            </label>
-            <Select onValueChange={value => setCloneFrom(value === '__none__' ? null : value)} value={cloneFrom ?? '__none__'}>
-              <SelectTrigger className="h-9 rounded-md" id="new-profile-clone-from">
-                <SelectValue />
-              </SelectTrigger>
-              <SelectContent>
-                <SelectItem value="__none__">{p.cloneFromNone}</SelectItem>
-                {profiles.map(profile => (
-                  <SelectItem key={profile.name} value={profile.name}>
-                    {profile.name}
-                  </SelectItem>
-                ))}
-              </SelectContent>
-            </Select>
-            <p className="text-xs text-muted-foreground">{p.cloneFromDesc}</p>
-          </div>
+          <label className="flex cursor-pointer select-none items-start gap-2.5 px-0.5 py-1">
+            <Checkbox
+              checked={cloneFromDefault}
+              className="mt-0.5 shrink-0"
+              onCheckedChange={checked => setCloneFromDefault(checked === true)}
+            />
+            <span className="grid gap-0.5 leading-snug">
+              <span className="text-sm font-medium">{p.cloneFromDefault}</span>
+              <span className="text-xs text-muted-foreground">{p.cloneFromDefaultDesc}</span>
+            </span>
+          </label>

          <div className="grid gap-1.5">
            <label className="text-xs font-medium" htmlFor="new-profile-soul">
@@ -138,7 +127,7 @@ export function CreateProfileDialog({
              className="min-h-28 font-mono text-xs leading-5"
              id="new-profile-soul"
              onChange={event => setSoul(event.target.value)}
-              placeholder={p.soulPlaceholder(cloneFrom ? p.soulPlaceholderCloned : p.soulPlaceholderEmpty)}
+              placeholder={p.soulPlaceholder(cloneFromDefault ? p.soulPlaceholderCloned : p.soulPlaceholderEmpty)}
              value={soul}
            />
          </div>
--- a/apps/desktop/src/app/profiles/index.tsx
+++ b/apps/desktop/src/app/profiles/index.tsx
@@ -12,7 +12,6 @@ import {
  DialogTitle
 } from '@/components/ui/dialog'
 import { Input } from '@/components/ui/input'
-import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select'
 import { Textarea } from '@/components/ui/textarea'
 import {
  createProfile,
@@ -83,14 +82,14 @@ export function ProfilesView({ onClose }: ProfilesViewProps) {
  }, [profiles, selectedName])

  const handleCreate = useCallback(
-    async (name: string, cloneFrom: null | string) => {
+    async (name: string, cloneFromDefault: boolean) => {
      const trimmed = name.trim()

      if (!isValidProfileName(trimmed)) {
        throw new Error(p.nameHint)
      }

-      await createProfile({ name: trimmed, clone_from: cloneFrom })
+      await createProfile({ name: trimmed, clone_from_default: cloneFromDefault })
      notify({ kind: 'success', title: p.created, message: trimmed })
      setSelectedName(trimmed)
      await refresh()
@@ -181,9 +180,8 @@ export function ProfilesView({ onClose }: ProfilesViewProps) {

      <CreateProfileDialog
          onClose={() => setCreateOpen(false)}
-          onCreate={async (name, cloneFrom) => handleCreate(name, cloneFrom)}
+          onCreate={async (name, cloneFromDefault) => handleCreate(name, cloneFromDefault)}
          open={createOpen}
-          profiles={profiles ?? []}
        />

        <Dialog onOpenChange={open => !open && !deleting && setPendingDelete(null)} open={pendingDelete !== null}>
@@ -455,18 +453,16 @@ function SoulEditor({ profileName }: { profileName: string }) {
 function CreateProfileDialog({
  onClose,
  onCreate,
-  open,
-  profiles
+  open
 }: {
  onClose: () => void
-  onCreate: (name: string, cloneFrom: null | string) => Promise<void>
+  onCreate: (name: string, cloneFromDefault: boolean) => Promise<void>
  open: boolean
-  profiles: ProfileInfo[]
 }) {
  const { t } = useI18n()
  const p = t.profiles
  const [name, setName] = useState('')
-  const [cloneFrom, setCloneFrom] = useState<null | string>('default')
+  const [cloneFromDefault, setCloneFromDefault] = useState(true)
  const [saving, setSaving] = useState(false)
  const [error, setError] = useState<null | string>(null)

@@ -476,7 +472,7 @@ function CreateProfileDialog({
    }

    setName('')
-    setCloneFrom('default')
+    setCloneFromDefault(true)
    setError(null)
    setSaving(false)
  }, [open])
@@ -497,7 +493,7 @@ function CreateProfileDialog({
    setError(null)

    try {
-      await onCreate(trimmed, cloneFrom)
+      await onCreate(trimmed, cloneFromDefault)
      onClose()
    } catch (err) {
      setError(err instanceof Error ? err.message : p.failedCreate)
@@ -532,25 +528,18 @@ function CreateProfileDialog({
            </p>
          </div>

-          <div className="grid gap-1.5">
-            <label className="text-xs font-medium" htmlFor="new-profile-clone-from">
-              {p.cloneFrom}
-            </label>
-            <Select onValueChange={value => setCloneFrom(value === '__none__' ? null : value)} value={cloneFrom ?? '__none__'}>
-              <SelectTrigger className="h-9 rounded-md" id="new-profile-clone-from">
-                <SelectValue />
-              </SelectTrigger>
-              <SelectContent>
-                <SelectItem value="__none__">{p.cloneFromNone}</SelectItem>
-                {profiles.map(profile => (
-                  <SelectItem key={profile.name} value={profile.name}>
-                    {profile.name}
-                  </SelectItem>
-                ))}
-              </SelectContent>
-            </Select>
-            <p className="text-xs text-muted-foreground">{p.cloneFromDesc}</p>
-          </div>
+          <label className="flex cursor-pointer items-center gap-2 rounded-md border border-border/40 bg-background/50 px-3 py-2 text-sm">
+            <input
+              checked={cloneFromDefault}
+              className="size-4 accent-primary"
+              onChange={event => setCloneFromDefault(event.target.checked)}
+              type="checkbox"
+            />
+            <span>
+              <span className="font-medium">{p.cloneFromDefault}</span>
+              <span className="ml-2 text-xs text-muted-foreground">{p.cloneFromDefaultDesc}</span>
+            </span>
+          </label>

          {error && (
            <div className="flex items-start gap-2 rounded-md border border-destructive/30 bg-destructive/10 px-3 py-2 text-xs text-destructive">
--- a/apps/desktop/src/app/right-sidebar/terminal/use-terminal-session.ts
+++ b/apps/desktop/src/app/right-sidebar/terminal/use-terminal-session.ts
@@ -328,19 +328,13 @@ export function useTerminalSession({ cwd, onAddSelectionToChat }: UseTerminalSes

    const term = new Terminal({
      allowProposedApi: true,
-      // Opaque canvas = WebGL's crisp fast-path. allowTransparency instead bakes
-      // glyphs as grayscale-alpha for compositing over a see-through canvas, which
-      // reads soft on every platform; VS Code keeps it off and our surface
-      // (--ui-bg-chrome) is opaque anyway, so withSurface paints it solid.
-      allowTransparency: false,
+      allowTransparency: true,
      convertEol: true,
      cursorBlink: true,
      fontFamily: "'JetBrains Mono', 'Cascadia Code', 'SF Mono', Menlo, Consolas, monospace",
      fontSize: 11,
-      // VS Code's terminal renders 'normal'/'bold' (400/700); we were using Medium
-      // (500) as the base, which reads a touch heavy at this size.
-      fontWeight: 'normal',
-      fontWeightBold: 'bold',
+      fontWeight: '400',
+      fontWeightBold: '700',
      letterSpacing: 0,
      lineHeight: 1.12,
      // Full-screen TUIs (hermes --tui, vim) grab the mouse, so a plain drag
@@ -623,10 +617,8 @@ export function useTerminalSession({ cwd, onAddSelectionToChat }: UseTerminalSes
      startSession()
    }

-    // fonts.ready settles only already-requested faces; the regular (400),
-    // bold (700) and italic aren't asked for until styled output paints (past
-    // atlas init), so warm them up front — otherwise the WebGL atlas bakes a
-    // fallback face and the terminal renders thin until a repaint.
+    // fonts.ready settles only already-requested faces; bold/italic aren't asked
+    // for until styled output paints (past atlas init), so warm them up front.
    const warm = document.fonts?.load
      ? Promise.allSettled(['400', '700', 'italic 400'].map(v => document.fonts.load(`${v} 11px 'JetBrains Mono'`)))
      : Promise.resolve()
--- a/apps/desktop/src/app/session/hooks/use-message-stream.ts
+++ b/apps/desktop/src/app/session/hooks/use-message-stream.ts
@@ -2,7 +2,6 @@ import type { QueryClient } from '@tanstack/react-query'
 import { type MutableRefObject, useCallback, useEffect, useRef } from 'react'

 import { readActiveTerminal } from '@/app/right-sidebar/terminal/buffer'
-import { translateNow } from '@/i18n'
 import {
  appendAssistantTextPart,
  appendReasoningPart,
@@ -16,21 +15,13 @@ import {
  upsertToolPart
 } from '@/lib/chat-messages'
 import { coerceGatewayText, coerceThinkingText, normalizePersonalityValue } from '@/lib/chat-runtime'
-import { playCompletionSound } from '@/lib/completion-sound'
 import { gatewayEventRequiresSessionId } from '@/lib/gateway-events'
-import {
-  dedupeGeneratedImageEchoesInParts,
-  generatedImageEchoSources,
-  stripGeneratedImageEchoes
-} from '@/lib/generated-images'
 import { triggerHaptic } from '@/lib/haptics'
 import { isProviderSetupErrorMessage } from '@/lib/provider-setup-errors'
 import { parseTodos } from '@/lib/todos'
 import { setClarifyRequest } from '@/store/clarify'
-import { setSessionCompacting } from '@/store/compaction'
 import { refreshBackgroundProcesses } from '@/store/composer-status'
 import { $gateway } from '@/store/gateway'
-import { dispatchNativeNotification } from '@/store/native-notifications'
 import { notify } from '@/store/notifications'
 import { requestDesktopOnboarding } from '@/store/onboarding'
 import { clearAllPrompts, setApprovalRequest, setSecretRequest, setSudoRequest } from '@/store/prompts'
@@ -334,8 +325,6 @@ export function useMessageStream({
  const flushHandleRef = useRef<number | null>(null)
  const lastFlushAtRef = useRef<number>(0)
  const nativeSubagentSessionsRef = useRef<Set<string>>(new Set())
-  // Turns that auto-compacted: skip post-turn hydrate so live scrollback survives.
-  const compactedTurnRef = useRef<Set<string>>(new Set())

  const flushQueuedDeltas = useCallback(
    (sessionId?: string) => {
@@ -354,7 +343,7 @@ export function useMessageStream({
        if (queued.assistant) {
          mutateStream(
            id,
-            parts => dedupeGeneratedImageEchoesInParts(appendAssistantTextPart(parts, queued.assistant)),
+            parts => appendAssistantTextPart(parts, queued.assistant),
            () => [assistantTextPart(queued.assistant)]
          )
        }
@@ -518,7 +507,7 @@ export function useMessageStream({

      mutateStream(
        sessionId,
-        parts => dedupeGeneratedImageEchoesInParts(upsertToolPart(parts, payload, phase)),
+        parts => upsertToolPart(parts, payload, phase),
        () => upsertToolPart([], payload, phase),
        { pending: m => phase !== 'complete' || (m.pending ?? false) }
      )
@@ -551,11 +540,9 @@ export function useMessageStream({
        const finalText = renderMediaTags(text).trim()
        const completionError = completionErrorText(finalText)
        const normalize = (value: string) => value.replace(/\s+/g, ' ').trim()
+        const dedupeReference = normalize(finalText)

        const replaceTextPart = (parts: ChatMessagePart[]) => {
-          const visibleFinalText = stripGeneratedImageEchoes(finalText, generatedImageEchoSources(parts)).trim()
-          const dedupeReference = normalize(visibleFinalText)
-
          const kept = parts.filter(part => {
            if (part.type === 'text') {
              return false
@@ -570,7 +557,7 @@ export function useMessageStream({
            return !(r && (dedupeReference.startsWith(r) || r.startsWith(dedupeReference)))
          })

-          return visibleFinalText ? [...kept, assistantTextPart(visibleFinalText)] : kept
+          return finalText ? [...kept, assistantTextPart(finalText)] : kept
        }

        const completeMessage = (message: ChatMessage): ChatMessage =>
@@ -642,22 +629,18 @@ export function useMessageStream({

      void refreshSessions().catch(() => undefined)

-      if (compactedTurnRef.current.delete(sessionId)) {
-        shouldHydrate = false
-      }
-
      if (shouldHydrate) {
        void hydrateFromStoredSession(3, completedState.storedSessionId, sessionId)
      }

-      dispatchNativeNotification({
-        body: text.slice(0, 140) || translateNow('notifications.native.turnDoneBody'),
-        kind: 'turnDone',
-        sessionId,
-        title: translateNow('notifications.native.turnDoneTitle')
-      })
+      if (document.hidden && sessionId === activeSessionIdRef.current) {
+        void window.hermesDesktop?.notify({
+          title: 'Hermes finished',
+          body: text.slice(0, 140) || 'The response is ready.'
+        })
+      }
    },
-    [hydrateFromStoredSession, refreshSessions, updateSessionState]
+    [activeSessionIdRef, hydrateFromStoredSession, refreshSessions, updateSessionState]
  )

  const failAssistantMessage = useCallback(
@@ -832,8 +815,6 @@ export function useMessageStream({

        flushQueuedDeltas(sessionId)
        clearSessionSubagents(sessionId)
-        setSessionCompacting(sessionId, false)
-        compactedTurnRef.current.delete(sessionId)
        nativeSubagentSessionsRef.current.delete(sessionId)

        if (isActiveEvent) {
@@ -879,11 +860,12 @@ export function useMessageStream({
        // session so a background turn finishing can't wipe the active chat's
        // prompt, and vice versa.
        clearAllPrompts(sessionId)
-        setSessionCompacting(sessionId, false)

        flushQueuedDeltas(sessionId)

-        playCompletionSound()
+        if (isActiveEvent) {
+          triggerHaptic('streamDone')
+        }

        const finalText = coerceGatewayText(payload?.text) || coerceGatewayText(payload?.rendered)
        completeAssistantMessage(sessionId, finalText)
@@ -914,7 +896,10 @@ export function useMessageStream({

          // terminal/process tool calls are the only things that spawn or reap
          // background processes — sync the composer status stack right after.
-          if (!sessionInterrupted(sessionId) && (payload?.name === 'terminal' || payload?.name === 'process')) {
+          if (
+            !sessionInterrupted(sessionId) &&
+            (payload?.name === 'terminal' || payload?.name === 'process')
+          ) {
            void refreshBackgroundProcesses(sessionId)
          }
        }
@@ -966,13 +951,6 @@ export function useMessageStream({
          if (sessionId) {
            updateSessionState(sessionId, state => ({ ...state, needsInput: true }))
          }
-
-          dispatchNativeNotification({
-            body: question,
-            kind: 'input',
-            sessionId,
-            title: translateNow('notifications.native.inputTitle')
-          })
        }
      } else if (event.type === 'approval.request') {
        // Dangerous-command / execute_code approval. The Python side is blocked
@@ -981,31 +959,17 @@ export function useMessageStream({
        // Park it per-session (like clarify) so a *background* profile's turn can
        // raise it and wait — the sidebar flags "needs input" and the inline bar
        // surfaces once the user focuses that chat.
-        const command = typeof payload?.command === 'string' ? payload.command : ''
-        const description = typeof payload?.description === 'string' ? payload.description : 'dangerous command'
-
        setApprovalRequest({
          // false only when a tirith warning forbids it; backend omits the field otherwise.
          allowPermanent: payload?.allow_permanent !== false,
-          command,
-          description,
+          command: typeof payload?.command === 'string' ? payload.command : '',
+          description: typeof payload?.description === 'string' ? payload.description : 'dangerous command',
          sessionId: sessionId ?? null
        })

        if (sessionId) {
          updateSessionState(sessionId, state => ({ ...state, needsInput: true }))
        }
-
-        dispatchNativeNotification({
-          actions: [
-            { id: 'approve', text: translateNow('notifications.native.approveAction') },
-            { id: 'reject', text: translateNow('notifications.native.rejectAction') }
-          ],
-          body: command || description,
-          kind: 'approval',
-          sessionId,
-          title: translateNow('notifications.native.approvalTitle')
-        })
      } else if (event.type === 'sudo.request') {
        // Sudo password capture (tools/terminal_tool.py). Blocked on
        // sudo.respond {request_id, password}.
@@ -1017,13 +981,6 @@ export function useMessageStream({
          if (sessionId) {
            updateSessionState(sessionId, state => ({ ...state, needsInput: true }))
          }
-
-          dispatchNativeNotification({
-            body: translateNow('notifications.native.inputBody'),
-            kind: 'input',
-            sessionId,
-            title: translateNow('notifications.native.inputTitle')
-          })
        }
      } else if (event.type === 'secret.request') {
        // Skill credential capture (tools/skills_tool.py). Blocked on
@@ -1031,26 +988,16 @@ export function useMessageStream({
        const requestId = typeof payload?.request_id === 'string' ? payload.request_id : ''

        if (requestId) {
-          const envVar = typeof payload?.env_var === 'string' ? payload.env_var : ''
-          const promptText = typeof payload?.prompt === 'string' ? payload.prompt : ''
-
          setSecretRequest({
            requestId,
-            envVar,
-            prompt: promptText,
+            envVar: typeof payload?.env_var === 'string' ? payload.env_var : '',
+            prompt: typeof payload?.prompt === 'string' ? payload.prompt : '',
            sessionId: sessionId ?? null
          })

          if (sessionId) {
            updateSessionState(sessionId, state => ({ ...state, needsInput: true }))
          }
-
-          dispatchNativeNotification({
-            body: promptText || envVar || translateNow('notifications.native.inputBody'),
-            kind: 'input',
-            sessionId,
-            title: translateNow('notifications.native.inputTitle')
-          })
        }
      } else if (event.type === 'terminal.read.request') {
        // read_terminal tool: serialize the renderer's xterm buffer and answer
@@ -1068,12 +1015,9 @@ export function useMessageStream({
          })
        }
      } else if (event.type === 'status.update') {
-        if (sessionId && payload?.kind === 'compacting') {
-          setSessionCompacting(sessionId, true)
-          compactedTurnRef.current.add(sessionId)
-        } else if (sessionId && payload?.kind === 'process') {
-          // The gateway's notification poller announces background process
-          // completions / watch matches here — re-sync the status stack.
+        // The gateway's notification poller announces background process
+        // completions / watch matches here — re-sync the status stack.
+        if (sessionId && payload?.kind === 'process') {
          void refreshBackgroundProcesses(sessionId)
        }
      } else if (event.type === 'error') {
@@ -1085,17 +1029,8 @@ export function useMessageStream({
        // the failed turn (same intent as the message.complete clear).
        if (sessionId) {
          clearAllPrompts(sessionId)
-          setSessionCompacting(sessionId, false)
-          compactedTurnRef.current.delete(sessionId)
        }

-        dispatchNativeNotification({
-          body: errorMessage,
-          kind: 'turnError',
-          sessionId,
-          title: translateNow('notifications.native.turnErrorTitle')
-        })
-
        if (looksLikeProviderSetup) {
          requestDesktopOnboarding(errorMessage)
        } else if (isActiveEvent) {
--- a/apps/desktop/src/app/session/hooks/use-prompt-actions.test.tsx
+++ b/apps/desktop/src/app/session/hooks/use-prompt-actions.test.tsx
@@ -325,81 +325,6 @@ describe('usePromptActions submit / queue drain semantics', () => {
    })
  })

-  it('a rejected fromQueue drain returns false (entry stays queued) and a later retry sends it', async () => {
-    // A stale-session 404 must not strand the queued entry: submitPrompt returns
-    // false on failure so the composer keeps it, and the edge-independent
-    // auto-drain re-attempts once the session is idle again. storedSessionId is
-    // null so the session.resume recovery path is skipped and the error surfaces.
-    let attempt = 0
-    const requestGateway = vi.fn(async (method: string) => {
-      if (method === 'prompt.submit') {
-        attempt += 1
-
-        if (attempt === 1) {
-          throw new Error('404: {"detail":"Session not found"}')
-        }
-      }
-
-      return {} as never
-    })
-
-    let handle: HarnessHandle | null = null
-    render(
-      <Harness
-        onReady={h => (handle = h)}
-        refreshSessions={async () => undefined}
-        requestGateway={requestGateway}
-        storedSessionId={null}
-      />
-    )
-
-    const first = await handle!.submitText('please send me', { fromQueue: true })
-    expect(first).toBe(false)
-
-    const second = await handle!.submitText('please send me', { fromQueue: true })
-    expect(second).toBe(true)
-    expect(requestGateway).toHaveBeenCalledWith('prompt.submit', {
-      session_id: RUNTIME_SESSION_ID,
-      text: 'please send me'
-    })
-  })
-
-  it('rides out a transient "session busy" so the user never sees it (retries, no error bubble)', async () => {
-    // A submit racing the settle edge can hit a transient 4009 before the turn
-    // has fully wound down. It must be invisible: retried in place until the
-    // gateway accepts, never a red "session busy" bubble.
-    let attempt = 0
-    const seeds: Record<string, unknown>[] = []
-    const requestGateway = vi.fn(async (method: string) => {
-      if (method === 'prompt.submit') {
-        attempt += 1
-
-        if (attempt === 1) {
-          throw new Error('4009: session busy')
-        }
-      }
-
-      return {} as never
-    })
-
-    let handle: HarnessHandle | null = null
-    render(
-      <Harness
-        onReady={h => (handle = h)}
-        onSeedState={s => seeds.push(s)}
-        refreshSessions={async () => undefined}
-        requestGateway={requestGateway}
-      />
-    )
-
-    expect(await handle!.submitText('sent while settling')).toBe(true)
-    expect(attempt).toBe(2) // rode past the busy on the second try
-    // No assistant-error message was appended for the transient busy.
-    expect(seeds.some(s => Array.isArray(s.messages) && (s.messages as { error?: string }[]).some(m => m.error))).toBe(
-      false
-    )
-  })
-
  it('a normal (non-queue) submit still respects the busyRef guard', async () => {
    const busyRef = { current: true }
    const requestGateway = vi.fn(async () => ({}) as never)
@@ -876,7 +801,7 @@ describe('usePromptActions sleep/wake session recovery', () => {
    const requestGateway = vi.fn(async (method: string) => {
      calls.push(method)
      if (method === 'prompt.submit') {
-        throw new Error('gateway exploded')
+        throw new Error('session busy')
      }
      return {} as never
    })
--- a/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
+++ b/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
@@ -118,12 +118,10 @@ function isSessionNotFoundError(error: unknown): boolean {
 }

 // The gateway refuses prompt.submit while a turn is running (4009 "session
-// busy"). It's a transient concurrency guard, never a user-facing error: a
-// submit racing the settle edge (or a rewind interrupting mid-turn) just waits
-// a beat for the turn to wind down, then lands. Bounded so a genuinely stuck
-// turn still surfaces eventually.
-const SESSION_BUSY_RETRY_TIMEOUT_MS = 6_000
-const SESSION_BUSY_RETRY_INTERVAL_MS = 150
+// busy"). Edit/restore (revert) can fire mid-turn, so they interrupt first, then
+// retry the submit (up to the timeout below) until the cooperative interrupt has wound the turn down.
+const REWIND_INTERRUPT_TIMEOUT_MS = 6_000
+const REWIND_RETRY_INTERVAL_MS = 150

 function isSessionBusyError(error: unknown): boolean {
  return /session busy/i.test(error instanceof Error ? error.message : String(error))
@@ -131,26 +129,6 @@ function isSessionBusyError(error: unknown): boolean {

 const sleep = (ms: number) => new Promise<void>(resolve => setTimeout(resolve, ms))

-// Retry a gateway call across transient "session busy" so it never reaches the
-// user — the turn settles within the deadline and the call lands.
-async function withSessionBusyRetry<T>(call: () => Promise<T>): Promise<T> {
-  const deadline = Date.now() + SESSION_BUSY_RETRY_TIMEOUT_MS
-
-  for (;;) {
-    try {
-      return await call()
-    } catch (err) {
-      if (isSessionBusyError(err) && Date.now() < deadline) {
-        await sleep(SESSION_BUSY_RETRY_INTERVAL_MS)
-
-        continue
-      }
-
-      throw err
-    }
-  }
-}
-
 function base64FromDataUrl(dataUrl: string): string {
  const comma = dataUrl.indexOf(',')

@@ -705,7 +683,7 @@ export function usePromptActions({
        let submitErr: unknown = null

        try {
-          await withSessionBusyRetry(() => requestGateway('prompt.submit', { session_id: sessionId, text }))
+          await requestGateway('prompt.submit', { session_id: sessionId, text })
        } catch (firstErr) {
          if (isSessionNotFoundError(firstErr) && selectedStoredSessionIdRef.current) {
            // Re-register the session in the gateway and get a fresh live ID.
@@ -717,7 +695,7 @@ export function usePromptActions({

            if (recoveredId) {
              activeSessionIdRef.current = recoveredId
-              await withSessionBusyRetry(() => requestGateway('prompt.submit', { session_id: recoveredId, text }))
+              await requestGateway('prompt.submit', { session_id: recoveredId, text })
            } else {
              submitErr = firstErr
            }
@@ -736,17 +714,9 @@ export function usePromptActions({

        return true
      } catch (err) {
-        releaseBusy()
-
-        // A queued drain that raced a not-yet-settled turn gets a transient
-        // "session busy" (4009). Don't surface an error bubble/toast — the entry
-        // stays queued and the composer's bounded auto-drain retries when idle.
-        if (options?.fromQueue && isSessionBusyError(err)) {
-          return false
-        }
-
        const message = inlineErrorMessage(err, copy.promptFailed)

+        releaseBusy()
        updateSessionState(sessionId, state => ({
          ...state,
          messages: [
@@ -1482,8 +1452,9 @@ export function usePromptActions({
  // text is submitted as a fresh turn. Callers confirm before invoking; errors
  // are rethrown so the confirmation dialog can surface them inline.
  // Submit a rewind (truncate-before-ordinal + resubmit). Because edit/restore
-  // can fire while a turn is streaming, interrupt the live turn first — the
-  // cooperative interrupt takes a beat, so the shared busy-retry rides it out.
+  // can fire while a turn is streaming, interrupt the live turn first, then
+  // retry the submit until the gateway stops reporting "session busy" — the
+  // interrupt is cooperative, so the running turn takes a beat to wind down.
  const submitRewindPrompt = useCallback(
    async (sessionId: string, text: string, truncateOrdinal: number | undefined, wasRunning: boolean) => {
      if (wasRunning) {
@@ -1494,13 +1465,27 @@ export function usePromptActions({
        }
      }

-      await withSessionBusyRetry(() =>
-        requestGateway('prompt.submit', {
-          session_id: sessionId,
-          text,
-          ...(truncateOrdinal !== undefined && { truncate_before_user_ordinal: truncateOrdinal })
-        })
-      )
+      const deadline = Date.now() + REWIND_INTERRUPT_TIMEOUT_MS
+
+      for (;;) {
+        try {
+          await requestGateway('prompt.submit', {
+            session_id: sessionId,
+            text,
+            ...(truncateOrdinal !== undefined && { truncate_before_user_ordinal: truncateOrdinal })
+          })
+
+          return
+        } catch (err) {
+          if (isSessionBusyError(err) && Date.now() < deadline) {
+            await sleep(REWIND_RETRY_INTERVAL_MS)
+
+            continue
+          }
+
+          throw err
+        }
+      }
    },
    [requestGateway]
  )
--- a/apps/desktop/src/app/session/hooks/use-session-actions.ts
+++ b/apps/desktop/src/app/session/hooks/use-session-actions.ts
@@ -2,7 +2,7 @@ import type { MutableRefObject } from 'react'
 import { useCallback, useRef } from 'react'
 import type { NavigateFunction } from 'react-router-dom'

-import { deleteSession, getSession, getSessionMessages, setSessionArchived } from '@/hermes'
+import { deleteSession, getSessionMessages, listAllProfileSessions, setSessionArchived } from '@/hermes'
 import { useI18n } from '@/i18n'
 import { type ChatMessage, chatMessageText, preserveLocalAssistantErrors, toChatMessages } from '@/lib/chat-messages'
 import { normalizePersonalityValue } from '@/lib/chat-runtime'
@@ -12,7 +12,7 @@ import { clearQueuedPrompts } from '@/store/composer-queue'
 import { $pinnedSessionIds } from '@/store/layout'
 import { clearNotifications, notify, notifyError } from '@/store/notifications'
 import { requestDesktopOnboarding } from '@/store/onboarding'
-import { $activeGatewayProfile, $newChatProfile, $profiles, ensureGatewayProfile, normalizeProfileKey } from '@/store/profile'
+import { $activeGatewayProfile, $newChatProfile, ensureGatewayProfile, normalizeProfileKey } from '@/store/profile'
 import {
  $currentCwd,
  $messages,
@@ -236,42 +236,18 @@ async function resolveStoredSession(storedSessionId: string): Promise<SessionInf
    return cached
  }

-  // Direct by-id on the live backend — one row lookup, no list scan. Covers
-  // single-profile users and any id on the active profile (e.g. an old session
-  // past the sidebar's recent window). 404 just means it's not on this profile.
  try {
-    const session = await getSession(storedSessionId)
+    const result = await listAllProfileSessions(500, 0, 'include', 'recent', 'all')
+    const resolved = result.sessions.find(session => sessionMatchesStoredId(session, storedSessionId))

-    upsertResolvedSession(session, storedSessionId)
-
-    return session
-  } catch {
-    // Not on the active profile — fall through to the cross-profile probe.
-  }
-
-  // Multi-profile only: probe each other profile by id (still one cheap lookup
-  // each) rather than pulling every profile's recent sessions. The first hit
-  // carries its owning `profile`, which routes the resume to the right backend.
-  const activeKey = normalizeProfileKey($activeGatewayProfile.get())
-
-  const otherProfiles = $profiles
-    .get()
-    .map(profile => normalizeProfileKey(profile.name))
-    .filter(key => key !== activeKey)
-
-  for (const profile of otherProfiles) {
-    try {
-      const session = await getSession(storedSessionId, profile)
-
-      upsertResolvedSession(session, storedSessionId)
-
-      return session
-    } catch {
-      // Not on this profile; try the next.
+    if (resolved) {
+      upsertResolvedSession(resolved, storedSessionId)
    }
-  }

-  return undefined
+    return resolved
+  } catch {
+    return undefined
+  }
 }

 type SessionRuntimeStatePatch = Partial<
@@ -547,31 +523,8 @@ export function useSessionActions({
      const isCurrentResume = () =>
        resumeRequestRef.current === requestId && selectedStoredSessionIdRef.current === storedSessionId

-      // Paint the click before the profile-resolve / gateway-swap awaits below,
-      // so there's zero dead air: highlight the row instantly (the sidebar reads
-      // $selectedStoredSessionId) and, for a cold target, drop the previous
-      // transcript so the thread shows its loader instead of the old session
-      // lingering until resume lands. A warm-cached target keeps its transcript —
-      // the cached fast-path repaints it this same tick. Setting the ref here is
-      // also what use-route-resume's self-heal assumes ("set synchronously at
-      // resume entry").
-      setFreshDraftReady(false)
-      clearNotifications()
-      setSelectedStoredSessionId(storedSessionId)
-      selectedStoredSessionIdRef.current = storedSessionId
-
-      const warmRuntimeId = runtimeIdByStoredSessionIdRef.current.get(storedSessionId)
-
-      if (!warmRuntimeId || !sessionStateByRuntimeIdRef.current.get(warmRuntimeId)) {
-        setActiveSessionId(null)
-        activeSessionIdRef.current = null
-        setMessages([])
-      }
-
      // Swap the single live gateway to this session's profile before any
      // gateway call (no-op when it's already on that profile / single-profile).
-      // resolveStoredSession finds the row by id (cheap), so an uncached pasted
-      // id loads as fast as a sidebar click instead of hanging on a list scan.
      const storedForProfile = await resolveStoredSession(storedSessionId)
      const sessionProfile = storedForProfile?.profile

@@ -665,26 +618,10 @@ export function useSessionActions({
        const watchWindow = isWatchWindow()
        let localSnapshot = $messages.get()

-        // REST transcript prefetch and the gateway resume RPC are independent
-        // — run them concurrently so a big session's wall time is
-        // max(prefetch, resume) instead of their sum. The prefetch paints the
-        // transcript as soon as it lands; the RPC binds the runtime id.
-        // Watch windows skip the prefetch — lazy resume attaches the live mirror.
-        const prefetchPromise = watchWindow ? null : getSessionMessages(storedSessionId, sessionProfile)
-
-        const resumePromise = requestGateway<SessionResumeResponse>('session.resume', {
-          session_id: storedSessionId,
-          cols: 96,
-          ...(watchWindow ? { lazy: true } : {}),
-          ...(sessionProfile ? { profile: sessionProfile } : {})
-        })
-        // The rejection is consumed by the `await` below; this guard only
-        // keeps it from surfacing as unhandled while the prefetch settles.
-        resumePromise.catch(() => undefined)
-
        try {
-          if (prefetchPromise) {
-            const storedMessages = await prefetchPromise
+          // Watch windows skip REST prefetch — lazy resume attaches the live mirror.
+          if (!watchWindow) {
+            const storedMessages = await getSessionMessages(storedSessionId, sessionProfile)

            if (isCurrentResume()) {
              localSnapshot = preserveLocalAssistantErrors(toChatMessages(storedMessages.messages), $messages.get())
@@ -698,7 +635,12 @@ export function useSessionActions({
          // Non-fatal: gateway resume below can still hydrate the session.
        }

-        const resumed = await resumePromise
+        const resumed = await requestGateway<SessionResumeResponse>('session.resume', {
+          session_id: storedSessionId,
+          cols: 96,
+          ...(watchWindow ? { lazy: true } : {}),
+          ...(sessionProfile ? { profile: sessionProfile } : {})
+        })

        if (!isCurrentResume()) {
          return
@@ -706,22 +648,17 @@ export function useSessionActions({

        const currentMessages = $messages.get()

-        // Keep the local snapshot when resume would only reshuffle runtime
-        // projection. When the REST prefetch already hydrated the transcript,
-        // skip converting/reconciling the resume payload entirely — on a
-        // 1000+-message session that second conversion plus the deep
-        // equivalence compare costs over a second of main-thread time.
+        const resumedMessages = preserveLocalAssistantErrors(
+          reconcileResumeMessages(toChatMessages(resumed.messages), currentMessages),
+          currentMessages
+        )
+        // Keep the local snapshot when resume would only reshuffle runtime projection.
        const preferredMessages =
          localSnapshot.length > 0
            ? localSnapshot
-            : (() => {
-                const resumedMessages = preserveLocalAssistantErrors(
-                  reconcileResumeMessages(toChatMessages(resumed.messages), currentMessages),
-                  currentMessages
-                )
-
-                return chatMessageArraysEquivalent(currentMessages, resumedMessages) ? currentMessages : resumedMessages
-              })()
+            : chatMessageArraysEquivalent(currentMessages, resumedMessages)
+              ? currentMessages
+              : resumedMessages

        const messagesForView = preserveLocalAssistantErrors(preferredMessages, currentMessages)

--- a/apps/desktop/src/app/settings/index.tsx
+++ b/apps/desktop/src/app/settings/index.tsx
@@ -5,7 +5,7 @@ import { Tip } from '@/components/ui/tooltip'
 import { getHermesConfigDefaults, getHermesConfigRecord, saveHermesConfig } from '@/hermes'
 import { useI18n } from '@/i18n'
 import { triggerHaptic } from '@/lib/haptics'
-import { Archive, Bell, Globe, Info, KeyRound, Settings2, Sparkles, Wrench, Zap } from '@/lib/icons'
+import { Archive, Globe, Info, KeyRound, Settings2, Sparkles, Wrench, Zap } from '@/lib/icons'
 import { notifyError } from '@/store/notifications'

 import { useRouteEnumParam } from '../hooks/use-route-enum-param'
@@ -20,7 +20,6 @@ import { SECTIONS } from './constants'
 import { GatewaySettings } from './gateway-settings'
 import { KEYS_VIEWS, KeysSettings, type KeysView } from './keys-settings'
 import { McpSettings } from './mcp-settings'
-import { NotificationsSettings } from './notifications-settings'
 import { PROVIDER_VIEWS, ProvidersSettings, type ProviderView } from './providers-settings'
 import { SessionsSettings } from './sessions-settings'
 import type { SettingsPageProps, SettingsView as SettingsViewId } from './types'
@@ -31,7 +30,6 @@ const SETTINGS_VIEWS: readonly SettingsViewId[] = [
  'gateway',
  'keys',
  'mcp',
-  'notifications',
  'sessions',
  'about'
 ]
@@ -103,12 +101,6 @@ export function SettingsView({ gateway, onClose, onConfigSaved, onMainModelChang
              />
            )
          })}
-          <OverlayNavItem
-            active={activeView === 'notifications'}
-            icon={Bell}
-            label={t.settings.nav.notifications}
-            onClick={() => setActiveView('notifications')}
-          />
          <div className="my-2 h-px bg-border/30" />
          <OverlayNavItem
            active={activeView === 'providers'}
@@ -233,8 +225,6 @@ export function SettingsView({ gateway, onClose, onConfigSaved, onMainModelChang
            <KeysSettings view={keysView} />
          ) : activeView === 'mcp' ? (
            <McpSettings gateway={gateway} onConfigSaved={onConfigSaved} />
-          ) : activeView === 'notifications' ? (
-            <NotificationsSettings />
          ) : (
            <SessionsSettings />
          )}
--- a/apps/desktop/src/app/settings/notifications-settings.tsx
+++ b/apps/desktop/src/app/settings/notifications-settings.tsx
@@ -1,150 +0,0 @@
-import { useStore } from '@nanostores/react'
-import type { ReactNode } from 'react'
-
-import { Button } from '@/components/ui/button'
-import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select'
-import { Switch } from '@/components/ui/switch'
-import { useI18n } from '@/i18n'
-import { COMPLETION_SOUND_VARIANTS, previewCompletionSound } from '@/lib/completion-sound'
-import { triggerHaptic } from '@/lib/haptics'
-import { Bell, Play } from '@/lib/icons'
-import { cn } from '@/lib/utils'
-import { $completionSoundVariantId, setCompletionSoundVariantId } from '@/store/completion-sound'
-import {
-  $nativeNotifyPrefs,
-  NATIVE_NOTIFICATION_KINDS,
-  sendTestNativeNotification,
-  setNativeNotifyEnabled,
-  setNativeNotifyKind
-} from '@/store/native-notifications'
-import { notify } from '@/store/notifications'
-
-import { CONTROL_TEXT } from './constants'
-import { ListRow, SectionHeading, SettingsContent } from './primitives'
-
-const CAPTION = 'text-[length:var(--conversation-caption-font-size)] text-(--ui-text-tertiary)'
-
-function Caption({ children, className }: { children: ReactNode; className?: string }) {
-  return <p className={cn(CAPTION, className)}>{children}</p>
-}
-
-function ToggleRow(props: {
-  checked: boolean
-  description: string
-  disabled?: boolean
-  label: string
-  onChange: (on: boolean) => void
-}) {
-  return (
-    <ListRow
-      action={
-        <Switch
-          aria-label={props.label}
-          checked={props.checked}
-          disabled={props.disabled}
-          onCheckedChange={on => {
-            triggerHaptic('selection')
-            props.onChange(on)
-          }}
-        />
-      }
-      description={props.description}
-      title={props.label}
-    />
-  )
-}
-
-export function NotificationsSettings() {
-  const { t } = useI18n()
-  const prefs = useStore($nativeNotifyPrefs)
-  const completionSoundVariantId = useStore($completionSoundVariantId)
-  const copy = t.settings.notifications
-
-  const runTest = async () => {
-    triggerHaptic('open')
-    const ok = await sendTestNativeNotification(copy.testTitle, copy.testBody)
-    notify({ kind: ok ? 'info' : 'error', message: ok ? copy.testSent : copy.testUnsupported })
-  }
-
-  return (
-    <SettingsContent>
-      <SectionHeading icon={Bell} title={copy.title} />
-      <Caption className="mb-2 leading-(--conversation-caption-line-height)">{copy.intro}</Caption>
-
-      <ToggleRow
-        checked={prefs.enabled}
-        description={copy.enableAllDesc}
-        label={copy.enableAll}
-        onChange={setNativeNotifyEnabled}
-      />
-
-      <div className="my-1 h-px bg-border/30" />
-
-      {NATIVE_NOTIFICATION_KINDS.map(kind => (
-        <ToggleRow
-          checked={prefs.enabled && prefs.kinds[kind]}
-          description={copy.kinds[kind].description}
-          disabled={!prefs.enabled}
-          key={kind}
-          label={copy.kinds[kind].label}
-          onChange={on => setNativeNotifyKind(kind, on)}
-        />
-      ))}
-
-      <div className="my-1 h-px bg-border/30" />
-
-      <ListRow
-        action={
-          <div className="flex flex-wrap items-center justify-end gap-2">
-            <Select
-              onValueChange={value => {
-                const variantId = Number.parseInt(value, 10)
-
-                setCompletionSoundVariantId(variantId)
-                previewCompletionSound(variantId)
-                triggerHaptic('selection')
-              }}
-              value={String(completionSoundVariantId)}
-            >
-              <SelectTrigger className={cn('min-w-56', CONTROL_TEXT)}>
-                <SelectValue />
-              </SelectTrigger>
-
-              <SelectContent>
-                {COMPLETION_SOUND_VARIANTS.map(variant => (
-                  <SelectItem key={variant.id} value={String(variant.id)}>
-                    {variant.name}
-                  </SelectItem>
-                ))}
-              </SelectContent>
-            </Select>
-
-            <Button
-              className="gap-1.5"
-              onClick={() => {
-                previewCompletionSound()
-                triggerHaptic('crisp')
-              }}
-              size="sm"
-              type="button"
-              variant="outline"
-            >
-              <Play className="size-3.5" />
-              {copy.completionSoundPreview}
-            </Button>
-          </div>
-        }
-        description={copy.completionSoundDesc}
-        title={copy.completionSoundTitle}
-      />
-
-      <div className="mt-4 flex flex-col gap-2">
-        <Button className="self-start" onClick={() => void runTest()} size="sm" type="button" variant="outline">
-          <Bell />
-          {copy.test}
-        </Button>
-        <Caption>{copy.focusedHint}</Caption>
-      </div>
-    </SettingsContent>
-  )
-}
--- a/apps/desktop/src/app/settings/providers-settings.test.tsx
+++ b/apps/desktop/src/app/settings/providers-settings.test.tsx
@@ -1,100 +0,0 @@
-import { cleanup, fireEvent, render, screen, waitFor } from '@testing-library/react'
-import { atom } from 'nanostores'
-import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
-
-import type { OAuthProvider } from '@/types/hermes'
-
-const listOAuthProviders = vi.fn()
-const disconnectOAuthProvider = vi.fn()
-const getEnvVars = vi.fn()
-const startManualProviderOAuth = vi.fn()
-const onboarding = atom({ manual: false })
-
-vi.mock('@/hermes', () => ({
-  disconnectOAuthProvider: (providerId: string) => disconnectOAuthProvider(providerId),
-  getEnvVars: () => getEnvVars(),
-  listOAuthProviders: () => listOAuthProviders()
-}))
-
-vi.mock('@/store/onboarding', () => ({
-  $desktopOnboarding: onboarding,
-  startManualProviderOAuth: (providerId: string) => startManualProviderOAuth(providerId)
-}))
-
-function provider(id: string, loggedIn: boolean, patch: Partial<OAuthProvider> = {}): OAuthProvider {
-  return {
-    cli_command: `hermes auth add ${id}`,
-    disconnectable: true,
-    docs_url: '',
-    flow: 'device_code',
-    id,
-    name: id === 'nous' ? 'Nous Portal' : 'MiniMax',
-    status: {
-      logged_in: loggedIn
-    },
-    ...patch
-  }
-}
-
-beforeEach(() => {
-  onboarding.set({ manual: false })
-  getEnvVars.mockResolvedValue({})
-  disconnectOAuthProvider.mockResolvedValue({ ok: true, provider: 'nous' })
-  listOAuthProviders.mockResolvedValue({
-    providers: [provider('nous', true), provider('minimax-oauth', false)]
-  })
-  vi.spyOn(window, 'confirm').mockReturnValue(true)
-})
-
-afterEach(() => {
-  cleanup()
-  vi.restoreAllMocks()
-  vi.clearAllMocks()
-})
-
-async function renderProvidersSettings() {
-  const { ProvidersSettings } = await import('./providers-settings')
-
-  return render(<ProvidersSettings onViewChange={vi.fn()} view="accounts" />)
-}
-
-describe('ProvidersSettings', () => {
-  it('disconnects a connected provider account and refreshes the accounts list', async () => {
-    await renderProvidersSettings()
-
-    const remove = await screen.findByRole('button', { name: 'Remove Nous Portal' })
-    fireEvent.click(remove)
-
-    await waitFor(() => expect(disconnectOAuthProvider).toHaveBeenCalledWith('nous'))
-    expect(listOAuthProviders).toHaveBeenCalledTimes(2)
-  })
-
-  it('keeps provider selection separate from account removal', async () => {
-    await renderProvidersSettings()
-
-    fireEvent.click(await screen.findByText('Nous Portal'))
-
-    expect(startManualProviderOAuth).toHaveBeenCalledWith('nous')
-    expect(disconnectOAuthProvider).not.toHaveBeenCalled()
-  })
-
-  it('does not offer removal for externally managed providers', async () => {
-    listOAuthProviders.mockResolvedValue({
-      providers: [
-        provider('qwen-oauth', true, {
-          cli_command: 'hermes auth add qwen-oauth',
-          disconnect_hint: 'Use `hermes auth add qwen-oauth` or that provider\'s CLI to remove it.',
-          disconnectable: false,
-          flow: 'external',
-          name: 'Qwen (via Qwen CLI)'
-        })
-      ]
-    })
-
-    await renderProvidersSettings()
-
-    expect(await screen.findByText('Qwen Code')).toBeTruthy()
-    expect(screen.queryByRole('button', { name: 'Remove Qwen Code' })).toBeNull()
-    expect(screen.getByText(/managed outside Hermes/)).toBeTruthy()
-  })
-})
--- a/apps/desktop/src/app/settings/providers-settings.tsx
+++ b/apps/desktop/src/app/settings/providers-settings.tsx
@@ -1,20 +1,18 @@
 import { useStore } from '@nanostores/react'
-import { useCallback, useEffect, useMemo, useState } from 'react'
+import { useEffect, useMemo, useState } from 'react'

 import {
  FEATURED_ID,
  FeaturedProviderRow,
  KeyProviderRow,
  ProviderRow,
-  providerTitle,
  sortProviders
 } from '@/components/desktop-onboarding-overlay'
 import { Button } from '@/components/ui/button'
-import { disconnectOAuthProvider, listOAuthProviders } from '@/hermes'
+import { listOAuthProviders } from '@/hermes'
 import { useI18n } from '@/i18n'
-import { Check, ChevronDown, ChevronRight, KeyRound, Loader2, Terminal, Trash2 } from '@/lib/icons'
+import { ChevronDown, KeyRound } from '@/lib/icons'
 import { cn } from '@/lib/utils'
-import { notify, notifyError } from '@/store/notifications'
 import { $desktopOnboarding, startManualProviderOAuth } from '@/store/onboarding'
 import type { EnvVarInfo, OAuthProvider } from '@/types/hermes'

@@ -87,17 +85,7 @@ function buildProviderKeyGroups(vars: Record<string, EnvVarInfo>): ProviderKeyGr
 // Selecting a provider hands off to the shared onboarding overlay, which runs
 // that provider's real sign-in flow; the key affordances open the API-key
 // catalog below.
-function OAuthPicker({
-  disconnecting,
-  onDisconnect,
-  onWantApiKey,
-  providers
-}: {
-  disconnecting: null | string
-  onDisconnect: (provider: OAuthProvider) => void
-  onWantApiKey: () => void
-  providers: OAuthProvider[]
-}) {
+function OAuthPicker({ onWantApiKey, providers }: { onWantApiKey: () => void; providers: OAuthProvider[] }) {
  const { t } = useI18n()
  const p = t.settings.providers
  const [showAll, setShowAll] = useState(false)
@@ -109,7 +97,7 @@ function OAuthPicker({

  const select = (p: OAuthProvider) => startManualProviderOAuth(p.id)

-  const featured = ordered.find(p => p.id === FEATURED_ID && !p.status?.logged_in) ?? null
+  const featured = ordered.find(p => p.id === FEATURED_ID) ?? null
  const rest = featured ? ordered.filter(p => p.id !== FEATURED_ID) : ordered
  // Keep connected accounts grouped and always visible; only the unconnected
  // providers hide behind the disclosure, so the page leads with what's set up.
@@ -142,13 +130,7 @@ function OAuthPicker({
            {p.connected}
          </p>
          {connected.map(p => (
-            <ConnectedProviderRow
-              disconnecting={disconnecting === p.id}
-              key={p.id}
-              onDisconnect={onDisconnect}
-              onSelect={select}
-              provider={p}
-            />
+            <ProviderRow key={p.id} onSelect={select} provider={p} />
          ))}
        </>
      )}
@@ -176,63 +158,6 @@ function OAuthPicker({
  )
 }

-function ConnectedProviderRow({
-  disconnecting,
-  onDisconnect,
-  onSelect,
-  provider
-}: {
-  disconnecting: boolean
-  onDisconnect: (provider: OAuthProvider) => void
-  onSelect: (provider: OAuthProvider) => void
-  provider: OAuthProvider
-}) {
-  const { t } = useI18n()
-  const title = providerTitle(provider)
-  const Trail = provider.flow === 'external' ? Terminal : ChevronRight
-  const canDisconnect = provider.disconnectable ?? provider.flow !== 'external'
-
-  const disconnectHint = provider.flow === 'external'
-    ? t.settings.providers.removeExternal(title, provider.cli_command)
-    : t.settings.providers.removeKeyManaged(title)
-
-  return (
-    <div className="group grid grid-cols-[minmax(0,1fr)_auto] items-center gap-1 rounded-[6px] transition-colors hover:bg-(--ui-control-hover-background)">
-      <button className="min-w-0 px-3 py-2.5 text-left" onClick={() => onSelect(provider)} type="button">
-        <div className="flex min-w-0 items-center gap-2">
-          <span className="truncate text-[length:var(--conversation-text-font-size)] font-semibold">{title}</span>
-          <span className="inline-flex shrink-0 items-center gap-1 bg-primary/10 px-2 py-0.5 text-xs font-medium text-primary">
-            <Check className="size-3" />
-            {t.settings.providers.connected}
-          </span>
-        </div>
-        <p className="mt-1 text-xs leading-5 text-muted-foreground">{t.onboarding.flowSubtitles[provider.flow]}</p>
-        {!canDisconnect && (
-          <p className="mt-0.5 truncate text-[0.68rem] leading-5 text-muted-foreground/70">
-            {disconnectHint}
-          </p>
-        )}
-      </button>
-      <div className="flex items-center gap-1 pr-2">
-        <Trail className="size-4 text-muted-foreground transition group-hover:text-foreground" />
-        {canDisconnect && (
-          <Button
-            aria-label={`${t.common.remove} ${title}`}
-            disabled={disconnecting}
-            onClick={() => onDisconnect(provider)}
-            size="icon-xs"
-            title={`${t.common.remove} ${title}`}
-            type="button"
-            variant="ghost"
-          >
-            {disconnecting ? <Loader2 className="size-3 animate-spin" /> : <Trash2 className="size-3" />}
-          </Button>
-        )}
-      </div>
-    </div>
-  )
-}
-
 function NoProviderKeys() {
  const { t } = useI18n()

@@ -248,26 +173,20 @@ export function ProvidersSettings({ onViewChange, view }: ProvidersSettingsProps
  const { rowProps, vars } = useEnvCredentials()
  const [oauthProviders, setOauthProviders] = useState<OAuthProvider[]>([])
  const [openProvider, setOpenProvider] = useState<null | string>(null)
-  const [disconnecting, setDisconnecting] = useState<null | string>(null)
  // The onboarding overlay owns the OAuth flow. Watch its `manual` flag so we
  // re-read connection state when the user finishes (or dismisses) a sign-in
  // they launched from this page — otherwise the cards keep their stale status.
  const onboardingActive = useStore($desktopOnboarding).manual

-  const refreshOAuthProviders = useCallback(async () => {
-    // OAuth providers are best-effort — a failure here just hides the panel.
-    const { providers } = await listOAuthProviders()
-    setOauthProviders(providers)
-  }, [])
-
  useEffect(() => {
+    if (onboardingActive) {
+      return
+    }
+
    let cancelled = false

+    // OAuth providers are best-effort — a failure here just hides the panel.
    void (async () => {
-      if (onboardingActive) {
-        return
-      }
-
      try {
        const { providers } = await listOAuthProviders()

@@ -282,26 +201,6 @@ export function ProvidersSettings({ onViewChange, view }: ProvidersSettingsProps
    return () => void (cancelled = true)
  }, [onboardingActive])

-  async function handleDisconnect(provider: OAuthProvider) {
-    const name = providerTitle(provider)
-
-    if (!window.confirm(t.settings.providers.removeConfirm(name))) {
-      return
-    }
-
-    setDisconnecting(provider.id)
-
-    try {
-      await disconnectOAuthProvider(provider.id)
-      notify({ durationMs: 3_000, kind: 'success', title: t.settings.providers.removedTitle, message: t.settings.providers.removedMessage(name) })
-      await refreshOAuthProviders().catch(() => undefined)
-    } catch (err) {
-      notifyError(err, t.settings.providers.failedRemove(name))
-    } finally {
-      setDisconnecting(null)
-    }
-  }
-
  if (!vars) {
    return <LoadingState label={t.settings.providers.loading} />
  }
@@ -338,12 +237,7 @@ export function ProvidersSettings({ onViewChange, view }: ProvidersSettingsProps

  return (
    <SettingsContent>
-      <OAuthPicker
-        disconnecting={disconnecting}
-        onDisconnect={provider => void handleDisconnect(provider)}
-        onWantApiKey={() => onViewChange('keys')}
-        providers={oauthProviders}
-      />
+      <OAuthPicker onWantApiKey={() => onViewChange('keys')} providers={oauthProviders} />
    </SettingsContent>
  )
 }
--- a/apps/desktop/src/app/settings/types.ts
+++ b/apps/desktop/src/app/settings/types.ts
@@ -4,15 +4,7 @@ import type { HermesGateway } from '@/hermes'
 import type { IconComponent } from '@/lib/icons'
 import type { EnvVarInfo } from '@/types/hermes'

-export type SettingsView =
-  | 'about'
-  | 'gateway'
-  | 'keys'
-  | 'mcp'
-  | 'notifications'
-  | 'providers'
-  | 'sessions'
-  | `config:${string}`
+export type SettingsView = 'about' | 'gateway' | 'keys' | 'mcp' | 'providers' | 'sessions' | `config:${string}`
 export type EnvPatch = Partial<Pick<EnvVarInfo, 'is_set' | 'redacted_value'>>

 export interface SettingsPageProps {
--- a/apps/desktop/src/components/assistant-ui/clarify-tool.tsx
+++ b/apps/desktop/src/components/assistant-ui/clarify-tool.tsx
@@ -2,7 +2,7 @@

 import { type ToolCallMessagePartProps } from '@assistant-ui/react'
 import { useStore } from '@nanostores/react'
-import { type FormEvent, type KeyboardEvent, useCallback, useMemo, useRef, useState, type ComponentProps } from 'react'
+import { type FormEvent, type KeyboardEvent, useCallback, useMemo, useRef, useState } from 'react'

 import { ToolFallback } from '@/components/assistant-ui/tool-fallback'
 import { Button } from '@/components/ui/button'
@@ -36,30 +36,14 @@ function readClarifyArgs(args: unknown): ClarifyArgs {
 }

 // Choice and "Other" rows share a layout; only color/hover differs.
-const OPTION_ROW_CLASS = 'flex w-full items-start gap-2 rounded-md px-2.5 py-1.5 text-left text-sm transition-colors'
-
-const CLARIFY_SHELL_CLASS =
-  'relative mb-3 mt-2 rounded-[0.5rem] border border-border/70 bg-card/40 text-sm shadow-[inset_0_1px_0_color-mix(in_srgb,var(--foreground)_3%,transparent)]'
-
-function ClarifyShell({
-  children,
-  className,
-  ...props
-}: ComponentProps<'div'>) {
-  return (
-    <div className={cn(CLARIFY_SHELL_CLASS, className)} data-slot="clarify-inline" {...props}>
-      <span aria-hidden className="arc-border" />
-      {children}
-    </div>
-  )
-}
+const OPTION_ROW_CLASS = 'flex w-full items-center gap-2 rounded-md px-2.5 py-1.5 text-left text-sm transition-colors'

 function RadioDot({ selected }: { selected: boolean }) {
  return (
    <span
      aria-hidden
      className={cn(
-        'mt-0.5 grid size-3.5 shrink-0 place-items-center rounded-full border transition-colors',
+        'grid size-3.5 shrink-0 place-items-center rounded-full border transition-colors',
        selected ? 'border-primary' : 'border-muted-foreground/40'
      )}
    >
@@ -115,11 +99,9 @@ function ClarifyToolPending({ args }: ToolCallMessagePartProps) {
  const textareaRef = useRef<HTMLTextAreaElement | null>(null)

  // Race: tool.start fires a tick before clarify.request, so request_id
-  // arrives slightly after the tool block mounts. Hold the whole panel on a
-  // spinner until the gateway request is wired — showing disabled choices or
-  // a "loading question" stub is worse than a brief wait.
+  // arrives slightly after the tool block mounts. Show the question (from
+  // args) but disable submit until we have the request id from the gateway.
  const ready = Boolean(matchingRequest?.requestId)
-  const loading = !ready && !submitting

  const respond = useCallback(
    async (answer: string) => {
@@ -156,11 +138,7 @@ function ClarifyToolPending({ args }: ToolCallMessagePartProps) {

  const handleTextareaKey = useCallback(
    (event: KeyboardEvent<HTMLTextAreaElement>) => {
-      if (event.nativeEvent.isComposing) {
-        return
-      }
-
-      if (event.key === 'Enter' && !event.shiftKey) {
+      if (event.key === 'Enter' && (event.metaKey || event.ctrlKey)) {
        event.preventDefault()
        const trimmed = draft.trim()

@@ -184,20 +162,12 @@ function ClarifyToolPending({ args }: ToolCallMessagePartProps) {
    [draft, respond]
  )

-  if (loading) {
-    return (
-      <ClarifyShell
-        aria-label={copy.loadingQuestion}
-        className="grid min-h-24 place-items-center px-3 py-6"
-        role="status"
-      >
-        <Loader2 aria-hidden className="size-5 animate-spin text-muted-foreground/80" />
-      </ClarifyShell>
-    )
-  }
-
  return (
-    <ClarifyShell className="grid gap-6 px-3 py-2.5">
+    <div
+      className="relative mb-3 mt-2 grid gap-6 rounded-[0.5rem] border border-border/70 bg-card/40 px-3 py-2.5 text-sm shadow-[inset_0_1px_0_color-mix(in_srgb,var(--foreground)_3%,transparent)]"
+      data-slot="clarify-inline"
+    >
+      <span aria-hidden className="arc-border" />
      <div className="flex items-start gap-2.5">
        <span
          aria-hidden
@@ -205,7 +175,9 @@ function ClarifyToolPending({ args }: ToolCallMessagePartProps) {
        >
          <HelpCircle className="size-3.5" />
        </span>
-        <span className="flex-1 whitespace-pre-wrap font-medium leading-snug text-foreground">{question}</span>
+        <span className="flex-1 whitespace-pre-wrap font-medium leading-snug text-foreground">
+          {question || <em className="font-normal text-muted-foreground/70">{copy.loadingQuestion}</em>}
+        </span>
      </div>

      {!typing && hasChoices && (
@@ -218,7 +190,7 @@ function ClarifyToolPending({ args }: ToolCallMessagePartProps) {
                selectedChoice === choice && 'bg-accent/60'
              )}
              data-choice
-              disabled={submitting}
+              disabled={!ready || submitting}
              key={`${index}-${choice}`}
              onClick={() => {
                setSelectedChoice(choice)
@@ -228,7 +200,7 @@ function ClarifyToolPending({ args }: ToolCallMessagePartProps) {
            >
              <RadioDot selected={selectedChoice === choice} />
              <span className="flex-1 wrap-anywhere">{choice}</span>
-              {selectedChoice === choice && <Check aria-hidden className="mt-0.5 size-4 shrink-0 text-primary" />}
+              {selectedChoice === choice && <Check aria-hidden className="size-4 shrink-0 text-primary" />}
            </button>
          ))}
          <button
@@ -259,9 +231,8 @@ function ClarifyToolPending({ args }: ToolCallMessagePartProps) {
          />
          <div className="flex items-center justify-between gap-2">
            <span className="inline-flex items-center gap-1 text-[0.6875rem] text-muted-foreground/85">
-              <KbdCombo combo="enter" size="sm" />
-              <KbdCombo combo="shift+enter" size="sm" />
-              {t.composer.hotkeyDescs['composer.sendNewline']}
+              <KbdCombo combo="mod+enter" size="sm" />
+              {copy.shortcutSuffix}
            </span>
            <div className="flex items-center gap-1.5">
              {hasChoices && (
@@ -278,10 +249,16 @@ function ClarifyToolPending({ args }: ToolCallMessagePartProps) {
                  {copy.back}
                </Button>
              )}
-              <Button disabled={submitting} onClick={() => void respond('')} size="sm" type="button" variant="ghost">
+              <Button
+                disabled={!ready || submitting}
+                onClick={() => void respond('')}
+                size="sm"
+                type="button"
+                variant="ghost"
+              >
                {copy.skip}
              </Button>
-              <Button disabled={submitting || !draft.trim()} size="sm" type="submit">
+              <Button disabled={!ready || submitting || !draft.trim()} size="sm" type="submit">
                {submitting ? <Loader2 className="size-3.5 animate-spin" /> : copy.send}
              </Button>
            </div>
@@ -293,7 +270,7 @@ function ClarifyToolPending({ args }: ToolCallMessagePartProps) {
        <div className="flex justify-end">
          <Button
            className="-mr-2"
-            disabled={submitting}
+            disabled={!ready || submitting}
            onClick={() => void respond('')}
            size="xs"
            type="button"
@@ -303,6 +280,6 @@ function ClarifyToolPending({ args }: ToolCallMessagePartProps) {
          </Button>
        </div>
      )}
-    </ClarifyShell>
+    </div>
  )
 }
--- a/apps/desktop/src/components/assistant-ui/markdown-text.tsx
+++ b/apps/desktop/src/components/assistant-ui/markdown-text.tsx
@@ -2,22 +2,12 @@

 import { TextMessagePartProvider, useMessagePartText } from '@assistant-ui/react'
 import {
-  parseMarkdownIntoBlocks,
  type StreamdownTextComponents,
  StreamdownTextPrimitive,
  type SyntaxHighlighterProps
 } from '@assistant-ui/react-streamdown'
 import { code } from '@streamdown/code'
-import {
-  type ComponentProps,
-  memo,
-  type ReactNode,
-  useDeferredValue,
-  useEffect,
-  useMemo,
-  useRef,
-  useState
-} from 'react'
+import { type ComponentProps, memo, type ReactNode, useDeferredValue, useEffect, useMemo, useRef, useState } from 'react'

 import { PreviewAttachment } from '@/components/chat/preview-attachment'
 import { SyntaxHighlighter } from '@/components/chat/shiki-highlighter'
@@ -36,7 +26,6 @@ import {
  mediaStreamUrl
 } from '@/lib/media'
 import { previewTargetFromMarkdownHref } from '@/lib/preview-targets'
-import { tailBoundedRemend } from '@/lib/remend-tail'
 import { cn } from '@/lib/utils'

 // Math rendering plugin (KaTeX). Configured once at module scope — the
@@ -53,51 +42,6 @@ import { cn } from '@/lib/utils'
 // LLM convention). The default false-setting only accepts `$$...$$`.
 const mathPlugin = createMemoizedMathPlugin({ singleDollarTextMath: true })

-// Replaces Streamdown's `parseIncompleteMarkdown` (full-text remend per
-// flush) with a tail-bounded repair — see lib/remend-tail.ts. Must stay
-// module-scope so the prop identity is stable across renders.
-function preprocessWithTailRepair(text: string): string {
-  return tailBoundedRemend(preprocessMarkdown(text))
-}
-
-// Memoized block splitter. Streamdown calls `parseMarkdownIntoBlocks` (a full
-// `marked` lex of the entire message, ~1.6ms per 28KB) inside a useMemo keyed
-// on the text — but the same text is re-lexed every time a message REMOUNTS
-// (virtualizer scroll, session switch) and whenever multiple surfaces render
-// the same content (deferred + smooth reveal republish). A small module-level
-// LRU keyed by the exact source string removes all of those repeat parses
-// with zero correctness risk (same input → same output). Streaming tail
-// growth misses the cache by design (every flush is a new string) — that
-// single lex is the irreducible cost.
-const BLOCK_CACHE_MAX = 64
-const BLOCK_CACHE_MIN_LENGTH = 1024
-const blockCache = new Map<string, string[]>()
-
-function parseMarkdownIntoBlocksCached(markdown: string): string[] {
-  if (markdown.length < BLOCK_CACHE_MIN_LENGTH) {
-    return parseMarkdownIntoBlocks(markdown)
-  }
-
-  const hit = blockCache.get(markdown)
-
-  if (hit) {
-    // Refresh recency (Map iteration order is insertion order).
-    blockCache.delete(markdown)
-    blockCache.set(markdown, hit)
-
-    return hit
-  }
-
-  const blocks = parseMarkdownIntoBlocks(markdown)
-  blockCache.set(markdown, blocks)
-
-  if (blockCache.size > BLOCK_CACHE_MAX) {
-    blockCache.delete(blockCache.keys().next().value as string)
-  }
-
-  return blocks
-}
-
 async function mediaSrc(path: string): Promise<string> {
  if (/^(?:https?|data):/i.test(path)) {
    return path
@@ -297,13 +241,6 @@ function MarkdownImage({ className, src, alt, ...props }: ComponentProps<'img'>)
 // keeps draining its tail instead of snapping.
 const REVEAL_DRAIN_MS = 500
 const REVEAL_MAX_CHARS_PER_FRAME = 30
-// Floor between reveal commits. Each commit republishes the text context and
-// re-runs the whole Streamdown pipeline (preprocess → remend → lex → micromark
-// on the open block) over the full accumulated text — at raw rAF cadence
-// that's 60 full parses/second and was the dominant streaming cost for
-// reasoning text. ~33ms keeps the reveal visually fluid (2 frames) while
-// halving the parse work.
-const REVEAL_MIN_COMMIT_MS = 33

 function useSmoothReveal(text: string, isRunning: boolean): string {
  const [displayed, setDisplayed] = useState(isRunning ? '' : text)
@@ -336,27 +273,10 @@ function useSmoothReveal(text: string, isRunning: boolean): string {
    const tick = () => {
      const now = performance.now()
      const dt = now - lastTickRef.current
-
-      // Skip this frame if the floor hasn't elapsed — the backlog math below
-      // is dt-proportional, so delayed commits reveal proportionally more.
-      if (dt < REVEAL_MIN_COMMIT_MS) {
-        frameRef.current = requestAnimationFrame(tick)
-
-        return
-      }
-
      lastTickRef.current = now

      const remaining = targetRef.current.length - shownRef.current.length
-
-      const add = Math.min(
-        remaining,
-        // dt-scaled so the per-commit cap stays equivalent to the old
-        // per-frame cap at any commit cadence.
-        Math.ceil((REVEAL_MAX_CHARS_PER_FRAME * dt) / 16.7),
-        Math.max(1, Math.ceil((remaining * dt) / REVEAL_DRAIN_MS))
-      )
-
+      const add = Math.min(remaining, REVEAL_MAX_CHARS_PER_FRAME, Math.max(1, Math.ceil((remaining * dt) / REVEAL_DRAIN_MS)))
      shownRef.current = targetRef.current.slice(0, shownRef.current.length + add)
      setDisplayed(shownRef.current)

@@ -540,20 +460,17 @@ function MarkdownTextSurface({ containerClassName, containerProps }: MarkdownTex
      containerProps={containerProps}
      lineNumbers={false}
      mode="streaming"
-      // Incomplete-markdown repair is handled by `preprocessWithTailRepair`
-      // below (tail-bounded remend) instead of Streamdown's built-in pass,
-      // which re-runs remend over the ENTIRE message on every flush — ~18%
-      // of streaming script time on 50KB+ messages. The repair itself stays
-      // always-on (even between flushes / for completed messages): an
-      // unclosed ```python ... ``` whose body contains `$` (shell snippets,
-      // JS template strings, dollar amounts) would otherwise leak those
-      // dollars to the math parser and render broken inline math. Shiki is
-      // independently deferred via `defer={isStreaming}` on the
-      // SyntaxHighlighter component.
-      parseIncompleteMarkdown={false}
-      parseMarkdownIntoBlocksFn={parseMarkdownIntoBlocksCached}
+      // Always auto-close incomplete fences — even during streaming.
+      // Without this, an unclosed ```python ... ``` whose body contains
+      // `$` (very common: shell snippets, JS template strings, dollar
+      // amounts) leaks those dollars out to the math parser and they
+      // get rendered as broken inline math until the closing fence
+      // arrives. Shiki is independently deferred via `defer={isStreaming}`
+      // on the SyntaxHighlighter component, so we don't pay code-block
+      // tokenization on every token even with this set.
+      parseIncompleteMarkdown
      plugins={plugins}
-      preprocess={preprocessWithTailRepair}
+      preprocess={preprocessMarkdown}
    />
  )
 }
--- a/apps/desktop/src/components/assistant-ui/streaming.test.tsx
+++ b/apps/desktop/src/components/assistant-ui/streaming.test.tsx
@@ -58,9 +58,9 @@ Element.prototype.animate = function animate() {
  } as unknown as Animation
 }

-// jsdom returns 0 for offset*; some layout code reads those to size the
+// jsdom returns 0 for offset*; the virtualizer reads those to size its
 // viewport. Fall through to client* (which tests can override) or a sane
-// default so message rows render with non-zero dimensions.
+// default so virtualized items render.
 function stubOffsetDimension(
  prop: 'offsetHeight' | 'offsetWidth',
  clientProp: 'clientHeight' | 'clientWidth',
@@ -216,32 +216,6 @@ function assistantTodoMessage(
  } as ThreadMessage
 }

-function assistantImageMessage(running = false): ThreadMessage {
-  return {
-    id: `assistant-image-${running ? 'running' : 'done'}`,
-    role: 'assistant',
-    content: [
-      {
-        type: 'tool-call',
-        toolCallId: 'image-1',
-        toolName: 'image_generate',
-        args: { prompt: 'draw a cat' },
-        argsText: JSON.stringify({ prompt: 'draw a cat' }),
-        ...(running ? {} : { result: { image: 'https://cdn.example/cat.png', success: true } })
-      }
-    ],
-    status: running ? { type: 'running' } : { type: 'complete', reason: 'stop' },
-    createdAt,
-    metadata: {
-      unstable_state: null,
-      unstable_annotations: [],
-      unstable_data: [],
-      steps: [],
-      custom: {}
-    }
-  } as ThreadMessage
-}
-
 function StreamingHarness() {
  const [messages, setMessages] = useState<ThreadMessage[]>([userMessage()])
  const [isRunning, setIsRunning] = useState(true)
@@ -280,6 +254,20 @@ function StreamingHarness() {
  )
 }

+function StaticThreadHarness() { // Test harness: mounts <Thread /> over a fixed two-message store with no run in progress.
+  const runtime = useExternalStoreRuntime<ThreadMessage>({
+    messages: [userMessage(), assistantMessage('complete response', false)], // NOTE(review): second arg presumably marks the message as not running — confirm against assistantMessage
+    isRunning: false, // idle thread: the runtime reports no active run
+    onNew: async () => {} // no-op handler for new messages
+  })
+
+  return (
+    <AssistantRuntimeProvider runtime={runtime}>
+      <Thread />
+    </AssistantRuntimeProvider>
+  )
+}
+
 function TodoHarness({ message }: { message: ThreadMessage }) {
  const runtime = useExternalStoreRuntime<ThreadMessage>({
    messages: [message],
@@ -421,11 +409,222 @@ describe('assistant-ui streaming renderer', () => {
    expect(screen.getByRole('alert').textContent).toContain('OpenRouter rejected the request (403).')
  })

-  // Scroll behavior (follow-at-bottom, escape-on-scroll-up, re-engage) is owned
-  // by the use-stick-to-bottom library and covered by its own test suite. We
-  // don't re-assert its scrollTop mechanics here — doing so in jsdom (no real
-  // layout, spring animation via rAF) only produces brittle change-detector
-  // tests. The rendering/streaming-content tests below remain the contract.
+  it('does not pull the viewport back down after the user scrolls up during streaming', async () => {
+    const { container } = render(<StreamingHarness />)
+
+    const content = container.querySelector('[data-slot="aui_thread-content"]') as HTMLDivElement
+    const viewport = content.parentElement as HTMLDivElement
+    let scrollHeight = 1_000
+
+    Object.defineProperty(viewport, 'clientHeight', { configurable: true, value: 200 })
+    Object.defineProperty(viewport, 'scrollHeight', {
+      configurable: true,
+      get: () => scrollHeight
+    })
+
+    await wait(80)
+
+    await act(async () => {
+      viewport.scrollTop = 800
+      fireEvent.scroll(viewport)
+    })
+    await wait(0)
+
+    await act(async () => {
+      fireEvent.wheel(viewport, { deltaY: -120 })
+      viewport.scrollTop = 420
+      fireEvent.scroll(viewport)
+    })
+
+    scrollHeight = 1_200
+
+    await act(async () => {
+      for (const observer of resizeObservers) {
+        observer.trigger(1_200)
+      }
+    })
+    await wait(0)
+
+    expect(viewport.scrollTop).toBe(420)
+  })
+
+  it('does not auto-follow idle layout shifts', async () => {
+    const { container } = render(<StaticThreadHarness />)
+
+    const content = container.querySelector('[data-slot="aui_thread-content"]') as HTMLDivElement
+    const viewport = content.parentElement as HTMLDivElement
+    let scrollHeight = 1_000
+
+    Object.defineProperty(viewport, 'clientHeight', { configurable: true, value: 200 })
+    Object.defineProperty(viewport, 'scrollHeight', {
+      configurable: true,
+      get: () => scrollHeight
+    })
+
+    await wait(80)
+
+    await act(async () => {
+      viewport.scrollTop = 420
+      fireEvent.scroll(viewport)
+    })
+
+    scrollHeight = 1_200
+
+    await act(async () => {
+      for (const observer of resizeObservers) {
+        observer.trigger(1_200)
+      }
+    })
+    await wait(0)
+
+    expect(viewport.scrollTop).toBe(420)
+  })
+
+  it('does not follow streaming content growth even while parked at the bottom', async () => {
+    const { container } = render(<StreamingHarness />)
+
+    const content = container.querySelector('[data-slot="aui_thread-content"]') as HTMLDivElement
+    const viewport = content.parentElement as HTMLDivElement
+    let clientHeight = 200
+    let scrollHeight = 1_000
+
+    Object.defineProperty(viewport, 'clientHeight', {
+      configurable: true,
+      get: () => clientHeight
+    })
+    Object.defineProperty(viewport, 'scrollHeight', {
+      configurable: true,
+      get: () => scrollHeight
+    })
+
+    await wait(80)
+
+    // Park the user at the bottom of the current content.
+    await act(async () => {
+      viewport.scrollTop = 800
+      fireEvent.scroll(viewport)
+    })
+
+    clientHeight = 240
+
+    await act(async () => {
+      viewport.scrollTop = 760
+      fireEvent.scroll(viewport)
+    })
+
+    // Content grows as tokens stream in. Streaming auto-follow is removed, so
+    // the viewport must NOT chase the new bottom — it stays where the user
+    // last left it.
+    scrollHeight = 1_200
+
+    await act(async () => {
+      for (const observer of resizeObservers) {
+        observer.trigger(1_200)
+      }
+    })
+    await wait(0)
+
+    expect(viewport.scrollTop).toBe(760)
+  })
+
+  it('honors the first upward wheel scroll even when a programmatic bottom-pin scroll event is still pending', async () => {
+    const { container } = render(<StreamingHarness />)
+
+    const content = container.querySelector('[data-slot="aui_thread-content"]') as HTMLDivElement
+    const viewport = content.parentElement as HTMLDivElement
+    let scrollHeight = 1_000
+
+    Object.defineProperty(viewport, 'clientHeight', { configurable: true, value: 200 })
+    Object.defineProperty(viewport, 'scrollHeight', {
+      configurable: true,
+      get: () => scrollHeight
+    })
+
+    await wait(80)
+    await wait(0)
+
+    await act(async () => {
+      fireEvent.wheel(viewport, { deltaY: -120 })
+      viewport.scrollTop = 420
+      fireEvent.scroll(viewport)
+    })
+
+    scrollHeight = 1_200
+
+    await act(async () => {
+      for (const observer of resizeObservers) {
+        observer.trigger(1_200)
+      }
+    })
+    await wait(0)
+
+    expect(viewport.scrollTop).toBe(420)
+  })
+
+  it('does not snap to the bottom on final code-highlight growth after a run completes', async () => {
+    const { container } = render(<StreamingHarness />)
+
+    const content = container.querySelector('[data-slot="aui_thread-content"]') as HTMLDivElement
+    const viewport = content.parentElement as HTMLDivElement
+    let scrollHeight = 1_000
+
+    Object.defineProperty(viewport, 'clientHeight', { configurable: true, value: 200 })
+    Object.defineProperty(viewport, 'scrollHeight', {
+      configurable: true,
+      get: () => scrollHeight
+    })
+
+    await wait(80)
+
+    await act(async () => {
+      viewport.scrollTop = 800
+      fireEvent.scroll(viewport)
+    })
+
+    await wait(650)
+
+    // Completion re-measures (Shiki highlight) and grows the content. The
+    // post-run bottom lock is removed, so the viewport stays put instead of
+    // snapping to the new bottom.
+    scrollHeight = 1_700
+    await wait(0)
+
+    expect(viewport.scrollTop).toBe(800)
+  })
+
+  it('does not restart bottom-follow after completion when the user scrolled up', async () => {
+    const { container } = render(<StreamingHarness />)
+
+    const content = container.querySelector('[data-slot="aui_thread-content"]') as HTMLDivElement
+    const viewport = content.parentElement as HTMLDivElement
+    let scrollHeight = 1_000
+
+    Object.defineProperty(viewport, 'clientHeight', { configurable: true, value: 200 })
+    Object.defineProperty(viewport, 'scrollHeight', {
+      configurable: true,
+      get: () => scrollHeight
+    })
+
+    await wait(80)
+
+    await act(async () => {
+      viewport.scrollTop = 800
+      fireEvent.scroll(viewport)
+    })
+
+    await act(async () => {
+      fireEvent.wheel(viewport, { deltaY: -120 })
+      viewport.scrollTop = 420
+      fireEvent.scroll(viewport)
+    })
+
+    await wait(650)
+
+    scrollHeight = 1_700
+    await wait(0)
+
+    expect(viewport.scrollTop).toBe(420)
+  })

  it('renders an incomplete streaming fenced code block as a code card', async () => {
    const { container } = render(<RunningMessageHarness message={assistantMessage('```ts\nconst answer = 42\n')} />)
@@ -441,19 +640,14 @@ describe('assistant-ui streaming renderer', () => {
  it('renders an incomplete streaming reasoning fenced code block as a code card', async () => {
    const { container } = render(<RunningReasoningHarness />)
    const ui = within(container)
-    const thinkingToggle = ui.getByRole('button', { name: /thinking/i })

-    if (thinkingToggle.getAttribute('aria-expanded') !== 'true') {
-      fireEvent.click(thinkingToggle)
-    }
+    fireEvent.click(ui.getByRole('button', { name: /thinking/i }))

    await waitFor(() => {
      expect(container.querySelector('[data-slot="code-card"]')).toBeTruthy()
    })

-    await waitFor(() => {
-      expect(container.querySelector('[data-slot="aui_reasoning-text"]')?.textContent).toContain('const answer = 42')
-    })
+    expect(container.querySelector('[data-slot="aui_reasoning-text"]')?.textContent).toContain('const answer = 42')
    expect(container.textContent).not.toContain('```ts')
  })

@@ -506,16 +700,4 @@ describe('assistant-ui streaming renderer', () => {

    expect(container.querySelector('[data-slot="aui_todo-hoisted"]')).toBeNull()
  })
-
-  it('renders completed image generation results in the tool slot', async () => {
-    const { container } = render(<MessageHarness message={assistantImageMessage()} />)
-
-    await waitFor(() => {
-      expect(screen.getByRole('img', { name: 'Generated image' }).getAttribute('src')).toBe(
-        'https://cdn.example/cat.png'
-      )
-    })
-    expect(container.querySelector('[data-slot="aui_generated-image"]')).toBeTruthy()
-    expect(screen.queryByRole('status', { name: /rendering image/i })).toBeNull()
-  })
 })
--- a/apps/desktop/src/components/assistant-ui/thread-list.tsx
+++ b/apps/desktop/src/components/assistant-ui/thread-list.tsx
@@ -1,307 +0,0 @@
-import { ThreadPrimitive, useAuiEvent, useAuiState } from '@assistant-ui/react'
-import {
-  type ComponentProps,
-  type FC,
-  memo,
-  type ReactNode,
-  useCallback,
-  useEffect,
-  useLayoutEffect,
-  useRef,
-  useState
-} from 'react'
-import { useStickToBottom } from 'use-stick-to-bottom'
-
-import { useI18n } from '@/i18n'
-import { cn } from '@/lib/utils'
-import {
-  onScrollToBottomRequest,
-  onThreadEditClose,
-  onThreadEditOpen,
-  resetThreadScroll,
-  setThreadAtBottom
-} from '@/store/thread-scroll'
-
-import { MessageRenderBoundary } from './message-render-boundary'
-
-type ThreadMessageComponents = ComponentProps<typeof ThreadPrimitive.MessageByIndex>['components']
-
-type MessageGroup = { id: string; weight: number } & (
-  | { index: number; kind: 'standalone' }
-  | { indices: number[]; kind: 'turn' }
-)
-
-// DOM is bounded by a rendered-PART budget, not a message/turn count: a single
-// assistant message folds every tool call into a part, so heavy sessions are
-// ~40 turns / ~100 messages but ~1000 parts — and parts are what drive node
-// count. "Show earlier" prepends another page; whole turns stay intact so the
-// sticky human bubble never loses its turn. This is the long-session perf lever
-// WITHOUT a virtualizer — pure rendering, never touches scrollTop, so it can't
-// fight use-stick-to-bottom (the single scroll owner).
-const RENDER_BUDGET = 300
-
-interface ThreadMessageListProps {
-  clampToComposer: boolean
-  components: ThreadMessageComponents
-  emptyPlaceholder?: ReactNode
-  loadingIndicator?: ReactNode
-  sessionKey?: string | null
-}
-
-// Group each user message with the assistant turn(s) that follow it so the
-// human bubble can `position: sticky` against the scroller across its whole
-// turn (see StickyHumanMessageContainer in thread.tsx).
-function buildGroups(signature: string): MessageGroup[] {
-  if (!signature) {
-    return []
-  }
-
-  const messages = signature.split('\n').map(row => {
-    const [index, id, role, weight] = row.split(':')
-
-    return { id, index: Number(index), role, weight: Number(weight) || 1 }
-  })
-
-  const groups: MessageGroup[] = []
-
-  for (let i = 0; i < messages.length; i++) {
-    const message = messages[i]
-
-    if (message.role !== 'user') {
-      groups.push({ id: message.id, index: message.index, kind: 'standalone', weight: message.weight })
-
-      continue
-    }
-
-    const indices = [message.index]
-    let weight = message.weight
-
-    while (i + 1 < messages.length && messages[i + 1].role !== 'user') {
-      weight += messages[++i].weight
-      indices.push(messages[i].index)
-    }
-
-    groups.push({ id: message.id, indices, kind: 'turn', weight })
-  }
-
-  return groups
-}
-
-const ThreadMessageListInner: FC<ThreadMessageListProps> = ({
-  clampToComposer,
-  components,
-  emptyPlaceholder,
-  loadingIndicator,
-  sessionKey
-}) => {
-  const messageSignature = useAuiState(s =>
-    s.thread.messages
-      .map((message, index) => `${index}:${message.id}:${message.role}:${message.content?.length ?? 1}`)
-      .join('\n')
-  )
-
-  const { t } = useI18n()
-  const groups = buildGroups(messageSignature)
-  const renderEmpty = groups.length === 0 && Boolean(emptyPlaceholder)
-
-  // use-stick-to-bottom owns scrollTop (single writer): follow while locked,
-  // escape on user scroll-up, re-lock at bottom. Snap instantly, not spring — a
-  // spring can't tell live-token growth from a session-switch bulk relayout, and
-  // chasing the latter reads as the view scrolling to random spots before
-  // settling. Its refs hang off our own DOM so the sticky human bubbles survive.
-  const { scrollRef, contentRef, isAtBottom, scrollToBottom, stopScroll } = useStickToBottom({
-    initial: 'instant',
-    resize: 'instant'
-  })
-
-  const [renderBudget, setRenderBudget] = useState(RENDER_BUDGET)
-
-  // Walk turns newest-first, summing their part weights until the budget is met;
-  // everything before that first kept turn is hidden.
-  let firstVisible = groups.length
-
-  for (let i = groups.length - 1, weight = 0; i >= 0; i--) {
-    weight += groups[i].weight
-    firstVisible = i
-
-    if (weight >= renderBudget) {
-      break
-    }
-  }
-
-  const hiddenCount = firstVisible
-  const visibleGroups = hiddenCount > 0 ? groups.slice(hiddenCount) : groups
-  const restoreFromBottomRef = useRef<number | null>(null)
-
-  useEffect(() => setThreadAtBottom(isAtBottom), [isAtBottom])
-  useEffect(() => () => resetThreadScroll(), [])
-
-  // Floating jump button (outside this subtree) → return to the bottom.
-  useEffect(() => onScrollToBottomRequest(() => void scrollToBottom()), [scrollToBottom])
-
-  const endEditHold = useCallback(() => {
-    scrollRef.current?.removeAttribute('data-editing')
-  }, [scrollRef])
-
-  // Inline edit grows a sticky bubble. Escape before focus/layout so the
-  // resize-follow can't snap scrollTop; native anchoring holds the viewport.
-  const beginEditHold = useCallback(() => {
-    const el = scrollRef.current
-
-    if (!el) {
-      return
-    }
-
-    endEditHold()
-    stopScroll()
-    el.setAttribute('data-editing', 'true')
-  }, [endEditHold, scrollRef, stopScroll])
-
-  useEffect(() => onThreadEditOpen(beginEditHold), [beginEditHold])
-  useEffect(() => onThreadEditClose(endEditHold), [endEditHold])
-  useEffect(() => () => endEditHold(), [endEditHold])
-  // New run → snap to the latest turn.
-  useAuiEvent('thread.runStart', () => void scrollToBottom())
-
-  // Reset the cap and pin to bottom on mount + every session switch (messages
-  // swap in place on a long-lived runtime, so sessionKey is the only signal).
-  // The swap is multi-step and lays out over many frames; letting the library
-  // follow re-pins every frame to a moving target — visible as ~10 scroll jumps.
-  // Instead: quiet it, glue to the true bottom until the height holds steady,
-  // then hand back locked. Live streaming afterward uses the normal resize follow.
-  useLayoutEffect(() => {
-    setRenderBudget(RENDER_BUDGET)
-
-    const el = scrollRef.current
-
-    if (!el) {
-      return
-    }
-
-    stopScroll()
-    el.scrollTop = el.scrollHeight
-
-    let frame = 0
-    let stableFrames = 0
-    let lastHeight = el.scrollHeight
-
-    const settle = () => {
-      const node = scrollRef.current
-
-      if (!node) {
-        return
-      }
-
-      const height = node.scrollHeight
-
-      stableFrames = height === lastHeight ? stableFrames + 1 : 0
-      lastHeight = height
-      node.scrollTop = height
-
-      // ~5 steady frames ≈ layout has settled; the frame cap bounds slow loads.
-      if (stableFrames >= 5 || ++frame > 90) {
-        void scrollToBottom('instant')
-
-        return
-      }
-
-      rafId = requestAnimationFrame(settle)
-    }
-
-    let rafId = requestAnimationFrame(settle)
-
-    return () => cancelAnimationFrame(rafId)
-  }, [scrollRef, scrollToBottom, sessionKey, stopScroll])
-
-  // Prepend an older page while preserving the on-screen position. The user is
-  // scrolled up (reading history) so the stick-to-bottom lock is escaped and
-  // won't fight this manual restore.
-  const showEarlier = useCallback(() => {
-    const el = scrollRef.current
-
-    restoreFromBottomRef.current = el ? el.scrollHeight - el.scrollTop : null
-    setRenderBudget(budget => budget + RENDER_BUDGET)
-  }, [scrollRef])
-
-  useLayoutEffect(() => {
-    const el = scrollRef.current
-
-    if (el && restoreFromBottomRef.current != null) {
-      el.scrollTop = el.scrollHeight - restoreFromBottomRef.current
-      restoreFromBottomRef.current = null
-    }
-  }, [scrollRef, renderBudget])
-
-  return (
-    <div
-      className="relative min-h-0 max-w-full overflow-hidden contain-[layout_paint]"
-      style={{ height: clampToComposer ? 'var(--thread-viewport-height)' : '100%' }}
-    >
-      <div
-        className="size-full overflow-x-hidden overflow-y-auto overscroll-contain"
-        data-following={isAtBottom ? 'true' : 'false'}
-        data-slot="aui_thread-viewport"
-        ref={scrollRef as React.RefCallback<HTMLDivElement>}
-      >
-        {renderEmpty ? (
-          <div
-            className="mx-auto grid h-full w-full max-w-(--composer-width) grid-rows-[minmax(0,1fr)_auto] min-w-0 gap-(--conversation-turn-gap) px-6 py-8"
-            data-slot="aui_thread-content"
-          >
-            {emptyPlaceholder}
-          </div>
-        ) : (
-          <div
-            className={cn(
-              'mx-auto flex w-full max-w-(--composer-width) min-w-0 flex-col px-6 pt-[calc(var(--titlebar-height)+1.5rem)]'
-            )}
-            data-slot="aui_thread-content"
-            ref={contentRef as React.RefCallback<HTMLDivElement>}
-          >
-            {hiddenCount > 0 && (
-              <button
-                className="mx-auto mb-(--conversation-turn-gap) rounded-full border border-border/65 bg-(--composer-fill) px-3 py-1 text-xs text-muted-foreground hover:text-foreground"
-                onClick={showEarlier}
-                type="button"
-              >
-                {t.assistant.thread.showEarlier}
-              </button>
-            )}
-            {visibleGroups.map(group => (
-              <div
-                className="flex min-w-0 flex-col gap-(--conversation-turn-gap) pb-(--conversation-turn-gap)"
-                key={group.id}
-              >
-                <MessageRenderBoundary resetKey={messageSignature}>
-                  {group.kind === 'turn' ? (
-                    <div
-                      className="composer-human-ai-pair-container relative flex min-w-0 flex-col gap-(--conversation-turn-gap)"
-                      data-slot="aui_turn-pair"
-                    >
-                      {group.indices.map(index => (
-                        <ThreadPrimitive.MessageByIndex components={components} index={index} key={index} />
-                      ))}
-                    </div>
-                  ) : (
-                    <ThreadPrimitive.MessageByIndex components={components} index={group.index} />
-                  )}
-                </MessageRenderBoundary>
-              </div>
-            ))}
-            {loadingIndicator}
-            {clampToComposer && (
-              <div
-                aria-hidden="true"
-                className="shrink-0"
-                data-slot="aui_composer-clearance"
-                style={{ height: 'var(--thread-last-message-clearance)' }}
-              />
-            )}
-          </div>
-        )}
-      </div>
-    </div>
-  )
-}
-
-export const ThreadMessageList = memo(ThreadMessageListInner)
--- a/apps/desktop/src/components/assistant-ui/thread-virtualizer.tsx
+++ b/apps/desktop/src/components/assistant-ui/thread-virtualizer.tsx
@@ -0,0 +1,469 @@
+import { ThreadPrimitive, useAuiEvent, useAuiState } from '@assistant-ui/react'
+import { useVirtualizer, type Virtualizer } from '@tanstack/react-virtual'
+import {
+  type ComponentProps,
+  type FC,
+  memo,
+  type ReactNode,
+  useCallback,
+  useEffect,
+  useLayoutEffect,
+  useMemo,
+  useRef
+} from 'react'
+
+import { setMutableRef } from '@/lib/mutable-ref'
+import { cn } from '@/lib/utils'
+import { setThreadScrolledUp } from '@/store/thread-scroll'
+
+import { MessageRenderBoundary } from './message-render-boundary'
+
+const ESTIMATED_ITEM_HEIGHT = 220
+const OVERSCAN = 4
+const AT_BOTTOM_THRESHOLD = 4
+
+type ThreadMessageComponents = ComponentProps<typeof ThreadPrimitive.MessageByIndex>['components']
+
+type MessageGroup = { id: string; index: number; kind: 'standalone' } | { id: string; indices: number[]; kind: 'turn' }
+
+interface VirtualizedThreadProps {
+  clampToComposer: boolean
+  components: ThreadMessageComponents
+  emptyPlaceholder?: ReactNode
+  loadingIndicator?: ReactNode
+  sessionKey?: string | null
+}
+
+function buildGroups(signature: string): MessageGroup[] { // Parses the newline-joined "index:id:role" signature into render groups.
+  if (!signature) { // ''.split('\n') yields [''], so bail out before producing a bogus row
+    return []
+  }
+
+  const messages = signature.split('\n').map(row => {
+    const [index, id, role] = row.split(':') // NOTE(review): an id containing ':' would shift the role field — confirm ids are colon-free
+
+    return { id, index: Number(index), role }
+  })
+
+  const groups: MessageGroup[] = []
+
+  for (let i = 0; i < messages.length; i++) {
+    const message = messages[i]
+
+    if (message.role !== 'user') { // only reached by non-user messages that no preceding user turn absorbed
+      groups.push({ id: message.id, index: message.index, kind: 'standalone' })
+
+      continue
+    }
+
+    const indices = [message.index]
+
+    while (i + 1 < messages.length && messages[i + 1].role !== 'user') { // absorb every following non-user message into this turn
+      indices.push(messages[++i].index)
+    }
+
+    groups.push({ id: message.id, indices, kind: 'turn' }) // a turn is keyed by its user message's id
+  }
+
+  return groups
+}
+
+const VirtualizedThreadInner: FC<VirtualizedThreadProps> = ({
+  clampToComposer,
+  components,
+  emptyPlaceholder,
+  loadingIndicator,
+  sessionKey
+}) => { // Renders the thread's message groups through a virtualizer, or the empty placeholder when there are none.
+  const messageSignature = useAuiState(s =>
+    s.thread.messages.map((message, index) => `${index}:${message.id}:${message.role}`).join('\n')
+  ) // one "index:id:role" row per message, newline-joined; parsed back by buildGroups
+
+  const isRunning = useAuiState(s => s.thread.isRunning)
+
+  const groups = useMemo(() => buildGroups(messageSignature), [messageSignature]) // re-group only when the signature string changes
+  const renderEmpty = groups.length === 0 && Boolean(emptyPlaceholder) // placeholder only for an empty thread that was given one
+  const scrollerRef = useRef<HTMLDivElement | null>(null)
+
+  // Shared ref so scrollToFn can check whether the user is parked at the
+  // bottom without needing a ref from inside useThreadScrollAnchor.
+  const stickyBottomRef = useRef(true)
+
+  const virtualizer = useVirtualizer({
+    count: groups.length,
+    estimateSize: () => ESTIMATED_ITEM_HEIGHT,
+    getItemKey: index => groups[index]?.id ?? index, // key by group id; fall back to the index if it is out of range
+    getScrollElement: () => scrollerRef.current,
+    // Seed the rect so the initial range mounts something before
+    // `observeElementRect` reports the real layout (it overrides this).
+    initialRect: { height: 600, width: 800 },
+    overscan: OVERSCAN,
+    // When the virtualizer adjusts scroll due to item measurement changes,
+    // skip the adjustment if the user is at the bottom. Our ResizeObserver +
+    // pinToBottom loop handles scroll anchoring; letting the virtualizer also
+    // adjust creates a feedback loop where the two fight each other,
+    // producing visible rubber-banding (the view snaps to the composer
+    // then jumps back up).
+    scrollToFn: (offset, _options, instance) => {
+      const el = instance.scrollElement
+
+      if (!el) {
+        return
+      }
+
+      if (stickyBottomRef.current) {
+        const maxScroll = el.scrollHeight - el.clientHeight
+        const distFromBottom = maxScroll - el.scrollTop
+
+        if (distFromBottom <= AT_BOTTOM_THRESHOLD && offset < maxScroll) { // parked at the bottom and asked to move up → ignore
+          return
+        }
+      }
+
+      ;(el as HTMLElement).scrollTo(0, offset) // otherwise honor the virtualizer's requested offset
+    }
+  })
+
+  useThreadScrollAnchor({
+    enabled: !renderEmpty,
+    groupCount: groups.length,
+    isRunning,
+    scrollerRef,
+    sessionKey: sessionKey ?? null,
+    stickyBottomRef,
+    virtualizer
+  })
+
+  const virtualItems = virtualizer.getVirtualItems()
+  const totalSize = virtualizer.getTotalSize()
+  const paddingTop = virtualItems[0]?.start ?? 0 // space reserved for unmounted items above the first mounted one
+  const paddingBottom = Math.max(0, totalSize - (virtualItems.at(-1)?.end ?? 0)) // space reserved below the last mounted one, never negative
+
+  return (
+    <div
+      className="relative min-h-0 max-w-full overflow-hidden contain-[layout_paint]"
+      style={{ height: clampToComposer ? 'var(--thread-viewport-height)' : '100%' }}
+    >
+      <div
+        className="size-full overflow-x-hidden overflow-y-auto overscroll-contain"
+        data-slot="aui_thread-viewport"
+        ref={scrollerRef}
+      >
+        {renderEmpty ? (
+          <div
+            className="mx-auto grid h-full w-full max-w-(--composer-width) grid-rows-[minmax(0,1fr)_auto] min-w-0 gap-(--conversation-turn-gap) px-6 py-8"
+            data-slot="aui_thread-content"
+          >
+            {emptyPlaceholder}
+          </div>
+        ) : (
+          <div
+            className={cn(
+              'mx-auto flex w-full max-w-(--composer-width) min-w-0 flex-col px-6 pt-[calc(var(--titlebar-height)+1.5rem)]'
+            )}
+            data-slot="aui_thread-content"
+          >
+            {/* Natural-flow virtualization: mounted items render as normal
+                flex siblings so `position: sticky` on the human bubble
+                resolves against the scroller without transform interference.
+                Padding spacers reserve scroll space for unmounted items. */}
+            <div style={{ paddingBottom: `${paddingBottom}px`, paddingTop: `${paddingTop}px` }}>
+              {virtualItems.map(virtualItem => {
+                const group = groups[virtualItem.index]
+
+                if (!group) {
+                  return null
+                }
+
+                return (
+                  <div
+                    className="flex min-w-0 flex-col gap-(--conversation-turn-gap) pb-(--conversation-turn-gap)"
+                    data-index={virtualItem.index}
+                    key={virtualItem.key}
+                    ref={virtualizer.measureElement}
+                  >
+                    <MessageRenderBoundary resetKey={messageSignature}>
+                      {group.kind === 'turn' ? (
+                        <div
+                          className="composer-human-ai-pair-container relative flex min-w-0 flex-col gap-(--conversation-turn-gap)"
+                          data-slot="aui_turn-pair"
+                        >
+                          {group.indices.map(index => (
+                            <ThreadPrimitive.MessageByIndex components={components} index={index} key={index} />
+                          ))}
+                        </div>
+                      ) : (
+                        <ThreadPrimitive.MessageByIndex components={components} index={group.index} />
+                      )}
+                    </MessageRenderBoundary>
+                  </div>
+                )
+              })}
+            </div>
+            {loadingIndicator}
+            {clampToComposer && (
+              <div
+                aria-hidden="true"
+                className="shrink-0"
+                data-slot="aui_composer-clearance"
+                style={{ height: 'var(--thread-last-message-clearance)' }}
+              />
+            )}
+          </div>
+        )}
+      </div>
+    </div>
+  )
+}
+
+export const VirtualizedThread = memo(VirtualizedThreadInner) // public export: the memo-wrapped component
+
+function scrollElementToBottom(el: HTMLDivElement) {
+  el.scrollTop = el.scrollHeight // over-assign on purpose; the browser clamps to the maximum scroll offset
+}
+
+interface ScrollAnchorOptions {
+  enabled: boolean // gates the session-change jump and the new-group pin
+  groupCount: number // number of rendered groups; an increase triggers a pin while armed
+  isRunning: boolean // only mirrored into a ref by the hook; nothing in the hook reads it back
+  scrollerRef: React.RefObject<HTMLDivElement | null> // the scrolling element that gets pinned
+  sessionKey: string | null // a change triggers a jump to bottom
+  stickyBottomRef: React.MutableRefObject<boolean> // shared "parked at bottom" flag
+  virtualizer: Virtualizer<HTMLDivElement, Element> // used for scrollToIndex when jumping
+}
+
+function useThreadScrollAnchor({
+  enabled,
+  groupCount,
+  isRunning,
+  scrollerRef,
+  sessionKey,
+  stickyBottomRef,
+  virtualizer
+}: ScrollAnchorOptions) {
+  // `stickyBottomRef` = parked at bottom, content growth should follow. Cleared on
+  // user-driven upward scroll; re-armed when they reach bottom again.
+  // This is a shared ref — scrollToFn reads it to prevent the virtualizer's
+  // measurement adjustments from fighting our pinToBottom.
+  const lastTopRef = useRef(0)
+  const lastHeightRef = useRef(0)
+  const lastClientHeightRef = useRef(0)
+  // Gate (0 or 1) marking the next scroll event as ours rather than the
+  // user's. `pinToBottom` writes `el.scrollTop`, which fires an async
+  // `scroll` event; without this guard the on-scroll handler can race with
+  // the programmatic write (because content also grew, the *resulting*
+  // scrollTop can be lower than `lastTopRef` from the previous frame) and
+  // misread the programmatic pin as the user scrolling up — which disarms
+  // sticky-bottom and leaves the user's just-submitted message below the
+  // fold. See `apps/desktop/scripts/measure-jump.mjs` for the repro
+  // (distFromBottom 0 → 49 within one frame, sticking forever).
+  const programmaticScrollPendingRef = useRef(0)
+  const prevSessionKeyRef = useRef(sessionKey)
+  const prevGroupCountRef = useRef(0)
+
+  const pinToBottom = useCallback(() => {
+    const el = scrollerRef.current
+
+    if (!el) {
+      return
+    }
+
+    // Already parked at the bottom: writing `scrollTop` is a no-op and the
+    // browser fires NO scroll event, so arming the programmatic gate here would
+    // leave it set until some later scroll. The next genuine user scroll-up
+    // would then be consumed as one of our programmatic scrolls — re-arming
+    // sticky-bottom instead of disarming it, so the following pin yanks the
+    // viewport back down. Refresh trackers, bail.
+    const distFromBottom = el.scrollHeight - (el.scrollTop + el.clientHeight)
+
+    if (distFromBottom <= AT_BOTTOM_THRESHOLD) {
+      lastTopRef.current = el.scrollTop
+      lastHeightRef.current = el.scrollHeight
+      lastClientHeightRef.current = el.clientHeight
+
+      return
+    }
+
+    // Hold the disarm gate across the scroll event the next line will fire.
+    // Set to 1 rather than incrementing: coalesced writes within a frame fire a
+    // single scroll event, so a counter > 1 can never drain and would swallow a
+    // later real user scroll.
+    programmaticScrollPendingRef.current = 1
+    scrollElementToBottom(el)
+    lastTopRef.current = el.scrollTop
+    lastHeightRef.current = el.scrollHeight
+    lastClientHeightRef.current = el.clientHeight
+  }, [scrollerRef])
+
+  const jumpToBottom = useCallback(() => {
+    setMutableRef(stickyBottomRef, true)
+
+    if (groupCount > 0) {
+      virtualizer.scrollToIndex(groupCount - 1, { align: 'end', behavior: 'auto' })
+    }
+
+    requestAnimationFrame(() => {
+      if (stickyBottomRef.current) {
+        pinToBottom()
+      }
+    })
+  }, [groupCount, pinToBottom, stickyBottomRef, virtualizer])
+
+  useEffect(() => () => setThreadScrolledUp(false), [])
+
+  // Track at-bottom state, dim composer when scrolled up, disarm on user
+  // scroll/wheel/touch.
+  useEffect(() => {
+    const el = scrollerRef.current
+
+    if (!el) {
+      return undefined
+    }
+
+    const disarm = () => {
+      setMutableRef(stickyBottomRef, false)
+      programmaticScrollPendingRef.current = 0
+    }
+
+    const onScroll = () => {
+      const top = el.scrollTop
+
+      // If this scroll event is the consequence of `pinToBottom` writing
+      // `el.scrollTop`, treat it as ours: don't disarm. Nothing re-pins on
+      // the next frame, so if the browser clamped us short of bottom
+      // (because content grew in the same frame) we stay armed and let the
+      // next pin trigger correct it. Without this guard the post-pin
+      // scrollTop gets misread as the user scrolling up, disarming
+      // sticky-bottom and leaving the just-submitted message below the fold.
+      if (programmaticScrollPendingRef.current > 0) {
+        programmaticScrollPendingRef.current -= 1
+        lastTopRef.current = top
+        lastHeightRef.current = el.scrollHeight
+        lastClientHeightRef.current = el.clientHeight
+        // Always re-arm — sticky-bottom should hold through clamp races.
+        setMutableRef(stickyBottomRef, true)
+        const atBottom = el.scrollHeight - (top + el.clientHeight) <= AT_BOTTOM_THRESHOLD
+        setThreadScrolledUp(!atBottom)
+
+        return
+      }
+
+      // Disarm only when `scrollTop` decreases while both content height and
+      // viewport height are stable. A bare `top < lastTopRef.current` check is
+      // unsafe: virtualizer measurement, streaming markdown, composer resizing,
+      // window resizing, and toolbar/status updates can all move scrollTop as a
+      // layout side effect. Wheel-up and touchmove still disarm immediately via
+      // their own listeners below, so real user intent remains covered.
+      const heightGrew = el.scrollHeight > lastHeightRef.current
+      const clientHeightChanged = Math.abs(el.clientHeight - lastClientHeightRef.current) > 1
+
+      if (!heightGrew && !clientHeightChanged && top + 1 < lastTopRef.current) {
+        setMutableRef(stickyBottomRef, false)
+      }
+
+      lastTopRef.current = top
+      lastHeightRef.current = el.scrollHeight
+      lastClientHeightRef.current = el.clientHeight
+
+      const atBottom = el.scrollHeight - (top + el.clientHeight) <= AT_BOTTOM_THRESHOLD
+
+      if (atBottom) {
+        setMutableRef(stickyBottomRef, true)
+      }
+
+      setThreadScrolledUp(!atBottom)
+    }
+
+    const onWheel = (event: WheelEvent) => {
+      if (event.deltaY < 0) {
+        disarm()
+      }
+    }
+
+    el.addEventListener('scroll', onScroll, { passive: true })
+    el.addEventListener('wheel', onWheel, { passive: true })
+    el.addEventListener('touchmove', disarm, { passive: true })
+
+    return () => {
+      el.removeEventListener('scroll', onScroll)
+      el.removeEventListener('wheel', onWheel)
+      el.removeEventListener('touchmove', disarm)
+    }
+  }, [scrollerRef, stickyBottomRef])
+
+  // Intentionally NO streaming auto-follow. Earlier builds ran a
+  // ResizeObserver here that re-pinned the viewport to the bottom on every
+  // content growth while a turn was running, so the chat tracked tokens as
+  // they streamed. That behavior is removed by request: once a turn is in
+  // flight the viewport stays exactly where the user left it. The viewport
+  // is still moved to the bottom ONCE per user submit / new turn / session
+  // change (see the layout effect and the session-change effect below) so a
+  // freshly submitted message lands in view — but it does not chase the
+  // stream afterward.
+
+  // Jump to bottom on session change OR when an empty thread first gets
+  // content. Both share the same intent and the same effect.
+  useEffect(() => {
+    const sessionChanged = prevSessionKeyRef.current !== sessionKey
+    const becameNonEmpty = prevGroupCountRef.current === 0 && groupCount > 0
+
+    prevSessionKeyRef.current = sessionKey
+    prevGroupCountRef.current = groupCount
+
+    if (enabled && (sessionChanged || becameNonEmpty)) {
+      jumpToBottom()
+    }
+  }, [enabled, groupCount, jumpToBottom, sessionKey])
+
+  // New-group pin: when groupCount increases while armed (a new turn arriving
+  // from the user submit or assistant turn start), schedule ONE pin on the
+  // next animation frame. The check runs in useLayoutEffect rather than
+  // useEffect so the armed state is sampled synchronously after React commits
+  // the DOM mutation; the pin itself is deferred and re-checks
+  // `stickyBottomRef` when the frame callback runs.
+  //
+  // There is NO synchronous pre-paint pin: pinning immediately would race
+  // with scroll/wheel events still queued for the current frame (see the
+  // comment inside the effect), so only the rAF pin runs and the new message
+  // may sit below the fold until that frame. The rAF is not cancelled on
+  // cleanup; `pinToBottom` returns early when the scroller ref is empty. This
+  // fires once per user submit / new turn arrival — it is NOT streaming-token
+  // follow (that path is removed above), so a turn that streams a long
+  // response after this initial jump will not chase the bottom.
+  const prevGroupCountForLayoutRef = useRef(groupCount)
+  useLayoutEffect(() => {
+    if (!enabled) {
+      return
+    }
+
+    if (groupCount > prevGroupCountForLayoutRef.current && stickyBottomRef.current) {
+      // Defer to rAF so that browser scroll/wheel events from the current
+      // frame are processed first.  Without this deferral, a trackpad
+      // scroll-up during streaming can race with this effect: the wheel
+      // event hasn't fired yet so stickyBottomRef is still true, and the
+      // immediate pinToBottom() would snap the viewport back to bottom
+      // against the user's intent.
+      requestAnimationFrame(() => {
+        if (stickyBottomRef.current) {
+          pinToBottom()
+        }
+      })
+    }
+
+    prevGroupCountForLayoutRef.current = groupCount
+  }, [enabled, groupCount, pinToBottom, stickyBottomRef])
+
+  // Intentionally NO post-run bottom lock. Earlier builds kept pinning to
+  // the bottom for POST_RUN_BOTTOM_LOCK_MS after `isRunning` flipped false to
+  // chase final Shiki re-highlight measurement. With streaming follow gone,
+  // re-pinning at completion would yank the viewport back to the bottom even
+  // though the user is reading earlier content. The one-time submit /
+  // new-turn jump already covers landing a fresh message in view.
+  // NOTE(review): the ref below is written but never read in this hook.
+  const prevIsRunningForLayoutRef = useRef(isRunning)
+  useLayoutEffect(() => {
+    prevIsRunningForLayoutRef.current = isRunning
+  }, [isRunning])
+
+  useAuiEvent('thread.runStart', jumpToBottom)
+}
--- a/apps/desktop/src/components/assistant-ui/thread.tsx
+++ b/apps/desktop/src/components/assistant-ui/thread.tsx
@@ -7,8 +7,7 @@ import {
  MessagePrimitive,
  type ToolCallMessagePartProps,
  useAui,
-  useAuiState,
-  useMessageRuntime
+  useAuiState
 } from '@assistant-ui/react'
 import { useStore } from '@nanostores/react'
 import { IconPlayerStopFilled } from '@tabler/icons-react'
@@ -53,24 +52,20 @@ import {
 } from '@/app/chat/composer/rich-editor'
 import { detectTrigger, textBeforeCaret, type TriggerState } from '@/app/chat/composer/text-utils'
 import { ComposerTriggerPopover } from '@/app/chat/composer/trigger-popover'
-import {
-  extractDroppedFiles,
-  HERMES_PATHS_MIME,
-  isImagePath,
-  partitionDroppedFiles
-} from '@/app/chat/hooks/use-composer-actions'
+import { extractDroppedFiles, HERMES_PATHS_MIME, isImagePath, partitionDroppedFiles } from '@/app/chat/hooks/use-composer-actions'
 import { uploadComposerAttachment } from '@/app/session/hooks/use-prompt-actions'
 import { ClarifyTool } from '@/components/assistant-ui/clarify-tool'
 import { DirectiveContent, hermesDirectiveFormatter } from '@/components/assistant-ui/directive-text'
 import { MarkdownText, MarkdownTextContent } from '@/components/assistant-ui/markdown-text'
-import { ThreadMessageList } from '@/components/assistant-ui/thread-list'
+import { VirtualizedThread } from '@/components/assistant-ui/thread-virtualizer'
 import { ToolFallback, ToolGroupSlot } from '@/components/assistant-ui/tool-fallback'
 import { TooltipIconButton } from '@/components/assistant-ui/tooltip-icon-button'
 import { UserMessageText } from '@/components/assistant-ui/user-message-text'
 import { useElapsedSeconds } from '@/components/chat/activity-timer'
 import { ActivityTimerText } from '@/components/chat/activity-timer-text'
 import { DisclosureRow } from '@/components/chat/disclosure-row'
-import { GeneratedImage } from '@/components/chat/generated-image-result'
+import { GeneratedImageProvider, useGeneratedImageContext } from '@/components/chat/generated-image-context'
+import { ImageGenerationPlaceholder } from '@/components/chat/image-generation-placeholder'
 import { Intro, type IntroProps } from '@/components/chat/intro'
 import { PreviewAttachment } from '@/components/chat/preview-attachment'
 import { Codicon } from '@/components/ui/codicon'
@@ -96,22 +91,16 @@ import { extractPreviewTargets } from '@/lib/preview-targets'
 import { useEnterAnimation } from '@/lib/use-enter-animation'
 import { cn } from '@/lib/utils'
 import { playSpeechText, stopVoicePlayback } from '@/lib/voice-playback'
-import { $compactionActive } from '@/store/compaction'
 import type { ComposerAttachment } from '@/store/composer'
 import { notifyError } from '@/store/notifications'
 import { $connection } from '@/store/session'
-import { notifyThreadEditClose, notifyThreadEditOpen } from '@/store/thread-scroll'
 import { $voicePlayback } from '@/store/voice-playback'

 type ThreadLoadingState = 'response' | 'session'

 interface MessageActionProps {
  messageId: string
-  /** Lazy accessor — reads the live message text at action time. Passing the
-   *  text itself as a prop forces the whole footer to re-render on every
-   *  streaming delta flush (the text changes ~30×/s), which profiling showed
-   *  was a large slice of per-token script time on long transcripts. */
-  getMessageText: () => string
+  messageText: string
  onBranchInNewChat?: (messageId: string) => void
 }

@@ -139,28 +128,6 @@ function messageContentText(content: unknown): string {
  return Array.isArray(content) ? content.map(partText).join('').trim() : ''
 }

-// Cheap streaming-stable "does this message have visible text" check: returns
-// on the first non-whitespace text part without concatenating the whole
-// message. Used as a useAuiState selector so its boolean output stays stable
-// across token flushes (flips false→true once per turn).
-function contentHasVisibleText(content: unknown): boolean {
-  if (typeof content === 'string') {
-    return content.trim().length > 0
-  }
-
-  if (!Array.isArray(content)) {
-    return false
-  }
-
-  for (const part of content) {
-    if (partText(part).trim().length > 0) {
-      return true
-    }
-  }
-
-  return false
-}
-
 export const Thread: FC<{
  clampToComposer?: boolean
  cwd?: string | null
@@ -201,16 +168,18 @@ export const Thread: FC<{
  ) : undefined

  return (
-    <div className="relative grid h-full min-h-0 max-w-full grid-rows-[minmax(0,1fr)] overflow-hidden bg-transparent contain-[layout_paint]">
-      <ThreadMessageList
-        clampToComposer={clampToComposer}
-        components={messageComponents}
-        emptyPlaceholder={emptyPlaceholder}
-        loadingIndicator={loading === 'response' ? <ResponseLoadingIndicator /> : null}
-        sessionKey={sessionKey}
-      />
-      {loading === 'session' && <CenteredThreadSpinner />}
-    </div>
+    <GeneratedImageProvider>
+      <div className="relative grid h-full min-h-0 max-w-full grid-rows-[minmax(0,1fr)] overflow-hidden bg-transparent contain-[layout_paint]">
+        <VirtualizedThread
+          clampToComposer={clampToComposer}
+          components={messageComponents}
+          emptyPlaceholder={emptyPlaceholder}
+          loadingIndicator={loading === 'response' ? <ResponseLoadingIndicator /> : null}
+          sessionKey={sessionKey}
+        />
+        {loading === 'session' && <CenteredThreadSpinner />}
+      </div>
+    </GeneratedImageProvider>
  )
 }

@@ -247,36 +216,20 @@ const CenteredThreadSpinner: FC = () => {

 const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }> = ({ onBranchInNewChat }) => {
  const messageId = useAuiState(s => s.message.id)
-  const messageRuntime = useMessageRuntime()
-
-  // PERF: this component must NOT subscribe to the streaming text. Every
-  // selector here returns a value that stays referentially stable across
-  // token flushes (booleans, status strings, '' while running), so the
-  // 30 Hz delta stream only re-renders the markdown part and the tiny
-  // StreamStallIndicator leaf — not the footer/preview/root subtree.
-  const messageStatus = useAuiState(s => s.message.status?.type)
-  const isRunning = messageStatus === 'running'
-  const isPlaceholder = useAuiState(s => s.message.status?.type === 'running' && s.message.content.length === 0)
-  const hasVisibleText = useAuiState(s => contentHasVisibleText(s.message.content))
-
-  // Preview targets only materialize once the turn completes — while running
-  // the selector returns '' (stable), so per-token flushes skip the regex
-  // scan and the re-render it would cause.
-  const completedText = useAuiState(s =>
-    s.message.status?.type === 'running' ? '' : messageContentText(s.message.content)
-  )
+  const content = useAuiState(s => s.message.content)
+  const messageText = messageContentText(content)

  const previewTargets = useMemo(() => {
-    if (!completedText || !/(https?:\/\/|file:\/\/)/i.test(completedText)) {
+    if (!messageText || !/(https?:\/\/|file:\/\/)/i.test(messageText)) {
      return []
    }

-    return pickPrimaryPreviewTarget(extractPreviewTargets(completedText))
-  }, [completedText])
+    return pickPrimaryPreviewTarget(extractPreviewTargets(messageText))
+  }, [messageText])

-  const getMessageText = useCallback(() => messageContentText(messageRuntime.getState().content), [messageRuntime])
-
-  const enterRef = useEnterAnimation(isRunning, `assistant-message:${messageId}`)
+  const messageStatus = useAuiState(s => s.message.status?.type)
+  const isPlaceholder = messageStatus === 'running' && content.length === 0
+  const enterRef = useEnterAnimation(messageStatus === 'running', `assistant-message:${messageId}`)

  if (isPlaceholder) {
    return null
@@ -287,7 +240,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
      className="group flex w-full min-w-0 max-w-full flex-col gap-0 self-start overflow-hidden"
      data-role="assistant"
      data-slot="aui_assistant-message-root"
-      data-streaming={isRunning ? 'true' : undefined}
+      data-streaming={messageStatus === 'running' ? 'true' : undefined}
      ref={enterRef}
    >
      <div
@@ -296,7 +249,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
      >
        {/* Todos render in the composer status stack now, not inline. */}
        <MessagePrimitive.Parts components={MESSAGE_PARTS_COMPONENTS} />
-        {isRunning && <StreamStallIndicator />}
+        {messageStatus === 'running' && <StreamStallIndicator activity={`${content.length}:${messageText.length}`} />}
        {previewTargets.length > 0 && (
          <div className="mt-3 flex flex-wrap gap-2">
            {previewTargets.map(target => (
@@ -313,8 +266,8 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
          </ErrorPrimitive.Root>
        </MessagePrimitive.Error>
      </div>
-      {hasVisibleText && (
-        <AssistantFooter getMessageText={getMessageText} messageId={messageId} onBranchInNewChat={onBranchInNewChat} />
+      {messageText.trim().length > 0 && (
+        <AssistantFooter messageId={messageId} messageText={messageText} onBranchInNewChat={onBranchInNewChat} />
      )}
    </MessagePrimitive.Root>
  )
@@ -337,25 +290,13 @@ const StatusRow: FC<{ children: ReactNode; label: string } & React.ComponentProp
  </div>
 )

-// Fixed label while auto-compaction runs — decoupled from backend status text.
-const COMPACTION_LABEL = 'Summarizing thread'
-
-const CompactionHint: FC = () => (
-  <span className="shimmer min-w-0 truncate text-muted-foreground/55">{COMPACTION_LABEL}</span>
-)
-
 const ResponseLoadingIndicator: FC = () => {
  const { t } = useI18n()
  const elapsed = useElapsedSeconds()
-  const compacting = useStore($compactionActive)

  return (
-    <StatusRow
-      data-slot="aui_response-loading"
-      label={compacting ? COMPACTION_LABEL : t.assistant.thread.loadingResponse}
-    >
+    <StatusRow data-slot="aui_response-loading" label={t.assistant.thread.loadingResponse}>
      <span aria-hidden="true" className="dither inline-block size-3 rounded-[2px] text-midground/80 animate-pulse" />
-      {compacting && <CompactionHint />}
      <ActivityTimerText seconds={elapsed} />
    </StatusRow>
  )
@@ -367,30 +308,11 @@ const STREAM_STALL_S = 2

 // Tail "still thinking" indicator: the pre-first-token spinner goes away once
 // text flows, but if the stream then goes quiet mid-turn (tool think-time,
-// provider stall) nothing signals that work continues. Watch a per-flush
+// provider stall) nothing signals that work continues. Watch a per-render
 // activity signal; when it hasn't changed for STREAM_STALL_S, re-show the
 // dither + a timer counting from the last activity.
-//
-// Subscribes to the activity signal ITSELF (rather than taking it as a prop)
-// so that per-token updates re-render only this leaf, not the whole
-// AssistantMessage subtree.
-const StreamStallIndicator: FC = () => {
-  const activity = useAuiState(s => {
-    let textLength = 0
-
-    for (const part of s.message.content) {
-      const text = (part as { text?: unknown }).text
-
-      if (typeof text === 'string') {
-        textLength += text.length
-      }
-    }
-
-    return `${s.message.content.length}:${textLength}`
-  })
-
+const StreamStallIndicator: FC<{ activity: string }> = ({ activity }) => {
  const [stalled, setStalled] = useState(false)
-  const compacting = useStore($compactionActive)

  useEffect(() => {
    setStalled(false)
@@ -399,32 +321,35 @@ const StreamStallIndicator: FC = () => {
    return () => window.clearTimeout(id)
  }, [activity])

-  const active = stalled || compacting
-  const elapsed = useElapsedSeconds(active)
+  const elapsed = useElapsedSeconds(stalled)

-  if (!active) {
+  if (!stalled) {
    return null
  }

  return (
-    <StatusRow
-      className="mt-1.5"
-      data-slot="aui_stream-stall"
-      label={compacting ? COMPACTION_LABEL : 'Hermes is thinking'}
-    >
+    <StatusRow className="mt-1.5" data-slot="aui_stream-stall" label="Hermes is thinking">
      <span aria-hidden="true" className="dither inline-block size-3 rounded-[2px] text-midground/80 animate-pulse" />
-      {compacting && <CompactionHint />}
      <ActivityTimerText seconds={elapsed} />
    </StatusRow>
  )
 }

-const ImageGenerateTool: FC<ToolCallMessagePartProps> = ({ args, result }) => {
-  const aspectRatio = typeof args?.aspect_ratio === 'string' ? args.aspect_ratio : undefined
+const ImageGenerateTool: FC<ToolCallMessagePartProps> = ({ result }) => {
+  const generatedImage = useGeneratedImageContext()
+  const running = result === undefined
+
+  useEffect(() => {
+    generatedImage?.setPending(running)
+  }, [generatedImage, running])
+
+  if (!running) {
+    return null
+  }

  return (
    <div className="mt-1.5">
-      <GeneratedImage aspectRatio={aspectRatio} result={result} />
+      <ImageGenerationPlaceholder />
    </div>
  )
 }
@@ -588,7 +513,10 @@ const ReasoningTextPart: FC<{ text: string; status?: { type: string } }> = ({ te

  return (
    <MarkdownTextContent
-      containerClassName="text-xs leading-snug text-muted-foreground/85"
+      containerClassName={cn(
+        'text-xs leading-snug text-muted-foreground/85',
+        isRunning && 'shimmer text-muted-foreground/55'
+      )}
      containerProps={{ 'data-slot': 'aui_reasoning-text' } as ComponentProps<'div'>}
      isRunning={isRunning}
      text={displayText}
@@ -651,7 +579,7 @@ function formatMessageTimestamp(
  return SHORT_FMT.format(date)
 }

-const AssistantActionBar: FC<MessageActionProps> = ({ messageId, getMessageText, onBranchInNewChat }) => {
+const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, onBranchInNewChat }) => {
  const { t } = useI18n()
  const copy = t.assistant.thread
  const [menuOpen, setMenuOpen] = useState(false)
@@ -672,7 +600,7 @@ const AssistantActionBar: FC<MessageActionProps> = ({ messageId, getMessageText,
        )}
        data-slot="aui_msg-actions"
      >
-        <CopyButton appearance="icon" buttonSize="icon" label={copy.copy} text={getMessageText} />
+        <CopyButton appearance="icon" buttonSize="icon" disabled={!messageText} label={copy.copy} text={messageText} />
        <ActionBarPrimitive.Reload asChild>
          <TooltipIconButton onClick={() => triggerHaptic('submit')} tooltip={copy.refresh}>
            <Codicon name="refresh" />
@@ -690,7 +618,7 @@ const AssistantActionBar: FC<MessageActionProps> = ({ messageId, getMessageText,
              <GitBranchIcon />
              {copy.branchNewChat}
            </DropdownMenuItem>
-            <ReadAloudItem getText={getMessageText} messageId={messageId} />
+            <ReadAloudItem messageId={messageId} text={messageText} />
          </DropdownMenuContent>
        </DropdownMenu>
      </ActionBarPrimitive.Root>
@@ -698,7 +626,7 @@ const AssistantActionBar: FC<MessageActionProps> = ({ messageId, getMessageText,
  )
 }

-const ReadAloudItem: FC<{ getText: () => string; messageId: string }> = ({ getText, messageId }) => {
+const ReadAloudItem: FC<{ messageId: string; text: string }> = ({ messageId, text }) => {
  const { t } = useI18n()
  const copy = t.assistant.thread
  const voicePlayback = useStore($voicePlayback)
@@ -712,8 +640,6 @@ const ReadAloudItem: FC<{ getText: () => string; messageId: string }> = ({ getTe
  const Icon = isPreparing ? Loader2Icon : isSpeaking ? VolumeXIcon : Volume2Icon

  const read = useCallback(async () => {
-    const text = getText()
-
    if (!text || $voicePlayback.get().status !== 'idle') {
      return
    }
@@ -723,11 +649,11 @@ const ReadAloudItem: FC<{ getText: () => string; messageId: string }> = ({ getTe
    } catch (error) {
      notifyError(error, copy.readAloudFailed)
    }
-  }, [copy.readAloudFailed, getText, messageId])
+  }, [copy.readAloudFailed, messageId, text])

  return (
    <DropdownMenuItem
-      disabled={isPreparing || (!isSpeaking && anyPlaybackActive)}
+      disabled={isPreparing || (!isSpeaking && (anyPlaybackActive || !text))}
      onSelect={e => {
        e.preventDefault()
        void (isSpeaking ? stopVoicePlayback() : read())
@@ -781,22 +707,15 @@ function messageAttachmentRefs(value: unknown): string[] {
  return value.every(ref => typeof ref === 'string') ? value : EMPTY_ATTACHMENT_REFS
 }

-function StickyHumanMessageContainer({ attachments, children }: { attachments?: ReactNode; children: ReactNode }) {
+function StickyHumanMessageContainer({ children }: { children: ReactNode }) {
  return (
-    // Fragment, not a wrapper: a wrapping element becomes the sticky's
-    // containing block (it'd stick within its own height = never). The bubble
-    // and attachments are flow siblings so the bubble pins against the scroller
-    // while attachments below it scroll away.
-    <>
-      <div
-        className="group/user-message sticky z-40 -mx-4 flex w-[calc(100%+2rem)] min-w-0 max-w-none flex-col items-stretch gap-0 self-end overflow-visible bg-(--ui-chat-surface-background) px-4 pb-(--conversation-turn-gap) pt-1"
-        data-role="user"
-        data-slot="aui_user-message-root"
-      >
-        {children}
-      </div>
-      {attachments}
-    </>
+    <div
+      className="group/user-message sticky z-40 -mx-4 flex w-[calc(100%+2rem)] min-w-0 max-w-none flex-col items-stretch gap-0 self-end overflow-visible bg-(--ui-chat-surface-background) px-4 pb-(--conversation-turn-gap) pt-2"
+      data-role="user"
+      data-slot="aui_user-message-root"
+    >
+      {children}
+    </div>
  )
 }

@@ -889,10 +808,8 @@ const UserMessage: FC<{
  // changes, not on every frame while the outer max-height animates open.
  const clampInnerRef = useRef<HTMLDivElement | null>(null)
  const [bodyClamped, setBodyClamped] = useState(false)
-  const lastClampHeightRef = useRef(-1)
-  const lineHeightRef = useRef(0)

-  const measureClamp = useCallback((entries: readonly ResizeObserverEntry[]) => {
+  const measureClamp = useCallback(() => {
    const inner = clampInnerRef.current
    const outer = inner?.parentElement

@@ -900,28 +817,12 @@ const UserMessage: FC<{
      return
    }

-    // Prefer the size the ResizeObserver already computed — reading
-    // `scrollHeight` outside RO timing forces a synchronous layout, and with
-    // many user bubbles observed at once those reads interleave with the
-    // style write below into a read-write-read reflow cascade.
-    const entryHeight = entries.find(entry => entry.target === inner)?.borderBoxSize?.[0]?.blockSize
-    const fullHeight = Math.ceil(entryHeight ?? inner.scrollHeight)
-
-    if (fullHeight === lastClampHeightRef.current) {
-      return
-    }
-
-    lastClampHeightRef.current = fullHeight
-
-    // Line-height is stable for the life of the bubble (font settings don't
-    // change under it) — resolve the computed style once.
-    if (!lineHeightRef.current) {
-      const styles = getComputedStyle(inner)
-      lineHeightRef.current = parseFloat(styles.lineHeight) || 1.5 * parseFloat(styles.fontSize) || 20
-    }
+    const styles = getComputedStyle(inner)
+    const lineHeight = parseFloat(styles.lineHeight) || 1.5 * parseFloat(styles.fontSize) || 20
+    const fullHeight = inner.scrollHeight

    outer.style.setProperty('--human-msg-full', `${fullHeight}px`)
-    setBodyClamped(fullHeight > lineHeightRef.current * 2 + 1)
+    setBodyClamped(fullHeight > lineHeight * 2 + 1)
  }, [])

  useResizeObserver(measureClamp, clampInnerRef)
@@ -954,34 +855,29 @@ const UserMessage: FC<{
    'border-(--ui-stroke-tertiary) hover:border-(--ui-stroke-secondary)'
  )

-  const bubbleContent = hasBody && (
-    // Render the user's text through a minimal markdown pipeline:
-    // backtick `code` and ``` fenced ``` blocks, with directive chips
-    // (`@file:` etc.) still resolved inside the plain-text spans.
-    <div className="sticky-human-clamp" data-clamped={bodyClamped ? 'true' : undefined}>
-      {/* Match the edit composer's collapsed line box (min-h-[1.25rem]) so
-          clicking to edit can't grow the bubble by a sub-pixel and reflow the
-          turn 1px. */}
-      <div className="min-h-[1.25rem]" ref={clampInnerRef}>
-        <UserMessageText className="wrap-anywhere" text={messageText} />
-      </div>
-    </div>
+  const bubbleContent = (
+    <>
+      {attachmentRefs.length > 0 && (
+        <span className="-mx-1 flex flex-wrap gap-1 border-b border-border/45 pb-1.5">
+          <DirectiveContent text={attachmentRefs.join(' ')} />
+        </span>
+      )}
+      {hasBody && (
+        // Render the user's text through a minimal markdown pipeline:
+        // backtick `code` and ``` fenced ``` blocks, with directive chips
+        // (`@file:` etc.) still resolved inside the plain-text spans.
+        <div className="sticky-human-clamp" data-clamped={bodyClamped ? 'true' : undefined}>
+          <div ref={clampInnerRef}>
+            <UserMessageText className="wrap-anywhere" text={messageText} />
+          </div>
+        </div>
+      )}
+    </>
  )

  return (
    <MessagePrimitive.Root asChild>
-      <StickyHumanMessageContainer
-        attachments={
-          // Attachments live BELOW the sticky bubble in normal flow, so they
-          // scroll away behind the pinned bubble instead of riding along with
-          // it. Image refs render as thumbnails, file refs as chips; no border.
-          attachmentRefs.length > 0 ? (
-            <div className="flex flex-wrap gap-1 -mt-3 mb-2">
-              <DirectiveContent text={attachmentRefs.join(' ')} />
-            </div>
-          ) : null
-        }
-      >
+      <StickyHumanMessageContainer>
        <ActionBarPrimitive.Root className="relative w-full max-w-full" data-slot="aui_user-bubble-actions">
          <div className="human-message-with-todos-wrapper flex w-full flex-col gap-0">
            <div className="relative w-full">
@@ -992,7 +888,6 @@ const UserMessage: FC<{
                  aria-label={copy.editMessage}
                  className={bubbleClassName}
                  onClick={() => triggerHaptic('selection')}
-                  onPointerDown={() => notifyThreadEditOpen()}
                  title={copy.editMessage}
                  type="button"
                >
@@ -1182,8 +1077,6 @@ const UserEditComposer: FC<UserEditComposerProps> = ({ cwd, gateway, sessionId }
  const at = useAtCompletions({ cwd, gateway, sessionId })
  const slash = useSlashCompletions({ gateway })

-  useEffect(() => () => notifyThreadEditClose(), [])
-
  const focusEditor = useCallback(() => {
    const editor = editorRef.current

@@ -1437,10 +1330,7 @@ const UserEditComposer: FC<UserEditComposerProps> = ({ cwd, gateway, sessionId }
      }

      const remote = $connection.get()?.mode === 'remote'
-
-      const requestGateway = <T,>(method: string, params?: Record<string, unknown>) =>
-        gateway.request<T>(method, params)
-
+      const requestGateway = <T,>(method: string, params?: Record<string, unknown>) => gateway.request<T>(method, params)
      const refs: InlineRefInput[] = []

      for (const candidate of osDrops) {
@@ -1709,8 +1599,9 @@ const UserEditComposer: FC<UserEditComposerProps> = ({ cwd, gateway, sessionId }
              aria-label={copy.editMessage}
              autoCapitalize="off"
              autoCorrect="off"
+              autoFocus
              className={cn(
-                'ui-prompt-input-editor__input max-h-48 w-full resize-none bg-transparent p-0 pr-7 text-[length:var(--conversation-text-font-size)] text-foreground/95 outline-none',
+                'ui-prompt-input-editor__input max-h-48 w-full resize-none bg-transparent p-0 pr-7 text-[length:var(--conversation-text-font-size)] leading-(--dt-line-height) text-foreground/95 outline-none',
                'empty:before:content-[attr(data-placeholder)] empty:before:text-muted-foreground/60',
                '**:data-ref-text:cursor-default',
                expanded ? 'min-h-16' : 'min-h-[1.25rem]'
--- a/apps/desktop/src/components/assistant-ui/tool-approval-group.test.tsx
+++ b/apps/desktop/src/components/assistant-ui/tool-approval-group.test.tsx
@@ -1,10 +1,9 @@
 import { AssistantRuntimeProvider, type ThreadMessage, useExternalStoreRuntime } from '@assistant-ui/react'
-import { cleanup, fireEvent, render, screen, waitFor } from '@testing-library/react'
+import { cleanup, render, waitFor } from '@testing-library/react'
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'

 import { clearAllPrompts, setApprovalRequest } from '@/store/prompts'
 import { $activeSessionId } from '@/store/session'
-import { clearDismissedToolRows } from '@/store/tool-dismiss'
 import { $toolDisclosureStates } from '@/store/tool-view'

 import { Thread } from './thread'
@@ -105,84 +104,6 @@ function groupedPendingMessage(): ThreadMessage {
  } as ThreadMessage
 }

-function pendingOnlyMessage(): ThreadMessage {
-  return {
-    id: 'assistant-pending-only',
-    role: 'assistant',
-    content: [
-      {
-        type: 'tool-call',
-        toolCallId: 'term-only',
-        toolName: 'terminal',
-        args: { command: 'sleep 10' },
-        argsText: JSON.stringify({ command: 'sleep 10' })
-      }
-    ],
-    status: { type: 'running' },
-    createdAt,
-    metadata: {
-      unstable_state: null,
-      unstable_annotations: [],
-      unstable_data: [],
-      steps: [],
-      custom: {}
-    }
-  } as ThreadMessage
-}
-
-function completedOnlyMessage(): ThreadMessage {
-  return {
-    id: 'assistant-completed-only',
-    role: 'assistant',
-    content: [
-      {
-        type: 'tool-call',
-        toolCallId: 'read-only',
-        toolName: 'read_file',
-        args: { path: '/etc/hosts' },
-        argsText: JSON.stringify({ path: '/etc/hosts' }),
-        result: { content: '127.0.0.1 localhost' }
-      }
-    ],
-    status: { type: 'complete', reason: 'stop' },
-    createdAt,
-    metadata: {
-      unstable_state: null,
-      unstable_annotations: [],
-      unstable_data: [],
-      steps: [],
-      custom: {}
-    }
-  } as ThreadMessage
-}
-
-function failedOnlyMessage(): ThreadMessage {
-  return {
-    id: 'assistant-failed-only',
-    role: 'assistant',
-    content: [
-      {
-        type: 'tool-call',
-        toolCallId: 'term-failed',
-        toolName: 'terminal',
-        args: { command: 'exit 1' },
-        argsText: JSON.stringify({ command: 'exit 1' }),
-        isError: true,
-        result: { stderr: 'boom' }
-      }
-    ],
-    status: { type: 'complete', reason: 'stop' },
-    createdAt,
-    metadata: {
-      unstable_state: null,
-      unstable_annotations: [],
-      unstable_data: [],
-      steps: [],
-      custom: {}
-    }
-  } as ThreadMessage
-}
-
 function GroupHarness({ message }: { message: ThreadMessage }) {
  const runtime = useExternalStoreRuntime<ThreadMessage>({
    messages: [message],
@@ -201,14 +122,12 @@ beforeEach(() => {
  clearAllPrompts()
  $activeSessionId.set('sess-1')
  $toolDisclosureStates.set({})
-  clearDismissedToolRows()
 })

 afterEach(() => {
  cleanup()
  clearAllPrompts()
  $activeSessionId.set(null)
-  clearDismissedToolRows()
 })

 describe('flat tool list approval surfacing', () => {
@@ -236,64 +155,4 @@ describe('flat tool list approval surfacing', () => {
      expect(bar?.closest('[hidden]')).toBeNull()
    })
  })
-
-  it('lets completed tool rows be dismissed', async () => {
-    const { container } = render(<GroupHarness message={completedOnlyMessage()} />)
-
-    const dismiss = await screen.findByLabelText('Dismiss')
-
-    expect(container.querySelectorAll('[data-slot="tool-block"]').length).toBeGreaterThan(1)
-
-    fireEvent.click(dismiss)
-
-    await waitFor(() => {
-      expect(screen.queryByLabelText('Dismiss')).toBeNull()
-    })
-  })
-
-  it('keeps a dismissed row hidden after a remount (virtualization)', async () => {
-    // The thread virtualizes, so a row's component unmounts/remounts as it
-    // scrolls. Dismissal must persist across that — component-local state would
-    // forget it and the row would pop back. Simulate the remount by unmounting
-    // and rendering the same message fresh.
-    const first = render(<GroupHarness message={completedOnlyMessage()} />)
-
-    fireEvent.click(await screen.findByLabelText('Dismiss'))
-
-    await waitFor(() => {
-      expect(screen.queryByLabelText('Dismiss')).toBeNull()
-    })
-
-    first.unmount()
-
-    const { container } = render(<GroupHarness message={completedOnlyMessage()} />)
-
-    await waitFor(() => {
-      expect(container.querySelectorAll('[data-slot="tool-block"]').length).toBeGreaterThan(0)
-    })
-
-    expect(screen.queryByLabelText('Dismiss')).toBeNull()
-  })
-
-  it('lets failed tool rows be dismissed', async () => {
-    render(<GroupHarness message={failedOnlyMessage()} />)
-
-    const dismiss = await screen.findByLabelText('Dismiss')
-
-    fireEvent.click(dismiss)
-
-    await waitFor(() => {
-      expect(screen.queryByLabelText('Dismiss')).toBeNull()
-    })
-  })
-
-  it('does not show dismiss for pending tool rows', async () => {
-    const { container } = render(<GroupHarness message={pendingOnlyMessage()} />)
-
-    await waitFor(() => {
-      expect(container.querySelectorAll('[data-slot="tool-block"]').length).toBeGreaterThan(0)
-    })
-
-    expect(screen.queryByLabelText('Dismiss')).toBeNull()
-  })
 })
--- a/apps/desktop/src/components/assistant-ui/tool-approval.test.tsx
+++ b/apps/desktop/src/components/assistant-ui/tool-approval.test.tsx
@@ -84,19 +84,6 @@ describe('PendingToolApproval', () => {
    expect($approvalRequest.get()).toBeNull()
  })

-  it('reveals the full command inline when the Command toggle is clicked', () => {
-    const longCommand = 'python -c "' + 'x'.repeat(400) + '"'
-    setRequest(longCommand)
-    render(<PendingToolApproval part={part('terminal')} />)
-
-    // Collapsed by default: the full command is not in the DOM yet.
-    expect(screen.queryByText(longCommand)).toBeNull()
-
-    fireEvent.click(screen.getByRole('button', { name: /Command/ }))
-
-    expect(screen.getByText(longCommand)).toBeTruthy()
-  })
-
  it('sends choice "deny" on Reject', async () => {
    const request = mockGateway()
    setRequest()
--- a/apps/desktop/src/components/assistant-ui/tool-approval.tsx
+++ b/apps/desktop/src/components/assistant-ui/tool-approval.tsx
@@ -16,7 +16,6 @@ import { DropdownMenu, DropdownMenuContent, DropdownMenuItem, DropdownMenuTrigge
 import { useI18n } from '@/i18n'
 import { triggerHaptic } from '@/lib/haptics'
 import { ChevronDown, Loader2 } from '@/lib/icons'
-import { cn } from '@/lib/utils'
 import { $gateway } from '@/store/gateway'
 import { notifyError } from '@/store/notifications'
 import { $approvalRequest, type ApprovalRequest, clearApprovalRequest } from '@/store/prompts'
@@ -61,15 +60,9 @@ const ApprovalBar: FC<{ request: ApprovalRequest }> = ({ request }) => {
  // "Always allow" persists the pattern to ~/.hermes/config.yaml permanently, so
  // it goes through a confirm step rather than firing straight from the menu.
  const [confirmAlways, setConfirmAlways] = useState(false)
-  // The pending tool row only shows a single truncated line of the command, and
-  // a pending row can't be expanded (no result yet), so the full command was
-  // previously only reachable via the "Always allow" modal. Let the user reveal
-  // it inline instead — "expand, Run" (2 clicks) rather than the modal dance.
-  const [showCommand, setShowCommand] = useState(false)
  const busy = submitting !== null
  // false when the backend won't honor a permanent allow (tirith warning) → hide "Always allow".
  const allowPermanent = request.allowPermanent !== false
-  const hasCommand = request.command.trim().length > 0

  const respond = useCallback(
    async (choice: ApprovalChoice) => {
@@ -126,89 +119,70 @@ const ApprovalBar: FC<{ request: ApprovalRequest }> = ({ request }) => {
  }, [confirmAlways, respond])

  return (
-    <div className="mt-1 ps-5" data-slot="tool-approval-inline">
-      <div className="flex items-center gap-2.5">
-        <div className="inline-flex h-6 items-stretch overflow-hidden rounded-md border border-primary/25 bg-primary/10 text-primary">
-          <Button
-            className="h-full gap-1 rounded-none px-2 text-xs font-medium text-primary hover:bg-primary/15 hover:text-primary"
-            disabled={busy}
-            onClick={() => void respond('once')}
-            size="xs"
-            variant="ghost"
-          >
-            {submitting === 'once' ? <Loader2 className="size-3 animate-spin" /> : copy.run}
-            {submitting !== 'once' && <span className="text-[0.625rem] text-primary/60">{isMac ? '⌘⏎' : 'Ctrl⏎'}</span>}
-          </Button>
-          <span aria-hidden className="w-px self-stretch bg-primary/20" />
-          <DropdownMenu>
-            <DropdownMenuTrigger asChild>
-              <Button
-                aria-label={copy.moreOptions}
-                className="h-full w-5 rounded-none px-0 text-primary hover:bg-primary/15 hover:text-primary"
-                disabled={busy}
-                size="xs"
-                variant="ghost"
-              >
-                <ChevronDown className="size-3" />
-              </Button>
-            </DropdownMenuTrigger>
-            <DropdownMenuContent align="start" className="min-w-44">
-              <DropdownMenuItem onSelect={() => void respond('session')}>{copy.allowSession}</DropdownMenuItem>
-              {allowPermanent && (
-                <DropdownMenuItem
-                  onSelect={() => {
-                    // Defer one tick so the menu fully unmounts before the dialog
-                    // mounts — otherwise Radix's focus-return races the dialog and
-                    // dismisses it via onInteractOutside.
-                    setTimeout(() => setConfirmAlways(true), 0)
-                  }}
-                >
-                  {copy.alwaysAllowMenu}
-                </DropdownMenuItem>
-              )}
-              <DropdownMenuItem onSelect={() => void respond('deny')} variant="destructive">
-                {copy.reject}
-              </DropdownMenuItem>
-            </DropdownMenuContent>
-          </DropdownMenu>
-        </div>
-
+    <div className="mt-1 flex items-center gap-2.5 ps-5" data-slot="tool-approval-inline">
+      <div className="inline-flex h-6 items-stretch overflow-hidden rounded-md border border-primary/25 bg-primary/10 text-primary">
        <Button
-          className="h-6 gap-1.5 rounded-md px-1.5 text-xs font-normal text-(--ui-text-tertiary) hover:text-foreground"
+          className="h-full gap-1 rounded-none px-2 text-xs font-medium text-primary hover:bg-primary/15 hover:text-primary"
          disabled={busy}
-          onClick={() => void respond('deny')}
+          onClick={() => void respond('once')}
          size="xs"
          variant="ghost"
        >
-          {submitting === 'deny' ? <Loader2 className="size-3 animate-spin" /> : copy.reject}
-          {submitting !== 'deny' && <span className="text-[0.625rem] opacity-55">Esc</span>}
+          {submitting === 'once' ? <Loader2 className="size-3 animate-spin" /> : copy.run}
+          {submitting !== 'once' && <span className="text-[0.625rem] text-primary/60">{isMac ? '⌘⏎' : 'Ctrl⏎'}</span>}
        </Button>
-
-        {hasCommand && (
-          <Button
-            aria-expanded={showCommand}
-            className="h-6 gap-1 rounded-md px-1.5 text-xs font-normal text-(--ui-text-tertiary) hover:text-foreground"
-            onClick={() => setShowCommand(value => !value)}
-            size="xs"
-            variant="ghost"
-          >
-            {copy.command}
-            <ChevronDown className={cn('size-3 transition-transform', showCommand && 'rotate-180')} />
-          </Button>
-        )}
+        <span aria-hidden className="w-px self-stretch bg-primary/20" />
+        <DropdownMenu>
+          <DropdownMenuTrigger asChild>
+            <Button
+              aria-label={copy.moreOptions}
+              className="h-full w-5 rounded-none px-0 text-primary hover:bg-primary/15 hover:text-primary"
+              disabled={busy}
+              size="xs"
+              variant="ghost"
+            >
+              <ChevronDown className="size-3" />
+            </Button>
+          </DropdownMenuTrigger>
+          <DropdownMenuContent align="start" className="min-w-44">
+            <DropdownMenuItem onSelect={() => void respond('session')}>{copy.allowSession}</DropdownMenuItem>
+            {allowPermanent && (
+              <DropdownMenuItem
+                onSelect={() => {
+                  // Defer one tick so the menu fully unmounts before the dialog
+                  // mounts — otherwise Radix's focus-return races the dialog and
+                  // dismisses it via onInteractOutside.
+                  setTimeout(() => setConfirmAlways(true), 0)
+                }}
+              >
+                {copy.alwaysAllowMenu}
+              </DropdownMenuItem>
+            )}
+            <DropdownMenuItem onSelect={() => void respond('deny')} variant="destructive">
+              {copy.reject}
+            </DropdownMenuItem>
+          </DropdownMenuContent>
+        </DropdownMenu>
      </div>

-      {showCommand && hasCommand && (
-        <pre className="mt-1.5 max-h-40 overflow-auto whitespace-pre-wrap break-words rounded-md border border-(--ui-stroke-tertiary) bg-(--ui-chat-surface-background) px-2.5 py-1.5 font-mono text-xs leading-snug text-foreground">
-          {request.command.trim()}
-        </pre>
-      )}
+      <Button
+        className="h-6 gap-1.5 rounded-md px-1.5 text-xs font-normal text-(--ui-text-tertiary) hover:text-foreground"
+        disabled={busy}
+        onClick={() => void respond('deny')}
+        size="xs"
+        variant="ghost"
+      >
+        {submitting === 'deny' ? <Loader2 className="size-3 animate-spin" /> : copy.reject}
+        {submitting !== 'deny' && <span className="text-[0.625rem] opacity-55">Esc</span>}
+      </Button>

      <Dialog onOpenChange={setConfirmAlways} open={confirmAlways}>
        <DialogContent className="max-w-md">
          <DialogHeader>
            <DialogTitle>{copy.alwaysTitle}</DialogTitle>
-            <DialogDescription>{copy.alwaysDescription(request.description)}</DialogDescription>
+            <DialogDescription>
+              {copy.alwaysDescription(request.description)}
+            </DialogDescription>
          </DialogHeader>

          {request.command.trim() && (
--- a/apps/desktop/src/components/assistant-ui/tool-fallback.tsx
+++ b/apps/desktop/src/components/assistant-ui/tool-fallback.tsx
@@ -12,20 +12,16 @@ import { DiffLines } from '@/components/chat/diff-lines'
 import { DisclosureRow } from '@/components/chat/disclosure-row'
 import { PreviewAttachment } from '@/components/chat/preview-attachment'
 import { ZoomableImage } from '@/components/chat/zoomable-image'
-import { Button } from '@/components/ui/button'
-import { Codicon } from '@/components/ui/codicon'
 import { CopyButton } from '@/components/ui/copy-button'
 import { FadeText } from '@/components/ui/fade-text'
 import { GlyphSpinner } from '@/components/ui/glyph-spinner'
 import { ToolIcon } from '@/components/ui/tool-icon'
-import { Tip } from '@/components/ui/tooltip'
 import { useI18n } from '@/i18n'
 import { PrettyLink, LinkifiedText as SharedLinkifiedText, urlSlugTitleLabel } from '@/lib/external-link'
 import { AlertCircle, CheckCircle2 } from '@/lib/icons'
 import { useEnterAnimation } from '@/lib/use-enter-animation'
 import { cn } from '@/lib/utils'
 import { $toolInlineDiffs } from '@/store/tool-diffs'
-import { $toolRowDismissed, dismissToolRow } from '@/store/tool-dismiss'
 import { $toolDisclosureOpen, $toolViewMode, setToolDisclosureOpen } from '@/store/tool-view'

 import { PendingToolApproval } from './tool-approval'
@@ -197,16 +193,13 @@ function useDisclosureOpen(disclosureId: string, fallbackOpen = false): boolean
 function ToolEntry({ part }: ToolEntryProps) {
  const { t } = useI18n()
  const copy = t.assistant.tool
-  const statusCopy = t.statusStack
  const messageId = useAuiState(s => s.message.id)
  const messageRunning = useAuiState(selectMessageRunning)
  const embedded = useContext(ToolEmbedContext)
  const toolViewMode = useStore($toolViewMode)
  const disclosureId = `tool-entry:${messageId}:${toolPartDisclosureId(part)}`
-  const dismissed = useStore($toolRowDismissed(disclosureId))
  const open = useDisclosureOpen(disclosureId)
  const isPending = messageRunning && part.result === undefined
-  const canDismiss = !isPending && !embedded
  // Only animate entries that mount while their message is actively
  // streaming — historical sessions mount with `messageRunning === false`,
  // so they paint statically without a settle cascade. The wrapping group
@@ -289,33 +282,9 @@ function ToolEntry({ part }: ToolEntryProps) {
  // the disclosure caret hard to hit. Copy now lives in the expanded body's
  // top-right, where it can't fight the caret for the right edge.
  const trailing =
-    isPending && !embedded ? <ActivityTimerText className={TOOL_HEADER_DURATION_CLASS} seconds={elapsed} /> : undefined
-
-  // Once a turn has settled, a hover/focus-revealed dismiss lets the user clear
-  // a completed/failed row that would otherwise sit at the tail of the chat.
-  // It goes in the in-flow `action` slot (not `trailing`) so it can't overlap
-  // the disclosure caret's hit-target — see the comment above `trailing`.
-  const dismissAction = canDismiss ? (
-    <Tip label={statusCopy.dismiss}>
-      <Button
-        aria-label={statusCopy.dismiss}
-        className="size-5 rounded-md text-(--ui-text-tertiary) opacity-0 transition-opacity hover:text-(--ui-text-primary) hover:opacity-100 group-hover/disclosure-row:opacity-80 group-focus-within/disclosure-row:opacity-80"
-        onClick={event => {
-          event.stopPropagation()
-          dismissToolRow(disclosureId)
-        }}
-        size="icon-xs"
-        type="button"
-        variant="ghost"
-      >
-        <Codicon name="close" size="0.75rem" />
-      </Button>
-    </Tip>
-  ) : undefined
-
-  if (dismissed) {
-    return null
-  }
+    isPending && !embedded ? (
+      <ActivityTimerText className={TOOL_HEADER_DURATION_CLASS} seconds={elapsed} />
+    ) : undefined

  return (
    <div
@@ -328,7 +297,6 @@ function ToolEntry({ part }: ToolEntryProps) {
    >
      <div className={cn(open && 'border-b border-(--ui-stroke-tertiary) px-2 py-1.5')}>
        <DisclosureRow
-          action={dismissAction}
          onToggle={hasExpandableContent ? () => setToolDisclosureOpen(disclosureId, !open) : undefined}
          open={open}
          trailing={trailing}
--- a/apps/desktop/src/components/chat/disclosure-row.tsx
+++ b/apps/desktop/src/components/chat/disclosure-row.tsx
@@ -14,19 +14,12 @@ import { cn } from '@/lib/utils'
 //     title text, NOT the full row — and reaches just past the chevron with
 //     `-mx-1.5 px-1.5` so it reads as a soft hit-target rather than a slab
 //     stretching to the message edge.
-//   - `trailing` overlays the right edge (absolute) and must stay
-//     non-interactive (e.g. a duration timer) — an opacity-0-but-clickable
-//     control there steals clicks from the caret. Interactive controls go in
-//     `action`, which lays out *in flow* at the far right so it never sits on
-//     top of the caret's hit-target, no matter how long the title is.
 export function DisclosureRow({
-  action,
  children,
  onToggle,
  open,
  trailing
 }: {
-  action?: ReactNode
  children: ReactNode
  onToggle?: () => void
  open: boolean
@@ -62,11 +55,6 @@ export function DisclosureRow({
          </span>
        )}
      </button>
-      {action && (
-        <span className="ml-auto flex h-(--conversation-line-height) shrink-0 items-center self-start pl-1.5">
-          {action}
-        </span>
-      )}
      {trailing && (
        <span className="absolute right-1 top-0 flex h-(--conversation-line-height) items-center">{trailing}</span>
      )}
--- a/apps/desktop/src/components/chat/generated-image-context.tsx
+++ b/apps/desktop/src/components/chat/generated-image-context.tsx
@@ -0,0 +1,19 @@
+'use client'
+
+import { createContext, type ReactNode, useContext, useMemo, useState } from 'react'
+
+type Value = {
+  isPending: boolean // flag held by the nearest GeneratedImageProvider
+  setPending: (pending: boolean) => void // that provider's state setter
+}
+
+const Ctx = createContext<Value | null>(null) // default is null: no provider mounted above
+
+export function GeneratedImageProvider({ children }: { children: ReactNode }) {
+  const [isPending, setPending] = useState(false) // starts as not pending
+  const value = useMemo(() => ({ isPending, setPending }), [isPending]) // new object only when the flag changes
+
+  return <Ctx.Provider value={value}>{children}</Ctx.Provider>
+}
+
+export const useGeneratedImageContext = () => useContext(Ctx) // null when used outside GeneratedImageProvider
--- a/apps/desktop/src/components/chat/generated-image-result.tsx
+++ b/apps/desktop/src/components/chat/generated-image-result.tsx
@@ -1,174 +0,0 @@
-'use client'
-
-import { type FC, useEffect, useState } from 'react'
-
-import { DiffusionCanvas } from '@/components/chat/image-generation-placeholder'
-import { ImageActionButton, ImageLightbox } from '@/components/chat/zoomable-image'
-import { useImageDownload } from '@/hooks/use-image-download'
-import { useI18n } from '@/i18n'
-import { generatedImageFromResult } from '@/lib/generated-images'
-import { filePathFromMediaPath, gatewayMediaDataUrl, isRemoteGateway, mediaExternalUrl, mediaName } from '@/lib/media'
-import { cn } from '@/lib/utils'
-
-// Aspect hint from the tool args sizes the frame *before* the image loads, so
-// the placeholder and the resolved image occupy the same box — no layout shift.
-const ASPECT_HINTS: Record<string, number> = {
-  landscape: 16 / 9,
-  square: 1,
-  portrait: 9 / 16
-}
-
-function hintedRatio(aspectRatio?: string): number {
-  return ASPECT_HINTS[String(aspectRatio ?? '').toLowerCase().trim()] ?? ASPECT_HINTS.landscape
-}
-
-function isInlineSrc(path: string): boolean {
-  return /^(?:https?|data):/i.test(path)
-}
-
-async function resolveImageSrc(path: string): Promise<string> {
-  if (isInlineSrc(path)) {
-    return path
-  }
-
-  if (window.hermesDesktop && isRemoteGateway()) {
-    return gatewayMediaDataUrl(path)
-  }
-
-  if (!window.hermesDesktop?.readFileDataUrl) {
-    return mediaExternalUrl(path)
-  }
-
-  return window.hermesDesktop.readFileDataUrl(filePathFromMediaPath(path))
-}
-
-export const GeneratedImage: FC<{ aspectRatio?: string; result?: unknown }> = ({ aspectRatio, result }) => {
-  const { t } = useI18n()
-  const copy = t.desktop
-  const image = result === undefined ? null : generatedImageFromResult(result)
-  const pending = result === undefined
-
-  const [ratio, setRatio] = useState(() => hintedRatio(aspectRatio))
-  const [src, setSrc] = useState(() => (image && isInlineSrc(image) ? image : ''))
-  const [loaded, setLoaded] = useState(false)
-  const [canvasGone, setCanvasGone] = useState(false)
-  const [failed, setFailed] = useState(false)
-  const [lightboxOpen, setLightboxOpen] = useState(false)
-  const { download, saving } = useImageDownload(src)
-
-  useEffect(() => setRatio(hintedRatio(aspectRatio)), [aspectRatio])
-
-  // Resolve the deliverable path (local read / gateway proxy / remote URL). The
-  // <img> stays mounted under the placeholder and only fades in once it decodes,
-  // so the frame keeps its hinted size and never jumps.
-  useEffect(() => {
-    let cancelled = false
-    setFailed(false)
-    setLoaded(false)
-    setCanvasGone(false)
-    setSrc(image && isInlineSrc(image) ? image : '')
-
-    if (!image || isInlineSrc(image)) {
-      return
-    }
-
-    void resolveImageSrc(image)
-      .then(resolved => !cancelled && setSrc(resolved))
-      .catch(() => !cancelled && setFailed(true))
-
-    return () => {
-      cancelled = true
-    }
-  }, [image])
-
-  // Completed but no usable image (generation failed): the agent's prose carries
-  // the explanation, so render nothing here.
-  if (!pending && !image) {
-    return null
-  }
-
-  if (failed && image) {
-    return (
-      <a
-        className="mt-2 inline-block font-semibold text-foreground underline underline-offset-4 decoration-current/20 wrap-anywhere"
-        href="#"
-        onClick={event => {
-          event.preventDefault()
-          void window.hermesDesktop?.openExternal(mediaExternalUrl(image))
-        }}
-      >
-        {copy.openImage}: {mediaName(image)}
-      </a>
-    )
-  }
-
-  return (
-    <>
-      <span
-        aria-label={pending ? t.assistant.tool.renderingImage : undefined}
-        aria-live={pending ? 'polite' : undefined}
-        className="group/image relative block max-w-full overflow-hidden rounded-2xl transition-[width,height] duration-300 ease-out"
-        data-slot="aui_generated-image"
-        role={pending ? 'status' : undefined}
-        style={{
-          aspectRatio: ratio,
-          // Width is capped so the derived height (width / ratio) never exceeds
-          // --image-preview-height; the box then matches the image exactly with
-          // no letterboxing.
-          width: `min(calc(var(--image-preview-height) * ${ratio}), var(--image-preview-max-width), 100%)`
-        }}
-      >
-        {!canvasGone && (
-          <div
-            className={cn('absolute inset-0 transition-opacity duration-500 ease-out', loaded && 'opacity-0')}
-            onTransitionEnd={() => loaded && setCanvasGone(true)}
-          >
-            <DiffusionCanvas />
-          </div>
-        )}
-        {src && (
-          <button
-            className="absolute inset-0 block size-full cursor-zoom-in"
-            onClick={() => setLightboxOpen(true)}
-            title={copy.openImage}
-            type="button"
-          >
-            <img
-              alt="Generated image"
-              className={cn(
-                'absolute inset-0 size-full object-contain opacity-0 transition-opacity duration-500 ease-out',
-                loaded && 'opacity-100'
-              )}
-              draggable={false}
-              onError={() => setFailed(true)}
-              onLoad={event => {
-                const { naturalHeight, naturalWidth } = event.currentTarget
-
-                if (naturalWidth && naturalHeight) {
-                  setRatio(naturalWidth / naturalHeight)
-                }
-
-                setLoaded(true)
-              }}
-              src={src}
-            />
-          </button>
-        )}
-        {loaded && src && (
-          <ImageActionButton className="group-hover/image:opacity-100" copy={copy} onClick={download} saving={saving} />
-        )}
-      </span>
-      {src && (
-        <ImageLightbox
-          alt="Generated image"
-          copy={copy}
-          onClick={download}
-          onOpenChange={setLightboxOpen}
-          open={lightboxOpen}
-          saving={saving}
-          src={src}
-        />
-      )}
-    </>
-  )
-}
--- a/apps/desktop/src/components/chat/image-generation-placeholder.tsx
+++ b/apps/desktop/src/components/chat/image-generation-placeholder.tsx
@@ -1,6 +1,7 @@
 import { type FC, useCallback, useEffect, useRef } from 'react'

 import { useResizeObserver } from '@/hooks/use-resize-observer'
+import { useI18n } from '@/i18n'

 type Rgb = { r: number; g: number; b: number }

@@ -23,26 +24,19 @@ const smoothstep = (edge0: number, edge1: number, value: number) => {
 }

 const parseColor = (value: string, fallback: Rgb): Rgb => {
-  const v = value.trim()
-
-  const hex = v.match(/^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i)
+  const hex = value.trim().match(/^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i)

  if (hex) {
-    return { r: Number.parseInt(hex[1], 16), g: Number.parseInt(hex[2], 16), b: Number.parseInt(hex[3], 16) }
+    return {
+      r: Number.parseInt(hex[1], 16),
+      g: Number.parseInt(hex[2], 16),
+      b: Number.parseInt(hex[3], 16)
+    }
  }

-  const rgb = v.match(/rgba?\(\s*(\d+)[\s,]+(\d+)[\s,]+(\d+)/i)
+  const rgb = value.trim().match(/rgba?\(\s*(\d+)[\s,]+(\d+)[\s,]+(\d+)/i) // also matches space-separated rgb(1 2 3)

-  if (rgb) {
-    return { r: Number(rgb[1]), g: Number(rgb[2]), b: Number(rgb[3]) }
-  }
-
-  // Chromium serialises `color-mix(in srgb, …)` as `color(srgb r g b / a)` with 0–1 floats.
-  const srgb = v.match(/color\(\s*srgb\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)/i)
-
-  return srgb
-    ? { r: Math.round(Number(srgb[1]) * 255), g: Math.round(Number(srgb[2]) * 255), b: Math.round(Number(srgb[3]) * 255) }
-    : fallback
+  return rgb ? { r: Number(rgb[1]), g: Number(rgb[2]), b: Number(rgb[3]) } : fallback
 }

 const mix = (a: Rgb, b: Rgb, amount: number): Rgb => ({
@@ -88,22 +82,17 @@ const fbm = (x: number, y: number) => {
  return value
 }

-type Theme = Record<keyof typeof FALLBACKS, Rgb>
+const readTheme = () => { // NOTE(review): color-mix() values of --dt-* are returned verbatim here — confirm they parse
+  const styles = getComputedStyle(document.documentElement)

-const TOKENS = Object.keys(FALLBACKS) as (keyof typeof FALLBACKS)[]
-
-// `--dt-*` resolve through `var()` chains into `color-mix()`, which
-// getPropertyValue hands back verbatim — unreadable. Bouncing each token through
-// a probe's `color` lets the browser compute it to a concrete color we can
-// parse, so the canvas tracks the live theme instead of a hardcoded fallback.
-const readTheme = (probe: HTMLElement): Theme =>
-  Object.fromEntries(
-    TOKENS.map(key => {
-      probe.style.color = `var(--dt-${key})`
-
-      return [key, parseColor(getComputedStyle(probe).color, FALLBACKS[key])]
-    })
-  ) as Theme
+  return {
+    card: parseColor(styles.getPropertyValue('--dt-card'), FALLBACKS.card),
+    muted: parseColor(styles.getPropertyValue('--dt-muted'), FALLBACKS.muted),
+    foreground: parseColor(styles.getPropertyValue('--dt-foreground'), FALLBACKS.foreground),
+    primary: parseColor(styles.getPropertyValue('--dt-primary'), FALLBACKS.primary),
+    ring: parseColor(styles.getPropertyValue('--dt-ring'), FALLBACKS.ring)
+  }
+}

 const fitCanvas = (canvas: HTMLCanvasElement, ctx: CanvasRenderingContext2D) => {
  const rect = canvas.getBoundingClientRect()
@@ -118,13 +107,8 @@ const fitCanvas = (canvas: HTMLCanvasElement, ctx: CanvasRenderingContext2D) =>
  return { width, height }
 }

-const drawAsciiDiffusion = (
-  ctx: CanvasRenderingContext2D,
-  theme: Theme,
-  width: number,
-  height: number,
-  time: number
-) => {
+const drawAsciiDiffusion = (ctx: CanvasRenderingContext2D, width: number, height: number, time: number) => {
+  const theme = readTheme()
  const bg = ctx.createLinearGradient(0, 0, width, height)
  bg.addColorStop(0, rgba(mix(theme.card, theme.primary, 0.08), 1))
  bg.addColorStop(0.54, rgba(mix(theme.card, theme.muted, 0.68), 1))
@@ -141,9 +125,6 @@ const drawAsciiDiffusion = (
  const cellHeight = fontSize * 1.28
  const cols = Math.ceil(width / cellWidth)
  const rows = Math.ceil(height / cellHeight)
-  // Normalise both axes by the shorter side so the radial bloom stays a circle
-  // (not a squished ellipse) when the frame isn't landscape.
-  const short = Math.min(width, height)
  const centerX = 0.53 + Math.sin(time * 0.055) * 0.02
  const centerY = 0.5 + Math.cos(time * 0.048) * 0.02
  const timestep = Math.floor(time * 1.15)
@@ -157,10 +138,10 @@ const drawAsciiDiffusion = (
    for (let col = -1; col <= cols + 1; col += 1) {
      const x = col * cellWidth + cellWidth * 0.5
      const y = row * cellHeight + cellHeight * 0.5
-      const sx = (x - centerX * width) / short
-      const sy = (y - centerY * height) / short
-      const dx = sx * 1.2
-      const dy = sy * 0.95
+      const nx = x / width
+      const ny = y / height
+      const dx = (nx - centerX) * 1.2
+      const dy = (ny - centerY) * 0.95
      const radius = Math.hypot(dx, dy)
      const angle = Math.atan2(dy, dx)

@@ -171,7 +152,7 @@ const drawAsciiDiffusion = (
      const contour =
        Math.exp(-((Math.sin(angle * 3 + radius * 17 - time * 0.17) * 0.5 + 0.5 - radius) ** 2) / 0.016) * 0.38

-      const stem = Math.exp(-((sx + 0.05) ** 2 / 0.004 + (sy - 0.25) ** 2 / 0.08)) * 0.46
+      const stem = Math.exp(-((nx - centerX + 0.05) ** 2 / 0.004 + (ny - centerY - 0.25) ** 2 / 0.08)) * 0.46

      const latent = clamp(bloom + contour + stem, 0, 1)
      const staticA = hash2(col + timestep * 19, row - timestep * 11)
@@ -243,10 +224,9 @@ const drawAsciiDiffusion = (
  ctx.fillRect(0, 0, width, height)
 }

-export const DiffusionCanvas: FC = () => {
+const DiffusionCanvas: FC = () => {
  const canvasRef = useRef<HTMLCanvasElement | null>(null)
  const sizeRef = useRef({ width: 0, height: 0 })
-  const themeRef = useRef<Theme>(FALLBACKS)

  const fitToContainer = useCallback(() => {
    const canvas = canvasRef.current
@@ -261,28 +241,6 @@ export const DiffusionCanvas: FC = () => {

  useResizeObserver(fitToContainer, canvasRef)

-  useEffect(() => {
-    const probe = document.createElement('span')
-    probe.style.cssText = 'position:absolute;width:0;height:0;visibility:hidden;pointer-events:none'
-    document.documentElement.appendChild(probe)
-
-    const sync = () => {
-      themeRef.current = readTheme(probe)
-    }
-
-    sync()
-
-    // Re-resolve when the theme repaints (`applyTheme` toggles `.dark` and
-    // rewrites inline custom props on the root) instead of per animation frame.
-    const observer = new MutationObserver(sync)
-    observer.observe(document.documentElement, { attributes: true, attributeFilter: ['class', 'style', 'data-hermes-mode'] })
-
-    return () => {
-      observer.disconnect()
-      probe.remove()
-    }
-  }, [])
-
  useEffect(() => {
    const canvas = canvasRef.current
    const ctx = canvas?.getContext('2d')
@@ -296,7 +254,7 @@ export const DiffusionCanvas: FC = () => {
    let frame = requestAnimationFrame(function draw(now) {
      const { width, height } = sizeRef.current
      ctx.clearRect(0, 0, width, height)
-      drawAsciiDiffusion(ctx, themeRef.current, width, height, now / 1000)
+      drawAsciiDiffusion(ctx, width, height, now / 1000)
      frame = requestAnimationFrame(draw)
    })

@@ -307,3 +265,15 @@ export const DiffusionCanvas: FC = () => {

  return <canvas className="absolute inset-0 h-full w-full" ref={canvasRef} />
 }
+
+// Accessible status region (role="status", polite live region) labelled with
+// `t.assistant.tool.renderingImage`. It wraps the animated DiffusionCanvas in
+// a rounded, clipped frame whose height comes from `--image-preview-height`.
+export const ImageGenerationPlaceholder: FC = () => {
+  const { t } = useI18n()
+
+  return (
+    <div aria-label={t.assistant.tool.renderingImage} aria-live="polite" className="w-full max-w-136 self-start" role="status">
+      <div className="relative h-(--image-preview-height) overflow-hidden rounded-4xl border border-border/55 shadow-[inset_0_0.0625rem_0_color-mix(in_srgb,white_45%,transparent),inset_0_0_0_0.0625rem_color-mix(in_srgb,var(--dt-border)_34%,transparent),inset_0_-0.75rem_1.75rem_color-mix(in_srgb,var(--dt-primary)_5%,transparent)]">
+        <DiffusionCanvas />
+      </div>
+    </div>
+  )
+}
--- a/apps/desktop/src/components/chat/status-row.tsx
+++ b/apps/desktop/src/components/chat/status-row.tsx
@@ -51,9 +51,7 @@ export function StatusRow({
      role={onActivate ? 'button' : undefined}
      tabIndex={onActivate ? 0 : undefined}
    >
-      {leading !== undefined && (
-        <span className="flex size-3.5 shrink-0 items-center justify-center">{leading}</span>
-      )}
+      <span className="flex size-3.5 shrink-0 items-center justify-center">{leading}</span>
      <div className="flex min-w-0 flex-1 items-center gap-2">{children}</div>
      {trailing && (
        <div
--- a/apps/desktop/src/components/chat/zoomable-image.tsx
+++ b/apps/desktop/src/components/chat/zoomable-image.tsx
@@ -3,17 +3,55 @@
 import { type ComponentProps, useState } from 'react'

 import { Dialog, DialogContent } from '@/components/ui/dialog'
-import { useImageDownload } from '@/hooks/use-image-download'
 import { useI18n } from '@/i18n'
 import { Download } from '@/lib/icons'
 import { cn } from '@/lib/utils'
+import { notify, notifyError } from '@/store/notifications'
+
+// Derives a display/download filename from an image source.
+// Returns the last non-empty path segment of `src`, resolved against the
+// current page URL, or the literal 'image' when `src` is missing or has no
+// usable segment.
+// NOTE(review): the segment is returned exactly as `URL.pathname` reports it,
+// with no decodeURIComponent step — confirm percent-encoded names and
+// data:/blob: sources produce acceptable filenames.
+function imageFilename(src?: string): string {
+  if (!src) {
+    return 'image'
+  }
+
+  try {
+    const { pathname } = new URL(src, window.location.href)
+
+    return pathname.split('/').filter(Boolean).pop() || 'image'
+  } catch {
+    // URL construction failed: fall back to splitting the raw string on
+    // either slash style.
+    return src.split(/[\\/]/).filter(Boolean).pop() || 'image'
+  }
+}
+
+// Returns true when `error` (an Error or a plain string) carries the
+// "No handler registered for 'hermes:saveImageFromUrl'" message. Any other
+// value is treated as an empty message and yields false.
+function isMissingIpcHandler(error: unknown): boolean {
+  const message = error instanceof Error ? error.message : typeof error === 'string' ? error : ''
+
+  return message.includes("No handler registered for 'hermes:saveImageFromUrl'")
+}
+
+// Downloads `src` through the browser: fetches it, wraps the body in a blob
+// URL, and clicks a temporary <a download> element named via imageFilename.
+// Throws when the response status is not ok; fetch/blob rejections propagate.
+// NOTE(review): cleanup is not in a `finally`, so if `link.click()` threw the
+// anchor would stay in the DOM and the blob URL would never be revoked —
+// confirm that is acceptable.
+async function startBrowserDownload(src: string) {
+  const response = await fetch(src)
+
+  if (!response.ok) {
+    throw new Error(`Could not fetch image: ${response.status}`)
+  }
+
+  const blobUrl = URL.createObjectURL(await response.blob())
+  const link = document.createElement('a')
+  link.href = blobUrl
+  link.download = imageFilename(src)
+  link.rel = 'noopener noreferrer'
+  document.body.appendChild(link)
+  link.click()
+  link.remove()
+  // Revocation is deferred by 30 seconds rather than done right after click().
+  window.setTimeout(() => URL.revokeObjectURL(blobUrl), 30_000)
+}

 export interface ZoomableImageProps extends ComponentProps<'img'> {
  containerClassName?: string
  slot?: string
 }

-export interface ImageActionCopy {
+interface ImageActionCopy {
  downloadImage: string
  savingImage: string
 }
@@ -21,10 +59,70 @@ export interface ImageActionCopy {
 export function ZoomableImage({ className, containerClassName, src, alt, slot, ...props }: ZoomableImageProps) {
  const { t } = useI18n()
  const copy = t.desktop
-  const { download, saving } = useImageDownload(src)
+  const [saving, setSaving] = useState(false)
  const [lightboxOpen, setLightboxOpen] = useState(false)
  const canOpen = Boolean(src)

+  // Saves the current image, guarding against re-entry with `saving`.
+  // Uses `window.hermesDesktop.saveImageFromUrl` when it exists, otherwise a
+  // plain browser download. If the call fails with the missing-IPC-handler
+  // error, the browser download is tried as a fallback; other failures are
+  // reported through notifyError.
+  // NOTE(review): the direct browser-download path shows no notification on
+  // success, while the fallback path shows `downloadStarted` — confirm intended.
+  async function handleDownload() {
+    if (!src || saving) {
+      return
+    }
+
+    setSaving(true)
+
+    try {
+      if (window.hermesDesktop?.saveImageFromUrl) {
+        const saved = await window.hermesDesktop.saveImageFromUrl(src)
+
+        // A falsy result produces no notification (presumably the user
+        // cancelled the save — verify against the IPC handler).
+        if (saved) {
+          notify({ kind: 'success', title: copy.imageSaved, message: imageFilename(src) })
+        }
+
+        return
+      }
+
+      await startBrowserDownload(src)
+    } catch (error) {
+      if (isMissingIpcHandler(error)) {
+        try {
+          await startBrowserDownload(src)
+          notify({
+            kind: 'info',
+            title: copy.downloadStarted,
+            message: copy.restartToUseSaveImage
+          })
+        } catch (fallbackError) {
+          notifyError(fallbackError, copy.restartToSaveImages)
+        }
+
+        return
+      }
+
+      notifyError(error, copy.imageDownloadFailed)
+    } finally {
+      // Runs on every exit above, including the early returns inside
+      // try/catch, so the button is always re-enabled.
+      setSaving(false)
+    }
+  }
+
+  const lightbox = src ? (
+    <Dialog onOpenChange={setLightboxOpen} open={lightboxOpen}>
+      <DialogContent
+        className="block w-auto max-h-[calc(100vh-12rem)] max-w-[calc(100vw-12rem)] overflow-visible border-0 bg-transparent p-0 shadow-none"
+        showCloseButton={false}
+      >
+        <div className="group/lightbox relative inline-block">
+          <img
+            alt={alt ?? ''}
+            className="block max-h-[calc(100vh-12rem)] max-w-[calc(100vw-12rem)] cursor-zoom-out select-auto rounded-lg object-contain shadow-2xl"
+            onClick={() => setLightboxOpen(false)}
+            src={src}
+          />
+          <ImageActionButton copy={copy} onClick={handleDownload} saving={saving} variant="lightbox" />
+        </div>
+      </DialogContent>
+    </Dialog>
+  ) : null
+
  return (
    <>
      <span
@@ -40,79 +138,30 @@ export function ZoomableImage({ className, containerClassName, src, alt, slot, .
        >
          <img alt={alt ?? ''} className={className} src={src} {...props} />
        </button>
-        {src && (
-          <ImageActionButton className="group-hover/image:opacity-100" copy={copy} onClick={download} saving={saving} />
-        )}
+        {src && <ImageActionButton copy={copy} onClick={handleDownload} saving={saving} variant="inline" />}
      </span>
-      {src && (
-        <ImageLightbox
-          alt={alt}
-          copy={copy}
-          onClick={download}
-          onOpenChange={setLightboxOpen}
-          open={lightboxOpen}
-          saving={saving}
-          src={src}
-        />
-      )}
+      {lightbox}
    </>
  )
 }

-export function ImageLightbox({
-  alt,
+function ImageActionButton({
  copy,
  onClick,
-  onOpenChange,
-  open,
  saving,
-  src
+  variant
 }: {
-  alt?: string
-  copy: ImageActionCopy
-  onClick: () => void
-  onOpenChange: (open: boolean) => void
-  open: boolean
-  saving: boolean
-  src: string
-}) {
-  return (
-    <Dialog onOpenChange={onOpenChange} open={open}>
-      <DialogContent
-        className="block w-auto max-h-[calc(100vh-12rem)] max-w-[calc(100vw-12rem)] overflow-visible border-0 bg-transparent p-0 shadow-none"
-        showCloseButton={false}
-      >
-        <div className="group/lightbox relative inline-block">
-          <img
-            alt={alt ?? ''}
-            className="block max-h-[calc(100vh-12rem)] max-w-[calc(100vw-12rem)] cursor-zoom-out select-auto rounded-lg object-contain shadow-2xl"
-            onClick={() => onOpenChange(false)}
-            src={src}
-          />
-          <ImageActionButton className="group-hover/lightbox:opacity-100" copy={copy} onClick={onClick} saving={saving} />
-        </div>
-      </DialogContent>
-    </Dialog>
-  )
-}
-
-export function ImageActionButton({
-  className,
-  copy,
-  onClick,
-  saving
-}: {
-  className?: string
  copy: ImageActionCopy
  onClick: () => void
  saving: boolean
+  variant: 'inline' | 'lightbox'
 }) {
  return (
    <button
      aria-label={saving ? copy.savingImage : copy.downloadImage}
      className={cn(
        'absolute right-2 top-2 grid size-8 place-items-center rounded-full border border-border/70 bg-background/80 text-muted-foreground opacity-0 shadow-sm backdrop-blur transition-opacity hover:bg-accent hover:text-foreground focus-visible:opacity-100 disabled:opacity-50',
-        className
+        variant === 'inline' ? 'group-hover/image:opacity-100' : 'group-hover/lightbox:opacity-100'
      )}
      disabled={saving}
      onClick={event => {
--- a/apps/desktop/src/components/desktop-onboarding-overlay.tsx
+++ b/apps/desktop/src/components/desktop-onboarding-overlay.tsx
@@ -180,7 +180,7 @@ const PROVIDER_DISPLAY: Record<string, { order: number; title: string }> = {

 const assetPath = (path: string) => `${import.meta.env.BASE_URL}${path.replace(/^\/+/, '')}`

-export const providerTitle = (p: OAuthProvider) => PROVIDER_DISPLAY[p.id]?.title ?? p.name
+const providerTitle = (p: OAuthProvider) => PROVIDER_DISPLAY[p.id]?.title ?? p.name
 const orderOf = (p: OAuthProvider) => PROVIDER_DISPLAY[p.id]?.order ?? 99

 export const sortProviders = (providers: OAuthProvider[]) =>
--- a/apps/desktop/src/components/gateway-connecting-overlay.test.tsx
+++ b/apps/desktop/src/components/gateway-connecting-overlay.test.tsx
@@ -3,23 +3,23 @@ import { afterEach, beforeEach, describe, expect, it } from 'vitest'

 import { $desktopBoot } from '@/store/boot'
 import { $desktopOnboarding } from '@/store/onboarding'
-import { setGatewayState } from '@/store/session'
+import { $gatewayState, setGatewayState } from '@/store/session'

 import { BootFailureOverlay } from './boot-failure-overlay'
 import { GatewayConnectingOverlay } from './gateway-connecting-overlay'

 // Repro for the "remote gateway → stuck on CONNECTING, no way to settings"
-// report. The connecting overlay (z-1200, full-screen, pointer-events on) used
-// to be shown whenever `gatewayState !== 'open' && !boot.error`. The ONLY escape
+// report. The connecting overlay (z-1200, full-screen, pointer-events on) is
+// shown whenever `gatewayState !== 'open' && !boot.error`. The ONLY escape
 // hatch — BootFailureOverlay, which has "Use local gateway" / "Sign in" /
 // "Retry" — only renders when `boot.error` is set.
 //
 // useGatewayBoot only calls failDesktopBoot() (which sets boot.error) when the
 // INITIAL boot() throws. After the first successful connect (bootCompleted),
 // any later socket drop goes through scheduleReconnect(), which loops FOREVER
-// against the dead remote. So gatewayState sits at 'closed'/'error' with
-// boot.error null. The fix keeps the initial-boot overlay out of post-boot
-// reconnects, leaving chat/settings usable while the reconnect loop runs.
+// against the dead remote and never sets boot.error. So gatewayState sits at
+// 'closed'/'error' with boot.error null → CONNECTING forever, recovery overlay
+// never appears, settings unreachable.

 function resetStores() {
  setGatewayState('idle')
@@ -75,7 +75,7 @@ describe('connecting overlay vs recovery surface', () => {
    expect(isConnectingShown()).toBe(false)
  })

-  it('post-boot socket drops do not re-cover the app with the initial CONNECTING overlay', () => {
+  it('REPRO: remote socket drops AFTER a successful boot → stuck on CONNECTING, no recovery, no settings', () => {
    // 1. Initial boot succeeded: gateway opened, boot completed (no error).
    setGatewayState('open')
    const { rerender } = render(
@@ -97,14 +97,14 @@ describe('connecting overlay vs recovery surface', () => {
      </>
    )

-    // The initial-boot connecting overlay stays out of the way, so settings and
-    // the composer remain reachable during the reconnect loop.
-    expect(isConnectingShown()).toBe(false)
+    // The connecting overlay reappears and latches...
+    expect(isConnectingShown()).toBe(true)
+    // ...with NO recovery surface, because boot.error was never set.
    expect(isRecoveryShown()).toBe(false)

-    // 3. Reconnect loops against the dead remote: gatewayState bounces closed
-    //    → error → closed. Until the escalation path sets boot.error, the app
-    //    remains usable instead of modal-blocked.
+    // 3. Reconnect loops forever against the dead remote: gatewayState bounces
+    //    closed → error → closed, boot.error never gets set. The user is
+    //    pinned on CONNECTING with no path to Settings indefinitely.
    setGatewayState('error')
    rerender(
      <>
@@ -113,7 +113,7 @@ describe('connecting overlay vs recovery surface', () => {
      </>
    )
    expect($desktopBoot.get().error).toBeNull()
-    expect(isConnectingShown()).toBe(false)
+    expect(isConnectingShown()).toBe(true)
    expect(isRecoveryShown()).toBe(false)
  })

--- a/apps/desktop/src/components/gateway-connecting-overlay.tsx
+++ b/apps/desktop/src/components/gateway-connecting-overlay.tsx
@@ -52,13 +52,7 @@ export function GatewayConnectingOverlay() {
  const [tail, setTail] = useState(TAIL)
  const [phase, setPhase] = useState<Phase>('live')

-  // The full-screen connecting overlay is for initial boot only. After a
-  // healthy boot, flaky networks / sleep-wake can drop the socket and flip the
-  // gateway state back to closed/error while the app reconnects. Do not cover
-  // the chat then — users should still be able to type drafts, open settings,
-  // and recover instead of staring at a modal CONNECTING screen.
-  const initialBootActive = boot.visible || boot.running || boot.progress < 100
-  const connecting = gatewayState !== 'open' && !boot.error && initialBootActive
+  const connecting = gatewayState !== 'open' && !boot.error
  // Latches once we've actually shown the overlay, so the brief frame where
  // gatewayState flips to "open" (connecting -> false) before the exit phase
  // kicks in doesn't unmount us and cause a flash.
--- a/apps/desktop/src/components/haptics-provider.tsx
+++ b/apps/desktop/src/components/haptics-provider.tsx
@@ -15,29 +15,5 @@ export function HapticsProvider({ children }: { children: ReactNode }) {
    return () => registerHapticTrigger(null)
  }, [muted, trigger])

-  // web-haptics builds its AudioContext lazily inside the first trigger(), and
-  // the process's first AudioContext pays the CoreAudio spin-up (~850ms stall
-  // in profiles) — which landed on the first streamStart haptic as the first
-  // token painted. Open/close a throwaway context at idle so the real one
-  // connects to an already-warm audio service in single-digit ms.
-  useEffect(() => {
-    if (typeof requestIdleCallback !== 'function' || typeof AudioContext === 'undefined') {
-      return undefined
-    }
-
-    const id = requestIdleCallback(
-      () => {
-        try {
-          void new AudioContext().close().catch(() => undefined)
-        } catch {
-          // No audio device (headless CI) — nothing to warm.
-        }
-      },
-      { timeout: 2000 }
-    )
-
-    return () => cancelIdleCallback(id)
-  }, [])
-
  return <>{children}</>
 }
--- a/Show More
+++ b/Show More