fix(delegation): stream subagent progress per-tool so live windows update mid-run

Subagent tool activity is relayed as two kinds of events: subagent.tool fires live per tool call, but the subagent.progress running summary was buffered in batches of 5 (_BATCH_SIZE=5) and flushed only once 5 tools had accumulated, with any remainder flushed at end-of-run via _flush(). Most subagents run fewer than 5 tools (e.g. a short research or single-edit task), so the progress summary never reached the threshold mid-run and only appeared when the child finished — the live subagent window stayed silent until the very end ("subagent output just appears all at once"). Lower _BATCH_SIZE to 1 so each tool's progress summary streams in step with the per-tool subagent.tool events. _flush() stays as a harmless end-of-run safety net (now a no-op in the common case). Convert the three batch-of-5 change-detector tests into invariant tests that assert live per-tool streaming and per-child summary isolation.
2026-06-21 17:41:08 +08:00 · 2026-06-16 09:35:13 -04:00
484 changed files with 3861 additions and 39615 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -102,3 +102,6 @@ acp_registry/
 .gitattributes
 .hadolint.yaml
 .mailmap
+
+# Top-level LICENSE (not matched by *.md); not needed inside the container
+LICENSE
--- a/.github/pr-screenshots/45449/billing-confirm.png
+++ b/.github/pr-screenshots/45449/billing-confirm.png
--- a/.github/pr-screenshots/45449/billing-overview.png
+++ b/.github/pr-screenshots/45449/billing-overview.png
--- a/.github/workflows/contributor-check.yml
+++ b/.github/workflows/contributor-check.yml
@@ -1,11 +1,12 @@
 name: Contributor Attribution Check

 on:
+  pull_request:
+    branches: [main]
  # No paths filter — the job must always run so the required check
  # reports a status (path-gated workflows leave checks "pending" forever
  # when no matching files change, which blocks merge).
-  pull_request:
-    branches: [main]
+
 permissions:
  contents: read

--- a/.github/workflows/docker-lint.yml
+++ b/.github/workflows/docker-lint.yml
@@ -18,12 +18,13 @@ on:
      - docker/**
      - .hadolint.yaml
      - .github/workflows/docker-lint.yml
-
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
+    paths:
+      - Dockerfile
+      - docker/**
+      - .hadolint.yaml
+      - .github/workflows/docker-lint.yml

 permissions:
  contents: read
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -11,13 +11,16 @@ on:
      - 'docker/**'
      - '.github/workflows/docker-publish.yml'
      - '.github/actions/hermes-smoke-test/**'
-
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
-
+    paths:
+      - '**/*.py'
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - 'Dockerfile'
+      - 'docker/**'
+      - '.github/workflows/docker-publish.yml'
+      - '.github/actions/hermes-smoke-test/**'
  release:
    types: [published]

--- a/.github/workflows/docs-site-checks.yml
+++ b/.github/workflows/docs-site-checks.yml
@@ -1,12 +1,10 @@
 name: Docs Site Checks

 on:
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
-    branches: [main]
-
+    paths:
+      - 'website/**'
+      - '.github/workflows/docs-site-checks.yml'
  workflow_dispatch:

 permissions:
@@ -16,9 +14,9 @@ jobs:
  docs-site-checks:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

-      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
+      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
          node-version: 22
          cache: npm
@@ -28,9 +26,9 @@ jobs:
        run: npm ci
        working-directory: website

-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
        with:
-          python-version: "3.11"
+          python-version: '3.11'

      - name: Install ascii-guard
        run: python -m pip install ascii-guard==2.3.0 pyyaml==6.0.3
--- a/.github/workflows/history-check.yml
+++ b/.github/workflows/history-check.yml
@@ -14,9 +14,6 @@ name: History Check
 # the PR head and main to be non-empty.

 on:
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]

@@ -27,9 +24,9 @@ jobs:
  check-common-ancestor:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
-          fetch-depth: 0 # full history both sides for merge-base
+          fetch-depth: 0  # full history both sides for merge-base

      - name: Reject PRs with no common ancestor on main
        run: |
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -15,12 +15,12 @@ on:
      - "**/*.md"
      - "docs/**"
      - "website/**"
-
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
+    paths-ignore:
+      - "**/*.md"
+      - "docs/**"
+      - "website/**"

 permissions:
  contents: read
@@ -154,6 +154,7 @@ jobs:
              });
            }

+
  ruff-blocking:
    # Enforce the rules in pyproject.toml [tool.ruff.lint.select]. Currently
    # PLW1514 (unspecified-encoding) — catches bare ``open()`` /
--- a/.github/workflows/osv-scanner.yml
+++ b/.github/workflows/osv-scanner.yml
@@ -20,23 +20,29 @@ name: OSV-Scanner
 # vulnerabilities in pinned deps that we may need to patch deliberately.

 on:
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
+    paths:
+      - 'uv.lock'
+      - 'pyproject.toml'
+      - 'package.json'
+      - 'package-lock.json'
+      - 'ui-tui/package.json'
+      - 'website/package.json'
+      - 'website/package-lock.json'
+      - '.github/workflows/osv-scanner.yml'
  push:
    branches: [main]
    paths:
-      - "uv.lock"
-      - "pyproject.toml"
-      - "package.json"
-      - "package-lock.json"
-      - "website/package-lock.json"
+      - 'uv.lock'
+      - 'pyproject.toml'
+      - 'package.json'
+      - 'package-lock.json'
+      - 'website/package-lock.json'
  schedule:
    # Weekly scan against main — catches CVEs published after merge for
    # deps that haven't changed since.
-    - cron: "0 9 * * 1"
+    - cron: '0 9 * * 1'
  workflow_dispatch:

 permissions:
@@ -48,7 +54,7 @@ permissions:
 jobs:
  scan:
    name: Scan lockfiles
-    uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@9a498708959aeaef5ef730655706c5a1df1edbc2 # v2.3.8
+    uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@9a498708959aeaef5ef730655706c5a1df1edbc2  # v2.3.8
    with:
      # Scan explicit lockfiles rather than recursing, so we only look at
      # the three sources of truth and skip vendored / test / worktree dirs.
--- a/.github/workflows/supply-chain-audit.yml
+++ b/.github/workflows/supply-chain-audit.yml
@@ -1,11 +1,11 @@
 name: Supply Chain Audit

 on:
+  pull_request:
+    types: [opened, synchronize, reopened]
  # No paths filter — the jobs must always run so required checks
  # report a status (path-gated workflows leave checks "pending" forever
  # when no matching files change, which blocks merge).
-  pull_request:
-    types: [opened, synchronize, reopened]

 permissions:
  pull-requests: write
@@ -32,7 +32,7 @@ jobs:
      # True when the curated MCP catalog / bundled MCP manifests changed.
      mcp_catalog: ${{ steps.filter.outputs.mcp_catalog }}
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          fetch-depth: 0
      - name: Check for relevant file changes
@@ -72,7 +72,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          fetch-depth: 0

@@ -207,7 +207,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          fetch-depth: 0

@@ -286,7 +286,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          fetch-depth: 0

--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -6,11 +6,11 @@ on:
    paths-ignore:
      - "**/*.md"
      - "docs/**"
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]
+    paths-ignore:
+      - "**/*.md"
+      - "docs/**"

 permissions:
  contents: read
@@ -219,4 +219,4 @@ jobs:
        env:
          OPENROUTER_API_KEY: ""
          OPENAI_API_KEY: ""
-          NOUS_API_KEY: ""
+          NOUS_API_KEY: ""
--- a/.github/workflows/typecheck.yml
+++ b/.github/workflows/typecheck.yml
@@ -4,9 +4,6 @@ name: Typecheck
 on:
  push:
    branches: [main]
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
  pull_request:
    branches: [main]

--- a/.github/workflows/uv-lockfile-check.yml
+++ b/.github/workflows/uv-lockfile-check.yml
@@ -47,15 +47,15 @@ on:
  push:
    branches: [main]
    paths:
-      - "pyproject.toml"
-      - "uv.lock"
-      - ".github/workflows/uv-lockfile-check.yml"
-
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - '.github/workflows/uv-lockfile-check.yml'
  pull_request:
    branches: [main]
+    paths:
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - '.github/workflows/uv-lockfile-check.yml'

 permissions:
  contents: read
@@ -71,10 +71,10 @@ jobs:
    timeout-minutes: 5
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      - name: Install uv
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5

      # `uv lock --check` re-resolves the project from pyproject.toml and
      # compares the result to uv.lock, exiting non-zero if they disagree.
--- a/.gitignore
+++ b/.gitignore
@@ -5,7 +5,6 @@
 *.pyc*
 __pycache__/
 .venv/
-.venv
 .vscode/
 .env
 .env.local
--- a/57
+++ b/57
@@ -9,11 +9,8 @@ FROM ghcr.io/astral-sh/uv:0.11.6-python3.13-trixie@sha256:b3c543b6c4f23a5f2df228
 FROM node:22-bookworm-slim@sha256:7af03b14a13c8cdd38e45058fd957bf00a72bbe17feac43b1c15a689c029c732 AS node_source
 FROM debian:13.4

-# Disable Python stdout buffering to ensure logs are printed immediately.
-# Do not write .pyc files at runtime: /opt/hermes is immutable in the
-# published container and writable state belongs under /opt/data.
+# Disable Python stdout buffering to ensure logs are printed immediately
 ENV PYTHONUNBUFFERED=1
-ENV PYTHONDONTWRITEBYTECODE=1

 # Store Playwright browsers outside the volume mount so the build-time
 # install survives the /opt/data volume overlay at runtime.
@@ -189,38 +186,36 @@ RUN cd web && npm run build && \

 # ---------- Source code ----------
 # .dockerignore excludes node_modules, so the installs above survive.
-COPY . .
+COPY --chown=hermes:hermes . .

 # ---------- Permissions ----------
-# Link hermes-agent itself (editable). Deps are already installed in the
-# cached layer above; `--no-deps` makes this a fast egg-link creation with no
-# resolution or downloads.
-RUN uv pip install --no-cache-dir --no-deps -e "."
-
-# Keep /opt/hermes immutable for the runtime hermes user. Hosted/container
-# instances must not be able to self-edit the installed source or venv; user
-# data, skills, plugins, config, logs, and dashboard uploads live under
-# /opt/data instead. Root can still repair the image during build/boot, but
-# supervised Hermes processes drop to the non-root hermes user.
+# Make install dir world-readable so any HERMES_UID can read it at runtime.
+# The venv needs to be traversable too.
+# node_modules trees additionally need to be writable by the hermes user
+# so the runtime `npm install` triggered by _tui_need_npm_install() in
+# hermes_cli/main.py succeeds (see #18800). /opt/hermes/web is build-time
+# only (HERMES_WEB_DIST points at hermes_cli/web_dist) and is intentionally
+# not chowned here.
+# /opt/hermes/gateway is runtime-writable: Python may create __pycache__ and
+# gateway state artifacts beneath the package after services drop privileges,
+# especially when the hermes UID is remapped at boot (#27221).
+# The .venv MUST remain hermes-writable so lazy_deps.py can install
+# remaining optional platform packages and future pin bumps at first use.
+# Without this, `uv pip install` fails with EACCES and adapters silently
+# fail to load.  See tools/lazy_deps.py.
 USER root
-RUN mkdir -p /opt/hermes/bin && \
-    cp /opt/hermes/docker/hermes-exec-shim.sh /opt/hermes/bin/hermes && \
-    chmod 0755 /opt/hermes/bin/hermes && \
-    printf 'docker\n' > /opt/hermes/.install_method && \
-    chown -R root:root /opt/hermes && \
-    chmod -R a+rX /opt/hermes && \
-    chmod -R a-w /opt/hermes
-# The ``.install_method`` stamp is baked next to the running code (the install
-# tree), NOT into $HERMES_HOME. $HERMES_HOME (/opt/data) is a shared data
-# volume that is commonly bind-mounted from the host and even shared with a
-# host-side Desktop/CLI install; stamping it at boot used to clobber that
-# host install's marker and wrongly block its ``hermes update``. A code-scoped
-# stamp is read first by detect_install_method() and is immune to the share.
+RUN chmod -R a+rX /opt/hermes && \
+    chown -R hermes:hermes /opt/hermes/.venv /opt/hermes/ui-tui /opt/hermes/gateway /opt/hermes/node_modules
 # Start as root so the s6-overlay stage2 hook can usermod/groupmod and chown
 # the data volume. Each supervised service then drops to the hermes user via
 # `s6-setuidgid hermes` in its run script. If HERMES_UID is unset, services
 # run as the default hermes user (UID 10000).

+# ---------- Link hermes-agent itself (editable) ----------
+# Deps are already installed in the cached layer above; `--no-deps` makes
+# this a fast (~1s) egg-link creation with no resolution or downloads.
+RUN uv pip install --no-cache-dir --no-deps -e "."
+
 # ---------- Bake build-time git revision ----------
 # .dockerignore excludes .git, so `git rev-parse HEAD` from inside the
 # container always returns nothing — meaning `hermes dump` reports
@@ -240,9 +235,8 @@ RUN mkdir -p /opt/hermes/bin && \
 # every published image has it.
 ARG HERMES_GIT_SHA=
 RUN if [ -n "${HERMES_GIT_SHA}" ]; then \
-        chmod u+w /opt/hermes && \
        printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha && \
-        chmod a-w /opt/hermes /opt/hermes/.hermes_build_sha; \
+        chown hermes:hermes /opt/hermes/.hermes_build_sha; \
    fi

 # ---------- s6-overlay service wiring ----------
@@ -288,8 +282,6 @@ ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
 # check. (A separate launcher hardening is tracked independently.)
 ENV HERMES_TUI_DIR=/opt/hermes/ui-tui
 ENV HERMES_HOME=/opt/data
-ENV HERMES_WRITE_SAFE_ROOT=/opt/data
-ENV HERMES_DISABLE_LAZY_INSTALLS=1

 # `docker exec` privilege-drop shim. When operators run
 # `docker exec <c> hermes ...` they default to root, and any file the
@@ -302,6 +294,7 @@ ENV HERMES_DISABLE_LAZY_INSTALLS=1
 # Recursion is impossible because the shim exec's the venv binary by
 # absolute path (/opt/hermes/.venv/bin/hermes). See the shim source for
 # the opt-out env var (HERMES_DOCKER_EXEC_AS_ROOT=1).
+COPY --chmod=0755 docker/hermes-exec-shim.sh /opt/hermes/bin/hermes

 # Pre-s6 entrypoint.sh did `source .venv/bin/activate` which exported
 # the venv bin onto PATH; Architecture B's main-wrapper.sh does the
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@@ -27,7 +27,7 @@ import threading
 import time
 import uuid
 from datetime import datetime
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Dict, List, Optional
 from urllib.parse import urlparse, parse_qs, urlunparse

 from agent.context_compressor import ContextCompressor
@@ -195,7 +195,6 @@ def init_agent(
    status_callback: callable = None,
    notice_callback: callable = None,
    notice_clear_callback: callable = None,
-    event_callback: Optional[Callable[[str, dict], None]] = None,
    max_tokens: int = None,
    reasoning_config: Dict[str, Any] = None,
    service_tier: str = None,
@@ -300,7 +299,6 @@ def init_agent(
    # would mangle the escape sequences.  None = use builtins.print.
    agent._print_fn = None
    agent.background_review_callback = None  # Optional sync callback for gateway delivery
-    agent.memory_notifications = "on"  # Memory update notifications: "off", "on", "verbose"
    agent.skip_context_files = skip_context_files
    agent.load_soul_identity = load_soul_identity
    agent.pass_session_id = pass_session_id
@@ -427,7 +425,6 @@ def init_agent(
    agent.status_callback = status_callback
    agent.notice_callback = notice_callback
    agent.notice_clear_callback = notice_clear_callback
-    agent.event_callback = event_callback
    agent.tool_gen_callback = tool_gen_callback

    
@@ -599,7 +596,6 @@ def init_agent(
    # (e.g. CLI voice mode adds a temporary prefix for the live call only).
    agent._persist_user_message_idx = None
    agent._persist_user_message_override = None
-    agent._persist_user_message_timestamp = None

    # Cache anthropic image-to-text fallbacks per image payload/URL so a
    # single tool loop does not repeatedly re-run auxiliary vision on the
@@ -1156,9 +1152,6 @@ def init_agent(
                        "hermes_home": str(get_hermes_home()),
                        "agent_context": "primary",
                    }
-                    if _init_kwargs["platform"] == "cli":
-                        _init_kwargs["warning_callback"] = agent._emit_warning
-                        _init_kwargs["status_callback"] = agent._emit_status
                    # Thread session title for memory provider scoping
                    # (e.g. honcho uses this to derive chat-scoped session keys)
                    if agent._session_db:
@@ -1227,35 +1220,12 @@ def init_agent(
    # targets.
    agent._task_completion_guidance = bool(_agent_section.get("task_completion_guidance", True))

-    # Universal parallel-tool-call guidance toggle.  Default True.  Separate
-    # flag from task_completion_guidance because a user may want one but not
-    # the other.  Steers the model to batch independent tool calls into a
-    # single turn; the runtime already executes such batches concurrently.
-    agent._parallel_tool_call_guidance = bool(_agent_section.get("parallel_tool_call_guidance", True))
-
    # Local Python toolchain probe toggle.  Default True.  When False,
    # the probe is skipped entirely (no subprocess calls, no system-prompt
    # line).  Useful for users on exotic setups where the probe heuristics
    # are noisy.
    agent._environment_probe = bool(_agent_section.get("environment_probe", True))

-    # Per-platform prompt-hint overrides (config.yaml → platform_hints).
-    # Lets an enterprise admin append to or replace Hermes' built-in
-    # platform hint for a single messaging platform (e.g. WhatsApp) without
-    # affecting other platforms. Shape:
-    #   platform_hints:
-    #     whatsapp:
-    #       append: "When tabular output would help, invoke the ... skill."
-    #     slack:
-    #       replace: "Custom Slack hint that fully replaces the default."
-    # Stored verbatim; resolution happens in agent/system_prompt.py against
-    # the active platform. Invalid shapes are ignored defensively so a bad
-    # config entry can never break prompt assembly.
-    _platform_hints_cfg = _agent_cfg.get("platform_hints", {})
-    if not isinstance(_platform_hints_cfg, dict):
-        _platform_hints_cfg = {}
-    agent._platform_hint_overrides = _platform_hints_cfg
-
    # App-level API retry count (wraps each model API call).  Default 3,
    # overridable via agent.api_max_retries in config.yaml.  See #11616.
    try:
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@@ -1839,42 +1839,28 @@ def invoke_tool(agent, function_name: str, function_args: dict, effective_task_i
    elif function_name == "memory":
        def _execute(next_args: dict) -> Any:
            target = next_args.get("target", "memory")
-            operations = next_args.get("operations")
            from tools.memory_tool import memory_tool as _memory_tool
            result = _memory_tool(
                action=next_args.get("action"),
                target=target,
                content=next_args.get("content"),
                old_text=next_args.get("old_text"),
-                operations=operations,
                store=agent._memory_store,
            )
-            # Bridge: notify external memory provider of built-in memory writes.
-            # Covers both the single-op shape and each add/replace inside a batch.
-            if agent._memory_manager:
-                if operations:
-                    _mem_ops = [
-                        op for op in operations
-                        if isinstance(op, dict) and op.get("action") in {"add", "replace"}
-                    ]
-                else:
-                    _mem_ops = (
-                        [{"action": next_args.get("action"), "content": next_args.get("content")}]
-                        if next_args.get("action") in {"add", "replace"} else []
+            # Bridge: notify external memory provider of built-in memory writes
+            if agent._memory_manager and next_args.get("action") in {"add", "replace"}:
+                try:
+                    agent._memory_manager.on_memory_write(
+                        next_args.get("action", ""),
+                        target,
+                        next_args.get("content", ""),
+                        metadata=agent._build_memory_write_metadata(
+                            task_id=effective_task_id,
+                            tool_call_id=tool_call_id,
+                        ),
                    )
-                for _op in _mem_ops:
-                    try:
-                        agent._memory_manager.on_memory_write(
-                            _op.get("action", ""),
-                            target,
-                            _op.get("content", "") or "",
-                            metadata=agent._build_memory_write_metadata(
-                                task_id=effective_task_id,
-                                tool_call_id=tool_call_id,
-                            ),
-                        )
-                    except Exception:
-                        pass
+                except Exception:
+                    pass
            return _finish_agent_tool(result, next_args)
    elif agent._memory_manager and agent._memory_manager.has_tool(function_name):
        def _execute(next_args: dict) -> Any:
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -372,7 +372,7 @@ def _detect_claude_code_version() -> str:


 _CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude."
-_MCP_TOOL_PREFIX = "mcp__"
+_MCP_TOOL_PREFIX = "mcp_"


 def _get_claude_code_version() -> str:
@@ -2349,46 +2349,25 @@ def build_anthropic_kwargs(
                text = text.replace("Nous Research", "Anthropic")
                block["text"] = text

-        # 3. Normalize tool names so NOTHING goes on the OAuth wire with a
-        #    single-underscore ``mcp_`` prefix.  Anthropic's subscription/OAuth
-        #    billing classifier treats a single-underscore ``mcp_`` tool name as
-        #    a third-party-app fingerprint and rejects the request with HTTP 400
-        #    "Third-party apps now draw from extra usage, not plan limits"
-        #    (verified empirically: a single ``mcp_foo`` tool flips a request
-        #    from plan-billing to the extra-usage lane; ``mcp__foo`` is accepted).
-        #
-        #    Two cases, both must land on the double-underscore ``mcp__`` form:
-        #      a) bare Hermes-native tools (``read_file``)  -> ``mcp__read_file``
-        #      b) native MCP server tools registered under their full
-        #         single-underscore ``mcp_<server>_<tool>`` name
-        #         (``mcp_linear_get_issue``) -> ``mcp__linear_get_issue``
-        #    Case (b) is the gap that the bare ``mcp_``->``mcp__`` constant swap
-        #    left open: those tools were *skipped* and stayed single-underscore,
-        #    so any session with an MCP server configured still tripped the
-        #    classifier. normalize_response reverses both forms via registry
-        #    lookup so the dispatcher still sees the original name. GH-25255.
-        def _to_oauth_wire_name(name: str) -> str:
-            if name.startswith("mcp__"):
-                return name  # already correct, don't double-prefix
-            if name.startswith("mcp_"):
-                # single-underscore native MCP tool -> promote to double
-                return "mcp__" + name[len("mcp_"):]
-            return _MCP_TOOL_PREFIX + name  # bare name -> mcp__<name>
-
+        # 3. Prefix tool names with mcp_ (Claude Code convention)
+        #    Skip names that already begin with the marker — native MCP server
+        #    tools (from mcp_servers: in config.yaml) are registered under their
+        #    full mcp_<server>_<tool> name and would double-prefix otherwise,
+        #    breaking round-trip registry lookup in normalize_response. GH-25255.
        if anthropic_tools:
            for tool in anthropic_tools:
-                if "name" in tool:
-                    tool["name"] = _to_oauth_wire_name(tool["name"])
+                if "name" in tool and not tool["name"].startswith(_MCP_TOOL_PREFIX):
+                    tool["name"] = _MCP_TOOL_PREFIX + tool["name"]

-        # 4. Apply the same normalization to tool names in message history
-        #    (tool_use blocks) so replayed turns match the wire names above.
+        # 4. Prefix tool names in message history (tool_use blocks only; tool_result blocks reference the ID, not the name)
        for msg in anthropic_messages:
            content = msg.get("content")
            if isinstance(content, list):
                for block in content:
                    if isinstance(block, dict):
                        if block.get("type") == "tool_use" and "name" in block:
-                            block["name"] = _to_oauth_wire_name(block["name"])
+                            if not block["name"].startswith(_MCP_TOOL_PREFIX):
+                                block["name"] = _MCP_TOOL_PREFIX + block["name"]
                        elif block.get("type") == "tool_result" and "tool_use_id" in block:
                            pass  # tool_result uses ID, not name

--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -3079,20 +3079,23 @@ def _try_configured_fallback_chain(
        if not fb_provider or fb_provider.lower() == skip:
            continue
        fb_model = str(entry.get("model", "")).strip() or None
+        fb_base_url = str(entry.get("base_url", "")).strip() or None
+        fb_api_key = str(entry.get("api_key", "")).strip() or None

        label = f"fallback_chain[{i}]({fb_provider})"

        try:
-            fb_client, resolved_model = _resolve_fallback_entry(entry)
+            fb_client = _resolve_single_provider(
+                fb_provider, fb_model, fb_base_url, fb_api_key)
        except Exception:
-            fb_client, resolved_model = None, None
+            fb_client = None

        if fb_client is not None:
            logger.info(
                "Auxiliary %s: %s on %s — configured fallback to %s (%s)",
-                task, reason, failed_provider, label, resolved_model or fb_model or "default",
+                task, reason, failed_provider, label, fb_model or "default",
            )
-            return fb_client, resolved_model or fb_model, label
+            return fb_client, fb_model, label
        tried.append(label)

    if tried:
@@ -3103,103 +3106,6 @@ def _try_configured_fallback_chain(
    return None, None, ""


-def _fallback_entry_api_key(entry: Dict[str, Any]) -> Optional[str]:
-    """Resolve inline or env-backed API key from a fallback-chain entry."""
-    explicit = str(entry.get("api_key") or "").strip()
-    if explicit:
-        return explicit
-    key_env = str(entry.get("key_env") or entry.get("api_key_env") or "").strip()
-    if key_env:
-        return os.getenv(key_env, "").strip() or None
-    return None
-
-
-def _resolve_fallback_entry(entry: Dict[str, Any]) -> Tuple[Optional[Any], Optional[str]]:
-    """Resolve one fallback entry through the central provider router."""
-    provider = str(entry.get("provider") or "").strip()
-    model = str(entry.get("model") or "").strip() or None
-    if not provider or not model:
-        return None, None
-    base_url = str(entry.get("base_url") or "").strip() or None
-    api_key = _fallback_entry_api_key(entry)
-    api_mode = str(entry.get("api_mode") or entry.get("transport") or "").strip() or None
-    return resolve_provider_client(
-        provider,
-        model=model,
-        explicit_base_url=base_url,
-        explicit_api_key=api_key,
-        api_mode=api_mode,
-    )
-
-
-def _try_main_fallback_chain(
-    task: Optional[str],
-    failed_provider: str = "",
-    reason: str = "error",
-) -> Tuple[Optional[Any], Optional[str], str]:
-    """Try the top-level main-agent fallback chain for an auxiliary call.
-
-    ``provider: auto`` auxiliary tasks should respect the user's declared
-    main fallback policy before dropping into Hermes' built-in discovery
-    chain. The top-level chain is read through ``get_fallback_chain`` so
-    both modern ``fallback_providers`` and legacy ``fallback_model`` entries
-    participate in the same order as the main agent.
-    """
-    try:
-        from hermes_cli.config import load_config
-        from hermes_cli.fallback_config import get_fallback_chain
-
-        chain = get_fallback_chain(load_config())
-    except Exception as exc:
-        logger.debug("Auxiliary %s: could not load main fallback chain: %s", task or "call", exc)
-        return None, None, ""
-
-    if not chain:
-        return None, None, ""
-
-    failed_norm = (failed_provider or "").strip().lower()
-    main_norm = (_read_main_provider() or "").strip().lower()
-    skip = {p for p in (failed_norm, main_norm, "auto") if p}
-    tried: List[str] = []
-
-    for i, entry in enumerate(chain):
-        if not isinstance(entry, dict):
-            continue
-        fb_provider = str(entry.get("provider") or "").strip()
-        fb_model = str(entry.get("model") or "").strip()
-        if not fb_provider or not fb_model:
-            continue
-        fb_norm = fb_provider.lower()
-        label = f"fallback_providers[{i}]({fb_provider})"
-        if fb_norm in skip:
-            tried.append(f"{label} (skipped)")
-            continue
-        if _is_provider_unhealthy(fb_norm):
-            _log_skip_unhealthy(fb_norm, task)
-            tried.append(f"{label} (unhealthy)")
-            continue
-        try:
-            fb_client, resolved_model = _resolve_fallback_entry(entry)
-        except Exception as exc:
-            logger.debug("Auxiliary %s: main fallback %s failed to resolve: %s", task or "call", label, exc)
-            fb_client, resolved_model = None, None
-        if fb_client is not None:
-            logger.info(
-                "Auxiliary %s: %s on %s — main fallback chain to %s (%s)",
-                task or "call", reason, failed_provider or "auto", label,
-                resolved_model or fb_model,
-            )
-            return fb_client, resolved_model or fb_model, fb_provider
-        tried.append(label)
-
-    if tried:
-        logger.debug(
-            "Auxiliary %s: main fallback chain exhausted (tried: %s)",
-            task or "call", ", ".join(tried),
-        )
-    return None, None, ""
-
-
 def _resolve_single_provider(
    provider: str,
    model: Optional[str] = None,
@@ -3210,19 +3116,16 @@ def _resolve_single_provider(

    Uses the existing provider resolution infrastructure where possible.
    """
-    # Reuse resolve_provider_client which handles provider→client mapping.
+    # Reuse resolve_provider_client which handles provider→client mapping
    client, resolved_model = resolve_provider_client(
        provider=provider,
        model=model,
-        explicit_base_url=base_url,
-        explicit_api_key=api_key,
+        base_url=base_url,
+        api_key=api_key,
    )
    return client

-def _resolve_auto(
-    main_runtime: Optional[Dict[str, Any]] = None,
-    task: Optional[str] = None,
-) -> Tuple[Optional[OpenAI], Optional[str]]:
+def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Optional[OpenAI], Optional[str]]:
    """Full auto-detection chain.

    Priority:
@@ -3320,22 +3223,7 @@ def _resolve_auto(
                            main_provider, resolved or main_model)
                return client, resolved or main_model

-    # ── Step 2: user-configured fallback policy ─────────────────────────
-    # In auto mode, respect the task-specific fallback chain first, then the
-    # main agent's top-level fallback_providers/fallback_model chain. The
-    # hardcoded provider discovery chain below is only the convenience default
-    # for users who have not declared a fallback policy.
-    if task:
-        fb_client, fb_model, _fb_label = _try_configured_fallback_chain(
-            task, main_provider or "auto", reason="main provider unavailable")
-        if fb_client is not None:
-            return fb_client, fb_model
-    fb_client, fb_model, _fb_label = _try_main_fallback_chain(
-        task, main_provider or "auto", reason="main provider unavailable")
-    if fb_client is not None:
-        return fb_client, fb_model
-
-    # ── Step 3: aggregator / fallback chain ──────────────────────────────
+    # ── Step 2: aggregator / fallback chain ──────────────────────────────
    tried = []
    for label, try_fn in _get_provider_chain():
        if _is_provider_unhealthy(label):
@@ -3456,7 +3344,6 @@ def resolve_provider_client(
    api_mode: str = None,
    main_runtime: Optional[Dict[str, Any]] = None,
    is_vision: bool = False,
-    task: Optional[str] = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Central router: given a provider name and optional model, return a
    configured client with the correct auth, base URL, and API format.
@@ -3577,7 +3464,7 @@ def resolve_provider_client(

    # ── Auto: try all providers in priority order ────────────────────
    if provider == "auto":
-        client, resolved = _resolve_auto(main_runtime=main_runtime, task=task)
+        client, resolved = _resolve_auto(main_runtime=main_runtime)
        if client is None:
            return None, None
        # When auto-detection lands on a non-OpenRouter provider (e.g. a
@@ -4470,16 +4357,11 @@ def _client_cache_key(
    api_mode: Optional[str] = None,
    main_runtime: Optional[Dict[str, Any]] = None,
    is_vision: bool = False,
-    task: Optional[str] = None,
 ) -> tuple:
    runtime = _normalize_main_runtime(main_runtime)
    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
-    # `auto` can now resolve through task-specific or main fallback policy,
-    # so the task participates in the cache key. Non-auto providers keep the
-    # old cache shape because the explicit provider/model tuple is sufficient.
-    task_key = (task or "") if provider == "auto" else ""
    pool_hint = _pool_cache_hint(provider, main_runtime=main_runtime)
-    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision, task_key, pool_hint)
+    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision, pool_hint)


 def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
@@ -4672,7 +4554,6 @@ def _get_cached_client(
    api_mode: str = None,
    main_runtime: Optional[Dict[str, Any]] = None,
    is_vision: bool = False,
-    task: Optional[str] = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Get or create a cached client for the given provider.

@@ -4710,7 +4591,6 @@ def _get_cached_client(
        api_mode=api_mode,
        main_runtime=main_runtime,
        is_vision=is_vision,
-        task=task,
    )
    with _client_cache_lock:
        if cache_key in _client_cache:
@@ -4755,7 +4635,6 @@ def _get_cached_client(
        api_mode=api_mode,
        main_runtime=runtime,
        is_vision=is_vision,
-        task=task,
    )
    if client is not None:
        # For async clients, remember which loop they were created on so we
@@ -5261,7 +5140,7 @@ def call_llm(
            if not resolved_base_url:
                logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
                            task or "call", resolved_provider)
-                client, final_model = _get_cached_client("auto", main_runtime=main_runtime, task=task)
+                client, final_model = _get_cached_client("auto", main_runtime=main_runtime)
        if client is None:
            raise RuntimeError(
                f"No LLM provider configured for task={task} provider={resolved_provider}. "
@@ -5587,19 +5466,14 @@ def call_llm(

            # Fallback order (#26882, #26803):
            #   1. User-configured fallback_chain (per-task) if set
-            #   2. For auto: top-level main fallback_providers/fallback_model
-            #   3. For auto: built-in auxiliary discovery chain
-            #   4. For explicit aux providers: main agent model safety net
+            #   2. Main agent model (last-resort safety net)
+            # For auto users (no explicit aux provider), use the full
+            # auto-detection chain instead — its Step 1 IS the main agent
+            # model, so users on `auto` already get main-model fallback.
            fb_client, fb_model, fb_label = (None, None, "")
            if is_auto:
-                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
-                    task, resolved_provider or "auto", reason=reason)
-                if fb_client is None:
-                    fb_client, fb_model, fb_label = _try_main_fallback_chain(
-                        task, resolved_provider or "auto", reason=reason)
-                if fb_client is None:
-                    fb_client, fb_model, fb_label = _try_payment_fallback(
-                        resolved_provider, task, reason=reason)
+                fb_client, fb_model, fb_label = _try_payment_fallback(
+                    resolved_provider, task, reason=reason)
            else:
                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
                    task, resolved_provider or "auto", reason=reason)
@@ -5762,7 +5636,7 @@ async def async_call_llm(
            if not resolved_base_url:
                logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
                            task or "call", resolved_provider)
-                client, final_model = _get_cached_client("auto", async_mode=True, main_runtime=main_runtime, task=task)
+                client, final_model = _get_cached_client("auto", async_mode=True, main_runtime=main_runtime)
        if client is None:
            raise RuntimeError(
                f"No LLM provider configured for task={task} provider={resolved_provider}. "
@@ -6030,19 +5904,13 @@ async def async_call_llm(

            # Fallback order (#26882, #26803):
            #   1. User-configured fallback_chain (per-task) if set
-            #   2. For auto: top-level main fallback_providers/fallback_model
-            #   3. For auto: built-in auxiliary discovery chain
-            #   4. For explicit aux providers: main agent model safety net
+            #   2. Main agent model (last-resort safety net)
+            # Auto users get the full auto-detection chain instead — its
+            # Step 1 IS the main agent model.
            fb_client, fb_model, fb_label = (None, None, "")
            if is_auto:
-                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
-                    task, resolved_provider or "auto", reason=reason)
-                if fb_client is None:
-                    fb_client, fb_model, fb_label = _try_main_fallback_chain(
-                        task, resolved_provider or "auto", reason=reason)
-                if fb_client is None:
-                    fb_client, fb_model, fb_label = _try_payment_fallback(
-                        resolved_provider, task, reason=reason)
+                fb_client, fb_model, fb_label = _try_payment_fallback(
+                    resolved_provider, task, reason=reason)
            else:
                fb_client, fb_model, fb_label = _try_configured_fallback_chain(
                    task, resolved_provider or "auto", reason=reason)
--- a/agent/background_review.py
+++ b/agent/background_review.py
@@ -237,25 +237,18 @@ _COMBINED_REVIEW_PROMPT = (
 def summarize_background_review_actions(
    review_messages: List[Dict],
    prior_snapshot: List[Dict],
-    notification_mode: str = "on",
 ) -> List[str]:
    """Build the human-facing action summary for a background review pass.

-    Walks the review agent's session messages and collects successful memory
-    and skill-management actions to surface to the user. Tool messages already
-    present in ``prior_snapshot`` are skipped so stale inherited results are
-    not re-surfaced as fresh background work (issue #14944).
+    Walks the review agent's session messages and collects "successful tool
+    action" descriptions to surface to the user (e.g. "Memory updated").
+    Tool messages already present in ``prior_snapshot`` are skipped so we
+    don't re-surface stale results from the prior conversation that the
+    review agent inherited via ``conversation_history`` (issue #14944).

-    ``notification_mode`` controls display detail:
-    - ``off``: return no actions.
-    - ``on``: generic "Memory updated"/tool messages.
-    - ``verbose``: include compact content previews from tool-call arguments.
+    Matching is by ``tool_call_id`` when available, with a content-equality
+    fallback for tool messages that lack one.
    """
-    mode = str(notification_mode or "on").lower()
-    if mode == "off":
-        return []
-    verbose = mode == "verbose"
-
    existing_tool_call_ids = set()
    existing_tool_contents = set()
    for prior in prior_snapshot or []:
@@ -269,43 +262,6 @@ def summarize_background_review_actions(
            if isinstance(content, str):
                existing_tool_contents.add(content)

-    # Map review-agent tool results back to the calls that produced them.  The
-    # result JSON only says "Entry added"; the call arguments contain action,
-    # target, and content previews.  Restricting to notify_tools also prevents
-    # helper tools from surfacing as memory work just because they succeeded.
-    notify_tools = {"memory", "skill_manage"}
-    all_tool_call_ids: set = set()
-    call_details: dict = {}
-    for msg in review_messages or []:
-        if not isinstance(msg, dict) or msg.get("role") != "assistant":
-            continue
-        for tc in msg.get("tool_calls", []) or []:
-            if not isinstance(tc, dict):
-                continue
-            fn = tc.get("function", {}) or {}
-            fn_name = fn.get("name", "")
-            tcid = tc.get("id")
-            if tcid:
-                all_tool_call_ids.add(tcid)
-            if fn_name not in notify_tools:
-                continue
-            try:
-                args = json.loads(fn.get("arguments", "{}"))
-            except (json.JSONDecodeError, TypeError):
-                args = {}
-            if tcid:
-                call_details[tcid] = {
-                    "tool": fn_name,
-                    "action": args.get("action", "?"),
-                    "target": args.get("target", "memory"),
-                    "content": args.get("content", ""),
-                    "old_text": args.get("old_text", ""),
-                    "operations": args.get("operations") or [],
-                    "name": args.get("name", ""),
-                    "old_string": args.get("old_string", ""),
-                    "new_string": args.get("new_string", ""),
-                }
-
    actions: List[str] = []
    for msg in review_messages or []:
        if not isinstance(msg, dict) or msg.get("role") != "tool":
@@ -317,8 +273,6 @@ def summarize_background_review_actions(
            content_str = msg.get("content")
            if isinstance(content_str, str) and content_str in existing_tool_contents:
                continue
-        if tcid and all_tool_call_ids and tcid not in call_details:
-            continue
        try:
            data = json.loads(msg.get("content", "{}"))
        except (json.JSONDecodeError, TypeError):
@@ -326,92 +280,19 @@ def summarize_background_review_actions(
        if not isinstance(data, dict) or not data.get("success"):
            continue
        message = data.get("message", "")
-        detail = call_details.get(tcid, {})
-        target = data.get("target", "") or detail.get("target", "")
-        is_skill = detail.get("tool") == "skill_manage"
-
-        message_lower = message.lower()
-        if not verbose:
-            if "created" in message_lower:
-                actions.append(message)
-                continue
-            if "updated" in message_lower:
-                actions.append(message)
-                continue
-            if is_skill and "patched" in message_lower:
-                actions.append(message)
-                continue
-
-        if is_skill:
-            label = "Skill"
-        elif target:
+        target = data.get("target", "")
+        if "created" in message.lower():
+            actions.append(message)
+        elif "updated" in message.lower():
+            actions.append(message)
+        elif "added" in message.lower() or (target and "add" in message.lower()):
+            label = "Memory" if target == "memory" else "User profile" if target == "user" else target
+            actions.append(f"{label} updated")
+        elif "Entry added" in message:
+            label = "Memory" if target == "memory" else "User profile" if target == "user" else target
+            actions.append(f"{label} updated")
+        elif "removed" in message.lower() or "replaced" in message.lower():
            label = "Memory" if target == "memory" else "User profile" if target == "user" else target
-        else:
-            continue
-
-        if verbose:
-            action = detail.get("action", "")
-            content = detail.get("content", "")
-            old_text = detail.get("old_text", "")
-            skill_name = detail.get("name", "")
-            operations = detail.get("operations") or []
-            max_preview = 120
-            if is_skill:
-                change = data.get("_change", {})
-                old_string = change.get("old", "") or detail.get("old_string", "")
-                new_string = change.get("new", "") or detail.get("new_string", "")
-                description = change.get("description", "")
-                if action == "patch" and (old_string or new_string):
-                    old_preview = old_string[:80].replace("\n", " ") + (
-                        "…" if len(old_string) > 80 else ""
-                    )
-                    new_preview = new_string[:80].replace("\n", " ") + (
-                        "…" if len(new_string) > 80 else ""
-                    )
-                    actions.append(
-                        f"📝 Skill '{skill_name}' patched: "
-                        f"\"{old_preview}\" → \"{new_preview}\""
-                    )
-                elif action == "create" and description:
-                    actions.append(f"📝 Skill '{skill_name}' created: {description}")
-                elif action == "edit" and description:
-                    actions.append(f"📝 Skill '{skill_name}' rewritten: {description}")
-                else:
-                    actions.append(f"📝 {message}" if message else f"Skill {action}")
-            elif operations:
-                for op in operations:
-                    op = op or {}
-                    op_act = op.get("action", "")
-                    op_content = (op.get("content") or "")
-                    op_old = (op.get("old_text") or "")
-                    if op_act == "add" and op_content:
-                        preview = op_content[:max_preview] + ("…" if len(op_content) > max_preview else "")
-                        actions.append(f"{label} ➕ {preview}")
-                    elif op_act == "replace" and op_content:
-                        preview = op_content[:max_preview] + ("…" if len(op_content) > max_preview else "")
-                        actions.append(f"{label} ✏️ {preview}")
-                    elif op_act == "remove" and op_old:
-                        preview = op_old[:60] + ("…" if len(op_old) > 60 else "")
-                        actions.append(f"{label} ➖ {preview}")
-            elif action == "add" and content:
-                preview = content[:max_preview] + ("…" if len(content) > max_preview else "")
-                actions.append(f"{label} ➕ {preview}")
-            elif action == "replace" and content:
-                preview = content[:max_preview] + ("…" if len(content) > max_preview else "")
-                actions.append(f"{label} ✏️ {preview}")
-            elif action == "remove" and old_text:
-                preview = old_text[:60] + ("…" if len(old_text) > 60 else "")
-                actions.append(f"{label} ➖ {preview}")
-            else:
-                actions.append(f"{label} updated")
-        elif (
-            "added" in message_lower
-            or "replaced" in message_lower
-            or "removed" in message_lower
-            or "applied" in message_lower
-            or (target and "add" in message.lower())
-            or "Entry added" in message
-        ):
            actions.append(f"{label} updated")
    return actions

@@ -641,7 +522,6 @@ def _run_review_in_thread(
        actions = summarize_background_review_actions(
            review_messages,
            messages_snapshot,
-            notification_mode=getattr(agent, "memory_notifications", "on"),
        )

        if actions:
--- a/agent/billing_view.py
+++ b/agent/billing_view.py
@@ -1,295 +0,0 @@
-"""Surface-agnostic core for the Phase 2b terminal-billing screens.
-
-One fetch/parse per concern, consumed identically by the CLI handler
-(``cli.py::_show_billing``), the TUI JSON-RPC methods
-(``tui_gateway/server.py``), and any other surface. Mirrors the proven
-``agent/account_usage.py::build_credits_view`` pattern: parse the server payload
-into a frozen dataclass; **fail open** — when not logged in or the portal is
-unreachable, return a struct with ``logged_in=False`` and let the surface degrade
-gracefully (never crash).
-
-Money discipline: the server emits decimal STRINGS (``"142.5"``, not fixed 2dp).
-We keep them as :class:`decimal.Decimal` end-to-end and only format for display.
-"""
-
-from __future__ import annotations
-
-import logging
-import uuid
-from dataclasses import dataclass, field
-from decimal import Decimal, InvalidOperation
-from typing import Any, Optional
-
-logger = logging.getLogger(__name__)
-
-
-# =============================================================================
-# Decimal money helpers
-# =============================================================================
-
-
-def parse_money(value: Any) -> Optional[Decimal]:
-    """Parse a server money value (decimal string) into :class:`Decimal`.
-
-    Returns None for missing/invalid input. Never raises. Accepts str/int (and,
-    defensively, float — though the server always sends strings).
-    """
-    if value is None:
-        return None
-    try:
-        # Decimal(str(...)) avoids binary-float artifacts if a float ever sneaks in.
-        return Decimal(str(value).strip())
-    except (InvalidOperation, ValueError, TypeError):
-        return None
-
-
-def format_money(value: Optional[Decimal]) -> str:
-    """Format a Decimal as ``$X`` / ``$X.YY`` for display.
-
-    Whole dollars show no decimals; any fractional amount shows exactly 2dp:
-    ``Decimal("142.5")`` → ``"$142.50"``, ``Decimal("100")`` → ``"$100"``,
-    ``Decimal("0.01")`` → ``"$0.01"``.
-    """
-    if value is None:
-        return "—"
-    if value == value.to_integral_value():
-        # Whole dollars — no decimal point. format(..., "f") avoids 1E+3 for 1000.
-        return f"${format(value.to_integral_value(), 'f')}"
-    # Fractional — always show 2dp.
-    return f"${format(value.quantize(Decimal('0.01')), 'f')}"
-
-
-# =============================================================================
-# Parsed sub-structures
-# =============================================================================
-
-
-@dataclass(frozen=True)
-class CardInfo:
-    brand: str
-    last4: str
-
-    @property
-    def masked(self) -> str:
-        return f"{self.brand} ····{self.last4}"
-
-
-@dataclass(frozen=True)
-class MonthlyCap:
-    limit_usd: Optional[Decimal] = None
-    spent_this_month_usd: Optional[Decimal] = None
-    is_default_ceiling: bool = False
-
-
-@dataclass(frozen=True)
-class AutoReload:
-    enabled: bool = False
-    threshold_usd: Optional[Decimal] = None
-    reload_to_usd: Optional[Decimal] = None
-
-
-@dataclass(frozen=True)
-class BillingState:
-    """Parsed ``GET /api/billing/state`` — the overview screen's data.
-
-    Fail-open: ``logged_in=False`` (and empty fields) when not logged in or the
-    portal is unreachable.
-    """
-
-    logged_in: bool
-    org_id: Optional[str] = None
-    org_slug: Optional[str] = None
-    org_name: Optional[str] = None
-    role: Optional[str] = None  # "OWNER" | "ADMIN" | "MEMBER"
-    balance_usd: Optional[Decimal] = None
-    cli_billing_enabled: bool = False
-    charge_presets: tuple[Decimal, ...] = ()
-    min_usd: Optional[Decimal] = None
-    max_usd: Optional[Decimal] = None
-    card: Optional[CardInfo] = None
-    monthly_cap: Optional[MonthlyCap] = None
-    auto_reload: Optional[AutoReload] = None
-    portal_url: Optional[str] = None
-    # When the fetch failed (vs cleanly not-logged-in), the message for the surface.
-    error: Optional[str] = None
-
-    @property
-    def is_admin(self) -> bool:
-        """True for OWNER/ADMIN — the roles that can manage billing."""
-        return (self.role or "").upper() in ("OWNER", "ADMIN")
-
-    @property
-    def can_charge(self) -> bool:
-        """True when the UI should offer charge/auto-reload actions.
-
-        Admin role AND the per-org kill-switch on. (The server still enforces;
-        this is just for graying out actions the user can't take.)
-        """
-        return self.is_admin and self.cli_billing_enabled
-
-
-def _parse_card(raw: Any) -> Optional[CardInfo]:
-    if not isinstance(raw, dict):
-        return None
-    brand = raw.get("brand")
-    last4 = raw.get("last4")
-    if isinstance(brand, str) and isinstance(last4, str):
-        return CardInfo(brand=brand, last4=last4)
-    return None
-
-
-def _parse_monthly_cap(raw: Any) -> Optional[MonthlyCap]:
-    if not isinstance(raw, dict):
-        return None
-    return MonthlyCap(
-        limit_usd=parse_money(raw.get("limitUsd")),
-        spent_this_month_usd=parse_money(raw.get("spentThisMonthUsd")),
-        is_default_ceiling=bool(raw.get("isDefaultCeiling")),
-    )
-
-
-def _parse_auto_reload(raw: Any) -> Optional[AutoReload]:
-    if not isinstance(raw, dict):
-        return None
-    return AutoReload(
-        enabled=bool(raw.get("enabled")),
-        threshold_usd=parse_money(raw.get("thresholdUsd")),
-        reload_to_usd=parse_money(raw.get("reloadToUsd")),
-    )
-
-
-def billing_state_from_payload(
-    payload: dict[str, Any], *, portal_url: Optional[str] = None
-) -> BillingState:
-    """Map a raw ``/api/billing/state`` JSON dict into :class:`BillingState`."""
-    raw_org = payload.get("org")
-    org: dict[str, Any] = raw_org if isinstance(raw_org, dict) else {}
-    raw_bounds = payload.get("bounds")
-    bounds: dict[str, Any] = raw_bounds if isinstance(raw_bounds, dict) else {}
-
-    presets: list[Decimal] = []
-    for item in payload.get("chargePresets") or ():
-        parsed = parse_money(item)
-        if parsed is not None:
-            presets.append(parsed)
-
-    return BillingState(
-        logged_in=True,
-        org_id=org.get("id"),
-        org_slug=org.get("slug"),
-        org_name=org.get("name"),
-        role=org.get("role"),
-        balance_usd=parse_money(payload.get("balanceUsd")),
-        cli_billing_enabled=bool(payload.get("cliBillingEnabled")),
-        charge_presets=tuple(presets),
-        min_usd=parse_money(bounds.get("minUsd")),
-        max_usd=parse_money(bounds.get("maxUsd")),
-        card=_parse_card(payload.get("card")),
-        monthly_cap=_parse_monthly_cap(payload.get("monthlyCap")),
-        auto_reload=_parse_auto_reload(payload.get("autoReload")),
-        portal_url=portal_url,
-    )
-
-
-# =============================================================================
-# Fail-open builders (the surface front doors)
-# =============================================================================
-
-
-def build_billing_state(*, timeout: float = 15.0) -> BillingState:
-    """Fetch + parse ``/api/billing/state``. Fail-open.
-
-    Returns ``BillingState(logged_in=False)`` when not logged in. On a portal/HTTP
-    failure, returns ``logged_in=False`` with ``error`` set so the surface can show
-    a clear message rather than crashing.
-    """
-    try:
-        from hermes_cli.nous_billing import (
-            BillingAuthError,
-            BillingError,
-            _absolutize_portal_url,
-            get_billing_state,
-            resolve_portal_base_url,
-        )
-    except Exception:
-        return BillingState(logged_in=False, error="billing client unavailable")
-
-    try:
-        payload = get_billing_state(timeout=timeout)
-    except BillingAuthError:
-        return BillingState(logged_in=False)
-    except BillingError as exc:
-        logger.debug("billing ▸ /state fetch failed (fail-open)", exc_info=True)
-        return BillingState(logged_in=False, error=str(exc))
-    except Exception:
-        logger.debug("billing ▸ /state unexpected error (fail-open)", exc_info=True)
-        return BillingState(logged_in=False, error="could not load billing state")
-
-    # Prefer a server-supplied portalUrl if present (resolved to absolute in case
-    # it's relative); else build the standard one.
-    raw_portal = payload.get("portalUrl") if isinstance(payload, dict) else None
-    portal_url = _absolutize_portal_url(raw_portal) if raw_portal else None
-    if not portal_url:
-        try:
-            portal_url = _fallback_portal_url(resolve_portal_base_url())
-        except Exception:
-            portal_url = None
-
-    return billing_state_from_payload(payload, portal_url=portal_url)
-
-
-def _fallback_portal_url(base: str) -> str:
-    """Standard billing deep-link when the server omits ``portalUrl``."""
-    return f"{base.rstrip('/')}/billing?topup=open"
-
-
-# =============================================================================
-# Idempotency
-# =============================================================================
-
-
-def new_idempotency_key() -> str:
-    """Fresh UUID for a user-confirmed purchase (reuse on retry of the SAME buy).
-
-    The ``Idempotency-Key`` header is mandatory on ``POST /charge``; generate one
-    per confirmed purchase and reuse it across retries so a double-submit collapses
-    to a single charge. Never reuse a key across different amounts (the server
-    returns 409 idempotency_conflict).
-    """
-    return str(uuid.uuid4())
-
-
-# =============================================================================
-# Amount validation (Screen 3 custom input)
-# =============================================================================
-
-
-@dataclass(frozen=True)
-class AmountValidation:
-    ok: bool
-    amount: Optional[Decimal] = None
-    error: Optional[str] = None
-
-
-def validate_charge_amount(
-    raw: str, *, min_usd: Optional[Decimal], max_usd: Optional[Decimal]
-) -> AmountValidation:
-    """Validate a custom charge amount against bounds + 2dp (multipleOf 0.01).
-
-    Mirrors the server's accept/reject so the UI can give instant feedback rather
-    than round-tripping a sure-to-fail charge. The server is still authoritative.
-    """
-    cleaned = (raw or "").strip().lstrip("$").strip()
-    amount = parse_money(cleaned)
-    if amount is None:
-        return AmountValidation(ok=False, error="Enter a dollar amount, e.g. 100")
-    if amount <= 0:
-        return AmountValidation(ok=False, error="Amount must be greater than $0")
-    # multipleOf 0.01 — reject sub-cent precision.
-    if amount != amount.quantize(Decimal("0.01")):
-        return AmountValidation(ok=False, error="Amount can't be smaller than a cent")
-    if min_usd is not None and amount < min_usd:
-        return AmountValidation(ok=False, error=f"Minimum is {format_money(min_usd)}")
-    if max_usd is not None and amount > max_usd:
-        return AmountValidation(ok=False, error=f"Maximum is {format_money(max_usd)}")
-    return AmountValidation(ok=True, amount=amount)
--- a/agent/codex_responses_adapter.py
+++ b/agent/codex_responses_adapter.py
@@ -262,26 +262,6 @@ def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[L
    return converted or None


-# Provider-executed built-in tool *declaration* types accepted on the
-# Responses ``tools`` array.  These are declared by ``type`` alone (no
-# client-side name/parameters schema) and run server-side — the provider
-# owns the implementation and reports progress via the matching ``*_call``
-# output items.  Hermes injects xAI's native ``web_search`` for the xAI
-# transport (see agent/transports/codex.py); the rest are listed so the
-# preflight validator passes them through rather than rejecting them as
-# "unsupported type".  Mirrors the ``*_call`` item-type set used in
-# _normalize_codex_response.
-_RESPONSES_BUILTIN_TOOL_TYPES = {
-    "web_search",
-    "web_search_preview",
-    "file_search",
-    "code_interpreter",
-    "image_generation",
-    "computer_use_preview",
-    "local_shell",
-}
-
-
 # ---------------------------------------------------------------------------
 # Message format conversion
 # ---------------------------------------------------------------------------
@@ -822,22 +802,7 @@ def _preflight_codex_api_kwargs(
        for idx, tool in enumerate(tools):
            if not isinstance(tool, dict):
                raise ValueError(f"Codex Responses tools[{idx}] must be an object.")
-
-            tool_type = tool.get("type")
-
-            # Provider-executed built-in tools (xAI native web_search, code
-            # interpreter, etc.) are declared by ``type`` alone and carry no
-            # ``name``/``parameters`` schema — the provider owns the
-            # implementation.  Pass them through verbatim instead of forcing
-            # them through the function-tool validation below (which would
-            # otherwise reject them with "unsupported type").  See
-            # agent/transports/codex.py for where xAI's native web_search is
-            # injected.
-            if tool_type in _RESPONSES_BUILTIN_TOOL_TYPES:
-                normalized_tools.append(dict(tool))
-                continue
-
-            if tool_type != "function":
+            if tool.get("type") != "function":
                raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.")

            name = tool.get("name")
@@ -1121,33 +1086,6 @@ def _normalize_codex_response(
    saw_final_answer_phase = False
    saw_reasoning_item = False

-    # Server-side built-in tool calls (xAI's native web_search, code
-    # interpreter, etc.) are executed by the provider and reported as
-    # discrete ``*_call`` output items.  xAI's /v1/responses surface
-    # (e.g. grok-composer-2.5-fast on SuperGrok OAuth) routinely leaves
-    # these items at ``status="in_progress"`` even when the overall
-    # ``response.status == "completed"`` — the search ran to completion
-    # server-side, the per-item status simply isn't reconciled.  These
-    # are NOT a signal that the model's turn is unfinished, so they must
-    # not flip ``has_incomplete_items``.  Only the response-level status
-    # and genuine model output items (message/reasoning/function_call)
-    # govern the incomplete verdict.  Without this guard, any turn where
-    # grok-composer invokes server-side search is misclassified as
-    # ``finish_reason="incomplete"`` and burns 3 fruitless continuation
-    # retries before failing with "Codex response remained incomplete
-    # after 3 continuation attempts".  client-side function/custom tool
-    # calls keep their own in_progress handling below (they are skipped,
-    # not awaited).
-    _SERVER_SIDE_TOOL_CALL_TYPES = {
-        "web_search_call",
-        "file_search_call",
-        "code_interpreter_call",
-        "image_generation_call",
-        "computer_call",
-        "local_shell_call",
-        "mcp_call",
-    }
-
    for item in output:
        item_type = getattr(item, "type", None)
        item_status = getattr(item, "status", None)
@@ -1156,10 +1094,7 @@ def _normalize_codex_response(
        else:
            item_status = None

-        if (
-            item_status in {"queued", "in_progress", "incomplete"}
-            and item_type not in _SERVER_SIDE_TOOL_CALL_TYPES
-        ):
+        if item_status in {"queued", "in_progress", "incomplete"}:
            has_incomplete_items = True
            saw_streaming_or_item_incomplete = True

--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@@ -512,16 +512,6 @@ def compress_context(
            old_title = agent._session_db.get_session_title(agent.session_id)
            # Trigger memory extraction on the old session before it rotates.
            agent.commit_memory_session(messages)
-            # Flush any un-persisted messages from the current turn to the
-            # old session *before* rotating.  compress_context() can be
-            # called mid-turn (auto-compress when context exceeds threshold)
-            # at a point when _flush_messages_to_session_db() has not yet
-            # run.  Without this, messages generated during the current turn
-            # are silently lost on session rotation (#47202).
-            try:
-                agent._flush_messages_to_session_db(messages)
-            except Exception:
-                pass  # best-effort — don't block compression on a flush error
            agent._session_db.end_session(agent.session_id, "compression")
            old_session_id = agent.session_id
            agent.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
@@ -613,20 +603,6 @@ def compress_context(
            force=True,
        )

-    # Emit session:compress event so hooks (e.g. MemPalace sync) can ingest
-    # the completed old session before its details are lost.
-    _old_sid_for_event = locals().get("old_session_id")
-    if getattr(agent, "event_callback", None):
-        try:
-            agent.event_callback("session:compress", {
-                "platform": agent.platform or "",
-                "session_id": agent.session_id,
-                "old_session_id": _old_sid_for_event or "",
-                "compression_count": agent.context_compressor.compression_count,
-            })
-        except Exception as e:
-            logger.debug("event_callback error on session:compress: %s", e)
-
    # Keep the post-compression rough estimate for diagnostics, but do not
    # treat it as provider-reported prompt usage. Schema-heavy rough estimates
    # can remain above threshold even after the next real API request fits.
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -300,20 +300,11 @@ def _restore_or_build_system_prompt(agent, system_message, conversation_history)
                agent.session_id, exc,
            )

-    if stored_prompt and _stored_prompt_matches_runtime(agent, stored_prompt):
+    if stored_prompt:
        # Continuing session — reuse the exact system prompt from the
        # previous turn so the Anthropic cache prefix matches.
        agent._cached_system_prompt = stored_prompt
        return
-    if stored_prompt:
-        stored_state = "stale_runtime"
-        logger.info(
-            "Stored system prompt for session %s has stale runtime identity; "
-            "rebuilding for model=%s provider=%s.",
-            agent.session_id,
-            getattr(agent, "model", "") or "",
-            getattr(agent, "provider", "") or "",
-        )

    if conversation_history and stored_state in ("null", "empty"):
        # Continuing session whose stored prompt is unusable.  The
@@ -375,30 +366,6 @@ def _restore_or_build_system_prompt(agent, system_message, conversation_history)
            )


-def _stored_prompt_matches_runtime(agent, prompt: str) -> bool:
-    """Return False when the persisted Model/Provider lines are stale."""
-
-    def line_value(label: str) -> str:
-        prefix = f"{label}:"
-        value = ""
-        for line in prompt.splitlines():
-            if line.startswith(prefix):
-                value = line[len(prefix):].strip()
-        return value
-
-    stored_model = line_value("Model")
-    current_model = str(getattr(agent, "model", "") or "").strip()
-    if stored_model and current_model and stored_model != current_model:
-        return False
-
-    stored_provider = line_value("Provider")
-    current_provider = str(getattr(agent, "provider", "") or "").strip()
-    if stored_provider and current_provider and stored_provider != current_provider:
-        return False
-
-    return True
-
-
 def _get_continuation_prompt(is_partial_stub: bool, dropped_tools: Optional[List[str]] = None) -> str:
    if is_partial_stub and dropped_tools:
        tool_list = ", ".join(dropped_tools[:3])
@@ -474,7 +441,6 @@ def run_conversation(
    task_id: str = None,
    stream_callback: Optional[callable] = None,
    persist_user_message: Optional[str] = None,
-    persist_user_timestamp: Optional[float] = None,
 ) -> Dict[str, Any]:
    """
    Run a complete conversation with tool calling until completion.
@@ -490,8 +456,6 @@ def run_conversation(
        persist_user_message: Optional clean user message to store in
            transcripts/history when user_message contains API-only
            synthetic prefixes.
-        persist_user_timestamp: Optional platform event timestamp to store
-            as metadata on that persisted user message.
                or queuing follow-up prefetch work.

    Returns:
@@ -513,7 +477,6 @@ def run_conversation(
        task_id,
        stream_callback,
        persist_user_message,
-        persist_user_timestamp,
        restore_or_build_system_prompt=_restore_or_build_system_prompt,
        install_safe_stdio=_install_safe_stdio,
        sanitize_surrogates=_sanitize_surrogates,
@@ -3756,30 +3719,8 @@ def run_conversation(
                    assistant_msg = agent._build_assistant_message(assistant_message, finish_reason)
                    messages.append(assistant_msg)
                    for tc in assistant_message.tool_calls:
-                        _tc_name = tc.function.name
-                        if _tc_name not in agent.valid_tool_names:
-                            # A blank/whitespace-only name is not a typo the
-                            # model can fuzzy-correct toward a real tool — it is
-                            # almost always a weak open model echoing tool-call
-                            # XML/JSON it saw in file or tool output (#47967:
-                            # <tool_call>/<invoke name=...> payloads in a file
-                            # prime mimo/nemotron-class models to emit empty
-                            # structured calls). Dumping the full tool catalog
-                            # in that case feeds the priming loop more names to
-                            # mimic and inflates context 3-4x across retries, so
-                            # send a terse error that tells the model in-context
-                            # tool-call syntax is DATA, not a call to make.
-                            if not (_tc_name or "").strip():
-                                content = (
-                                    "Tool call rejected: the tool name was empty. "
-                                    "If tool-call XML or JSON appeared in file "
-                                    "contents or tool output, that is data — do "
-                                    "not re-emit it as a tool call. To call a "
-                                    "tool, use a valid name from your tool list; "
-                                    "otherwise reply in plain text."
-                                )
-                            else:
-                                content = f"Tool '{_tc_name}' does not exist. Available tools: {available}"
+                        if tc.function.name not in agent.valid_tool_names:
+                            content = f"Tool '{tc.function.name}' does not exist. Available tools: {available}"
                        else:
                            content = "Skipped: another tool call in this turn used an invalid name. Please retry this tool call."
                        messages.append({
--- a/agent/curator.py
+++ b/agent/curator.py
@@ -57,11 +57,6 @@ DEFAULT_INTERVAL_HOURS = 24 * 7  # 7 days
 DEFAULT_MIN_IDLE_HOURS = 2
 DEFAULT_STALE_AFTER_DAYS = 30
 DEFAULT_ARCHIVE_AFTER_DAYS = 90
-# Consolidation (the LLM umbrella-building fork) is OFF by default. The
-# deterministic inactivity prune (apply_automatic_transitions) still runs
-# whenever the curator is enabled; only the opinionated, aux-model-cost
-# consolidation pass is opt-in.
-DEFAULT_CONSOLIDATE = False


 # ---------------------------------------------------------------------------
@@ -187,22 +182,6 @@ def get_prune_builtins() -> bool:
    return bool(cfg.get("prune_builtins", True))


-def get_consolidate() -> bool:
-    """Whether the curator runs its LLM consolidation (umbrella-building) pass.
-
-    OFF by default. When off, a curator run does ONLY the deterministic
-    inactivity prune (mark stale / archive long-unused skills) and skips the
-    forked aux-model review entirely — no consolidation, no umbrella-building,
-    no aux-model cost. Set ``curator.consolidate: true`` to opt back into the
-    LLM pass that merges overlapping skills into class-level umbrellas.
-
-    The explicit ``hermes curator run --consolidate`` flag overrides this for
-    a single invocation regardless of the config value.
-    """
-    cfg = _load_config()
-    return bool(cfg.get("consolidate", DEFAULT_CONSOLIDATE))
-
-
 # ---------------------------------------------------------------------------
 # Idle / interval check
 # ---------------------------------------------------------------------------
@@ -1429,38 +1408,25 @@ def run_curator_review(
    on_summary: Optional[Callable[[str], None]] = None,
    synchronous: bool = False,
    dry_run: bool = False,
-    consolidate: Optional[bool] = None,
 ) -> Dict[str, Any]:
    """Execute a single curator review pass.

    Steps:
      1. Apply automatic state transitions (pure, no LLM).
-      2. If consolidation is enabled AND there are agent-created skills, spawn
-         a forked AIAgent that runs the LLM review prompt against the current
-         candidate list.
+      2. If there are agent-created skills, spawn a forked AIAgent that runs
+         the LLM review prompt against the current candidate list.
      3. Update .curator_state with last_run_at and a one-line summary.
      4. Invoke *on_summary* with a user-visible description.

    If *synchronous* is True, the LLM review runs in the calling thread; the
    default is to spawn a daemon thread so the caller returns immediately.

-    *consolidate* gates the LLM umbrella-building pass. ``None`` (the default)
-    reads ``curator.consolidate`` from config (OFF by default). Passing
-    ``True``/``False`` overrides the config for this invocation — used by the
-    ``hermes curator run --consolidate`` flag. When consolidation is off, only
-    the deterministic inactivity prune runs and the forked aux-model review is
-    skipped entirely (no aux-model cost).
-
    If *dry_run* is True, the automatic stale/archive transitions are SKIPPED
    and the LLM review pass is instructed to produce a report only — no
    skill_manage mutations, no terminal archive moves. The REPORT.md still
    gets written and ``state.last_report_path`` still records it so users
-    can read what the curator WOULD have done. A dry-run also honors
-    *consolidate*: when consolidation is off, the preview only reports the
-    deterministic prune candidates.
+    can read what the curator WOULD have done.
    """
-    if consolidate is None:
-        consolidate = get_consolidate()
    start = datetime.now(timezone.utc)
    if dry_run:
        # Count candidates without mutating state.
@@ -1523,53 +1489,6 @@ def run_curator_review(
            before_report = []
        before_names = {r.get("name") for r in before_report if isinstance(r, dict)}

-        # Consolidation gate. When off (the default), the curator does ONLY the
-        # deterministic inactivity prune above — no forked aux-model review, no
-        # umbrella-building, no aux-model cost. Record the run, write a report
-        # reflecting the prune-only outcome, and return without spawning a fork.
-        if not consolidate:
-            final_summary = (
-                f"{prefix}{auto_summary}; llm: skipped (consolidation off)"
-            )
-            llm_meta = {
-                "final": "",
-                "summary": "skipped (consolidation off)",
-                "model": "",
-                "provider": "",
-                "tool_calls": [],
-                "error": None,
-            }
-            elapsed = (datetime.now(timezone.utc) - start).total_seconds()
-            state2 = load_state()
-            state2["last_run_duration_seconds"] = elapsed
-            state2["last_run_summary"] = final_summary
-            try:
-                after_report = skill_usage.agent_created_report()
-            except Exception:
-                after_report = []
-            try:
-                report_path = _write_run_report(
-                    started_at=start,
-                    elapsed_seconds=elapsed,
-                    auto_counts=counts,
-                    auto_summary=auto_summary,
-                    before_report=before_report,
-                    before_names=before_names,
-                    after_report=after_report,
-                    llm_meta=llm_meta,
-                )
-                if report_path is not None:
-                    state2["last_report_path"] = str(report_path)
-            except Exception as e:
-                logger.debug("Curator report write failed: %s", e, exc_info=True)
-            save_state(state2)
-            if on_summary:
-                try:
-                    on_summary(f"curator: {final_summary}")
-                except Exception:
-                    pass
-            return
-
        llm_meta: Dict[str, Any] = {}
        try:
            candidate_list = _render_candidate_list()
--- a/agent/curator_backup.py
+++ b/agent/curator_backup.py
@@ -46,7 +46,7 @@ import shutil
 import tarfile
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Set, Tuple
+from typing import Any, Dict, List, Optional, Tuple

 from hermes_constants import get_hermes_home
 from agent.skill_utils import is_excluded_skill_path
@@ -208,17 +208,13 @@ def _write_manifest(dest: Path, reason: str, archive_path: Path,
    )


-def snapshot_skills(reason: str = "manual", *, protect_ids: Optional[Set[str]] = None) -> Optional[Path]:
+def snapshot_skills(reason: str = "manual") -> Optional[Path]:
    """Create a tar.gz snapshot of ``~/.hermes/skills/`` and prune old ones.

    Returns the snapshot directory path, or ``None`` if the snapshot was
    skipped (backup disabled, skills dir missing, or an IO error occurred —
    in which case we log at debug and return None so the curator never
    aborts a pass because of a backup failure).
-
-    ``protect_ids`` is forwarded to the prune step so callers can guarantee
-    specific snapshot ids survive even when they fall outside the keep
-    window (rollback passes the id it is about to restore from).
    """
    if not is_enabled():
        logger.debug("Curator backup disabled by config; skipping snapshot")
@@ -280,19 +276,15 @@ def snapshot_skills(reason: str = "manual", *, protect_ids: Optional[Set[str]] =
            pass
        return None

-    _prune_old(keep=get_keep(), protect=protect_ids)
+    _prune_old(keep=get_keep())
    logger.info("Curator snapshot created: %s (%s)", snap_id, reason)
    return dest


-def _prune_old(keep: int, protect: Optional[Set[str]] = None) -> List[str]:
+def _prune_old(keep: int) -> List[str]:
    """Delete regular snapshots beyond the newest *keep*. Returns deleted
-    ids. Snapshot ids in *protect* are never deleted even when they fall
-    outside the keep window — rollback() uses this so the mandatory
-    pre-rollback safety snapshot can never evict the very snapshot being
-    restored. Staging dirs (``.rollback-staging-*``) are implementation
-    detail and pruned independently on every call."""
-    protect = protect or set()
+    ids. Staging dirs (``.rollback-staging-*``) are implementation detail
+    and pruned independently on every call."""
    backups = _backups_dir()
    if not backups.exists():
        return []
@@ -313,8 +305,6 @@ def _prune_old(keep: int, protect: Optional[Set[str]] = None) -> List[str]:
    entries.sort(key=lambda t: t[0], reverse=True)
    deleted: List[str] = []
    for _, path in entries[keep:]:
-        if path.name in protect:
-            continue
        try:
            shutil.rmtree(path)
            deleted.append(path.name)
@@ -574,13 +564,7 @@ def rollback(backup_id: Optional[str] = None) -> Tuple[bool, str, Optional[Path]
    # out before touching anything — otherwise a failed extract could leave
    # the user with no skills.
    try:
-        # Protect the target from this snapshot's prune step: at the steady
-        # keep limit, pruning the oldest snapshot would otherwise delete the
-        # very snapshot we are about to extract from.
-        snapshot_skills(
-            reason=f"pre-rollback to {target.name}",
-            protect_ids={target.name},
-        )
+        snapshot_skills(reason=f"pre-rollback to {target.name}")
    except Exception as e:
        return (False, f"pre-rollback safety snapshot failed: {e}", None)

--- a/agent/image_gen_provider.py
+++ b/agent/image_gen_provider.py
@@ -11,18 +11,6 @@ Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
 as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
 via ``plugins.enabled``).

-Unified surface
---------------
-One tool — ``image_generate`` — covers **text-to-image** and
-**image-to-image / image editing**. The router is the presence of
-``image_url`` (and/or ``reference_image_urls``): if any source image is
-provided, the provider routes to its image-to-image / edit endpoint; if
-omitted, the provider routes to text-to-image. Users pick one **model**
-(e.g. nano-banana-pro, gpt-image-2, grok-imagine-image); the provider
-handles which underlying endpoint to hit. This mirrors the ``video_gen``
-provider design (``agent/video_gen_provider.py``) so the two surfaces
-stay learnable together.
-
 Response shape
 --------------
 All providers return a dict that :func:`success_response` / :func:`error_response`
@@ -33,7 +21,6 @@ produce. The tool wrapper JSON-serializes it. Keys:
    model          str              provider-specific model identifier
    prompt         str              echoed prompt
    aspect_ratio   str              "landscape" | "square" | "portrait"
-    modality       str              "text" | "image" (which mode was used)
    provider       str              provider name (for diagnostics)
    error          str              only when success=False
    error_type     str              only when success=False
@@ -140,51 +127,19 @@ class ImageGenProvider(abc.ABC):
            return models[0].get("id")
        return None

-    def capabilities(self) -> Dict[str, Any]:
-        """Return what this provider supports.
-
-        Returned dict (all keys optional)::
-
-            {
-                "modalities": ["text", "image"],   # which inputs the backend accepts
-                "max_reference_images": 9,          # cap for reference_image_urls
-            }
-
-        ``modalities`` declares whether the active backend/model supports
-        text-to-image (``"text"``), image-to-image / editing (``"image"``),
-        or both. The tool layer surfaces this in the dynamic schema so the
-        model knows when ``image_url`` is honored. Used by ``hermes tools``
-        for the picker too. Default: text-only (backward compatible — a
-        provider that doesn't override this advertises text-to-image only).
-        """
-        return {
-            "modalities": ["text"],
-            "max_reference_images": 0,
-        }
-
    @abc.abstractmethod
    def generate(
        self,
        prompt: str,
        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
-        *,
-        image_url: Optional[str] = None,
-        reference_image_urls: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> Dict[str, Any]:
-        """Generate an image from a text prompt, or edit/transform a source image.
-
-        Routing: if ``image_url`` (or any ``reference_image_urls``) is
-        provided, the provider should route to its image-to-image / edit
-        endpoint; otherwise text-to-image. ``image_url`` is the primary
-        source image to edit; ``reference_image_urls`` are additional
-        style/composition references (provider clamps to its declared
-        ``max_reference_images``).
+        """Generate an image.

        Implementations should return the dict from :func:`success_response`
        or :func:`error_response`. ``kwargs`` may contain forward-compat
-        parameters future versions of the schema will expose —
-        implementations MUST ignore unknown keys (no TypeError).
+        parameters future versions of the schema will expose — implementations
+        should ignore unknown keys.
        """


@@ -207,26 +162,6 @@ def resolve_aspect_ratio(value: Optional[str]) -> str:
    return DEFAULT_ASPECT_RATIO


-def normalize_reference_images(value: Any) -> Optional[List[str]]:
-    """Coerce a reference-image argument into a clean list of URL/path strings.
-
-    Accepts a single string or a list; strips blanks and whitespace. Returns
-    ``None`` when nothing usable remains so providers can treat "no refs" as a
-    single sentinel.
-    """
-    if value is None:
-        return None
-    if isinstance(value, str):
-        value = [value]
-    if not isinstance(value, (list, tuple)):
-        return None
-    out: List[str] = []
-    for item in value:
-        if isinstance(item, str) and item.strip():
-            out.append(item.strip())
-    return out or None
-
-
 def _images_cache_dir() -> Path:
    """Return ``$HERMES_HOME/cache/images/``, creating parents as needed."""
    from hermes_constants import get_hermes_home
@@ -345,16 +280,13 @@ def success_response(
    prompt: str,
    aspect_ratio: str,
    provider: str,
-    modality: str = "text",
    extra: Optional[Dict[str, Any]] = None,
 ) -> Dict[str, Any]:
    """Build a uniform success response dict.

    ``image`` may be an HTTP URL or an absolute filesystem path (for b64
-    providers like OpenAI). ``modality`` is ``"text"`` (text-to-image) or
-    ``"image"`` (image-to-image / editing) — indicates which endpoint was
-    actually hit, useful for diagnostics. Callers that need to pass through
-    additional backend-specific fields can supply ``extra``.
+    providers like OpenAI). Callers that need to pass through additional
+    backend-specific fields can supply ``extra``.
    """
    payload: Dict[str, Any] = {
        "success": True,
@@ -362,7 +294,6 @@ def success_response(
        "model": model,
        "prompt": prompt,
        "aspect_ratio": aspect_ratio,
-        "modality": modality,
        "provider": provider,
    }
    if extra:
--- a/agent/memory_manager.py
+++ b/agent/memory_manager.py
@@ -33,7 +33,6 @@ from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Dict, List, Optional

 from agent.memory_provider import MemoryProvider
-from agent.skill_commands import extract_user_instruction_from_skill_message
 from tools.registry import tool_error

 logger = logging.getLogger(__name__)
@@ -431,37 +430,16 @@ class MemoryManager:

    # -- Prefetch / recall ---------------------------------------------------

-    @staticmethod
-    def _strip_skill_scaffolding(text: str) -> Optional[str]:
-        """Return memory-worthy user text, or None to skip the turn.
-
-        When a user invokes a /skill or /bundle, Hermes expands the turn into
-        a model-facing message that embeds the entire skill body. Feeding that
-        verbatim to memory providers pollutes their stores/embeddings with
-        prompt scaffolding instead of what the user actually asked. We recover
-        just the user's instruction here, once, for every provider — so this
-        is fixed for the whole provider fan-out, not per backend.
-
-        - Non-skill messages pass through unchanged.
-        - Skill turns with a user instruction return that instruction.
-        - Bare skill invocations (no instruction) return None → callers skip
-          the turn, since there is no user content worth remembering.
-        """
-        return extract_user_instruction_from_skill_message(text)
-
    def prefetch_all(self, query: str, *, session_id: str = "") -> str:
        """Collect prefetch context from all providers.

        Returns merged context text labeled by provider. Empty providers
        are skipped. Failures in one provider don't block others.
        """
-        clean_query = self._strip_skill_scaffolding(query)
-        if not clean_query:
-            return ""
        parts = []
        for provider in self._providers:
            try:
-                result = provider.prefetch(clean_query, session_id=session_id)
+                result = provider.prefetch(query, session_id=session_id)
                if result and result.strip():
                    parts.append(result)
            except Exception as e:
@@ -482,14 +460,10 @@ class MemoryManager:
        if not providers:
            return

-        clean_query = self._strip_skill_scaffolding(query)
-        if not clean_query:
-            return
-
        def _run() -> None:
            for provider in providers:
                try:
-                    provider.queue_prefetch(clean_query, session_id=session_id)
+                    provider.queue_prefetch(query, session_id=session_id)
                except Exception as e:
                    logger.debug(
                        "Memory provider '%s' queue_prefetch failed (non-fatal): %s",
@@ -541,11 +515,6 @@ class MemoryManager:
        if not providers:
            return

-        clean_user_content = self._strip_skill_scaffolding(user_content)
-        if not clean_user_content:
-            return
-        user_content = clean_user_content
-
        def _run() -> None:
            for provider in providers:
                try:
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -275,11 +275,6 @@ DEFAULT_CONTEXT_LENGTHS = {
    # via a custom provider. Values sourced from models.dev (2026-04).
    # Keys use substring matching (longest-first), so e.g. "grok-4.20"
    # matches "grok-4.20-0309-reasoning" / "-non-reasoning" / "-multi-agent-0309".
-    # OAuth-only slug; absent from GET /v1/models. xAI publishes a 200k
-    # usable context window for Composer 2.5 on Grok Build (SuperGrok /
-    # Premium+); /v1/responses additionally enforces a ~262144 input+output
-    # budget, but the usable context (what we track here) is 200k.
-    "grok-composer": 200000,    # grok-composer-2.5-fast (Grok Build CLI)
    "grok-build": 256000,       # grok-build-0.1
    "grok-code-fast": 256000,   # grok-code-fast-1
    "grok-2-vision": 8192,      # grok-2-vision, -1212, -latest
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -8,7 +8,6 @@ import json
 import logging
 import os
 import threading
-import contextvars
 from collections import OrderedDict
 from pathlib import Path

@@ -305,47 +304,6 @@ TASK_COMPLETION_GUIDANCE = (
    "is always better than inventing a result."
 )

-# Universal parallel-tool-call guidance — applied to ALL models.
-#
-# Why this matters for cost: every assistant turn resends the entire
-# accumulated conversation (and, on cache-friendly providers, re-reads the
-# cached prefix and pays for the newly-appended turn). A model that issues
-# one tool call per turn multiplies the number of round-trips — and therefore
-# the resent context — for any task that needs several independent reads,
-# searches, or safe lookups. Batching independent calls into a single
-# assistant response collapses N turns into one, cutting both latency and the
-# resent-context cost that compounds over a long conversation.
-#
-# The hermes-agent runtime already executes a batch of tool calls
-# concurrently when they are independent (read-only tools always; path-scoped
-# file ops when their targets don't overlap — see
-# run_agent._execute_tool_calls / tool_dispatch_helpers). The missing piece
-# was telling the *model* to emit those calls together in the first place.
-# Until now the only batching steer in the prompt lived in
-# GOOGLE_MODEL_OPERATIONAL_GUIDANCE — Gemini/Gemma got it, every other model
-# got nothing. This block makes the steer universal; the now-redundant
-# Google-only bullet has been dropped so no model receives it twice.
-#
-# Short on purpose — shipped in the cached system prompt to every user, every
-# session. Token cost is paid once at install and amortised across all
-# sessions via prefix caching. Keep it tight.
-#
-# Ported from cline/cline#11514 ("encourage parallel tool calls"), adapted
-# from Cline's TypeScript tool-surface guidance to hermes-agent's Python
-# prompt-assembly architecture.
-PARALLEL_TOOL_CALL_GUIDANCE = (
-    "# Parallel tool calls\n"
-    "When you need several pieces of information that don't depend on each "
-    "other, request them together in a single response instead of one tool "
-    "call per turn. Independent reads, searches, web fetches, and read-only "
-    "commands should be batched into the same assistant turn — the runtime "
-    "executes independent calls concurrently, and batching avoids resending "
-    "the whole conversation on every extra round-trip.\n"
-    "Only serialize calls when a later call genuinely depends on an earlier "
-    "call's result (e.g. you must read a file before you can patch it). When "
-    "in doubt and the calls are independent, batch them."
-)
-
 # OpenAI GPT/Codex-specific execution guidance.  Addresses known failure modes
 # where GPT models abandon work on partial results, skip prerequisite lookups,
 # hallucinate instead of using tools, and declare "done" without verification.
@@ -427,10 +385,9 @@ GOOGLE_MODEL_OPERATIONAL_GUIDANCE = (
    "package.json, requirements.txt, Cargo.toml, etc. before importing.\n"
    "- **Conciseness:** Keep explanatory text brief — a few sentences, not "
    "paragraphs. Focus on actions and results over narration.\n"
-    # Parallel-tool-call steering now lives in the universal
-    # PARALLEL_TOOL_CALL_GUIDANCE block (injected for all models), so it is no
-    # longer duplicated here — keeping it would send Gemini/Gemma the same
-    # instruction twice.
+    "- **Parallel tool calls:** When you need to perform multiple independent "
+    "operations (e.g. reading several files), make all the tool calls in a "
+    "single response rather than sequentially.\n"
    "- **Non-interactive commands:** Use flags like -y, --yes, --non-interactive "
    "to prevent CLI tools from hanging on prompts.\n"
    "- **Keep going:** Work autonomously until the task is fully resolved. "
@@ -1000,80 +957,6 @@ CONTEXT_FILE_MAX_CHARS = 20_000
 CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
 CONTEXT_TRUNCATE_TAIL_RATIO = 0.2

-# Dynamic-cap parameters (used when no explicit context_file_max_chars is set).
-# The cap scales with the model's context window so large-context models rarely
-# truncate a project doc, while small-context models stay at the historical
-# 20K floor. ~4 chars/token is the usual English heuristic; we spend a small
-# slice of the window on context files since they share the cached prefix with
-# the system prompt, tools, memory, and the whole conversation.
-_CONTEXT_FILE_CHARS_PER_TOKEN = 4
-_CONTEXT_FILE_WINDOW_FRACTION = 0.06
-_CONTEXT_FILE_DYNAMIC_CEILING = 500_000
-
-
-def _dynamic_context_file_max_chars(context_length: Optional[int]) -> int:
-    """Derive a char cap from the model's context window.
-
-    Returns at least ``CONTEXT_FILE_MAX_CHARS`` (the historical 20K floor) and
-    at most ``_CONTEXT_FILE_DYNAMIC_CEILING``. When ``context_length`` is
-    unknown/invalid, returns the flat default so behavior is unchanged.
-    """
-    if not isinstance(context_length, int) or context_length <= 0:
-        return CONTEXT_FILE_MAX_CHARS
-    budget = int(
-        context_length * _CONTEXT_FILE_CHARS_PER_TOKEN * _CONTEXT_FILE_WINDOW_FRACTION
-    )
-    return max(CONTEXT_FILE_MAX_CHARS, min(budget, _CONTEXT_FILE_DYNAMIC_CEILING))
-
-
-def _get_context_file_max_chars(context_length: Optional[int] = None) -> int:
-    """Return the context-file truncation limit.
-
-    Resolution order:
-      1. Explicit ``context_file_max_chars`` in config.yaml — user knows best,
-         always wins (including over the dynamic cap).
-      2. Dynamic cap derived from the model's ``context_length`` when provided
-         (scales the budget to the window; floor 20K, ceiling 500K).
-      3. ``CONTEXT_FILE_MAX_CHARS`` (20K) as the upstream-compatible fallback.
-    """
-    try:
-        from hermes_cli.config import load_config
-
-        val = load_config().get("context_file_max_chars")
-        if isinstance(val, (int, float)) and val > 0:
-            return int(val)
-    except Exception as e:
-        logger.debug("Could not read context_file_max_chars from config: %s", e)
-    return _dynamic_context_file_max_chars(context_length)
-
-# Collect truncation warnings so the caller (run_agent) can surface them.
-# A ContextVar (not a module-global list) isolates accumulation per thread /
-# per async task, so concurrent gateway-session prompt builds can't drain or
-# clear each other's pending warnings (cross-session leak). Each build runs in
-# its own context, collects its own warnings, and drains them synchronously.
-_truncation_warnings: "contextvars.ContextVar[Optional[list]]" = contextvars.ContextVar(
-    "context_file_truncation_warnings", default=None
-)
-
-
-def _record_truncation_warning(msg: str) -> None:
-    """Append a truncation warning to the current context's accumulator."""
-    warnings = _truncation_warnings.get()
-    if warnings is None:
-        warnings = []
-        _truncation_warnings.set(warnings)
-    warnings.append(msg)
-
-
-def drain_truncation_warnings() -> list:
-    """Return and clear any truncation warnings accumulated in this context."""
-    warnings = _truncation_warnings.get()
-    if not warnings:
-        return []
-    drained = list(warnings)
-    warnings.clear()
-    return drained
-

 # =========================================================================
 # Skills prompt cache
@@ -1580,47 +1463,19 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
 # Context files (SOUL.md, AGENTS.md, .cursorrules)
 # =========================================================================

-def _truncate_content(
-    content: str,
-    filename: str,
-    max_chars: Optional[int] = None,
-    context_length: Optional[int] = None,
-    read_path: Optional[str] = None,
-) -> str:
-    """Head/tail truncation with a marker in the middle.
-
-    ``filename`` is the human label used in warnings. ``read_path`` is the
-    concrete path the agent should ``read_file`` to recover the full content
-    (defaults to ``filename`` when not supplied). ``context_length`` lets the
-    cap scale to the model's window when no explicit config override is set.
-    """
-    if max_chars is None:
-        max_chars = _get_context_file_max_chars(context_length)
+def _truncate_content(content: str, filename: str, max_chars: int = CONTEXT_FILE_MAX_CHARS) -> str:
+    """Return content as-is when it fits in max_chars; otherwise keep its head and tail around a truncation marker."""
    if len(content) <= max_chars:
        return content
-    target = read_path or filename
-    msg = (
-        f"⚠️  Context file {filename} TRUNCATED: "
-        f"{len(content)} chars exceeds limit of {max_chars} — "
-        f"trim the file, pin a larger context_file_max_chars, or use a "
-        f"larger-context model!"
-    )
-    logger.warning(msg)
-    _record_truncation_warning(msg)
    head_chars = int(max_chars * CONTEXT_TRUNCATE_HEAD_RATIO)
    tail_chars = int(max_chars * CONTEXT_TRUNCATE_TAIL_RATIO)
    head = content[:head_chars]
    tail = content[-tail_chars:]
-    marker = (
-        f"\n\n[...truncated {filename}: kept {head_chars}+{tail_chars} of "
-        f"{len(content)} chars. The middle is omitted — if you need the full "
-        f"instructions, read the complete file with the read_file tool: "
-        f"{target}]\n\n"
-    )
+    marker = f"\n\n[...truncated {filename}: kept {head_chars}+{tail_chars} of {len(content)} chars. Use file tools to read the full file.]\n\n"
    return head + marker + tail


-def load_soul_md(context_length: Optional[int] = None) -> Optional[str]:
+def load_soul_md() -> Optional[str]:
    """Load SOUL.md from HERMES_HOME and return its content, or None.

    Used as the agent identity (slot #1 in the system prompt).  When this
@@ -1641,17 +1496,14 @@ def load_soul_md(context_length: Optional[int] = None) -> Optional[str]:
        if not content:
            return None
        content = _scan_context_content(content, "SOUL.md")
-        content = _truncate_content(
-            content, "SOUL.md", context_length=context_length,
-            read_path=str(soul_path),
-        )
+        content = _truncate_content(content, "SOUL.md")
        return content
    except Exception as e:
        logger.debug("Could not read SOUL.md from %s: %s", soul_path, e)
        return None


-def _load_hermes_md(cwd_path: Path, context_length: Optional[int] = None) -> str:
+def _load_hermes_md(cwd_path: Path) -> str:
    """.hermes.md / HERMES.md — walk to git root."""
    hermes_md_path = _find_hermes_md(cwd_path)
    if not hermes_md_path:
@@ -1668,16 +1520,13 @@ def _load_hermes_md(cwd_path: Path, context_length: Optional[int] = None) -> str
            pass
        content = _scan_context_content(content, rel)
        result = f"## {rel}\n\n{content}"
-        return _truncate_content(
-            result, ".hermes.md", context_length=context_length,
-            read_path=str(hermes_md_path),
-        )
+        return _truncate_content(result, ".hermes.md")
    except Exception as e:
        logger.debug("Could not read %s: %s", hermes_md_path, e)
        return ""


-def _load_agents_md(cwd_path: Path, context_length: Optional[int] = None) -> str:
+def _load_agents_md(cwd_path: Path) -> str:
    """AGENTS.md — top-level only (no recursive walk)."""
    for name in ["AGENTS.md", "agents.md"]:
        candidate = cwd_path / name
@@ -1687,16 +1536,13 @@ def _load_agents_md(cwd_path: Path, context_length: Optional[int] = None) -> str
                if content:
                    content = _scan_context_content(content, name)
                    result = f"## {name}\n\n{content}"
-                    return _truncate_content(
-                        result, "AGENTS.md", context_length=context_length,
-                        read_path=str(candidate),
-                    )
+                    return _truncate_content(result, "AGENTS.md")
            except Exception as e:
                logger.debug("Could not read %s: %s", candidate, e)
    return ""


-def _load_claude_md(cwd_path: Path, context_length: Optional[int] = None) -> str:
+def _load_claude_md(cwd_path: Path) -> str:
    """CLAUDE.md / claude.md — cwd only."""
    for name in ["CLAUDE.md", "claude.md"]:
        candidate = cwd_path / name
@@ -1706,16 +1552,13 @@ def _load_claude_md(cwd_path: Path, context_length: Optional[int] = None) -> str
                if content:
                    content = _scan_context_content(content, name)
                    result = f"## {name}\n\n{content}"
-                    return _truncate_content(
-                        result, "CLAUDE.md", context_length=context_length,
-                        read_path=str(candidate),
-                    )
+                    return _truncate_content(result, "CLAUDE.md")
            except Exception as e:
                logger.debug("Could not read %s: %s", candidate, e)
    return ""


-def _load_cursorrules(cwd_path: Path, context_length: Optional[int] = None) -> str:
+def _load_cursorrules(cwd_path: Path) -> str:
    """.cursorrules + .cursor/rules/*.mdc — cwd only."""
    cursorrules_content = ""
    cursorrules_file = cwd_path / ".cursorrules"
@@ -1742,17 +1585,10 @@ def _load_cursorrules(cwd_path: Path, context_length: Optional[int] = None) -> s

    if not cursorrules_content:
        return ""
-    return _truncate_content(
-        cursorrules_content, ".cursorrules", context_length=context_length,
-        read_path=str(cwd_path / ".cursorrules"),
-    )
+    return _truncate_content(cursorrules_content, ".cursorrules")


-def build_context_files_prompt(
-    cwd: Optional[str] = None,
-    skip_soul: bool = False,
-    context_length: Optional[int] = None,
-) -> str:
+def build_context_files_prompt(cwd: Optional[str] = None, skip_soul: bool = False) -> str:
    """Discover and load context files for the system prompt.

    Priority (first found wins — only ONE project context type is loaded):
@@ -1762,11 +1598,7 @@ def build_context_files_prompt(
      4. .cursorrules / .cursor/rules/*.mdc  (cwd only)

    SOUL.md from HERMES_HOME is independent and always included when present.
-
-    Each context source is capped before injection. The cap defaults to the
-    model's context window (scaled — see ``_dynamic_context_file_max_chars``)
-    when *context_length* is provided, falling back to 20,000 chars otherwise.
-    An explicit ``context_file_max_chars`` in config.yaml always wins.
+    Each context source is capped at 20,000 chars.

    When *skip_soul* is True, SOUL.md is not included here (it was already
    loaded via ``load_soul_md()`` for the identity slot).
@@ -1779,17 +1611,17 @@ def build_context_files_prompt(

    # Priority-based project context: first match wins
    project_context = (
-        _load_hermes_md(cwd_path, context_length)
-        or _load_agents_md(cwd_path, context_length)
-        or _load_claude_md(cwd_path, context_length)
-        or _load_cursorrules(cwd_path, context_length)
+        _load_hermes_md(cwd_path)
+        or _load_agents_md(cwd_path)
+        or _load_claude_md(cwd_path)
+        or _load_cursorrules(cwd_path)
    )
    if project_context:
        sections.append(project_context)

    # SOUL.md from HERMES_HOME only — skip when already loaded as identity
    if not skip_soul:
-        soul_content = load_soul_md(context_length)
+        soul_content = load_soul_md()
        if soul_content:
            sections.append(soul_content)

--- a/agent/skill_commands.py
+++ b/agent/skill_commands.py
@@ -26,91 +26,6 @@ _skill_commands_platform: Optional[str] = None
 _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
 _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")

-# ---------------------------------------------------------------------------
-# Skill-scaffolding markers and the canonical extractor.
-#
-# When a user invokes a /skill (or /bundle), Hermes expands the turn into a
-# model-facing message that embeds the full skill body plus scaffolding. That
-# expanded text is what flows into the agent loop — and into memory providers
-# via MemoryManager. Providers that store or embed the raw user turn (mem0,
-# openviking, hindsight, retaindb, byterover, honcho, supermemory) would
-# otherwise capture the entire skill body instead of what the user actually
-# asked. ``extract_user_instruction_from_skill_message`` recovers just the
-# user's instruction so memory stays clean.
-#
-# These markers MUST stay byte-identical to the builders below
-# (``_build_skill_message`` here, ``build_bundle_invocation_message`` in
-# agent/skill_bundles.py). They are co-located with the single-skill builder
-# on purpose, and the bundle markers are asserted against the bundle builder in
-# tests/openviking_plugin/test_openviking.py::test_skill_markers_match_hermes_scaffolding.
-# ---------------------------------------------------------------------------
-_SKILL_INVOCATION_PREFIX = "[IMPORTANT: The user has invoked the "
-_SINGLE_SKILL_MARKER = "The full skill content is loaded below.]"
-_SINGLE_SKILL_INSTRUCTION = (
-    "The user has provided the following instruction alongside the skill invocation: "
-)
-_RUNTIME_NOTE = "\n\n[Runtime note:"
-_BUNDLE_MARKER = " skill bundle,"
-_BUNDLE_USER_INSTRUCTION = "\nUser instruction: "
-_BUNDLE_FIRST_SKILL_BLOCK = "\n\n[Loaded as part of the "
-
-
-def extract_user_instruction_from_skill_message(content: Any) -> Optional[str]:
-    """Recover the user's instruction from a slash-skill-expanded turn.
-
-    Returns:
-        - The original string unchanged when it is NOT skill scaffolding
-          (a normal user message passes straight through).
-        - The extracted user instruction when the scaffolding carried one.
-        - ``None`` when the content is skill scaffolding with no user
-          instruction (i.e. a bare ``/skill`` invocation). Callers that feed
-          memory providers should skip the turn in that case — there is no
-          user content worth storing.
-    """
-    if not isinstance(content, str):
-        return None
-
-    if not content.startswith(_SKILL_INVOCATION_PREFIX):
-        return content
-
-    if _BUNDLE_MARKER in content:
-        return _extract_bundle_user_instruction(content)
-
-    if _SINGLE_SKILL_MARKER in content:
-        return _extract_single_skill_user_instruction(content)
-
-    return None
-
-
-def _extract_single_skill_user_instruction(message: str) -> Optional[str]:
-    # Single-skill format appends the user instruction after the skill body, so
-    # the last occurrence is the user-provided one; the body may quote this text.
-    marker_idx = message.rfind(_SINGLE_SKILL_INSTRUCTION)
-    if marker_idx < 0:
-        return None
-
-    instruction = message[marker_idx + len(_SINGLE_SKILL_INSTRUCTION):]
-    runtime_idx = instruction.find(_RUNTIME_NOTE)
-    if runtime_idx >= 0:
-        instruction = instruction[:runtime_idx]
-    instruction = instruction.strip()
-    return instruction or None
-
-
-def _extract_bundle_user_instruction(message: str) -> Optional[str]:
-    # Bundle format puts the user instruction before the loaded skills, so the
-    # first occurrence is the user-provided one.
-    marker_idx = message.find(_BUNDLE_USER_INSTRUCTION)
-    if marker_idx < 0:
-        return None
-
-    instruction = message[marker_idx + len(_BUNDLE_USER_INSTRUCTION):]
-    first_skill_idx = instruction.find(_BUNDLE_FIRST_SKILL_BLOCK)
-    if first_skill_idx >= 0:
-        instruction = instruction[:first_skill_idx]
-    instruction = instruction.strip()
-    return instruction or None
-

 def _resolve_skill_commands_platform() -> Optional[str]:
    """Return the current platform scope used for disabled-skill filtering.
--- a/agent/skill_utils.py
+++ b/agent/skill_utils.py
@@ -43,20 +43,14 @@ EXCLUDED_SKILL_DIRS = frozenset(
    )
 )

-# Supporting files live inside a skill package and are loaded explicitly via
-# skill_view(skill, file_path=...). They are not standalone skills and must not
-# be scanned for active SKILL.md/DESCRIPTION.md entries, even if a Curator or
-# archive workflow preserves a complete old skill package under references/.
-SKILL_SUPPORT_DIRS = frozenset(("references", "templates", "assets", "scripts"))
-

 def is_excluded_skill_path(path) -> bool:
-    """True if *path* should be skipped by active skill scanners.
+    """True if any component of *path* is in EXCLUDED_SKILL_DIRS.

-    Use this on every ``SKILL.md`` path produced by direct ``rglob`` scans to
-    prune dependency, virtualenv, VCS, cache, and progressive-disclosure
-    support-package paths. Centralising the check here keeps every
-    skill-scanning site in sync with the shared exclusion set.
+    Use this on every SKILL.md path produced by ``rglob`` to prune
+    dependency, virtualenv, VCS, and cache directories. Centralising the
+    check here keeps every skill-scanning site in sync with the shared
+    exclusion set.

    Accepts a Path or string.
    """
@@ -65,36 +59,7 @@ def is_excluded_skill_path(path) -> bool:
    except AttributeError:
        from pathlib import PurePath
        parts = PurePath(str(path)).parts
-    return any(part in EXCLUDED_SKILL_DIRS for part in parts) or is_skill_support_path(
-        path
-    )
-
-
-def is_skill_support_path(path) -> bool:
-    """True if *path* is under a support dir of an actual skill root.
-
-    ``references/``, ``templates/``, ``assets/``, and ``scripts/`` are
-    progressive-disclosure support areas when they sit directly inside a skill
-    directory containing ``SKILL.md``. They are not active discovery roots for
-    standalone skills. A preserved package such as
-    ``some-skill/references/old-skill-package/SKILL.md`` is documentation data
-    unless the caller explicitly loads it via ``file_path``.
-
-    Legitimate categories or skill names such as ``skills/scripts/foo`` remain
-    discoverable because their ``scripts`` component is not directly under a
-    directory that contains ``SKILL.md``.
-    """
-    path_obj = path if isinstance(path, Path) else Path(str(path))
-    parts = path_obj.parts
-    # Last component may be a file or candidate skill directory name. Only
-    # components before the leaf can be containing support directories.
-    for idx, part in enumerate(parts[:-1]):
-        if part not in SKILL_SUPPORT_DIRS or idx == 0:
-            continue
-        skill_root = Path(*parts[:idx])
-        if (skill_root / "SKILL.md").exists():
-            return True
-    return False
+    return any(part in EXCLUDED_SKILL_DIRS for part in parts)


 # ── Lazy YAML loader ─────────────────────────────────────────────────────
@@ -696,21 +661,12 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
 def iter_skill_index_files(skills_dir: Path, filename: str):
    """Walk skills_dir yielding sorted paths matching *filename*.

-    Excludes Hermes metadata, VCS, virtualenv/dependency, cache, and skill
-    support directories. Support directories (references/templates/assets/
-    scripts) can contain arbitrary markdown and even archived package
-    ``SKILL.md`` files, but they are progressive-disclosure data loaded through
-    ``skill_view(..., file_path=...)`` rather than active skill roots.
+    Excludes Hermes metadata, VCS, virtualenv/dependency, and cache
+    directories so dependencies cannot register nested skills.
    """
    matches = []
    for root, dirs, files in os.walk(skills_dir, followlinks=True):
-        has_skill_md = "SKILL.md" in files
-        dirs[:] = [
-            d
-            for d in dirs
-            if d not in EXCLUDED_SKILL_DIRS
-            and not (has_skill_md and d in SKILL_SUPPORT_DIRS)
-        ]
+        dirs[:] = [d for d in dirs if d not in EXCLUDED_SKILL_DIRS]
        if filename in files:
            matches.append(Path(root) / filename)
    for path in sorted(matches, key=lambda p: str(p.relative_to(skills_dir))):
--- a/agent/system_prompt.py
+++ b/agent/system_prompt.py
@@ -33,7 +33,6 @@ from agent.prompt_builder import (
    KANBAN_GUIDANCE,
    MEMORY_GUIDANCE,
    OPENAI_MODEL_EXECUTION_GUIDANCE,
-    PARALLEL_TOOL_CALL_GUIDANCE,
    PLATFORM_HINTS,
    SESSION_SEARCH_GUIDANCE,
    SKILLS_GUIDANCE,
@@ -41,7 +40,6 @@ from agent.prompt_builder import (
    TASK_COMPLETION_GUIDANCE,
    TOOL_USE_ENFORCEMENT_GUIDANCE,
    TOOL_USE_ENFORCEMENT_MODELS,
-    drain_truncation_warnings,
 )
 from agent.runtime_cwd import resolve_context_cwd

@@ -61,55 +59,6 @@ def _ra():
    return run_agent


-def _resolve_platform_hint(agent: Any, platform_key: str, default_hint: str) -> str:
-    """Apply a per-platform prompt-hint override to the default hint.
-
-    Reads ``agent._platform_hint_overrides`` (populated from
-    ``config.yaml`` ``platform_hints`` by ``agent_init``) and resolves the
-    effective hint for *platform_key*:
-
-      * ``replace`` — substitute the default hint entirely.
-      * ``append``  — keep the default and append the extra text.
-      * a bare string value — treated as ``append`` (convenience shorthand).
-
-    Precedence: ``replace`` wins over ``append`` if both are present.
-    Override text is added on top of (not instead of) the SOUL/context/
-    memory tiers — it only affects the platform-hint segment, so other
-    platforms are unaffected and general system instructions still apply.
-
-    Defensive: any malformed entry falls back to the unmodified default so
-    a bad config value can never break prompt assembly or leak across
-    platforms.
-    """
-    if not platform_key:
-        return default_hint
-    overrides = getattr(agent, "_platform_hint_overrides", None)
-    if not isinstance(overrides, dict) or not overrides:
-        return default_hint
-    spec = overrides.get(platform_key)
-    if spec is None:
-        return default_hint
-
-    # Shorthand: a bare string is treated as append text.
-    if isinstance(spec, str):
-        extra = spec.strip()
-        return f"{default_hint}\n\n{extra}".strip() if extra else default_hint
-
-    if not isinstance(spec, dict):
-        return default_hint
-
-    replace_text = spec.get("replace")
-    if isinstance(replace_text, str) and replace_text.strip():
-        base = replace_text.strip()
-    else:
-        base = default_hint
-
-    append_text = spec.get("append")
-    if isinstance(append_text, str) and append_text.strip():
-        return f"{base}\n\n{append_text.strip()}".strip()
-    return base
-
-
 def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None) -> Dict[str, str]:
    """Assemble the system prompt as three ordered parts.

@@ -133,17 +82,6 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
    # we resolve through ``_ra()`` to honor those patches.
    _r = _ra()

-    # Resolve the model's context window once so context-file caps can scale
-    # to it (dynamic cap — see prompt_builder._dynamic_context_file_max_chars).
-    # None falls back to the historical flat default. This value is stable for
-    # the life of the conversation, so it does not threaten prompt caching.
-    _ctx_len: Optional[int] = None
-    _cc = getattr(agent, "context_compressor", None)
-    if _cc is not None:
-        _cc_len = getattr(_cc, "context_length", None)
-        if isinstance(_cc_len, int) and _cc_len > 0:
-            _ctx_len = _cc_len
-
    # ── Stable tier ────────────────────────────────────────────────
    stable_parts: List[str] = []

@@ -152,7 +90,7 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
    # cwd project instructions disabled.
    _soul_loaded = False
    if agent.load_soul_identity or not agent.skip_context_files:
-        _soul_content = _r.load_soul_md(_ctx_len)
+        _soul_content = _r.load_soul_md()
        if _soul_content:
            stable_parts.append(_soul_content)
            _soul_loaded = True
@@ -173,17 +111,6 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
    if getattr(agent, "_task_completion_guidance", True) and agent.valid_tool_names:
        stable_parts.append(TASK_COMPLETION_GUIDANCE)

-    # Universal parallel-tool-call guidance.  Tells the model to batch
-    # independent tool calls into one assistant turn rather than emitting one
-    # call per turn — the runtime already runs independent calls concurrently
-    # (read-only tools always; non-overlapping path-scoped file ops), so the
-    # only thing missing was steering the model to produce the batch.  Cuts
-    # round-trips and the resent-context cost that compounds over a long
-    # conversation.  Gated by config.yaml ``agent.parallel_tool_call_guidance``
-    # (default True) and only injected when tools are actually loaded.
-    if getattr(agent, "_parallel_tool_call_guidance", True) and agent.valid_tool_names:
-        stable_parts.append(PARALLEL_TOOL_CALL_GUIDANCE)
-
    # Tool-aware behavioral guidance: only inject when the tools are loaded
    tool_guidance = []
    if "memory" in agent.valid_tool_names:
@@ -380,25 +307,18 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
        )

    platform_key = (agent.platform or "").lower().strip()
-    # Resolve the built-in/plugin default hint for this platform, then apply
-    # any per-platform override from config (platform_hints.<platform>).
-    _default_hint = ""
    if platform_key in PLATFORM_HINTS:
-        _default_hint = PLATFORM_HINTS[platform_key]
+        stable_parts.append(PLATFORM_HINTS[platform_key])
    elif platform_key:
        # Check plugin registry for platform-specific LLM guidance
        try:
            from gateway.platform_registry import platform_registry
            _entry = platform_registry.get(platform_key)
            if _entry and _entry.platform_hint:
-                _default_hint = _entry.platform_hint
+                stable_parts.append(_entry.platform_hint)
        except Exception:
            pass

-    _effective_hint = _resolve_platform_hint(agent, platform_key, _default_hint)
-    if _effective_hint:
-        stable_parts.append(_effective_hint)
-
    # ── Context tier (cwd-dependent, may change between sessions) ─
    context_parts: List[str] = []

@@ -413,8 +333,7 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
        # dir — the user's real cwd there, but the install dir for the gateway
        # daemon, which is why the gateway sets TERMINAL_CWD.
        context_files_prompt = _r.build_context_files_prompt(
-            cwd=resolve_context_cwd(), skip_soul=_soul_loaded,
-            context_length=_ctx_len)
+            cwd=resolve_context_cwd(), skip_soul=_soul_loaded)
        if context_files_prompt:
            context_parts.append(context_files_prompt)

@@ -481,14 +400,7 @@ def build_system_prompt(agent: Any, system_message: Optional[str] = None) -> str
    warm across turns.
    """
    parts = build_system_prompt_parts(agent, system_message=system_message)
-    joined = "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
-
-    # Surface context-file truncation warnings through the normal agent status
-    # channel so gateway/CLI users see them in chat instead of only in logs.
-    for warning in drain_truncation_warnings():
-        agent._emit_status(warning)
-
-    return joined
+    return "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)


 def invalidate_system_prompt(agent: Any) -> None:
--- a/agent/tool_executor.py
+++ b/agent/tool_executor.py
@@ -1012,42 +1012,28 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
        elif function_name == "memory":
            def _execute(next_args: dict) -> Any:
                target = next_args.get("target", "memory")
-                operations = next_args.get("operations")
                from tools.memory_tool import memory_tool as _memory_tool
                result = _memory_tool(
                    action=next_args.get("action"),
                    target=target,
                    content=next_args.get("content"),
                    old_text=next_args.get("old_text"),
-                    operations=operations,
                    store=agent._memory_store,
                )
-                # Bridge: notify external memory provider of built-in memory writes.
-                # Covers both the single-op shape and each add/replace inside a batch.
-                if agent._memory_manager:
-                    if operations:
-                        _mem_ops = [
-                            op for op in operations
-                            if isinstance(op, dict) and op.get("action") in {"add", "replace"}
-                        ]
-                    else:
-                        _mem_ops = (
-                            [{"action": next_args.get("action"), "content": next_args.get("content")}]
-                            if next_args.get("action") in {"add", "replace"} else []
+                # Bridge: notify external memory provider of built-in memory writes
+                if agent._memory_manager and next_args.get("action") in {"add", "replace"}:
+                    try:
+                        agent._memory_manager.on_memory_write(
+                            next_args.get("action", ""),
+                            target,
+                            next_args.get("content", ""),
+                            metadata=agent._build_memory_write_metadata(
+                                task_id=effective_task_id,
+                                tool_call_id=getattr(tool_call, "id", None),
+                            ),
                        )
-                    for _op in _mem_ops:
-                        try:
-                            agent._memory_manager.on_memory_write(
-                                _op.get("action", ""),
-                                target,
-                                _op.get("content", "") or "",
-                                metadata=agent._build_memory_write_metadata(
-                                    task_id=effective_task_id,
-                                    tool_call_id=getattr(tool_call, "id", None),
-                                ),
-                            )
-                        except Exception:
-                            pass
+                    except Exception:
+                        pass
                return result
            function_result, function_args = _run_agent_tool_execution_middleware(
                agent,
--- a/agent/transports/anthropic.py
+++ b/agent/transports/anthropic.py
@@ -88,7 +88,7 @@ class AnthropicTransport(ProviderTransport):
        from agent.transports.types import ToolCall

        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
-        _MCP_PREFIX = "mcp__"
+        _MCP_PREFIX = "mcp_"

        text_parts = []
        reasoning_parts = []
@@ -132,25 +132,17 @@ class AnthropicTransport(ProviderTransport):
            elif block.type == "tool_use":
                name = block.name
                if strip_tool_prefix and name.startswith(_MCP_PREFIX):
-                    # On the OAuth wire every tool carries a double-underscore
-                    # ``mcp__`` prefix (added in build_anthropic_kwargs to avoid
-                    # Anthropic's single-underscore third-party classifier).
-                    # Reverse it back to the name the registry/dispatcher knows.
-                    # Two original forms map onto the same ``mcp__`` wire name:
-                    #   ``mcp__read_file``       <- bare native tool ``read_file``
-                    #   ``mcp__linear_get_issue`` <- MCP server tool
-                    #                                ``mcp_linear_get_issue``
-                    # Resolve by registry lookup, preferring whichever original
-                    # is actually registered; never rewrite a name the LLM used
-                    # that already resolves natively. GH-25255.
+                    stripped = name[len(_MCP_PREFIX):]
+                    # Only strip the mcp_ prefix for OAuth-injected tools
+                    # (where Hermes adds the prefix when sending to Anthropic
+                    # and must remove it on the way back).  Native MCP server
+                    # tools (from mcp_servers: in config.yaml) are registered
+                    # in the tool registry under their FULL mcp_<server>_<tool>
+                    # name and must NOT be stripped.  GH-25255.
                    from tools.registry import registry as _tool_registry
-                    if not _tool_registry.get_entry(name):
-                        bare = name[len(_MCP_PREFIX):]            # read_file
-                        single = "mcp_" + bare                    # mcp_read_file / mcp_linear_get_issue
-                        if _tool_registry.get_entry(single):
-                            name = single
-                        elif _tool_registry.get_entry(bare):
-                            name = bare
+                    if (_tool_registry.get_entry(stripped)
+                            and not _tool_registry.get_entry(name)):
+                        name = stripped
                tool_calls.append(
                    ToolCall(
                        id=block.id,
--- a/agent/transports/codex.py
+++ b/agent/transports/codex.py
@@ -128,65 +128,6 @@ class ResponsesApiTransport(ProviderTransport):
        reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)

        response_tools = _responses_tools(tools)
-
-        # xAI server-side web search.
-        #
-        # grok models on xAI's /v1/responses surface (notably
-        # grok-composer-2.5-fast on SuperGrok OAuth) have a *native*,
-        # server-executed web search.  When the model is handed a
-        # client-side function literally named ``web_search``, it routes
-        # the intent to that native engine — but because the tool is
-        # declared as a plain ``function`` rather than xAI's first-class
-        # ``{"type": "web_search"}`` built-in, the server-side search is
-        # dispatched but never reconciled: the response streams reasoning
-        # + ``web_search_call`` progress items, the searches never reach
-        # ``status="completed"`` in the assembled output, no final
-        # message is emitted, and ``_normalize_codex_response`` correctly
-        # sees reasoning-with-no-answer and reports ``incomplete``.  The
-        # turn then burns 3 continuation retries and fails with "Codex
-        # response remained incomplete after 3 continuation attempts".
-        # Verified live against grok-composer-2.5-fast (2026-06).
-        #
-        # Fix: when the agent HAS a client-side ``web_search`` function (i.e.
-        # the user enabled the web toolset), declare xAI's native
-        # ``web_search`` built-in instead so the search actually runs to
-        # completion server-side and the model streams a real answer.  The
-        # Responses API rejects two tools sharing the name ``web_search``
-        # (HTTP 400 "Duplicate tool names"), so we drop the client-side
-        # ``web_search`` function for the xAI path and let the native tool
-        # satisfy it.  All other client-side tools (read_file, terminal,
-        # web_extract, MCP tools, …) are untouched and continue to dispatch
-        # through Hermes's agent loop.
-        #
-        # Scope: we ONLY swap in the native built-in when the client
-        # ``web_search`` was actually present.  We do NOT force-enable Grok
-        # server-side search on turns where the user never had web enabled —
-        # that would silently route around Hermes's web-provider config and
-        # tool-trace/citation plumbing for every xai-oauth turn.  The swap is
-        # a 1:1 replacement of an already-requested capability, not an
-        # additive grant.
-        #
-        # NOTE: for the swapped case this routes ``web_search`` to Grok's
-        # native search engine for xAI sessions instead of Hermes's
-        # configured web provider (Tavily/etc.), and those results bypass
-        # Hermes's tool-trace / citation plumbing (they arrive baked into the
-        # model's answer rather than as a tool result the loop observes).
-        # Scoped to ``is_xai_responses`` deliberately; narrow to specific
-        # models if a future grok variant should keep the client-side
-        # function.
-        if is_xai_responses and response_tools:
-            has_client_web_search = any(
-                isinstance(t, dict) and t.get("name") == "web_search"
-                for t in response_tools
-            )
-            if has_client_web_search:
-                filtered = [
-                    t for t in response_tools
-                    if not (isinstance(t, dict) and t.get("name") == "web_search")
-                ]
-                filtered.append({"type": "web_search"})
-                response_tools = filtered
-
        # ``tools`` MUST be omitted entirely when there are no functions to
        # expose: the openai SDK's ``responses.stream()`` / ``responses.parse()``
        # eagerly call ``_make_tools(tools)`` which does ``for tool in tools``
@@ -277,28 +218,10 @@ class ResponsesApiTransport(ProviderTransport):
            kwargs.pop("timeout", None)

        if is_codex_backend:
-            # The Codex backend rejects body-level ``extra_headers`` with
-            # HTTP 400, but the OpenAI SDK's ``extra_headers`` kwarg maps
-            # to actual HTTP request headers (not body fields).  We need
-            # these headers for cache-scope routing so prompt cache hits
-            # remain high.  Send session_id / x-client-request-id as HTTP
-            # headers while keeping ``prompt_cache_key`` in the body for
-            # standard OpenAI routing as a belt-and-braces fallback.
-            cache_scope_id = str(session_id or "").strip()
-            if cache_scope_id:
-                existing_extra_headers = kwargs.get("extra_headers")
-                merged_extra_headers: Dict[str, str] = {}
-                if isinstance(existing_extra_headers, dict):
-                    merged_extra_headers.update(
-                        {
-                            str(key): str(value)
-                            for key, value in existing_extra_headers.items()
-                            if key and value is not None
-                        }
-                    )
-                merged_extra_headers["session_id"] = cache_scope_id
-                merged_extra_headers["x-client-request-id"] = cache_scope_id
-                kwargs["extra_headers"] = merged_extra_headers
+            # chatgpt.com/backend-api/codex rejects body-level
+            # ``extra_headers`` with HTTP 400. Correlation/cache routing for
+            # this backend must not be sent through the Responses payload.
+            kwargs.pop("extra_headers", None)

        max_tokens = params.get("max_tokens")
        if max_tokens is not None and not is_codex_backend:
--- a/agent/turn_context.py
+++ b/agent/turn_context.py
@@ -69,7 +69,6 @@ def build_turn_context(
    task_id: Optional[str],
    stream_callback,
    persist_user_message: Optional[str],
-    persist_user_timestamp: Optional[float] = None,
    *,
    restore_or_build_system_prompt,
    install_safe_stdio,
@@ -122,7 +121,6 @@ def build_turn_context(
    agent._stream_callback = stream_callback
    agent._persist_user_message_idx = None
    agent._persist_user_message_override = persist_user_message
-    agent._persist_user_message_timestamp = persist_user_timestamp
    # Generate unique task_id if not provided to isolate VMs between tasks.
    effective_task_id = task_id or str(uuid.uuid4())
    agent._current_task_id = effective_task_id
--- a/apps/bootstrap-installer/src-tauri/src/update.rs
+++ b/apps/bootstrap-installer/src-tauri/src/update.rs
@@ -286,7 +286,7 @@ async fn run_update(app: AppHandle) -> Result<()> {
    emit_stage(&app, "rebuild", StageState::Running, None, None);
    let started = Instant::now();
    let rebuild_args: Vec<String> = vec!["desktop".into(), "--build-only".into()];
-    let mut rebuild = run_streamed(
+    let rebuild = run_streamed(
        &app,
        &hermes,
        &rebuild_args,
@@ -295,33 +295,6 @@ async fn run_update(app: AppHandle) -> Result<()> {
        Some("rebuild"),
    )
    .await?;
-
-    // Retry-once: the first `--build-only` can return nonzero on a still-settling
-    // post-update tree or a network-blocked Electron fetch that our self-heal
-    // repaired mid-run. A second attempt then builds clean off the healed dist
-    // (the content-hash stamp makes it a near-no-op when the first actually
-    // succeeded). Without this the updater bails here and never reaches the
-    // relaunch below — the app updates but doesn't restart. Matches the
-    // retry-once `hermes update` already does above, and `hermes update`'s own
-    // desktop rebuild in cmd_update.
-    if rebuild_needs_retry(rebuild.exit_code) {
-        emit_log(
-            &app,
-            Some("rebuild"),
-            LogStream::Stdout,
-            "[rebuild] first desktop rebuild failed; retrying once (a self-healed \
-             Electron download builds clean on the second run)…",
-        );
-        rebuild = run_streamed(
-            &app,
-            &hermes,
-            &rebuild_args,
-            &install_root,
-            &child_env,
-            Some("rebuild"),
-        )
-        .await?;
-    }
    let rebuild_ms = started.elapsed().as_millis() as u64;

    if rebuild.exit_code != Some(0) {
@@ -560,14 +533,6 @@ fn is_locked(path: &Path) -> bool {
    }
 }

-/// Whether the `desktop --build-only` rebuild should be retried once. Any
-/// non-success exit qualifies: the common cause is a transient first-attempt
-/// failure (still-settling tree / self-healed Electron download) that a clean
-/// second run resolves.
-fn rebuild_needs_retry(exit_code: Option<i32>) -> bool {
-    exit_code != Some(0)
-}
-
 /// Spawn `hermes <args>` from `cwd`, stream stdout/stderr as Log events on the
 /// bootstrap channel, and return the exit code. Mirrors powershell::run_script
 /// but for an arbitrary command (no install.ps1 -File wrapping).
@@ -1005,16 +970,6 @@ mod tests {
        assert_eq!(update_branch_from_args(["--update"]), None);
    }

-    #[test]
-    fn rebuild_retries_only_on_failure() {
-        assert!(!rebuild_needs_retry(Some(0)), "a clean rebuild must not retry");
-        assert!(rebuild_needs_retry(Some(1)), "a failed rebuild retries once");
-        assert!(
-            rebuild_needs_retry(None),
-            "a killed/signalled rebuild (no exit code) retries once"
-        );
-    }
-
    #[test]
    fn parses_only_app_targets() {
        assert_eq!(
--- a/apps/desktop/README.md
+++ b/apps/desktop/README.md
@@ -34,7 +34,7 @@ It builds and launches the GUI against your existing install — same config, ke

 ### Prebuilt installers

-Prebuilt installers are built and distributed via [the Hermes Desktop website.](https://hermes-agent.nousresearch.com/).
+Prebuilt installers are built and distributed via [the Hermes Desktop website](https://hermes-agent.nousresearch.com/desktop).

 ---

--- a/apps/desktop/electron/connection-config.cjs
+++ b/apps/desktop/electron/connection-config.cjs
@@ -166,39 +166,6 @@ function profileRemoteOverride(config, profile) {
  return { url, authMode: normAuthMode(entry.authMode), token: entry.token }
 }

-/**
- * In global-remote mode one backend serves every Desktop profile, so REST calls
- * that are scoped by renderer-side `request.profile` must carry that scope as a
- * query parameter. Local pooled backends and per-profile remote overrides do not
- * need this: they already run against a backend scoped to the target profile.
- */
-function pathWithGlobalRemoteProfile(path, profile, opts = {}) {
-  const scopedProfile = connectionScopeKey(profile)
-  if (!scopedProfile || !opts.globalRemote || opts.profileRemoteOverride) {
-    return path
-  }
-
-  const rawPath = String(path || '')
-  if (!rawPath) {
-    return path
-  }
-
-  let parsed
-  try {
-    parsed = new URL(rawPath, 'http://hermes.local')
-  } catch {
-    return path
-  }
-
-  if (parsed.searchParams.has('profile')) {
-    return path
-  }
-
-  parsed.searchParams.set('profile', scopedProfile)
-
-  return `${parsed.pathname}${parsed.search}${parsed.hash}`
-}
-
 function tokenPreview(value) {
  const raw = String(value || '')

@@ -280,7 +247,6 @@ module.exports = {
  cookiesHaveLiveSession,
  normAuthMode,
  normalizeRemoteBaseUrl,
-  pathWithGlobalRemoteProfile,
  profileRemoteOverride,
  resolveAuthMode,
  resolveTestWsUrl,
--- a/apps/desktop/electron/connection-config.test.cjs
+++ b/apps/desktop/electron/connection-config.test.cjs
@@ -24,7 +24,6 @@ const {
  cookiesHaveLiveSession,
  normAuthMode,
  normalizeRemoteBaseUrl,
-  pathWithGlobalRemoteProfile,
  profileRemoteOverride,
  resolveAuthMode,
  resolveTestWsUrl,
@@ -91,72 +90,6 @@ test('profileRemoteOverride tolerates a missing/!object profiles map', () => {
  assert.equal(profileRemoteOverride(null, 'coder'), null)
 })

-// --- pathWithGlobalRemoteProfile ---
-
-test('pathWithGlobalRemoteProfile appends profile in global remote mode', () => {
-  assert.equal(
-    pathWithGlobalRemoteProfile('/api/model/info', 'iris', {
-      globalRemote: true,
-      profileRemoteOverride: false
-    }),
-    '/api/model/info?profile=iris'
-  )
-})
-
-test('pathWithGlobalRemoteProfile preserves existing query params', () => {
-  assert.equal(
-    pathWithGlobalRemoteProfile('/api/model/options?force=1', 'iris', {
-      globalRemote: true,
-      profileRemoteOverride: false
-    }),
-    '/api/model/options?force=1&profile=iris'
-  )
-})
-
-test('pathWithGlobalRemoteProfile does not replace an explicit profile query', () => {
-  assert.equal(
-    pathWithGlobalRemoteProfile('/api/model/info?profile=default', 'iris', {
-      globalRemote: true,
-      profileRemoteOverride: false
-    }),
-    '/api/model/info?profile=default'
-  )
-})
-
-test('pathWithGlobalRemoteProfile skips local and per-profile remote override paths', () => {
-  assert.equal(
-    pathWithGlobalRemoteProfile('/api/model/info', 'iris', {
-      globalRemote: false,
-      profileRemoteOverride: false
-    }),
-    '/api/model/info'
-  )
-  assert.equal(
-    pathWithGlobalRemoteProfile('/api/model/info', 'iris', {
-      globalRemote: true,
-      profileRemoteOverride: true
-    }),
-    '/api/model/info'
-  )
-})
-
-test('pathWithGlobalRemoteProfile skips empty profile/path safely', () => {
-  assert.equal(
-    pathWithGlobalRemoteProfile('/api/model/info', '', {
-      globalRemote: true,
-      profileRemoteOverride: false
-    }),
-    '/api/model/info'
-  )
-  assert.equal(
-    pathWithGlobalRemoteProfile('', 'iris', {
-      globalRemote: true,
-      profileRemoteOverride: false
-    }),
-    ''
-  )
-})
-
 // --- normalizeRemoteBaseUrl ---

 test('normalizeRemoteBaseUrl strips trailing slashes, hash, and query', () => {
--- a/apps/desktop/electron/main.cjs
+++ b/apps/desktop/electron/main.cjs
@@ -28,7 +28,6 @@ const { detectRemoteDisplay, isWindowsBinaryPathInWsl, isWslEnvironment } = requ
 const { runBootstrap } = require('./bootstrap-runner.cjs')
 const {
  buildSessionWindowUrl,
-  chatWindowWebPreferences,
  createSessionWindowRegistry,
  SESSION_WINDOW_MIN_HEIGHT,
  SESSION_WINDOW_MIN_WIDTH
@@ -40,12 +39,10 @@ const { waitForDashboardPort } = require('./backend-ready.cjs')
 const { serializeJsonBody, setJsonRequestHeaders } = require('./oauth-net-request.cjs')
 const { fetchMarketplaceThemes, searchMarketplaceThemes } = require('./vscode-marketplace.cjs')
 const { buildDesktopBackendEnv, normalizeHermesHomeRoot } = require('./backend-env.cjs')
-const { readWindowsUserEnvVar } = require('./windows-user-env.cjs')
 const { readDirForIpc } = require('./fs-read-dir.cjs')
 const { gitRootForIpc } = require('./git-root.cjs')
 const { worktreesForIpc } = require('./git-worktrees.cjs')
 const { OFFICIAL_REPO_HTTPS_URL, isOfficialSshRemote } = require('./update-remote.cjs')
-const { runRebuildWithRetry } = require('./update-rebuild.cjs')
 const {
  buildPosixCleanupScript,
  buildWindowsCleanupScript,
@@ -65,7 +62,6 @@ const {
  cookiesHaveLiveSession,
  normAuthMode,
  normalizeRemoteBaseUrl,
-  pathWithGlobalRemoteProfile,
  profileRemoteOverride,
  resolveAuthMode,
  resolveTestWsUrl,
@@ -246,16 +242,6 @@ if (INSTALL_STAMP) {
 function resolveHermesHome() {
  if (process.env.HERMES_HOME) return normalizeHermesHomeRoot(process.env.HERMES_HOME)
  if (USER_DATA_OVERRIDE) return path.join(path.resolve(USER_DATA_OVERRIDE), 'hermes-home')
-  if (IS_WINDOWS) {
-    // A GUI app launched from Explorer inherits the environment block captured
-    // at login, so a HERMES_HOME set via `setx` AFTER login is invisible in
-    // process.env even though the CLI (a fresh shell) sees it. Without this the
-    // backend silently falls back to %LOCALAPPDATA%\hermes and reports "No
-    // inference provider configured" despite a valid configured home (#45471).
-    // Consult the live User-scoped registry value before the default below.
-    const fromRegistry = readWindowsUserEnvVar('HERMES_HOME')
-    if (fromRegistry) return normalizeHermesHomeRoot(fromRegistry)
-  }
  if (IS_WINDOWS && process.env.LOCALAPPDATA) {
    const localappdata = path.join(process.env.LOCALAPPDATA, 'hermes')
    const legacy = path.join(app.getPath('home'), '.hermes')
@@ -2010,14 +1996,10 @@ async function applyUpdatesPosixInApp() {
  }

  emitUpdateProgress({ stage: 'rebuild', message: 'Rebuilding the desktop app…', percent: 60 })
-  // Retry-once: a first rebuild can fail on a still-settling tree or a
-  // self-healed (network-blocked) Electron download; a second run builds clean
-  // off the healed dist so we reach the swap+relaunch below instead of bailing.
-  const rebuilt = await runRebuildWithRetry(attempt => {
-    if (attempt > 0) {
-      emitUpdateProgress({ stage: 'rebuild', message: 'Retrying the desktop rebuild…', percent: 60 })
-    }
-    return runStreamedUpdate(hermes, ['desktop', '--build-only'], { cwd: updateRoot, env, stage: 'rebuild' })
+  const rebuilt = await runStreamedUpdate(hermes, ['desktop', '--build-only'], {
+    cwd: updateRoot,
+    env,
+    stage: 'rebuild'
  })
  if (rebuilt.code !== 0) {
    emitUpdateProgress({
@@ -5090,68 +5072,65 @@ function focusWindow(win) {
  win.focus()
 }

-function spawnSecondaryWindow({ sessionId, watch, newSession } = {}) {
-  const icon = getAppIconPath()
-  const win = new BrowserWindow({
-    width: SESSION_WINDOW_MIN_WIDTH,
-    height: SESSION_WINDOW_MIN_HEIGHT,
-    minWidth: SESSION_WINDOW_MIN_WIDTH,
-    minHeight: SESSION_WINDOW_MIN_HEIGHT,
-    title: 'Hermes',
-    titleBarStyle: 'hidden',
-    titleBarOverlay: getTitleBarOverlayOptions(),
-    trafficLightPosition: IS_MAC ? WINDOW_BUTTON_POSITION : undefined,
-    vibrancy: IS_MAC ? 'sidebar' : undefined,
-    opacity: windowOpacity(),
-    icon,
-    // Don't show until the renderer's first themed paint is ready. macOS
-    // `vibrancy` ignores `backgroundColor` and paints a translucent OS
-    // material (which follows the OS appearance, not the app theme), so a
-    // dark-themed app on a light-mode Mac flashes white until the renderer
-    // covers it. ready-to-show fires after the boot-time paint in
-    // themes/context.tsx, so the window appears already themed.
-    show: false,
-    backgroundColor: getWindowBackgroundColor(),
-    webPreferences: chatWindowWebPreferences(path.join(__dirname, 'preload.cjs'))
-  })
-
-  if (IS_MAC) {
-    win.setWindowButtonPosition?.(WINDOW_BUTTON_POSITION)
-  }
-
-  win.once('ready-to-show', () => {
-    if (!win.isDestroyed()) win.show()
-  })
-
-  win.on('will-enter-full-screen', () => sendWindowStateChanged(true))
-  win.on('enter-full-screen', () => sendWindowStateChanged(true))
-  win.on('will-leave-full-screen', () => sendWindowStateChanged(false))
-  win.on('leave-full-screen', () => sendWindowStateChanged(false))
-
-  wireCommonWindowHandlers(win)
-
-  win.loadURL(
-    buildSessionWindowUrl(sessionId, {
-      devServer: DEV_SERVER,
-      rendererIndexPath: DEV_SERVER ? undefined : resolveRendererIndex(),
-      watch,
-      newSession
-    })
-  )
-
-  return win
-}
-
 // Open (or focus) a standalone window for a single chat session.
 function createSessionWindow(sessionId, { watch = false } = {}) {
-  return sessionWindows.openOrFocus(sessionId, () => spawnSecondaryWindow({ sessionId, watch }))
-}
+  return sessionWindows.openOrFocus(sessionId, () => {
+    const icon = getAppIconPath()
+    const win = new BrowserWindow({
+      width: SESSION_WINDOW_MIN_WIDTH,
+      height: SESSION_WINDOW_MIN_HEIGHT,
+      minWidth: SESSION_WINDOW_MIN_WIDTH,
+      minHeight: SESSION_WINDOW_MIN_HEIGHT,
+      title: 'Hermes',
+      titleBarStyle: 'hidden',
+      titleBarOverlay: getTitleBarOverlayOptions(),
+      trafficLightPosition: IS_MAC ? WINDOW_BUTTON_POSITION : undefined,
+      vibrancy: IS_MAC ? 'sidebar' : undefined,
+      opacity: windowOpacity(),
+      icon,
+      // Don't show until the renderer's first themed paint is ready. macOS
+      // `vibrancy` ignores `backgroundColor` and paints a translucent OS
+      // material (which follows the OS appearance, not the app theme), so a
+      // dark-themed app on a light-mode Mac flashes white until the renderer
+      // covers it. ready-to-show fires after the boot-time paint in
+      // themes/context.tsx, so the window appears already themed.
+      show: false,
+      backgroundColor: getWindowBackgroundColor(),
+      webPreferences: {
+        preload: path.join(__dirname, 'preload.cjs'),
+        contextIsolation: true,
+        webviewTag: true,
+        sandbox: true,
+        nodeIntegration: false,
+        devTools: true
+      }
+    })

-// Open a fresh compact window on the new-session draft (#/). Not registry-keyed:
-// like ⌘N in a browser, every press opens a new window — and a draft window that
-// later converts to a real session must not get refocused as if it were blank.
-function createNewSessionWindow() {
-  return spawnSecondaryWindow({ newSession: true })
+    if (IS_MAC) {
+      win.setWindowButtonPosition?.(WINDOW_BUTTON_POSITION)
+    }
+
+    win.once('ready-to-show', () => {
+      if (!win.isDestroyed()) win.show()
+    })
+
+    win.on('will-enter-full-screen', () => sendWindowStateChanged(true))
+    win.on('enter-full-screen', () => sendWindowStateChanged(true))
+    win.on('will-leave-full-screen', () => sendWindowStateChanged(false))
+    win.on('leave-full-screen', () => sendWindowStateChanged(false))
+
+    wireCommonWindowHandlers(win)
+
+    win.loadURL(
+      buildSessionWindowUrl(sessionId, {
+        devServer: DEV_SERVER,
+        rendererIndexPath: DEV_SERVER ? undefined : resolveRendererIndex(),
+        watch
+      })
+    )
+
+    return win
+  })
 }

 function createWindow() {
@@ -5179,11 +5158,23 @@ function createWindow() {
    // material before the renderer paints the app theme. See createSessionWindow.
    show: false,
    backgroundColor: getWindowBackgroundColor(),
-    // Shared with the secondary session windows (chatWindowWebPreferences) so
-    // both keep `backgroundThrottling: false` — the chat transcript streams via
-    // a requestAnimationFrame-gated flush that Chromium pauses for blurred
-    // windows, stalling the live answer until refocus. See session-windows.cjs.
-    webPreferences: chatWindowWebPreferences(path.join(__dirname, 'preload.cjs'))
+    webPreferences: {
+      preload: path.join(__dirname, 'preload.cjs'),
+      contextIsolation: true,
+      webviewTag: true,
+      sandbox: true,
+      nodeIntegration: false,
+      devTools: true,
+      // Keep timers + requestAnimationFrame running at full speed when the
+      // window is blurred/occluded. The chat transcript streams to the screen
+      // through a requestAnimationFrame-gated flush (useSessionStateCache),
+      // so with Chromium's default background throttling the live answer
+      // stalls whenever this window isn't focused (e.g. you switch to your
+      // editor mid-turn, or open detached devtools) and only appears once you
+      // refocus or refresh. A streaming chat app must render in the
+      // background, so opt out (the secondary session windows do not set this).
+      backgroundThrottling: false
+    }
  })

  if (IS_MAC) {
@@ -5326,11 +5317,6 @@ ipcMain.handle('hermes:window:openSession', async (_event, sessionId, opts) => {

  return { ok: true }
 })
-ipcMain.handle('hermes:window:openNewSession', async () => {
-  createNewSessionWindow()
-
-  return { ok: true }
-})
 ipcMain.handle('hermes:bootstrap:reset', async () => {
  // Renderer's "Reload and retry" path. Clear the latched failure and
  // reset connection state so the next startHermes() call restarts the
@@ -5600,14 +5586,9 @@ ipcMain.handle('hermes:api', async (_event, request) => {

  await prepareProfileDeleteRequest(request)

-  const profile = request?.profile
-  const connection = await ensureBackend(profile)
+  const connection = await ensureBackend(request?.profile)
  const timeoutMs = resolveTimeoutMs(request?.timeoutMs, DEFAULT_FETCH_TIMEOUT_MS)
-  const requestPath = pathWithGlobalRemoteProfile(request.path, profile, {
-    globalRemote: globalRemoteActive(),
-    profileRemoteOverride: profileHasRemoteOverride(profile)
-  })
-  const url = `${connection.baseUrl}${requestPath}`
+  const url = `${connection.baseUrl}${request.path}`
  // OAuth gateways authenticate REST via the HttpOnly session cookie held in
  // the OAuth partition — route through Electron's net stack bound to that
  // session so the cookie attaches automatically. Token/local modes keep using
@@ -6551,12 +6532,6 @@ app.on('before-quit', () => {
  flushDesktopLogBufferSync()
  closePreviewWatchers()

-  // Kill open PTYs before environment teardown to avoid the node-pty#904
-  // ThreadSafeFunction SIGABRT race.
-  for (const id of [...terminalSessions.keys()]) {
-    disposeTerminalSession(id)
-  }
-
  if (hermesProcess && !hermesProcess.killed) {
    hermesProcess.kill('SIGTERM')
  }
--- a/apps/desktop/electron/preload.cjs
+++ b/apps/desktop/electron/preload.cjs
@@ -6,7 +6,6 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
  touchBackend: profile => ipcRenderer.invoke('hermes:backend:touch', profile),
  getGatewayWsUrl: profile => ipcRenderer.invoke('hermes:gateway:ws-url', profile),
  openSessionWindow: (sessionId, opts) => ipcRenderer.invoke('hermes:window:openSession', sessionId, opts),
-  openNewSessionWindow: () => ipcRenderer.invoke('hermes:window:openNewSession'),
  getBootProgress: () => ipcRenderer.invoke('hermes:boot-progress:get'),
  getConnectionConfig: profile => ipcRenderer.invoke('hermes:connection-config:get', profile),
  saveConnectionConfig: payload => ipcRenderer.invoke('hermes:connection-config:save', payload),
--- a/apps/desktop/electron/session-windows.cjs
+++ b/apps/desktop/electron/session-windows.cjs
@@ -10,41 +10,17 @@ const { pathToFileURL } = require('node:url')
 const SESSION_WINDOW_MIN_WIDTH = 420
 const SESSION_WINDOW_MIN_HEIGHT = 620

-// Shared webPreferences for every window that renders the chat transcript — the
-// primary window AND the secondary session windows. Keeping it in one place is
-// the whole point: the two BrowserWindow definitions in main.cjs used to be
-// hand-copied, and the secondary windows silently lost `backgroundThrottling:
-// false`, so a streamed answer stalled until the window regained focus.
-//
-// `backgroundThrottling: false` is load-bearing: the transcript streams to the
-// screen through a requestAnimationFrame-gated flush, which Chromium pauses for
-// blurred/occluded windows. A streaming chat app must keep painting in the
-// background, so every chat window opts out. The preload path is injected
-// because it depends on the Electron entry's __dirname.
-function chatWindowWebPreferences(preloadPath) {
-  return {
-    preload: preloadPath,
-    contextIsolation: true,
-    webviewTag: true,
-    sandbox: true,
-    nodeIntegration: false,
-    devTools: true,
-    backgroundThrottling: false
-  }
-}
-
 // Build the renderer URL for a secondary window. The renderer uses a
 // HashRouter, so the session route lives after the '#'. The `?win=secondary`
 // flag MUST sit in the query string BEFORE the '#': anything after the '#' is
 // treated as the route by HashRouter and would break routeSessionId(). The
 // renderer reads the flag from window.location.search to suppress the install /
-// onboarding overlays and the global session sidebar. `new=1` marks the compact
-// scratch window; `watch=1` marks a spectator window (e.g. a running subagent's
-// session): the renderer resumes it lazily so the gateway never builds an agent
-// just to stream into it.
-function buildSessionWindowUrl(sessionId, { devServer, rendererIndexPath, watch, newSession } = {}) {
-  const query = `?win=secondary${newSession ? '&new=1' : ''}${watch ? '&watch=1' : ''}`
-  const route = newSession ? '#/' : `#/${encodeURIComponent(sessionId)}`
+// onboarding overlays and the global session sidebar. `watch=1` marks a
+// spectator window (e.g. a running subagent's session): the renderer resumes
+// it lazily so the gateway never builds an agent just to stream into it.
+function buildSessionWindowUrl(sessionId, { devServer, rendererIndexPath, watch } = {}) {
+  const query = `?win=secondary${watch ? '&watch=1' : ''}`
+  const route = `#/${encodeURIComponent(sessionId)}`

  if (devServer) {
    const base = devServer.endsWith('/') ? devServer.slice(0, -1) : devServer
@@ -117,7 +93,6 @@ function createSessionWindowRegistry() {

 module.exports = {
  buildSessionWindowUrl,
-  chatWindowWebPreferences,
  createSessionWindowRegistry,
  SESSION_WINDOW_MIN_HEIGHT,
  SESSION_WINDOW_MIN_WIDTH
--- a/apps/desktop/electron/session-windows.test.cjs
+++ b/apps/desktop/electron/session-windows.test.cjs
@@ -1,11 +1,7 @@
 const assert = require('node:assert/strict')
 const test = require('node:test')

-const {
-  buildSessionWindowUrl,
-  chatWindowWebPreferences,
-  createSessionWindowRegistry
-} = require('./session-windows.cjs')
+const { buildSessionWindowUrl, createSessionWindowRegistry } = require('./session-windows.cjs')

 // A minimal fake BrowserWindow: tracks listeners + destroyed state and lets a
 // test fire the 'closed' event, mirroring the slice of the Electron API the
@@ -86,12 +82,6 @@ test('buildSessionWindowUrl adds the watch flag for spectator windows, before th
  assert.equal(url, 'http://localhost:5173/?win=secondary&watch=1#/abc')
 })

-test('buildSessionWindowUrl routes new-session windows to the draft (#/)', () => {
-  const url = buildSessionWindowUrl(null, { devServer: 'http://localhost:5173', newSession: true })
-
-  assert.equal(url, 'http://localhost:5173/?win=secondary&new=1#/')
-})
-
 test('registry opens one window per session and focuses on re-open', () => {
  const registry = createSessionWindowRegistry()
  let built = 0
@@ -179,21 +169,3 @@ test('registry trims the session id before keying', () => {

  assert.equal(registry.has('s1'), true)
 })
-
-test('chatWindowWebPreferences disables background throttling so streaming paints while blurred', () => {
-  // Regression: secondary session windows used to omit this flag, so a streamed
-  // answer stalled until the window regained focus (Chromium pauses the
-  // requestAnimationFrame-gated transcript flush for backgrounded windows).
-  const prefs = chatWindowWebPreferences('/tmp/preload.cjs')
-
-  assert.equal(prefs.backgroundThrottling, false)
-})
-
-test('chatWindowWebPreferences passes the preload path through and keeps the hardened defaults', () => {
-  const prefs = chatWindowWebPreferences('/some/preload.cjs')
-
-  assert.equal(prefs.preload, '/some/preload.cjs')
-  assert.equal(prefs.contextIsolation, true)
-  assert.equal(prefs.sandbox, true)
-  assert.equal(prefs.nodeIntegration, false)
-})
--- a/apps/desktop/electron/update-rebuild.cjs
+++ b/apps/desktop/electron/update-rebuild.cjs
@@ -1,29 +0,0 @@
-'use strict'
-
-/**
- * Retry-once policy for the desktop `--build-only` rebuild during self-update.
- *
- * The first rebuild can return nonzero on a still-settling post-update tree or a
- * network-blocked Electron fetch that the installer's self-heal repaired mid-run.
- * A second attempt then builds clean off the healed dist (the content-hash stamp
- * makes it a near-no-op when the first actually succeeded). Without the retry the
- * updater bails before the relaunch step — the app updates but doesn't restart.
- */
-
-function shouldRetryRebuild(code) {
-  return code !== 0
-}
-
-/**
- * Run `rebuild()` (async, resolves `{ code, ... }`), retrying once on failure.
- * Returns the final result.
- */
-async function runRebuildWithRetry(rebuild) {
-  let result = await rebuild(0)
-  if (shouldRetryRebuild(result.code)) {
-    result = await rebuild(1)
-  }
-  return result
-}
-
-module.exports = { shouldRetryRebuild, runRebuildWithRetry }
--- a/apps/desktop/electron/update-rebuild.test.cjs
+++ b/apps/desktop/electron/update-rebuild.test.cjs
@@ -1,55 +0,0 @@
-/**
- * Tests for electron/update-rebuild.cjs — the retry-once policy for the desktop
- * `--build-only` rebuild during self-update.
- *
- * Run with: node --test electron/update-rebuild.test.cjs
- * (Wired into npm test:desktop:platforms in package.json.)
- *
- * Why this matters: a first rebuild can return nonzero on a still-settling tree
- * or a self-healed (network-blocked) Electron download. Without a second attempt
- * the updater bails before the relaunch step — the app updates but never restarts
- * (the field report behind this fix). The retry must fire on failure, not on
- * success, and must run at most twice.
- */
-
-const test = require('node:test')
-const assert = require('node:assert/strict')
-
-const { shouldRetryRebuild, runRebuildWithRetry } = require('./update-rebuild.cjs')
-
-test('shouldRetryRebuild retries only on a non-success exit', () => {
-  assert.equal(shouldRetryRebuild(0), false)
-  assert.equal(shouldRetryRebuild(1), true)
-  assert.equal(shouldRetryRebuild(null), true)
-})
-
-test('a clean first rebuild runs once and does not retry', async () => {
-  const codes = []
-  const result = await runRebuildWithRetry(attempt => {
-    codes.push(attempt)
-    return Promise.resolve({ code: 0 })
-  })
-  assert.deepEqual(codes, [0])
-  assert.equal(result.code, 0)
-})
-
-test('a failed first rebuild retries once and succeeds', async () => {
-  const codes = []
-  const result = await runRebuildWithRetry(attempt => {
-    codes.push(attempt)
-    return Promise.resolve({ code: attempt === 0 ? 1 : 0 })
-  })
-  assert.deepEqual(codes, [0, 1])
-  assert.equal(result.code, 0)
-})
-
-test('a rebuild that keeps failing runs at most twice and reports the failure', async () => {
-  const codes = []
-  const result = await runRebuildWithRetry(attempt => {
-    codes.push(attempt)
-    return Promise.resolve({ code: 1, error: 'rebuild-failed' })
-  })
-  assert.deepEqual(codes, [0, 1])
-  assert.equal(result.code, 1)
-  assert.equal(result.error, 'rebuild-failed')
-})
--- a/apps/desktop/electron/windows-user-env.cjs
+++ b/apps/desktop/electron/windows-user-env.cjs
@@ -1,76 +0,0 @@
-// windows-user-env.cjs
-//
-// Read a User-scoped environment variable straight from the Windows registry
-// (HKCU\Environment).
-//
-// A GUI app launched from Explorer inherits the environment block captured at
-// login, so a variable set via `setx` AFTER login is invisible in process.env
-// even though a fresh shell — and the Hermes CLI — sees it immediately. The
-// desktop's HERMES_HOME resolution relies on process.env, so that stale-snapshot
-// gap silently sends the backend to the default %LOCALAPPDATA%\hermes. Reading
-// the live registry value closes the gap. See #45471.
-
-const { execFileSync } = require('node:child_process')
-
-// Parse the output of `reg query HKCU\Environment /v <name>`, which looks like:
-//
-//   HKEY_CURRENT_USER\Environment
-//       HERMES_HOME    REG_SZ    F:\Hermes\data
-//
-// Returns the raw value string (spaces inside the value preserved), or null when
-// the requested value line isn't present.
-function parseRegQueryValue(stdout, name) {
-  if (!stdout || !name) return null
-  const typePattern =
-    /^(\S+)\s+(?:REG_SZ|REG_EXPAND_SZ|REG_MULTI_SZ|REG_DWORD|REG_QWORD|REG_BINARY|REG_NONE)\s+(.*)$/
-  for (const rawLine of String(stdout).split(/\r?\n/)) {
-    const line = rawLine.trim()
-    const match = line.match(typePattern)
-    if (match && match[1].toLowerCase() === name.toLowerCase()) {
-      return match[2]
-    }
-  }
-  return null
-}
-
-// Expand %VAR% references against an env map. REG_EXPAND_SZ values store
-// unexpanded references; plain REG_SZ paths have none, so this is a no-op for
-// the common F:\... case. Unknown references are left verbatim.
-function expandWindowsEnvRefs(value, env = process.env) {
-  if (!value) return value
-  return value.replace(/%([^%]+)%/g, (whole, name) => {
-    const key = Object.keys(env).find(k => k.toUpperCase() === String(name).toUpperCase())
-    return key != null && env[key] != null ? env[key] : whole
-  })
-}
-
-// Read a User-scoped env var from HKCU\Environment. Windows-only: returns null
-// off-Windows (without spawning), on any spawn error, when `reg` exits non-zero
-// (the value doesn't exist), or when the value is empty.
-function readWindowsUserEnvVar(
-  name,
-  { platform = process.platform, env = process.env, exec = execFileSync } = {}
-) {
-  if (platform !== 'win32' || !name) return null
-  let stdout
-  try {
-    stdout = exec('reg', ['query', 'HKCU\\Environment', '/v', name], {
-      encoding: 'utf8',
-      windowsHide: true,
-      timeout: 5000
-    })
-  } catch {
-    // `reg` missing, or value absent (reg exits 1) — caller falls back.
-    return null
-  }
-  const raw = parseRegQueryValue(stdout, name)
-  if (raw == null) return null
-  const expanded = expandWindowsEnvRefs(raw, env).trim()
-  return expanded || null
-}
-
-module.exports = {
-  expandWindowsEnvRefs,
-  parseRegQueryValue,
-  readWindowsUserEnvVar
-}
--- a/apps/desktop/electron/windows-user-env.test.cjs
+++ b/apps/desktop/electron/windows-user-env.test.cjs
@@ -1,90 +0,0 @@
-const assert = require('node:assert/strict')
-const { test } = require('node:test')
-
-const {
-  expandWindowsEnvRefs,
-  parseRegQueryValue,
-  readWindowsUserEnvVar
-} = require('./windows-user-env.cjs')
-
-// ── parseRegQueryValue ─────────────────────────────────────────────────────
-
-test('parseRegQueryValue extracts a REG_SZ value', () => {
-  const out = [
-    '',
-    'HKEY_CURRENT_USER\\Environment',
-    '    HERMES_HOME    REG_SZ    F:\\Hermes\\data',
-    ''
-  ].join('\r\n')
-  assert.equal(parseRegQueryValue(out, 'HERMES_HOME'), 'F:\\Hermes\\data')
-})
-
-test('parseRegQueryValue matches the name case-insensitively', () => {
-  const out = 'HKEY_CURRENT_USER\\Environment\r\n    Hermes_Home    REG_EXPAND_SZ    %USERPROFILE%\\h\r\n'
-  assert.equal(parseRegQueryValue(out, 'HERMES_HOME'), '%USERPROFILE%\\h')
-})
-
-test('parseRegQueryValue preserves spaces inside the value', () => {
-  const out = '    HERMES_HOME    REG_SZ    C:\\Program Files\\Hermes\r\n'
-  assert.equal(parseRegQueryValue(out, 'HERMES_HOME'), 'C:\\Program Files\\Hermes')
-})
-
-test('parseRegQueryValue returns null when the value line is absent', () => {
-  const out = 'HKEY_CURRENT_USER\\Environment\r\n    Path    REG_SZ    C:\\x\r\n'
-  assert.equal(parseRegQueryValue(out, 'HERMES_HOME'), null)
-  assert.equal(parseRegQueryValue('', 'HERMES_HOME'), null)
-  assert.equal(parseRegQueryValue('garbage', 'HERMES_HOME'), null)
-})
-
-// ── expandWindowsEnvRefs ───────────────────────────────────────────────────
-
-test('expandWindowsEnvRefs expands %VAR% case-insensitively', () => {
-  assert.equal(
-    expandWindowsEnvRefs('%UserProfile%\\h', { USERPROFILE: 'C:\\Users\\jeff' }),
-    'C:\\Users\\jeff\\h'
-  )
-})
-
-test('expandWindowsEnvRefs leaves literal paths and unknown refs intact', () => {
-  assert.equal(expandWindowsEnvRefs('F:\\Hermes\\data', {}), 'F:\\Hermes\\data')
-  assert.equal(expandWindowsEnvRefs('%NOPE%\\x', {}), '%NOPE%\\x')
-})
-
-// ── readWindowsUserEnvVar ──────────────────────────────────────────────────
-
-test('readWindowsUserEnvVar returns null off Windows without spawning', () => {
-  let spawned = false
-  const exec = () => {
-    spawned = true
-    return ''
-  }
-  assert.equal(readWindowsUserEnvVar('HERMES_HOME', { platform: 'linux', exec }), null)
-  assert.equal(spawned, false)
-})
-
-test('readWindowsUserEnvVar queries HKCU\\Environment and expands the value', () => {
-  const calls = []
-  const exec = (cmd, args) => {
-    calls.push([cmd, args])
-    return 'HKEY_CURRENT_USER\\Environment\r\n    HERMES_HOME    REG_EXPAND_SZ    %DRIVE%\\Hermes\r\n'
-  }
-  const value = readWindowsUserEnvVar('HERMES_HOME', {
-    platform: 'win32',
-    env: { DRIVE: 'F:' },
-    exec
-  })
-  assert.equal(value, 'F:\\Hermes')
-  assert.deepEqual(calls, [['reg', ['query', 'HKCU\\Environment', '/v', 'HERMES_HOME']]])
-})
-
-test('readWindowsUserEnvVar returns null when reg exits non-zero (value missing)', () => {
-  const exec = () => {
-    throw new Error('reg exited 1')
-  }
-  assert.equal(readWindowsUserEnvVar('HERMES_HOME', { platform: 'win32', exec }), null)
-})
-
-test('readWindowsUserEnvVar returns null for an empty value', () => {
-  const exec = () => '    HERMES_HOME    REG_SZ    \r\n'
-  assert.equal(readWindowsUserEnvVar('HERMES_HOME', { platform: 'win32', exec }), null)
-})
--- a/apps/desktop/package.json
+++ b/apps/desktop/package.json
@@ -20,8 +20,7 @@
    "start": "npm run build && electron .",
    "build": "node scripts/assert-root-install.cjs && node scripts/write-build-stamp.cjs && node scripts/stage-native-deps.cjs && tsc -b && vite build && npm run postbuild",
    "postbuild": "node scripts/assert-dist-built.cjs",
-    "prebuilder": "node scripts/patch-electron-builder-mac-binary.cjs",
-    "builder": "cross-env NODE_OPTIONS=--max-old-space-size=16384 node scripts/run-electron-builder.cjs",
+    "builder": "cross-env NODE_OPTIONS=--max-old-space-size=16384 electron-builder",
    "pack": "npm run build && npm run builder -- --dir",
    "dist": "npm run build && npm run builder",
    "dist:mac": "npm run build && npm run builder -- --mac",
@@ -37,7 +36,7 @@
    "test:desktop:nsis": "node scripts/test-desktop.mjs nsis",
    "test:desktop:existing": "node scripts/test-desktop.mjs existing",
    "test:desktop:fresh": "node scripts/test-desktop.mjs fresh",
-    "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs electron/update-rebuild.test.cjs electron/windows-user-env.test.cjs",
+    "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs",
    "typecheck": "tsc -p . --noEmit",
    "lint": "eslint src/ electron/",
    "lint:fix": "eslint src/ electron/ --fix",
@@ -55,7 +54,7 @@
    "@dnd-kit/sortable": "^10.0.0",
    "@dnd-kit/utilities": "^3.2.2",
    "@hermes/shared": "file:../shared",
-    "@icons-pack/react-simple-icons": "=13.11.1",
+    "@icons-pack/react-simple-icons": "^13.13.0",
    "@nanostores/react": "^1.1.0",
    "@nous-research/ui": "^0.13.0",
    "@radix-ui/react-slot": "^1.2.4",
@@ -117,7 +116,7 @@
    "@vitejs/plugin-react": "^6.0.1",
    "concurrently": "^10.0.3",
    "cross-env": "^10.1.0",
-    "electron": "40.10.2",
+    "electron": "^40.9.3",
    "electron-builder": "^26.8.1",
    "eslint": "^9.39.4",
    "eslint-plugin-perfectionist": "^5.9.0",
@@ -134,7 +133,7 @@
    "wait-on": "^9.0.5"
  },
  "build": {
-    "electronVersion": "40.10.2",
+    "electronVersion": "40.9.3",
    "appId": "com.nousresearch.hermes",
    "productName": "Hermes",
    "executableName": "Hermes",
--- a/apps/desktop/scripts/patch-electron-builder-mac-binary.cjs
+++ b/apps/desktop/scripts/patch-electron-builder-mac-binary.cjs
@@ -1,64 +0,0 @@
-const fs = require('node:fs')
-const path = require('node:path')
-
-if (process.platform !== 'darwin') {
-  process.exit(0)
-}
-
-const desktopRoot = path.resolve(__dirname, '..')
-const repoRoot = path.resolve(desktopRoot, '..', '..')
-const electronMacPath = path.join(repoRoot, 'node_modules', 'app-builder-lib', 'out', 'electron', 'electronMac.js')
-
-const marker = 'hermes-macos-electron-binary-fallback'
-const needle = `    await Promise.all([
-        doRename(path.join(contentsPath, "MacOS"), electronBranding.productName, appPlist.CFBundleExecutable),
-        (0, builder_util_1.unlinkIfExists)(path.join(appOutDir, "LICENSE")),
-        (0, builder_util_1.unlinkIfExists)(path.join(appOutDir, "LICENSES.chromium.html")),
-    ]);`
-const replacement = `    // ${marker}: electron-builder 26.8.x can sometimes copy
-    // Electron.app without its main MacOS/Electron binary before this rename.
-    // Restore it from the installed Electron runtime so local desktop installs
-    // do not fail with ENOENT during macOS arm64 packaging.
-    const macosDir = path.join(contentsPath, "MacOS");
-    const bundledElectronBinary = path.join(macosDir, electronBranding.productName);
-    if (!fs.existsSync(bundledElectronBinary)) {
-        const candidates = [
-            path.join(packager.info.framework.distMacOsAppName, "Contents", "MacOS", electronBranding.productName),
-            // npm may nest the workspace-only electron devDep under
-            // apps/desktop/node_modules (process.cwd() during pack), or hoist
-            // it to the repo root. Try the workspace-local install first, then
-            // the root hoist, so the fallback works under either layout.
-            path.join(process.cwd(), "node_modules", "electron", "dist", "Electron.app", "Contents", "MacOS", electronBranding.productName),
-            path.join(process.cwd(), "..", "..", "node_modules", "electron", "dist", "Electron.app", "Contents", "MacOS", electronBranding.productName),
-        ];
-        const sourceBinary = candidates.find(candidate => fs.existsSync(candidate));
-        if (sourceBinary == null) {
-            throw new Error("Electron binary missing from packaged app and Electron runtime: " + bundledElectronBinary);
-        }
-        await (0, promises_1.copyFile)(sourceBinary, bundledElectronBinary);
-        await (0, promises_1.chmod)(bundledElectronBinary, 0o755);
-    }
-    await Promise.all([
-        doRename(macosDir, electronBranding.productName, appPlist.CFBundleExecutable),
-        (0, builder_util_1.unlinkIfExists)(path.join(appOutDir, "LICENSE")),
-        (0, builder_util_1.unlinkIfExists)(path.join(appOutDir, "LICENSES.chromium.html")),
-    ]);`
-
-if (!fs.existsSync(electronMacPath)) {
-  console.warn(`[patch-electron-builder] skipped: ${electronMacPath} not found`)
-  process.exit(0)
-}
-
-const source = fs.readFileSync(electronMacPath, 'utf8')
-if (source.includes(marker)) {
-  console.log('[patch-electron-builder] macOS Electron binary fallback already applied')
-  process.exit(0)
-}
-
-if (!source.includes(needle)) {
-  console.warn('[patch-electron-builder] skipped: expected electronMac.js shape not found')
-  process.exit(0)
-}
-
-fs.writeFileSync(electronMacPath, source.replace(needle, replacement))
-console.log('[patch-electron-builder] applied macOS Electron binary fallback')
--- a/apps/desktop/scripts/run-electron-builder.cjs
+++ b/apps/desktop/scripts/run-electron-builder.cjs
@@ -1,57 +0,0 @@
-"use strict"
-
-// Resolve electronDist at runtime (#38673, #47917): electron-builder 26.8.x can
-// re-unpack a broken Electron.app; reusing the installed dist dodges that.
-// npm workspace hoisting is non-deterministic — require.resolve finds electron
-// wherever it landed. Dist present → -c.electronDist=<abs>/dist; absent → let
-// electron-builder fetch via @electron/get (electronVersion + ELECTRON_MIRROR).
-
-const fs = require("node:fs")
-const path = require("node:path")
-const { spawnSync } = require("node:child_process")
-
-function electronDistDir() {
-  try {
-    return path.join(path.dirname(require.resolve("electron/package.json")), "dist")
-  } catch {
-    return null
-  }
-}
-
-function distBinary(dist) {
-  if (process.platform === "darwin") {
-    return path.join(dist, "Electron.app", "Contents", "MacOS", "Electron")
-  }
-  if (process.platform === "win32") {
-    return path.join(dist, "electron.exe")
-  }
-  return path.join(dist, "electron")
-}
-
-function electronBuilderCli() {
-  const pkgJson = require.resolve("electron-builder/package.json")
-  const bin = require(pkgJson).bin
-  const rel = typeof bin === "string" ? bin : bin["electron-builder"]
-  return path.join(path.dirname(pkgJson), rel)
-}
-
-const dist = electronDistDir()
-const args = []
-if (dist && fs.existsSync(distBinary(dist))) {
-  args.push(`-c.electronDist=${dist}`)
-} else {
-  console.warn(
-    "[run-electron-builder] no local electron dist; electron-builder will fetch " +
-      "via @electron/get (electronVersion + ELECTRON_MIRROR)."
-  )
-}
-args.push(...process.argv.slice(2))
-
-const result = spawnSync(process.execPath, [electronBuilderCli(), ...args], {
-  stdio: "inherit",
-})
-if (result.error) {
-  console.error(`[run-electron-builder] spawn failed: ${result.error.message}`)
-  process.exit(1)
-}
-process.exit(result.status == null ? 1 : result.status)
--- a/apps/desktop/src/app/artifacts/index.tsx
+++ b/apps/desktop/src/app/artifacts/index.tsx
@@ -23,7 +23,6 @@ import { type Translations, useI18n } from '@/i18n'
 import { sessionTitle } from '@/lib/chat-runtime'
 import { ExternalLink, ExternalLinkIcon, hostPathLabel, urlSlugTitleLabel, useLinkTitle } from '@/lib/external-link'
 import { FileImage, FileText, FolderOpen, Link2 } from '@/lib/icons'
-import { mediaExternalUrl } from '@/lib/media'
 import { cn } from '@/lib/utils'
 import { notifyError } from '@/store/notifications'
 import type { SessionInfo, SessionMessage } from '@/types/hermes'
@@ -125,12 +124,17 @@ function artifactKind(value: string): ArtifactKind {
 }

 function artifactHref(value: string): string {
-  if (value.startsWith('http://') || value.startsWith('https://') || value.startsWith('data:')) {
+  if (
+    value.startsWith('http://') ||
+    value.startsWith('https://') ||
+    value.startsWith('file://') ||
+    value.startsWith('data:')
+  ) {
    return value
  }

-  if (value.startsWith('file://') || value.startsWith('/')) {
-    return mediaExternalUrl(value)
+  if (value.startsWith('/')) {
+    return `file://${encodeURI(value)}`
  }

  return value
--- a/apps/desktop/src/app/chat/composer/controls.tsx
+++ b/apps/desktop/src/app/chat/composer/controls.tsx
@@ -9,7 +9,6 @@ import { formatCombo } from '@/lib/keybinds/combo'
 import { cn } from '@/lib/utils'

 import type { ConversationStatus } from './hooks/use-voice-conversation'
-import { ModelPill } from './model-pill'
 import type { ChatBarState, VoiceStatus } from './types'

 export const ICON_BTN = 'size-(--composer-control-size) shrink-0 rounded-md'
@@ -67,7 +66,6 @@ export function ComposerControls({
  const c = t.composer
  const steerCombo = formatCombo('mod+enter')
  const steerLabel = `${c.steer} (${steerCombo})`
-
  const steerTip = (
    <span className="inline-flex items-center gap-1.5">
      {c.steer}
@@ -83,10 +81,8 @@ export function ComposerControls({

  return (
    <div className="ml-auto flex shrink-0 items-center gap-(--composer-control-gap)">
-      <ModelPill disabled={disabled} model={state.model} />
-      {/* While the agent runs and the user is typing, steer takes over the mic's
-          slot rather than crowding the row with an extra button. */}
-      {canSteer ? (
+      <DictationButton disabled={disabled} onToggle={onDictate} state={state.voice} status={voiceStatus} />
+      {canSteer && (
        <Tip label={steerTip}>
          <Button
            aria-label={steerLabel}
@@ -100,8 +96,6 @@ export function ComposerControls({
            <SteeringWheel size={16} />
          </Button>
        </Tip>
-      ) : (
-        <DictationButton disabled={disabled} onToggle={onDictate} state={state.voice} status={voiceStatus} />
      )}
      {showVoicePrimary ? (
        <Tip label={c.startVoice}>
--- a/apps/desktop/src/app/chat/composer/model-pill.tsx
+++ b/apps/desktop/src/app/chat/composer/model-pill.tsx
@@ -1,86 +0,0 @@
-import { useStore } from '@nanostores/react'
-import { useState } from 'react'
-
-import { ModelMenuCloseContext } from '@/app/shell/model-menu-panel'
-import { Button } from '@/components/ui/button'
-import { DropdownMenu, DropdownMenuContent, DropdownMenuTrigger } from '@/components/ui/dropdown-menu'
-import { GlyphSpinner } from '@/components/ui/glyph-spinner'
-import { useI18n } from '@/i18n'
-import { ChevronDown } from '@/lib/icons'
-import { formatModelStatusLabel } from '@/lib/model-status-label'
-import { cn } from '@/lib/utils'
-import {
-  $currentFastMode,
-  $currentModel,
-  $currentProvider,
-  $currentReasoningEffort,
-  setModelPickerOpen
-} from '@/store/session'
-
-import type { ChatBarState } from './types'
-
-const PILL = cn(
-  'h-(--composer-control-size) max-w-40 shrink-0 gap-1 rounded-md px-2 text-xs font-normal',
-  'text-(--ui-text-tertiary) hover:bg-(--chrome-action-hover) hover:text-foreground'
-)
-
-/**
- * Composer model selector — the relocated status-bar pill. Reuses the live
- * `model.options` dropdown (`modelMenuContent`) verbatim; falls back to the
- * full picker when the gateway is closed and no live menu exists.
- */
-export function ModelPill({ disabled, model }: { disabled: boolean; model: ChatBarState['model'] }) {
-  const copy = useI18n().t.shell.statusbar
-  const currentModel = useStore($currentModel)
-  const currentProvider = useStore($currentProvider)
-  const fastMode = useStore($currentFastMode)
-  const reasoningEffort = useStore($currentReasoningEffort)
-  const [open, setOpen] = useState(false)
-
-  // The model resolves a beat after the gateway/session comes up. Rather than
-  // flash a literal "No model", show a quiet loader (inherits the pill text
-  // color at half opacity) until a model lands.
-  const label = (
-    <>
-      {currentModel.trim() ? (
-        <span className="truncate">{formatModelStatusLabel(currentModel, { fastMode, reasoningEffort })}</span>
-      ) : (
-        <GlyphSpinner className="opacity-50" spinner="braille" />
-      )}
-      <ChevronDown className="size-2.5 shrink-0 opacity-50" />
-    </>
-  )
-
-  const title = currentProvider ? copy.modelTitle(currentProvider, currentModel || copy.modelNone) : copy.switchModel
-
-  if (!model.modelMenuContent) {
-    return (
-      <Button
-        aria-label={copy.openModelPicker}
-        className={PILL}
-        disabled={disabled}
-        onClick={() => setModelPickerOpen(true)}
-        title={copy.openModelPicker}
-        type="button"
-        variant="ghost"
-      >
-        {label}
-      </Button>
-    )
-  }
-
-  return (
-    <DropdownMenu onOpenChange={setOpen} open={open}>
-      <DropdownMenuTrigger asChild>
-        <Button aria-label={title} className={PILL} disabled={disabled} title={title} type="button" variant="ghost">
-          {label}
-        </Button>
-      </DropdownMenuTrigger>
-      <DropdownMenuContent align="end" className="w-64 p-0" side="top" sideOffset={8}>
-        <ModelMenuCloseContext.Provider value={() => setOpen(false)}>
-          {model.modelMenuContent}
-        </ModelMenuCloseContext.Provider>
-      </DropdownMenuContent>
-    </DropdownMenu>
-  )
-}
--- a/apps/desktop/src/app/chat/composer/types.ts
+++ b/apps/desktop/src/app/chat/composer/types.ts
@@ -1,5 +1,3 @@
-import type { ReactNode } from 'react'
-
 import type { HermesGateway } from '@/hermes'
 import type { ComposerAttachment } from '@/store/composer'

@@ -24,8 +22,6 @@ export interface ChatBarState {
    canSwitch: boolean
    loading?: boolean
    quickModels?: QuickModelOption[]
-    /** Reused status-bar dropdown (built with gateway + selectModel upstream). */
-    modelMenuContent?: ReactNode
  }
  tools: { enabled: boolean; label: string; suggestions?: ContextSuggestion[] }
  voice: { enabled: boolean; active: boolean }
--- a/apps/desktop/src/app/chat/index.tsx
+++ b/apps/desktop/src/app/chat/index.tsx
@@ -15,9 +15,7 @@ import { Backdrop } from '@/components/Backdrop'
 import { PromptOverlays } from '@/components/prompt-overlays'
 import { Button } from '@/components/ui/button'
 import { Codicon } from '@/components/ui/codicon'
-import { ErrorState } from '@/components/ui/error-state'
 import { getGlobalModelOptions, type HermesGateway } from '@/hermes'
-import { useI18n } from '@/i18n'
 import type { ChatMessage } from '@/lib/chat-messages'
 import { quickModelOptions, sessionTitle, toRuntimeMessage } from '@/lib/chat-runtime'
 import { useIncrementalExternalStoreRuntime } from '@/lib/incremental-external-store-runtime'
@@ -40,12 +38,10 @@ import {
  $lastVisibleMessageIsUser,
  $messages,
  $messagesEmpty,
-  $resumeExhaustedSessionId,
  $selectedStoredSessionId,
  $sessions,
  sessionPinId
 } from '@/store/session'
-import { isSecondaryWindow } from '@/store/windows'
 import type { ModelOptionsResponse } from '@/types/hermes'

 import { routeSessionId } from '../routes'
@@ -65,7 +61,6 @@ import { threadLoadingState } from './thread-loading'

 interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
  gateway: HermesGateway | null
-  modelMenuContent?: React.ReactNode
  onToggleSelectedPin: () => void
  onDeleteSelectedSession: () => void
  onCancel: () => Promise<void> | void
@@ -89,9 +84,7 @@ interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
  onEdit: (message: AppendMessage) => Promise<void>
  onReload: (parentId: string | null) => Promise<void>
  onRestoreToMessage?: (messageId: string) => Promise<void>
-  onRetryResume: (sessionId: string) => void
  onTranscribeAudio?: (audio: Blob) => Promise<string>
-  onDismissError?: (messageId: string) => void
 }

 interface ChatHeaderProps {
@@ -126,10 +119,10 @@ function ChatHeader({
      ? pinnedSessionIds.includes(selectedSessionId)
      : false

-  // Secondary windows (new-session scratch, subagent watch, cmd-click pop-out)
-  // are compact side panels — they drop the session-actions header + border
-  // entirely. A brand-new draft has nothing to pin/delete/rename either.
-  if (isSecondaryWindow() || (!selectedSessionId && !activeSessionId && !isRoutedSessionView)) {
+  // A brand-new session has no session to pin/delete/rename, so the header is
+  // just a dead "New session" label + chevron. Drop it (and its border)
+  // entirely until there's a real session to act on.
+  if (!selectedSessionId && !activeSessionId && !isRoutedSessionView) {
    return null
  }

@@ -256,7 +249,6 @@ function ChatRuntimeBoundary({
 export function ChatView({
  className,
  gateway,
-  modelMenuContent,
  onToggleSelectedPin,
  onDeleteSelectedSession,
  onCancel,
@@ -277,12 +269,9 @@ export function ChatView({
  onEdit,
  onReload,
  onRestoreToMessage,
-  onRetryResume,
-  onTranscribeAudio,
-  onDismissError
+  onTranscribeAudio
 }: ChatViewProps) {
  const location = useLocation()
-  const { t } = useI18n()
  const activeSessionId = useStore($activeSessionId)
  const awaitingResponse = useStore($awaitingResponse)
  const busy = useStore($busy)
@@ -304,7 +293,6 @@ export function ChatView({
  const messagesEmpty = useStore($messagesEmpty)
  const lastVisibleIsUser = useStore($lastVisibleMessageIsUser)
  const selectedSessionId = useStore($selectedStoredSessionId)
-  const resumeExhaustedSessionId = useStore($resumeExhaustedSessionId)
  const routedSessionId = routeSessionId(location.pathname)
  const isRoutedSessionView = Boolean(routedSessionId)

@@ -314,31 +302,16 @@ export function ChatView({
  // waiting for the resume effect (which paints a frame later) to clear them.
  const routeSessionMismatch = isRoutedSessionView && routedSessionId !== selectedSessionId

-  // The compact new-session pop-out skips the wordmark/tagline intro — it's a
-  // scratch window, not the full-height empty state.
-  const showIntro =
-    !isSecondaryWindow() && freshDraftReady && !isRoutedSessionView && !selectedSessionId && !activeSessionId && messagesEmpty
+  const showIntro = freshDraftReady && !isRoutedSessionView && !selectedSessionId && !activeSessionId && messagesEmpty

  // Session is still loading if the route references a session we haven't
  // resumed yet. Once `activeSessionId` is set (runtime has resumed), the
  // session exists — even if it has zero messages (a brand-new routed
  // session). The flicker where `busy` flips true briefly during hydrate
  // is handled by `threadLoadingState`'s last-visible-user gate.
-  //
-  // resumeExhausted: the bounded auto-retry in use-route-resume gave up on this
-  // routed session (gateway RPC + REST fallback failed through every attempt).
-  // Suppress the loader and show an explicit error + manual Retry instead of
-  // spinning forever. Gated on the route matching so a stale latch from another
-  // session can't blank the current one.
-  const resumeExhausted = isRoutedSessionView && resumeExhaustedSessionId === routedSessionId
-
-  const loadingSession =
-    !resumeExhausted && isRoutedSessionView && (routeSessionMismatch || (messagesEmpty && !activeSessionId))
-
+  const loadingSession = isRoutedSessionView && (routeSessionMismatch || (messagesEmpty && !activeSessionId))
  const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse, lastVisibleIsUser)
-  // Hide the composer in the exhausted error state too: there's no live runtime
-  // to send to until a retry rebinds one.
-  const showChatBar = !loadingSession && !resumeExhausted
+  const showChatBar = !loadingSession
  const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new')

  const modelOptionsQuery = useQuery<ModelOptionsResponse>({
@@ -369,7 +342,6 @@ export function ChatView({
        provider: currentProvider,
        canSwitch: gatewayOpen,
        loading: !gatewayOpen || (!currentModel && !currentProvider),
-        modelMenuContent,
        quickModels
      },
      tools: {
@@ -382,7 +354,7 @@ export function ChatView({
        active: false
      }
    }),
-    [contextSuggestions, currentModel, currentProvider, gatewayOpen, modelMenuContent, quickModels]
+    [contextSuggestions, currentModel, currentProvider, gatewayOpen, quickModels]
  )

  // Drop files anywhere in the conversation area, not just on the composer
@@ -453,7 +425,6 @@ export function ChatView({
            loading={threadLoading}
            onBranchInNewChat={onBranchInNewChat}
            onCancel={onCancel}
-            onDismissError={onDismissError}
            onRestoreToMessage={onRestoreToMessage}
            sessionId={activeSessionId}
            sessionKey={threadKey}
@@ -487,21 +458,6 @@ export function ChatView({
            </Suspense>
          )}
        </ChatRuntimeBoundary>
-        {resumeExhausted && routedSessionId && (
-          <div className="absolute inset-0 z-10 grid place-items-center bg-(--ui-chat-surface-background) px-8 py-10">
-            <ErrorState
-              className="max-w-sm"
-              description={t.desktop.resumeStrandedBody}
-              title={t.desktop.resumeStrandedTitle}
-            >
-              <div className="grid justify-items-center">
-                <Button onClick={() => onRetryResume(routedSessionId)} size="sm" variant="outline">
-                  {t.desktop.resumeRetry}
-                </Button>
-              </div>
-            </ErrorState>
-          </div>
-        )}
        {showChatBar && <ScrollToBottomButton />}
        <ChatDropOverlay kind={dragKind} />
        <ChatSwapOverlay profile={gatewaySwapTarget} />
--- a/apps/desktop/src/app/desktop-controller.tsx
+++ b/apps/desktop/src/app/desktop-controller.tsx
@@ -13,7 +13,7 @@ import { useSkinCommand } from '@/themes/use-skin-command'

 import { formatRefValue } from '../components/assistant-ui/directive-text'
 import { getCronJobs, getSessionMessages, listAllProfileSessions, type SessionInfo, triggerCronJob } from '../hermes'
-import { type ChatMessage, chatMessageText, preserveLocalAssistantErrors, toChatMessages } from '../lib/chat-messages'
+import { preserveLocalAssistantErrors, toChatMessages } from '../lib/chat-messages'
 import {
  isMessagingSource,
  LOCAL_SESSION_SOURCE_IDS,
@@ -52,10 +52,7 @@ import {
  $currentCwd,
  $freshDraftReady,
  $gatewayState,
-  $messages,
  $messagingSessions,
-  $resumeFailedSessionId,
-  $resumeExhaustedSessionId,
  $selectedStoredSessionId,
  $sessions,
  $workingSessionIds,
@@ -80,7 +77,6 @@ import {
  setSessionsLoading,
  setSessionsTotal
 } from '../store/session'
-import { onSessionsChanged } from '../store/session-sync'
 import { clearSessionTodos, setSessionTodos, todoListActive } from '../store/todos'
 import { openUpdatesWindow, startUpdatePoller, stopUpdatePoller } from '../store/updates'
 import { isSecondaryWindow } from '../store/windows'
@@ -202,8 +198,6 @@ export function DesktopController() {
  const activeSessionId = useStore($activeSessionId)
  const currentCwd = useStore($currentCwd)
  const freshDraftReady = useStore($freshDraftReady)
-  const resumeFailedSessionId = useStore($resumeFailedSessionId)
-  const resumeExhaustedSessionId = useStore($resumeExhaustedSessionId)
  const filePreviewTarget = useStore($filePreviewTarget)
  const previewTarget = useStore($previewTarget)
  const selectedStoredSessionId = useStore($selectedStoredSessionId)
@@ -470,17 +464,6 @@ export function DesktopController() {
    void refreshSessions()
  }, [refreshSessions])

-  // Another window mutated the shared session list (e.g. a chat started in the
-  // pop-out). Re-pull so the sidebar reflects it. Pop-outs have no sidebar, so
-  // only real windows bother.
-  useEffect(() => {
-    if (isSecondaryWindow()) {
-      return
-    }
-
-    return onSessionsChanged(() => void refreshSessions().catch(() => undefined))
-  }, [refreshSessions])
-
  // ALL-profiles view pages one profile at a time: fetch that profile's next
  // page and merge it in place, leaving every other profile's rows untouched.
  const loadMoreSessionsForProfile = useCallback(async (profile: string) => {
@@ -716,9 +699,7 @@ export function DesktopController() {
    }

    lastGatewayProfileRef.current = activeGatewayProfile
-    // Force: the new profile has its own default, so reseed even if the composer
-    // already shows the previous profile's model.
-    void refreshCurrentModel(true)
+    void refreshCurrentModel()
    void refreshActiveProfile()
  }, [activeGatewayProfile, refreshCurrentModel])

@@ -741,49 +722,6 @@ export function DesktopController() {
    [branchCurrentSession, refreshSessions]
  )

-  // Clear a failed turn's red error banner from the transcript. Errors are
-  // renderer-local state (never persisted), so dismissing is purely a view +
-  // session-cache edit. A message that errored before emitting any visible
-  // text is a bare error placeholder → drop it entirely; one that streamed
-  // partial output then failed keeps its content and just sheds the error.
-  // Both the per-runtime cache AND the live $messages view must be updated:
-  // `preserveLocalAssistantErrors` re-grafts any still-errored message it
-  // finds in the view onto the next session.info flush, so clearing only the
-  // cache would let the heartbeat resurrect the banner.
-  const dismissError = useCallback(
-    (messageId: string) => {
-      const runtimeSessionId = activeSessionIdRef.current
-
-      if (!runtimeSessionId) {
-        return
-      }
-
-      const clearErrorIn = (messages: ChatMessage[]): ChatMessage[] =>
-        messages.flatMap(message => {
-          if (message.id !== messageId || !message.error) {
-            return [message]
-          }
-
-          if (!chatMessageText(message).trim() && !message.parts.some(part => part.type !== 'text')) {
-            return []
-          }
-
-          return [{ ...message, error: undefined, pending: false }]
-        })
-
-      // View first: the flush below reads $messages as the "current" baseline
-      // for error preservation, so the banner must be gone from it before the
-      // cache update triggers a re-sync.
-      setMessages(clearErrorIn($messages.get()))
-
-      updateSessionState(runtimeSessionId, state => ({
-        ...state,
-        messages: clearErrorIn(state.messages)
-      }))
-    },
-    [activeSessionIdRef, updateSessionState]
-  )
-
  const startSessionInWorkspace = useCallback(
    (path: null | string) => {
      startFreshSessionDraft()
@@ -893,8 +831,6 @@ export function DesktopController() {
    gatewayState,
    locationPathname: location.pathname,
    resumeSession,
-    resumeFailedSessionId,
-    resumeExhaustedSessionId,
    routedSessionId,
    runtimeIdByStoredSessionIdRef,
    selectedStoredSessionId,
@@ -911,6 +847,7 @@ export function DesktopController() {
    gatewayLogLines,
    gatewayState,
    inferenceStatus,
+    modelMenuContent,
    openAgents,
    freshDraftReady,
    openCommandCenterSection,
@@ -1032,7 +969,6 @@ export function DesktopController() {
    <ChatView
      gateway={gatewayRef.current}
      maxVoiceRecordingSeconds={voiceMaxRecordingSeconds}
-      modelMenuContent={modelMenuContent}
      onAddContextRef={composer.addContextRefAttachment}
      onAddUrl={url => composer.addContextRefAttachment(`@url:${formatRefValue(url)}`, url)}
      onAttachDroppedItems={composer.attachDroppedItems}
@@ -1044,7 +980,6 @@ export function DesktopController() {
          void removeSession(selectedStoredSessionId)
        }
      }}
-      onDismissError={dismissError}
      onEdit={editMessage}
      onPasteClipboardImage={() => void composer.pasteClipboardImage()}
      onPickFiles={() => void composer.pickContextPaths('file')}
@@ -1053,7 +988,6 @@ export function DesktopController() {
      onReload={reloadFromMessage}
      onRemoveAttachment={id => void composer.removeAttachment(id)}
      onRestoreToMessage={restoreToMessage}
-      onRetryResume={sessionId => void resumeSession(sessionId, true)}
      onSteer={steerPrompt}
      onSubmit={submitText}
      onThreadMessagesChange={handleThreadMessagesChange}
--- a/apps/desktop/src/app/hooks/use-keybinds.ts
+++ b/apps/desktop/src/app/hooks/use-keybinds.ts
@@ -37,7 +37,6 @@ import {
  switcherActive,
  switcherJustClosed
 } from '@/store/session-switcher'
-import { openNewSessionInNewWindow } from '@/store/windows'
 import { useTheme } from '@/themes/context'

 import { requestComposerFocus } from '../chat/composer/focus'
@@ -133,7 +132,6 @@ export function useKeybinds(deps: KeybindRuntimeDeps): void {
      deps.startFreshSession()
      window.dispatchEvent(new CustomEvent('hermes:new-session-shortcut'))
    },
-    'session.newWindow': () => void openNewSessionInNewWindow(),
    'session.next': () => stepSession(1),
    'session.prev': () => stepSession(-1),
    ...sessionSlotHandlers,
--- a/apps/desktop/src/app/right-sidebar/index.test.tsx
+++ b/apps/desktop/src/app/right-sidebar/index.test.tsx
@@ -1,75 +0,0 @@
-import { cleanup, fireEvent, render, screen, waitFor } from '@testing-library/react'
-import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
-
-import type { HermesReadDirResult } from '@/global'
-import { $connection, setCurrentCwd } from '@/store/session'
-
-import { resetProjectTreeState } from './files/use-project-tree'
-
-import { RightSidebarPane } from './index'
-
-const readDir = vi.fn<(path: string) => Promise<HermesReadDirResult>>()
-const selectPaths = vi.fn()
-
-function ok(entries: { name: string; path: string; isDirectory: boolean }[]): HermesReadDirResult {
-  return { entries }
-}
-
-function installBridge() {
-  ;(
-    window as unknown as {
-      hermesDesktop: {
-        readDir: typeof readDir
-        selectPaths: typeof selectPaths
-      }
-    }
-  ).hermesDesktop = { readDir, selectPaths }
-}
-
-describe('RightSidebarPane', () => {
-  beforeEach(() => {
-    $connection.set(null)
-    resetProjectTreeState()
-    setCurrentCwd('/repo')
-    readDir.mockReset()
-    selectPaths.mockReset()
-    readDir.mockResolvedValue(ok([{ name: 'README.md', path: '/repo/README.md', isDirectory: false }]))
-    selectPaths.mockResolvedValue(['/repo-next'])
-    installBridge()
-  })
-
-  afterEach(() => {
-    cleanup()
-    $connection.set(null)
-    setCurrentCwd('')
-    resetProjectTreeState()
-    delete (window as unknown as { hermesDesktop?: unknown }).hermesDesktop
-  })
-
-  it('refreshes the current tree without opening the folder picker', async () => {
-    const onChangeCwd = vi.fn()
-
-    render(<RightSidebarPane onActivateFile={vi.fn()} onActivateFolder={vi.fn()} onChangeCwd={onChangeCwd} />)
-
-    await waitFor(() => expect(screen.getByRole('button', { name: 'Refresh tree' }).hasAttribute('disabled')).toBe(false))
-
-    readDir.mockClear()
-
-    fireEvent.click(screen.getByRole('button', { name: 'Refresh tree' }))
-
-    await waitFor(() => expect(readDir).toHaveBeenCalledWith('/repo'))
-    expect(selectPaths).not.toHaveBeenCalled()
-
-    fireEvent.click(screen.getByRole('button', { name: 'Open folder' }))
-
-    await waitFor(() =>
-      expect(selectPaths).toHaveBeenCalledWith({
-        defaultPath: '/repo',
-        directories: true,
-        multiple: false,
-        title: 'Change working directory'
-      })
-    )
-    await waitFor(() => expect(onChangeCwd).toHaveBeenCalledWith('/repo-next'))
-  })
-})
--- a/apps/desktop/src/app/right-sidebar/index.tsx
+++ b/apps/desktop/src/app/right-sidebar/index.tsx
@@ -126,12 +126,12 @@ interface FilesystemTabProps extends FileTreeBodyProps {
  onRefresh: () => void
 }

-// Sidebar palette + hover-reveal: header actions stay reachable while moving
-// from the project label to the action buttons.
+// Sidebar palette + hover-reveal: refresh tracks label hover; collapse-all
+// stays visible while any folder is expanded.
 const HEADER_ACTION_CLASS =
  'text-sidebar-foreground/70 hover:bg-sidebar-accent! hover:text-sidebar-accent-foreground! focus-visible:ring-sidebar-ring'

-const HEADER_ACTION_LABEL_REVEAL = `${HEADER_ACTION_CLASS} pointer-events-none opacity-0 transition-opacity focus-visible:pointer-events-auto focus-visible:opacity-100 group-focus-within/project-header:pointer-events-auto group-focus-within/project-header:opacity-100 group-hover/project-header:pointer-events-auto group-hover/project-header:opacity-100`
+const HEADER_ACTION_LABEL_REVEAL = `${HEADER_ACTION_CLASS} pointer-events-none opacity-0 transition-opacity focus-visible:pointer-events-auto focus-visible:opacity-100 peer-focus-visible/project-label:pointer-events-auto peer-focus-visible/project-label:opacity-100 peer-hover/project-label:pointer-events-auto peer-hover/project-label:opacity-100`

 function FilesystemTab({
  canCollapse,
@@ -158,7 +158,7 @@ function FilesystemTab({
  return (
    <div className="flex min-h-0 flex-1 flex-col">
      <RightSidebarSectionHeader>
-        <div className="flex min-w-0 flex-1">
+        <div className="peer/project-label flex min-w-0 flex-1">
          <button
            className="flex w-full min-w-0 items-center rounded-md text-left hover:text-(--ui-text-secondary)"
            onClick={() => void onChangeFolder()}
@@ -216,7 +216,7 @@ function FilesystemTab({
 }

 export function RightSidebarSectionHeader({ children }: { children: ReactNode }) {
-  return <div className="group/project-header flex h-7 shrink-0 items-center px-2.5">{children}</div>
+  return <div className="flex h-7 shrink-0 items-center px-2.5">{children}</div>
 }

 interface FileTreeBodyProps {
--- a/apps/desktop/src/app/right-sidebar/store.ts
+++ b/apps/desktop/src/app/right-sidebar/store.ts
@@ -9,22 +9,3 @@ export const $terminalTakeover = atom(storedBoolean(TAKEOVER_KEY, false))
 $terminalTakeover.subscribe(active => persistBoolean(TAKEOVER_KEY, active))

 export const setTerminalTakeover = (active: boolean) => $terminalTakeover.set(active)
-
-/** A command queued to run in the embedded terminal. The terminal pane flushes
- *  (and clears) it once its session is live, so a value set before the pane
- *  mounts still runs. Cleared after flush so a later remount can't replay it. */
-export const $terminalInjection = atom<null | string>(null)
-
-/** Open the terminal pane and run a command in it. Used to disconnect external
- *  (CLI-managed) providers, which Hermes can't clear via the API — the user
- *  sees exactly what runs instead of Hermes silently deleting their creds. */
-export const runInTerminal = (command: string) => {
-  const trimmed = command.trim()
-
-  if (!trimmed) {
-    return
-  }
-
-  setTerminalTakeover(true)
-  $terminalInjection.set(trimmed)
-}
--- a/apps/desktop/src/app/right-sidebar/terminal/use-terminal-session.ts
+++ b/apps/desktop/src/app/right-sidebar/terminal/use-terminal-session.ts
@@ -10,8 +10,6 @@ import { triggerHaptic } from '@/lib/haptics'
 import { $filePreviewTarget, $previewTarget } from '@/store/preview'
 import { useTheme } from '@/themes/context'

-import { $terminalInjection } from '../store'
-
 import { makeTerminalReader, setActiveTerminalReader } from './buffer'
 import {
  isAddSelectionShortcut,
@@ -677,28 +675,6 @@ export function useTerminalSession({ cwd, onAddSelectionToChat }: UseTerminalSes
    return () => cancelAnimationFrame(raf)
  }, [activeTheme, themeName])

-  // Flush a queued command (e.g. a provider-disconnect) into the live session.
-  // Only active while open; the subscribe fires immediately, so a command set
-  // before this pane mounted runs as soon as the session is ready. Clearing the
-  // atom after writing stops a later remount from replaying a stale command.
-  useEffect(() => {
-    if (status !== 'open') {
-      return
-    }
-
-    return $terminalInjection.subscribe(command => {
-      const id = sessionIdRef.current
-
-      if (!command || !id) {
-        return
-      }
-
-      void window.hermesDesktop?.terminal?.write(id, `${command}\r`)
-      $terminalInjection.set(null)
-      termRef.current?.focus()
-    })
-  }, [status])
-
  return {
    addSelectionToChat,
    hostRef,
--- a/apps/desktop/src/app/session/hooks/use-message-stream.ts
+++ b/apps/desktop/src/app/session/hooks/use-message-stream.ts
@@ -13,7 +13,6 @@ import {
  type GatewayEventPayload,
  reasoningPart,
  renderMediaTags,
-  textPart,
  upsertToolPart
 } from '@/lib/chat-messages'
 import { coerceGatewayText, coerceThinkingText, normalizePersonalityValue } from '@/lib/chat-runtime'
@@ -48,7 +47,6 @@ import {
  setTurnStartedAt,
  setYoloActive
 } from '@/store/session'
-import { broadcastSessionsChanged } from '@/store/session-sync'
 import { clearSessionSubagents, pruneDelegateFallbackSubagents, upsertSubagent } from '@/store/subagents'
 import { setSessionTodos } from '@/store/todos'
 import { recordToolDiff } from '@/store/tool-diffs'
@@ -643,9 +641,6 @@ export function useMessageStream({
      })

      void refreshSessions().catch(() => undefined)
-      // Sync the freshly-titled row to other windows (e.g. main, when the turn
-      // ran in the pop-out).
-      broadcastSessionsChanged()

      if (compactedTurnRef.current.delete(sessionId)) {
        shouldHydrate = false
@@ -1081,32 +1076,6 @@ export function useMessageStream({
          // completions / watch matches here — re-sync the status stack.
          void refreshBackgroundProcesses(sessionId)
        }
-      } else if (event.type === 'review.summary') {
-        // Self-improvement background review saved something to memory/skills
-        // and emitted a persistent summary (Python formats it as
-        // "💾 Self-improvement review: …"). The CLI prints this via
-        // prompt_toolkit and the Ink TUI renders it as a system line; the
-        // desktop has neither, so without this handler the skill/memory
-        // change happens silently. Surface it as a persistent system message
-        // in the transcript so the user is always informed — it must not be a
-        // transient toast that can be missed.
-        const text = coerceGatewayText(payload?.text).trim()
-
-        if (text && sessionId) {
-          flushQueuedDeltas(sessionId)
-          updateSessionState(sessionId, state => ({
-            ...state,
-            messages: [
-              ...state.messages,
-              {
-                id: `review-summary-${Date.now()}`,
-                role: 'system',
-                parts: [textPart(text)],
-                timestamp: Math.floor(Date.now() / 1000)
-              }
-            ]
-          }))
-        }
      } else if (event.type === 'error') {
        const errorMessage = payload?.message || 'Hermes reported an error'
        const looksLikeProviderSetup = isProviderSetupErrorMessage(errorMessage)
@@ -1129,13 +1098,8 @@ export function useMessageStream({

        if (looksLikeProviderSetup) {
          requestDesktopOnboarding(errorMessage)
-        } else {
-          // Toast globally, not just when the failing thread is focused: a
-          // turn-ending error (e.g. out of funds) blocks every thread, so the
-          // inline error alone is too easy to miss. The stable id collapses the
-          // same error from multiple blocked threads into one toast.
+        } else if (isActiveEvent) {
          notify({
-            id: `gateway-error:${errorMessage}`,
            kind: 'error',
            title: 'Hermes error',
            message: errorMessage
--- a/apps/desktop/src/app/session/hooks/use-model-controls.test.tsx
+++ b/apps/desktop/src/app/session/hooks/use-model-controls.test.tsx
@@ -1,5 +1,5 @@
+import { renderHook } from '@testing-library/react'
 import { QueryClient } from '@tanstack/react-query'
-import { cleanup, render, renderHook } from '@testing-library/react'
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'

 import { getGlobalModelInfo } from '@/hermes'
@@ -13,51 +13,12 @@ import {

 import { useModelControls } from './use-model-controls'

-const setGlobalModel = vi.fn()
-const notifyError = vi.fn()
-
 vi.mock('@/hermes', () => ({
  getGlobalModelInfo: vi.fn(),
-  setGlobalModel: (...args: Parameters<typeof setGlobalModel>) => setGlobalModel(...args)
+  setGlobalModel: vi.fn()
 }))

-vi.mock('@/i18n', () => ({
-  useI18n: () => ({
-    t: {
-      desktop: {
-        modelSwitchFailed: 'Model switch failed'
-      }
-    }
-  })
-}))
-
-vi.mock('@/store/notifications', () => ({
-  notifyError: (...args: Parameters<typeof notifyError>) => notifyError(...args)
-}))
-
-type Controls = ReturnType<typeof useModelControls>
-
-function Harness({
-  activeSessionId,
-  onReady,
-  requestGateway
-}: {
-  activeSessionId: string | null
-  onReady: (controls: Controls) => void
-  requestGateway: <T = unknown>(method: string, params?: Record<string, unknown>) => Promise<T>
-}) {
-  const controls = useModelControls({
-    activeSessionId,
-    queryClient: new QueryClient(),
-    requestGateway
-  })
-
-  onReady(controls)
-
-  return null
-}
-
-describe('useModelControls', () => {
+describe('useModelControls.refreshCurrentModel', () => {
  beforeEach(() => {
    $activeSessionId.set(null)
    setCurrentModel('')
@@ -65,7 +26,6 @@ describe('useModelControls', () => {
  })

  afterEach(() => {
-    cleanup()
    vi.restoreAllMocks()
    $activeSessionId.set(null)
    setCurrentModel('')
@@ -114,85 +74,4 @@ describe('useModelControls', () => {
    expect($currentModel.get()).toBe('deepseek/deepseek-v4-pro')
    expect($currentProvider.get()).toBe('deepseek')
  })
-
-  it('routes active-session picker changes through config.set with an explicit provider', async () => {
-    const requestGateway = vi.fn(async () => ({ key: 'model', value: 'claude-sonnet-4.6' }) as never)
-    let controls!: Controls
-
-    render(
-      <Harness
-        activeSessionId="session-1"
-        onReady={value => (controls = value)}
-        requestGateway={requestGateway}
-      />
-    )
-
-    await expect(
-      controls.selectModel({
-        model: 'claude-sonnet-4.6',
-        provider: 'anthropic'
-      })
-    ).resolves.toBe(true)
-
-    expect(requestGateway).toHaveBeenCalledWith('config.set', {
-      session_id: 'session-1',
-      key: 'model',
-      value: 'claude-sonnet-4.6 --provider anthropic'
-    })
-    expect(requestGateway).not.toHaveBeenCalledWith('slash.exec', expect.anything())
-  })
-
-  it('stores a no-session pick as UI state with no gateway or global write', async () => {
-    const requestGateway = vi.fn()
-    let controls!: Controls
-
-    render(
-      <Harness
-        activeSessionId={null}
-        onReady={value => (controls = value)}
-        requestGateway={requestGateway}
-      />
-    )
-
-    await expect(
-      controls.selectModel({
-        model: 'claude-sonnet-4.6',
-        provider: 'anthropic'
-      })
-    ).resolves.toBe(true)
-
-    // The pick is plain UI state; session.create ships it later. Nothing touches
-    // the gateway or the profile default here.
-    expect($currentModel.get()).toBe('claude-sonnet-4.6')
-    expect($currentProvider.get()).toBe('anthropic')
-    expect(requestGateway).not.toHaveBeenCalled()
-    expect(setGlobalModel).not.toHaveBeenCalled()
-  })
-
-  it('seeds an empty composer model from global but never clobbers a pick', async () => {
-    vi.mocked(getGlobalModelInfo).mockResolvedValue({ model: 'openai/gpt-5.5', provider: 'openai-codex' })
-
-    const { result } = renderHook(() =>
-      useModelControls({
-        activeSessionId: null,
-        queryClient: new QueryClient(),
-        requestGateway: vi.fn()
-      })
-    )
-
-    // Empty → seeds the default.
-    await result.current.refreshCurrentModel()
-    expect($currentModel.get()).toBe('openai/gpt-5.5')
-
-    // A user pick must survive the lifecycle refreshes that fire on boot / fresh
-    // draft / session events.
-    setCurrentModel('anthropic/claude-sonnet-4.6')
-    setCurrentProvider('anthropic')
-    await result.current.refreshCurrentModel()
-    expect($currentModel.get()).toBe('anthropic/claude-sonnet-4.6')
-
-    // A profile swap forces a reseed to the new profile's default.
-    await result.current.refreshCurrentModel(true)
-    expect($currentModel.get()).toBe('openai/gpt-5.5')
-  })
 })
--- a/apps/desktop/src/app/session/hooks/use-model-controls.ts
+++ b/apps/desktop/src/app/session/hooks/use-model-controls.ts
@@ -1,7 +1,7 @@
 import { type QueryClient } from '@tanstack/react-query'
 import { useCallback } from 'react'

-import { getGlobalModelInfo } from '@/hermes'
+import { getGlobalModelInfo, setGlobalModel } from '@/hermes'
 import { useI18n } from '@/i18n'
 import { notifyError } from '@/store/notifications'
 import {
@@ -15,6 +15,7 @@ import type { ModelOptionsResponse } from '@/types/hermes'

 interface ModelSelection {
  model: string
+  persistGlobal: boolean
  provider: string
 }

@@ -27,7 +28,6 @@ interface ModelControlsOptions {
 export function useModelControls({ activeSessionId, queryClient, requestGateway }: ModelControlsOptions) {
  const { t } = useI18n()
  const copy = t.desktop
-
  const updateModelOptionsCache = useCallback(
    (provider: string, model: string, includeGlobal: boolean) => {
      const patch = (prev: ModelOptionsResponse | undefined) => ({ ...(prev ?? {}), provider, model })
@@ -41,24 +41,14 @@ export function useModelControls({ activeSessionId, queryClient, requestGateway
    [activeSessionId, queryClient]
  )

-  // Seed the composer's model state from the profile default. `force` reseeds
-  // for a profile swap (the new profile has its own default); otherwise this
-  // only fills an EMPTY selection so a user's pick (plain UI state in
-  // $currentModel) survives the lifecycle refreshes that fire on boot / fresh
-  // draft / session events. A live session owns the footer, so skip entirely.
-  const refreshCurrentModel = useCallback(async (force = false) => {
+  const refreshCurrentModel = useCallback(async () => {
    try {
-      if ($activeSessionId.get()) {
-        return
-      }
-
-      if (!force && $currentModel.get()) {
-        return
-      }
-
      const result = await getGlobalModelInfo()

-      if ($activeSessionId.get() || (!force && $currentModel.get())) {
+      // A resumed/live session owns the footer model state. Global config
+      // refreshes (gateway boot, profile swap, settings save) must not clobber
+      // the active chat's runtime model/provider in the status bar.
+      if ($activeSessionId.get()) {
        return
      }

@@ -74,14 +64,12 @@ export function useModelControls({ activeSessionId, queryClient, requestGateway
    }
  }, [])

-  // Returns whether the switch succeeded so callers can await it before applying
-  // follow-up changes. The composer model is plain UI state: with no live
-  // session it's just stored (and shipped on the next session.create); with one
-  // it's scoped to that session via config.set. It NEVER writes the profile
-  // default — that lives in Settings → Model — so picking a model here can't
-  // silently mutate global config.
+  // Returns whether the switch succeeded so callers can await it before
+  // applying follow-up changes (e.g. editing a model's reasoning/fast must land
+  // on the right active model — bail rather than write to the previous one).
  const selectModel = useCallback(
    async (selection: ModelSelection): Promise<boolean> => {
+      const includeGlobal = selection.persistGlobal || !activeSessionId
      // Snapshot for rollback: the switch is applied optimistically, so a
      // failure must restore the prior model/provider (store + query cache)
      // rather than leave the UI showing a model the backend never selected.
@@ -90,34 +78,41 @@ export function useModelControls({ activeSessionId, queryClient, requestGateway

      setCurrentModel(selection.model)
      setCurrentProvider(selection.provider)
-      updateModelOptionsCache(selection.provider, selection.model, !activeSessionId)
-
-      // No live session yet: the pick is pure UI state. session.create reads
-      // $currentModel/$currentProvider and applies it as that session's override.
-      if (!activeSessionId) {
-        return true
-      }
+      updateModelOptionsCache(selection.provider, selection.model, includeGlobal)

      try {
-        await requestGateway('config.set', {
-          session_id: activeSessionId,
-          key: 'model',
-          value: `${selection.model} --provider ${selection.provider}`
-        })
+        if (activeSessionId) {
+          await requestGateway('slash.exec', {
+            session_id: activeSessionId,
+            command: `/model ${selection.model} --provider ${selection.provider}${selection.persistGlobal ? ' --global' : ''}`
+          })

-        void queryClient.invalidateQueries({ queryKey: ['model-options', activeSessionId] })
+          if (selection.persistGlobal) {
+            void refreshCurrentModel()
+          }
+
+          void queryClient.invalidateQueries({
+            queryKey: selection.persistGlobal ? ['model-options'] : ['model-options', activeSessionId]
+          })
+
+          return true
+        }
+
+        await setGlobalModel(selection.provider, selection.model)
+        void refreshCurrentModel()
+        void queryClient.invalidateQueries({ queryKey: ['model-options'] })

        return true
      } catch (err) {
        setCurrentModel(prevModel)
        setCurrentProvider(prevProvider)
-        updateModelOptionsCache(prevProvider, prevModel, !activeSessionId)
+        updateModelOptionsCache(prevProvider, prevModel, includeGlobal)
        notifyError(err, copy.modelSwitchFailed)

        return false
      }
    },
-    [activeSessionId, copy.modelSwitchFailed, queryClient, requestGateway, updateModelOptionsCache]
+    [activeSessionId, copy.modelSwitchFailed, queryClient, refreshCurrentModel, requestGateway, updateModelOptionsCache]
  )

  return { refreshCurrentModel, selectModel, updateModelOptionsCache }
--- a/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
+++ b/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
@@ -58,7 +58,6 @@ import { clearSessionTodos } from '@/store/todos'

 import type {
  ClientSessionState,
-  BrowserManageResponse,
  FileAttachResponse,
  HandoffFailResponse,
  HandoffRequestResponse,
@@ -1142,81 +1141,6 @@ export function usePromptActions({
          } catch (err) {
            renderSlashOutput(`error: ${err instanceof Error ? err.message : String(err)}`)
          }
-        },
-        // /browser connect|disconnect|status manages the live CDP connection on
-        // the gateway host, mirroring the TUI's browser.manage RPC. It mutates
-        // BROWSER_CDP_URL (and may launch Chrome) in the gateway process — only
-        // meaningful when that process runs on this machine, so it's gated to
-        // local connections. A remote gateway would act on the wrong host.
-        browser: async ctx => {
-          const resolved = await withSlashOutput(ctx)
-
-          if (!resolved) {
-            return
-          }
-
-          const { render: renderSlashOutput, sessionId } = resolved
-
-          if ($connection.get()?.mode === 'remote') {
-            renderSlashOutput(
-              '/browser manages a Chromium-family browser on the gateway host — only available when connected to a local gateway.'
-            )
-
-            return
-          }
-
-          const [rawAction = 'status', ...rest] = ctx.arg.trim().split(/\s+/).filter(Boolean)
-          const cmdAction = rawAction.toLowerCase()
-
-          if (!['connect', 'disconnect', 'status'].includes(cmdAction)) {
-            renderSlashOutput(
-              'usage: /browser [connect|disconnect|status] [url] · persistent: set browser.cdp_url in config.yaml'
-            )
-
-            return
-          }
-
-          const url = cmdAction === 'connect' ? rest.join(' ').trim() || 'http://127.0.0.1:9222' : undefined
-
-          if (url) {
-            renderSlashOutput(`checking Chromium-family browser remote debugging at ${url}...`)
-          }
-
-          try {
-            const result = await requestGateway<BrowserManageResponse>('browser.manage', {
-              action: cmdAction,
-              session_id: sessionId,
-              ...(url && { url })
-            })
-
-            // Without a streamed session subscription, the gateway bundles its
-            // progress lines into `messages` — flush them inline.
-            result?.messages?.forEach(message => renderSlashOutput(message))
-
-            if (cmdAction === 'status') {
-              renderSlashOutput(
-                result?.connected
-                  ? `browser connected: ${result.url || '(url unavailable)'}`
-                  : 'browser not connected (try /browser connect <url> or set browser.cdp_url in config.yaml)'
-              )
-
-              return
-            }
-
-            if (cmdAction === 'disconnect') {
-              renderSlashOutput('browser disconnected')
-
-              return
-            }
-
-            if (result?.connected) {
-              renderSlashOutput('Browser connected to live Chromium-family browser via CDP')
-              renderSlashOutput(`Endpoint: ${result.url || '(url unavailable)'}`)
-              renderSlashOutput('next browser tool call will use this CDP endpoint')
-            }
-          } catch (err) {
-            renderSlashOutput(`error: ${err instanceof Error ? err.message : String(err)}`)
-          }
        }
      }

--- a/apps/desktop/src/app/session/hooks/use-route-resume.test.tsx
+++ b/apps/desktop/src/app/session/hooks/use-route-resume.test.tsx
@@ -2,8 +2,6 @@ import { cleanup, render } from '@testing-library/react'
 import type { MutableRefObject } from 'react'
 import { afterEach, describe, expect, it, vi } from 'vitest'

-import { $resumeExhaustedSessionId, setResumeExhaustedSessionId } from '@/store/session'
-
 import { useRouteResume } from './use-route-resume'

 interface HarnessProps {
@@ -15,8 +13,6 @@ interface HarnessProps {
  gatewayState: string
  locationPathname: string
  resumeSession: (sessionId: string, focus: boolean) => Promise<unknown>
-  resumeFailedSessionId?: null | string
-  resumeExhaustedSessionId?: null | string
  routedSessionId: null | string
  runtimeIdByStoredSessionIdRef: MutableRefObject<Map<string, string>>
  selectedStoredSessionId: null | string
@@ -24,12 +20,8 @@ interface HarnessProps {
  startFreshSessionDraft: (focus: boolean) => unknown
 }

-function RouteResumeHarness({
-  resumeFailedSessionId = null,
-  resumeExhaustedSessionId = null,
-  ...props
-}: HarnessProps) {
-  useRouteResume({ ...props, resumeExhaustedSessionId, resumeFailedSessionId })
+function RouteResumeHarness(props: HarnessProps) {
+  useRouteResume(props)

  return null
 }
@@ -264,212 +256,3 @@ describe('useRouteResume', () => {
    expect(resumeSession).toHaveBeenCalledWith('session-1', true)
  })
 })
-
-describe('useRouteResume bounded auto-retry after a failed resume', () => {
-  afterEach(() => {
-    cleanup()
-    vi.useRealTimers()
-    vi.restoreAllMocks()
-    setResumeExhaustedSessionId(null)
-  })
-
-  // Common stranded-window props: gateway open, route on the session, no runtime
-  // yet, and the ref already synced to the route (resumeSession sets it at entry
-  // before failing) — the exact state that defeats the main effect's self-heal.
-  function strandedProps(resumeSession: (sid: string, focus: boolean) => Promise<unknown>) {
-    return {
-      activeSessionId: null,
-      activeSessionIdRef: { current: null } as MutableRefObject<null | string>,
-      creatingSessionRef: { current: false },
-      currentView: 'chat',
-      freshDraftReady: false,
-      gatewayState: 'open',
-      locationPathname: '/session-1',
-      resumeSession,
-      routedSessionId: 'session-1',
-      runtimeIdByStoredSessionIdRef: { current: new Map<string, string>() },
-      selectedStoredSessionId: 'session-1',
-      // Synced to the route by the failed resume's synchronous entry-write.
-      selectedStoredSessionIdRef: { current: 'session-1' } as MutableRefObject<null | string>,
-      startFreshSessionDraft: vi.fn()
-    }
-  }
-
-  it('retries the resume on backoff when the routed session is flagged as failed', () => {
-    vi.useFakeTimers()
-    const resumeSession = vi.fn(async () => undefined)
-
-    render(<RouteResumeHarness {...strandedProps(resumeSession)} resumeFailedSessionId="session-1" />)
-
-    // The main effect fires one resume on mount (pathname-changed). Clear it so
-    // we assert purely the bounded-retry effect's scheduled retry below.
-    resumeSession.mockClear()
-
-    // No immediate fire — the retry is scheduled behind the backoff timer.
-    expect(resumeSession).not.toHaveBeenCalled()
-
-    // First backoff window (1s) elapses → one retry.
-    vi.advanceTimersByTime(1_000)
-    expect(resumeSession).toHaveBeenCalledTimes(1)
-    expect(resumeSession).toHaveBeenCalledWith('session-1', true)
-  })
-
-  it('does NOT retry a failed session that is not the routed one', () => {
-    vi.useFakeTimers()
-    const resumeSession = vi.fn(async () => undefined)
-
-    // The failure flag points at a different session than the route.
-    render(<RouteResumeHarness {...strandedProps(resumeSession)} resumeFailedSessionId="other-session" />)
-    resumeSession.mockClear() // drop the mount resume
-
-    vi.advanceTimersByTime(10_000)
-    expect(resumeSession).not.toHaveBeenCalled()
-  })
-
-  it('skips the scheduled retry if the session already recovered when the timer fires', () => {
-    vi.useFakeTimers()
-    const resumeSession = vi.fn(async () => undefined)
-    const props = strandedProps(resumeSession)
-
-    render(<RouteResumeHarness {...props} resumeFailedSessionId="session-1" />)
-    resumeSession.mockClear() // drop the mount resume
-
-    // A resume landed while we waited: runtime is now bound.
-    props.activeSessionIdRef.current = 'runtime-1'
-
-    vi.advanceTimersByTime(8_000)
-    expect(resumeSession).not.toHaveBeenCalled()
-  })
-
-  it('stops retrying after MAX_RESUME_RETRIES consecutive failures', () => {
-    vi.useFakeTimers()
-    const resumeSession = vi.fn(async () => undefined)
-    const props = strandedProps(resumeSession)
-
-    // Model the real re-arm loop: resumeSession clears $resumeFailedSessionId at
-    // entry (null) and a repeat failure re-sets it ('session-1'). That null->id
-    // toggle is what re-runs the effect and advances the bounded counter. The
-    // routed session never changes, so the counter is NOT reset between cycles.
-    const { rerender } = render(<RouteResumeHarness {...props} resumeFailedSessionId="session-1" />)
-    resumeSession.mockClear() // drop the mount resume; count only the retries
-
-    for (let i = 0; i < 8; i += 1) {
-      vi.advanceTimersByTime(8_000) // fire the scheduled retry (if any)
-      rerender(<RouteResumeHarness {...props} resumeFailedSessionId={null} />) // cleared at entry
-      rerender(<RouteResumeHarness {...props} resumeFailedSessionId="session-1" />) // re-armed on failure
-    }
-
-    // Capped at MAX_RESUME_RETRIES (4): a persistently dead backend can't
-    // hot-loop the resume forever.
-    expect(resumeSession.mock.calls.length).toBe(4)
-
-    // Once auto-retry gives up, the exhausted latch is armed for the routed
-    // session so the chat view can swap the perpetual loader for an explicit
-    // error + manual Retry instead of spinning forever.
-    expect($resumeExhaustedSessionId.get()).toBe('session-1')
-  })
-
-  it('does not arm the exhausted latch while retries remain', () => {
-    vi.useFakeTimers()
-    const resumeSession = vi.fn(async () => undefined)
-    const props = strandedProps(resumeSession)
-
-    const { rerender } = render(<RouteResumeHarness {...props} resumeFailedSessionId="session-1" />)
-    resumeSession.mockClear()
-
-    // Two failure cycles — still under the 4-retry cap, so the latch must stay
-    // clear and the loader keeps spinning (auto-recovery hasn't given up yet).
-    for (let i = 0; i < 2; i += 1) {
-      vi.advanceTimersByTime(8_000)
-      rerender(<RouteResumeHarness {...props} resumeFailedSessionId={null} />)
-      rerender(<RouteResumeHarness {...props} resumeFailedSessionId="session-1" />)
-    }
-
-    expect($resumeExhaustedSessionId.get()).toBeNull()
-  })
-
-  it('clears a stale exhausted latch when the route moves off the stranded session', () => {
-    vi.useFakeTimers()
-    const resumeSession = vi.fn(async () => undefined)
-    const props = strandedProps(resumeSession)
-
-    // Pre-arm the latch as if this session had exhausted its retries.
-    setResumeExhaustedSessionId('session-1')
-
-    // Route is now on a different, healthy session that is not flagged as
-    // failed — the retry effect's "route moved off" branch clears the latch.
-    render(
-      <RouteResumeHarness
-        {...props}
-        activeSessionId="runtime-2"
-        activeSessionIdRef={{ current: 'runtime-2' }}
-        locationPathname="/session-2"
-        resumeFailedSessionId={null}
-        routedSessionId="session-2"
-        selectedStoredSessionId="session-2"
-        selectedStoredSessionIdRef={{ current: 'session-2' }}
-      />
-    )
-
-    expect($resumeExhaustedSessionId.get()).toBeNull()
-  })
-
-  it('resets the retry counter for a fresh backoff cycle when the exhausted latch clears (manual retry, same session)', () => {
-    vi.useFakeTimers()
-    const resumeSession = vi.fn(async () => undefined)
-    const props = strandedProps(resumeSession)
-
-    // Phase A — exhaust the bounded auto-retry (counter → MAX) like a dead
-    // backend. The resumeExhaustedSessionId prop stays null here: the hook sets
-    // the store, which doesn't feed back into the prop in this harness.
-    const { rerender } = render(<RouteResumeHarness {...props} resumeFailedSessionId="session-1" />)
-    resumeSession.mockClear()
-    for (let i = 0; i < 8; i += 1) {
-      vi.advanceTimersByTime(8_000)
-      rerender(<RouteResumeHarness {...props} resumeFailedSessionId={null} />)
-      rerender(<RouteResumeHarness {...props} resumeFailedSessionId="session-1" />)
-    }
-    expect(resumeSession.mock.calls.length).toBe(4) // capped
-    expect($resumeExhaustedSessionId.get()).toBe('session-1')
-
-    // Phase B — user clicks Retry on the SAME stranded session. resumeSession
-    // clears both latches at entry; the exhausted latch's armed->cleared edge
-    // must reset the attempt counter so a fresh bounded cycle runs, not a single
-    // one-shot attempt that immediately re-arms the error. Model the prop
-    // transitions: reflect the armed latch, then clear it (retry), then re-arm
-    // the failure latch on the fresh failure.
-    resumeSession.mockClear()
-    rerender(<RouteResumeHarness {...props} resumeExhaustedSessionId="session-1" resumeFailedSessionId="session-1" />)
-    rerender(<RouteResumeHarness {...props} resumeExhaustedSessionId={null} resumeFailedSessionId={null} />)
-    rerender(<RouteResumeHarness {...props} resumeExhaustedSessionId={null} resumeFailedSessionId="session-1" />)
-
-    // A real retry fires again instead of staying pinned at MAX (which would
-    // dispatch nothing). Without the reset the counter stays >= MAX and this
-    // advance dispatches zero resumes.
-    vi.advanceTimersByTime(8_000)
-    expect(resumeSession.mock.calls.length).toBeGreaterThan(0)
-  })
-
-  it('does not burn retry attempts on unrelated re-renders during the backoff window', () => {
-    vi.useFakeTimers()
-    const props = strandedProps(vi.fn())
-
-    // Mount schedules the first backoff timer. Then re-render repeatedly with a
-    // fresh resumeSession identity (referential instability — a real dep change
-    // for the retry effect) WITHOUT ever letting the timer fire. The old code
-    // incremented the attempt counter at schedule time, so >= MAX re-renders
-    // armed the exhausted error with zero resumes actually dispatched. The fix
-    // only advances the counter when a timer truly fires, so the latch stays
-    // clear no matter how many spurious re-renders happen mid-backoff.
-    const { rerender } = render(
-      <RouteResumeHarness {...props} resumeFailedSessionId="session-1" resumeSession={vi.fn(async () => undefined)} />
-    )
-    for (let j = 0; j < 8; j += 1) {
-      rerender(
-        <RouteResumeHarness {...props} resumeFailedSessionId="session-1" resumeSession={vi.fn(async () => undefined)} />
-      )
-    }
-
-    expect($resumeExhaustedSessionId.get()).toBeNull()
-  })
-})
--- a/apps/desktop/src/app/session/hooks/use-route-resume.ts
+++ b/apps/desktop/src/app/session/hooks/use-route-resume.ts
@@ -1,7 +1,6 @@
 import { type MutableRefObject, useEffect, useRef } from 'react'

 import { isNewChatRoute } from '@/app/routes'
-import { setResumeExhaustedSessionId } from '@/store/session'

 interface RouteResumeOptions {
  activeSessionId: string | null
@@ -12,17 +11,6 @@ interface RouteResumeOptions {
  gatewayState: string | undefined
  locationPathname: string
  resumeSession: (sessionId: string, focus: boolean) => Promise<unknown>
-  // Stored-session id whose most recent resume failed terminally (set by
-  // useSessionActions, mirrored from $resumeFailedSessionId). While this equals
-  // routedSessionId the window would otherwise latch on the loader forever, so
-  // the bounded-retry effect below re-attempts the resume.
-  resumeFailedSessionId: string | null
-  // Stored-session id whose bounded auto-retry has EXHAUSTED (mirrored from
-  // $resumeExhaustedSessionId). Only resumeSession clears this latch (manual
-  // Retry / reconnect / reselect) — the auto-retry loop never does — so its
-  // armed->cleared edge is an unambiguous "give me a fresh backoff cycle"
-  // signal the effect below uses to reset the attempt counter.
-  resumeExhaustedSessionId: string | null
  routedSessionId: string | null
  runtimeIdByStoredSessionIdRef: MutableRefObject<Map<string, string>>
  selectedStoredSessionId: string | null
@@ -30,19 +18,6 @@ interface RouteResumeOptions {
  startFreshSessionDraft: (focus: boolean) => unknown
 }

-// Bounded auto-retry for a stranded session window. A resume can fail terminally
-// (gateway RPC reject + REST fallback failure) on a transiently wedged backend —
-// dead provider key, a runaway turn hogging the dispatcher, flaky DNS. Without a
-// retry the loader latches forever. We retry with backoff, capped, so a
-// genuinely dead backend doesn't hot-loop the resume.
-const MAX_RESUME_RETRIES = 4
-const RESUME_RETRY_BASE_MS = 1_000
-const RESUME_RETRY_MAX_MS = 8_000
-
-function resumeRetryDelayMs(attempt: number): number {
-  return Math.min(RESUME_RETRY_MAX_MS, RESUME_RETRY_BASE_MS * 2 ** attempt)
-}
-
 // HashRouter boot edge case: pathname briefly reads `/` before the hash is
 // parsed. If the hash references a real session, defer; resume picks it up
 // next tick. Without this, ctrl+R on `#/:sessionId` flashes 5 loading states.
@@ -74,8 +49,6 @@ export function useRouteResume({
  gatewayState,
  locationPathname,
  resumeSession,
-  resumeFailedSessionId,
-  resumeExhaustedSessionId,
  routedSessionId,
  runtimeIdByStoredSessionIdRef,
  selectedStoredSessionId,
@@ -85,16 +58,6 @@ export function useRouteResume({
  const lastPathnameRef = useRef<string | null>(null)
  const seenGatewayStateRef = useRef(false)
  const wasGatewayOpenRef = useRef(false)
-  // Per-session retry bookkeeping for the bounded auto-retry effect below. Keyed
-  // by the session id we're retrying so switching chats resets the counter.
-  const retrySessionIdRef = useRef<string | null>(null)
-  const retryAttemptRef = useRef(0)
-  // Tracks the previous exhausted-latch value so we can detect its armed->cleared
-  // edge. resumeSession clears $resumeExhaustedSessionId on a manual Retry /
-  // reconnect / reselect; that transition is our cue to reset the attempt counter
-  // for a fresh backoff cycle on the SAME session (the auto-retry loop itself
-  // never touches this latch, so it can't spuriously trigger the reset).
-  const prevResumeExhaustedRef = useRef<string | null>(null)

  useEffect(() => {
    const gatewayOpen = gatewayState === 'open'
@@ -176,111 +139,4 @@ export function useRouteResume({
    selectedStoredSessionIdRef,
    startFreshSessionDraft
  ])
-
-  // Bounded auto-retry: when the routed session's resume failed terminally
-  // (resumeFailedSessionId matches the route), schedule a backoff retry so the
-  // window recovers on its own instead of latching the loader forever. This is
-  // the safety net the main effect above can't provide: after a failed resume,
-  // selectedStoredSessionIdRef.current already equals the route (resumeSession
-  // sets it synchronously at entry) and the pathname/gateway are unchanged, so
-  // none of stuckOnRoutedSession / pathnameChanged / gatewayBecameOpen fire
-  // again. resumeSession clears resumeFailedSessionId on its next attempt; a
-  // success keeps it clear (the effect's guard then no-ops), a repeat failure
-  // re-arms it and we back off further, capped at MAX_RESUME_RETRIES.
-  useEffect(() => {
-    // Detect the exhausted-latch armed->cleared edge for the current route. Only
-    // resumeSession clears $resumeExhaustedSessionId (manual Retry / reconnect /
-    // reselect) — the auto-retry loop never touches it — so this transition
-    // uniquely means "the user asked for another go." Reset the attempt counter
-    // for a fresh bounded backoff cycle on the SAME session. Without this,
-    // retryAttemptRef stays pinned at MAX after exhaustion (the !stranded reset
-    // below only fires on a route CHANGE to a different session), so a manual
-    // retry on the same stranded session would get exactly ONE attempt and then
-    // immediately re-arm the exhausted error — never the renewed backoff cycle
-    // the store/session.ts + use-session-actions.ts comments promise. (Point 2)
-    const wasExhausted = prevResumeExhaustedRef.current
-    prevResumeExhaustedRef.current = resumeExhaustedSessionId
-    if (wasExhausted && wasExhausted === routedSessionId && resumeExhaustedSessionId !== wasExhausted) {
-      retrySessionIdRef.current = routedSessionId
-      retryAttemptRef.current = 0
-    }
-
-    if (currentView !== 'chat' || gatewayState !== 'open') {
-      return
-    }
-
-    const stranded =
-      Boolean(routedSessionId) &&
-      resumeFailedSessionId === routedSessionId &&
-      !creatingSessionRef.current
-
-    if (!stranded) {
-      // Route moved off the stranded session (or it recovered) — reset the
-      // counter so a future failure on another session starts fresh, and clear
-      // any exhausted-latch armed for a session we're no longer viewing (never
-      // the current route: that's the error state we want to keep showing).
-      // resumeSession also clears it on a fresh attempt; this covers a plain
-      // route-change away from the stranded window.
-      if (retrySessionIdRef.current !== routedSessionId) {
-        retrySessionIdRef.current = null
-        retryAttemptRef.current = 0
-        setResumeExhaustedSessionId(current => (current && current !== routedSessionId ? null : current))
-      }
-
-      return
-    }
-
-    // New stranded session id → reset the attempt counter.
-    if (retrySessionIdRef.current !== routedSessionId) {
-      retrySessionIdRef.current = routedSessionId
-      retryAttemptRef.current = 0
-    }
-
-    if (retryAttemptRef.current >= MAX_RESUME_RETRIES) {
-      // Give up auto-retrying a persistently dead backend; the user can still
-      // reconnect / reselect (which resets the counter via the branch above).
-      // Surface an explicit error + manual Retry in the chat view instead of
-      // spinning the loader forever — resumeSession (manual Retry / reconnect /
-      // reselect) clears this latch and resets the counter for a fresh cycle.
-      setResumeExhaustedSessionId(routedSessionId)
-
-      return
-    }
-
-    const attempt = retryAttemptRef.current
-    const sessionId = routedSessionId as string
-
-    const timer = setTimeout(() => {
-      // Re-check liveness at fire time: a resume may have landed while we waited.
-      if (
-        creatingSessionRef.current ||
-        selectedStoredSessionIdRef.current !== sessionId ||
-        activeSessionIdRef.current !== null
-      ) {
-        return
-      }
-
-      // Consume an attempt ONLY now that a resume is actually dispatching.
-      // Incrementing at schedule time (the old behavior) let unrelated dep
-      // changes during the 1s–8s backoff window — a transient gatewayState
-      // flip, a non-referentially-stable resumeSession — clear the pending
-      // timer and re-run the effect, burning an attempt without any resume
-      // having fired. A flapping backend could then hit MAX in a couple of
-      // re-renders with far fewer than MAX real attempts. (Point 3)
-      retryAttemptRef.current += 1
-      void resumeSession(sessionId, true)
-    }, resumeRetryDelayMs(attempt))
-
-    return () => clearTimeout(timer)
-  }, [
-    activeSessionIdRef,
-    creatingSessionRef,
-    currentView,
-    gatewayState,
-    resumeSession,
-    resumeFailedSessionId,
-    resumeExhaustedSessionId,
-    routedSessionId,
-    selectedStoredSessionIdRef
-  ])
 }
--- a/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx
+++ b/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx
@@ -3,9 +3,8 @@ import type { MutableRefObject } from 'react'
 import { useEffect } from 'react'
 import { afterEach, describe, expect, it, vi } from 'vitest'

-import { getSessionMessages } from '@/hermes'
 import { $activeGatewayProfile, $newChatProfile } from '@/store/profile'
-import { $currentCwd, $messages, $resumeFailedSessionId, setMessages, setResumeFailedSessionId } from '@/store/session'
+import { $currentCwd } from '@/store/session'

 import type { ClientSessionState } from '../../types'

@@ -118,142 +117,3 @@ describe('createBackendSessionForSend profile routing', () => {
    expect(params).toMatchObject({ profile: 'default' })
  })
 })
-
-// ── Resume failure recovery (the "stuck loading session window" bug) ──────────
-// When session.resume rejects AND the REST transcript fallback ALSO fails, the
-// hook must (a) not throw out of the fallback (which stranded the loader), and
-// (b) arm $resumeFailedSessionId so use-route-resume can retry. A resume that
-// succeeds must NOT leave the flag armed.
-function ResumeHarness({
-  onReady,
-  requestGateway
-}: {
-  onReady: (resume: (storedSessionId: string, replaceRoute?: boolean) => Promise<unknown>) => void
-  requestGateway: <T>(method: string, params?: Record<string, unknown>) => Promise<T>
-}) {
-  const ref = <T,>(value: T): MutableRefObject<T> => ({ current: value })
-
-  const actions = useSessionActions({
-    activeSessionId: null,
-    activeSessionIdRef: ref<string | null>(null),
-    busyRef: ref(false),
-    creatingSessionRef: ref(false),
-    ensureSessionState: () => ({}) as ClientSessionState,
-    getRouteToken: () => 'token',
-    navigate: vi.fn() as never,
-    requestGateway,
-    runtimeIdByStoredSessionIdRef: ref(new Map<string, string>()),
-    selectedStoredSessionId: null,
-    selectedStoredSessionIdRef: ref<string | null>(null),
-    sessionStateByRuntimeIdRef: ref(new Map<string, ClientSessionState>()),
-    syncSessionStateToView: vi.fn(),
-    updateSessionState: (_sessionId, updater) => updater({} as ClientSessionState)
-  })
-
-  useEffect(() => {
-    onReady(actions.resumeSession)
-  }, [actions.resumeSession, onReady])
-
-  return null
-}
-
-describe('resumeSession failure recovery', () => {
-  afterEach(() => {
-    cleanup()
-    setResumeFailedSessionId(null)
-    setMessages([])
-    vi.restoreAllMocks()
-  })
-
-  async function runResume(
-    requestGateway: <T>(method: string, params?: Record<string, unknown>) => Promise<T>
-  ): Promise<void> {
-    let resume: ((storedSessionId: string, replaceRoute?: boolean) => Promise<unknown>) | null = null
-    render(<ResumeHarness onReady={r => (resume = r)} requestGateway={requestGateway} />)
-    await waitFor(() => expect(resume).not.toBeNull())
-    await resume!('stored-1', true)
-  }
-
-  it('arms $resumeFailedSessionId when resume RPC and REST fallback both fail', async () => {
-    // session.resume rejects (e.g. timeout against a wedged backend)...
-    const requestGateway = vi.fn(async (method: string) => {
-      if (method === 'session.resume') {
-        throw new Error('request timed out: session.resume')
-      }
-
-      return {} as never
-    })
-
-    // ...and the REST transcript fallback also rejects (backend unreachable).
-    vi.mocked(getSessionMessages).mockRejectedValue(new Error('network down'))
-
-    await runResume(requestGateway)
-
-    // The window is no longer silently stranded: the failure latch is armed for
-    // the stored session, which use-route-resume consumes to retry.
-    expect($resumeFailedSessionId.get()).toBe('stored-1')
-  })
-
-  it('does NOT arm the failure latch when the resume RPC fails but the REST fallback paints history', async () => {
-    // session.resume rejects, but the REST transcript fallback succeeds and
-    // hydrates a readable transcript — the window is NOT stranded.
-    const requestGateway = vi.fn(async (method: string) => {
-      if (method === 'session.resume') {
-        throw new Error('request timed out: session.resume')
-      }
-
-      return {} as never
-    })
-
-    vi.mocked(getSessionMessages).mockResolvedValue({
-      messages: [
-        { content: 'hello', role: 'user', timestamp: 1 },
-        { content: 'hi there', role: 'assistant', timestamp: 2 }
-      ],
-      session_id: 'stored-1'
-    } as never)
-
-    await runResume(requestGateway)
-
-    // Arming here would auto-retry a window that already shows history and,
-    // on exhaustion, blank that transcript behind the error overlay — a
-    // regression vs. plain fallback-success. The latch must stay clear.
-    expect($resumeFailedSessionId.get()).toBeNull()
-    // The fallback transcript is visible.
-    expect($messages.get().length).toBeGreaterThan(0)
-  })
-
-  it('does NOT throw out of the fallback when REST also fails (no unhandled rejection)', async () => {
-    const requestGateway = vi.fn(async (method: string) => {
-      if (method === 'session.resume') {
-        throw new Error('request timed out: session.resume')
-      }
-
-      return {} as never
-    })
-
-    vi.mocked(getSessionMessages).mockRejectedValue(new Error('network down'))
-
-    // resumeSession must resolve (swallow the fallback failure), not reject.
-    await expect(runResume(requestGateway)).resolves.toBeUndefined()
-  })
-
-  it('leaves the failure latch clear when resume succeeds', async () => {
-    // Pre-arm to prove a successful resume clears it (entry-clear path).
-    setResumeFailedSessionId('stored-1')
-
-    const requestGateway = vi.fn(async (method: string, params?: Record<string, unknown>) => {
-      if (method === 'session.resume') {
-        return { session_id: 'runtime-1', resumed: params?.session_id, messages: [], info: {} } as never
-      }
-
-      return {} as never
-    })
-
-    vi.mocked(getSessionMessages).mockResolvedValue({ messages: [] } as never)
-
-    await runResume(requestGateway)
-
-    expect($resumeFailedSessionId.get()).toBeNull()
-  })
-})
--- a/apps/desktop/src/app/session/hooks/use-session-actions.ts
+++ b/apps/desktop/src/app/session/hooks/use-session-actions.ts
@@ -15,10 +15,6 @@ import { requestDesktopOnboarding } from '@/store/onboarding'
 import { $activeGatewayProfile, $newChatProfile, $profiles, ensureGatewayProfile, normalizeProfileKey } from '@/store/profile'
 import {
  $currentCwd,
-  $currentFastMode,
-  $currentModel,
-  $currentProvider,
-  $currentReasoningEffort,
  $messages,
  $sessions,
  $yoloActive,
@@ -38,8 +34,6 @@ import {
  setFreshDraftReady,
  setIntroSeed,
  setMessages,
-  setResumeExhaustedSessionId,
-  setResumeFailedSessionId,
  setSelectedStoredSessionId,
  setSessions,
  setSessionStartedAt,
@@ -48,7 +42,6 @@ import {
  setYoloActive,
  workspaceCwdForNewSession
 } from '@/store/session'
-import { broadcastSessionsChanged } from '@/store/session-sync'
 import { reportBackendContract } from '@/store/updates'
 import { isWatchWindow } from '@/store/windows'
 import type { SessionCreateResponse, SessionInfo, SessionResumeResponse, SessionRuntimeInfo, UsageStats } from '@/types/hermes'
@@ -413,13 +406,13 @@ export function useSessionActions({
      })
      setSessionStartedAt(null)
      setTurnStartedAt(null)
-      // The composer's model/effort/fast is sticky UI state (persisted in
-      // localStorage) — a new chat FOLLOWS your last pick instead of snapping
-      // back to the profile default, so we deliberately don't reset it here. The
-      // profile default still owns first-run seeding and profile switches (see
-      // refreshCurrentModel). Only $currentServiceTier (a live-session mirror)
-      // is cleared.
+      // Clear the per-chat model, provider, effort, tier and fast-mode state. The cwd set below is the
+      // configured default project dir when set, otherwise the sticky last-used workspace (PR #37586).
+      setCurrentModel('')
+      setCurrentProvider('')
+      setCurrentReasoningEffort('')
      setCurrentServiceTier('')
+      setCurrentFastMode(false)
      setYoloActive(false)
      setCurrentCwd(workspaceCwdForNewSession())
      setCurrentBranch('')
@@ -449,23 +442,11 @@ export function useSessionActions({
        const newChatProfile = $newChatProfile.get() ?? normalizeProfileKey($activeGatewayProfile.get())
        await ensureGatewayProfile(newChatProfile)
        const cwd = $currentCwd.get().trim() || workspaceCwdForNewSession()
-        // The composer's model/effort/fast is sticky UI state ($currentModel,
-        // $currentProvider, $currentReasoningEffort, $currentFastMode). Ship it
-        // with every session.create so the new chat opens on whatever the picker
-        // shows — applied as per-session overrides, never written to the profile
-        // default (that lives in Settings → Model).
-        const uiModel = $currentModel.get().trim()
-        const uiProvider = $currentProvider.get().trim()
-        const uiEffort = $currentReasoningEffort.get().trim()
-        const uiFast = $currentFastMode.get()

        const created = await requestGateway<SessionCreateResponse>('session.create', {
          cols: 96,
          ...(cwd && { cwd }),
-          ...(newChatProfile ? { profile: newChatProfile } : {}),
-          ...(uiModel ? { model: uiModel, ...(uiProvider ? { provider: uiProvider } : {}) } : {}),
-          ...(uiEffort ? { reasoning_effort: uiEffort } : {}),
-          ...(uiFast ? { fast: true } : {})
+          ...(newChatProfile ? { profile: newChatProfile } : {})
        })

        const stored = created.stored_session_id ?? null
@@ -491,9 +472,6 @@ export function useSessionActions({
          // server later returns its own preview/title and supersedes this.
          upsertOptimisticSession(created, stored, null, preview?.trim() || null)
          navigate(sessionRoute(stored), { replace: true })
-          // Other windows (e.g. the main window when this is the pop-out) can't
-          // see this session until they re-pull the shared list.
-          broadcastSessionsChanged()
        }

        setFreshDraftReady(false)
@@ -581,15 +559,6 @@ export function useSessionActions({
      clearNotifications()
      setSelectedStoredSessionId(storedSessionId)
      selectedStoredSessionIdRef.current = storedSessionId
-      // Optimistically clear any prior resume-failure latch for this session:
-      // we're attempting a fresh resume, so the self-heal in use-route-resume
-      // must not keep treating it as stranded. It's re-armed below only if THIS
-      // attempt fails terminally (RPC reject + REST fallback failure).
-      setResumeFailedSessionId(current => (current === storedSessionId ? null : current))
-      // Also clear the exhausted-latch: a fresh attempt (manual Retry, reconnect,
-      // reselect) gives the bounded auto-retry counter a clean cycle, so the
-      // chat view drops the error state and shows the loader again.
-      setResumeExhaustedSessionId(current => (current === storedSessionId ? null : current))

      const warmRuntimeId = runtimeIdByStoredSessionIdRef.current.get(storedSessionId)

@@ -780,41 +749,13 @@ export function useSessionActions({
          return
        }

-        // The gateway resume RPC failed. Try the REST transcript as a fallback
-        // so the window at least shows history. CRITICAL: this fallback must be
-        // wrapped in its own try — if it ALSO throws (wedged/unreachable backend,
-        // the common case when resume failed in the first place), an unguarded
-        // throw here skips setMessages AND leaves activeSessionId null with an
-        // empty transcript. That is the exact state the thread loader latches on
-        // forever (messagesEmpty && !activeSessionId) with no recovery path —
-        // the "open in new window stays stuck loading, even after a nap" bug.
-        try {
-          const fallback = await getSessionMessages(storedSessionId, sessionProfile)
+        const fallback = await getSessionMessages(storedSessionId, sessionProfile)

-          if (!isCurrentResume()) {
-            return
-          }
-
-          setMessages(preserveLocalAssistantErrors(toChatMessages(fallback.messages), $messages.get()))
-        } catch {
-          // Fallback also failed: nothing to paint. Leave whatever messages are
-          // already shown and fall through to arm the resume-failure latch so
-          // use-route-resume re-attempts the resume on the next render / window
-          // focus / gateway reconnect instead of stranding the loader.
-        }
-
-        if (isCurrentResume() && $messages.get().length === 0) {
-          // Arm the self-heal ONLY when the window is still empty: the gateway
-          // resume rejected AND the REST fallback failed to paint a transcript.
-          // That is the exact stranded state the loader latches on
-          // (messagesEmpty && !activeSessionId), and matches $resumeFailedSessionId's
-          // documented contract. If the REST fallback DID paint history, the
-          // window is readable — arming here would needlessly auto-retry and,
-          // once retries exhaust, blank that visible transcript behind the
-          // exhausted-state error overlay (a regression vs. plain fallback success).
-          setResumeFailedSessionId(storedSessionId)
+        if (!isCurrentResume()) {
+          return
        }

+        setMessages(preserveLocalAssistantErrors(toChatMessages(fallback.messages), $messages.get()))
        notifyError(err, copy.resumeFailed)
      } finally {
        if (isCurrentResume()) {
--- a/apps/desktop/src/app/session/hooks/use-session-state-cache.test.tsx
+++ b/apps/desktop/src/app/session/hooks/use-session-state-cache.test.tsx
@@ -2,14 +2,12 @@ import { act, cleanup, render } from '@testing-library/react'
 import type { MutableRefObject } from 'react'
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'

-import type { ChatMessage } from '@/lib/chat-messages'
 import {
  $currentFastMode,
  $currentModel,
  $currentProvider,
  $currentReasoningEffort,
  $currentServiceTier,
-  $messages,
  $turnStartedAt,
  setCurrentFastMode,
  setCurrentModel,
@@ -215,113 +213,3 @@ describe('useSessionStateCache — per-session turn timer', () => {
    expect($currentFastMode.get()).toBe(false)
  })
 })
-
-function userMessage(id: string, text: string): ChatMessage {
-  return { id, role: 'user', parts: [{ type: 'text', text }] }
-}
-
-function assistantText(id: string, text: string): ChatMessage {
-  return { id, role: 'assistant', parts: [{ type: 'text', text }] }
-}
-
-function assistantError(id: string, error: string): ChatMessage {
-  return { id, role: 'assistant', parts: [], error, pending: false }
-}
-
-interface ViewHarnessProps {
-  activeSessionId: string | null
-  onReady: (cache: Cache) => void
-}
-
-function ViewHarness({ activeSessionId, onReady }: ViewHarnessProps) {
-  const busyRef: MutableRefObject<boolean> = { current: false }
-  const cache = useSessionStateCache({
-    activeSessionId,
-    busyRef,
-    selectedStoredSessionId: null,
-    setAwaitingResponse: () => undefined,
-    setBusy: () => undefined,
-    // Wire the published view back into the real $messages atom the flush
-    // reads from, so the round-trip matches production.
-    setMessages: messages => $messages.set(messages)
-  })
-
-  onReady(cache)
-
-  return null
-}
-
-describe('useSessionStateCache — cross-thread error isolation', () => {
-  afterEach(() => {
-    cleanup()
-    $messages.set([])
-  })
-
-  it('does not leak a failed turn into another thread on switch', () => {
-    $messages.set([])
-    let cache!: Cache
-    const { rerender } = render(<ViewHarness activeSessionId="thread-A" onReady={c => (cache = c)} />)
-
-    // Thread A ends its turn with an out-of-funds error and is on screen.
-    act(() => {
-      cache.updateSessionState(
-        'thread-A',
-        state => ({
-          ...state,
-          busy: false,
-          messages: [userMessage('user-a', 'do the thing'), assistantError('assistant-a-error', 'Out of funds')]
-        }),
-        'stored-A'
-      )
-    })
-
-    expect($messages.get().some(message => message.error === 'Out of funds')).toBe(true)
-
-    // Switch to thread B (which completed cleanly). Its cached state syncs to
-    // the view while $messages still holds thread A's transcript.
-    rerender(<ViewHarness activeSessionId="thread-B" onReady={c => (cache = c)} />)
-    act(() => {
-      cache.updateSessionState(
-        'thread-B',
-        state => ({
-          ...state,
-          busy: false,
-          messages: [userMessage('user-b', 'hello'), assistantText('assistant-b', 'hi there')]
-        }),
-        'stored-B'
-      )
-    })
-
-    expect($messages.get().map(message => message.id)).toEqual(['user-b', 'assistant-b'])
-    expect($messages.get().some(message => message.error === 'Out of funds')).toBe(false)
-  })
-
-  it('still preserves a same-session local error a heartbeat dropped', () => {
-    $messages.set([])
-    let cache!: Cache
-    render(<ViewHarness activeSessionId="thread-A" onReady={c => (cache = c)} />)
-
-    // First paint establishes thread A as the on-screen session.
-    act(() => {
-      cache.updateSessionState(
-        'thread-A',
-        state => ({ ...state, busy: false, messages: [userMessage('user-a', 'do the thing')] }),
-        'stored-A'
-      )
-    })
-
-    // A local error lands in the view (e.g. failAssistantMessage wrote it).
-    $messages.set([userMessage('user-a', 'do the thing'), assistantError('assistant-a-error', 'OpenRouter 403')])
-
-    // A later same-session heartbeat carries cached state that lost the error.
-    act(() => {
-      cache.updateSessionState('thread-A', state => ({
-        ...state,
-        busy: false,
-        messages: [userMessage('user-a', 'do the thing')]
-      }))
-    })
-
-    expect($messages.get().some(message => message.error === 'OpenRouter 403')).toBe(true)
-  })
-})
--- a/apps/desktop/src/app/session/hooks/use-session-state-cache.ts
+++ b/apps/desktop/src/app/session/hooks/use-session-state-cache.ts
@@ -79,9 +79,6 @@ export function useSessionStateCache({
  const runtimeIdByStoredSessionIdRef = useRef(new Map<string, string>())
  const pendingViewStateRef = useRef<{ sessionId: string; state: ClientSessionState } | null>(null)
  const viewSyncRafRef = useRef<number | null>(null)
-  // Runtime id whose transcript currently occupies `$messages` — lets the
-  // flush below tell a same-session refresh from a thread switch.
-  const viewSessionIdRef = useRef<string | null>(null)

  useEffect(() => {
    activeSessionIdRef.current = activeSessionId
@@ -145,22 +142,12 @@ export function useSessionStateCache({
    // jerks the scroll position while the user is reading. Skip the publish when
    // the merged result is content-identical to what's already on screen.
    const currentMessages = $messages.get()
-    // On a thread switch `$messages` still holds the *previous* thread, so
-    // preserving its local errors would graft that thread's failed turn (e.g.
-    // an out-of-funds error) onto this one — then cascade it everywhere as the
-    // polluted view becomes the next switch's baseline. Only carry errors
-    // across a same-session refresh; our cached state already keeps its own.
-    const nextMessages =
-      viewSessionIdRef.current === pending.sessionId
-        ? preserveLocalAssistantErrors(pending.state.messages, currentMessages)
-        : pending.state.messages
+    const nextMessages = preserveLocalAssistantErrors(pending.state.messages, currentMessages)

    if (!sameMessageList(nextMessages, currentMessages)) {
      setMessages(nextMessages)
    }

-    viewSessionIdRef.current = pending.sessionId
-
    syncRuntimeMetadataToView(pending.state)
    setBusy(pending.state.busy)
    setMutableRef(busyRef, pending.state.busy)
--- a/apps/desktop/src/app/settings/config-settings.tsx
+++ b/apps/desktop/src/app/settings/config-settings.tsx
@@ -23,7 +23,6 @@ import { fieldCopyForSchemaKey } from './field-copy'
 import { enumOptionsFor, getNested, prettyName, setNested } from './helpers'
 import { ModelSettings } from './model-settings'
 import { EmptyState, ListRow, LoadingState, SettingsContent } from './primitives'
-import { ProviderConfigPanel } from './provider-config-panel'

 function ConfigField({
  schemaKey,
@@ -369,9 +368,6 @@ export function ConfigSettings({
                schemaKey={key}
                value={getNested(config, key)}
              />
-              {key === 'memory.provider' && typeof getNested(config, key) === 'string' && getNested(config, key) ? (
-                <ProviderConfigPanel provider={String(getNested(config, key))} />
-              ) : null}
            </div>
          ))}
        </div>
--- a/apps/desktop/src/app/settings/constants.ts
+++ b/apps/desktop/src/app/settings/constants.ts
@@ -239,7 +239,7 @@ export const ENUM_OPTIONS: Record<string, string[]> = {
  'code_execution.mode': ['project', 'strict'],
  'context.engine': ['compressor', 'default', 'custom'],
  'delegation.reasoning_effort': ['', 'minimal', 'low', 'medium', 'high', 'xhigh'],
-  'memory.provider': ['', 'builtin', 'hindsight', 'honcho'],
+  'memory.provider': ['', 'builtin', 'honcho'],
  // Terminal execution backends — kept in sync with the dispatch ladder in
  // tools/terminal_tool.py::_create_environment (local/docker/singularity/
  // modal/daytona/ssh). Remote backends need extra env (image, tokens, host).
--- a/apps/desktop/src/app/settings/helpers.test.ts
+++ b/apps/desktop/src/app/settings/helpers.test.ts
@@ -6,12 +6,6 @@ import { defineFieldCopy, fieldCopyForSchemaKey, schemaKeyToFieldCopyKey } from
 import { enumOptionsFor, getNested, providerGroup, setNested, stripToolsetLabel, toolsetDisplayLabel } from './helpers'

 describe('settings helpers', () => {
-  it('lists Hindsight as a built-in desktop memory provider option', () => {
-    const options = enumOptionsFor('memory.provider', '', {})
-
-    expect(options).toContain('hindsight')
-  })
-
  describe('defineFieldCopy', () => {
    it('flattens nested field copy paths', () => {
      const copy = defineFieldCopy({
--- a/apps/desktop/src/app/settings/index.tsx
+++ b/apps/desktop/src/app/settings/index.tsx
@@ -228,7 +228,7 @@ export function SettingsView({ gateway, onClose, onConfigSaved, onMainModelChang
              onMainModelChanged={onMainModelChanged}
            />
          ) : activeView === 'providers' ? (
-            <ProvidersSettings onClose={onClose} onViewChange={setProviderView} view={providerView} />
+            <ProvidersSettings onViewChange={setProviderView} view={providerView} />
          ) : activeView === 'keys' ? (
            <KeysSettings view={keysView} />
          ) : activeView === 'mcp' ? (
--- a/apps/desktop/src/app/settings/model-settings.test.tsx
+++ b/apps/desktop/src/app/settings/model-settings.test.tsx
@@ -16,8 +16,6 @@ const getAuxiliaryModels = vi.fn()
 const setModelAssignment = vi.fn()
 const getRecommendedDefaultModel = vi.fn()
 const setEnvVar = vi.fn()
-const getHermesConfigRecord = vi.fn()
-const saveHermesConfig = vi.fn()
 const startManualProviderOAuth = vi.fn()

 vi.mock('@/hermes', () => ({
@@ -26,9 +24,7 @@ vi.mock('@/hermes', () => ({
  getAuxiliaryModels: () => getAuxiliaryModels(),
  setModelAssignment: (body: unknown) => setModelAssignment(body),
  getRecommendedDefaultModel: (slug: string) => getRecommendedDefaultModel(slug),
-  setEnvVar: (key: string, value: string) => setEnvVar(key, value),
-  getHermesConfigRecord: () => getHermesConfigRecord(),
-  saveHermesConfig: (config: unknown) => saveHermesConfig(config)
+  setEnvVar: (key: string, value: string) => setEnvVar(key, value)
 }))

 vi.mock('@/store/onboarding', () => ({
@@ -39,13 +35,7 @@ beforeEach(() => {
  getGlobalModelInfo.mockResolvedValue({ provider: 'nous', model: 'hermes-4' })
  getGlobalModelOptions.mockResolvedValue({
    providers: [
-      {
-        name: 'Nous',
-        slug: 'nous',
-        models: ['hermes-4', 'hermes-4-mini'],
-        authenticated: true,
-        capabilities: { 'hermes-4': { reasoning: true, fast: true } }
-      },
+      { name: 'Nous', slug: 'nous', models: ['hermes-4', 'hermes-4-mini'], authenticated: true },
      // An unconfigured api_key provider — surfaced by the full-universe payload.
      { name: 'DeepSeek', slug: 'deepseek', models: [], authenticated: false, auth_type: 'api_key', key_env: 'DEEPSEEK_API_KEY' }
    ]
@@ -57,8 +47,6 @@ beforeEach(() => {
  setModelAssignment.mockResolvedValue({ provider: 'nous', model: 'hermes-4', gateway_tools: [] })
  getRecommendedDefaultModel.mockResolvedValue({ provider: 'deepseek', model: 'deepseek-chat', free_tier: null })
  setEnvVar.mockResolvedValue({ ok: true })
-  getHermesConfigRecord.mockResolvedValue({ agent: { reasoning_effort: 'medium', service_tier: 'normal' } })
-  saveHermesConfig.mockResolvedValue({ ok: true })
 })

 afterEach(() => {
@@ -112,31 +100,6 @@ describe('ModelSettings', () => {
    await waitFor(() => expect(setEnvVar).toHaveBeenCalledWith('DEEPSEEK_API_KEY', 'sk-test-123'))
  })

-  it('writes the profile default speed (service_tier) when the fast switch is toggled', async () => {
-    await renderModelSettings()
-    await waitFor(() => expect(getHermesConfigRecord).toHaveBeenCalled())
-
-    const fastSwitch = await screen.findByRole('switch')
-    fireEvent.click(fastSwitch)
-
-    await waitFor(() =>
-      expect(saveHermesConfig).toHaveBeenCalledWith(
-        expect.objectContaining({ agent: expect.objectContaining({ service_tier: 'fast' }) })
-      )
-    )
-  })
-
-  it('hides the reasoning/speed defaults when the main model reports no capabilities', async () => {
-    getGlobalModelOptions.mockResolvedValueOnce({
-      providers: [{ name: 'Nous', slug: 'nous', models: ['hermes-4'], authenticated: true, capabilities: { 'hermes-4': { reasoning: false, fast: false } } }]
-    })
-
-    await renderModelSettings()
-    await waitFor(() => expect(getHermesConfigRecord).toHaveBeenCalled())
-
-    expect(screen.queryByRole('switch')).toBeNull()
-  })
-
  it('renders the auxiliary task rows', async () => {
    await renderModelSettings()

--- a/apps/desktop/src/app/settings/model-settings.tsx
+++ b/apps/desktop/src/app/settings/model-settings.tsx
@@ -3,14 +3,11 @@ import { useCallback, useEffect, useMemo, useState } from 'react'
 import { Button } from '@/components/ui/button'
 import { Input } from '@/components/ui/input'
 import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select'
-import { Switch } from '@/components/ui/switch'
 import {
  getAuxiliaryModels,
  getGlobalModelInfo,
  getGlobalModelOptions,
-  getHermesConfigRecord,
  getRecommendedDefaultModel,
-  saveHermesConfig,
  setEnvVar,
  setModelAssignment
 } from '@/hermes'
@@ -18,26 +15,11 @@ import type { AuxiliaryModelsResponse, ModelOptionProvider, StaleAuxAssignment }
 import { useI18n } from '@/i18n'
 import { AlertTriangle, Cpu, Loader2 } from '@/lib/icons'
 import { cn } from '@/lib/utils'
-import { notifyError } from '@/store/notifications'
 import { startManualLocalEndpoint, startManualProviderOAuth } from '@/store/onboarding'
-import type { HermesConfigRecord } from '@/types/hermes'

 import { CONTROL_TEXT } from './constants'
-import { getNested, setNested } from './helpers'
 import { ListRow, LoadingState, Pill, SectionHeading } from './primitives'

-// Hermes' reasoning levels (VALID_REASONING_EFFORTS); `none` = thinking off.
-// Empty config = Hermes default (medium), shown as Medium.
-const EFFORT_VALUES = ['none', 'minimal', 'low', 'medium', 'high', 'xhigh'] as const
-
-// agent.service_tier stores "fast"/"priority"/"on" for fast; anything else is
-// normal (mirrors tui_gateway _load_service_tier).
-const isFastTier = (tier: unknown): boolean =>
-  ['fast', 'priority', 'on'].includes(String(tier ?? '').trim().toLowerCase())
-
-// Reuse the composer's effort labels (`xhigh` shows as "Max", else 1:1).
-const effortLabelKey = (v: string) => (v === 'xhigh' ? 'max' : v) as 'high' | 'low' | 'max' | 'medium' | 'minimal'
-
 // A provider row is "ready" to pick a model from when it reports models. The
 // backend now surfaces the full `hermes model` universe (every canonical
 // provider), so unconfigured providers come back with `authenticated:false`
@@ -115,9 +97,6 @@ export function ModelSettings({ onMainModelChanged }: ModelSettingsProps) {
  const [selectedProvider, setSelectedProvider] = useState('')
  const [selectedModel, setSelectedModel] = useState('')
  const [auxiliary, setAuxiliary] = useState<AuxiliaryModelsResponse | null>(null)
-  // Full profile config, kept so the reasoning/speed defaults round-trip
-  // (read agent.* → write back the whole record) like the generic config page.
-  const [config, setConfig] = useState<HermesConfigRecord | null>(null)
  const [applying, setApplying] = useState(false)
  const [editingAuxTask, setEditingAuxTask] = useState<null | string>(null)
  const [auxDraft, setAuxDraft] = useState<{ model: string; provider: string }>({ model: '', provider: '' })
@@ -134,11 +113,10 @@ export function ModelSettings({ onMainModelChanged }: ModelSettingsProps) {
    setError('')

    try {
-      const [modelInfo, modelOptions, auxiliaryModels, cfg] = await Promise.all([
+      const [modelInfo, modelOptions, auxiliaryModels] = await Promise.all([
        getGlobalModelInfo(),
        getGlobalModelOptions(),
-        getAuxiliaryModels(),
-        getHermesConfigRecord()
+        getAuxiliaryModels()
      ])

      setMainModel({ model: modelInfo.model, provider: modelInfo.provider })
@@ -146,7 +124,6 @@ export function ModelSettings({ onMainModelChanged }: ModelSettingsProps) {
      setSelectedProvider(prev => prev || modelInfo.provider)
      setSelectedModel(prev => prev || modelInfo.model)
      setAuxiliary(auxiliaryModels)
-      setConfig(cfg)
    } catch (err) {
      setError(err instanceof Error ? err.message : String(err))
    } finally {
@@ -204,42 +181,6 @@ export function ModelSettings({ onMainModelChanged }: ModelSettingsProps) {
      .map(entry => ({ task: entry.task, provider: entry.provider, model: entry.model }))
  }, [auxiliary, mainModel])

-  // Capabilities of the APPLIED main model — gates the profile-default
-  // reasoning/speed controls the same way the composer picker gates per-model
-  // edits (reasoning defaults on, fast defaults off when unreported).
-  const mainCaps = useMemo(() => {
-    const row = providers.find(provider => provider.slug === mainModel?.provider)
-
-    return mainModel ? row?.capabilities?.[mainModel.model] : undefined
-  }, [providers, mainModel])
-
-  const reasoningSupported = mainCaps?.reasoning ?? true
-  const fastSupported = mainCaps?.fast ?? false
-  const effortValue = String(getNested(config ?? {}, 'agent.reasoning_effort') ?? '').trim().toLowerCase() || 'medium'
-  const fastOn = isFastTier(getNested(config ?? {}, 'agent.service_tier'))
-
-  // Persist a single agent.* default by round-tripping the whole config record
-  // (PUT /api/config replaces it) — optimistic, with rollback on failure.
-  const writeAgentDefault = useCallback(
-    async (key: string, value: string) => {
-      if (!config) {
-        return
-      }
-
-      const prev = config
-      const next = setNested(config, key, value)
-      setConfig(next)
-
-      try {
-        await saveHermesConfig(next)
-      } catch (err) {
-        setConfig(prev)
-        notifyError(err, m.defaultsFailed)
-      }
-    },
-    [config, m.defaultsFailed]
-  )
-
  // Paste an API key for the selected `api_key` provider, persist it, then
  // refresh so the now-authenticated provider's models populate. Auto-selects
  // the recommended default model so the user can Apply in one more click.
@@ -492,38 +433,6 @@ export function ModelSettings({ onMainModelChanged }: ModelSettingsProps) {
              : `${selectedProviderRow?.name} signs in through your browser — Hermes runs the flow for you.`}
          </p>
        )}
-        {config && mainModel && (reasoningSupported || fastSupported) && (
-          <div className="mt-3 flex flex-wrap items-center gap-x-6 gap-y-3">
-            <span className="text-xs text-muted-foreground">{m.defaultsLabel}</span>
-            {reasoningSupported && (
-              <div className="flex items-center gap-2 text-xs">
-                {m.reasoning}
-                <Select onValueChange={value => void writeAgentDefault('agent.reasoning_effort', value)} value={effortValue}>
-                  <SelectTrigger className={cn('min-w-28', CONTROL_TEXT)}>
-                    <SelectValue />
-                  </SelectTrigger>
-                  <SelectContent>
-                    {EFFORT_VALUES.map(value => (
-                      <SelectItem key={value} value={value}>
-                        {value === 'none' ? m.reasoningOff : t.shell.modelOptions[effortLabelKey(value)]}
-                      </SelectItem>
-                    ))}
-                  </SelectContent>
-                </Select>
-              </div>
-            )}
-            {fastSupported && (
-              <label className="flex items-center gap-2 text-xs">
-                {t.shell.modelOptions.fast}
-                <Switch
-                  checked={fastOn}
-                  onCheckedChange={checked => void writeAgentDefault('agent.service_tier', checked ? 'fast' : 'normal')}
-                  size="xs"
-                />
-              </label>
-            )}
-          </div>
-        )}
        {error && <div className="mt-2 text-xs text-destructive">{error}</div>}
        {switchStaleAux.length > 0 && (
          <div className="mt-2">
--- a/apps/desktop/src/app/settings/provider-config-panel.test.tsx
+++ b/apps/desktop/src/app/settings/provider-config-panel.test.tsx
@@ -1,142 +0,0 @@
-import { cleanup, fireEvent, render, screen, waitFor } from '@testing-library/react'
-import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
-
-import type { MemoryProviderConfig } from '@/types/hermes'
-
-const getMemoryProviderConfig = vi.fn()
-const saveMemoryProviderConfig = vi.fn()
-
-vi.mock('@/hermes', () => ({
-  getMemoryProviderConfig: (provider: string) => getMemoryProviderConfig(provider),
-  saveMemoryProviderConfig: (provider: string, values: unknown) => saveMemoryProviderConfig(provider, values)
-}))
-
-vi.mock('@/store/notifications', () => ({
-  notify: vi.fn(),
-  notifyError: vi.fn()
-}))
-
-function hindsightSchema(overrides: Partial<MemoryProviderConfig['fields'][number]>[] = []): MemoryProviderConfig {
-  const fields: MemoryProviderConfig['fields'] = [
-    {
-      key: 'mode',
-      label: 'Mode',
-      kind: 'select',
-      value: 'cloud',
-      description: 'How Hermes connects to Hindsight.',
-      placeholder: '',
-      is_set: true,
-      options: [
-        { value: 'cloud', label: 'Cloud', description: 'Hindsight Cloud API (lightweight, just needs an API key)' },
-        { value: 'local_external', label: 'Local External', description: 'Connect to an existing Hindsight instance' }
-      ]
-    },
-    {
-      key: 'api_key',
-      label: 'API key',
-      kind: 'secret',
-      value: '',
-      description: 'Used to authenticate with the Hindsight API.',
-      placeholder: 'Enter Hindsight API key',
-      is_set: false,
-      options: []
-    },
-    {
-      key: 'api_url',
-      label: 'API URL',
-      kind: 'text',
-      value: 'https://api.hindsight.vectorize.io',
-      description: '',
-      placeholder: '',
-      is_set: true,
-      options: []
-    },
-    { key: 'bank_id', label: 'Bank ID', kind: 'text', value: 'hermes', description: '', placeholder: '', is_set: true, options: [] },
-    {
-      key: 'recall_budget',
-      label: 'Recall budget',
-      kind: 'select',
-      value: 'mid',
-      description: '',
-      placeholder: '',
-      is_set: true,
-      options: [
-        { value: 'low', label: 'low', description: '' },
-        { value: 'mid', label: 'mid', description: '' },
-        { value: 'high', label: 'high', description: '' }
-      ]
-    }
-  ]
-
-  return {
-    name: 'hindsight',
-    label: 'Hindsight',
-    fields: fields.map((field, index) => ({ ...field, ...overrides[index] }))
-  }
-}
-
-beforeEach(() => {
-  getMemoryProviderConfig.mockResolvedValue(hindsightSchema())
-  saveMemoryProviderConfig.mockResolvedValue({ ok: true })
-})
-
-afterEach(() => {
-  cleanup()
-  vi.clearAllMocks()
-})
-
-async function renderPanel(provider = 'hindsight') {
-  const { ProviderConfigPanel } = await import('./provider-config-panel')
-
-  return render(<ProviderConfigPanel provider={provider} />)
-}
-
-describe('ProviderConfigPanel', () => {
-  it('renders the declared provider fields generically', async () => {
-    await renderPanel()
-
-    expect(await screen.findByDisplayValue('https://api.hindsight.vectorize.io')).toBeTruthy()
-    expect(screen.getByDisplayValue('hermes')).toBeTruthy()
-    expect(screen.getByText('Cloud')).toBeTruthy()
-    expect(screen.getAllByText('Hindsight Cloud API (lightweight, just needs an API key)').length).toBeGreaterThan(0)
-    expect(screen.getByText('mid')).toBeTruthy()
-  })
-
-  it('collapses and expands the fields', async () => {
-    await renderPanel()
-
-    expect(await screen.findByLabelText('API URL')).toBeTruthy()
-    fireEvent.click(screen.getByRole('button', { name: /Hindsight settings/ }))
-    expect(screen.queryByLabelText('API URL')).toBeNull()
-    fireEvent.click(screen.getByRole('button', { name: /Hindsight settings/ }))
-    expect(await screen.findByLabelText('API URL')).toBeTruthy()
-  })
-
-  it('saves edited values without requiring a secret replacement', async () => {
-    await renderPanel()
-
-    const apiUrl = await screen.findByLabelText('API URL')
-    fireEvent.change(apiUrl, { target: { value: 'http://localhost:8888' } })
-    fireEvent.change(screen.getByLabelText('Bank ID'), { target: { value: 'ben-bank' } })
-    fireEvent.click(screen.getByRole('button', { name: 'Save' }))
-
-    await waitFor(() =>
-      expect(saveMemoryProviderConfig).toHaveBeenCalledWith('hindsight', {
-        mode: 'cloud',
-        api_key: '',
-        api_url: 'http://localhost:8888',
-        bank_id: 'ben-bank',
-        recall_budget: 'mid'
-      })
-    )
-  })
-
-  it('renders nothing for a provider with no declared config surface', async () => {
-    getMemoryProviderConfig.mockResolvedValue({ name: 'builtin', label: 'builtin', fields: [] })
-
-    const { container } = await renderPanel('builtin')
-
-    await waitFor(() => expect(getMemoryProviderConfig).toHaveBeenCalledWith('builtin'))
-    expect(container.querySelector('section')).toBeNull()
-  })
-})
--- a/apps/desktop/src/app/settings/provider-config-panel.tsx
+++ b/apps/desktop/src/app/settings/provider-config-panel.tsx
@@ -1,182 +0,0 @@
-import { useCallback, useEffect, useState } from 'react'
-
-import { Button } from '@/components/ui/button'
-import { DisclosureCaret } from '@/components/ui/disclosure-caret'
-import { Input } from '@/components/ui/input'
-import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select'
-import { getMemoryProviderConfig, saveMemoryProviderConfig } from '@/hermes'
-import { Check, Loader2, Save } from '@/lib/icons'
-import { notify, notifyError } from '@/store/notifications'
-import type { MemoryProviderConfig, MemoryProviderField } from '@/types/hermes'
-
-import { CONTROL_TEXT } from './constants'
-import { LoadingState, Pill } from './primitives'
-
-/** Seed editable values from the schema: non-secret fields keep their current
- *  value, secret fields start blank (their value is never returned). */
-function seedValues(config: MemoryProviderConfig): Record<string, string> {
-  return Object.fromEntries(
-    config.fields.map(field => [field.key, field.kind === 'secret' ? '' : field.value])
-  )
-}
-
-function FieldControl({
-  field,
-  value,
-  onChange
-}: {
-  field: MemoryProviderField
-  value: string
-  onChange: (value: string) => void
-}) {
-  if (field.kind === 'select') {
-    const selected = field.options.find(option => option.value === value)
-
-    return (
-      <>
-        <Select onValueChange={onChange} value={value}>
-          <SelectTrigger className={CONTROL_TEXT}>
-            <SelectValue />
-          </SelectTrigger>
-          <SelectContent>
-            {field.options.map(option => (
-              <SelectItem key={option.value} value={option.value}>
-                {option.label}
-              </SelectItem>
-            ))}
-          </SelectContent>
-        </Select>
-        {(selected?.description || field.description) && (
-          <span className="text-xs text-muted-foreground">{selected?.description || field.description}</span>
-        )}
-      </>
-    )
-  }
-
-  if (field.kind === 'secret') {
-    return (
-      <div className="flex flex-wrap items-center gap-2">
-        <Input
-          className="min-w-64 flex-1 font-mono"
-          onChange={event => onChange(event.target.value)}
-          placeholder={field.is_set ? 'Leave blank to keep current value' : field.placeholder}
-          type="password"
-          value={value}
-        />
-        {field.is_set && (
-          <Pill tone="primary">
-            <Check className="size-3" />
-            Set
-          </Pill>
-        )}
-      </div>
-    )
-  }
-
-  return (
-    <Input
-      className="font-mono"
-      onChange={event => onChange(event.target.value)}
-      placeholder={field.placeholder}
-      value={value}
-    />
-  )
-}
-
-export function ProviderConfigPanel({ provider }: { provider: string }) {
-  const [config, setConfig] = useState<MemoryProviderConfig | null>(null)
-  const [values, setValues] = useState<Record<string, string>>({})
-  const [expanded, setExpanded] = useState(true)
-  const [saving, setSaving] = useState(false)
-
-  const refresh = useCallback(async () => {
-    try {
-      const next = await getMemoryProviderConfig(provider)
-      setConfig(next)
-      setValues(seedValues(next))
-    } catch (err) {
-      notifyError(err, 'Memory provider settings failed to load')
-      setConfig(null)
-    }
-  }, [provider])
-
-  useEffect(() => {
-    setConfig(null)
-    void refresh()
-  }, [refresh])
-
-  const save = useCallback(async () => {
-    if (!config) {
-      return
-    }
-
-    setSaving(true)
-
-    try {
-      await saveMemoryProviderConfig(provider, values)
-      notify({ kind: 'success', title: `${config.label} saved`, message: 'Memory provider configuration updated.' })
-      await refresh()
-    } catch (err) {
-      notifyError(err, `Failed to save ${config.label} settings`)
-    } finally {
-      setSaving(false)
-    }
-  }, [config, provider, refresh, values])
-
-  // Providers without a declared config surface (e.g. builtin) render nothing.
-  if (config && config.fields.length === 0) {
-    return null
-  }
-
-  if (!config) {
-    return <LoadingState label="Loading memory provider settings..." />
-  }
-
-  const secretFields = config.fields.filter(field => field.kind === 'secret')
-
-  return (
-    <section className="py-3">
-      <button
-        aria-expanded={expanded}
-        className="flex w-full items-center justify-between gap-3 rounded-lg bg-background/60 px-3 py-2 text-left hover:bg-accent/50"
-        onClick={() => setExpanded(open => !open)}
-        type="button"
-      >
-        <span className="flex min-w-0 items-center gap-2">
-          <DisclosureCaret open={expanded} />
-          <span className="text-[length:var(--conversation-text-font-size)] font-medium text-foreground">
-            {config.label} settings
-          </span>
-          {secretFields.map(field => (
-            <Pill key={field.key}>{field.is_set ? `${field.label} set` : `${field.label} not set`}</Pill>
-          ))}
-        </span>
-      </button>
-
-      {expanded && (
-        <div className="mt-3 grid gap-4 rounded-xl bg-background/60 p-4">
-          {config.fields.map(field => (
-            <label className="grid gap-1.5" key={field.key}>
-              <span className="text-xs font-medium text-muted-foreground">{field.label}</span>
-              <FieldControl
-                field={field}
-                onChange={value => setValues(current => ({ ...current, [field.key]: value }))}
-                value={values[field.key] ?? ''}
-              />
-              {field.kind !== 'select' && field.description && (
-                <span className="text-xs text-muted-foreground">{field.description}</span>
-              )}
-            </label>
-          ))}
-
-          <div className="flex justify-end">
-            <Button disabled={saving} onClick={() => void save()} size="sm">
-              {saving ? <Loader2 className="size-3.5 animate-spin" /> : <Save />}
-              Save
-            </Button>
-          </div>
-        </div>
-      )}
-    </section>
-  )
-}
--- a/apps/desktop/src/app/settings/providers-settings.test.tsx
+++ b/apps/desktop/src/app/settings/providers-settings.test.tsx
@@ -55,7 +55,7 @@ afterEach(() => {
 async function renderProvidersSettings() {
  const { ProvidersSettings } = await import('./providers-settings')

-  return render(<ProvidersSettings onClose={vi.fn()} onViewChange={vi.fn()} view="accounts" />)
+  return render(<ProvidersSettings onViewChange={vi.fn()} view="accounts" />)
 }

 describe('ProvidersSettings', () => {
@@ -95,6 +95,6 @@ describe('ProvidersSettings', () => {

    expect(await screen.findByText('Qwen Code')).toBeTruthy()
    expect(screen.queryByRole('button', { name: 'Remove Qwen Code' })).toBeNull()
-    expect(screen.getByText(/managed by its own CLI/)).toBeTruthy()
+    expect(screen.getByText(/managed outside Hermes/)).toBeTruthy()
  })
 })
--- a/apps/desktop/src/app/settings/providers-settings.tsx
+++ b/apps/desktop/src/app/settings/providers-settings.tsx
@@ -1,8 +1,6 @@
 import { useStore } from '@nanostores/react'
-import type { ReactNode } from 'react'
 import { useCallback, useEffect, useMemo, useState } from 'react'

-import { runInTerminal } from '@/app/right-sidebar/store'
 import {
  FEATURED_ID,
  FeaturedProviderRow,
@@ -25,20 +23,6 @@ import { SettingsCategoryHeading, useEnvCredentials } from './env-credentials'
 import { providerGroup, providerMeta, providerPriority } from './helpers'
 import { LoadingState, SettingsContent } from './primitives'

-// The embedded terminal (and thus the "run disconnect command" path) only
-// exists in the Electron desktop shell, not the web dashboard.
-const canRunInTerminal = () => typeof window !== 'undefined' && Boolean(window.hermesDesktop?.terminal)
-
-// Parallel group headers ("Connected", "Other providers") so the expanded list
-// reads as its own section instead of bleeding into the connected group.
-function GroupLabel({ children }: { children: ReactNode }) {
-  return (
-    <p className="mt-3 px-0.5 text-[length:var(--conversation-caption-font-size)] font-medium text-(--ui-text-tertiary)">
-      {children}
-    </p>
-  )
-}
-
 // Sub-views surfaced as a sidebar subnav: account sign-in vs raw API keys.
 export const PROVIDER_VIEWS = ['accounts', 'keys'] as const

@@ -106,13 +90,11 @@ function buildProviderKeyGroups(vars: Record<string, EnvVarInfo>): ProviderKeyGr
 function OAuthPicker({
  disconnecting,
  onDisconnect,
-  onTerminalDisconnect,
  onWantApiKey,
  providers
 }: {
  disconnecting: null | string
  onDisconnect: (provider: OAuthProvider) => void
-  onTerminalDisconnect: (provider: OAuthProvider) => void
  onWantApiKey: () => void
  providers: OAuthProvider[]
 }) {
@@ -156,14 +138,15 @@ function OAuthPicker({
      {featured && <FeaturedProviderRow onSelect={select} provider={featured} />}
      {connected.length > 0 && (
        <>
-          <GroupLabel>{p.connected}</GroupLabel>
+          <p className="mt-1 px-0.5 text-[length:var(--conversation-caption-font-size)] font-medium text-(--ui-text-tertiary)">
+            {p.connected}
+          </p>
          {connected.map(p => (
            <ConnectedProviderRow
              disconnecting={disconnecting === p.id}
              key={p.id}
              onDisconnect={onDisconnect}
              onSelect={select}
-              onTerminalDisconnect={onTerminalDisconnect}
              provider={p}
            />
          ))}
@@ -171,7 +154,6 @@ function OAuthPicker({
      )}
      {showOthers && (
        <>
-          {connected.length > 0 && <GroupLabel>{p.otherProviders}</GroupLabel>}
          {others.map(p => (
            <ProviderRow key={p.id} onSelect={select} provider={p} />
          ))}
@@ -198,26 +180,21 @@ function ConnectedProviderRow({
  disconnecting,
  onDisconnect,
  onSelect,
-  onTerminalDisconnect,
  provider
 }: {
  disconnecting: boolean
  onDisconnect: (provider: OAuthProvider) => void
  onSelect: (provider: OAuthProvider) => void
-  onTerminalDisconnect: (provider: OAuthProvider) => void
  provider: OAuthProvider
 }) {
  const { t } = useI18n()
-  const copy = t.settings.providers
  const title = providerTitle(provider)
  const Trail = provider.flow === 'external' ? Terminal : ChevronRight
-  // Hermes can clear this provider's creds via the API.
  const canDisconnect = provider.disconnectable ?? provider.flow !== 'external'
-  // External (CLI-managed) provider Hermes can't clear via the API, but ships a
-  // command we can run in the embedded terminal (Electron shell only).
-  const terminalDisconnect = !canDisconnect && Boolean(provider.disconnect_command) && canRunInTerminal()
-  // Only fall back to a static "remove it elsewhere" hint when we offer no button.
-  const showHint = !canDisconnect && !terminalDisconnect
+
+  const disconnectHint = provider.flow === 'external'
+    ? t.settings.providers.removeExternal(title, provider.cli_command)
+    : t.settings.providers.removeKeyManaged(title)

  return (
    <div className="group grid grid-cols-[minmax(0,1fr)_auto] items-center gap-1 rounded-[6px] transition-colors hover:bg-(--ui-control-hover-background)">
@@ -226,13 +203,13 @@ function ConnectedProviderRow({
          <span className="truncate text-[length:var(--conversation-text-font-size)] font-semibold">{title}</span>
          <span className="inline-flex shrink-0 items-center gap-1 bg-primary/10 px-2 py-0.5 text-xs font-medium text-primary">
            <Check className="size-3" />
-            {copy.connected}
+            {t.settings.providers.connected}
          </span>
        </div>
        <p className="mt-1 text-xs leading-5 text-muted-foreground">{t.onboarding.flowSubtitles[provider.flow]}</p>
-        {showHint && (
+        {!canDisconnect && (
          <p className="mt-0.5 truncate text-[0.68rem] leading-5 text-muted-foreground/70">
-            {provider.flow === 'external' ? copy.removeExternalGeneric(title) : copy.removeKeyManaged(title)}
+            {disconnectHint}
          </p>
        )}
      </button>
@@ -251,18 +228,6 @@ function ConnectedProviderRow({
            {disconnecting ? <Loader2 className="size-3 animate-spin" /> : <Trash2 className="size-3" />}
          </Button>
        )}
-        {terminalDisconnect && (
-          <Button
-            aria-label={`${copy.disconnect} ${title}`}
-            onClick={() => onTerminalDisconnect(provider)}
-            size="icon-xs"
-            title={copy.disconnectInTerminal}
-            type="button"
-            variant="ghost"
-          >
-            <Trash2 className="size-3" />
-          </Button>
-        )}
      </div>
    </div>
  )
@@ -278,7 +243,7 @@ function NoProviderKeys() {
  )
 }

-export function ProvidersSettings({ onClose, onViewChange, view }: ProvidersSettingsProps) {
+export function ProvidersSettings({ onViewChange, view }: ProvidersSettingsProps) {
  const { t } = useI18n()
  const { rowProps, vars } = useEnvCredentials()
  const [oauthProviders, setOauthProviders] = useState<OAuthProvider[]>([])
@@ -317,29 +282,6 @@ export function ProvidersSettings({ onClose, onViewChange, view }: ProvidersSett
    return () => void (cancelled = true)
  }, [onboardingActive])

-  // External (CLI-managed) providers can't be cleared via the API by design —
-  // Hermes never deletes creds another tool owns behind a silent API call.
-  // Instead we run the documented removal command in the embedded terminal so
-  // the user sees exactly what executes, then return them to chat to watch it.
-  function handleTerminalDisconnect(provider: OAuthProvider) {
-    const command = provider.disconnect_command
-
-    if (!command) {
-      return
-    }
-
-    const name = providerTitle(provider)
-
-    if (!window.confirm(t.settings.providers.removeTerminalConfirm(name, command))) {
-      return
-    }
-
-    // Leave the settings overlay so the terminal pane (chat-only) is visible.
-    onClose()
-    runInTerminal(command)
-    notify({ kind: 'info', title: t.settings.providers.removedTitle, message: t.settings.providers.removeTerminalRunning(name) })
-  }
-
  async function handleDisconnect(provider: OAuthProvider) {
    const name = providerTitle(provider)

@@ -399,7 +341,6 @@ export function ProvidersSettings({ onClose, onViewChange, view }: ProvidersSett
      <OAuthPicker
        disconnecting={disconnecting}
        onDisconnect={provider => void handleDisconnect(provider)}
-        onTerminalDisconnect={handleTerminalDisconnect}
        onWantApiKey={() => onViewChange('keys')}
        providers={oauthProviders}
      />
@@ -418,7 +359,6 @@ interface ProviderKeyGroup {
 }

 interface ProvidersSettingsProps {
-  onClose: () => void
  onViewChange: (view: ProviderView) => void
  view: ProviderView
 }
--- a/apps/desktop/src/app/shell/app-shell.tsx
+++ b/apps/desktop/src/app/shell/app-shell.tsx
@@ -80,10 +80,6 @@ export function AppShell({
  const connection = useStore($connection)
  const viewportFullscreen = useSyncExternalStore(subscribeWindowSize, viewportIsFullscreen, () => false)
  const isFullscreen = Boolean(connection?.isFullscreen) || viewportFullscreen
-  // Every secondary window (new-session scratch, subagent watch, cmd-click
-  // pop-out) is a compact side panel — none of them carry the full titlebar
-  // tool cluster. Gate on isSecondaryWindow, never the narrower new-session flag.
-  const hideTitlebarControls = isSecondaryWindow()
  const titlebarControls = titlebarControlsPosition(connection?.windowButtonPosition, isFullscreen)
  // Width Windows/Linux reserve for the OS-painted min/max/close overlay (zero
  // on macOS, where window controls sit on the left and are reported via
@@ -166,9 +162,7 @@ export function AppShell({
        } as CSSProperties
      }
    >
-      {!hideTitlebarControls && (
-        <TitlebarControls leftTools={leftTitlebarTools} onOpenSettings={onOpenSettings} tools={titlebarTools} />
-      )}
+      <TitlebarControls leftTools={leftTitlebarTools} onOpenSettings={onOpenSettings} tools={titlebarTools} />

      <main className="relative z-3 flex min-h-0 w-full flex-1 flex-col overflow-hidden transition-none">
        <PaneShell className="min-h-0 flex-1">
@@ -189,9 +183,7 @@ export function AppShell({
            the panes' z-20 resize handles, keeping every pane resizable. */}
        {mainOverlays}

-        {/* The compact pop-out drops the statusbar — it's a scratch window, not
-            the full shell. */}
-        {!isSecondaryWindow() && <StatusbarControls items={statusbarItems} leftItems={leftStatusbarItems} />}
+        <StatusbarControls items={statusbarItems} leftItems={leftStatusbarItems} />
      </main>

      {overlays}
--- a/apps/desktop/src/app/shell/hooks/use-statusbar-items.tsx
+++ b/apps/desktop/src/app/shell/hooks/use-statusbar-items.tsx
@@ -1,4 +1,5 @@
 import { useStore } from '@nanostores/react'
+import type { ReactNode } from 'react'
 import { useCallback, useMemo } from 'react'

 import type { CommandCenterSection } from '@/app/command-center'
@@ -8,6 +9,7 @@ import { useI18n } from '@/i18n'
 import {
  Activity,
  AlertCircle,
+  ChevronDown,
  Clock,
  Command,
  Hash,
@@ -17,6 +19,7 @@ import {
  Zap,
  ZapFilled
 } from '@/lib/icons'
+import { formatModelStatusLabel } from '@/lib/model-status-label'
 import type { RuntimeReadinessResult } from '@/lib/runtime-readiness'
 import { contextBarLabel, LiveDuration, usageContextLabel } from '@/lib/statusbar'
 import { cn } from '@/lib/utils'
@@ -27,11 +30,16 @@ import {
  $activeSessionId,
  $busy,
  $connection,
+  $currentFastMode,
+  $currentModel,
+  $currentProvider,
+  $currentReasoningEffort,
  $currentUsage,
  $sessionStartedAt,
  $turnStartedAt,
  $workingSessionIds,
  $yoloActive,
+  setModelPickerOpen,
  setYoloActive
 } from '@/store/session'
 import { $subagentsBySession, activeSubagentCount } from '@/store/subagents'
@@ -57,6 +65,7 @@ interface StatusbarItemsOptions {
  gatewayLogLines: readonly string[]
  gatewayState: string
  inferenceStatus: RuntimeReadinessResult | null
+  modelMenuContent?: ReactNode
  openAgents: () => void
  openCommandCenterSection: (section: CommandCenterSection) => void
  freshDraftReady: boolean
@@ -74,6 +83,7 @@ export function useStatusbarItems({
  gatewayLogLines,
  gatewayState,
  inferenceStatus,
+  modelMenuContent,
  openAgents,
  openCommandCenterSection,
  freshDraftReady,
@@ -87,6 +97,10 @@ export function useStatusbarItems({
  const terminalTakeover = useStore($terminalTakeover)
  const yoloActive = useStore($yoloActive)
  const busy = useStore($busy)
+  const currentFastMode = useStore($currentFastMode)
+  const currentModel = useStore($currentModel)
+  const currentProvider = useStore($currentProvider)
+  const currentReasoningEffort = useStore($currentReasoningEffort)
  const currentUsage = useStore($currentUsage)
  const desktopActionTasks = useStore($desktopActionTasks)
  const previewServerRestartStatus = useStore($previewServerRestartStatus)
@@ -402,6 +416,37 @@ export function useStatusbarItems({
        title: yoloActive ? copy.yoloOn : copy.yoloOff,
        variant: 'action'
      },
+      {
+        id: 'model-summary',
+        label: (
+          <span className="inline-flex min-w-0 items-center gap-0.5">
+            <span className="truncate">
+              {formatModelStatusLabel(currentModel, {
+                fastMode: currentFastMode,
+                reasoningEffort: currentReasoningEffort
+              })}
+            </span>
+            <ChevronDown className="size-2.5 shrink-0 opacity-50" />
+          </span>
+        ),
+        ...(modelMenuContent
+          ? {
+              menuAlign: 'end' as const,
+              menuClassName: 'w-64',
+              menuContent: modelMenuContent,
+              title: currentProvider
+                ? copy.modelTitle(currentProvider, currentModel || copy.modelNone)
+                : copy.switchModel,
+              variant: 'menu' as const
+            }
+          : {
+              onSelect: () => setModelPickerOpen(true),
+              title: currentProvider
+                ? copy.providerModelTitle(currentProvider, currentModel || copy.noModel)
+                : copy.openModelPicker,
+              variant: 'action' as const
+            })
+      },
      {
        className: `w-7 justify-center px-0${terminalTakeover ? ' bg-accent/55 text-foreground' : ''}`,
        hidden: !chatOpen,
@@ -420,6 +465,11 @@ export function useStatusbarItems({
      contextBar,
      contextUsage,
      copy,
+      currentFastMode,
+      currentModel,
+      currentProvider,
+      currentReasoningEffort,
+      modelMenuContent,
      sessionStartedAt,
      showYoloToggle,
      terminalTakeover,
--- a/apps/desktop/src/app/shell/model-edit-submenu.test.tsx
+++ b/apps/desktop/src/app/shell/model-edit-submenu.test.tsx
@@ -1,84 +0,0 @@
-import { cleanup, fireEvent, render, screen } from '@testing-library/react'
-import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest'
-
-import { DropdownMenu, DropdownMenuContent, DropdownMenuSub, DropdownMenuSubTrigger } from '@/components/ui/dropdown-menu'
-import { $modelPresets, getModelPreset } from '@/store/model-presets'
-import { $activeSessionId } from '@/store/session'
-
-import { type FastControl, ModelEditSubmenu } from './model-edit-submenu'
-
-// Radix calls these on open; jsdom doesn't implement them.
-beforeAll(() => {
-  Element.prototype.scrollIntoView = vi.fn()
-  Element.prototype.hasPointerCapture = vi.fn(() => false)
-  Element.prototype.releasePointerCapture = vi.fn()
-})
-
-beforeEach(() => {
-  $modelPresets.set({})
-  $activeSessionId.set(null)
-})
-
-afterEach(() => {
-  cleanup()
-  vi.clearAllMocks()
-})
-
-// Render the submenu inside an open menu/sub so its content (switches) mounts.
-function renderSubmenu(opts: { fastControl: FastControl; reasoning: boolean; requestGateway: () => Promise<unknown> }) {
-  return render(
-    <DropdownMenu open>
-      <DropdownMenuContent>
-        <DropdownMenuSub open>
-          <DropdownMenuSubTrigger>edit</DropdownMenuSubTrigger>
-          <ModelEditSubmenu
-            effort="medium"
-            fastControl={opts.fastControl}
-            isActive
-            model="m1"
-            onSelectModel={vi.fn()}
-            provider="p1"
-            reasoning={opts.reasoning}
-            requestGateway={opts.requestGateway as never}
-          />
-        </DropdownMenuSub>
-      </DropdownMenuContent>
-    </DropdownMenu>
-  )
-}
-
-// Regression: editing the active row before a live session exists must stay
-// preset-only — the gateway's config.set falls back to global config when no
-// session matches, so it must not be called. (Caught in the second review.)
-describe('ModelEditSubmenu no-session guard', () => {
-  it('param fast: records the preset but skips the gateway without a session', () => {
-    const requestGateway = vi.fn().mockResolvedValue({})
-    renderSubmenu({ fastControl: { kind: 'param', on: false }, reasoning: false, requestGateway })
-
-    fireEvent.click(screen.getByRole('switch'))
-
-    expect(getModelPreset('p1', 'm1').fast).toBe(true)
-    expect(requestGateway).not.toHaveBeenCalled()
-  })
-
-  it('reasoning: records the preset but skips the gateway without a session', () => {
-    const requestGateway = vi.fn().mockResolvedValue({})
-    renderSubmenu({ fastControl: { kind: 'none' }, reasoning: true, requestGateway })
-
-    // Thinking starts on (medium); toggling it off routes through patchReasoning.
-    fireEvent.click(screen.getByRole('switch'))
-
-    expect(getModelPreset('p1', 'm1').effort).toBe('none')
-    expect(requestGateway).not.toHaveBeenCalled()
-  })
-
-  it('param fast: pushes to the gateway once a session is active', async () => {
-    const requestGateway = vi.fn().mockResolvedValue({})
-    $activeSessionId.set('sess1')
-    renderSubmenu({ fastControl: { kind: 'param', on: false }, reasoning: false, requestGateway })
-
-    fireEvent.click(screen.getByRole('switch'))
-
-    expect(requestGateway).toHaveBeenCalledWith('config.set', { key: 'fast', session_id: 'sess1', value: 'fast' })
-  })
-})
--- a/apps/desktop/src/app/shell/model-edit-submenu.tsx
+++ b/apps/desktop/src/app/shell/model-edit-submenu.tsx
@@ -12,9 +12,13 @@ import {
 } from '@/components/ui/dropdown-menu'
 import { Switch } from '@/components/ui/switch'
 import { useI18n } from '@/i18n'
-import { setModelPreset } from '@/store/model-presets'
 import { notifyError } from '@/store/notifications'
-import { $activeSessionId, setCurrentFastMode, setCurrentReasoningEffort } from '@/store/session'
+import {
+  $activeSessionId,
+  $currentReasoningEffort,
+  setCurrentFastMode,
+  setCurrentReasoningEffort
+} from '@/store/session'

 // Hermes' real reasoning levels (see VALID_REASONING_EFFORTS); `none` is owned
 // by the Thinking toggle, not the radio.
@@ -72,104 +76,96 @@ export function resolveFastControl(
 }

 interface ModelEditSubmenuProps {
-  /** This row's effective reasoning effort (live for the active model, else its
-   *  preset) — the submenu shows and edits from this, never the raw session. */
-  effort: string
  /** How fast mode is offered for this model (param toggle vs. variant swap). */
  fastControl: FastControl
  /** Whether this row's model is the active one. */
  isActive: boolean
-  /** This row's model id — edits persist as its global preset. */
-  model: string
+  /** Switch to this model (resolves false on failure; a void result counts as success).
+   *  Awaited before the gateway write when not active so a failed switch doesn't write to the old model. */
+  onActivate: () => Promise<boolean> | void
  /** Switch to a specific model id (used to swap base ⇄ -fast variant). */
  onSelectModel: (model: string) => Promise<boolean> | void
-  /** This row's provider slug — edits persist as its global preset. */
-  provider: string
  /** Whether this model supports reasoning effort. */
  reasoning: boolean
  requestGateway: <T>(method: string, params?: Record<string, unknown>) => Promise<T>
 }

 export function ModelEditSubmenu({
-  effort,
  fastControl,
  isActive,
-  model,
+  onActivate,
  onSelectModel,
-  provider,
  reasoning,
  requestGateway
 }: ModelEditSubmenuProps) {
  const { t } = useI18n()
  const copy = t.shell.modelOptions
+  // Reactive session state comes straight from the stores rather than being
+  // drilled through the panel, so editing it re-renders only this submenu.
  const activeSessionId = useStore($activeSessionId)
+  const currentReasoningEffort = useStore($currentReasoningEffort)

-  const effortValue = normalizeEffort(effort)
-  const thinkingOn = isThinkingEnabled(effort)
+  const effort = normalizeEffort(currentReasoningEffort)
+  const thinkingOn = isThinkingEnabled(currentReasoningEffort)

-  // Editing always records the model's global preset; the active model also gets
-  // it pushed onto the live session. Non-active edits stay preset-only — they do
-  // not switch you to that model.
-  const patchReasoning = async (next: string) => {
-    setModelPreset(provider, model, { effort: next })
-
-    if (!isActive) {
-      return
+  // Reasoning/fast are session-scoped (they apply to the active model), so
+  // editing a non-active model first switches to it. Returns false if the
+  // switch failed, so callers roll back the optimistic update and skip the gateway write.
+  const ensureActive = async (): Promise<boolean> => {
+    if (isActive) {
+      return true
    }

+    return (await onActivate()) !== false
+  }
+
+  const patchReasoning = async (next: string, rollback: string) => {
    setCurrentReasoningEffort(next)

-    // Preset-only without a session: `isActive` holds for the global/default
-    // row pre-session, and the gateway's `config.set` falls back to global
-    // config when none matches — so don't reach it (preset + optimistic store
-    // are the whole effect). Same guard in applyModelPreset / toggleFast.
-    if (!activeSessionId) {
-      return
-    }
-
    try {
-      await requestGateway('config.set', { key: 'reasoning', session_id: activeSessionId, value: next })
+      if (!(await ensureActive())) {
+        setCurrentReasoningEffort(rollback)
+
+        return
+      }
+
+      await requestGateway('config.set', {
+        key: 'reasoning',
+        session_id: activeSessionId ?? '',
+        value: next
+      })
    } catch (err) {
-      setCurrentReasoningEffort(effort)
-      setModelPreset(provider, model, { effort })
+      setCurrentReasoningEffort(rollback)
      notifyError(err, copy.updateFailed)
    }
  }

  const toggleFast = (enabled: boolean) => {
    if (fastControl.kind === 'variant') {
-      // Fast is a separate model id. Record the choice on the base model's
-      // preset (selectFamily picks the `-fast` sibling later when set), and
-      // only swap models now if this is the active row — inactive edits must
-      // stay preset-only, same as the param path below.
-      setModelPreset(provider, fastControl.baseId, { fast: enabled })
-
-      if (isActive) {
-        void onSelectModel(enabled ? fastControl.fastId : fastControl.baseId)
-      }
+      // Fast is a separate model id — swap to it (or back to the base).
+      void onSelectModel(enabled ? fastControl.fastId : fastControl.baseId)

      return
    }

    if (fastControl.kind === 'param') {
-      setModelPreset(provider, model, { fast: enabled })
-
-      if (!isActive) {
-        return
-      }
-
      setCurrentFastMode(enabled)

-      // Preset-only without a session (see patchReasoning).
-      if (!activeSessionId) {
-        return
-      }
      void (async () => {
        try {
-          await requestGateway('config.set', { key: 'fast', session_id: activeSessionId, value: enabled ? 'fast' : 'normal' })
+          if (!(await ensureActive())) {
+            setCurrentFastMode(!enabled)
+
+            return
+          }
+
+          await requestGateway('config.set', {
+            key: 'fast',
+            session_id: activeSessionId ?? '',
+            value: enabled ? 'fast' : 'normal'
+          })
        } catch (err) {
          setCurrentFastMode(!enabled)
-          setModelPreset(provider, model, { fast: !enabled })
          notifyError(err, copy.fastFailed)
        }
      })()
@@ -192,7 +188,9 @@ export function ModelEditSubmenu({
              <Switch
                checked={thinkingOn}
                className="ml-auto"
-                onCheckedChange={checked => void patchReasoning(checked ? effortValue || 'medium' : 'none')}
+                onCheckedChange={checked =>
+                  void patchReasoning(checked ? effort || 'medium' : 'none', currentReasoningEffort)
+                }
                size="xs"
              />
            </DropdownMenuItem>
@@ -207,7 +205,10 @@ export function ModelEditSubmenu({
            <>
              <DropdownMenuSeparator className="mx-0" />
              <DropdownMenuLabel className={dropdownMenuSectionLabel}>{copy.effort}</DropdownMenuLabel>
-              <DropdownMenuRadioGroup onValueChange={value => void patchReasoning(value)} value={effortValue}>
+              <DropdownMenuRadioGroup
+                onValueChange={value => void patchReasoning(value, currentReasoningEffort)}
+                value={effort}
+              >
                {EFFORT_OPTIONS.map(option => (
                  <DropdownMenuRadioItem
                    className={dropdownMenuRow}
--- a/apps/desktop/src/app/shell/model-menu-panel.tsx
+++ b/apps/desktop/src/app/shell/model-menu-panel.tsx
@@ -1,6 +1,6 @@
 import { useStore } from '@nanostores/react'
-import { useQuery, useQueryClient } from '@tanstack/react-query'
-import { createContext, useContext, useMemo, useState } from 'react'
+import { useQuery } from '@tanstack/react-query'
+import { useMemo, useState } from 'react'

 import { Codicon } from '@/components/ui/codicon'
 import {
@@ -18,9 +18,8 @@ import { Skeleton } from '@/components/ui/skeleton'
 import type { HermesGateway } from '@/hermes'
 import { getGlobalModelOptions } from '@/hermes'
 import { useI18n } from '@/i18n'
-import { currentPickerSelection, displayModelName, modelDisplayParts, reasoningEffortLabel } from '@/lib/model-status-label'
+import { displayModelName, modelDisplayParts, reasoningEffortLabel } from '@/lib/model-status-label'
 import { cn } from '@/lib/utils'
-import { $modelPresets, applyModelPreset, modelPresetKey } from '@/store/model-presets'
 import {
  $visibleModels,
  collapseModelFamilies,
@@ -41,14 +40,9 @@ import type { ModelOptionProvider, ModelOptionsResponse } from '@/types/hermes'

 import { ModelEditSubmenu, resolveFastControl } from './model-edit-submenu'

-// Lets the host dropdown (model-pill) hand the panel a way to dismiss itself so
-// clicking a model row commits + closes, while the hover-revealed edit submenu
-// (reasoning/fast) stays open to play with (its items preventDefault on select).
-export const ModelMenuCloseContext = createContext<() => void>(() => {})
-
 interface ModelMenuPanelProps {
  gateway?: HermesGateway
-  onSelectModel: (selection: { model: string; provider: string }) => Promise<boolean> | void
+  onSelectModel: (selection: { model: string; persistGlobal: boolean; provider: string }) => Promise<boolean> | void
  requestGateway: <T>(method: string, params?: Record<string, unknown>) => Promise<T>
 }

@@ -60,10 +54,7 @@ interface ProviderGroup {
 export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: ModelMenuPanelProps) {
  const { t } = useI18n()
  const copy = t.shell.modelMenu
-  const closeMenu = useContext(ModelMenuCloseContext)
  const [search, setSearch] = useState('')
-  const [refreshing, setRefreshing] = useState(false)
-  const queryClient = useQueryClient()
  // Reactive session state is read from the stores here (not drilled in), so
  // toggling effort/fast/model re-renders this panel in place without forcing
  // the parent to rebuild the menu content (which would close the dropdown).
@@ -72,7 +63,6 @@ export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: Model
  const currentModel = useStore($currentModel)
  const currentProvider = useStore($currentProvider)
  const currentReasoningEffort = useStore($currentReasoningEffort)
-  const modelPresets = useStore($modelPresets)
  const visibleModels = useStore($visibleModels)

  const modelOptions = useQuery({
@@ -86,12 +76,8 @@ export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: Model
    }
  })

-  const { model: optionsModel, provider: optionsProvider } = currentPickerSelection(
-    !!activeSessionId,
-    { model: currentModel, provider: currentProvider },
-    modelOptions.data
-  )
-
+  const optionsModel = String(modelOptions.data?.model ?? currentModel ?? '')
+  const optionsProvider = String(modelOptions.data?.provider ?? currentProvider ?? '')
  const loading = modelOptions.isPending && !modelOptions.data

  const error = modelOptions.error
@@ -101,73 +87,13 @@ export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: Model
    : null

  const providers = modelOptions.data?.providers
-
  const effectiveVisibleModels = useMemo(
    () => effectiveVisibleKeys(visibleModels, providers ?? []),
    [visibleModels, providers]
  )

-  // The composer picker never persists the profile default. With a session it
-  // scopes the switch to that session; with none it's UI state shipped on the
-  // next session.create (see selectModel). The default lives in Settings → Model.
-  const switchTo = (model: string, provider: string) => onSelectModel({ model, provider })
-
-  // Explicit "Refresh Models": re-fetch the catalog with refresh:true so the
-  // backend busts its 1h provider-model disk cache and re-pulls each provider's
-  // live list. Fixes live-only models (e.g. OpenCode Zen free tier) vanishing
-  // when the cache expires and falls back to the curated static list.
-  const refreshModels = async () => {
-    if (refreshing) {
-      return
-    }
-
-    setRefreshing(true)
-
-    try {
-      const queryKey = ['model-options', activeSessionId || 'global']
-
-      const next =
-        gateway && activeSessionId
-          ? await gateway.request<ModelOptionsResponse>('model.options', {
-              session_id: activeSessionId,
-              refresh: true
-            })
-          : await getGlobalModelOptions({ refresh: true })
-
-      queryClient.setQueryData<ModelOptionsResponse>(queryKey, next)
-    } catch {
-      // Network/backend hiccup — fall back to a plain invalidate so the next
-      // open re-fetches (still cached, but no worse than before).
-      void queryClient.invalidateQueries({ queryKey: ['model-options'] })
-    } finally {
-      setRefreshing(false)
-    }
-  }
-
-  // Selecting a model row restores that model's remembered preset onto the
-  // session (effort/fast), gated by capability. Unset → Hermes defaults.
-  const selectFamily = async (family: ModelFamily, provider: ModelOptionProvider) => {
-    const caps = provider.capabilities?.[family.id]
-    const preset = modelPresets[modelPresetKey(provider.slug, family.id)] ?? {}
-
-    // Variant-fast models (no speed param) express "fast" as a separate `-fast`
-    // id, so honor the saved preset by selecting that sibling. Param-fast is
-    // applied via applyModelPreset below instead.
-    const variantFast = !(caps?.fast ?? false) && !!family.fastId
-    const targetId = variantFast && preset.fast === true ? family.fastId! : family.id
-
-    if ((await switchTo(targetId, provider.slug)) === false) {
-      return
-    }
-
-    await applyModelPreset(
-      {
-        effort: (caps?.reasoning ?? true) ? (preset.effort ?? 'medium') : undefined,
-        fast: (caps?.fast ?? false) ? (preset.fast ?? false) : undefined
-      },
-      { failMessage: t.shell.modelOptions.updateFailed, request: requestGateway, sessionId: activeSessionId }
-    )
-  }
+  const switchTo = (model: string, provider: string) =>
+    onSelectModel({ model, persistGlobal: !activeSessionId, provider })

  const groups = useMemo(
    () => groupModels(providers ?? [], search, { model: optionsModel, provider: optionsProvider }, effectiveVisibleModels),
@@ -226,42 +152,37 @@ export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: Model
                // -fast variant carries the same param support as its base.
                const caps = group.provider.capabilities?.[family.id]

-                // Effective settings for this row: live session state when it's
-                // the active model, otherwise its remembered preset (Hermes
-                // defaults when unset). Row label AND submenu read from these so
-                // they never disagree.
-                const preset = modelPresets[modelPresetKey(group.provider.slug, family.id)] ?? {}
-                const effEffort = isCurrent ? currentReasoningEffort : preset.effort ?? ''
-                const effFast = isCurrent ? currentFastMode : preset.fast ?? false
-
+                // Single source of truth for the active row's fast state — keeps
+                // the row label in lock-step with the submenu's Fast toggle and
+                // handles the standalone `-fast` id case.
                const fastControl = resolveFastControl(
                  activeId ?? family.id,
                  group.provider.models ?? [],
                  caps?.fast ?? false,
-                  effFast
+                  currentFastMode
                )

-                const meta = [
-                  fastControl.kind !== 'none' && fastControl.on ? copy.fast : null,
-                  (caps?.reasoning ?? true) ? reasoningEffortLabel(effEffort) || copy.medium : null
-                ]
-                  .filter(Boolean)
-                  .join(' ')
+                // Grayed text is live session state only: inactive rows get no
+                // label (a fast-capable sibling must not make an off Fast toggle
+                // look on), and effort shows only for reasoning-capable models.
+                const meta = isCurrent
+                  ? [
+                      fastControl.kind !== 'none' && fastControl.on ? copy.fast : null,
+                      (caps?.reasoning ?? true) ? reasoningEffortLabel(currentReasoningEffort) || copy.medium : null
+                    ]
+                      .filter(Boolean)
+                      .join(' ')
+                  : ''

                // Every row is a hover-Edit submenu trigger. Activating it
-                // (pointer or keyboard) switches to the family's base model and
-                // restores its preset; the Fast toggle inside swaps to the -fast
-                // sibling (or flips the speed param). The sub-trigger has no
-                // `onSelect`, so wire both click and Enter/Space for keyboard parity.
-                // Clicking the row commits the model and closes the picker; the
-                // edit submenu (reasoning/fast) is reached by HOVER, so you can
-                // still tweak those without the click dismissing everything.
+                // (pointer or keyboard) switches to the family's base model;
+                // the Fast toggle inside swaps to the -fast sibling (or flips
+                // the speed param). The sub-trigger has no `onSelect`, so wire
+                // both click and Enter/Space for keyboard parity.
                const activate = () => {
                  if (!isCurrent) {
-                    void selectFamily(family, group.provider)
+                    void switchTo(family.id, group.provider.slug)
                  }
-
-                  closeMenu()
                }

                return (
@@ -283,12 +204,10 @@ export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: Model
                      {isCurrent ? <Codicon className="ml-auto text-foreground" name="check" size="0.75rem" /> : null}
                    </DropdownMenuSubTrigger>
                    <ModelEditSubmenu
-                      effort={effEffort}
                      fastControl={fastControl}
                      isActive={isCurrent}
-                      model={family.id}
+                      onActivate={() => switchTo(family.id, group.provider.slug)}
                      onSelectModel={nextModel => switchTo(nextModel, group.provider.slug)}
-                      provider={group.provider.slug}
                      reasoning={caps?.reasoning ?? true}
                      requestGateway={requestGateway}
                    />
@@ -302,18 +221,6 @@ export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: Model

      <DropdownMenuSeparator className="mx-0" />

-      <DropdownMenuItem
-        className={cn(dropdownMenuRow, 'text-(--ui-text-tertiary)')}
-        disabled={refreshing}
-        onSelect={event => {
-          event.preventDefault()
-          void refreshModels()
-        }}
-      >
-        <Codicon className={cn('mr-1.5', refreshing && 'animate-spin')} name="sync" size="0.75rem" />
-        {copy.refreshModels}
-      </DropdownMenuItem>
-
      <DropdownMenuItem
        className={cn(dropdownMenuRow, 'text-(--ui-text-tertiary)')}
        onSelect={() => setModelVisibilityOpen(true)}
--- a/apps/desktop/src/app/types.ts
+++ b/apps/desktop/src/app/types.ts
@@ -46,12 +46,6 @@ export interface SlashExecResponse {
  warning?: string
 }

-export interface BrowserManageResponse {
-  connected?: boolean
-  url?: string
-  messages?: string[]
-}
-
 export interface SessionSteerResponse {
  // 'queued' == accepted into the live turn's steer slot (injected at the next
  // tool-result boundary); 'rejected' == no live tool window, caller queues.
--- a/apps/desktop/src/components/assistant-ui/block-direction.test.tsx
+++ b/apps/desktop/src/components/assistant-ui/block-direction.test.tsx
@@ -1,129 +0,0 @@
-// Lists and blockquotes have chrome beside the text (markers, the quote
-// border) whose side is driven by the box's CSS direction, which the
-// unicode-bidi:plaintext rules never touch. These tests pin the split of
-// responsibilities: ul/ol/blockquote carry dir="auto" so the browser
-// resolves their box direction from content, inline code carries dir="ltr"
-// so it neither votes in that resolution nor reorders, and plain prose
-// blocks stay attribute-free (the plaintext CSS owns them). jsdom does not
-// resolve dir="auto", so the contract is asserted at the attribute level.
-import { AssistantRuntimeProvider, type ThreadMessage, useExternalStoreRuntime } from '@assistant-ui/react'
-import { render, screen } from '@testing-library/react'
-import { describe, expect, it, vi } from 'vitest'
-
-import { Thread } from './thread'
-
-const createdAt = new Date('2026-06-01T00:00:00.000Z')
-
-class TestResizeObserver {
-  observe() {}
-  unobserve() {}
-  disconnect() {}
-}
-
-vi.stubGlobal('ResizeObserver', TestResizeObserver)
-vi.stubGlobal('requestAnimationFrame', (callback: FrameRequestCallback) =>
-  window.setTimeout(() => callback(performance.now()), 0)
-)
-vi.stubGlobal('cancelAnimationFrame', (id: number) => window.clearTimeout(id))
-
-Element.prototype.scrollTo = function scrollTo() {}
-
-function stubOffsetDimension(
-  prop: 'offsetHeight' | 'offsetWidth',
-  clientProp: 'clientHeight' | 'clientWidth',
-  fallback: number
-) {
-  const previous = Object.getOwnPropertyDescriptor(HTMLElement.prototype, prop)
-
-  Object.defineProperty(HTMLElement.prototype, prop, {
-    configurable: true,
-    get() {
-      return previous?.get?.call(this) || (this as HTMLElement)[clientProp] || fallback
-    }
-  })
-}
-
-stubOffsetDimension('offsetWidth', 'clientWidth', 800)
-stubOffsetDimension('offsetHeight', 'clientHeight', 600)
-
-function userMessage(): ThreadMessage {
-  return {
-    id: 'user-1',
-    role: 'user',
-    content: [{ type: 'text', text: 'hi' }],
-    attachments: [],
-    createdAt,
-    metadata: { custom: {} }
-  } as ThreadMessage
-}
-
-function assistantMessage(text: string): ThreadMessage {
-  return {
-    id: 'assistant-1',
-    role: 'assistant',
-    content: [{ type: 'text', text }],
-    status: { type: 'complete', reason: 'stop' },
-    createdAt,
-    metadata: {
-      unstable_state: null,
-      unstable_annotations: [],
-      unstable_data: [],
-      steps: [],
-      custom: {}
-    }
-  } as ThreadMessage
-}
-
-function Harness({ text }: { text: string }) {
-  const runtime = useExternalStoreRuntime<ThreadMessage>({
-    messages: [userMessage(), assistantMessage(text)],
-    isRunning: false,
-    onNew: async () => {}
-  })
-
-  return (
-    <AssistantRuntimeProvider runtime={runtime}>
-      <Thread />
-    </AssistantRuntimeProvider>
-  )
-}
-
-describe('block-level direction chrome', () => {
-  it('lists carry dir="auto" so markers follow the resolved direction', async () => {
-    render(<Harness text={'מקומות:\n\n1. חוף גורדון\n2. שוק הכרמל\n\n- פריט\n- item'} />)
-
-    const item = await screen.findByText(/חוף גורדון/)
-
-    expect(item.closest('ol')?.getAttribute('dir')).toBe('auto')
-
-    const bullet = await screen.findByText(/פריט/)
-
-    expect(bullet.closest('ul')?.getAttribute('dir')).toBe('auto')
-  })
-
-  it('blockquotes carry dir="auto" so the border follows the resolved direction', async () => {
-    render(<Harness text={'> ציטוט קצר בעברית'} />)
-
-    const quote = await screen.findByText(/ציטוט קצר/)
-
-    expect(quote.closest('blockquote')?.getAttribute('dir')).toBe('auto')
-  })
-
-  it('inline code carries dir="ltr" so it does not vote in dir="auto" resolution', async () => {
-    render(<Harness text={'1. `npm install` מתקין תלויות'} />)
-
-    const code = await screen.findByText('npm install')
-
-    expect(code.tagName).toBe('CODE')
-    expect(code.getAttribute('dir')).toBe('ltr')
-    expect(code.closest('ol')?.getAttribute('dir')).toBe('auto')
-  })
-
-  it('plain prose blocks stay attribute-free (plaintext CSS owns them)', async () => {
-    render(<Harness text={'שלום לכולם'} />)
-
-    const paragraph = await screen.findByText(/שלום לכולם/)
-
-    expect(paragraph.closest('p')?.hasAttribute('dir')).toBe(false)
-  })
-})
--- a/apps/desktop/src/components/assistant-ui/directive-text.tsx
+++ b/apps/desktop/src/components/assistant-ui/directive-text.tsx
@@ -322,29 +322,13 @@ function shortLabel(type: HermesRefType, id: string): string {
  return tail || id
 }

-function safeEmbeddedImages(text: string) {
-  try {
-    return extractEmbeddedImages(text)
-  } catch {
-    return { cleanedText: text, images: [] as string[] }
-  }
-}
-
-function safeDirectiveSegments(text: string): Unstable_DirectiveSegment[] {
-  try {
-    return [...hermesDirectiveFormatter.parse(text)]
-  } catch {
-    return [{ kind: 'text', text }]
-  }
-}
-
 /**
 * Renders text containing Hermes directives (`@file:...`, `@image:...`) as
 * inline chips. Embedded MEDIA images render below as a thumbnail row.
 */
 export function DirectiveContent({ text }: { text: string }) {
-  const { cleanedText, images } = useMemo(() => safeEmbeddedImages(text ?? ''), [text])
-  const segments = useMemo(() => safeDirectiveSegments(cleanedText), [cleanedText])
+  const { cleanedText, images } = useMemo(() => extractEmbeddedImages(text ?? ''), [text])
+  const segments = useMemo(() => hermesDirectiveFormatter.parse(cleanedText), [cleanedText])

  return (
    <span className="whitespace-pre-line" data-slot="aui_directive-text">
--- a/apps/desktop/src/components/assistant-ui/markdown-text.test.ts
+++ b/apps/desktop/src/components/assistant-ui/markdown-text.test.ts
@@ -201,13 +201,4 @@ describe('preprocessMarkdown', () => {

    expect(output).toContain('<https://example.com/a_b/c~d/page>')
  })
-
-  it('handles a fenced block larger than V8 spread-argument limit', () => {
-    // A single huge code block (e.g. a logged minified bundle) used to throw
-    // `RangeError: Maximum call stack size exceeded` via `out.push(...lines)`.
-    const body = Array.from({ length: 200_000 }, (_, i) => `line ${i}`).join('\n')
-    const input = `\`\`\`js\n${body}\n\`\`\``
-
-    expect(() => preprocessMarkdown(input)).not.toThrow()
-  })
 })
--- a/apps/desktop/src/components/assistant-ui/markdown-text.tsx
+++ b/apps/desktop/src/components/assistant-ui/markdown-text.tsx
@@ -19,9 +19,8 @@ import {
  useState
 } from 'react'

-import { ExpandableBlock } from '@/components/chat/expandable-block'
 import { PreviewAttachment } from '@/components/chat/preview-attachment'
-import { chunkByLines, SyntaxHighlighter } from '@/components/chat/shiki-highlighter'
+import { SyntaxHighlighter } from '@/components/chat/shiki-highlighter'
 import { ZoomableImage } from '@/components/chat/zoomable-image'
 import { normalizeExternalUrl, openExternalLink, PrettyLink } from '@/lib/external-link'
 import { createMemoizedMathPlugin } from '@/lib/katex-memo'
@@ -58,11 +57,7 @@ const mathPlugin = createMemoizedMathPlugin({ singleDollarTextMath: true })
 // flush) with a tail-bounded repair — see lib/remend-tail.ts. Must stay
 // module-scope so the prop identity is stable across renders.
 function preprocessWithTailRepair(text: string): string {
-  try {
-    return tailBoundedRemend(preprocessMarkdown(text))
-  } catch {
-    return text
-  }
+  return tailBoundedRemend(preprocessMarkdown(text))
 }

 // Memoized block splitter. Streamdown calls `parseMarkdownIntoBlocks` (a full
@@ -458,35 +453,8 @@ const MARKDOWN_CONTAINER_CLASS_NAME = cn(
  '[&>*:first-child]:mt-0 [&>*:last-child]:mb-0 [&>*+*]:mt-(--paragraph-gap)'
 )

-const MAX_MARKDOWN_CHARS = 200_000
-
-function HugeTextFallback({ containerClassName, text }: { containerClassName?: string; text: string }) {
-  const chunks = useMemo(() => chunkByLines(text, 200), [text])
-
-  return (
-    <div
-      className={cn(
-        'aui-md w-full max-w-none overflow-hidden rounded-[0.625rem] border border-border font-mono text-[0.7rem] leading-relaxed text-foreground/90',
-        containerClassName
-      )}
-    >
-      <ExpandableBlock className="p-2">
-        {chunks.map((chunk, index) => (
-          <div
-            className="[content-visibility:auto]"
-            key={index}
-            style={{ containIntrinsicSize: `auto ${chunk.lines * 16}px` }}
-          >
-            {chunk.text}
-          </div>
-        ))}
-      </ExpandableBlock>
-    </div>
-  )
-}
-
 function MarkdownTextSurface({ containerClassName, containerProps }: MarkdownTextSurfaceProps) {
-  const { status, text } = useMessagePartText()
+  const { status } = useMessagePartText()
  const isStreaming = status.type === 'running'

  // Keep code parsing enabled while streaming so incomplete fenced blocks still
@@ -516,37 +484,19 @@ function MarkdownTextSurface({ containerClassName, containerProps }: MarkdownTex
          <p className={cn('wrap-anywhere leading-(--dt-line-height)', className)} {...props} />
        ),
        a: MarkdownLink,
-        // Inline code must not vote when an ancestor resolves `dir="auto"`
-        // (HTML's algorithm skips descendants that carry their own dir),
-        // mirroring the CSS isolate that already keeps it out of the
-        // plaintext scan. Fenced code never reaches this override; it goes
-        // through the code plugin's CodeCard path.
-        inlineCode: ({ className, ...props }: ComponentProps<'code'>) => (
-          <code className={className} dir="ltr" {...props} />
-        ),
        // `---` as quiet spacing, not a heavy full-width rule.
        hr: (_props: ComponentProps<'hr'>) => <div aria-hidden className="my-3" />,
-        // Lists and blockquotes have chrome that sits *beside* the text
-        // (markers, the quote border), and that side is driven by the CSS
-        // `direction` of the box, which `unicode-bidi: plaintext` never
-        // touches — an RTL list otherwise renders its numbers stranded at
-        // the far left. `dir="auto"` lets the browser resolve the box
-        // direction from content; the plaintext rules in styles.css keep
-        // owning per-line text direction. Inline code carries `dir="ltr"`
-        // (see the `code` override) so it doesn't vote here either, same
-        // contract as the CSS isolate.
        blockquote: ({ className, ...props }: ComponentProps<'blockquote'>) => (
          <blockquote
-            className={cn('border-s-2 border-border ps-3 text-muted-foreground italic', className)}
-            dir="auto"
+            className={cn('border-l-2 border-border pl-3 text-muted-foreground italic', className)}
            {...props}
          />
        ),
        ul: ({ className, ...props }: ComponentProps<'ul'>) => (
-          <ul className={cn('my-1 gap-0', className)} dir="auto" {...props} />
+          <ul className={cn('my-1 gap-0', className)} {...props} />
        ),
        ol: ({ className, ...props }: ComponentProps<'ol'>) => (
-          <ol className={cn('my-1 gap-0', className)} dir="auto" {...props} />
+          <ol className={cn('my-1 gap-0', className)} {...props} />
        ),
        li: ({ className, ...props }: ComponentProps<'li'>) => (
          <li className={cn('leading-(--dt-line-height)', className)} {...props} />
@@ -583,10 +533,6 @@ function MarkdownTextSurface({ containerClassName, containerProps }: MarkdownTex
    [isStreaming]
  )

-  if (text.length > MAX_MARKDOWN_CHARS) {
-    return <HugeTextFallback containerClassName={containerClassName} text={text} />
-  }
-
  return (
    <StreamdownTextPrimitive
      components={components}
--- a/apps/desktop/src/components/assistant-ui/streaming.test.tsx
+++ b/apps/desktop/src/components/assistant-ui/streaming.test.tsx
@@ -378,20 +378,6 @@ function IntroHarness() {
  )
 }

-function DismissibleErrorHarness({ onDismissError }: { onDismissError: (messageId: string) => void }) {
-  const runtime = useExternalStoreRuntime<ThreadMessage>({
-    messages: [assistantErrorMessage('OpenRouter rejected the request (403).')],
-    isRunning: false,
-    onNew: async () => {}
-  })
-
-  return (
-    <AssistantRuntimeProvider runtime={runtime}>
-      <Thread onDismissError={onDismissError} />
-    </AssistantRuntimeProvider>
-  )
-}
-
 describe('assistant-ui streaming renderer', () => {
  beforeEach(() => {
    resizeObservers.clear()
@@ -435,23 +421,6 @@ describe('assistant-ui streaming renderer', () => {
    expect(screen.getByRole('alert').textContent).toContain('OpenRouter rejected the request (403).')
  })

-  it('omits the dismiss control when no onDismissError handler is supplied', () => {
-    render(<MessageHarness message={assistantErrorMessage('OpenRouter rejected the request (403).')} />)
-
-    expect(screen.queryByRole('button', { name: 'Dismiss error' })).toBeNull()
-  })
-
-  it('invokes onDismissError with the errored message id when the dismiss control is clicked', () => {
-    const onDismissError = vi.fn()
-    render(<DismissibleErrorHarness onDismissError={onDismissError} />)
-
-    const dismiss = screen.getByRole('button', { name: 'Dismiss error' })
-    fireEvent.click(dismiss)
-
-    expect(onDismissError).toHaveBeenCalledTimes(1)
-    expect(onDismissError).toHaveBeenCalledWith('assistant-error-1')
-  })
-
  // Scroll behavior (follow-at-bottom, escape-on-scroll-up, re-engage) is owned
  // by the use-stick-to-bottom library and covered by its own test suite. We
  // don't re-assert its scrollTop mechanics here — doing so in jsdom (no real
--- a/apps/desktop/src/components/assistant-ui/thread-list.tsx
+++ b/apps/desktop/src/components/assistant-ui/thread-list.tsx
@@ -1,6 +1,5 @@
 import { ThreadPrimitive, useAuiEvent, useAuiState } from '@assistant-ui/react'
 import {
-  type CSSProperties,
  type ComponentProps,
  type FC,
  memo,
@@ -22,7 +21,6 @@ import {
  resetThreadScroll,
  setThreadAtBottom
 } from '@/store/thread-scroll'
-import { isSecondaryWindow } from '@/store/windows'

 import { MessageRenderBoundary } from './message-render-boundary'

@@ -134,20 +132,6 @@ const ThreadMessageListInner: FC<ThreadMessageListProps> = ({
  const hiddenCount = firstVisible
  const visibleGroups = hiddenCount > 0 ? groups.slice(hiddenCount) : groups
  const restoreFromBottomRef = useRef<number | null>(null)
-  // Secondary windows (new-session scratch, subagent watch, cmd-click pop-out)
-  // hide the titlebar tool cluster + session header, but the OS traffic lights
-  // still sit in the top-left, so reserve the titlebar gap above the transcript.
-  const secondaryWindow = isSecondaryWindow()
-  // NB: CSS calc() requires whitespace around the +/- operator. This string is
-  // assigned verbatim to the --sticky-human-top inline style below (it does not
-  // go through Tailwind, which would auto-space it), so the spaces are load-
-  // bearing — without them the declaration is invalid, gets dropped, and the
-  // sticky user bubble falls back to its ~4px default and slides under the OS
-  // traffic lights.
-  const secondaryTitlebarGap = 'calc(var(--titlebar-height) + 0.75rem)'
-  const threadContentTopPad = secondaryWindow
-    ? 'pt-[calc(var(--titlebar-height)+0.75rem)]'
-    : 'pt-[calc(var(--titlebar-height)-0.5rem)]'

  useEffect(() => setThreadAtBottom(isAtBottom), [isAtBottom])
  useEffect(() => () => resetThreadScroll(), [])
@@ -251,24 +235,8 @@ const ThreadMessageListInner: FC<ThreadMessageListProps> = ({
  return (
    <div
      className="relative min-h-0 max-w-full overflow-hidden contain-[layout_paint]"
-      style={
-        {
-          height: clampToComposer ? 'var(--thread-viewport-height)' : '100%',
-          ...(secondaryWindow ? { '--sticky-human-top': secondaryTitlebarGap } : {})
-        } as CSSProperties
-      }
+      style={{ height: clampToComposer ? 'var(--thread-viewport-height)' : '100%' }}
    >
-      {secondaryWindow && (
-        // Secondary windows hide the titlebar chrome, so the scroller runs to
-        // the window's top edge and streamed text slides up under the OS
-        // traffic lights. Content padding alone scrolls away with the text — a
-        // fixed opaque strip (the titlebar's drag region) masks anything behind
-        // it and keeps the window draggable, matching the main window's header.
-        <div
-          aria-hidden="true"
-          className="absolute inset-x-0 top-0 z-10 h-(--titlebar-height) bg-background [-webkit-app-region:drag]"
-        />
-      )}
      <div
        className="size-full overflow-x-hidden overflow-y-auto overscroll-contain"
        data-following={isAtBottom ? 'true' : 'false'}
@@ -284,7 +252,9 @@ const ThreadMessageListInner: FC<ThreadMessageListProps> = ({
          </div>
        ) : (
          <div
-            className={cn('mx-auto flex w-full max-w-(--composer-width) min-w-0 flex-col px-6', threadContentTopPad)}
+            className={cn(
+              'mx-auto flex w-full max-w-(--composer-width) min-w-0 flex-col px-6 pt-[calc(var(--titlebar-height)+1.5rem)]'
+            )}
            data-slot="aui_thread-content"
            ref={contentRef as React.RefCallback<HTMLDivElement>}
          >
--- a/apps/desktop/src/components/assistant-ui/thread.tsx
+++ b/apps/desktop/src/components/assistant-ui/thread.tsx
@@ -91,7 +91,7 @@ import { attachmentDisplayText, attachmentId, pathLabel } from '@/lib/chat-runti
 import { DATA_IMAGE_URL_RE } from '@/lib/embedded-images'
 import { LinkifiedText } from '@/lib/external-link'
 import { triggerHaptic } from '@/lib/haptics'
-import { GitBranchIcon, Loader2Icon, Volume2Icon, VolumeXIcon, XIcon } from '@/lib/icons'
+import { GitBranchIcon, Loader2Icon, Volume2Icon, VolumeXIcon } from '@/lib/icons'
 import { extractPreviewTargets } from '@/lib/preview-targets'
 import { useEnterAnimation } from '@/lib/use-enter-animation'
 import { cn } from '@/lib/utils'
@@ -169,7 +169,6 @@ export const Thread: FC<{
  loading?: ThreadLoadingState
  onBranchInNewChat?: (messageId: string) => void
  onCancel?: () => Promise<void> | void
-  onDismissError?: (messageId: string) => void
  onRestoreToMessage?: (messageId: string) => Promise<void> | void
  sessionId?: string | null
  sessionKey?: string | null
@@ -181,19 +180,18 @@ export const Thread: FC<{
  loading,
  onBranchInNewChat,
  onCancel,
-  onDismissError,
  onRestoreToMessage,
  sessionId = null,
  sessionKey
 }) => {
  const messageComponents = useMemo(
    () => ({
-      AssistantMessage: () => <AssistantMessage onBranchInNewChat={onBranchInNewChat} onDismissError={onDismissError} />,
+      AssistantMessage: () => <AssistantMessage onBranchInNewChat={onBranchInNewChat} />,
      SystemMessage,
      UserEditComposer: () => <UserEditComposer cwd={cwd} gateway={gateway} sessionId={sessionId} />,
      UserMessage: () => <UserMessage onCancel={onCancel} onRestoreToMessage={onRestoreToMessage} />
    }),
-    [cwd, gateway, onBranchInNewChat, onCancel, onDismissError, onRestoreToMessage, sessionId]
+    [cwd, gateway, onBranchInNewChat, onCancel, onRestoreToMessage, sessionId]
  )

  const emptyPlaceholder = intro ? (
@@ -247,13 +245,9 @@ const CenteredThreadSpinner: FC = () => {
  )
 }

-const AssistantMessage: FC<{
-  onBranchInNewChat?: (messageId: string) => void
-  onDismissError?: (messageId: string) => void
-}> = ({ onBranchInNewChat, onDismissError }) => {
+const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }> = ({ onBranchInNewChat }) => {
  const messageId = useAuiState(s => s.message.id)
  const messageRuntime = useMessageRuntime()
-  const { t } = useI18n()

  // PERF: this component must NOT subscribe to the streaming text. Every
  // selector here returns a value that stays referentially stable across
@@ -312,20 +306,10 @@ const AssistantMessage: FC<{
        )}
        <MessagePrimitive.Error>
          <ErrorPrimitive.Root
-            className="mt-1.5 flex items-start gap-1.5 text-[0.78rem] leading-5 text-[color-mix(in_srgb,var(--dt-destructive)_78%,var(--ui-text-secondary))]"
+            className="mt-1.5 text-[0.78rem] leading-5 text-[color-mix(in_srgb,var(--dt-destructive)_78%,var(--ui-text-secondary))]"
            role="alert"
          >
-            <ErrorPrimitive.Message className="min-w-0 flex-1" />
-            {onDismissError && (
-              <TooltipIconButton
-                className="-my-0.5 shrink-0 text-current opacity-70 hover:opacity-100"
-                onClick={() => onDismissError(messageId)}
-                side="top"
-                tooltip={t.assistant.thread.dismissError}
-              >
-                <XIcon className="size-3.5" />
-              </TooltipIconButton>
-            )}
+            <ErrorPrimitive.Message />
          </ErrorPrimitive.Root>
        </MessagePrimitive.Error>
      </div>
@@ -827,7 +811,7 @@ function StickyHumanMessageContainer({ attachments, children }: { attachments?:
 // so without the carve-out, clicking a stuck bubble drags the window instead of
 // opening the edit composer.
 const USER_BUBBLE_BASE_CLASS =
-  'composer-human-message standalone-glass relative flex w-full min-w-0 max-w-full flex-col gap-1.5 overflow-y-auto rounded-xl border bg-(--dt-user-bubble) px-3 py-2 text-left [-webkit-app-region:no-drag]'
+  'composer-human-message standalone-glass relative flex w-full min-w-0 max-w-full flex-col gap-1.5 overflow-hidden rounded-xl border bg-(--dt-user-bubble) px-3 py-2 text-left [-webkit-app-region:no-drag]'

 const USER_ACTION_ICON_BUTTON_CLASS =
  'grid place-items-center rounded-md bg-transparent text-(--ui-text-secondary) transition-colors hover:bg-(--ui-control-active-background) hover:text-foreground disabled:cursor-default disabled:text-(--ui-text-quaternary) disabled:opacity-70'
--- a/apps/desktop/src/components/chat/code-card.tsx
+++ b/apps/desktop/src/components/chat/code-card.tsx
@@ -66,7 +66,7 @@ function CodeCardBody({ className, ...props }: React.ComponentProps<'div'>) {
  return (
    <div
      className={cn(
-        'font-mono text-[0.7rem] leading-relaxed text-foreground/90 [&_pre]:m-0 [&_pre]:overflow-x-auto [&_pre]:bg-transparent! [&_pre]:px-2 [&_pre]:py-1.5 [&_pre]:font-mono [&_pre]:leading-relaxed',
+        'p-1.5 font-mono text-[0.7rem] leading-relaxed text-foreground/90 [&_pre]:m-0 [&_pre]:overflow-x-auto [&_pre]:bg-transparent! [&_pre]:px-2 [&_pre]:py-1.5 [&_pre]:font-mono [&_pre]:leading-relaxed',
        className
      )}
      data-slot="code-card-body"
--- a/apps/desktop/src/components/chat/expandable-block.tsx
+++ b/apps/desktop/src/components/chat/expandable-block.tsx
@@ -1,52 +0,0 @@
-'use client'
-
-import { type ReactNode, useLayoutEffect, useRef, useState } from 'react'
-
-import { ChevronDown } from '@/lib/icons'
-import { cn } from '@/lib/utils'
-
-interface ExpandableBlockProps {
-  children: ReactNode
-  className?: string
-}
-
-export function ExpandableBlock({ children, className }: ExpandableBlockProps) {
-  const innerRef = useRef<HTMLDivElement>(null)
-  const [expanded, setExpanded] = useState(false)
-  const [overflowing, setOverflowing] = useState(false)
-
-  useLayoutEffect(() => {
-    const el = innerRef.current
-
-    if (!el) {return}
-
-    const measure = () => setOverflowing(el.scrollHeight > 121)
-    measure()
-    const observer = new ResizeObserver(measure)
-    observer.observe(el)
-
-    return () => observer.disconnect()
-  }, [])
-
-  return (
-    <div className="relative">
-      <div
-        className={cn('overflow-y-auto', expanded ? 'max-h-[40dvh]' : 'max-h-[7.5rem]', className)}
-        ref={innerRef}
-      >
-        {children}
-      </div>
-      {overflowing && (
-        <button
-          aria-expanded={expanded}
-          aria-label={expanded ? 'Collapse' : 'Expand'}
-          className="absolute inset-x-0 bottom-0 flex h-7 cursor-pointer items-end justify-center bg-linear-to-t from-(--ui-chat-surface-background) to-transparent pb-1 text-muted-foreground/70 transition-colors hover:text-foreground"
-          onClick={() => setExpanded(v => !v)}
-          type="button"
-        >
-          <ChevronDown className={cn('size-3.5 transition-transform', expanded && 'rotate-180')} />
-        </button>
-      )}
-    </div>
-  )
-}
--- a/apps/desktop/src/components/chat/shiki-highlighter.test.ts
+++ b/apps/desktop/src/components/chat/shiki-highlighter.test.ts
@@ -1,37 +0,0 @@
-import { describe, expect, it } from 'vitest'
-
-import { chunkByLines, exceedsHighlightBudget } from '@/components/chat/shiki-highlighter'
-
-describe('exceedsHighlightBudget', () => {
-  it('highlights normal-sized blocks', () => {
-    expect(exceedsHighlightBudget('const x = 1\n'.repeat(100))).toBe(false)
-  })
-
-  it('skips highlighting past the line budget', () => {
-    expect(exceedsHighlightBudget('x\n'.repeat(5_000))).toBe(true)
-  })
-
-  it('skips highlighting past the char budget on few lines', () => {
-    expect(exceedsHighlightBudget('a'.repeat(200_000))).toBe(true)
-  })
-
-  it('short-circuits on char budget before line loop', () => {
-    expect(exceedsHighlightBudget('y\n'.repeat(250_000))).toBe(true)
-  })
-})
-
-describe('chunkByLines', () => {
-  it('keeps a small block as a single chunk', () => {
-    const code = 'a\nb\nc'
-    expect(chunkByLines(code, 200)).toEqual([{ text: code, lines: 3 }])
-  })
-
-  it('splits a large block and reconstructs it losslessly', () => {
-    const code = Array.from({ length: 1000 }, (_, i) => `line ${i}`).join('\n')
-    const chunks = chunkByLines(code, 200)
-
-    expect(chunks).toHaveLength(5)
-    expect(chunks.map(chunk => chunk.text).join('\n')).toBe(code)
-    expect(chunks.reduce((sum, chunk) => sum + chunk.lines, 0)).toBe(1000)
-  })
-})
--- a/Show More
+++ b/Show More