feat(mcp): adopt mcp__server__tool naming convention

Port from anomalyco/opencode#33533.

Native MCP tools now register as mcp__<server>__<tool> (double-underscore delimiter) instead of mcp_<server>_<tool>, aligning with the convention used by Claude Code, Codex, and OpenCode. The double-underscore delimiter disambiguates the server/tool boundary even when either component contains underscores (the single-underscore form was ambiguous, which is why is_mcp_tool_parallel_safe already had to track provenance in a side-map). It also unifies native registration with the Anthropic-OAuth wire form (_MCP_TOOL_PREFIX = 'mcp__'), so the single->double promotion that path performed is now a no-op for native tools while still handling legacy replayed names.

- tools/mcp_tool.py: add MCP_TOOL_NAME_PREFIX + mcp_prefixed_tool_name() helper; route _convert_mcp_schema, utility schemas, refresh stale-set, and the parallel-safe prefix gate through it
- agent/transports/codex_event_projector.py: mirror convention in the deterministic call_id input for MCP server-executed tool calls
- tests: update produced-name assertions to the new convention
2026-06-29 14:55:27 +08:00 · 2026-06-25 17:08:44 -07:00
775 changed files with 6620 additions and 34916 deletions
--- a/.envrc
+++ b/.envrc
@@ -1,5 +1,5 @@
 watch_file pyproject.toml uv.lock
 watch_file package-lock.json package.json web/package.json ui-tui/package.json website/package.json apps/shared/package.json apps/desktop/package.json ui-tui/packages/hermes-ink/package.json
-watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix nix/hermes-agent.nix nix/desktop.nix
+watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix

 use flake
--- a/.github/actions/hermes-smoke-test/action.yml
+++ b/.github/actions/hermes-smoke-test/action.yml
@@ -0,0 +1,50 @@
+name: Hermes smoke test
+description: >
+  Run the image's built-in entrypoint against `--help` and `dashboard --help`
+  to catch basic runtime regressions before publishing.  Requires the image
+  to already be loaded into the local Docker daemon under `image`.
+
+  Works identically on amd64 and arm64 runners.
+
+inputs:
+  image:
+    description: Fully-qualified image tag (e.g. nousresearch/hermes-agent:test)
+    required: true
+
+runs:
+  using: composite
+  steps:
+    - name: Ensure /tmp/hermes-test is hermes-writable
+      shell: bash
+      run: |
+        # The image runs as the hermes user (UID 10000).  The directory
+        # created by `mkdir -p` below is not owned by that UID, so hermes
+        # can't write to it — chown it to match the in-container UID before
+        # bind-mounting.  Real users doing `docker run -v ~/.hermes:...`
+        # with their own UID hit the same issue and have their own
+        # remediations (HERMES_UID env var, or chown locally).
+        mkdir -p /tmp/hermes-test
+        sudo chown -R 10000:10000 /tmp/hermes-test
+
+    - name: hermes --help
+      shell: bash
+      run: |
+        # Use the image's real ENTRYPOINT (/init + main-wrapper.sh) so
+        # this exercises the actual production startup path. PR #30136
+        # review caught that an --entrypoint override here had been
+        # silently neutered by the s6-overlay migration — stage2-hook
+        # ignores its CMD args, so the smoke test was a no-op.
+        docker run --rm \
+          -v /tmp/hermes-test:/opt/data \
+          "${{ inputs.image }}" --help
+
+    - name: hermes dashboard --help
+      shell: bash
+      run: |
+        # Regression guard for #9153: dashboard was present in source but
+        # missing from the published image.  If this fails, something in
+        # the Dockerfile is excluding the dashboard subcommand from the
+        # installed package.
+        docker run --rm \
+          -v /tmp/hermes-test:/opt/data \
+          "${{ inputs.image }}" dashboard --help
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,7 +20,6 @@ permissions:
  pull-requests: write # needed by lint (PR comment) + supply-chain (PR comment)
  actions: read # needed by osv-scanner (SARIF upload)
  security-events: write # needed by osv-scanner (SARIF upload)
-  packages: write # needed by docker build

 concurrency:
  group: ci-${{ github.ref }}
@@ -33,7 +32,6 @@ jobs:
  # (all lanes true) so post-merge validation is never weakened.
  # ─────────────────────────────────────────────────────────────────────
  detect:
-    name: Detect affected areas
    runs-on: ubuntu-latest
    outputs:
      python: ${{ steps.classify.outputs.python }}
@@ -55,15 +53,11 @@ jobs:
  # Skipped workflows (if condition is false) don't spin up runners.
  # ─────────────────────────────────────────────────────────────────────
  tests:
-    name: Python tests
    needs: detect
    if: needs.detect.outputs.python == 'true'
    uses: ./.github/workflows/tests.yml
-    with:
-      slice_count: 8

  lint:
-    name: Python lints
    needs: detect
    if: needs.detect.outputs.python == 'true'
    uses: ./.github/workflows/lint.yml
@@ -71,49 +65,35 @@ jobs:
      event_name: ${{ needs.detect.outputs.event_name }}

  typecheck:
-    name: TypeScript
    needs: detect
    if: needs.detect.outputs.frontend == 'true'
    uses: ./.github/workflows/typecheck.yml

  docs-site:
-    name: Docs Site
    needs: detect
    if: needs.detect.outputs.site == 'true'
    uses: ./.github/workflows/docs-site-checks.yml

  history-check:
-    name: Deny unrelated histories
    needs: detect
    if: needs.detect.outputs.event_name == 'pull_request'
    uses: ./.github/workflows/history-check.yml

  contributor-check:
-    name: Check contributors
    needs: detect
    if: needs.detect.outputs.python == 'true'
    uses: ./.github/workflows/contributor-check.yml

  uv-lockfile:
-    name: Check uv.lock
    needs: detect
    uses: ./.github/workflows/uv-lockfile-check.yml

  docker-lint:
-    name: Lint Docker scripts
    needs: detect
    if: needs.detect.outputs.docker_meta == 'true'
    uses: ./.github/workflows/docker-lint.yml

-  docker:
-    name: Build&Test Docker image
-    needs: detect
-    if: needs.detect.outputs.python == 'true' || needs.detect.outputs.frontend == 'true' || needs.detect.outputs.docker_meta == 'true'
-    uses: ./.github/workflows/docker.yml
-    secrets: inherit
-
  supply-chain:
-    name: Supply-chain scan
    needs: detect
    if: needs.detect.outputs.event_name == 'pull_request' && (needs.detect.outputs.scan == 'true' || needs.detect.outputs.deps == 'true' || needs.detect.outputs.mcp_catalog == 'true')
    uses: ./.github/workflows/supply-chain-audit.yml
@@ -124,7 +104,7 @@ jobs:
      mcp_catalog: ${{ needs.detect.outputs.mcp_catalog == 'true' }}

  osv-scanner:
-    name: OSV scan
+    needs: detect
    uses: ./.github/workflows/osv-scanner.yml

  # ─────────────────────────────────────────────────────────────────────
@@ -147,8 +127,6 @@ jobs:
      - docker-lint
      - supply-chain
      - osv-scanner
-      # we don't require docker to pass rn because it's so slow lol
-      # - docker
    if: always()
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/docker-lint.yml
+++ b/.github/workflows/docker-lint.yml
@@ -2,7 +2,7 @@ name: Docker / shell lint

 # Lints the container build inputs: Dockerfile (via hadolint) and any shell
 # scripts under docker/ (via shellcheck). These catch the class of regression
-# the behavioral docker smoke test can't — unquoted variable
+# the behavioral docker-publish smoke test can't — unquoted variable
 # expansions, silently-failing RUN commands, etc.
 #
 # Rules and ignores are documented in .hadolint.yaml at the repo root.
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -1,9 +1,24 @@
-name: Docker Build, Test, and Publish
+name: Docker Build and Publish

 on:
+  push:
+    branches: [main]
+    paths:
+      - '**/*.py'
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - 'Dockerfile'
+      - 'docker/**'
+      - '.github/workflows/docker-publish.yml'
+      - '.github/actions/hermes-smoke-test/**'
+
+  # No paths filter — the job must always run so the required check
+  # reports a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
+  pull_request:
+
  release:
    types: [published]
-  workflow_call:

 permissions:
  contents: read
@@ -24,7 +39,11 @@ env:
  IMAGE_NAME: nousresearch/hermes-agent

 jobs:
-  # Build, test, and optionally push the amd64 image.
+  # ---------------------------------------------------------------------------
+  # Build amd64 natively.  This job also runs the smoke tests (basic --help
+  # and the dashboard subcommand regression guard from #9153), because amd64
+  # is the only arch we can `load` into the local daemon on an amd64 runner.
+  # ---------------------------------------------------------------------------
  build-amd64:
    # Only run on the upstream repository, not on forks
    if: github.repository == 'NousResearch/hermes-agent'
@@ -34,19 +53,24 @@ jobs:
      digest: ${{ steps.push.outputs.digest }}
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

-      # The image build + integration tests run on every event
-      # (PRs, push-to-main, release). Publish steps below are gated to
-      # push-to-main / release only.
+      # The image build + smoke test + integration tests run ONLY on
+      # push-to-main and release — never on PRs. They are the heaviest jobs
+      # in CI (~15-45 min) and a broken build surfaces on the main push (and
+      # is gated pre-merge by docker-lint + uv-lockfile-check). Every step
+      # below is skipped on PRs, so the job still reports green and the
+      # required check never hangs.
      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
+        if: github.event_name != 'pull_request'
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3

-      # Build once, load into the local daemon for testing.  Cached
+      # Build once, load into the local daemon for smoke testing.  Cached
      # to gha with a per-arch scope; the push step below reuses every
      # layer from this build.
-      - name: Build image (amd64)
-        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
+      - name: Build image (amd64, smoke test)
+        if: github.event_name != 'pull_request'
+        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f  # v7.1.0
        with:
          context: .
          file: Dockerfile
@@ -58,12 +82,25 @@ jobs:
          cache-from: type=gha,scope=docker-amd64
          cache-to: type=gha,mode=max,scope=docker-amd64

+      - name: Smoke test image
+        if: github.event_name != 'pull_request'
+        uses: ./.github/actions/hermes-smoke-test
+        with:
+          image: ${{ env.IMAGE_NAME }}:test
+
+      # ---------------------------------------------------------------------
      # Run the docker-integration test suite against the freshly-built
-      # image already loaded into the local daemon (`:test`).
+      # image already loaded into the local daemon (`:test`).  These tests
+      # are excluded from the sharded `tests.yml :: test` matrix on purpose
+      # (see `_SKIP_PARTS` in scripts/run_tests_parallel.py) because each
+      # shard would otherwise reach the session-scoped ``built_image``
+      # fixture in ``tests/docker/conftest.py`` and start a 3-7min
+      # ``docker build`` of its own — guaranteed to die in fixture
+      # setup.
      #
-      # Piggybacking here avoids a second image build: the build step
-      # already loaded the image into the daemon under
-      # `${IMAGE_NAME}:test`, so we just point ``HERMES_TEST_IMAGE`` at
+      # Piggybacking here avoids a second image build: the smoke test
+      # already proved the image loads + runs, so the daemon has it under
+      # `${IMAGE_NAME}:test` and we just point ``HERMES_TEST_IMAGE`` at
      # that.  The fixture's ``HERMES_TEST_IMAGE`` branch (see
      # tests/docker/conftest.py:62-63) short-circuits the rebuild.
      #
@@ -73,20 +110,26 @@ jobs:
      # cheapest path to coverage on every PR that touches docker code.
      # ---------------------------------------------------------------------
      - name: Install uv (for docker tests)
-        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
+        if: github.event_name != 'pull_request'
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5

      - name: Set up Python 3.11 (for docker tests)
+        if: github.event_name != 'pull_request'
        run: uv python install 3.11

      - name: Install Python dependencies (for docker tests)
+        if: github.event_name != 'pull_request'
        run: |
+          uv venv .venv --python 3.11
+          source .venv/bin/activate
          # ``dev`` extra pulls in pytest, pytest-asyncio —
          # everything tests/docker/ needs.  We deliberately avoid ``all``
          # here because the docker tests only drive the container via
          # subprocess and don't import hermes_agent's optional deps.
-          uv sync --locked --python 3.11 --extra dev
+          uv pip install -e ".[dev]"

      - name: Run docker integration tests
+        if: github.event_name != 'pull_request'
        env:
          # Skip rebuild; use the image already loaded by the build step.
          HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
@@ -96,11 +139,12 @@ jobs:
          OPENAI_API_KEY: ""
          NOUS_API_KEY: ""
        run: |
-          scripts/run_tests.sh tests/docker/ --file-timeout 600
+          source .venv/bin/activate
+          python -m pytest tests/docker/ -v --tb=short

      - name: Log in to Docker Hub
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
+        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121  # v4.1.0
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -111,7 +155,7 @@ jobs:
      - name: Push amd64 by digest
        id: push
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
+        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f  # v7.1.0
        with:
          context: .
          file: Dockerfile
@@ -135,7 +179,7 @@ jobs:

      - name: Upload digest artifact
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
        with:
          name: digest-amd64
          path: /tmp/digests/*
@@ -143,7 +187,10 @@ jobs:
          retention-days: 1

  # ---------------------------------------------------------------------------
-  # Build, test, and optionally push the arm64 image.
+  # Build arm64 natively on GitHub's free arm64 runner.  This replaces the
+  # previous QEMU-emulated arm64 build, which was ~5-10x slower and shared
+  # a cache scope with amd64.  Matches the amd64 job's shape: build+load,
+  # smoke test, then on push/release push by digest.
  # ---------------------------------------------------------------------------
  build-arm64:
    if: github.repository == 'NousResearch/hermes-agent'
@@ -153,26 +200,29 @@ jobs:
      digest: ${{ steps.push.outputs.digest }}
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

+      # arm64 build runs only on push-to-main and release (see build-amd64).
      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
+        if: github.event_name != 'pull_request'
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3

      # Log in to ghcr.io so the registry-backed build cache below can be
      # read (cache-from) on every event and written (cache-to) on
      # push/release.  Uses the workflow's GITHUB_TOKEN, which is valid for
      # the whole job — unlike the gha cache backend's short-lived Azure SAS
      # token, which expired mid-build on slow cold-cache arm64 runs and
-      # crashed the build before the tests ran (the reason the gha cache
+      # crashed the build before the smoke test (the reason the gha cache
      # was removed from arm64 PRs in the first place).
      - name: Log in to ghcr.io (build cache)
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121  # v4.1.0
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

-      # Build once, load into the local daemon for testing, then push
+      # Build once, load into the local daemon for smoke testing, then push
      # by digest below. Reads AND writes the registry-backed cache so the
      # push reuses layers from this build and the next build starts warm.
      #
@@ -180,8 +230,9 @@ jobs:
      # cache that previously broke here: its credential is the job-lifetime
      # GITHUB_TOKEN, not a short-lived SAS token, so the cold-build-outlives-
      # token failure mode cannot recur.
-      - name: Build image (arm64, cached publish)
-        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
+      - name: Build image (arm64, smoke test, cached publish)
+        if: github.event_name != 'pull_request'
+        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f  # v7.1.0
        with:
          context: .
          file: Dockerfile
@@ -193,29 +244,15 @@ jobs:
          cache-from: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64
          cache-to: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64,mode=max

-      - name: Install uv for docker tests
-        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
-
-      - name: Set up Python 3.11 for docker tests
-        run: uv python install 3.11
-
-      - name: Install Python dependencies for docker tests
-        run: |
-          uv sync --locked --python 3.11 --extra dev
-
-      - name: Run docker tests
-        env:
-          # Skip rebuild; use the image already loaded by the build step.
-          HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
-          OPENROUTER_API_KEY: ""
-          OPENAI_API_KEY: ""
-          NOUS_API_KEY: ""
-        run: |
-          scripts/run_tests.sh tests/docker/ --file-timeout 600
+      - name: Smoke test image
+        if: github.event_name != 'pull_request'
+        uses: ./.github/actions/hermes-smoke-test
+        with:
+          image: ${{ env.IMAGE_NAME }}:test

      - name: Log in to Docker Hub
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
+        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121  # v4.1.0
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -223,7 +260,7 @@ jobs:
      - name: Push arm64 by digest
        id: push
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
+        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f  # v7.1.0
        with:
          context: .
          file: Dockerfile
@@ -245,7 +282,7 @@ jobs:

      - name: Upload digest artifact
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
        with:
          name: digest-arm64
          path: /tmp/digests/*
@@ -267,17 +304,17 @@ jobs:
    timeout-minutes: 10
    steps:
      - name: Download digests
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4
        with:
          path: /tmp/digests
          pattern: digest-*
          merge-multiple: true

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3

      - name: Log in to Docker Hub
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
+        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121  # v4.1.0
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -37,7 +37,7 @@ jobs:
          fetch-depth: 0 # need full history for merge-base + worktree

      - name: Install uv
-        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5

      - name: Install ruff + ty
        uses: ./.github/actions/retry
@@ -110,7 +110,7 @@ jobs:
          cat .lint-reports/summary.md >> "$GITHUB_STEP_SUMMARY"

      - name: Upload reports as artifact
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
        with:
          name: lint-reports
          path: .lint-reports/
@@ -164,7 +164,7 @@ jobs:
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Install uv
-        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5

      - name: Install ruff
        uses: ./.github/actions/retry
--- a/.github/workflows/skills-index.yml
+++ b/.github/workflows/skills-index.yml
@@ -3,17 +3,17 @@ name: Build Skills Index
 on:
  schedule:
    # Run twice daily: 6 AM and 6 PM UTC
-    - cron: "0 6,18 * * *"
-  workflow_dispatch: # Manual trigger
+    - cron: '0 6,18 * * *'
+  workflow_dispatch:  # Manual trigger
  push:
    branches: [main]
    paths:
-      - "scripts/build_skills_index.py"
-      - ".github/workflows/skills-index.yml"
+      - 'scripts/build_skills_index.py'
+      - '.github/workflows/skills-index.yml'

 permissions:
  contents: read
-  actions: write # to trigger deploy-site.yml on schedule
+  actions: write   # to trigger deploy-site.yml on schedule

 jobs:
  build-index:
@@ -21,11 +21,11 @@ jobs:
    if: github.repository == 'NousResearch/hermes-agent'
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
        with:
-          python-version: "3.11"
+          python-version: '3.11'

      - name: Install dependencies
        run: pip install httpx==0.28.1 pyyaml==6.0.2
@@ -36,7 +36,7 @@ jobs:
        run: python scripts/build_skills_index.py

      - name: Upload index artifact
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
        with:
          name: skills-index
          path: website/static/api/skills-index.json
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -2,11 +2,6 @@ name: Tests

 on:
  workflow_call:
-    inputs:
-      slice_count:
-        description: Number of parallel test slices
-        type: number
-        default: 8

 permissions:
  contents: read
@@ -17,11 +12,13 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  generate:
-    name: "Generate slices"
+  test:
    runs-on: ubuntu-latest
-    outputs:
-      matrix: ${{ steps.matrix.outputs.matrix }}
+    timeout-minutes: 30
+    strategy:
+      fail-fast: false
+      matrix:
+        slice: [1, 2, 3, 4, 5, 6]
    steps:
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -30,26 +27,13 @@ jobs:
        uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
        with:
          path: test_durations.json
+          # Runs on main always write the cache under a new key suffix; jobs restore the most recent cache with the same prefix.
+          # Quoted from https://docs.github.com/en/actions/reference/workflows-and-actions/dependency-caching#cache-hits-and-misses:
+          #   "If you provide restore-keys, the cache action sequentially searches for any caches that match the list of restore-keys.
+          #   If there are no exact matches, the action searches for partial matches of the restore keys.
+          #   When the action finds a partial match, the most recent cache is restored to the path directory."
          key: test-durations

-      - name: Generate test slices
-        id: matrix
-        run: |
-          MATRIX=$(python3 scripts/run_tests_parallel.py --generate-slices ${{ inputs.slice_count }})
-          echo "matrix=$MATRIX" >> "$GITHUB_OUTPUT"
-
-  test:
-    name: Run tests slice ${{ matrix.slice.index }}/${{ inputs.slice_count }}
-    needs: generate
-    runs-on: ubuntu-latest
-    timeout-minutes: 30
-    strategy:
-      fail-fast: false
-      matrix: ${{ fromJSON(needs.generate.outputs.matrix) }}
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-
      - name: Install ripgrep (prebuilt binary)
        run: |
          set -euo pipefail
@@ -65,7 +49,7 @@ jobs:
          rg --version

      - name: Install uv
-        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
        with:
          # Persist uv's download/wheel cache (~/.cache/uv) across runs.
          # Keyed on the dependency manifests, so the cache is reused until
@@ -94,19 +78,33 @@ jobs:
        # re-download, keeping the persisted cache small and fast to restore.
        run: uv cache prune --ci

-      - name: Run tests (slice ${{ matrix.slice.index }}/${{ inputs.slice_count }})
-        # Per-file isolation via scripts/run_tests.sh: each test file runs
-        # in its own freshly-spawned `python -m pytest <file>` subprocess
+      - name: Run tests (slice ${{ matrix.slice }}/6)
+        # Per-file isolation via scripts/run_tests_parallel.py: discovers
+        # every test_*.py file under tests/ (excluding integration/ + e2e/),
+        # then runs `python -m pytest <file>` in a freshly-spawned subprocess
        # with bounded parallelism. No xdist, no shared workers, no
        # module-level state leakage between files.
        #
-        # File list is pre-computed by the generate job (--generate-slices)
-        # which runs LPT distribution once and passes the file list to each
-        # matrix job via --files. Previously each job re-discovered files and
-        # re-ran LPT independently — redundant N times.
+        # Why per-file (not per-test): per-test spawn cost (~250ms × 17k
+        # tests = 70min CPU minimum) blew the wall-clock budget. Per-file
+        # spawn (~250ms × ~850 files = ~3.5min) fits while still giving
+        # every file a fresh interpreter — the only isolation boundary
+        # that matters in practice (cross-file leakage was the original
+        # flake source; intra-file is the test author's responsibility).
+        #
+        # Why drop xdist entirely: xdist's persistent workers accumulate
+        # state across files, which is exactly the leakage we wanted to
+        # fix. ThreadPoolExecutor + subprocess.run is ~60 lines and does
+        # the job with cleaner semantics.
+        #
+        # Matrix slicing (--slice I/N): files are distributed across 6
+        # jobs by cached duration (LPT algorithm) so each job gets
+        # roughly equal wall time. Without a cache, files default to 2s
+        # estimate and get split roughly evenly by count — still correct,
+        # just not perfectly balanced.
        run: |
          source .venv/bin/activate
-          scripts/run_tests.sh --files '${{ matrix.slice.files }}'
+          python scripts/run_tests_parallel.py --slice ${{ matrix.slice }}/6
        env:
          # Ensure tests don't accidentally call real APIs
          OPENROUTER_API_KEY: ""
@@ -116,7 +114,7 @@ jobs:
      - name: Upload per-slice durations
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
-          name: test-durations-slice-${{ matrix.slice.index }}
+          name: test-durations-slice-${{ matrix.slice }}
          path: test_durations.json
          retention-days: 1

@@ -175,7 +173,7 @@ jobs:
          rg --version

      - name: Install uv
-        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
        with:
          # Persist uv's download/wheel cache (~/.cache/uv) across runs.
          # Keyed on the dependency manifests, so the cache is reused until
--- a/.github/workflows/typecheck.yml
+++ b/.github/workflows/typecheck.yml
@@ -6,7 +6,6 @@ on:

 jobs:
  typecheck:
-    name: Check TypeScript
    runs-on: ubuntu-latest
    strategy:
      matrix:
@@ -23,7 +22,8 @@ jobs:
      # native builds. Skipping install scripts drops node-pty's node-gyp
      # header fetch — the transient flake that killed this job pre-`tsc` — and
      # is faster. retry covers the remaining registry blips.
-      - uses: ./.github/actions/retry
+      - 
+        uses: ./.github/actions/retry
        with:
          command: npm ci --ignore-scripts
      - run: npm run --prefix ${{ matrix.package }} typecheck
@@ -35,7 +35,6 @@ jobs:
  # users build apps/desktop from source on install/update. Run the real
  # `vite build` here so that class of break fails in CI instead.
  desktop-build:
-    name: Build desktop app
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -45,7 +44,8 @@ jobs:
          cache: npm
      # Keep install scripts here: the production build may need node-pty's
      # native binary. retry handles the transient install-time fetch flakes.
-      - uses: ./.github/actions/retry
+      - 
+        uses: ./.github/actions/retry
        with:
          command: npm ci
      - run: npm run --prefix apps/desktop build
--- a/.github/workflows/upload_to_pypi.yml
+++ b/.github/workflows/upload_to_pypi.yml
@@ -5,11 +5,11 @@ name: Publish to PyPI
 on:
  push:
    tags:
-      - "v20*" # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
+      - 'v20*'  # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
  workflow_dispatch:
    inputs:
      confirm_tag:
-        description: "Tag to publish (e.g. v2026.5.15). Must already exist."
+        description: 'Tag to publish (e.g. v2026.5.15). Must already exist.'
        required: true
        type: string

@@ -27,7 +27,7 @@ jobs:
    name: Build distribution 📦
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          persist-credentials: false
          # On workflow_dispatch, check out the confirmed tag.
@@ -43,17 +43,17 @@ jobs:
          fi

      - name: Set up Python
-        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
        with:
-          python-version: "3.13"
+          python-version: '3.13'

      - name: Install uv
-        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
+        uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e  # v6

      - name: Set up Node.js
-        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
+        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
-          node-version: "22"
+          node-version: '22'

      - name: Build web dashboard
        run: cd web && npm ci && npm run build
@@ -81,7 +81,7 @@ jobs:
        run: uv build --sdist --wheel

      - name: Upload distribution artifacts
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
        with:
          name: python-package-distributions
          path: dist/
@@ -94,17 +94,17 @@ jobs:
      name: pypi
      url: https://pypi.org/p/hermes-agent
    permissions:
-      id-token: write # OIDC trusted publishing
+      id-token: write  # OIDC trusted publishing

    steps:
      - name: Download distribution artifacts
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4
        with:
          name: python-package-distributions
          path: dist/

      - name: Publish to PyPI
-        uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
+        uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b  # v1.14.0
        with:
          skip-existing: true

@@ -116,12 +116,12 @@ jobs:
    needs: publish
    runs-on: ubuntu-latest
    permissions:
-      contents: write # attach assets to the existing release
-      id-token: write # sigstore signing
+      contents: write   # attach assets to the existing release
+      id-token: write   # sigstore signing

    steps:
      - name: Download distribution artifacts
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4
        with:
          name: python-package-distributions
          path: dist/
@@ -145,7 +145,7 @@ jobs:

      - name: Sign with Sigstore
        if: env.skip_sign != 'true'
-        uses: sigstore/gh-action-sigstore-python@04cffa1d795717b140764e8b640de88853c92acc # v3.3.0
+        uses: sigstore/gh-action-sigstore-python@04cffa1d795717b140764e8b640de88853c92acc  # v3.3.0
        with:
          inputs: >-
            ./dist/*.tar.gz
--- a/.github/workflows/uv-lockfile-check.yml
+++ b/.github/workflows/uv-lockfile-check.yml
@@ -4,7 +4,7 @@ name: uv.lock check
 # that modify pyproject.toml without regenerating uv.lock (or vice versa)
 # must not merge, because the Docker build's `uv sync --frozen` step will
 # fail on a stale lockfile and we'd rather catch it here than in the
-# docker workflow on main.
+# docker-publish workflow on main.
 #
 # ─────────────────────────────────────────────────────────────────────────
 # IMPORTANT: this check runs against the MERGED state, not just your branch
@@ -63,7 +63,7 @@ jobs:
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Install uv
-        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5

      # `uv lock --check` re-resolves the project from pyproject.toml and
      # compares the result to uv.lock, exiting non-zero if they disagree.
@@ -100,7 +100,7 @@ jobs:

          This check is blocking because the Docker image build uses
          `uv sync --frozen --extra all`, which rejects stale lockfiles
-          — catching it here avoids a ~15 min failed docker run
+          — catching it here avoids a ~15 min failed docker-publish run
          on `main` post-merge.
          EOF
            echo "::error title=uv.lock out of sync::Run \`uv lock\` locally and commit the result. If on a PR, sync with main first."
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -819,37 +819,6 @@ that touches the OS, assume *any* platform can hit your code path.
    _quote_cmd_script_arg` and `_quote_schtasks_arg` for the reference
    pair.

-17. **Spawning a console program from a background/GUI parent needs a
-    no-window flag on Windows — and CI enforces it.** A `subprocess.run(["git",
-    ...])` / `Popen(...)` of a cross-platform console exe (git, gh, npm, node,
-    python, uv, ffmpeg, docker, …) allocates and flashes a cmd/conhost window
-    on Windows when the parent has no console of its own (Desktop/Electron,
-    `pythonw.exe`, a detached gateway/cron). **Capturing or redirecting stdio
-    does NOT prevent this** — `capture_output=`/`stdout=` controls where the
-    child's *output* goes, not whether a console is *allocated*. Only
-    `CREATE_NO_WINDOW` suppresses the window. This was the single biggest
-    source of "terminal popups" bug reports. Prefer the chokepoint wrapper —
-    it always injects the flag on Windows and is a no-op on POSIX:
-    ```python
-    from hermes_cli import _subprocess_compat
-    _subprocess_compat.run(cmd, capture_output=True, text=True)   # never flashes
-    _subprocess_compat.popen(cmd)                                  # never flashes
-    # detached background daemon:
-    subprocess.Popen(cmd, **windows_detach_popen_kwargs())
-    # or, at a site you can't route through the wrapper:
-    subprocess.run(cmd, creationflags=windows_hide_flags())
-    ```
-    `scripts/check-windows-footguns.py` (AST-based) flags raw `subprocess.*`
-    calls that can create a new console. It exempts calls that pass
-    `creationflags=`, use `**windows_*_kwargs` spread, or run a provably
-    POSIX-only program (`launchctl`, `systemctl`, `brew`, …). It does **not**
-    treat `capture_output`/`stdout=`/`check_output` as safe for the known
-    Windows-flashing programs above. Calls routed through
-    `_subprocess_compat.run/popen` are inherently safe (the wrapper carries the
-    flag). If a visible window is genuinely intended (interactive editor/terminal
-    launch, foreground re-exec, `cmd /c start`), add `# windows-footgun: ok` on
-    the call line.
-
 ### Testing cross-platform

 Tests that use POSIX-only syscalls need a skip marker. Common ones:
--- a/28
+++ b/28
@@ -189,13 +189,7 @@ RUN cd web && npm run build && \

 # ---------- Source code ----------
 # .dockerignore excludes node_modules, so the installs above survive.
-# --link decouples this layer from parents for cache purposes; --chmod bakes
-# the final read-only permissions at copy time so we skip the separate
-# `chmod -R` pass that previously walked ~30k files across the venv +
-# node_modules + source (21s amd64 / 222s arm64 — #49113).  `a+rX,go-w`
-# gives the non-root hermes user read + traverse but no write; root retains
-# write so the build steps below don't need chmod u+w dances.
-COPY --link --chmod=a+rX,go-w . .
+COPY . .

 # ---------- Permissions ----------
 # Link hermes-agent itself (editable). Deps are already installed in the
@@ -203,15 +197,19 @@ COPY --link --chmod=a+rX,go-w . .
 # resolution or downloads.
 RUN uv pip install --no-cache-dir --no-deps -e "."

-# Wire the exec shim and install-method stamp.  Files under /opt/hermes are
-# already root-owned (COPY, uv sync, npm install all run as root) and
-# read-only for the hermes user (go-w from the --chmod above).
-
+# Keep /opt/hermes immutable for the runtime hermes user. Hosted/container
+# instances must not be able to self-edit the installed source or venv; user
+# data, skills, plugins, config, logs, and dashboard uploads live under
+# /opt/data instead. Root can still repair the image during build/boot, but
+# supervised Hermes processes drop to the non-root hermes user.
 USER root
 RUN mkdir -p /opt/hermes/bin && \
    cp /opt/hermes/docker/hermes-exec-shim.sh /opt/hermes/bin/hermes && \
    chmod 0755 /opt/hermes/bin/hermes && \
-    printf 'docker\n' > /opt/hermes/.install_method
+    printf 'docker\n' > /opt/hermes/.install_method && \
+    chown -R root:root /opt/hermes && \
+    chmod -R a+rX /opt/hermes && \
+    chmod -R a-w /opt/hermes
 # The ``.install_method`` stamp is baked next to the running code (the install
 # tree), NOT into $HERMES_HOME. $HERMES_HOME (/opt/data) is a shared data
 # volume that is commonly bind-mounted from the host and even shared with a
@@ -238,11 +236,13 @@ RUN mkdir -p /opt/hermes/bin && \
 #
 # The arg is optional — local `docker build` without --build-arg simply
 # omits the file, and the runtime falls back to live-git lookup.  CI
-# (.github/workflows/docker.yml) passes ${{ github.sha }} so
+# (.github/workflows/docker-publish.yml) passes ${{ github.sha }} so
 # every published image has it.
 ARG HERMES_GIT_SHA=
 RUN if [ -n "${HERMES_GIT_SHA}" ]; then \
-        printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha; \
+        chmod u+w /opt/hermes && \
+        printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha && \
+        chmod a-w /opt/hermes /opt/hermes/.hermes_build_sha; \
    fi

 # ---------- s6-overlay service wiring ----------
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@@ -722,50 +722,10 @@ def init_agent(
    elif agent.provider == "moa":
        from agent.moa_loop import MoAClient
        agent.api_mode = "chat_completions"
-
-        # Route reference-model outputs to the agent's tool_progress_callback so
-        # every surface that already consumes it (CLI spinner/scrollback, TUI,
-        # desktop, gateway) can show each reference's answer as a labelled block
-        # before the aggregator acts. The facade emits "moa.reference" and
-        # "moa.aggregating" events; we forward them through the same callback
-        # the tool lifecycle uses. Best-effort and cache-safe — these are
-        # display-only events, they never touch the message history.
-        def _moa_reference_relay(event: str, **kwargs: Any) -> None:
-            cb = getattr(agent, "tool_progress_callback", None)
-            if cb is None:
-                return
-            try:
-                if event == "moa.reference":
-                    label = str(kwargs.get("label") or "")
-                    text = str(kwargs.get("text") or "")
-                    idx = kwargs.get("index")
-                    count = kwargs.get("count")
-                    cb(
-                        "moa.reference",
-                        label,
-                        text,
-                        None,
-                        moa_index=idx,
-                        moa_count=count,
-                    )
-                elif event == "moa.aggregating":
-                    cb(
-                        "moa.aggregating",
-                        str(kwargs.get("aggregator") or ""),
-                        None,
-                        None,
-                        moa_ref_count=kwargs.get("ref_count"),
-                    )
-            except Exception:
-                pass
-
-        agent.client = MoAClient(
-            agent.model or "default",
-            reference_callback=_moa_reference_relay,
-        )
+        agent.client = MoAClient(agent.model or "default")
        agent._client_kwargs = {}
        agent.api_key = api_key or "moa-virtual-provider"
-        agent.base_url = "moa://local"
+        agent.base_url = base_url or "moa://local"
        if not agent.quiet_mode:
            print(f"🤖 AI Agent initialized with MoA preset: {agent.model}")
    elif agent.api_mode == "bedrock_converse":
@@ -1670,10 +1630,8 @@ def init_agent(
            f"Model {agent.model} has a context window of {_ctx:,} tokens, "
            f"which is below the minimum {MINIMUM_CONTEXT_LENGTH:,} required "
            f"by Hermes Agent.  Choose a model with at least "
-            f"{MINIMUM_CONTEXT_LENGTH // 1000}K context.  If your server "
-            f"reports a window smaller than the model's true window, set "
-            f"model.context_length in config.yaml to the real value "
-            f"(this must be at least {MINIMUM_CONTEXT_LENGTH // 1000}K)."
+            f"{MINIMUM_CONTEXT_LENGTH // 1000}K context, or set "
+            f"model.context_length in config.yaml to override."
        )

    # Inject context engine tool schemas (e.g. lcm_grep, lcm_describe, lcm_expand).
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@@ -1499,10 +1499,6 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
    # _client_kwargs is a dict — snapshot a shallow copy so mutating the
    # live dict doesn't poison the rollback target.
    _snapshot["_client_kwargs"] = dict(getattr(agent, "_client_kwargs", {}) or {})
-    # Snapshot the credential pool reference so a failed client rebuild can
-    # restore the original pool (issue #52727: pool reload is part of this
-    # switch and must be reversible on rollback).
-    _snapshot["_credential_pool"] = getattr(agent, "_credential_pool", _MISSING)

    try:
        # Clear the per-config context_length override so the new model's
@@ -1527,36 +1523,8 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
        if api_key:
            agent.api_key = api_key

-        # ── Reload credential pool for the new provider (issue #52727) ──
-        # Without this, ``recover_with_credential_pool`` sees a
-        # ``pool.provider != agent.provider`` mismatch and short-circuits,
-        # leaving the new provider with no rotation/recovery on 401/429 and
-        # burning the original pool's entries. Only reload when the provider
-        # actually changed (or the pool was missing) — re-selecting the same
-        # provider must not churn the pool reference. A reload failure is
-        # logged + swallowed: the switch itself must still complete.
-        old_norm = (old_provider or "").strip().lower()
-        new_norm = (new_provider or "").strip().lower()
-        if old_norm != new_norm or getattr(agent, "_credential_pool", None) is None:
-            try:
-                from agent.credential_pool import load_pool
-                agent._credential_pool = load_pool(new_provider)
-            except Exception as _pool_exc:  # noqa: BLE001
-                logger.warning(
-                    "switch_model: credential pool reload failed for %s (%s); "
-                    "continuing without pool rotation this turn",
-                    new_provider, _pool_exc,
-                )
-
        # ── Build new client ──
-        if (new_provider or "").strip().lower() == "moa":
-            from agent.moa_loop import MoAClient
-
-            agent.api_key = api_key or "moa-virtual-provider"
-            agent.base_url = "moa://local"
-            agent._client_kwargs = {}
-            agent.client = MoAClient(agent.model or "default")
-        elif api_mode == "anthropic_messages":
+        if api_mode == "anthropic_messages":
            from agent.anthropic_adapter import (
                build_anthropic_client,
                resolve_anthropic_token,
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -1274,7 +1274,7 @@ def run_oauth_setup_token() -> Optional[str]:
    # concern does not apply to an interactive login the user explicitly
    # invokes.  noqa: subprocess-stdin
    try:
-        subprocess.run([claude_path, "setup-token"])  # windows-footgun: ok — claude setup-token is interactive OAuth
+        subprocess.run([claude_path, "setup-token"])
    except (KeyboardInterrupt, EOFError):
        return None

--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -666,28 +666,6 @@ def _pool_runtime_base_url(entry: Any, fallback: str = "") -> str:
    return str(url or "").strip().rstrip("/")


-# Hostnames (lowercase, exact) that the auxiliary Anthropic path is allowed to
-# be pointed at via config.yaml model.base_url. Anything else falls back to the
-# Anthropic default — operators routing main-session traffic through a
-# non-Anthropic host (e.g. OpenRouter, OpenAI) with provider=anthropic in config
-# must NOT have that foreign host leak into the auxiliary client. See #52608.
-_ANTHROPIC_COMPATIBLE_HOSTS = frozenset({
-    "api.anthropic.com",
-})
-
-
-def _is_anthropic_compatible_host(url: str) -> bool:
-    """Return True if ``url``'s hostname is an Anthropic endpoint we trust for aux calls."""
-    if not url:
-        return False
-    try:
-        from urllib.parse import urlparse
-        host = (urlparse(url).hostname or "").strip().lower().rstrip(".")
-        return host in _ANTHROPIC_COMPATIBLE_HOSTS
-    except Exception:
-        return False
-
-
 def _nous_min_key_ttl_seconds() -> int:
    try:
        return max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800")))
@@ -2278,16 +2256,9 @@ def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optiona
    if not token:
        return None, None

-    # Allow base URL override from config.yaml model.base_url, but only when:
-    #   1. the configured provider is anthropic (otherwise a non-Anthropic
-    #      base_url, e.g. Codex endpoint, would leak into Anthropic requests), AND
-    #   2. the override URL actually points at an Anthropic-compatible endpoint.
-    # Without gate (2), operators who route main-session traffic through a
-    # non-Anthropic provider that accepts Anthropic-format requests (e.g.
-    # OpenRouter at openrouter.ai/api/v1, with provider=anthropic in config.yaml)
-    # would have every auxiliary side-channel call (memory extractors,
-    # reflection, vision, title generation) 401 from the foreign host —
-    # see issue #52608.
+    # Allow base URL override from config.yaml model.base_url, but only
+    # when the configured provider is anthropic — otherwise a non-Anthropic
+    # base_url (e.g. Codex endpoint) would leak into Anthropic requests.
    base_url = _pool_runtime_base_url(entry, _ANTHROPIC_DEFAULT_BASE_URL) if pool_present else _ANTHROPIC_DEFAULT_BASE_URL
    try:
        from hermes_cli.config import load_config
@@ -2297,7 +2268,7 @@ def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optiona
            cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
            if cfg_provider == "anthropic":
                cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
-                if cfg_base_url and _is_anthropic_compatible_host(cfg_base_url):
+                if cfg_base_url:
                    base_url = cfg_base_url
    except Exception:
        pass
@@ -2783,25 +2754,6 @@ def _is_model_incompatible_error(exc: Exception) -> bool:
    ))


-def _is_invalid_aux_response_error(exc: Exception) -> bool:
-    """Detect provider responses that authenticated but cannot serve aux shape.
-
-    Some OpenAI-compatible routes return HTTP 200 with an empty/malformed
-    ChatCompletion instead of a normal provider error.  That is still a
-    provider/model capability failure for auxiliary tasks: downstream callers
-    need ``choices[0].message`` and should be able to continue through the
-    same fallback path as explicit model-incompatibility errors.
-    """
-    if not isinstance(exc, RuntimeError):
-        return False
-    msg = str(exc).lower()
-    return (
-        "auxiliary " in msg
-        and "llm returned invalid response" in msg
-        and "choices[0].message" in msg
-    )
-
-
 def _evict_cached_clients(provider: str) -> None:
    """Drop cached auxiliary clients for a provider so fresh creds are used."""
    normalized = _normalize_aux_provider(provider)
@@ -3624,37 +3576,6 @@ def _resolve_auto(
    # config.yaml (auxiliary.<task>.provider) still win over this.
    main_provider = str(runtime_provider or _read_main_provider() or "")
    main_model = str(runtime_model or _read_main_model() or "")
-
-    # MoA virtual provider: the "model" is a preset name (e.g. "opus-gpt") and
-    # there is no real "moa" HTTP endpoint, so resolving an aux client against
-    # provider="moa"/model=<preset> sends the preset name as the model id and
-    # the provider 400s ("opus-gpt is not a valid model ID"). Auxiliary tasks
-    # (title generation, compression, vision, …) don't need the reference
-    # fan-out — they should run on the aggregator, which is the preset's acting
-    # model. Resolve the MoA preset to its aggregator slot and continue Step 1
-    # with that real provider+model. Mirrors the MoA context-length resolution.
-    if main_provider == "moa":
-        try:
-            from hermes_cli.config import load_config
-            from hermes_cli.moa_config import resolve_moa_preset
-
-            _preset = resolve_moa_preset(load_config().get("moa") or {}, main_model)
-            _agg = _preset.get("aggregator") or {}
-            _agg_provider = str(_agg.get("provider") or "").strip()
-            _agg_model = str(_agg.get("model") or "").strip()
-            if _agg_provider and _agg_model and _agg_provider.lower() != "moa":
-                main_provider = _agg_provider
-                main_model = _agg_model
-                # The MoA virtual runtime carries a non-HTTP base_url
-                # ("moa://local") and a placeholder api_key; they belong to the
-                # facade, not the aggregator's real provider. Drop them so the
-                # aggregator resolves through its own provider credentials.
-                runtime_base_url = ""
-                runtime_api_key = ""
-                runtime_api_mode = ""
-        except Exception:
-            logger.debug("MoA aux resolution to aggregator failed", exc_info=True)
-
    if (main_provider and main_model
            and main_provider not in {"auto", ""}):
        resolved_provider = main_provider
@@ -5524,9 +5445,6 @@ def _validate_llm_response(response: Any, task: str = None) -> Any:
        if not choices or not hasattr(choices[0], "message"):
            raise AttributeError("missing choices[0].message")
    except (AttributeError, TypeError, IndexError) as exc:
-        recovered = _recover_aux_response_message(response)
-        if recovered is not None:
-            return recovered
        response_type = type(response).__name__
        response_preview = str(response)[:120]
        raise RuntimeError(
@@ -5538,64 +5456,6 @@ def _validate_llm_response(response: Any, task: str = None) -> Any:
    return response


-def _recover_aux_response_message(response: Any) -> Optional[Any]:
-    """Synthesize chat-completions shape from Responses-style text fields.
-
-    Auxiliary callers consume ``choices[0].message``.  Some compatible
-    endpoints return text outside ``choices`` (for example ``output_text`` or
-    ``output`` items).  Preserve that response before declaring it malformed.
-    """
-    text = _extract_aux_response_text(response)
-    if not text:
-        return None
-
-    choice = SimpleNamespace(
-        message=SimpleNamespace(content=text),
-        finish_reason=getattr(response, "finish_reason", None) or "stop",
-    )
-    try:
-        response.choices = [choice]
-        return response
-    except Exception:
-        return SimpleNamespace(
-            id=getattr(response, "id", ""),
-            model=getattr(response, "model", ""),
-            object=getattr(response, "object", "chat.completion"),
-            choices=[choice],
-            usage=getattr(response, "usage", None),
-        )
-
-
-def _extract_aux_response_text(response: Any) -> str:
-    output_text = _obj_get(response, "output_text")
-    if isinstance(output_text, str) and output_text.strip():
-        return output_text.strip()
-
-    output = _obj_get(response, "output")
-    if not isinstance(output, list):
-        return ""
-
-    parts: List[str] = []
-    for item in output:
-        item_type = _obj_get(item, "type")
-        if item_type and item_type != "message":
-            continue
-        for part in (_obj_get(item, "content") or []):
-            part_type = _obj_get(part, "type")
-            if part_type in {"output_text", "text", None}:
-                text = _obj_get(part, "text")
-                if isinstance(text, str) and text.strip():
-                    parts.append(text.strip())
-    return "\n".join(parts).strip()
-
-
-def _obj_get(obj: Any, key: str, default: Any = None) -> Any:
-    value = getattr(obj, key, default)
-    if value is default and isinstance(obj, dict):
-        value = obj.get(key, default)
-    return value
-
-
 def call_llm(
    task: str = None,
    *,
@@ -5998,7 +5858,6 @@ def call_llm(
            or _is_connection_error(first_err)
            or _is_rate_limit_error(first_err)
            or _is_model_incompatible_error(first_err)
-            or _is_invalid_aux_response_error(first_err)
        )
        # Respect explicit provider choice for transient errors (auth, request
        # validation, etc.) but allow fallback when the provider clearly cannot
@@ -6021,7 +5880,6 @@ def call_llm(
            or _is_connection_error(first_err)
            or _is_rate_limit_error(first_err)
            or _is_model_incompatible_error(first_err)
-            or _is_invalid_aux_response_error(first_err)
        )
        if should_fallback and (is_auto or is_capacity_error):
            if _is_payment_error(first_err):
@@ -6037,8 +5895,6 @@ def call_llm(
                reason = "rate limit"
            elif _is_model_incompatible_error(first_err):
                reason = "model incompatible with route"
-            elif _is_invalid_aux_response_error(first_err):
-                reason = "invalid provider response"
            else:
                reason = "connection error"
            logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
@@ -6478,7 +6334,6 @@ async def async_call_llm(
            or _is_connection_error(first_err)
            or _is_rate_limit_error(first_err)
            or _is_model_incompatible_error(first_err)
-            or _is_invalid_aux_response_error(first_err)
        )
        # Capacity errors (payment/quota/connection/rate-limit) bypass the
        # explicit-provider gate — the provider cannot serve the request
@@ -6493,7 +6348,6 @@ async def async_call_llm(
            or _is_connection_error(first_err)
            or _is_rate_limit_error(first_err)
            or _is_model_incompatible_error(first_err)
-            or _is_invalid_aux_response_error(first_err)
        )
        if should_fallback and (is_auto or is_capacity_error):
            if _is_payment_error(first_err):
@@ -6505,8 +6359,6 @@ async def async_call_llm(
                reason = "rate limit"
            elif _is_model_incompatible_error(first_err):
                reason = "model incompatible with route"
-            elif _is_invalid_aux_response_error(first_err):
-                reason = "invalid provider response"
            else:
                reason = "connection error"
            logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",
--- a/agent/chat_completion_helpers.py
+++ b/agent/chat_completion_helpers.py
@@ -37,7 +37,6 @@ from tools.terminal_tool import is_persistent_env
 from utils import base_url_host_matches, base_url_hostname, env_float, env_int

 logger = logging.getLogger(__name__)
-_OPENROUTER_PROVIDER_SORT_VALUES = {"throughput", "latency", "price"}


 def _ra():
@@ -116,23 +115,6 @@ def _is_openai_codex_backend(agent) -> bool:
    )


-def _validated_openrouter_provider_sort(raw_sort: Any) -> Optional[str]:
-    """Return a normalized OpenRouter provider.sort value or None."""
-    if not isinstance(raw_sort, str):
-        return None
-    sort_value = raw_sort.strip().lower()
-    if not sort_value:
-        return None
-    if sort_value in _OPENROUTER_PROVIDER_SORT_VALUES:
-        return sort_value
-    logger.warning(
-        "Ignoring invalid OpenRouter provider.sort value %r (allowed: %s)",
-        raw_sort,
-        ", ".join(sorted(_OPENROUTER_PROVIDER_SORT_VALUES)),
-    )
-    return None
-
-
 def _env_float(name: str, default: float) -> float:
    try:
        return float(os.getenv(name, str(default)))
@@ -247,11 +229,6 @@ def interruptible_api_call(agent, api_kwargs: dict):
                        invalidate_runtime_client(region)
                    raise
                result["response"] = normalize_converse_response(raw_response)
-            elif agent.provider == "moa":
-                # MoA is a virtual chat-completions provider backed by the
-                # in-process MoAClient facade. Do not rebuild a request-local
-                # OpenAI client from the virtual runtime metadata.
-                result["response"] = agent.client.chat.completions.create(**api_kwargs)
            else:
                request_client = _set_request_client(
                    agent._create_request_openai_client(
@@ -721,9 +698,8 @@ def build_api_kwargs(agent, api_messages: list) -> dict:
        _prefs["ignore"] = agent.providers_ignored
    if agent.providers_order:
        _prefs["order"] = agent.providers_order
-    _provider_sort = _validated_openrouter_provider_sort(agent.provider_sort)
-    if _provider_sort:
-        _prefs["sort"] = _provider_sort
+    if agent.provider_sort:
+        _prefs["sort"] = agent.provider_sort
    if agent.provider_require_parameters:
        _prefs["require_parameters"] = True
    if agent.provider_data_collection:
@@ -1234,16 +1210,14 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
            agent._transport_cache.clear()
        agent._fallback_activated = True

-        # Rebind the credential pool to the fallback provider when the provider
-        # changes.  Keeping the primary pool attached would make downstream
-        # recovery (rate_limit / billing / auth) mutate the wrong credential
-        # set and can overwrite the fallback's base_url back to the primary
-        # endpoint.  See #33163.
-        #
+        # Clear the credential pool when the fallback provider doesn't match
+        # the pool's provider.  The pool was seeded for the primary provider;
+        # leaving it attached means downstream recovery (rate_limit / billing /
+        # auth) calls ``_swap_credential`` with a primary entry which overwrites
+        # the agent's ``base_url`` back to the primary's endpoint — every
+        # fallback request then 404s against the wrong host.  See #33163.
        # When the fallback shares the pool's provider (e.g. both openrouter
-        # entries with different routing) the pool is preserved.  When the
-        # providers differ, load the fallback provider's own pool if one exists
-        # so provider-specific rotation continues to work after the switch.
+        # entries with different routing) the pool is preserved.
        _existing_pool = getattr(agent, "_credential_pool", None)
        if _existing_pool is not None:
            _pool_provider = (getattr(_existing_pool, "provider", "") or "").strip().lower()
@@ -1254,22 +1228,6 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
                    fb_provider, fb_model, _pool_provider,
                )
                agent._credential_pool = None
-        if getattr(agent, "_credential_pool", None) is None:
-            try:
-                from agent.credential_pool import load_pool
-
-                fallback_pool = load_pool(fb_provider)
-                if fallback_pool and fallback_pool.has_credentials():
-                    agent._credential_pool = fallback_pool
-                    logger.info(
-                        "Fallback to %s/%s: attached fallback credential pool",
-                        fb_provider, fb_model,
-                    )
-            except Exception as exc:
-                logger.debug(
-                    "Fallback to %s/%s: could not attach credential pool: %s",
-                    fb_provider, fb_model, exc,
-                )

        # Honor per-provider / per-model request_timeout_seconds for the
        # fallback target (same knob the primary client uses).  None = use
@@ -1500,9 +1458,8 @@ def handle_max_iterations(agent, messages: list, api_call_count: int) -> str:
                provider_preferences["ignore"] = agent.providers_ignored
            if agent.providers_order:
                provider_preferences["order"] = agent.providers_order
-            _provider_sort = _validated_openrouter_provider_sort(agent.provider_sort)
-            if _provider_sort:
-                provider_preferences["sort"] = _provider_sort
+            if agent.provider_sort:
+                provider_preferences["sort"] = agent.provider_sort
            if provider_preferences and (
                (agent.provider or "").strip().lower() == "openrouter"
                or agent._is_openrouter_url()
@@ -2604,17 +2561,6 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
            _stream_stale_timeout = max(_stream_stale_timeout_base, 240.0)
        else:
            _stream_stale_timeout = _stream_stale_timeout_base
-        # Reasoning-model floor: known reasoning models (Nemotron 3 Ultra,
-        # OpenAI o1/o3, Anthropic Opus 4.x thinking, DeepSeek R1, Qwen QwQ,
-        # xAI Grok reasoning, etc.) routinely exceed the default 180s chat-
-        # model threshold during their thinking phase.  The cloud gateway
-        # upstream kills the socket first, surfacing as BrokenPipeError.
-        # Raises the floor only — never overrides explicit user config
-        # (handled by get_provider_stale_timeout above).
-        from agent.reasoning_timeouts import get_reasoning_stale_timeout_floor
-        _reasoning_floor = get_reasoning_stale_timeout_floor(api_kwargs.get("model"))
-        if _reasoning_floor is not None:
-            _stream_stale_timeout = max(_stream_stale_timeout, _reasoning_floor)

    t = threading.Thread(target=_call, daemon=True)
    t.start()
--- a/agent/coding_context.py
+++ b/agent/coding_context.py
@@ -59,7 +59,6 @@ import subprocess
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Optional
-from hermes_cli import _subprocess_compat

 logger = logging.getLogger("hermes.coding_context")

@@ -649,7 +648,7 @@ def _enabled_mcp_servers(config: Optional[dict[str, Any]]) -> list[str]:

 def _git(cwd: Path, *args: str) -> str:
    try:
-        out = _subprocess_compat.run(
+        out = subprocess.run(
            ["git", "-C", str(cwd), *args],
            capture_output=True,
            text=True,
--- a/agent/context_references.py
+++ b/agent/context_references.py
@@ -12,7 +12,6 @@ from pathlib import Path
 from typing import Awaitable, Callable

 from agent.model_metadata import estimate_tokens_rough
-from hermes_cli import _subprocess_compat

 _QUOTED_REFERENCE_VALUE = r'(?:`[^`\n]+`|"[^"\n]+"|\'[^\'\n]+\')'
 REFERENCE_PATTERN = re.compile(
@@ -292,7 +291,7 @@ def _expand_git_reference(
    label: str,
 ) -> tuple[str | None, str | None]:
    try:
-        result = _subprocess_compat.run(
+        result = subprocess.run(
            ["git", *args],
            cwd=cwd,
            capture_output=True,
--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@@ -288,29 +288,6 @@ def replay_compression_warning(agent: Any) -> None:
            pass


-def conversation_history_after_compression(agent: Any, messages: list) -> Optional[list]:
-    """Return the correct flush baseline after a compression boundary.
-
-    Legacy compression rotates to a fresh child session. That child has not
-    seen the compacted transcript through the normal same-turn flush path yet,
-    so callers must clear ``conversation_history`` to ``None`` and let the next
-    persistence call write the whole compacted list.
-
-    In-place compaction is different: ``archive_and_compact()`` has already
-    soft-archived the previous active rows and inserted ``messages`` as the new
-    active live transcript under the same session id. If the same agent turn
-    continues with ``conversation_history=None``, the identity-based flush path
-    treats those already-persisted compacted dicts as new and appends them a
-    second time, doubling the active context and retriggering compression.
-
-    A shallow copy is intentional: it captures the current compacted dict
-    identities as history while allowing later same-turn appends to remain new.
-    """
-    if bool(getattr(agent, "_last_compaction_in_place", False)):
-        return list(messages)
-    return None
-
-
 def compress_context(
    agent: Any,
    messages: list,
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -28,7 +28,6 @@ import uuid
 from typing import Any, Dict, List, Optional

 from agent.codex_responses_adapter import _summarize_user_message_for_log
-from agent.conversation_compression import conversation_history_after_compression
 from agent.display import KawaiiSpinner
 from agent.error_classifier import FailoverReason, classify_api_error
 from agent.iteration_budget import IterationBudget
@@ -828,6 +827,7 @@ def run_conversation(
                    aggregator=moa_config.get("aggregator") or {},
                    temperature=float(moa_config.get("reference_temperature", 0.6) or 0.6),
                    aggregator_temperature=float(moa_config.get("aggregator_temperature", 0.4) or 0.4),
+                    max_tokens=int(moa_config.get("max_tokens", 4096) or 4096),
                )
                if _moa_context:
                    for _msg in reversed(api_messages):
@@ -2011,21 +2011,9 @@ def run_conversation(
                    agent.thinking_callback("")
                api_elapsed = time.time() - api_start_time
                agent._vprint(f"{agent.log_prefix}⚡ Interrupted during API call.", force=True)
-                interrupted = True
-                # Preserve any assistant text already streamed to the user
-                # before the stop landed. Dropping it leaves history with no
-                # record of the half-finished reply on screen, so the next turn
-                # the model "forgets" what it just said — exactly what users hit
-                # when they stop to redirect mid-response.
-                _partial = agent._strip_think_blocks(
-                    getattr(agent, "_current_streamed_assistant_text", "") or ""
-                ).strip()
-                if _partial:
-                    messages.append({"role": "assistant", "content": _partial})
-                    final_response = _partial
-                else:
-                    final_response = f"{INTERRUPT_WAITING_FOR_MODEL_PREFIX}{api_elapsed:.1f}s elapsed)."
                agent._persist_session(messages, conversation_history)
+                interrupted = True
+                final_response = f"{INTERRUPT_WAITING_FOR_MODEL_PREFIX}{api_elapsed:.1f}s elapsed)."
                break

            except Exception as api_error:
@@ -2830,9 +2818,10 @@ def run_conversation(
                            approx_tokens=approx_tokens,
                            task_id=effective_task_id,
                        )
-                        conversation_history = conversation_history_after_compression(
-                            agent, messages
-                        )
+                        # Compression created a new session — clear history
+                        # so _flush_messages_to_session_db writes compressed
+                        # messages to the new session, not skipping them.
+                        conversation_history = None
                        if len(messages) < original_len or old_ctx > _reduced_ctx:
                            agent._buffer_status(
                                f"🗜️ Context reduced to {_reduced_ctx:,} tokens "
@@ -3041,9 +3030,10 @@ def run_conversation(
                        messages, system_message, approx_tokens=approx_tokens,
                        task_id=effective_task_id,
                    )
-                    conversation_history = conversation_history_after_compression(
-                        agent, messages
-                    )
+                    # Compression created a new session — clear history
+                    # so _flush_messages_to_session_db writes compressed
+                    # messages to the new session, not skipping them.
+                    conversation_history = None

                    # Re-estimate tokens after compression.  Same-message-count
                    # compression (tool-result pruning, in-place summarization)
@@ -3207,9 +3197,10 @@ def run_conversation(
                        messages, system_message, approx_tokens=approx_tokens,
                        task_id=effective_task_id,
                    )
-                    conversation_history = conversation_history_after_compression(
-                        agent, messages
-                    )
+                    # Compression created a new session — clear history
+                    # so _flush_messages_to_session_db writes compressed
+                    # messages to the new session, not skipping them.
+                    conversation_history = None

                    # Re-estimate tokens after compression.  Same-message-count
                    # compression (tool-result pruning, in-place summarization)
@@ -3536,65 +3527,6 @@ def run_conversation(
                            force=True,
                        )

-                    # Detect thinking-timeout pattern: a known reasoning model
-                    # hit a transport-layer error before the first content
-                    # token arrived.  Distinct from _is_stream_drop above
-                    # (which fires for large file-write stream drops) and
-                    # from any classifier reason that's not a transport
-                    # timeout.  Reuses the reasoning-model allowlist from
-                    # agent/reasoning_timeouts.py (Fixes #52217) so the
-                    # trigger is consistent with what the per-model
-                    # stale-timeout floor covers.  After the classifier
-                    # override at agent/error_classifier.py:720-738 (this
-                    # PR), transport disconnects on reasoning models route
-                    # to FailoverReason.timeout rather than
-                    # context_overflow, so this branch actually fires.
-                    # Detection and message text live in
-                    # agent.thinking_timeout_guidance so they're
-                    # unit-testable without driving the full retry loop.
-                    # (Part 2 of Fixes #52310.)
-                    from agent.thinking_timeout_guidance import (
-                        is_thinking_timeout,
-                    )
-                    _is_thinking_timeout = is_thinking_timeout(
-                        classified,
-                        _model,
-                        error_msg,
-                    )
-                    if _is_thinking_timeout:
-                        agent._vprint(
-                            f"{agent.log_prefix}   💡 The model's thinking "
-                            f"phase exceeded the upstream proxy's idle "
-                            f"timeout before the first content token "
-                            f"arrived. This is a known issue with "
-                            f"reasoning models behind cloud gateways "
-                            f"(NVIDIA NIM, OpenAI, Anthropic, DeepSeek).",
-                            force=True,
-                        )
-                        agent._vprint(
-                            f"{agent.log_prefix}      Workarounds in priority order:",
-                            force=True,
-                        )
-                        agent._vprint(
-                            f"{agent.log_prefix}      1. Set "
-                            f"`providers.{_provider}.models.{_model}.stale_timeout_seconds: 900` "
-                            f"in `~/.hermes/config.yaml` to extend the per-call "
-                            f"timeout. (Hermes's built-in floor is 600s for "
-                            f"known reasoning models — if you still see this "
-                            f"after raising, the upstream cap is even shorter.)",
-                            force=True,
-                        )
-                        agent._vprint(
-                            f"{agent.log_prefix}      2. Lower `reasoning_budget` or set "
-                            f"`reasoning_effort: medium` on this model if the provider supports it.",
-                            force=True,
-                        )
-                        agent._vprint(
-                            f"{agent.log_prefix}      3. Use a smaller / faster reasoning "
-                            f"model if the task doesn't require deep thinking.",
-                            force=True,
-                        )
-
                    logger.error(
                        "%sAPI call failed after %s retries. %s | provider=%s model=%s msgs=%s tokens=~%s",
                        agent.log_prefix, max_retries, _final_summary,
@@ -3611,22 +3543,7 @@ def run_conversation(
                            _final_response += f"\n\n{_billing_guidance}"
                    else:
                        _final_response = f"API call failed after {max_retries} retries: {_final_summary}"
-                    if _is_thinking_timeout:
-                        # Thinking-timeout guidance overrides the generic
-                        # stream-drop guidance — the latter is wrong for
-                        # this case (it suggests splitting large file
-                        # writes, which isn't what happened).  See the
-                        # reasoning-model override at
-                        # agent/error_classifier.py:720-738 and the
-                        # detection block above for context.
-                        from agent.thinking_timeout_guidance import (
-                            build_thinking_timeout_guidance,
-                        )
-                        _final_response += build_thinking_timeout_guidance(
-                            provider=_provider,
-                            model=_model,
-                        )
-                    elif _is_stream_drop:
+                    if _is_stream_drop:
                        _final_response += (
                            "\n\nThe provider's stream connection keeps "
                            "dropping — this often happens when generating "
@@ -4313,9 +4230,10 @@ def run_conversation(
                        approx_tokens=agent.context_compressor.last_prompt_tokens,
                        task_id=effective_task_id,
                    )
-                    conversation_history = conversation_history_after_compression(
-                        agent, messages
-                    )
+                    # Compression created a new session — clear history so
+                    # _flush_messages_to_session_db writes compressed messages
+                    # to the new session (see preflight compression comment).
+                    conversation_history = None
                
                # Save session log incrementally (so progress is visible even if interrupted)
                agent._session_messages = messages
@@ -4690,11 +4608,7 @@ def run_conversation(
                        "_verification_stop_synthetic": True,
                    })
                    agent._session_messages = messages
-                    # Run the verification-stop loop silently — the nudge is an
-                    # internal turn that should not add noise to the user's
-                    # terminal. Keep a debug breadcrumb in agent.log for tracing.
-                    logger.debug("verification stop-loop nudge issued (attempt %d)",
-                                 agent._verification_stop_nudges)
+                    agent._emit_status("↻ Verification required before finishing")
                    continue

                messages.append(final_msg)
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@@ -11,7 +11,6 @@ import uuid
 import re
 from dataclasses import dataclass, fields, replace
 from datetime import datetime, timezone
-from pathlib import Path
 from typing import Any, Dict, List, Optional, Set, Tuple

 from hermes_constants import OPENROUTER_BASE_URL
@@ -448,63 +447,6 @@ def get_pool_strategy(provider: str) -> str:
 DEFAULT_MAX_CONCURRENT_PER_CREDENTIAL = 1


-def _write_through_provider_state_to_global_root(
-    provider_id: str, state: Dict[str, Any]
-) -> None:
-    """Persist a rotated OAuth ``state`` into the global-root auth.json.
-
-    Best-effort write-through for the multi-profile rotation hazard
-    (#48415 / #43589): nous, openai-codex, and xai-oauth rotate the
-    refresh_token on refresh, so when a profile pool refresh rotates a grant
-    it resolved from the root fallback, the rotated chain must land back in
-    root. Otherwise root keeps a now-revoked refresh token and every other
-    profile reading the stale root grant dies with ``refresh_token_reused`` /
-    ``invalid_grant`` once its access token expires.
-
-    Only updates ``providers.<provider_id>`` in the root store; never touches
-    the profile store (the caller already saved that). Swallows all errors — a
-    failed write-through degrades to the pre-existing behavior (root stale), it
-    must never break the profile's own successful save. Mirrors
-    ``hermes_cli.auth._write_through_xai_oauth_to_global_root`` (which covers
-    the non-pool xAI refresh path) for the credential-pool refresh path.
-    """
-    try:
-        global_path = auth_mod._global_auth_file_path()
-    except Exception:
-        return
-    if global_path is None:
-        # Classic mode (profile == root); the profile save already hit root.
-        return
-    # Seat belt: under pytest, refuse to write the real user's
-    # ~/.hermes/auth.json even when HERMES_HOME points at a profile path
-    # (mirrors the read-side guard in _load_global_auth_store). Uses the
-    # unmodified HOME env, not Path.home() which fixtures may monkeypatch.
-    if os.environ.get("PYTEST_CURRENT_TEST"):
-        real_home_env = os.environ.get("HOME", "")
-        if real_home_env:
-            real_root = Path(real_home_env) / ".hermes" / "auth.json"
-            try:
-                if global_path.resolve(strict=False) == real_root.resolve(strict=False):
-                    return
-            except Exception:
-                return
-    try:
-        if global_path.exists():
-            global_store = _load_auth_store(global_path)
-        else:
-            global_store = {}
-        if not isinstance(global_store, dict):
-            return
-        _store_provider_state(global_store, provider_id, dict(state), set_active=False)
-        auth_mod._save_auth_store(global_store, global_path)
-    except Exception as exc:  # pragma: no cover - best effort
-        logger.debug(
-            "%s pool refresh: write-through to global root failed: %s",
-            provider_id,
-            exc,
-        )
-
-
 class CredentialPool:
    def __init__(self, provider: str, entries: List[PooledCredential]):
        self.provider = provider
@@ -858,28 +800,6 @@ class CredentialPool:
        try:
            with _auth_store_lock():
                auth_store = _load_auth_store()
-                # Decide BEFORE writing whether this profile is reading the
-                # grant from the global root (no own providers.<id> block) vs.
-                # genuinely shadowing it. A pool refresh rotates single-use
-                # OAuth refresh tokens, so a profile that resolved the grant
-                # from root MUST write the rotated chain back to root too —
-                # otherwise root keeps a revoked refresh token and every other
-                # profile reading the stale root grant dies with
-                # refresh_token_reused / invalid_grant once its access token
-                # expires. This mirrors the xAI write-through in
-                # hermes_cli.auth._save_xai_oauth_tokens (#43589); the pool
-                # refresh path is the Codex/xAI analog reported in #48415.
-                _wt_provider_id = {
-                    "nous": "nous",
-                    "openai-codex": "openai-codex",
-                    "xai-oauth": "xai-oauth",
-                }.get(self.provider)
-                write_through_to_root = bool(_wt_provider_id) and not (
-                    isinstance(auth_store.get("providers"), dict)
-                    and isinstance(
-                        auth_store["providers"].get(_wt_provider_id), dict
-                    )
-                )
                if self.provider == "nous":
                    state = _load_provider_state(auth_store, "nous")
                    if state is None:
@@ -935,10 +855,6 @@ class CredentialPool:
                    return

                _save_auth_store(auth_store)
-                if write_through_to_root and _wt_provider_id:
-                    _write_through_provider_state_to_global_root(
-                        _wt_provider_id, state
-                    )
        except Exception as exc:
            logger.debug("Failed to sync %s pool entry back to auth store: %s", self.provider, exc)

--- a/agent/curator.py
+++ b/agent/curator.py
@@ -377,10 +377,8 @@ CURATOR_REVIEW_PROMPT = (
    "bodies + `references/`, `templates/`, and `scripts/` subfiles for "
    "session-specific detail — not one-session-one-skill micro-entries.\n\n"
    "Hard rules — do not violate:\n"
-    "1. DO NOT touch bundled, hub-installed, or external-dir skills "
-    "(`skills.external_dirs`). The candidate list below is already filtered "
-    "to local curator-managed skills only; external skills are externally "
-    "owned and read-only to this background curator.\n"
+    "1. DO NOT touch bundled or hub-installed skills. The candidate list "
+    "below is already filtered to agent-created skills only.\n"
    "2. DO NOT delete any skill. Archiving (moving the skill's directory "
    "into ~/.hermes/skills/.archive/) is the maximum destructive action. "
    "Archives are recoverable; deletion is not.\n"
@@ -471,9 +469,8 @@ CURATOR_REVIEW_PROMPT = (
    "skill, or `absorbed_into=\"\"` when you're truly pruning with no "
    "forwarding target. This drives cron-job skill-reference migration — "
    "guessing from your YAML summary after the fact is fragile.\n"
-    "  - terminal                       — move LOCAL candidate content into "
-    "a support subfile when package integrity requires it; never mv, cp, rm, "
-    "patch, or rewrite bundled, hub-installed, or external-dir skills\n\n"
+    "  - terminal                       — mv a sibling into the archive "
+    "OR move its content into a support subfile\n\n"
    "'keep' is a legitimate decision ONLY when the skill is already a "
    "class-level umbrella and none of the proposed merges would improve "
    "discoverability. 'This is narrow but distinct from its siblings' "
@@ -1846,14 +1843,6 @@ def _run_llm_review(prompt: str) -> Dict[str, Any]:
        # Disable recursive nudges — the curator must never spawn its own review.
        review_agent._memory_nudge_interval = 0
        review_agent._skill_nudge_interval = 0
-        # Tag this fork as autonomous background curation so skill_manage's
-        # background-review write guard fires. Without this the fork inherits
-        # the default "assistant_tool" origin, is_background_review() is False,
-        # and the external/bundled/hub-installed skill_manage guards never
-        # trigger during the curation pass they exist to protect against.
-        # turn_context.py binds this onto the write-origin ContextVar at turn
-        # start (see agent/turn_context.py).
-        review_agent._memory_write_origin = "background_review"

        # Redirect the forked agent's stdout/stderr to /dev/null while it
        # runs so its tool-call chatter doesn't pollute the foreground
--- a/agent/display.py
+++ b/agent/display.py
@@ -16,7 +16,6 @@ from pathlib import Path
 from typing import Any

 from utils import safe_json_loads
-from agent.redact import redact_sensitive_text
 from agent.tool_result_classification import file_mutation_result_landed

 # ANSI escape codes for coloring tool failure indicators
@@ -340,62 +339,6 @@ def _read_file_line_label(args: dict) -> str:
    return f"L{offset}-{offset + limit - 1}"


-def redact_browser_typed_text_for_display(value: Any, typed_text: Any) -> Any:
-    """Apply secret redaction to browser_type text in display-facing payloads.
-
-    Backends sometimes echo the attempted input in error strings or fallback
-    metadata.  When the raw typed value contains a recognizable secret (API
-    key, token, JWT, etc.) the redacted form differs from the raw value, so we
-    replace every occurrence of the raw value with its redacted form before a
-    browser_type result reaches logs, callbacks, the model, or chat history.
-
-    Normal typed text (search queries, addresses, form fields) matches no
-    secret pattern, so it passes through unchanged and stays readable.
-
-    Redaction is forced here regardless of the global ``security.redact_secrets``
-    preference: a typed credential leaking into chat history is a security
-    boundary, not mere log hygiene.
-    """
-    if typed_text is None:
-        return value
-    needle = str(typed_text)
-    if needle == "":
-        return value
-    redacted = redact_sensitive_text(needle, force=True)
-    if redacted == needle:
-        # Nothing secret-looking in the typed text; leave payload untouched.
-        return value
-    if isinstance(value, str):
-        return value.replace(needle, redacted)
-    if isinstance(value, dict):
-        return {
-            key: redact_browser_typed_text_for_display(item, typed_text)
-            for key, item in value.items()
-        }
-    if isinstance(value, list):
-        return [redact_browser_typed_text_for_display(item, typed_text) for item in value]
-    if isinstance(value, tuple):
-        return tuple(redact_browser_typed_text_for_display(item, typed_text) for item in value)
-    return value
-
-
-def redact_tool_args_for_display(tool_name: str, args: dict | None) -> dict | None:
-    """Return a copy of tool args safe for logs/progress UI.
-
-    For ``browser_type`` the ``text`` argument is run through the same
-    secret-pattern redactor used for logs.  Recognizable credentials (API
-    keys, tokens) are masked before the value reaches tool progress
-    notifications; normal typed text is left intact for debuggability.
-    """
-    if not isinstance(args, dict):
-        return args
-    if tool_name == "browser_type" and isinstance(args.get("text"), str):
-        safe_args = dict(args)
-        safe_args["text"] = redact_sensitive_text(args["text"], force=True)
-        return safe_args
-    return args
-
-
 def _delegate_task_goal_parts(tasks: Any, *, per_goal_len: int) -> tuple[int, list[str]]:
    if not isinstance(tasks, list):
        return 0, []
@@ -419,7 +362,6 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
        max_len = _tool_preview_max_len
    if not args:
        return None
-    args = redact_tool_args_for_display(tool_name, args) or args
    primary_args = {
        "terminal": "command", "web_search": "query", "web_extract": "urls",
        "read_file": "path", "write_file": "path", "patch": "path",
@@ -1143,7 +1085,6 @@ def get_cute_tool_message(
    When *result* is provided the line is checked for failure indicators.
    Failed tool calls get a red prefix and an informational suffix.
    """
-    args = redact_tool_args_for_display(tool_name, args) or args
    dur = f"{duration:.1f}s"
    is_failure, failure_suffix = _detect_tool_failure(tool_name, result)
    skin_prefix = get_skin_tool_prefix()
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@@ -133,31 +133,6 @@ _RATE_LIMIT_PATTERNS = [
    "servicequotaexceededexception",
 ]

-# Patterns that indicate provider-side overload, NOT a per-credential rate
-# limit or billing problem.  The credential is valid — the server is just
-# busy — so the correct recovery is "back off and retry the same key", never
-# "rotate the credential" (rotating exhausts the pool while the endpoint is
-# still busy; a single-key user has nothing to rotate to).  Some providers
-# (notably Z.AI / Zhipu) reuse HTTP 429 for server-wide overload, so the 429
-# status path matches the body against this list before falling through to
-# the rate_limit default.  Phrases are kept narrow and overload-flavoured so a
-# normal rate-limit message ("you have been rate-limited") doesn't hit this
-# bucket. (#14038, #15297)
-_OVERLOADED_PATTERNS = [
-    "overloaded",
-    "temporarily overloaded",
-    "service is temporarily overloaded",
-    "service may be temporarily overloaded",
-    "server is overloaded",
-    "server overloaded",
-    "service overloaded",
-    "service is overloaded",
-    "upstream overloaded",
-    "currently overloaded",
-    "at capacity",
-    "over capacity",
-]
-
 # Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
 _USAGE_LIMIT_PATTERNS = [
    "usage limit",
@@ -742,26 +717,6 @@ def classify_api_error(

    is_disconnect = any(p in error_msg for p in _SERVER_DISCONNECT_PATTERNS)
    if is_disconnect and not status_code:
-        # Reasoning-model override: a transport disconnect on a reasoning
-        # model is much more likely the upstream proxy idle-killing a
-        # long thinking stream than a true context overflow — even on
-        # large sessions.  The default disconnect+large-session routing
-        # below would otherwise send the user into the compression
-        # branch (should_compress=True) and silently delete
-        # conversation history on a phantom context-length error.
-        # Reasoning models have multi-minute thinking phases that
-        # routinely exceed the cloud gateway's idle window (NVIDIA
-        # NIM ~120s — first-party repro at NVIDIA/NemoClaw#4846;
-        # OpenAI worker / Anthropic stream-idle similar).  The
-        # per-reasoning-model stale-timeout floor in
-        # agent/reasoning_timeouts.py raises the stale-detector
-        # threshold to tolerate long thinking, so a true
-        # transport-layer failure here is recoverable via the retry
-        # path — not via context compression.  Reclassify as timeout.
-        # (Part 1 of Fixes #52310.)
-        from agent.reasoning_timeouts import get_reasoning_stale_timeout_floor
-        if get_reasoning_stale_timeout_floor(model) is not None:
-            return _result(FailoverReason.timeout, retryable=True)
        # Absolute token/message-count thresholds are only a proxy for smaller
        # context windows.  Large-context sessions can have hundreds of
        # messages while still being far below their actual token budget.
@@ -888,19 +843,7 @@ def _classify_by_status(
        )

    if status_code == 429:
-        # Already checked long_context_tier above. Some providers (notably
-        # Z.AI / Zhipu) reuse HTTP 429 for server-wide overload — same status
-        # code as a true per-credential rate limit, but the credential is
-        # valid and the correct recovery is "back off and retry the same key",
-        # NOT "rotate the credential" (which exhausts the pool while the
-        # endpoint is still busy, and does nothing for a single-key user).
-        # Disambiguate on the error body so an overload 429 takes the
-        # transient-overload path instead of burning the pool. (#14038)
-        if any(p in error_msg for p in _OVERLOADED_PATTERNS):
-            return result_fn(
-                FailoverReason.overloaded,
-                retryable=True,
-            )
+        # Already checked long_context_tier above; this is a normal rate limit
        return result_fn(
            FailoverReason.rate_limit,
            retryable=True,
@@ -1251,17 +1194,6 @@ def _classify_by_message(
            should_fallback=True,
        )

-    # Overloaded / server-busy patterns — must come BEFORE the rate_limit and
-    # billing checks so that a message-only "overloaded" (no 503/529 status,
-    # e.g. some Anthropic-compatible proxies) classifies as a transient
-    # overload (backoff + retry) instead of falling through to `unknown` or
-    # incorrectly triggering credential rotation.
-    if any(p in error_msg for p in _OVERLOADED_PATTERNS):
-        return result_fn(
-            FailoverReason.overloaded,
-            retryable=True,
-        )
-
    # Billing patterns
    if any(p in error_msg for p in _BILLING_PATTERNS):
        return result_fn(
@@ -1351,25 +1283,19 @@ def _extract_status_code(error: Exception) -> Optional[int]:


 def _extract_error_body(error: Exception) -> dict:
-    """Extract the structured error body from an SDK exception or its cause chain."""
-    current = error
-    for _ in range(5):  # Match _extract_status_code() traversal depth.
-        body = getattr(current, "body", None)
-        if isinstance(body, dict):
-            return body
-        # Some errors have .response.json()
-        response = getattr(current, "response", None)
-        if response is not None:
-            try:
-                json_body = response.json()
-                if isinstance(json_body, dict):
-                    return json_body
-            except Exception:
-                pass
-        cause = getattr(current, "__cause__", None) or getattr(current, "__context__", None)
-        if cause is None or cause is current:
-            break
-        current = cause
+    """Extract the structured error body from an SDK exception."""
+    body = getattr(error, "body", None)
+    if isinstance(body, dict):
+        return body
+    # Some errors have .response.json()
+    response = getattr(error, "response", None)
+    if response is not None:
+        try:
+            json_body = response.json()
+            if isinstance(json_body, dict):
+                return json_body
+        except Exception:
+            pass
    return {}


--- a/agent/file_safety.py
+++ b/agent/file_safety.py
@@ -77,22 +77,15 @@ def build_write_denied_prefixes(home: str) -> list[str]:
    ]


-def get_safe_write_roots() -> set[str]:
-    """Return resolved HERMES_WRITE_SAFE_ROOT paths. Supports multiple directories
-    separated by ``os.pathsep`` (``:`` on Unix, ``;`` on Windows).
-    E.g., ``/opt/data:/var/www/html`` on Unix, ``C:\\data;D:\\www`` on Windows."""
-    env = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
-    if not env:
-        return set()
-    roots: set[str] = set()
-    for path in env.split(os.pathsep):
-        if path:
-            try:
-                resolved = os.path.realpath(os.path.expanduser(path))
-                roots.add(resolved)
-            except (OSError, ValueError):
-                continue
-    return roots
+def get_safe_write_root() -> Optional[str]:
+    """Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset."""
+    root = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
+    if not root:
+        return None
+    try:
+        return os.path.realpath(os.path.expanduser(root))
+    except Exception:
+        return None


 def is_write_denied(path: str) -> bool:
@@ -131,15 +124,9 @@ def is_write_denied(path: str) -> bool:
        except Exception:
            pass

-    safe_roots = get_safe_write_roots()
-    if safe_roots:
-        allowed = False
-        for safe_root in safe_roots:
-            if resolved == safe_root or resolved.startswith(safe_root + os.sep):
-                allowed = True
-                break
-        if not allowed:
-            return True
+    safe_root = get_safe_write_root()
+    if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
+        return True

    return False

--- a/agent/moa_loop.py
+++ b/agent/moa_loop.py
@@ -8,7 +8,6 @@ iteration.

 from __future__ import annotations

-import hashlib
 import logging
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any
@@ -31,68 +30,15 @@ def _slot_label(slot: dict[str, str]) -> str:
    return f"{slot.get('provider', '').strip()}:{slot.get('model', '').strip()}"


-def _slot_runtime(slot: dict[str, str]) -> dict[str, Any]:
-    """Resolve a reference/aggregator slot to real runtime call kwargs.
-
-    A MoA slot is just a model selection — it must be called the same way any
-    model is called elsewhere, not through a bare ``call_llm(provider=...,
-    model=...)`` that leaves base_url/api_key/api_mode unresolved and lets the
-    auxiliary auto-detector guess. We route the slot's provider through
-    ``resolve_runtime_provider`` (the canonical provider→api_mode/base_url/
-    api_key resolver the CLI, gateway, and delegate_task all use), so the slot
-    gets its provider's real API surface — e.g. MiniMax → anthropic_messages,
-    GPT-5/o-series → max_completion_tokens, custom endpoints → their base_url.
-
-    Returns the kwargs to pass through to ``call_llm`` (provider/model plus the
-    resolved base_url/api_key when available). Falls back to the bare
-    provider/model on any resolution error so a misconfigured slot still
-    attempts the call rather than aborting the whole MoA turn.
-    """
-    provider = str(slot.get("provider") or "").strip()
-    model = str(slot.get("model") or "").strip()
-    out: dict[str, Any] = {"provider": provider, "model": model}
-    try:
-        from hermes_cli.runtime_provider import resolve_runtime_provider
-
-        rt = resolve_runtime_provider(requested=provider, target_model=model)
-        resolved_provider = str(rt.get("provider") or provider).strip().lower()
-        # call_llm treats an explicit base_url as a custom endpoint. That is
-        # correct for ordinary OpenAI-compatible targets, but wrong for OAuth /
-        # adapter-backed providers whose provider branch adds auth headers and
-        # request-shape adapters. Keep those providers identified by name.
-        if resolved_provider in {"openai-codex", "xai-oauth"}:
-            return out
-        # Pass the resolved endpoint through so call_llm builds the request for
-        # the provider's actual API surface instead of auto-detecting. base_url
-        # routes call_llm to the right adapter (incl. anthropic_messages mode);
-        # api_key is the resolved credential for that provider.
-        if rt.get("base_url"):
-            out["base_url"] = rt["base_url"]
-        if rt.get("api_key"):
-            out["api_key"] = rt["api_key"]
-    except Exception as exc:  # pragma: no cover - defensive
-        logger.debug("MoA slot runtime resolution failed for %s: %s", _slot_label(slot), exc)
-    return out
-
-
 def _run_reference(
    slot: dict[str, str],
    ref_messages: list[dict[str, Any]],
    *,
-    temperature: float | None = None,
-    max_tokens: int | None = None,
+    temperature: float,
+    max_tokens: int,
 ) -> tuple[str, str]:
    """Call one reference model and return ``(label, text)``.

-    The slot is resolved to its provider's real runtime (via ``_slot_runtime``)
-    and called through the same ``call_llm`` request-building path any model
-    uses, so per-model wire-format handling (anthropic_messages,
-    max_completion_tokens, fixed/forbidden temperature) applies identically to
-    a reference as it would if that model were the acting model. MoA imposes no
-    cap of its own (``max_tokens`` defaults to ``None`` → omitted → the model's
-    real maximum); ``temperature`` is only the user's configured preset value,
-    which call_llm may still override per model.
-
    Never raises: a failed reference becomes a labelled note so the aggregator
    can still act with partial context. Designed to run inside a thread pool —
    ``call_llm`` is synchronous/blocking, so threads (not asyncio) are the right
@@ -102,10 +48,11 @@ def _run_reference(
    try:
        response = call_llm(
            task="moa_reference",
+            provider=slot["provider"],
+            model=slot["model"],
            messages=ref_messages,
            temperature=temperature,
            max_tokens=max_tokens,
-            **_slot_runtime(slot),
        )
        return label, _extract_text(response) or "(empty response)"
    except Exception as exc:
@@ -117,8 +64,8 @@ def _run_references_parallel(
    reference_models: list[dict[str, str]],
    ref_messages: list[dict[str, Any]],
    *,
-    temperature: float | None = None,
-    max_tokens: int | None = None,
+    temperature: float,
+    max_tokens: int,
 ) -> list[tuple[str, str]]:
    """Fan out all reference models in parallel, returning outputs in order.

@@ -222,18 +169,12 @@ def aggregate_moa_context(
    aggregator: dict[str, str],
    temperature: float = 0.6,
    aggregator_temperature: float = 0.4,
-    max_tokens: int | None = None,
+    max_tokens: int = 4096,
 ) -> str:
    """Run configured reference models and synthesize their advice.

    Failures are returned as model-specific notes instead of aborting the normal
    agent loop; the main model can still act with partial context.
-
-    ``max_tokens`` is ``None`` by default: MoA does not cap reference or
-    aggregator output, so each model uses its own maximum. ``call_llm`` omits
-    the parameter entirely when it is ``None`` (see its docstring), which also
-    sidesteps providers that reject ``max_tokens`` outright. A hardcoded cap
-    here previously truncated long aggregator syntheses.
    """
    reference_outputs: list[tuple[str, str]] = []
    ref_messages = _reference_messages(api_messages)
@@ -262,10 +203,11 @@ def aggregate_moa_context(
    try:
        response = call_llm(
            task="moa_aggregator",
+            provider=aggregator["provider"],
+            model=aggregator["model"],
            messages=[{"role": "user", "content": synth_prompt}],
            temperature=aggregator_temperature,
            max_tokens=max_tokens,
-            **_slot_runtime(aggregator),
        )
        synthesis = _extract_text(response)
    except Exception as exc:
@@ -288,37 +230,8 @@ def aggregate_moa_context(
 class MoAChatCompletions:
    """OpenAI-chat-compatible facade where the aggregator is the acting model."""

-    def __init__(self, preset_name: str, reference_callback: Any = None):
+    def __init__(self, preset_name: str):
        self.preset_name = preset_name or "default"
-        # Optional display hook. Called as reference outputs become available so
-        # frontends can show each reference model's answer as a labelled block
-        # before the aggregator acts. Signature:
-        #   reference_callback(event, **kwargs)
-        # where event is one of:
-        #   "moa.reference"   kwargs: index, count, label, text
-        #   "moa.aggregating" kwargs: aggregator (label), ref_count
-        # Never raises into the model call — display is best-effort.
-        self.reference_callback = reference_callback
-        # Turn-scoped reference cache. The agent loop calls create() once per
-        # tool-loop iteration, but references are advisory for the whole turn:
-        # the advisory message view (_reference_messages) is identical across
-        # iterations (it strips tool/tool_call turns) until a new user message
-        # arrives. Re-running references every iteration would multiply their
-        # API cost by the tool-loop depth AND re-emit the same blocks to the
-        # display on every iteration. So cache outputs keyed by the advisory
-        # view's signature and reuse them — running and showing references once
-        # per user turn.
-        self._ref_cache_key: tuple | None = None
-        self._ref_cache_outputs: list[tuple[str, str]] = []
-
-    def _emit(self, event: str, **kwargs: Any) -> None:
-        cb = self.reference_callback
-        if cb is None:
-            return
-        try:
-            cb(event, **kwargs)
-        except Exception as exc:  # pragma: no cover - display must never break the turn
-            logger.debug("MoA reference_callback failed for %s: %s", event, exc)

    def create(self, **api_kwargs: Any) -> Any:
        from hermes_cli.config import load_config
@@ -328,10 +241,7 @@ class MoAChatCompletions:
        messages = list(api_kwargs.get("messages") or [])
        reference_models = preset.get("reference_models") or []
        aggregator = preset.get("aggregator") or {}
-        # MoA does not cap reference or aggregator output: each model uses its
-        # own maximum. Passing max_tokens=None makes call_llm omit the parameter
-        # (it never caps by default), so a long aggregator synthesis is never
-        # truncated and providers that reject max_tokens don't 400.
+        max_tokens = int(preset.get("max_tokens", api_kwargs.get("max_tokens") or 4096) or 4096)
        temperature = float(preset.get("reference_temperature", 0.6) or 0.6)
        aggregator_temperature = float(preset.get("aggregator_temperature", api_kwargs.get("temperature") or 0.4) or 0.4)

@@ -343,52 +253,12 @@ class MoAChatCompletions:

        reference_outputs: list[tuple[str, str]] = []
        ref_messages = _reference_messages(messages)
-
-        # Turn-scoped cache: only run + display references when the advisory
-        # view changed (i.e. a new user turn). Within one turn the agent loop
-        # calls create() once per tool iteration with the same advisory view;
-        # reuse the cached outputs and skip both the re-run and the re-emit.
-        _sig = hashlib.sha256(
-            "\u0000".join(
-                f"{m.get('role')}:{m.get('content')}" for m in ref_messages
-            ).encode("utf-8", "replace")
-        ).hexdigest()
-        _cache_key = (self.preset_name, _sig, tuple(_slot_label(s) for s in reference_models))
-        _refs_from_cache = _cache_key == self._ref_cache_key and bool(self._ref_cache_outputs)
-
-        if _refs_from_cache:
-            reference_outputs = list(self._ref_cache_outputs)
-        else:
-            reference_outputs = _run_references_parallel(
-                reference_models,
-                ref_messages,
-                temperature=temperature,
-                max_tokens=None,
-            )
-            self._ref_cache_key = _cache_key
-            self._ref_cache_outputs = list(reference_outputs)
-
-            # Surface each reference model's answer to the display BEFORE the
-            # aggregator acts — once per turn (only on the iteration that
-            # actually ran them). The user sees one labelled block per
-            # reference (rendered like a thinking block) so the MoA process is
-            # visible rather than a silent pause. Best-effort: never blocks the
-            # turn.
-            _ref_count = len(reference_outputs)
-            for _idx, (_label, _text) in enumerate(reference_outputs, start=1):
-                self._emit(
-                    "moa.reference",
-                    index=_idx,
-                    count=_ref_count,
-                    label=_label,
-                    text=_text,
-                )
-            if _ref_count:
-                self._emit(
-                    "moa.aggregating",
-                    aggregator=_slot_label(aggregator),
-                    ref_count=_ref_count,
-                )
+        reference_outputs = _run_references_parallel(
+            reference_models,
+            ref_messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+        )

        agg_messages = [dict(m) for m in messages]
        if reference_outputs:
@@ -416,26 +286,21 @@ class MoAChatCompletions:
            raise RuntimeError("MoA aggregator cannot be another MoA preset")
        agg_kwargs = dict(api_kwargs)
        agg_kwargs["messages"] = agg_messages
-        # The aggregator is the acting model. Resolve its slot to the provider's
-        # real runtime (base_url/api_key/api_mode) and call it through the same
-        # request-building path any model uses — so per-model wire-format
-        # handling (anthropic_messages, max_completion_tokens, fixed/forbidden
-        # temperature) applies identically to it. MoA imposes no output cap:
-        # max_tokens is passed through from the caller (normally None → omitted
-        # → the model's real maximum). The preset's old hardcoded 4096 default
-        # is gone — it truncated long syntheses.
+        agg_kwargs["model"] = aggregator.get("model")
+        agg_kwargs["temperature"] = aggregator_temperature
        return call_llm(
            task="moa_aggregator",
+            provider=aggregator.get("provider"),
+            model=aggregator.get("model"),
            messages=agg_messages,
            temperature=aggregator_temperature,
            max_tokens=agg_kwargs.get("max_tokens"),
            tools=agg_kwargs.get("tools"),
            extra_body=agg_kwargs.get("extra_body"),
-            **_slot_runtime(aggregator),
        )


 class MoAClient:
-    def __init__(self, preset_name: str, reference_callback: Any = None):
+    def __init__(self, preset_name: str):
        self.chat = type("_MoAChat", (), {})()
-        self.chat.completions = MoAChatCompletions(preset_name, reference_callback=reference_callback)
+        self.chat.completions = MoAChatCompletions(preset_name)
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -1646,34 +1646,6 @@ def get_model_context_length(
    if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
        return config_context_length

-    # 0a. MoA virtual provider — ``model`` is a preset name, not a real model,
-    # and ``base_url`` is the local virtual endpoint, so every probe below would
-    # miss and fall through to the 256K default. The aggregator is the acting
-    # model, so resolve the context window from the aggregator slot's real
-    # provider+model instead. References are advisory-only and never bound the
-    # acting context, so they're ignored here.
-    if (provider or "").strip().lower() == "moa":
-        try:
-            from hermes_cli.config import load_config
-            from hermes_cli.moa_config import resolve_moa_preset
-            from hermes_cli.runtime_provider import resolve_runtime_provider
-
-            preset = resolve_moa_preset(load_config().get("moa") or {}, model)
-            agg = preset.get("aggregator") or {}
-            agg_provider = str(agg.get("provider") or "").strip()
-            agg_model = str(agg.get("model") or "").strip()
-            if agg_model and agg_provider and agg_provider.lower() != "moa":
-                rt = resolve_runtime_provider(requested=agg_provider, target_model=agg_model)
-                return get_model_context_length(
-                    agg_model,
-                    base_url=rt.get("base_url", "") or "",
-                    api_key=rt.get("api_key", "") or "",
-                    provider=agg_provider,
-                )
-        except Exception:
-            logger.debug("MoA aggregator context-length resolution failed", exc_info=True)
-        # Fall through to the generic default if aggregator resolution failed.
-
    # 0b. custom_providers per-model override — check before any probe.
    # This closes the gap where /model switch and display paths used to fall
    # back to 128K despite the user having a per-model context_length set.
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -617,12 +617,7 @@ DEVELOPER_ROLE_MODELS = ("gpt-5", "codex")
 PLATFORM_HINTS = {
    "whatsapp": (
        "You are on a text messaging communication platform, WhatsApp. "
-        "Standard markdown (**bold**, *italic*, ~~strike~~, # headers, "
-        "`code`, ```code blocks```, [links](url)) is auto-converted to "
-        "WhatsApp's native syntax (*bold*, _italic_, ~strike~, monospace) — "
-        "feel free to write in markdown, and use bullet lists ('- item') "
-        "freely. Tables are NOT supported — prefer bullet lists or labeled "
-        "key:value pairs. "
+        "Please do not use markdown as it does not render. "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. The file "
        "will be sent as a native WhatsApp attachment — images (.jpg, .png, "
@@ -687,11 +682,7 @@ PLATFORM_HINTS = {
    ),
    "signal": (
        "You are on a text messaging communication platform, Signal. "
-        "Standard markdown (**bold**, *italic*, ~~strike~~, # headers, "
-        "`code`, ```code blocks```) is auto-converted to Signal's native "
-        "rich formatting — feel free to write in markdown, and use bullet "
-        "lists ('- item') freely (they render as • bullets). Tables are NOT "
-        "supported — prefer bullet lists or labeled key:value pairs. "
+        "Please do not use markdown as it does not render. "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. Images "
        "(.png, .jpg, .webp) appear as photos, audio as attachments, and other "
--- a/agent/reasoning_timeouts.py
+++ b/agent/reasoning_timeouts.py
@@ -1,216 +0,0 @@
-"""Per-reasoning-model stale-timeout floor for known reasoning models.
-
-Reasoning models (those that emit extended thinking blocks before their
-first content token) routinely exceed Hermes's default chat-model
-stale detectors:
-
-* Stream stale detector:   ``HERMES_STREAM_STALE_TIMEOUT``     default 180s
-                           ``agent/chat_completion_helpers.py:2544``
-* Non-stream stale detector: ``HERMES_API_CALL_STALE_TIMEOUT``  default 90s
-                           ``run_agent.py:1140``
-
-For NVIDIA Nemotron 3 Ultra on the hosted NIM gateway the empirical
-upstream idle kill is ~120s (first-party reproduction at
-NVIDIA/NemoClaw#4846 — TTFB ~31s, stream dies at 120s). The same
-failure mode exists on OpenAI o1/o3, Anthropic Opus 4.x thinking,
-DeepSeek R1, Qwen QwQ, xAI Grok reasoning — every cloud reasoning
-model hits upstream-proxies / load-balancers with idle timeouts
-shorter than the model's thinking phase. Result: the stale detector
-kills the connection mid-think, surfacing as
-``BrokenPipeError``/``RemoteProtocolError`` on the next read.
-
-This module provides a floor that the existing stale-detector scaling
-blocks consult via :func:`get_reasoning_stale_timeout_floor` and
-apply as ``max(default, floor)``. It is a FLOOR:
-
-* Never overrides explicit user config (``providers.<id>.models.<model>.stale_timeout_seconds``
-  or ``request_timeout_seconds`` already wins — this code never runs
-  in that branch).
-* Never lowers an existing threshold.
-* Has zero effect on non-reasoning models — they are not in the
-  allowlist and the resolver returns ``None``.
-
-Matching uses start-anchored regex on the slug-only component of
-the model name (after stripping any aggregator prefix like
-``openai/``, ``x-ai/``, ``anthropic/``).  The right-anchor matches
-end-of-string or a ``-``/``.``/``_`` slug separator, so ``qwen3-235b``
-matches the ``qwen3`` family entry (a future model slug would be
-``qwen3-235b-instruct`` and would also match) but ``some-other-qwen3``
-does NOT match ``qwen3`` (the ``-qwen3`` is not at start of slug).
-
-The ``o1`` case is the most delicate: a model named
-``llama-4-70b-o1-preview`` is a hypothetical community derivative that
-should NOT trigger the reasoning-model floor for the user (the user
-chose a non-OpenAI model, not a reasoning model).  The start-of-slug
-anchor naturally excludes this — the matched ``o1-preview`` is at
-position 11 of the slug, not at position 0.  The previous substring-
-with-trailing-hyphen design would have over-matched here, which is
-why start-of-slug anchoring is the right shape.
-
-Fixes #52217.
-"""
-
-from __future__ import annotations
-
-import re
-from typing import Optional
-
-
-# (slug, floor_seconds).  Each slug is matched as a discrete
-# word-boundary component via the wrapper regex in ``_match_any``
-# below.  Order is irrelevant — the first regex match wins.
-_REASONING_STALE_TIMEOUT_FLOORS: tuple[tuple[str, int], ...] = (
-    # NVIDIA Nemotron — reasoning models behind hosted NIM with
-    # documented 60-180s upstream idle kill (NVIDIA/NemoClaw#4846:
-    # 120s measured).
-    ("nemotron-3-ultra", 600),
-    ("nemotron-3-super", 600),
-    ("nemotron-3-nano",  300),
-    # DeepSeek — R1 reasoning model on hosted NIM / DeepSeek direct.
-    ("deepseek-r1", 600),
-    ("deepseek-reasoner", 600),
-    # Qwen — QwQ reasoning + Qwen3 thinking variants.  QwQ-32B
-    # preview is the stable slug; ``qwen3`` covers the family of
-    # thinking-mode Qwen3 models (qwen3-235b-a22b, qwen3-32b, etc.)
-    # without over-matching every Qwen3 instruct variant — the
-    # right-anchor requires the slug to be at the start of the
-    # remaining model name, so ``qwen3-235b-instruct`` (instruct is
-    # NOT a thinking variant) would still match.  Acceptable
-    # trade-off: instruct variants of qwen3 get the 180s floor
-    # even though they don't reason.  The cost is a slightly longer
-    # wait on a hung provider; the alternative (matching only
-    # ``qwen3-.*-thinking``) breaks the moment NVIDIA or Alibaba
-    # ships a slightly different naming shape.
-    ("qwq-32b", 300),
-    ("qwen3", 180),
-    # OpenAI o-series — known multi-minute TTFB.  Each variant
-    # enumerated explicitly so bare ``o1`` doesn't over-match
-    # ``olmo-1`` or hypothetical future community derivatives.
-    ("o1", 600),
-    ("o1-mini", 600),
-    ("o1-pro", 600),
-    ("o1-preview", 600),
-    ("o3", 600),
-    ("o3-pro", 600),
-    ("o3-mini", 300),
-    ("o4-mini", 300),
-    # Anthropic Claude 4.x thinking variants.  Anchored at
-    # ``claude-opus-4`` so non-thinking Claude 3.x or future
-    # non-reasoning Claude variants don't match.
-    ("claude-opus-4", 240),
-    ("claude-sonnet-4.5", 180),
-    ("claude-sonnet-4.6", 180),
-    # xAI Grok reasoning variants.  Explicit reasoning-only keys
-    # plus one for the ``non-reasoning`` variant so users picking
-    # the fast variant don't get the 300s floor.  Bare ``grok-3``,
-    # ``grok-4`` etc. don't match — only the explicit reasoning /
-    # non-reasoning pairs.
-    ("grok-4-fast-reasoning", 300),
-    ("grok-4.20-reasoning", 300),
-    ("grok-4-fast-non-reasoning", 180),
-)
-
-
-# Pre-compile each pattern.  Wrapper = start-of-slug + slug + end-or-
-# separator, where ``start-of-slug`` means start-of-string OR
-# immediately after the last ``/`` (aggregator separator) and
-# ``end-or-separator`` means end-of-string OR a ``-``/``.``/``_``.
-#
-# Why start-of-slug and not start-of-string: aggregator prefixes
-# like ``openai/`` should not affect matching — the slug identity is
-# the part after the last ``/``.  Stripping the aggregator prefix in
-# :func:`get_reasoning_stale_timeout_floor` before regex matching
-# gives the wrapper a clean start-of-string anchor.
-#
-# Why end-or-separator on the right: ``openai/o3-mini`` must match
-# the ``o3-mini`` slug (the right anchor is end-of-string).  And
-# ``openai/o3-mini-2025-01-31`` must also match ``o3-mini`` (the right
-# anchor is the ``-`` separator).  But ``openai/o3-mini-fork`` should
-# NOT match ``o3-mini`` if we wanted to exclude forks — though the
-# pattern ``o3-mini-fork`` would be matched as a derivative anyway,
-# so we accept that community forks inheriting the same prefix are
-# treated as reasoning models (a reasonable default — the upstream
-# gateway timing is the same).
-_PATTERN_CACHE: dict[str, re.Pattern[str]] = {}
-
-
-def _get_pattern(slug: str) -> re.Pattern[str]:
-    compiled = _PATTERN_CACHE.get(slug)
-    if compiled is None:
-        compiled = re.compile(
-            r"^"
-            + re.escape(slug)
-            + r"(?:$|[\-._])"
-        )
-        _PATTERN_CACHE[slug] = compiled
-    return compiled
-
-
-def _match_any(model_lower: str) -> Optional[float]:
-    """Return the floor for the first matching slug, else None.
-
-    Each table entry is matched as a start-of-slug prefix with the
-    slug-separator-or-end-of-string right-anchor.  Table iteration
-    order is irrelevant: longest slug wins (so ``o3-mini`` beats
-    ``o3`` on a model like ``openai/o3-mini``).
-    """
-    # Sort by slug length descending so longer / more-specific slugs
-    # win on shared prefixes (o3-mini beats o3).
-    sorted_floors = sorted(
-        _REASONING_STALE_TIMEOUT_FLOORS, key=lambda kv: -len(kv[0])
-    )
-    for slug, floor in sorted_floors:
-        if _get_pattern(slug).search(model_lower):
-            return float(floor)
-    return None
-
-
-def get_reasoning_stale_timeout_floor(model: object) -> Optional[float]:
-    """Return the stale-timeout floor (seconds) for a known reasoning model.
-
-    Returns ``None`` when the model is not in the allowlist or the
-    argument is empty / not a string.  Matching uses
-    word-boundary-anchored regex on the lowercased model name, so
-    ``openai/o3-mini`` matches the ``o3-mini`` slug but
-    ``olmo-1`` does NOT match ``o1`` (the ``o1`` substring is not
-    at a word boundary inside ``olmo-1``).
-
-    Aggregator prefixes (``openai/``, ``x-ai/``, ``anthropic/`` etc.)
-    are preserved through matching — the ``/`` is itself a word
-    boundary, so ``openai/o3-mini`` matches ``o3-mini`` because the
-    ``/`` before ``o3-mini`` satisfies the left-anchor alternation.
-
-    This is a FLOOR — callers must apply it as ``max(default, floor)``
-    and only when no explicit user-configured per-model
-    ``stale_timeout_seconds`` exists.
-
-    >>> get_reasoning_stale_timeout_floor("nvidia/nemotron-3-ultra-550b-a55b")
-    600.0
-    >>> get_reasoning_stale_timeout_floor("openai/o3-mini")
-    300.0
-    >>> get_reasoning_stale_timeout_floor("deepseek/deepseek-r1")
-    600.0
-    >>> get_reasoning_stale_timeout_floor("qwen/qwen3-235b-a22b-thinking")
-    180.0
-    >>> get_reasoning_stale_timeout_floor("x-ai/grok-4-fast-reasoning")
-    300.0
-    >>> get_reasoning_stale_timeout_floor("anthropic/claude-opus-4-6")
-    240.0
-    >>> get_reasoning_stale_timeout_floor("gpt-4o") is None
-    True
-    >>> get_reasoning_stale_timeout_floor("olmo-1") is None
-    True
-    >>> get_reasoning_stale_timeout_floor(None) is None
-    True
-    """
-    if not model or not isinstance(model, str):
-        return None
-    name = model.strip().lower()
-    if not name:
-        return None
-    # Strip aggregator prefix (everything before and including the
-    # last ``/``).  The wrapper regex anchors at start-of-string, so
-    # the slug identity is the bare model name.
-    if "/" in name:
-        name = name.rsplit("/", 1)[1]
-    return _match_any(name)
--- a/agent/redact.py
+++ b/agent/redact.py
@@ -107,60 +107,12 @@ _PREFIX_PATTERNS = [
    r"ntn_[A-Za-z0-9]{10,}",            # Notion internal integration token
 ]

-# ENV assignment patterns: KEY=value where KEY contains a secret-like name.
-# Uppercase keys tolerate spaces around "=" (e.g. ``FOO_SECRET = bar``) because
-# an all-caps key is almost never prose/code.
+# ENV assignment patterns: KEY=value where KEY contains a secret-like name
 _SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)"
 _ENV_ASSIGN_RE = re.compile(
    rf"([A-Z0-9_]{{0,50}}{_SECRET_ENV_NAMES}[A-Z0-9_]{{0,50}})\s*=\s*(['\"]?)(\S+)\2",
 )

-# Lowercase / dotted / hyphenated config keys from config files
-# (application.properties, .env, YAML-ish dumps): ``spring.datasource.password=secret``,
-# ``app.api.key=xyz``, ``password=secret``. The uppercase _ENV_ASSIGN_RE above
-# never matched these, so config-file passwords leaked verbatim (issue #16413).
-#
-# These run only in a config-file context, NOT in prose, code, or URLs — three
-# carve-outs preserved from the original design (#4367 + the documented
-# web-URL passthrough below):
-#   1. The value is bounded by ``[^\s&]`` (stops at whitespace AND ``&``) so
-#      form-urlencoded bodies are handled pair-by-pair (by _redact_form_body),
-#      not greedily swallowed.
-#   2. _CFG_DOTTED_RE only matches when the key is NAMESPACED (contains a dot),
-#      which is unambiguously a config key — never a prose word.
-#   3. _CFG_ANCHORED_RE matches a bare secret-word key only at line start
-#      (optionally after ``export``), so conversational ``I have password=foo``
-#      mid-sentence is left alone.
-# The colon-form URL guard (skip when ``://`` present) lives at the call site.
-_SECRET_CFG_NAMES = r"(?:api[ _.\-]?key|token|secret|passwd|password|credential|auth)"
-_CFG_VALUE = r"(['\"]?)([^\s&]+?)\2(?=[\s&]|$)"
-# Namespaced (dotted) key: the secret word may sit anywhere in a dotted path.
-_CFG_DOTTED_RE = re.compile(
-    rf"((?:[A-Za-z0-9_\-]+\.)+[A-Za-z0-9_.\-]*{_SECRET_CFG_NAMES}[A-Za-z0-9_.\-]*"
-    rf"|[A-Za-z0-9_.\-]*{_SECRET_CFG_NAMES}[A-Za-z0-9_.\-]*\.[A-Za-z0-9_.\-]+)"
-    rf"={_CFG_VALUE}",
-    re.IGNORECASE,
-)
-# Line-anchored bare key: ``password=…`` / ``export api_key=…`` at start of line.
-_CFG_ANCHORED_RE = re.compile(
-    rf"(^[ \t]*(?:export[ \t]+)?[A-Za-z0-9_\-]*{_SECRET_CFG_NAMES}[A-Za-z0-9_\-]*)={_CFG_VALUE}",
-    re.IGNORECASE | re.MULTILINE,
-)
-
-# Unquoted YAML / colon config (e.g. ``password: secret``,
-# ``spring.datasource.password: hunter2``). The secret keyword must be part of
-# the KEY (anchored to the start of the line/indent), and the value is a single
-# whitespace-free token — so prose like ``note: secret meeting`` (keyword in the
-# value) and ``error: token expired`` are left alone. Bare ``auth`` is excluded
-# from the key set so ``Authorization:`` / ``author:`` don't match (the former
-# is masked by _AUTH_HEADER_RE); ``auth_token``/``auth-token`` still match via
-# the ``token`` keyword. Quoted values defer to _JSON_FIELD_RE via the lookahead.
-_YAML_CFG_NAMES = r"(?:api[ _.\-]?key|token|secret|passwd|password|credential)"
-_YAML_ASSIGN_RE = re.compile(
-    rf"(^[ \t]*[A-Za-z0-9_.\-]*{_YAML_CFG_NAMES}[A-Za-z0-9_.\-]*)(:[ \t]*)(?!['\"])([^\s&]+)",
-    re.IGNORECASE | re.MULTILINE,
-)
-
 # JSON field patterns: "apiKey": "value", "token": "value", etc.
 _JSON_KEY_NAMES = r"(?:api_?[Kk]ey|token|secret|password|access_token|refresh_token|auth_token|bearer|secret_value|raw_secret|secret_input|key_material)"
 _JSON_FIELD_RE = re.compile(
@@ -430,13 +382,6 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
                name, quote, value = m.group(1), m.group(2), m.group(3)
                return f"{name}={quote}{_mask_token(value)}{quote}"
            text = _ENV_ASSIGN_RE.sub(_redact_env, text)
-            # Lowercase/dotted config keys (issue #16413). Skip URLs entirely —
-            # web-URL query params are intentionally passed through (see note
-            # near the bottom of this function); _DB_CONNSTR_RE still guards
-            # connection-string passwords.
-            if "://" not in text:
-                text = _CFG_DOTTED_RE.sub(_redact_env, text)
-                text = _CFG_ANCHORED_RE.sub(_redact_env, text)

        # JSON fields: "apiKey": "***"  (skip for code files — false positives)
        if ":" in text and '"' in text:
@@ -445,15 +390,6 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
                return f'{key}: "{_mask_token(value)}"'
            text = _JSON_FIELD_RE.sub(_redact_json, text)

-        # Unquoted YAML / colon config: password: ***  (after JSON so quoted
-        # values are handled there; the lookahead in _YAML_ASSIGN_RE skips
-        # quotes). Skip URLs — web-URL query params pass through by design.
-        if ":" in text and "://" not in text:
-            def _redact_yaml(m):
-                key, sep, value = m.group(1), m.group(2), m.group(3)
-                return f"{key}{sep}{_mask_token(value)}"
-            text = _YAML_ASSIGN_RE.sub(_redact_yaml, text)
-
    # Authorization headers — _AUTH_HEADER_RE matches any scheme after
    # "[Proxy-]Authorization:" case-insensitively, so "uthorization" is the
    # cheapest substring gate that covers every casing without a casefold().
--- a/agent/skill_utils.py
+++ b/agent/skill_utils.py
@@ -507,34 +507,6 @@ def get_all_skills_dirs() -> List[Path]:
    return dirs


-def _resolve_for_skill_ownership(path) -> Path:
-    path_obj = path if isinstance(path, Path) else Path(str(path))
-    try:
-        return path_obj.expanduser().resolve()
-    except (OSError, RuntimeError):
-        return path_obj.expanduser().absolute()
-
-
-def is_external_skill_path(path) -> bool:
-    """Return True when ``path`` lives under a configured external skills dir.
-
-    ``skills.external_dirs`` are externally owned: Hermes can discover and view
-    their skills, and foreground user-directed tool calls may still edit them,
-    but autonomous lifecycle maintenance must treat them as read-only. This
-    helper centralizes the ownership boundary so curator/reporting/tool paths do
-    not each need to re-interpret the config.
-    """
-    candidate = _resolve_for_skill_ownership(path)
-    for root in get_external_skills_dirs():
-        resolved_root = _resolve_for_skill_ownership(root)
-        try:
-            candidate.relative_to(resolved_root)
-            return True
-        except ValueError:
-            continue
-    return False
-
-
 # ── Condition extraction ──────────────────────────────────────────────────


--- a/agent/thinking_timeout_guidance.py
+++ b/agent/thinking_timeout_guidance.py
@@ -1,136 +0,0 @@
-"""Thinking-timeout detection and user-facing guidance for reasoning models.
-
-When a known reasoning model (NVIDIA Nemotron 3 Ultra, OpenAI o1/o3,
-Anthropic Opus 4.x thinking, DeepSeek R1, Qwen QwQ, xAI Grok reasoning)
-hits a transport-layer error before the first content token arrives, the
-upstream proxy has almost certainly idle-killed a long thinking stream —
-not a true context overflow or a configuration error.  The user needs
-distinct guidance for this case:
-
-    "The model's thinking phase exceeded the upstream proxy's idle
-     timeout before the first content token arrived.  This is a known
-     issue with reasoning models behind cloud gateways (NVIDIA NIM,
-     OpenAI, Anthropic, DeepSeek).  Workarounds in priority order:
-     1. Set `providers.<provider>.models.<model>.stale_timeout_seconds: 900`
-        in `~/.hermes/config.yaml` to extend the per-call timeout...
-     2. Lower `reasoning_budget` or set `reasoning_effort: medium`...
-     3. Use a smaller / faster reasoning model..."
-
-The existing `_is_stream_drop` guidance at
-``agent/conversation_loop.py:3464-3486`` fires for large-file-write
-stream drops ("try execute_code with Python's open() for large files")
-which is the WRONG advice for the thinking-timeout case.  This module
-provides the detection and the message as standalone helpers so the
-detection logic is unit-testable without driving the full retry loop,
-and the message text can be regression-tested for spelling and accuracy.
-
-Part 2 of Fixes #52310.
-"""
-
-from __future__ import annotations
-
-from typing import Optional
-
-
-# Substring set that identifies a transport-layer failure on the
-# response stream.  Same shape as the existing
-# ``_SERVER_DISCONNECT_PATTERNS`` in ``agent/error_classifier.py:394``
-# but extended to also catch the OSS-level error signature
-# (``broken pipe`` / ``errno 32``) that the upstream kill surfaces
-# to the OpenAI SDK wrapper.
-_THINKING_TIMEOUT_SUBSTRINGS: tuple[str, ...] = (
-    "broken pipe",
-    "errno 32",
-    "remote protocol",
-    "connection reset",
-    "connection lost",
-    "peer closed",
-    "server disconnected",
-)
-
-
-def is_thinking_timeout(classified: object, model: str, error_msg: str) -> bool:
-    """Return True when a reasoning model's thinking phase hit a transport kill.
-
-    Args:
-        classified: a :class:`agent.error_classifier.ClassifiedError` instance
-            (duck-typed here to avoid an import cycle in unit tests).
-        model: the model slug at failure time (e.g.
-            ``"nvidia/nemotron-3-ultra-550b-a55b"``).
-        error_msg: lowercased string representation of the underlying
-            exception (typically ``str(api_error).lower()``).
-
-    Returns True when ALL conditions hold:
-        1. ``classified.reason == FailoverReason.timeout`` (the classifier
-           override at ``agent/error_classifier.py:720-738`` ensures this
-           is the case for reasoning models even on large sessions).
-        2. ``api_error`` has no ``.status_code`` attribute set (transport
-           disconnect, not an HTTP error).
-        3. ``model`` is in the reasoning-model allowlist (reuses
-           ``agent.reasoning_timeouts.get_reasoning_stale_timeout_floor``).
-        4. ``error_msg`` contains one of the transport-kill substrings.
-
-    Non-reasoning models always return False.  Non-transport errors
-    (billing / rate_limit / auth / context_overflow / format_error)
-    always return False.  HTTP-status errors always return False.
-    """
-    # Import here (not at module top) to keep this helper cheap to
-    # import even from callers that don't need it.  ``agent.reasoning_timeouts``
-    # is small and dependency-free.
-    from agent.reasoning_timeouts import get_reasoning_stale_timeout_floor
-
-    # Condition 1: classifier says timeout.  Use a string/value check
-    # rather than importing FailoverReason so this module has zero
-    # import cycles from the error_classifier package.
-    reason = getattr(classified, "reason", None)
-    reason_value = getattr(reason, "value", None)
-    if reason_value != "timeout":
-        return False
-
-    # Condition 2: no HTTP status code (transport, not API error).
-    # Caller is expected to gate on ``getattr(api_error, "status_code", None) is None``
-    # before calling this helper; the surface here is just the post-gate
-    # boolean so the caller can pass an already-prepped error_msg.
-
-    # Condition 3: reasoning model allowlist.
-    if get_reasoning_stale_timeout_floor(model) is None:
-        return False
-
-    # Condition 4: transport-kill substring in the error message.
-    error_msg_lower = (error_msg or "").lower()
-    return any(p in error_msg_lower for p in _THINKING_TIMEOUT_SUBSTRINGS)
-
-
-def build_thinking_timeout_guidance(
-    provider: str, model: str, model_label: Optional[str] = None,
-) -> str:
-    """Return the user-facing guidance string appended to ``_final_response``.
-
-    Args:
-        provider: provider slug (e.g. ``"nvidia"``, ``"openai"``).
-        model: bare model slug the user would put in their config
-            (e.g. ``"nemotron-3-ultra-550b-a55b"`` if the user uses
-            NVIDIA direct, or the full ``"nvidia/nemotron-3-ultra-550b-a55b"``
-            if they go through an aggregator).  Used verbatim in the
-            config snippet so the user can copy-paste.
-        model_label: optional short label for the model name in the
-            prose (e.g. ``"Nemotron 3 Ultra"``).  Falls back to the
-            slug if not provided.
-    """
-    label = model_label or model
-    return (
-        "\n\nThe model's thinking phase exceeded the upstream proxy's "
-        "idle timeout before the first content token arrived. This is a "
-        f"known issue with reasoning models (like {label}) behind cloud "
-        "gateways (NVIDIA NIM, OpenAI, Anthropic, DeepSeek). Workarounds "
-        "in priority order:\n"
-        f"1. Set `providers.{provider}.models.{model}.stale_timeout_seconds: 900` "
-        "in `~/.hermes/config.yaml` to extend the per-call timeout. "
-        "(Hermes's built-in floor is 600s for known reasoning models — "
-        "if you still see this after raising, the upstream cap is even "
-        "shorter.)\n"
-        "2. Lower `reasoning_budget` or set `reasoning_effort: medium` on this "
-        "model if the provider supports it.\n"
-        "3. Use a smaller / faster reasoning model if the task doesn't "
-        "require deep thinking."
-    )
--- a/agent/tool_executor.py
+++ b/agent/tool_executor.py
@@ -26,7 +26,6 @@ from agent.display import (
    build_tool_preview as _build_tool_preview,
    get_cute_tool_message as _get_cute_tool_message_impl,
    get_tool_emoji as _get_tool_emoji,
-    redact_tool_args_for_display as _redact_tool_args_for_display,
    _detect_tool_failure,
 )
 from agent.tool_guardrails import ToolGuardrailDecision
@@ -470,11 +469,10 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
    if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off":
        print(f"  ⚡ Concurrent: {num_tools} tool calls — {tool_names_str}")
        for i, (tc, name, args, middleware_trace, block_result, blocked_by_guardrail) in enumerate(parsed_calls, 1):
-            display_args = _redact_tool_args_for_display(name, args) or args
-            args_str = json.dumps(display_args, ensure_ascii=False)
+            args_str = json.dumps(args, ensure_ascii=False)
            if agent.verbose_logging:
-                print(f"  📞 Tool {i}: {name}({list(display_args.keys())})")
-                print(agent._wrap_verbose("Args: ", json.dumps(display_args, indent=2, ensure_ascii=False)))
+                print(f"  📞 Tool {i}: {name}({list(args.keys())})")
+                print(agent._wrap_verbose("Args: ", json.dumps(args, indent=2, ensure_ascii=False)))
            else:
                args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
                print(f"  📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}")
@@ -484,9 +482,8 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
            continue
        if agent.tool_progress_callback:
            try:
-                display_args = _redact_tool_args_for_display(name, args) or args
-                preview = _build_tool_preview(name, display_args)
-                agent.tool_progress_callback("tool.started", name, preview, display_args)
+                preview = _build_tool_preview(name, args)
+                agent.tool_progress_callback("tool.started", name, preview, args)
            except Exception as cb_err:
                logging.debug(f"Tool progress callback error: {cb_err}")

@@ -495,8 +492,7 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
            continue
        if agent.tool_start_callback:
            try:
-                display_args = _redact_tool_args_for_display(name, args) or args
-                agent.tool_start_callback(tc.id, name, display_args)
+                agent.tool_start_callback(tc.id, name, args)
            except Exception as cb_err:
                logging.debug(f"Tool start callback error: {cb_err}")

@@ -796,8 +792,7 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe

        if not blocked and agent.tool_complete_callback:
            try:
-                display_args = _redact_tool_args_for_display(name, args) or args
-                agent.tool_complete_callback(tc.id, name, display_args, function_result)
+                agent.tool_complete_callback(tc.id, name, args, function_result)
            except Exception as cb_err:
                logging.debug(f"Tool complete callback error: {cb_err}")

@@ -959,11 +954,10 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
            agent._iters_since_skill = 0

        if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off":
-            display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
-            args_str = json.dumps(display_args, ensure_ascii=False)
+            args_str = json.dumps(function_args, ensure_ascii=False)
            if agent.verbose_logging:
-                print(f"  📞 Tool {i}: {function_name}({list(display_args.keys())})")
-                print(agent._wrap_verbose("Args: ", json.dumps(display_args, indent=2, ensure_ascii=False)))
+                print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())})")
+                print(agent._wrap_verbose("Args: ", json.dumps(function_args, indent=2, ensure_ascii=False)))
            else:
                args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
                print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")
@@ -984,16 +978,14 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe

        if not _execution_blocked and agent.tool_progress_callback:
            try:
-                display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
-                preview = _build_tool_preview(function_name, display_args)
-                agent.tool_progress_callback("tool.started", function_name, preview, display_args)
+                preview = _build_tool_preview(function_name, function_args)
+                agent.tool_progress_callback("tool.started", function_name, preview, function_args)
            except Exception as cb_err:
                logging.debug(f"Tool progress callback error: {cb_err}")

        if not _execution_blocked and agent.tool_start_callback:
            try:
-                display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
-                agent.tool_start_callback(tool_call.id, function_name, display_args)
+                agent.tool_start_callback(tool_call.id, function_name, function_args)
            except Exception as cb_err:
                logging.debug(f"Tool start callback error: {cb_err}")

@@ -1223,8 +1215,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
            if agent._should_emit_quiet_tool_messages():
                face = random.choice(KawaiiSpinner.get_waiting_faces())
                emoji = _get_tool_emoji(function_name)
-                display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
-                preview = _build_tool_preview(function_name, display_args) or function_name
+                preview = _build_tool_preview(function_name, function_args) or function_name
                spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
                spinner.start()
            _ce_result = None
@@ -1257,8 +1248,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
            if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
                face = random.choice(KawaiiSpinner.get_waiting_faces())
                emoji = _get_tool_emoji(function_name)
-                display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
-                preview = _build_tool_preview(function_name, display_args) or function_name
+                preview = _build_tool_preview(function_name, function_args) or function_name
                spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
                spinner.start()
            _mem_result = None
@@ -1289,8 +1279,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
            if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
                face = random.choice(KawaiiSpinner.get_waiting_faces())
                emoji = _get_tool_emoji(function_name)
-                display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
-                preview = _build_tool_preview(function_name, display_args) or function_name
+                preview = _build_tool_preview(function_name, function_args) or function_name
                spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
                spinner.start()
            _spinner_result = None
@@ -1452,8 +1441,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe

        if not _execution_blocked and agent.tool_complete_callback:
            try:
-                display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
-                agent.tool_complete_callback(tool_call.id, function_name, display_args, function_result)
+                agent.tool_complete_callback(tool_call.id, function_name, function_args, function_result)
            except Exception as cb_err:
                logging.debug(f"Tool complete callback error: {cb_err}")

--- a/agent/transports/codex_event_projector.py
+++ b/agent/transports/codex_event_projector.py
@@ -217,7 +217,9 @@ class CodexEventProjector:
    def _project_mcp_tool_call(self, item: dict, item_id: str) -> ProjectionResult:
        server = item.get("server") or "mcp"
        tool = item.get("tool") or "unknown"
-        call_id = _deterministic_call_id(f"mcp_{server}_{tool}", item_id)
+        # Mirror the native MCP tool-name convention (mcp__server__tool) so the
+        # deterministic call_id input stays consistent with registration names.
+        call_id = _deterministic_call_id(f"mcp__{server}__{tool}", item_id)
        args = item.get("arguments") or {}
        if not isinstance(args, dict):
            args = {"arguments": args}
--- a/agent/turn_context.py
+++ b/agent/turn_context.py
@@ -28,7 +28,6 @@ import uuid
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional

-from agent.conversation_compression import conversation_history_after_compression
 from agent.iteration_budget import IterationBudget
 from agent.model_metadata import (
    estimate_messages_tokens_rough,
@@ -401,9 +400,7 @@ def build_turn_context(
                    _orig_len, len(messages), _orig_tokens, _preflight_tokens
                ):
                    break  # Cannot compress further: neither rows nor tokens moved
-                conversation_history = conversation_history_after_compression(
-                    agent, messages
-                )
+                conversation_history = None
                agent._empty_content_retries = 0
                agent._thinking_prefill_retries = 0
                agent._last_content_with_tools = None
--- a/agent/verification_stop.py
+++ b/agent/verification_stop.py
@@ -15,135 +15,9 @@ from typing import Any, Iterable

 _MAX_CHANGED_PATHS_IN_NUDGE = 8

-# Non-code file extensions whose edits carry no verifiable runtime behavior:
-# documentation, prose, and data/markup that no test/build exercises. When a
-# turn touches ONLY these, verify-on-stop has nothing to check, so the nudge is
-# suppressed (this is fix "C" for the doc/markdown/skill false-positive — a
-# SKILL.md or README edit must never demand a /tmp verification script). A turn
-# that edits any non-listed path (a real source/code/config file) still nudges.
-_NON_CODE_VERIFY_EXTENSIONS = frozenset(
-    {
-        ".md",
-        ".markdown",
-        ".mdx",
-        ".rst",
-        ".txt",
-        ".text",
-        ".adoc",
-        ".asciidoc",
-        ".org",
-        ".log",
-        ".csv",
-        ".tsv",
-    }
-)
-
-# Filenames (case-insensitive, extension-less or otherwise) that are pure prose
-# even without a recognized doc extension.
-_NON_CODE_VERIFY_FILENAMES = frozenset(
-    {
-        "license",
-        "licence",
-        "notice",
-        "authors",
-        "contributors",
-        "changelog",
-        "codeowners",
-    }
-)
-
-
-def _is_non_code_path(raw: str) -> bool:
-    """Return True when a changed path is documentation/prose with nothing to verify."""
-    try:
-        p = Path(str(raw))
-    except Exception:
-        return False
-    suffix = p.suffix.lower()
-    if suffix in _NON_CODE_VERIFY_EXTENSIONS:
-        return True
-    if not suffix and p.name.lower() in _NON_CODE_VERIFY_FILENAMES:
-        return True
-    return False
-
-
-def _filter_verifiable_paths(paths: Iterable[str]) -> list[str]:
-    """Drop documentation/prose paths; keep paths that could have verifiable behavior."""
-    return [p for p in paths if p and not _is_non_code_path(p)]
-
-
-# Session identities (platform or source) that are NOT human conversational
-# messaging surfaces: interactive coding surfaces (CLI, TUI, desktop, codex,
-# local, gateway) and programmatic callers (API server, webhooks, tools).
-# Verify-on-stop stays ON by default for these. Any other resolved gateway
-# platform is a conversational messaging surface (Telegram, Discord, WhatsApp,
-# Signal, Slack, etc.) where the verification narrative would reach a human as
-# chat noise, so it defaults OFF. Mirrors LOCAL_SESSION_SOURCE_IDS in
-# apps/desktop/src/lib/session-source.ts; keep roughly in sync when adding a
-# local or programmatic surface. Default-deny by design: an unrecognized
-# identity is treated as messaging (OFF) so a new chat platform never leaks the
-# verification receipt before this set is updated.
-_NON_MESSAGING_SESSION_SURFACES = frozenset(
-    {
-        "",
-        "cli",
-        "codex",
-        "desktop",
-        "gateway",
-        "local",
-        "tui",
-        "tool",
-        "api_server",
-        "webhook",
-        "msgraph_webhook",
-    }
-)
-
-
-def _session_is_messaging_surface() -> bool:
-    """Return whether this turn is delivered over a human messaging channel.
-
-    The gateway binds the platform value (e.g. ``telegram``) to
-    ``HERMES_SESSION_PLATFORM``; the CLI and TUI set ``HERMES_SESSION_SOURCE``
-    (e.g. ``cli``, ``tui``) instead. Both are consulted via the session-context
-    helper (with an ``os.environ`` fallback), alongside the ``HERMES_PLATFORM``
-    override, matching the sibling platform resolution in
-    ``agent/skill_commands.py`` and ``agent/prompt_builder.py``. A turn is a
-    messaging surface when a resolved identity is present and is not a known
-    non-messaging surface.
-    """
-    try:
-        from gateway.session_context import get_session_env
-
-        platform = (
-            os.getenv("HERMES_PLATFORM")
-            or get_session_env("HERMES_SESSION_PLATFORM", "")
-        )
-        source = get_session_env("HERMES_SESSION_SOURCE", "")
-    except Exception:
-        platform = os.getenv("HERMES_PLATFORM", "") or os.environ.get(
-            "HERMES_SESSION_PLATFORM", ""
-        )
-        source = os.environ.get("HERMES_SESSION_SOURCE", "")
-    for identity in (platform, source):
-        identity = str(identity or "").strip().lower()
-        if identity and identity not in _NON_MESSAGING_SESSION_SURFACES:
-            return True
-    return False
-

 def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool:
-    """Return whether edit -> verify-before-finish behavior is enabled.
-
-    Precedence: an explicit ``HERMES_VERIFY_ON_STOP`` env var wins, then an
-    explicit ``agent.verify_on_stop`` config value. The config default is
-    ``False`` (see ``DEFAULT_CONFIG``) — verify-on-stop is OFF unless the user
-    opts in. The legacy ``"auto"`` sentinel is still honored for anyone who
-    sets it explicitly: it resolves to ON for interactive coding surfaces
-    (CLI, TUI, desktop) and programmatic callers, and OFF for conversational
-    messaging surfaces (Telegram, Discord, etc.). A missing/unknown value
-    falls back to OFF.
-    """
+    """Return whether edit -> verify-before-finish behavior is enabled."""
    env = os.environ.get("HERMES_VERIFY_ON_STOP")
    if env is not None:
        return env.strip().lower() not in {"0", "false", "no", "off"}
@@ -155,20 +29,9 @@ def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool:
        except Exception:
            config = {}
    agent_cfg = (config or {}).get("agent") if isinstance(config, dict) else None
-    cfg_val = agent_cfg.get("verify_on_stop") if isinstance(agent_cfg, dict) else None
-    if isinstance(cfg_val, bool):
-        return cfg_val
-    if isinstance(cfg_val, str):
-        token = cfg_val.strip().lower()
-        if token in {"1", "true", "yes", "on"}:
-            return True
-        if token in {"0", "false", "no", "off"}:
-            return False
-        if token == "auto":
-            # Explicit opt-in to the legacy surface-aware behavior.
-            return not _session_is_messaging_surface()
-    # Missing or unknown value -> OFF (the new default).
-    return False
+    if isinstance(agent_cfg, dict) and "verify_on_stop" in agent_cfg:
+        return bool(agent_cfg.get("verify_on_stop"))
+    return True


 def _candidate_cwds(paths: Iterable[str]) -> list[Path]:
@@ -251,10 +114,7 @@ def build_verify_on_stop_nudge(
    max_attempts: int = 2,
 ) -> str | None:
    """Return a synthetic follow-up when edited code lacks fresh verification."""
-    # Drop documentation/prose paths (markdown, skills, README, LICENSE, ...) —
-    # they carry no verifiable behavior, so a turn that touched only those has
-    # nothing to verify and must not nudge.
-    paths = sorted({str(p) for p in _filter_verifiable_paths(changed_paths)})
+    paths = sorted({str(p) for p in changed_paths if p})
    if not paths or attempts >= max_attempts:
        return None

--- a/apps/desktop/electron/backend-env.cjs
+++ b/apps/desktop/electron/backend-env.cjs
@@ -61,7 +61,10 @@ function buildDesktopBackendPath({
  const venvBin = venvRoot ? pathModule.join(venvRoot, platform === 'win32' ? 'Scripts' : 'bin') : null
  const saneEntries = platform === 'win32' ? [] : POSIX_SANE_PATH_ENTRIES

-  return appendUniquePathEntries([hermesNodeBin, venvBin, currentPath, saneEntries], { delimiter })
+  return appendUniquePathEntries(
+    [hermesNodeBin, venvBin, currentPath, saneEntries],
+    { delimiter }
+  )
 }

 function normalizeHermesHomeRoot(hermesHome, { pathModule = pathModuleForPlatform(process.platform) } = {}) {
--- a/apps/desktop/electron/backend-env.test.cjs
+++ b/apps/desktop/electron/backend-env.test.cjs
@@ -76,7 +76,10 @@ test('normalizeHermesHomeRoot maps profile homes back to the global Hermes root'
    normalizeHermesHomeRoot('C:\\Users\\test\\AppData\\Local\\hermes\\profiles\\oracle', { pathModule: path.win32 }),
    'C:\\Users\\test\\AppData\\Local\\hermes'
  )
-  assert.equal(normalizeHermesHomeRoot('/Users/test/.hermes', { pathModule: path.posix }), '/Users/test/.hermes')
+  assert.equal(
+    normalizeHermesHomeRoot('/Users/test/.hermes', { pathModule: path.posix }),
+    '/Users/test/.hermes'
+  )
 })

 test('Windows PATH casing and delimiter are preserved without POSIX sane entries', () => {
@@ -101,5 +104,8 @@ test('Windows PATH casing and delimiter are preserved without POSIX sane entries
 })

 test('appendUniquePathEntries drops empty entries and keeps first occurrence', () => {
-  assert.equal(appendUniquePathEntries([':/a::/b', ['/a', '/c']], { delimiter: ':' }), '/a:/b:/c')
+  assert.equal(
+    appendUniquePathEntries([':/a::/b', ['/a', '/c']], { delimiter: ':' }),
+    '/a:/b:/c'
+  )
 })
--- a/apps/desktop/electron/backend-ready.cjs
+++ b/apps/desktop/electron/backend-ready.cjs
@@ -167,5 +167,5 @@ module.exports = {
  readDashboardReadyFile,
  resolvePortAnnounceTimeoutMs,
  DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS,
-  MIN_PORT_ANNOUNCE_TIMEOUT_MS
+  MIN_PORT_ANNOUNCE_TIMEOUT_MS,
 }
--- a/apps/desktop/electron/backend-ready.test.cjs
+++ b/apps/desktop/electron/backend-ready.test.cjs
@@ -25,7 +25,7 @@ const {
  waitForDashboardReadyFile,
  resolvePortAnnounceTimeoutMs,
  DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS,
-  MIN_PORT_ANNOUNCE_TIMEOUT_MS
+  MIN_PORT_ANNOUNCE_TIMEOUT_MS,
 } = require('./backend-ready.cjs')

 // A minimal stand-in for a spawned child process: an EventEmitter with a
--- a/apps/desktop/electron/bootstrap-runner.cjs
+++ b/apps/desktop/electron/bootstrap-runner.cjs
@@ -179,13 +179,7 @@ function downloadInstallScript(commit, destPath) {
  })
 }

-async function resolveInstallScript({
-  installStamp,
-  sourceRepoRoot,
-  hermesHome,
-  emit,
-  _download = downloadInstallScript
-}) {
+async function resolveInstallScript({ installStamp, sourceRepoRoot, hermesHome, emit, _download = downloadInstallScript }) {
  // 1. Dev shortcut: prefer a local checkout's installer so we can iterate
  //    without pushing. SOURCE_REPO_ROOT comes from main.cjs (path.resolve
  //    of APP_ROOT/../..).
@@ -299,19 +293,15 @@ function spawnPowerShell(scriptPath, args, { emit, stageName, abortSignal, herme
    const ps = process.platform === 'win32' ? resolveWindowsPowerShell() : 'pwsh'
    const fullArgs = ['-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', scriptPath, ...args]

-    const child = spawn(
-      ps,
-      fullArgs,
-      hiddenWindowsChildOptions({
-        stdio: ['ignore', 'pipe', 'pipe'],
-        env: {
-          ...process.env,
-          // Pass HERMES_HOME through so install.ps1 respects the caller's
-          // choice rather than re-computing the default.
-          HERMES_HOME: hermesHome || process.env.HERMES_HOME || ''
-        }
-      })
-    )
+    const child = spawn(ps, fullArgs, hiddenWindowsChildOptions({
+      stdio: ['ignore', 'pipe', 'pipe'],
+      env: {
+        ...process.env,
+        // Pass HERMES_HOME through so install.ps1 respects the caller's
+        // choice rather than re-computing the default.
+        HERMES_HOME: hermesHome || process.env.HERMES_HOME || ''
+      }
+    }))

    let stdout = ''
    let stderr = ''
--- a/apps/desktop/electron/connection-config.cjs
+++ b/apps/desktop/electron/connection-config.cjs
@@ -261,7 +261,12 @@ function cookiesHaveSession(cookies) {
 */
 function cookiesHaveLiveSession(cookies) {
  if (!Array.isArray(cookies)) return false
-  return cookies.some(c => c && c.value && (AT_COOKIE_VARIANTS.includes(c.name) || RT_COOKIE_VARIANTS.includes(c.name)))
+  return cookies.some(
+    c =>
+      c &&
+      c.value &&
+      (AT_COOKIE_VARIANTS.includes(c.name) || RT_COOKIE_VARIANTS.includes(c.name))
+  )
 }

 module.exports = {
--- a/apps/desktop/electron/desktop-uninstall.cjs
+++ b/apps/desktop/electron/desktop-uninstall.cjs
@@ -138,7 +138,10 @@ function buildPosixCleanupScript({ desktopPid, pythonExe, pythonPath, agentRoot,
  if (pythonPath) {
    lines.push(`export PYTHONPATH=${q(pythonPath)}\${PYTHONPATH:+:$PYTHONPATH}`)
  }
-  lines.push(`cd ${q(agentRoot)} 2>/dev/null || true`, `${q(pythonExe)} ${uninstallArgs.map(q).join(' ')} || true`)
+  lines.push(
+    `cd ${q(agentRoot)} 2>/dev/null || true`,
+    `${q(pythonExe)} ${uninstallArgs.map(q).join(' ')} || true`
+  )
  if (appPath) {
    lines.push(`rm -rf ${q(appPath)} || true`)
  }
@@ -166,15 +169,7 @@ function buildPosixCleanupScript({ desktopPid, pythonExe, pythonPath, agentRoot,
 * Removal: even after the desktop PID is gone, Windows releases directory
 * handles lazily, so a single `rmdir /s /q` can half-fail — retry up to 10x.
 */
-function buildWindowsCleanupScript({
-  desktopPid,
-  pythonExe,
-  pythonPath,
-  agentRoot,
-  uninstallArgs,
-  appPath,
-  hermesHome
-}) {
+function buildWindowsCleanupScript({ desktopPid, pythonExe, pythonPath, agentRoot, uninstallArgs, appPath, hermesHome }) {
  const pid = Number(desktopPid) || 0
  // cmd.exe has no string escaping inside quotes; strip embedded quotes (paths
  // under %LOCALAPPDATA% never contain them). `&`/`^` in a path would still be
--- a/apps/desktop/electron/desktop-uninstall.test.cjs
+++ b/apps/desktop/electron/desktop-uninstall.test.cjs
@@ -101,7 +101,10 @@ test('resolveRemovableAppPath uses APPIMAGE on Linux when set', () => {
 })

 test('resolveRemovableAppPath finds the unpacked dir on Linux', () => {
-  assert.equal(resolveRemovableAppPath('/opt/hermes/linux-unpacked/hermes', 'linux', {}), '/opt/hermes/linux-unpacked')
+  assert.equal(
+    resolveRemovableAppPath('/opt/hermes/linux-unpacked/hermes', 'linux', {}),
+    '/opt/hermes/linux-unpacked'
+  )
  // A system-package install (/usr/bin) → null, left to apt/dnf.
  assert.equal(resolveRemovableAppPath('/usr/bin/hermes', 'linux', {}), null)
 })
--- a/apps/desktop/electron/embed-referer.cjs
+++ b/apps/desktop/electron/embed-referer.cjs
@@ -1,48 +0,0 @@
-'use strict'
-
-const { session } = require('electron')
-
-const EMBED_SESSION_PARTITION = 'persist:hermes-embed'
-const EMBED_REFERER = 'https://www.youtube.com/'
-const YOUTUBE_REFERER_HOST_RE =
-  /(^|\.)(youtube\.com|youtube-nocookie\.com|googlevideo\.com|ytimg\.com|youtubei\.googleapis\.com)$/i
-
-function installEmbedRefererForSession(embedSession) {
-  if (!embedSession) {
-    return
-  }
-
-  embedSession.webRequest.onBeforeSendHeaders((details, callback) => {
-    let host = ''
-
-    try {
-      host = new URL(details.url).hostname
-    } catch {
-      host = ''
-    }
-
-    if (!YOUTUBE_REFERER_HOST_RE.test(host)) {
-      callback({ requestHeaders: details.requestHeaders })
-      return
-    }
-
-    const headers = { ...details.requestHeaders }
-
-    if (!headers.Referer && !headers.referer) {
-      headers.Referer = EMBED_REFERER
-    }
-
-    callback({ requestHeaders: headers })
-  })
-}
-
-/** Stamp Referer on YouTube requests in the embed webview partition only. */
-function installEmbedReferer() {
-  try {
-    installEmbedRefererForSession(session.fromPartition(EMBED_SESSION_PARTITION))
-  } catch {
-    // Non-fatal: embeds still render; YouTube may show referer errors.
-  }
-}
-
-module.exports = { installEmbedReferer }
--- a/apps/desktop/electron/fs-read-dir.cjs
+++ b/apps/desktop/electron/fs-read-dir.cjs
@@ -92,7 +92,9 @@ async function readDirForIpc(dirPath, options = {}) {
  try {
    const dirents = await fsImpl.promises.readdir(resolved, { withFileTypes: true })
    const visibleDirents = dirents.filter(dirent => !FS_READDIR_HIDDEN.has(dirent.name))
-    const entries = await mapWithStatConcurrency(visibleDirents, dirent => entryForDirent(dirent, resolved, fsImpl))
+    const entries = await mapWithStatConcurrency(visibleDirents, dirent =>
+      entryForDirent(dirent, resolved, fsImpl)
+    )

    entries.sort((a, b) => Number(b.isDirectory) - Number(a.isDirectory) || a.name.localeCompare(b.name))

--- a/apps/desktop/electron/fs-read-dir.test.cjs
+++ b/apps/desktop/electron/fs-read-dir.test.cjs
@@ -349,10 +349,7 @@ test('readDirForIpc bounds concurrent stats while preserving complete sorted out
  assert.equal(result.error, undefined)
  assert.equal(result.entries.length, names.length)
  assert.equal(statCalls.length, names.length)
-  assert.equal(
-    statCalls.some(fullPath => fullPath.endsWith(`${path.sep}node_modules`)),
-    false
-  )
+  assert.equal(statCalls.some(fullPath => fullPath.endsWith(`${path.sep}node_modules`)), false)
  assert.ok(peak > 1, `expected concurrent stats, observed peak ${peak}`)
  assert.ok(peak <= 16, `expected at most 16 concurrent stats, observed peak ${peak}`)
  assert.deepEqual(
@@ -360,5 +357,8 @@ test('readDirForIpc bounds concurrent stats while preserving complete sorted out
    expectedNames
  )
  assert.equal(result.entries.find(entry => entry.name === failedName)?.isDirectory, false)
-  assert.equal(result.entries.filter(entry => entry.isDirectory).length, successfulDirectoryNames.size)
+  assert.equal(
+    result.entries.filter(entry => entry.isDirectory).length,
+    successfulDirectoryNames.size
+  )
 })
--- a/apps/desktop/electron/git-repo-scan.cjs
+++ b/apps/desktop/electron/git-repo-scan.cjs
@@ -86,8 +86,10 @@ async function scanGitRepos(roots, options = {}) {
    await mapLimit(subdirs, MAX_CONCURRENCY, sub => walk(sub, depth + 1))
  }

-  await mapLimit(searchRoots.map(root => String(root || '').trim()).filter(Boolean), MAX_CONCURRENCY, root =>
-    walk(root, 0)
+  await mapLimit(
+    searchRoots.map(root => String(root || '').trim()).filter(Boolean),
+    MAX_CONCURRENCY,
+    root => walk(root, 0)
  )

  return [...found.entries()].map(([root, label]) => ({ label, root }))
--- a/apps/desktop/electron/git-review-ops.cjs
+++ b/apps/desktop/electron/git-review-ops.cjs
@@ -188,12 +188,7 @@ async function defaultBranchName(git) {

  // Prefer a local trunk, then a remote-only one (returns the clean name either
  // way) so "branch off main" works even before main is checked out locally.
-  for (const ref of [
-    'refs/heads/main',
-    'refs/heads/master',
-    'refs/remotes/origin/main',
-    'refs/remotes/origin/master'
-  ]) {
+  for (const ref of ['refs/heads/main', 'refs/heads/master', 'refs/remotes/origin/main', 'refs/remotes/origin/master']) {
    try {
      await git.raw(['rev-parse', '--verify', '--quiet', ref])

--- a/apps/desktop/electron/git-worktree-ops.cjs
+++ b/apps/desktop/electron/git-worktree-ops.cjs
@@ -45,10 +45,7 @@ function parseWorktrees(out) {
    } else if (!cur) {
      continue
    } else if (line.startsWith('branch ')) {
-      cur.branch = line
-        .slice(7)
-        .trim()
-        .replace(/^refs\/heads\//, '')
+      cur.branch = line.slice(7).trim().replace(/^refs\/heads\//, '')
    } else if (line === 'detached') {
      cur.detached = true
    } else if (line === 'bare') {
@@ -125,9 +122,10 @@ async function gitLine(gitBin, args, cwd) {
 }

 async function defaultBranch(gitBin, cwd) {
-  const remote = (
-    await gitLine(gitBin, ['symbolic-ref', '--quiet', '--short', 'refs/remotes/origin/HEAD'], cwd)
-  ).replace(/^origin\//, '')
+  const remote = (await gitLine(gitBin, ['symbolic-ref', '--quiet', '--short', 'refs/remotes/origin/HEAD'], cwd)).replace(
+    /^origin\//,
+    ''
+  )

  if (remote) {
    return remote
@@ -179,16 +177,7 @@ async function ensureGitRepo(gitBin, dir) {
    // Inline identity so the seed commit lands even with no global git config.
    await runGit(
      gitBin,
-      [
-        '-c',
-        'user.email=hermes@localhost',
-        '-c',
-        'user.name=Hermes',
-        'commit',
-        '--allow-empty',
-        '-m',
-        'Initial commit'
-      ],
+      ['-c', 'user.email=hermes@localhost', '-c', 'user.name=Hermes', 'commit', '--allow-empty', '-m', 'Initial commit'],
      dir
    )
  }
--- a/apps/desktop/electron/hardening.cjs
+++ b/apps/desktop/electron/hardening.cjs
@@ -186,10 +186,7 @@ async function statForIpc(fsImpl, resolvedPath, purpose, typeLabel) {
    if (code === 'ENOENT' || code === 'ENOTDIR') {
      throw ipcPathError(code || 'ENOENT', `${purpose} failed: ${typeLabel} does not exist.`)
    }
-    throw ipcPathError(
-      code || 'read-error',
-      `${purpose} failed: ${error instanceof Error ? error.message : String(error)}`
-    )
+    throw ipcPathError(code || 'read-error', `${purpose} failed: ${error instanceof Error ? error.message : String(error)}`)
  }
 }

@@ -204,10 +201,7 @@ async function realpathForIpc(fsImpl, resolvedPath, purpose) {
    return realPath
  } catch (error) {
    const code = error && typeof error === 'object' ? error.code : ''
-    throw ipcPathError(
-      code || 'read-error',
-      `${purpose} failed: ${error instanceof Error ? error.message : String(error)}`
-    )
+    throw ipcPathError(code || 'read-error', `${purpose} failed: ${error instanceof Error ? error.message : String(error)}`)
  }
 }

--- a/apps/desktop/electron/main.cjs
+++ b/apps/desktop/electron/main.cjs
@@ -21,10 +21,10 @@ const crypto = require('node:crypto')
 const fs = require('node:fs')
 const http = require('node:http')
 const https = require('node:https')
+const net = require('node:net')
 const path = require('node:path')
 const { pathToFileURL } = require('node:url')
 const { execFileSync, spawn } = require('node:child_process')
-const { installEmbedReferer } = require('./embed-referer.cjs')
 const { detectRemoteDisplay, isWindowsBinaryPathInWsl, isWslEnvironment } = require('./bootstrap-platform.cjs')
 const { runBootstrap } = require('./bootstrap-runner.cjs')
 const {
@@ -43,8 +43,6 @@ const { serializeJsonBody, setJsonRequestHeaders } = require('./oauth-net-reques
 const { fetchMarketplaceThemes, searchMarketplaceThemes } = require('./vscode-marketplace.cjs')
 const { buildDesktopBackendEnv, normalizeHermesHomeRoot } = require('./backend-env.cjs')
 const { readWindowsUserEnvVar } = require('./windows-user-env.cjs')
-const { readWslWindowsClipboardImage } = require('./wsl-clipboard-image.cjs')
-const { nativeOverlayWidth: computeNativeOverlayWidth } = require('./titlebar-overlay-width.cjs')
 const { readDirForIpc } = require('./fs-read-dir.cjs')
 const { readLiveUpdateMarker } = require('./update-marker.cjs')
 const {
@@ -188,16 +186,6 @@ if (REMOTE_DISPLAY_REASON) {
  )
 }

-// WSLg: Chromium blocklists the Mesa vGPU → software compositing → typing lag.
-// /dev/dxg means a real GPU is available; un-blocklist it. Skipped when a remote
-// display already forced software (SSH'd-into-WSL).
-if (IS_WSL && !REMOTE_DISPLAY_REASON && fs.existsSync('/dev/dxg')) {
-  app.commandLine.appendSwitch('ignore-gpu-blocklist')
-  app.commandLine.appendSwitch('enable-gpu-rasterization')
-  app.commandLine.appendSwitch('enable-zero-copy')
-  console.log('[hermes] WSL GPU passthrough (/dev/dxg) detected; enabling GPU acceleration')
-}
-
 ipcMain.handle('hermes:get-remote-display-reason', () => REMOTE_DISPLAY_REASON)

 // Keep the renderer running at full speed while the window is in the background
@@ -330,7 +318,9 @@ function hermesManagedNodePathEntries() {
 }

 function pathWithHermesManagedNode(...entries) {
-  return [...hermesManagedNodePathEntries(), ...entries, process.env.PATH].filter(Boolean).join(path.delimiter)
+  return [...hermesManagedNodePathEntries(), ...entries, process.env.PATH]
+    .filter(Boolean)
+    .join(path.delimiter)
 }

 // ACTIVE_HERMES_ROOT — the canonical mutable Hermes install. Same path
@@ -408,10 +398,14 @@ const WINDOW_BUTTON_POSITION = {
  x: 24,
  y: TITLEBAR_HEIGHT / 2 - MACOS_TRAFFIC_LIGHTS_HEIGHT / 2
 }
-// Right-edge window-control reservation lives in titlebar-overlay-width.cjs
-// (pure + unit-testable); computeNativeOverlayWidth() applies it per platform.
-// It's only the pre-layout fallback — the renderer measures the exact overlay
-// width live via the Window Controls Overlay API.
+// Width Electron reserves for the Windows/Linux native min/max/close cluster
+// when `titleBarOverlay` is enabled. The OS paints these buttons in the
+// top-right corner of the renderer; we have to leave that much room on the
+// right edge so our system tools (file browser, haptics, settings) don't sit
+// underneath them. macOS uses left-side traffic lights instead and reports a
+// position via getWindowButtonPosition(), so this width is non-zero only on
+// non-macOS platforms.
+const NATIVE_OVERLAY_BUTTON_WIDTH = 144
 const APP_ICON_PATHS = [
  path.join(APP_ROOT, 'public', 'apple-touch-icon.png'),
  path.join(APP_ROOT, 'dist', 'apple-touch-icon.png'),
@@ -525,48 +519,25 @@ function getWindowBackgroundColor() {
  return nativeTheme.shouldUseDarkColors ? '#111111' : '#f7f7f7'
 }

-// Transparent WCO — renderer chrome shows through. rgba(0,0,0,0) can fall back
-// to GetFrameColor() on some Electron builds; rgba(1,0,0,0) is the escape hatch.
-const TITLEBAR_OVERLAY_COLOR = 'rgba(1, 0, 0, 0)'
-
 function getTitleBarOverlayOptions() {
  if (IS_MAC) {
    return { height: TITLEBAR_HEIGHT }
  }

-  // Windows + WSLg paint WCO natively; plain Linux disables it (frameless hidden
-  // titlebar still applies).
-  if (!IS_WINDOWS && !IS_WSL) {
-    return false
+  if (rendererTitleBarTheme) {
+    return {
+      color: rendererTitleBarTheme.background,
+      height: TITLEBAR_HEIGHT,
+      symbolColor: rendererTitleBarTheme.foreground
+    }
  }

+  const useDarkColors = nativeTheme.shouldUseDarkColors
+
  return {
-    color: TITLEBAR_OVERLAY_COLOR,
+    color: useDarkColors ? '#111111' : '#f7f7f7',
    height: TITLEBAR_HEIGHT,
-    symbolColor:
-      rendererTitleBarTheme && isHexColor(rendererTitleBarTheme.foreground)
-        ? rendererTitleBarTheme.foreground
-        : nativeTheme.shouldUseDarkColors
-          ? '#f7f7f7'
-          : '#242424'
-  }
-}
-
-// Push refreshed overlay options to a live window after a theme/appearance
-// change. No-op only on plain (non-WSL) Linux, where getTitleBarOverlayOptions()
-// returns false; the try/catch additionally guards builds where
-// setTitleBarOverlay isn't supported.
-function applyTitleBarOverlay(win) {
-  const options = getTitleBarOverlayOptions()
-  if (!options || typeof options !== 'object') {
-    return
-  }
-
-  try {
-    win?.setTitleBarOverlay?.(options)
-  } catch {
-    // Overlay not supported on this platform/build — leave the frameless
-    // titlebar as-is.
+    symbolColor: useDarkColors ? '#f7f7f7' : '#242424'
  }
 }

@@ -1323,7 +1294,10 @@ function unwrapWindowsVenvHermesCommand(command, dashboardArgs) {
    bootstrap: false,
    env: buildDesktopBackendEnv({
      hermesHome: HERMES_HOME,
-      pythonPathEntries: [...(directoryExists(root) ? [root] : []), ...getVenvSitePackagesEntries(venvRoot)],
+      pythonPathEntries: [
+        ...(directoryExists(root) ? [root] : []),
+        ...getVenvSitePackagesEntries(venvRoot)
+      ],
      venvRoot
    }),
    kind: 'python',
@@ -1565,17 +1539,18 @@ function readVenvHome(venvRoot) {
 function getNoConsoleVenvPython(venvRoot) {
  if (!IS_WINDOWS) return getVenvPython(venvRoot)

-  // uv venv launchers can re-exec console python.exe, which allocates conhost /
-  // Windows Terminal. Use base pythonw directly and provide imports via env.
+  // Prefer the venv's own pythonw shim — it carries pyvenv.cfg / site-packages
+  // wiring. Falling back to the base uv/python.org pythonw.exe skips the venv
+  // and breaks imports (yaml, hermes_cli, …) even when PYTHONPATH is patched.
+  const venvPythonw = path.join(venvRoot, 'Scripts', 'pythonw.exe')
+  if (fileExists(venvPythonw)) return venvPythonw
+
  const baseHome = readVenvHome(venvRoot)
  if (baseHome) {
    const basePythonw = path.join(baseHome, 'pythonw.exe')
    if (fileExists(basePythonw)) return basePythonw
  }

-  const venvPythonw = path.join(venvRoot, 'Scripts', 'pythonw.exe')
-  if (fileExists(venvPythonw)) return venvPythonw
-
  return venvPythonw
 }

@@ -1598,7 +1573,9 @@ function applyWindowsNoConsoleSpawnHints(backend) {

  const usesHermesModule =
    backend.kind === 'python' ||
-    (Array.isArray(backend.args) && backend.args[0] === '-m' && backend.args[1] === 'hermes_cli.main')
+    (Array.isArray(backend.args) &&
+      backend.args[0] === '-m' &&
+      backend.args[1] === 'hermes_cli.main')

  if (!usesHermesModule) return backend

@@ -2174,8 +2151,7 @@ async function applyUpdates(opts = {}) {

    emitUpdateProgress({
      stage: 'restart',
-      message:
-        'Updating Hermes — this window will close and the updater will open. Don’t reopen Hermes yourself; it restarts automatically when the update finishes.',
+      message: 'Updating Hermes — this window will close and the updater will open. Don’t reopen Hermes yourself; it restarts automatically when the update finishes.',
      percent: 100
    })
    repairMacUpdaterHelper(updater)
@@ -2258,9 +2234,7 @@ async function handOffWindowsBootstrapRecovery(reason) {
  })
  child.unref()

-  rememberLog(
-    `[bootstrap] handed off ${reason} recovery to updater: ${updater} ${updaterArgs.join(' ')}; exiting desktop to release app.asar`
-  )
+  rememberLog(`[bootstrap] handed off ${reason} recovery to updater: ${updater} ${updaterArgs.join(' ')}; exiting desktop to release app.asar`)
  // Same dwell as the in-app update hand-off (#50419): give the updater's
  // window time to appear before we vanish, so the recovery doesn't look like
  // a crash and provoke a mid-recovery relaunch.
@@ -2787,7 +2761,8 @@ function createPythonBackend(root, label, dashboardArgs, options = {}) {

  const venvRoot = path.join(root, 'venv')
  const venvPython = getVenvPython(venvRoot)
-  const command = IS_WINDOWS && fileExists(venvPython) ? getNoConsoleVenvPython(venvRoot) : toNoConsolePython(python)
+  const command =
+    IS_WINDOWS && fileExists(venvPython) ? getNoConsoleVenvPython(venvRoot) : toNoConsolePython(python)

  return applyWindowsNoConsoleSpawnHints({
    kind: 'python',
@@ -2796,7 +2771,7 @@ function createPythonBackend(root, label, dashboardArgs, options = {}) {
    args: ['-m', 'hermes_cli.main', ...dashboardArgs],
    env: buildDesktopBackendEnv({
      hermesHome: HERMES_HOME,
-      pythonPathEntries: [root, ...getVenvSitePackagesEntries(venvRoot)],
+      pythonPathEntries: [root],
      venvRoot
    }),
    root,
@@ -2811,7 +2786,9 @@ function createPythonBackend(root, label, dashboardArgs, options = {}) {
 // ensureRuntime() to create / refresh it before launch.
 function createActiveBackend(dashboardArgs) {
  const venvPython = getVenvPython(VENV_ROOT)
-  const command = fileExists(venvPython) ? getNoConsoleVenvPython(VENV_ROOT) : toNoConsolePython(findSystemPython())
+  const command = fileExists(venvPython)
+    ? getNoConsoleVenvPython(VENV_ROOT)
+    : toNoConsolePython(findSystemPython())

  return applyWindowsNoConsoleSpawnHints({
    kind: 'python',
@@ -2820,7 +2797,7 @@ function createActiveBackend(dashboardArgs) {
    args: ['-m', 'hermes_cli.main', ...dashboardArgs],
    env: buildDesktopBackendEnv({
      hermesHome: HERMES_HOME,
-      pythonPathEntries: [ACTIVE_HERMES_ROOT, ...getVenvSitePackagesEntries(VENV_ROOT)],
+      pythonPathEntries: [ACTIVE_HERMES_ROOT],
      venvRoot: VENV_ROOT
    }),
    root: ACTIVE_HERMES_ROOT,
@@ -2901,17 +2878,15 @@ function resolveHermesBackend(dashboardArgs) {
      // and lets the resolver fall through to step 6 / bootstrap.
      const shellForProbe = isCommandScript(hermesCommand)
      if (verifyHermesCli(hermesCommand, { shell: shellForProbe })) {
-        return (
-          unwrapWindowsVenvHermesCommand(hermesCommand, dashboardArgs) || {
-            label: `existing Hermes CLI at ${hermesCommand}`,
-            command: hermesCommand,
-            args: dashboardArgs,
-            bootstrap: false,
-            env: {},
-            kind: 'command',
-            shell: shellForProbe
-          }
-        )
+        return unwrapWindowsVenvHermesCommand(hermesCommand, dashboardArgs) || {
+          label: `existing Hermes CLI at ${hermesCommand}`,
+          command: hermesCommand,
+          args: dashboardArgs,
+          bootstrap: false,
+          env: {},
+          kind: 'command',
+          shell: shellForProbe
+        }
      }
      rememberLog(
        `Ignoring existing Hermes CLI at ${hermesCommand}: --version probe failed; falling through to bootstrap.`
@@ -2991,9 +2966,7 @@ async function ensureRuntime(backend) {
    rememberLog('[bootstrap] no Hermes install found; starting first-launch bootstrap')

    if (await handOffWindowsBootstrapRecovery('bootstrap-needed')) {
-      const handoffError = new Error(
-        'Hermes recovery was handed off to Hermes Setup. The desktop will restart when recovery completes.'
-      )
+      const handoffError = new Error('Hermes recovery was handed off to Hermes Setup. The desktop will restart when recovery completes.')
      handoffError.isBootstrapFailure = true
      handoffError.bootstrapHandedOff = true
      bootstrapFailure = handoffError
@@ -3787,7 +3760,11 @@ function getWindowButtonPosition() {
 }

 function getNativeOverlayWidth() {
-  return computeNativeOverlayWidth({ isWindows: IS_WINDOWS, isWsl: IS_WSL })
+  // macOS reports traffic-light coords via windowButtonPosition; the
+  // titlebarOverlay there doesn't reserve right-edge space. Windows/Linux
+  // render the native window-controls overlay on the right, so the renderer
+  // needs to inset its right cluster by this much to clear them.
+  return IS_MAC ? 0 : NATIVE_OVERLAY_BUTTON_WIDTH
 }

 function getWindowState() {
@@ -5508,10 +5485,7 @@ async function startHermes() {

    await advanceBootProgress('backend.port', 'Waiting for Hermes backend to launch', 86)
    // Discover the ephemeral port the child bound to
-    const port = await Promise.race([
-      waitForDashboardPortAnnouncement(hermesProcess, { readyFile }),
-      backendStartFailed
-    ])
+    const port = await Promise.race([waitForDashboardPortAnnouncement(hermesProcess, { readyFile }), backendStartFailed])
    if (readyFile) {
      fs.unlink(readyFile, () => {})
    }
@@ -5846,7 +5820,7 @@ function createWindow() {
    if (!nativeThemeListenerInstalled) {
      nativeThemeListenerInstalled = true
      nativeTheme.on('updated', () => {
-        applyTitleBarOverlay(mainWindow)
+        mainWindow?.setTitleBarOverlay?.(getTitleBarOverlayOptions())
      })
    }
  }
@@ -6030,32 +6004,19 @@ ipcMain.handle('hermes:pet-overlay:close', async () => {

  return { ok: true }
 })
-// Drag/resize: the overlay reports new absolute screen bounds (it already knows
-// the pointer's screen coords). Drag keeps the size constant; the wheel-to-scale
-// gesture grows/shrinks it so the sprite is never cropped by the window edge.
-// The window is created non-resizable (no stray edge-drag on the transparent
-// frameless panel), which on Windows/Linux also blocks programmatic setBounds
-// sizing — so briefly flip resizable on whenever the size actually changes.
+// Drag: the overlay reports a new absolute screen position (it already knows the
+// pointer's screen coords), we just move the window.
 ipcMain.on('hermes:pet-overlay:set-bounds', (_event, bounds) => {
  if (!petOverlayWindow || petOverlayWindow.isDestroyed() || !bounds) {
    return
  }

-  const win = petOverlayWindow
-  const width = Math.max(80, Math.round(bounds.width))
-  const height = Math.max(80, Math.round(bounds.height))
-  const [curW, curH] = win.getSize()
-  const resizing = width !== curW || height !== curH
-
-  if (resizing && !win.isResizable()) {
-    win.setResizable(true)
-  }
-
-  win.setBounds({ x: Math.round(bounds.x), y: Math.round(bounds.y), width, height })
-
-  if (resizing) {
-    win.setResizable(false)
-  }
+  petOverlayWindow.setBounds({
+    x: Math.round(bounds.x),
+    y: Math.round(bounds.y),
+    width: Math.max(80, Math.round(bounds.width)),
+    height: Math.max(80, Math.round(bounds.height))
+  })
 })
 // Click-through: the overlay window is a full rectangle but only the pet pixels
 // should be interactive. The renderer toggles this as the cursor enters/leaves
@@ -6521,21 +6482,11 @@ ipcMain.handle('hermes:saveImageBuffer', async (_event, payload) => {

 ipcMain.handle('hermes:saveClipboardImage', async () => {
  const image = clipboard.readImage()
-  if (image && !image.isEmpty()) {
-    return writeComposerImage(image.toPNG(), '.png')
+  if (!image || image.isEmpty()) {
+    return ''
  }

-  // WSL2/WSLg doesn't bridge clipboard *images* from the Windows host to the
-  // Linux clipboard Electron reads, so a host screenshot looks empty above.
-  // Pull it straight off the Windows clipboard via PowerShell as a fallback.
-  if (IS_WSL) {
-    const png = readWslWindowsClipboardImage()
-    if (png) {
-      return writeComposerImage(png, '.png')
-    }
-  }
-
-  return ''
+  return writeComposerImage(image.toPNG(), '.png')
 })

 ipcMain.handle('hermes:normalizePreviewTarget', (_event, target, baseDir) =>
@@ -6555,7 +6506,7 @@ ipcMain.on('hermes:titlebar-theme', (_event, payload) => {
    background: payload.background,
    foreground: payload.foreground
  }
-  applyTitleBarOverlay(mainWindow)
+  mainWindow?.setTitleBarOverlay?.(getTitleBarOverlayOptions())
 })

 // Pin the native appearance to the app theme (see NATIVE_THEME_CONFIG_PATH).
@@ -6931,7 +6882,9 @@ ipcMain.handle('hermes:fs:trash', async (_event, targetPath) => {

 // Git-driven worktree management ("Start work" flow). Errors surface to the
 // renderer as rejected promises so it can toast a friendly message.
-ipcMain.handle('hermes:git:worktreeList', async (_event, repoPath) => listWorktrees(repoPath, resolveGitBinary()))
+ipcMain.handle('hermes:git:worktreeList', async (_event, repoPath) =>
+  listWorktrees(repoPath, resolveGitBinary())
+)

 ipcMain.handle('hermes:git:worktreeAdd', async (_event, repoPath, options) =>
  addWorktree(repoPath, options || {}, resolveGitBinary())
@@ -6945,7 +6898,9 @@ ipcMain.handle('hermes:git:branchSwitch', async (_event, repoPath, branch) =>
  switchBranch(repoPath, branch, resolveGitBinary())
 )

-ipcMain.handle('hermes:git:branchList', async (_event, repoPath) => listBranches(repoPath, resolveGitBinary()))
+ipcMain.handle('hermes:git:branchList', async (_event, repoPath) =>
+  listBranches(repoPath, resolveGitBinary())
+)

 // Compact repo status (branch, ahead/behind, change counts + files) for the
 // composer coding rail. Returns null on a non-repo / remote backend so the rail
@@ -7447,7 +7402,6 @@ app.whenReady().then(() => {
  }
  installMediaPermissions()
  registerMediaProtocol()
-  installEmbedReferer()
  registerDeepLinkProtocol()
  ensureWslWindowsFonts()
  configureSpellChecker()
--- a/apps/desktop/electron/oauth-net-request.test.cjs
+++ b/apps/desktop/electron/oauth-net-request.test.cjs
@@ -30,8 +30,5 @@ test('setJsonRequestHeaders does not set Electron-restricted Content-Length', ()
  setJsonRequestHeaders(request)

  assert.deepEqual(headers, [['Content-Type', 'application/json']])
-  assert.equal(
-    headers.some(([name]) => name.toLowerCase() === 'content-length'),
-    false
-  )
+  assert.equal(headers.some(([name]) => name.toLowerCase() === 'content-length'), false)
 })
--- a/apps/desktop/electron/titlebar-overlay-width.cjs
+++ b/apps/desktop/electron/titlebar-overlay-width.cjs
@@ -1,11 +0,0 @@
-// Pre-layout fallback for WCO right-edge reservation (--titlebar-tools-right).
-// Live width comes from navigator.windowControlsOverlay in the renderer.
-
-const OVERLAY_FALLBACK_WIDTH = 144
-
-/** @param {{ isWindows?: boolean, isWsl?: boolean }} opts */
-function nativeOverlayWidth({ isWindows = false, isWsl = false } = {}) {
-  return isWindows || isWsl ? OVERLAY_FALLBACK_WIDTH : 0
-}
-
-module.exports = { OVERLAY_FALLBACK_WIDTH, nativeOverlayWidth }
--- a/apps/desktop/electron/titlebar-overlay-width.test.cjs
+++ b/apps/desktop/electron/titlebar-overlay-width.test.cjs
@@ -1,29 +0,0 @@
-const assert = require('node:assert/strict')
-const test = require('node:test')
-
-const { OVERLAY_FALLBACK_WIDTH, nativeOverlayWidth } = require('./titlebar-overlay-width.cjs')
-
-// This static reservation is only the pre-layout FALLBACK. Once laid out the
-// renderer reads the exact width from navigator.windowControlsOverlay
-// (use-window-controls-overlay-width.ts) and uses these values only when the WCO
-// API is unavailable.
-
-test('Windows reserves the overlay fallback width', () => {
-  assert.equal(nativeOverlayWidth({ isWindows: true }), OVERLAY_FALLBACK_WIDTH)
-})
-
-test('WSLg paints the same WCO, so it reserves the same fallback width', () => {
-  // The original bug: WSL fell through to 0, so the right tools sat under the
-  // controls and the title overran into them.
-  assert.equal(nativeOverlayWidth({ isWsl: true }), OVERLAY_FALLBACK_WIDTH)
-})
-
-test('plain Linux and macOS reserve nothing', () => {
-  assert.equal(nativeOverlayWidth({ isWindows: false, isWsl: false }), 0)
-  assert.equal(nativeOverlayWidth(), 0)
-  assert.equal(nativeOverlayWidth({}), 0)
-})
-
-test('the fallback width is a sane positive pixel value', () => {
-  assert.ok(Number.isInteger(OVERLAY_FALLBACK_WIDTH) && OVERLAY_FALLBACK_WIDTH > 0)
-})
--- a/apps/desktop/electron/update-count.test.cjs
+++ b/apps/desktop/electron/update-count.test.cjs
@@ -7,81 +7,45 @@ const { resolveBehindCount, shouldCountCommits } = require('./update-count.cjs')
 // unconditionally, so a shallow checkout with no merge-base surfaced the bogus
 // rev-list count (e.g. 12104). This asserts the new shallow/no-merge-base branch.
 test('shallow checkout with no merge-base does NOT trust the bogus rev-list count', () => {
-  assert.equal(
-    resolveBehindCount({
-      countStr: '12104',
-      currentSha: 'aaa',
-      targetSha: 'bbb',
-      isShallow: true,
-      hasMergeBase: false
-    }),
-    1
-  )
+  assert.equal(resolveBehindCount({
+    countStr: '12104', currentSha: 'aaa', targetSha: 'bbb',
+    isShallow: true, hasMergeBase: false,
+  }), 1)
 })

 test('shallow checkout with no merge-base but identical SHA reports up-to-date', () => {
-  assert.equal(
-    resolveBehindCount({
-      countStr: '12104',
-      currentSha: 'abc',
-      targetSha: 'abc',
-      isShallow: true,
-      hasMergeBase: false
-    }),
-    0
-  )
+  assert.equal(resolveBehindCount({
+    countStr: '12104', currentSha: 'abc', targetSha: 'abc',
+    isShallow: true, hasMergeBase: false,
+  }), 0)
 })

 test('shallow checkout WITH a merge-base keeps the exact count (reliable)', () => {
-  assert.equal(
-    resolveBehindCount({
-      countStr: '3',
-      currentSha: 'aaa',
-      targetSha: 'bbb',
-      isShallow: true,
-      hasMergeBase: true
-    }),
-    3
-  )
+  assert.equal(resolveBehindCount({
+    countStr: '3', currentSha: 'aaa', targetSha: 'bbb',
+    isShallow: true, hasMergeBase: true,
+  }), 3)
 })

 test('full (non-shallow) clone keeps the exact count path unchanged', () => {
-  assert.equal(
-    resolveBehindCount({
-      countStr: '7',
-      currentSha: 'aaa',
-      targetSha: 'bbb',
-      isShallow: false,
-      hasMergeBase: true
-    }),
-    7
-  )
+  assert.equal(resolveBehindCount({
+    countStr: '7', currentSha: 'aaa', targetSha: 'bbb',
+    isShallow: false, hasMergeBase: true,
+  }), 7)
 })

 test('up-to-date full clone reports 0', () => {
-  assert.equal(
-    resolveBehindCount({
-      countStr: '0',
-      currentSha: 'x',
-      targetSha: 'x',
-      isShallow: false,
-      hasMergeBase: true
-    }),
-    0
-  )
+  assert.equal(resolveBehindCount({
+    countStr: '0', currentSha: 'x', targetSha: 'x',
+    isShallow: false, hasMergeBase: true,
+  }), 0)
 })

 test('non-numeric count falls back to 0 (defensive, unchanged behaviour)', () => {
-  assert.equal(
-    resolveBehindCount({
-      countStr: '',
-      currentSha: 'aaa',
-      targetSha: 'bbb',
-      isShallow: false,
-      hasMergeBase: true
-    }),
-    0
-  )
+  assert.equal(resolveBehindCount({
+    countStr: '', currentSha: 'aaa', targetSha: 'bbb',
+    isShallow: false, hasMergeBase: true,
+  }), 0)
 })

 // shouldCountCommits gates the expensive `rev-list --count` in checkUpdates().
@@ -104,24 +68,12 @@ test('full (non-shallow) clone always runs the count', () => {
 // The skip path produces an empty countStr; resolveBehindCount must NOT trust
 // it and must fall through to the SHA compare (mirrors the live call site).
 test('skipped-count path resolves via SHA compare, never via empty countStr', () => {
-  assert.equal(
-    resolveBehindCount({
-      countStr: '',
-      currentSha: 'aaa',
-      targetSha: 'bbb',
-      isShallow: true,
-      hasMergeBase: false
-    }),
-    1
-  )
-  assert.equal(
-    resolveBehindCount({
-      countStr: '',
-      currentSha: 'same',
-      targetSha: 'same',
-      isShallow: true,
-      hasMergeBase: false
-    }),
-    0
-  )
+  assert.equal(resolveBehindCount({
+    countStr: '', currentSha: 'aaa', targetSha: 'bbb',
+    isShallow: true, hasMergeBase: false,
+  }), 1)
+  assert.equal(resolveBehindCount({
+    countStr: '', currentSha: 'same', targetSha: 'same',
+    isShallow: true, hasMergeBase: false,
+  }), 0)
 })
--- a/apps/desktop/electron/update-relaunch.test.cjs
+++ b/apps/desktop/electron/update-relaunch.test.cjs
@@ -62,10 +62,7 @@ test('resolveUnpackedRelease is null for AppImage / .deb / .rpm / dev / unresolv
  assert.equal(resolveUnpackedRelease('/usr/lib/hermes/hermes', ROOT, 'linux'), null)
  assert.equal(resolveUnpackedRelease('/opt/Hermes/hermes', ROOT, 'linux'), null)
  // dev electron
-  assert.equal(
-    resolveUnpackedRelease('/home/u/.hermes/hermes-agent/node_modules/electron/dist/electron', ROOT, 'linux'),
-    null
-  )
+  assert.equal(resolveUnpackedRelease('/home/u/.hermes/hermes-agent/node_modules/electron/dist/electron', ROOT, 'linux'), null)
  // empty / missing
  assert.equal(resolveUnpackedRelease('', ROOT, 'linux'), null)
  assert.equal(resolveUnpackedRelease(path.join(UNPACKED, 'hermes'), '', 'linux'), null)
--- a/apps/desktop/electron/update-remote.cjs
+++ b/apps/desktop/electron/update-remote.cjs
@@ -39,9 +39,7 @@ function canonicalGitHubRemote(url) {
 }

 function isSshRemote(url) {
-  const value = String(url || '')
-    .trim()
-    .toLowerCase()
+  const value = String(url || '').trim().toLowerCase()
  return value.startsWith('git@') || value.startsWith('ssh://')
 }

--- a/apps/desktop/electron/vscode-marketplace.cjs
+++ b/apps/desktop/electron/vscode-marketplace.cjs
@@ -26,11 +26,7 @@ const REQUEST_TIMEOUT_MS = 20_000
 const ID_RE = /^[\w-]+\.[\w-]+$/

 /** Minimal HTTPS helper with redirect-following, timeout, and a size cap. */
-function request(
-  url,
-  { method = 'GET', headers = {}, body = null, maxBytes = MAX_VSIX_BYTES } = {},
-  redirectsLeft = MAX_REDIRECTS
-) {
+function request(url, { method = 'GET', headers = {}, body = null, maxBytes = MAX_VSIX_BYTES } = {}, redirectsLeft = MAX_REDIRECTS) {
  return new Promise((resolve, reject) => {
    const req = https.request(url, { method, headers }, res => {
      const status = res.statusCode ?? 0
@@ -46,13 +42,7 @@ function request(
        const next = new URL(res.headers.location, url).toString()
        res.resume()
        // Redirects to the CDN are plain GETs (drop the POST body).
-        resolve(
-          request(
-            next,
-            { method: 'GET', headers: { 'User-Agent': headers['User-Agent'] }, maxBytes },
-            redirectsLeft - 1
-          )
-        )
+        resolve(request(next, { method: 'GET', headers: { 'User-Agent': headers['User-Agent'] }, maxBytes }, redirectsLeft - 1))

        return
      }
--- a/apps/desktop/electron/window-state.test.cjs
+++ b/apps/desktop/electron/window-state.test.cjs
@@ -26,16 +26,7 @@ const LAPTOP = [{ workArea: { x: 0, y: 0, width: 1366, height: 728 } }]
 // ─── sanitizeWindowState ───────────────────────────────────────────────────

 test('sanitizeWindowState rejects missing/garbage input', () => {
-  for (const bad of [
-    null,
-    undefined,
-    'nope',
-    42,
-    {},
-    { width: 'x', height: 800 },
-    { width: NaN, height: 800 },
-    { width: 1000 }
-  ]) {
+  for (const bad of [null, undefined, 'nope', 42, {}, { width: 'x', height: 800 }, { width: NaN, height: 800 }, { width: 1000 }]) {
    assert.equal(sanitizeWindowState(bad), null)
  }
 })
@@ -121,13 +112,9 @@ test('computeWindowOptions does not clamp when displays are unknown', () => {
 test('debounce coalesces a burst into one trailing run', t => {
  t.mock.timers.enable({ apis: ['setTimeout'] })
  let calls = 0
-  const d = debounce(() => {
-    calls += 1
-  }, 250)
+  const d = debounce(() => { calls += 1 }, 250)

-  d()
-  d()
-  d()
+  d(); d(); d()
  assert.equal(calls, 0)
  t.mock.timers.tick(249)
  assert.equal(calls, 0)
@@ -138,9 +125,7 @@ test('debounce coalesces a burst into one trailing run', t => {
 test('debounce.flush runs now and cancels the pending timer', t => {
  t.mock.timers.enable({ apis: ['setTimeout'] })
  let calls = 0
-  const d = debounce(() => {
-    calls += 1
-  }, 250)
+  const d = debounce(() => { calls += 1 }, 250)

  d()
  d.flush()
--- a/apps/desktop/electron/windows-child-process.test.cjs
+++ b/apps/desktop/electron/windows-child-process.test.cjs
@@ -13,7 +13,7 @@ function readElectronFile(name) {

 function requireHiddenChildOptions(source, needle) {
  const match = needle instanceof RegExp ? needle.exec(source) : null
-  const index = needle instanceof RegExp ? (match?.index ?? -1) : source.indexOf(needle)
+  const index = needle instanceof RegExp ? match?.index ?? -1 : source.indexOf(needle)
  assert.notEqual(index, -1, `missing call site: ${needle}`)
  const snippet = source.slice(index, index + 700)
  assert.match(
--- a/apps/desktop/electron/windows-user-env.cjs
+++ b/apps/desktop/electron/windows-user-env.cjs
@@ -21,7 +21,8 @@ const { execFileSync } = require('node:child_process')
 // the requested value line isn't present.
 function parseRegQueryValue(stdout, name) {
  if (!stdout || !name) return null
-  const typePattern = /^(\S+)\s+(?:REG_SZ|REG_EXPAND_SZ|REG_MULTI_SZ|REG_DWORD|REG_QWORD|REG_BINARY|REG_NONE)\s+(.*)$/
+  const typePattern =
+    /^(\S+)\s+(?:REG_SZ|REG_EXPAND_SZ|REG_MULTI_SZ|REG_DWORD|REG_QWORD|REG_BINARY|REG_NONE)\s+(.*)$/
  for (const rawLine of String(stdout).split(/\r?\n/)) {
    const line = rawLine.trim()
    const match = line.match(typePattern)
@@ -46,7 +47,10 @@ function expandWindowsEnvRefs(value, env = process.env) {
 // Read a User-scoped env var from HKCU\Environment. Windows-only: returns null
 // off-Windows (without spawning), on any spawn error, when `reg` exits non-zero
 // (the value doesn't exist), or when the value is empty.
-function readWindowsUserEnvVar(name, { platform = process.platform, env = process.env, exec = execFileSync } = {}) {
+function readWindowsUserEnvVar(
+  name,
+  { platform = process.platform, env = process.env, exec = execFileSync } = {}
+) {
  if (platform !== 'win32' || !name) return null
  let stdout
  try {
--- a/apps/desktop/electron/windows-user-env.test.cjs
+++ b/apps/desktop/electron/windows-user-env.test.cjs
@@ -1,12 +1,21 @@
 const assert = require('node:assert/strict')
 const { test } = require('node:test')

-const { expandWindowsEnvRefs, parseRegQueryValue, readWindowsUserEnvVar } = require('./windows-user-env.cjs')
+const {
+  expandWindowsEnvRefs,
+  parseRegQueryValue,
+  readWindowsUserEnvVar
+} = require('./windows-user-env.cjs')

 // ── parseRegQueryValue ─────────────────────────────────────────────────────

 test('parseRegQueryValue extracts a REG_SZ value', () => {
-  const out = ['', 'HKEY_CURRENT_USER\\Environment', '    HERMES_HOME    REG_SZ    F:\\Hermes\\data', ''].join('\r\n')
+  const out = [
+    '',
+    'HKEY_CURRENT_USER\\Environment',
+    '    HERMES_HOME    REG_SZ    F:\\Hermes\\data',
+    ''
+  ].join('\r\n')
  assert.equal(parseRegQueryValue(out, 'HERMES_HOME'), 'F:\\Hermes\\data')
 })

@@ -30,7 +39,10 @@ test('parseRegQueryValue returns null when the value line is absent', () => {
 // ── expandWindowsEnvRefs ───────────────────────────────────────────────────

 test('expandWindowsEnvRefs expands %VAR% case-insensitively', () => {
-  assert.equal(expandWindowsEnvRefs('%UserProfile%\\h', { USERPROFILE: 'C:\\Users\\jeff' }), 'C:\\Users\\jeff\\h')
+  assert.equal(
+    expandWindowsEnvRefs('%UserProfile%\\h', { USERPROFILE: 'C:\\Users\\jeff' }),
+    'C:\\Users\\jeff\\h'
+  )
 })

 test('expandWindowsEnvRefs leaves literal paths and unknown refs intact', () => {
--- a/apps/desktop/electron/workspace-cwd.cjs
+++ b/apps/desktop/electron/workspace-cwd.cjs
@@ -14,7 +14,11 @@ function isPackagedInstallPath(dir, { installRoots, isPackaged }) {
    return false
  }

-  const roots = new Set((installRoots ?? []).filter(Boolean).map(candidate => path.resolve(String(candidate))))
+  const roots = new Set(
+    (installRoots ?? [])
+      .filter(Boolean)
+      .map(candidate => path.resolve(String(candidate)))
+  )

  for (const root of roots) {
    if (resolved === root) {
--- a/apps/desktop/electron/workspace-cwd.test.cjs
+++ b/apps/desktop/electron/workspace-cwd.test.cjs
@@ -13,21 +13,33 @@ const { isPackagedInstallPath } = require('./workspace-cwd.cjs')
 const installRoot = path.resolve('/opt/Hermes')

 test('isPackagedInstallPath returns false when not packaged', () => {
-  assert.equal(isPackagedInstallPath(installRoot, { isPackaged: false, installRoots: [installRoot] }), false)
+  assert.equal(
+    isPackagedInstallPath(installRoot, { isPackaged: false, installRoots: [installRoot] }),
+    false
+  )
 })

 test('isPackagedInstallPath flags the install root itself', () => {
-  assert.equal(isPackagedInstallPath(installRoot, { isPackaged: true, installRoots: [installRoot] }), true)
+  assert.equal(
+    isPackagedInstallPath(installRoot, { isPackaged: true, installRoots: [installRoot] }),
+    true
+  )
 })

 test('isPackagedInstallPath flags paths nested under the install root', () => {
  const nested = path.join(installRoot, 'resources', 'app.asar')

-  assert.equal(isPackagedInstallPath(nested, { isPackaged: true, installRoots: [installRoot] }), true)
+  assert.equal(
+    isPackagedInstallPath(nested, { isPackaged: true, installRoots: [installRoot] }),
+    true
+  )
 })

 test('isPackagedInstallPath ignores paths outside the install root', () => {
  const homeProject = path.resolve('/home/user/projects/demo')

-  assert.equal(isPackagedInstallPath(homeProject, { isPackaged: true, installRoots: [installRoot] }), false)
+  assert.equal(
+    isPackagedInstallPath(homeProject, { isPackaged: true, installRoots: [installRoot] }),
+    false
+  )
 })
--- a/apps/desktop/electron/wsl-clipboard-image.cjs
+++ b/apps/desktop/electron/wsl-clipboard-image.cjs
@@ -1,92 +0,0 @@
-// Pull a Windows-host clipboard image from inside WSL2 via PowerShell (WSLg
-// bridges text but not images). Returns PNG bytes or null; exec injectable.
-
-const { execFileSync } = require('node:child_process')
-
-// STA is mandatory: System.Windows.Forms.Clipboard throws ThreadStateException
-// off a single-threaded apartment. We emit base64 (not raw bytes) so the PNG
-// survives stdout's text decoding intact, and write with [Console]::Out.Write
-// to avoid a trailing newline.
-const PS_SCRIPT = [
-  'Add-Type -AssemblyName System.Windows.Forms,System.Drawing',
-  '$img = [System.Windows.Forms.Clipboard]::GetImage()',
-  'if ($null -eq $img) { exit 0 }',
-  '$ms = New-Object System.IO.MemoryStream',
-  '$img.Save($ms, [System.Drawing.Imaging.ImageFormat]::Png)',
-  '[Console]::Out.Write([System.Convert]::ToBase64String($ms.ToArray()))'
-].join('\n')
-
-// PowerShell's -EncodedCommand takes UTF-16LE base64. Encoding the whole script
-// this way sidesteps every layer of WSL→Windows quoting (spaces, quotes,
-// brackets, newlines) that plain -Command arguments would mangle.
-function encodePowerShellCommand(script) {
-  return Buffer.from(String(script), 'utf16le').toString('base64')
-}
-
-// Locate powershell.exe. The bare name resolves through WSL's Windows-interop
-// PATH on every standard WSL2 setup; the absolute fallback covers a stripped
-// PATH. Returns the first candidate — execFile surfaces ENOENT if it's wrong
-// and we fall back to null.
-function powershellCandidates() {
-  return ['powershell.exe', '/mnt/c/Windows/System32/WindowsPowerShell/v1.0/powershell.exe']
-}
-
-function decodeClipboardImageBase64(stdout) {
-  const b64 = String(stdout || '').trim()
-  if (!b64) return null
-
-  let buffer
-  try {
-    buffer = Buffer.from(b64, 'base64')
-  } catch {
-    return null
-  }
-
-  // Guard against partial / garbage output: require a real PNG signature.
-  const PNG_SIGNATURE = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])
-  if (buffer.length < PNG_SIGNATURE.length || !buffer.subarray(0, PNG_SIGNATURE.length).equals(PNG_SIGNATURE)) {
-    return null
-  }
-
-  return buffer
-}
-
-// Read the Windows clipboard image from inside WSL. Returns a PNG Buffer, or
-// null when there's no image, PowerShell is unreachable, or output is invalid.
-// Linux-only by contract (caller gates on IS_WSL); never throws.
-function readWslWindowsClipboardImage({ exec = execFileSync, candidates = powershellCandidates() } = {}) {
-  const encoded = encodePowerShellCommand(PS_SCRIPT)
-
-  for (const ps of candidates) {
-    try {
-      const stdout = exec(
-        ps,
-        ['-NoProfile', '-NonInteractive', '-STA', '-ExecutionPolicy', 'Bypass', '-EncodedCommand', encoded],
-        {
-          encoding: 'utf8',
-          windowsHide: true,
-          timeout: 8000,
-          // A 4K screenshot base64s to a few MB; give stdout generous headroom.
-          maxBuffer: 64 * 1024 * 1024,
-          // PowerShell writes progress/CLIXML noise to stderr — ignore it.
-          stdio: ['ignore', 'pipe', 'ignore']
-        }
-      )
-      const decoded = decodeClipboardImageBase64(stdout)
-      if (decoded) return decoded
-      // Empty stdout = no image on the clipboard; stop, don't try fallbacks.
-      if (String(stdout || '').trim() === '') return null
-    } catch {
-      // This powershell.exe candidate is missing/failed — try the next one.
-    }
-  }
-
-  return null
-}
-
-module.exports = {
-  decodeClipboardImageBase64,
-  encodePowerShellCommand,
-  powershellCandidates,
-  readWslWindowsClipboardImage
-}
--- a/apps/desktop/electron/wsl-clipboard-image.test.cjs
+++ b/apps/desktop/electron/wsl-clipboard-image.test.cjs
@@ -1,114 +0,0 @@
-const assert = require('node:assert/strict')
-const test = require('node:test')
-
-const {
-  decodeClipboardImageBase64,
-  encodePowerShellCommand,
-  powershellCandidates,
-  readWslWindowsClipboardImage
-} = require('./wsl-clipboard-image.cjs')
-
-const PNG_SIGNATURE = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])
-
-function fakePngBuffer(extraBytes = 16) {
-  return Buffer.concat([PNG_SIGNATURE, Buffer.alloc(extraBytes, 0x42)])
-}
-
-test('encodePowerShellCommand produces UTF-16LE base64 PowerShell can decode', () => {
-  const encoded = encodePowerShellCommand('Write-Output "hi"')
-  const roundTripped = Buffer.from(encoded, 'base64').toString('utf16le')
-  assert.equal(roundTripped, 'Write-Output "hi"')
-})
-
-test('decodeClipboardImageBase64 returns a Buffer for valid PNG base64', () => {
-  const png = fakePngBuffer()
-  const decoded = decodeClipboardImageBase64(png.toString('base64'))
-  assert.ok(Buffer.isBuffer(decoded))
-  assert.ok(decoded.equals(png))
-})
-
-test('decodeClipboardImageBase64 trims surrounding whitespace before decoding', () => {
-  const png = fakePngBuffer()
-  const decoded = decodeClipboardImageBase64(`\n  ${png.toString('base64')}  \r\n`)
-  assert.ok(decoded && decoded.equals(png))
-})
-
-test('decodeClipboardImageBase64 returns null for empty / whitespace input', () => {
-  assert.equal(decodeClipboardImageBase64(''), null)
-  assert.equal(decodeClipboardImageBase64('   \n  '), null)
-  assert.equal(decodeClipboardImageBase64(null), null)
-  assert.equal(decodeClipboardImageBase64(undefined), null)
-})
-
-test('decodeClipboardImageBase64 rejects base64 without a PNG signature', () => {
-  // Valid base64, but the decoded bytes are not a PNG.
-  const notPng = Buffer.from('this is not a png at all').toString('base64')
-  assert.equal(decodeClipboardImageBase64(notPng), null)
-})
-
-test('readWslWindowsClipboardImage decodes the first candidate that returns a PNG', () => {
-  const png = fakePngBuffer()
-  const calls = []
-  const exec = (cmd, args) => {
-    calls.push({ cmd, args })
-    return png.toString('base64')
-  }
-
-  const result = readWslWindowsClipboardImage({ exec, candidates: ['powershell.exe'] })
-  assert.ok(result && result.equals(png))
-  assert.equal(calls.length, 1)
-  assert.equal(calls[0].cmd, 'powershell.exe')
-  // -STA is mandatory for System.Windows.Forms.Clipboard.
-  assert.ok(calls[0].args.includes('-STA'))
-  assert.ok(calls[0].args.includes('-EncodedCommand'))
-})
-
-test('readWslWindowsClipboardImage returns null and stops when stdout is empty (no image)', () => {
-  let count = 0
-  const exec = () => {
-    count += 1
-    return ''
-  }
-
-  const result = readWslWindowsClipboardImage({
-    exec,
-    candidates: ['powershell.exe', '/mnt/c/Windows/System32/WindowsPowerShell/v1.0/powershell.exe']
-  })
-  assert.equal(result, null)
-  // Empty stdout means "no image on the clipboard" — don't probe further candidates.
-  assert.equal(count, 1)
-})
-
-test('readWslWindowsClipboardImage falls through to the next candidate when one throws', () => {
-  const png = fakePngBuffer()
-  const seen = []
-  const exec = cmd => {
-    seen.push(cmd)
-    if (cmd === 'powershell.exe') {
-      throw Object.assign(new Error('not found'), { code: 'ENOENT' })
-    }
-    return png.toString('base64')
-  }
-
-  const result = readWslWindowsClipboardImage({
-    exec,
-    candidates: ['powershell.exe', '/mnt/c/Windows/System32/WindowsPowerShell/v1.0/powershell.exe']
-  })
-  assert.ok(result && result.equals(png))
-  assert.deepEqual(seen, ['powershell.exe', '/mnt/c/Windows/System32/WindowsPowerShell/v1.0/powershell.exe'])
-})
-
-test('readWslWindowsClipboardImage returns null when every candidate throws', () => {
-  const exec = () => {
-    throw new Error('boom')
-  }
-
-  const result = readWslWindowsClipboardImage({ exec, candidates: ['a', 'b'] })
-  assert.equal(result, null)
-})
-
-test('powershellCandidates lists the bare name first, then the absolute fallback', () => {
-  const candidates = powershellCandidates()
-  assert.equal(candidates[0], 'powershell.exe')
-  assert.ok(candidates.some(c => c.endsWith('WindowsPowerShell/v1.0/powershell.exe')))
-})
--- a/apps/desktop/package.json
+++ b/apps/desktop/package.json
@@ -37,7 +37,7 @@
    "test:desktop:nsis": "node scripts/test-desktop.mjs nsis",
    "test:desktop:existing": "node scripts/test-desktop.mjs existing",
    "test:desktop:fresh": "node scripts/test-desktop.mjs fresh",
-    "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/backend-ready.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/link-title-window.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/git-worktree-ops.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs electron/update-count.test.cjs electron/update-rebuild.test.cjs electron/update-marker.test.cjs electron/update-relaunch.test.cjs electron/windows-user-env.test.cjs electron/wsl-clipboard-image.test.cjs electron/titlebar-overlay-width.test.cjs electron/window-state.test.cjs",
+    "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/backend-ready.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/link-title-window.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/git-worktree-ops.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs electron/update-count.test.cjs electron/update-rebuild.test.cjs electron/update-marker.test.cjs electron/update-relaunch.test.cjs electron/windows-user-env.test.cjs electron/window-state.test.cjs",
    "typecheck": "tsc -p . --noEmit",
    "lint": "eslint src/ electron/",
    "lint:fix": "eslint src/ electron/ --fix",
@@ -51,17 +51,11 @@
    "@assistant-ui/react-streamdown": "^0.1.11",
    "@audiowave/react": "^0.6.2",
    "@chenglou/pretext": "^0.0.6",
-    "@codemirror/commands": "^6.10.4",
-    "@codemirror/language": "^6.12.4",
-    "@codemirror/language-data": "^6.5.2",
-    "@codemirror/state": "^6.7.0",
-    "@codemirror/view": "^6.43.3",
    "@dnd-kit/core": "^6.3.1",
    "@dnd-kit/sortable": "^10.0.0",
    "@dnd-kit/utilities": "^3.2.2",
    "@hermes/shared": "file:../shared",
    "@icons-pack/react-simple-icons": "=13.11.1",
-    "@lezer/highlight": "^1.2.3",
    "@nanostores/react": "^1.1.0",
    "@nous-research/ui": "^0.13.0",
    "@radix-ui/react-slot": "^1.2.4",
@@ -81,13 +75,11 @@
    "clsx": "^2.1.1",
    "cmdk": "^1.1.1",
    "dnd-core": "^14.0.1",
-    "dompurify": "^3.4.11",
    "hast-util-from-html-isomorphic": "^2.0.0",
    "hast-util-to-text": "^4.0.2",
    "ignore": "^7.0.5",
    "katex": "^0.16.45",
    "leva": "^0.10.1",
-    "mermaid": "^11.15.0",
    "motion": "^12.38.0",
    "nanostores": "^1.3.0",
    "node-pty": "1.1.0",
--- a/apps/desktop/src/app/agents/index.tsx
+++ b/apps/desktop/src/app/agents/index.tsx
@@ -3,8 +3,8 @@ import { type ReactNode, useEffect, useMemo, useState } from 'react'

 import { useElapsedSeconds } from '@/components/chat/activity-timer'
 import { ActivityTimerText } from '@/components/chat/activity-timer-text'
-import { Codicon } from '@/components/ui/codicon'
 import { FadeText } from '@/components/ui/fade-text'
+import { Codicon } from '@/components/ui/codicon'
 import { GlyphSpinner } from '@/components/ui/glyph-spinner'
 import { type Translations, useI18n } from '@/i18n'
 import { AlertCircle, CheckCircle2 } from '@/lib/icons'
--- a/apps/desktop/src/app/artifacts/index.tsx
+++ b/apps/desktop/src/app/artifacts/index.tsx
@@ -477,20 +477,17 @@ export function ArtifactsView({ setStatusbarItemGroup: _setStatusbarItemGroup, .
    }
  }, [artifacts])

-  const openArtifact = useCallback(
-    async (href: string) => {
-      try {
-        if (window.hermesDesktop?.openExternal) {
-          await window.hermesDesktop.openExternal(href)
-        } else {
-          window.open(href, '_blank', 'noopener,noreferrer')
-        }
-      } catch (err) {
-        notifyError(err, a.openFailed)
+  const openArtifact = useCallback(async (href: string) => {
+    try {
+      if (window.hermesDesktop?.openExternal) {
+        await window.hermesDesktop.openExternal(href)
+      } else {
+        window.open(href, '_blank', 'noopener,noreferrer')
      }
-    },
-    [a]
-  )
+    } catch (err) {
+      notifyError(err, a.openFailed)
+    }
+  }, [a])

  const markImageFailed = useCallback((id: string) => {
    setFailedImageIds(current => {
@@ -842,8 +839,7 @@ const ARTIFACT_COLUMNS: readonly ArtifactColumn[] = [
  {
    Cell: PrimaryCell,
    bodyClassName: 'p-0',
-    header: (filter, a) =>
-      filter === 'link' ? a.colTitleLink : filter === 'file' ? a.colTitleFile : a.colTitleDefault,
+    header: (filter, a) => (filter === 'link' ? a.colTitleLink : filter === 'file' ? a.colTitleFile : a.colTitleDefault),
    id: 'primary',
    width: filter => (filter === 'link' ? 'w-[50%]' : 'w-[35%]')
  },
--- a/apps/desktop/src/app/chat/composer/attachments.test.tsx
+++ b/apps/desktop/src/app/chat/composer/attachments.test.tsx
@@ -2,9 +2,9 @@ import { cleanup, render, screen } from '@testing-library/react'
 import { afterEach, describe, expect, it } from 'vitest'

 import { I18nProvider } from '@/i18n/context'
-import type { ComposerAttachment } from '@/store/composer'

 import { AttachmentList } from './attachments'
+import type { ComposerAttachment } from '@/store/composer'

 function makeAttachment(id: string, label = 'test.pdf'): ComposerAttachment {
  return { id, kind: 'file', label }
@@ -32,10 +32,7 @@ describe('AttachmentList', () => {

  it('renders empty list without error', () => {
    renderWithI18n(<AttachmentList attachments={[]} />)
-
-    const container =
-      screen.getByTestId?.('composer-attachments') ?? document.querySelector('[data-slot="composer-attachments"]')
-
+    const container = screen.getByTestId?.('composer-attachments') ?? document.querySelector('[data-slot="composer-attachments"]')
    expect(container).toBeDefined()
  })

@@ -58,7 +55,10 @@ describe('AttachmentList', () => {
  })

  it('does not crash when attachments array contains null entries', () => {
-    const attachments = [null as unknown as ComposerAttachment, makeAttachment('a', 'valid.txt')]
+    const attachments = [
+      null as unknown as ComposerAttachment,
+      makeAttachment('a', 'valid.txt')
+    ]

    expect(() => {
      renderWithI18n(<AttachmentList attachments={attachments} />)
--- a/apps/desktop/src/app/chat/composer/context-menu.tsx
+++ b/apps/desktop/src/app/chat/composer/context-menu.tsx
@@ -73,11 +73,7 @@ export function ContextMenu({
          <ContextMenuItem disabled={!onPickImages} icon={ImageIcon} onSelect={onPickImages}>
            {c.images}
          </ContextMenuItem>
-          <ContextMenuItem
-            disabled={!onPasteClipboardImage}
-            icon={Clipboard}
-            onSelect={onPasteClipboardImage ? () => void onPasteClipboardImage() : undefined}
-          >
+          <ContextMenuItem disabled={!onPasteClipboardImage} icon={Clipboard} onSelect={onPasteClipboardImage}>
            {c.pasteImage}
          </ContextMenuItem>
          <ContextMenuItem icon={Link} onSelect={onOpenUrlDialog}>
@@ -171,7 +167,7 @@ interface ContextMenuItemProps {
 interface ContextMenuProps {
  onInsertText: (text: string) => void
  onOpenUrlDialog: () => void
-  onPasteClipboardImage?: (opts?: { silent?: boolean }) => Promise<boolean> | void
+  onPasteClipboardImage?: () => void
  onPickFiles?: () => void
  onPickFolders?: () => void
  onPickImages?: () => void
--- a/apps/desktop/src/app/chat/composer/enter-submit-dom-race.test.tsx
+++ b/apps/desktop/src/app/chat/composer/enter-submit-dom-race.test.tsx
@@ -59,10 +59,8 @@ function Harness({
    }

    const editor = editorRef.current
-
    if (editor) {
      const domText = composerPlainText(editor)
-
      if (domText !== draftRef.current) {
        draftRef.current = domText
        setDraft(domText)
@@ -129,11 +127,9 @@ function Harness({
 describe('composer Enter submit — live DOM vs stale composer state (#39630)', () => {
  it('sends the just-typed text on Enter even when composer state has not synced', async () => {
    const onSubmit = vi.fn()
-
    const { getByTestId } = render(
      <Harness onCancel={vi.fn()} onDrain={vi.fn()} onQueue={vi.fn()} onSubmit={onSubmit} />
    )
-
    const editor = getByTestId('editor')

    // Fast typing: the DOM has the text but NO input event fired, so `draft`
@@ -150,11 +146,9 @@ describe('composer Enter submit — live DOM vs stale composer state (#39630)',
    const onQueue = vi.fn()
    const onDrain = vi.fn()
    const onCancel = vi.fn()
-
    const { getByTestId } = render(
      <Harness busy onCancel={onCancel} onDrain={onDrain} onQueue={onQueue} onSubmit={vi.fn()} queued={['queued-1']} />
    )
-
    const editor = getByTestId('editor')

    await act(async () => {
@@ -171,11 +165,9 @@ describe('composer Enter submit — live DOM vs stale composer state (#39630)',
    const onCancel = vi.fn()
    const onSubmit = vi.fn()
    const onQueue = vi.fn()
-
    const { getByTestId } = render(
      <Harness busy onCancel={onCancel} onDrain={vi.fn()} onQueue={onQueue} onSubmit={onSubmit} />
    )
-
    const editor = getByTestId('editor')

    await act(async () => {
@@ -191,11 +183,9 @@ describe('composer Enter submit — live DOM vs stale composer state (#39630)',
  it('drains the next queued prompt on Enter when idle with a truly empty editor', async () => {
    const onDrain = vi.fn()
    const onSubmit = vi.fn()
-
    const { getByTestId } = render(
      <Harness onCancel={vi.fn()} onDrain={onDrain} onQueue={vi.fn()} onSubmit={onSubmit} queued={['queued-1']} />
    )
-
    const editor = getByTestId('editor')

    await act(async () => {
@@ -210,18 +200,9 @@ describe('composer Enter submit — live DOM vs stale composer state (#39630)',
  it('keeps reconnect drafts editable but blocks Enter submit until the gateway returns', async () => {
    const onSubmit = vi.fn()
    const onDrain = vi.fn()
-
    const { getByTestId } = render(
-      <Harness
-        disabled
-        onCancel={vi.fn()}
-        onDrain={onDrain}
-        onQueue={vi.fn()}
-        onSubmit={onSubmit}
-        queued={['queued-1']}
-      />
+      <Harness disabled onCancel={vi.fn()} onDrain={onDrain} onQueue={vi.fn()} onSubmit={onSubmit} queued={['queued-1']} />
    )
-
    const editor = getByTestId('editor')

    await act(async () => {
--- a/apps/desktop/src/app/chat/composer/help-hint.tsx
+++ b/apps/desktop/src/app/chat/composer/help-hint.tsx
@@ -33,7 +33,7 @@ export function HelpHint() {

      <Section title={c.hotkeys}>
        {COMPOSER_HOTKEY_ROWS.map(row => (
-          <HotkeyRow combos={[...row.combos]} description={c.hotkeyDescs[row.id] ?? ''} key={row.id} />
+          <HotkeyRow description={c.hotkeyDescs[row.id] ?? ''} combos={[...row.combos]} key={row.id} />
        ))}
      </Section>

--- a/apps/desktop/src/app/chat/composer/hooks/use-mic-recorder.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-mic-recorder.ts
@@ -59,11 +59,7 @@ function micError(error: unknown, copy: MicRecorderErrorCopy): Error {
  return new Error(copy.microphoneStartFailed)
 }

-export function useMicRecorder(copy: MicRecorderErrorCopy): {
-  handle: MicRecorderHandle
-  level: number
-  recording: boolean
-} {
+export function useMicRecorder(copy: MicRecorderErrorCopy): { handle: MicRecorderHandle; level: number; recording: boolean } {
  const [level, setLevel] = useState(0)
  const [recording, setRecording] = useState(false)

--- a/apps/desktop/src/app/chat/composer/hooks/use-popout-drag.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-popout-drag.ts
@@ -1,12 +1,19 @@
-import { type PointerEvent as ReactPointerEvent, type RefObject, useCallback, useEffect, useRef, useState } from 'react'
+import {
+  type PointerEvent as ReactPointerEvent,
+  type RefObject,
+  useCallback,
+  useEffect,
+  useRef,
+  useState
+} from 'react'

 import {
  POPOUT_ESTIMATED_HEIGHT,
  POPOUT_WIDTH_REM,
-  type PopoutPosition,
-  type PopoutSize,
  readPopoutBounds,
-  setComposerPopoutPosition
+  setComposerPopoutPosition,
+  type PopoutPosition,
+  type PopoutSize
 } from '@/store/composer-popout'

 // Floating surface long-press before it becomes draggable (the 5px platform drags
@@ -73,7 +80,6 @@ function dockProximityOf(rect: DOMRect) {
  const verticalGap = window.innerHeight - DOCK_ZONE_BOTTOM_PX - rect.bottom

  const v = verticalGap <= 0 ? 1 : Math.max(0, 1 - verticalGap / DOCK_VERTICAL_FALLOFF_PX)
-
  const h =
    horizontalDist <= DOCK_ZONE_CENTER_TOLERANCE_PX
      ? 1
--- a/apps/desktop/src/app/chat/composer/hooks/use-slash-completions.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-slash-completions.ts
@@ -98,14 +98,12 @@ export function useSlashCompletions(options: {

        const matches = (
          needle
-            ? $sessions
-                .get()
-                .filter(
-                  session =>
-                    sessionTitle(session).toLowerCase().includes(needle) ||
-                    (session.preview ?? '').toLowerCase().includes(needle) ||
-                    session.id.toLowerCase().includes(needle)
-                )
+            ? $sessions.get().filter(
+                session =>
+                  sessionTitle(session).toLowerCase().includes(needle) ||
+                  (session.preview ?? '').toLowerCase().includes(needle) ||
+                  session.id.toLowerCase().includes(needle)
+              )
            : $sessions.get()
        ).slice(0, SESSION_INLINE_LIMIT)

@@ -137,7 +135,9 @@ export function useSlashCompletions(options: {
          // Prefer the categorized layout so the popover renders section headers
          // (Session, Tools & Skills, ...). Fall back to the flat list when the
          // backend didn't categorize.
-          const sections = catalog.categories?.length ? catalog.categories : [{ name: '', pairs: catalog.pairs ?? [] }]
+          const sections = catalog.categories?.length
+            ? catalog.categories
+            : [{ name: '', pairs: catalog.pairs ?? [] }]

          const items = sections.flatMap(section =>
            section.pairs.map(([command, meta]) => ({
@@ -151,9 +151,10 @@ export function useSlashCompletions(options: {
          return { items, query }
        }

-        const result = await gateway.request<{ items?: CompletionEntry[]; replace_from?: number }>('complete.slash', {
-          text
-        })
+        const result = await gateway.request<{ items?: CompletionEntry[]; replace_from?: number }>(
+          'complete.slash',
+          { text }
+        )

        // Arg-completion items (replace_from > 1) carry just the arg stub —
        // e.g. complete.slash returns `{text: "alice"}` for `/personality alic`
--- a/apps/desktop/src/app/chat/composer/hooks/use-voice-conversation.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-voice-conversation.ts
@@ -220,25 +220,22 @@ export function useVoiceConversation({
    }
  }, [handle, handleTurn, onFatalError, voiceCopy.couldNotStartSession, voiceCopy.microphoneFailed])

-  const speak = useCallback(
-    async (text: string) => {
-      setStatus('speaking')
+  const speak = useCallback(async (text: string) => {
+    setStatus('speaking')

-      try {
-        await playSpeechText(text, { source: 'voice-conversation' })
-      } catch (error) {
-        notifyError(error, voiceCopy.playbackFailed)
-      } finally {
-        if (enabledRef.current) {
-          pendingStartRef.current = true
-          setStatus('idle')
-        } else {
-          setStatus('idle')
-        }
+    try {
+      await playSpeechText(text, { source: 'voice-conversation' })
+    } catch (error) {
+      notifyError(error, voiceCopy.playbackFailed)
+    } finally {
+      if (enabledRef.current) {
+        pendingStartRef.current = true
+        setStatus('idle')
+      } else {
+        setStatus('idle')
      }
-    },
-    [voiceCopy.playbackFailed]
-  )
+    }
+  }, [voiceCopy.playbackFailed])

  const start = useCallback(async () => {
    if (!onTranscribeAudio) {
@@ -258,14 +255,7 @@ export function useVoiceConversation({
    consumePendingResponse()
    pendingStartRef.current = true
    await startListening()
-  }, [
-    consumePendingResponse,
-    onFatalError,
-    onTranscribeAudio,
-    startListening,
-    voiceCopy.configureSpeechToText,
-    voiceCopy.unavailable
-  ])
+  }, [consumePendingResponse, onFatalError, onTranscribeAudio, startListening, voiceCopy.configureSpeechToText, voiceCopy.unavailable])

  const end = useCallback(async () => {
    pendingStartRef.current = false
--- a/apps/desktop/src/app/chat/composer/index.tsx
+++ b/apps/desktop/src/app/chat/composer/index.tsx
@@ -63,7 +63,6 @@ import { $statusItemsBySession } from '@/store/composer-status'
 import { notify } from '@/store/notifications'
 import { $previewStatusBySession } from '@/store/preview-status'
 import { listRepoBranches, requestStartWorkSession, startWorkInRepo, switchBranchInRepo } from '@/store/projects'
-import { $activeSessionAwaitingInput } from '@/store/prompts'
 import { toggleReview } from '@/store/review'
 import { $gatewayState, $messages, setSessionPickerOpen } from '@/store/session'
 import { $threadScrolledUp } from '@/store/thread-scroll'
@@ -230,11 +229,6 @@ export function ChatBar({
  const statusItemsBySession = useStore($statusItemsBySession)
  const previewStatusBySession = useStore($previewStatusBySession)
  const scrolledUp = useStore($threadScrolledUp)
-  // The turn is parked on the user (clarify / approval / sudo / secret). Esc must
-  // not interrupt it — there's nothing actively running to stop, and stopping
-  // would discard a question the user may want to come back to. The blocking
-  // prompt owns its own dismissal (Skip, Reject, dialog close).
-  const awaitingInput = useStore($activeSessionAwaitingInput)
  // Pop-out is a shared, persisted state — but secondary windows (the Ctrl+Shift+N
  // tiny window, subagent watch windows) always start docked and can't pop out:
  // a floating composer makes no sense in a single-session side window, and it
@@ -284,17 +278,14 @@ export function ChatBar({
    poppedOut ? handleComposerDock() : handleComposerPopOut()
  }, [handleComposerDock, handleComposerPopOut, poppedOut])

-  const {
-    dockProximity,
-    dragging,
-    onPointerDown: onComposerGesturePointerDown
-  } = useComposerPopoutGestures({
-    composerRef,
-    onDock: handleComposerDock,
-    onPopOut: handleComposerPopOut,
-    poppedOut,
-    position: popoutPosition
-  })
+  const { dockProximity, dragging, onPointerDown: onComposerGesturePointerDown } =
+    useComposerPopoutGestures({
+      composerRef,
+      onDock: handleComposerDock,
+      onPopOut: handleComposerPopOut,
+      poppedOut,
+      position: popoutPosition
+    })

  const draftRef = useRef(draft)
  const pendingDraftPersistRef = useRef<{ scope: string | null; text: string } | null>(null)
@@ -793,16 +784,6 @@ export function ChatBar({
    if (!pastedText) {
      event.preventDefault()

-      // Under WSL2/WSLg the Windows host clipboard doesn't bridge *images* to
-      // the Linux clipboard the DOM paste event reads, so a host screenshot
-      // arrives as an empty paste (no blobs, no text). Fall back to the main
-      // process, which pulls the image straight off the Windows clipboard.
-      // Silent so a genuinely-empty paste doesn't pop a "no image" warning.
-      if (onPasteClipboardImage) {
-        triggerHaptic('selection')
-        void onPasteClipboardImage({ silent: true })
-      }
-
      return
    }

@@ -835,7 +816,8 @@ export function ChatBar({
  // Suppress the "No matches" empty state once a slash command is past its name:
  // a no-arg command has nothing to offer, and a fully-typed arg commits on
  // Space/Tab — neither should dead-end on a popover.
-  const argStageEmpty = trigger?.kind === '/' && slashArgStage(trigger.query) && !triggerLoading && !triggerItems.length
+  const argStageEmpty =
+    trigger?.kind === '/' && slashArgStage(trigger.query) && !triggerLoading && !triggerItems.length

  const closeTrigger = () => {
    setTrigger(null)
@@ -862,14 +844,7 @@ export function ChatBar({
      id: text,
      type: 'slash',
      label: text.slice(1),
-      metadata: {
-        command: slashCommandToken(trigger.query),
-        display: text,
-        meta: '',
-        group: '',
-        action: '',
-        rawText: text
-      }
+      metadata: { command: slashCommandToken(trigger.query), display: text, meta: '', group: '', action: '', rawText: text }
    })
  }

@@ -1009,7 +984,10 @@ export function ChatBar({

    // Non-collapsed Backspace/Delete: native selection-delete is ~O(n²) on large
    // drafts (Ctrl+A → Delete froze ~1.3s). Collapsed carets fall through.
-    if ((event.key === 'Backspace' || event.key === 'Delete') && deleteSelectionInEditor(event.currentTarget)) {
+    if (
+      (event.key === 'Backspace' || event.key === 'Delete') &&
+      deleteSelectionInEditor(event.currentTarget)
+    ) {
      event.preventDefault()
      flushEditorToDraft(event.currentTarget)

@@ -1220,10 +1198,8 @@ export function ChatBar({
        return
      }

-      // Otherwise Esc interrupts the running turn (Stop-button parity) — unless
-      // the turn is parked waiting on the user, where Esc must not discard the
-      // pending prompt.
-      if (busy && !awaitingInput) {
+      // Otherwise Esc interrupts the running turn (Stop-button parity).
+      if (busy) {
        event.preventDefault()
        triggerHaptic('cancel')
        void Promise.resolve(onCancel())
@@ -1785,17 +1761,12 @@ export function ChatBar({
  // open — Esc must close that overlay, never double as canceling the stream
  // behind it. A latest-handler ref keeps the listener registered once.
  const escCancelRef = useRef<(event: globalThis.KeyboardEvent) => void>(() => {})
-
  escCancelRef.current = (event: globalThis.KeyboardEvent) => {
-    // `awaitingInput`: the turn is parked on a clarify / approval / sudo / secret
-    // prompt, which owns Esc (or is meant to persist) — never cancel the stream
-    // out from under it.
-    if (event.key !== 'Escape' || event.defaultPrevented || !busy || awaitingInput) {
+    if (event.key !== 'Escape' || event.defaultPrevented || !busy) {
      return
    }

    const active = document.activeElement as HTMLElement | null
-
    if (active && (active.tagName === 'INPUT' || active.tagName === 'TEXTAREA' || active.isContentEditable)) {
      return
    }
@@ -2283,9 +2254,7 @@ export function ChatBar({
              <div
                className={cn(
                  'relative z-1 flex min-h-0 w-full flex-col gap-(--composer-row-gap) overflow-hidden rounded-[inherit] px-(--composer-surface-pad-x) py-(--composer-surface-pad-y) transition-opacity duration-200 ease-out',
-                  scrolledUp
-                    ? 'opacity-30 group-hover/composer:opacity-100 group-focus-within/composer-surface:opacity-100'
-                    : 'opacity-100'
+                  scrolledUp ? 'opacity-30 group-hover/composer:opacity-100 group-focus-within/composer-surface:opacity-100' : 'opacity-100'
                )}
                data-slot="composer-fade"
              >
--- a/apps/desktop/src/app/chat/composer/inline-refs.ts
+++ b/apps/desktop/src/app/chat/composer/inline-refs.ts
@@ -3,7 +3,12 @@ import { contextPath } from '@/lib/chat-runtime'

 import type { DroppedFile } from '../hooks/use-composer-actions'

-import { composerPlainText, normalizeComposerEditorDom, placeCaretEnd, refChipElement } from './rich-editor'
+import {
+  composerPlainText,
+  normalizeComposerEditorDom,
+  placeCaretEnd,
+  refChipElement
+} from './rich-editor'

 /** A chip to insert: a raw `@kind:value` string, or a typed value + display label. */
 export type InlineRefInput = string | { kind: string; label?: string; value: string }
@@ -154,7 +159,6 @@ export function insertInlineRefsIntoEditor(editor: HTMLDivElement, refs: readonl
  editor.focus({ preventScroll: true })

  const selection = window.getSelection()
-
  const range =
    selection?.rangeCount && editor.contains(selection.getRangeAt(0).commonAncestorContainer)
      ? selection.getRangeAt(0)
--- a/apps/desktop/src/app/chat/composer/model-pill.tsx
+++ b/apps/desktop/src/app/chat/composer/model-pill.tsx
@@ -94,7 +94,13 @@ export function ModelPill({
    <DropdownMenu onOpenChange={setOpen} open={open}>
      <Tip label={title} side="top">
        <DropdownMenuTrigger asChild>
-          <Button aria-label={title} className={pillClass} disabled={disabled} type="button" variant="ghost">
+          <Button
+            aria-label={title}
+            className={pillClass}
+            disabled={disabled}
+            type="button"
+            variant="ghost"
+          >
            {label}
          </Button>
        </DropdownMenuTrigger>
--- a/apps/desktop/src/app/chat/composer/status-stack/coding-row.tsx
+++ b/apps/desktop/src/app/chat/composer/status-stack/coding-row.tsx
@@ -4,7 +4,14 @@ import { memo, useCallback, useEffect, useRef, useState } from 'react'
 import { StatusRow } from '@/components/chat/status-row'
 import { Button } from '@/components/ui/button'
 import { Codicon } from '@/components/ui/codicon'
-import { Command, CommandEmpty, CommandGroup, CommandInput, CommandItem, CommandList } from '@/components/ui/command'
+import {
+  Command,
+  CommandEmpty,
+  CommandGroup,
+  CommandInput,
+  CommandItem,
+  CommandList
+} from '@/components/ui/command'
 import {
  Dialog,
  DialogContent,
@@ -233,8 +240,7 @@ export const CodingStatusRow = memo(function CodingStatusRow({
    branchTargets.push({ base: undefined, label: s.newBranch })
  }

-  const switchTarget =
-    onSwitchBranch && current && status.defaultBranch && status.defaultBranch !== current ? status.defaultBranch : null
+  const switchTarget = onSwitchBranch && current && status.defaultBranch && status.defaultBranch !== current ? status.defaultBranch : null

  // Other worktrees to jump into — everything except the one we're already in
  // (matched by its checked-out branch) and the bare/main placeholder entry.
--- a/apps/desktop/src/app/chat/composer/status-stack/preview-row.tsx
+++ b/apps/desktop/src/app/chat/composer/status-stack/preview-row.tsx
@@ -76,12 +76,7 @@ export const PreviewStatusRow = memo(function PreviewStatusRow({ item, onDismiss
  return (
    <StatusRow
      leading={
-        <Codicon
-          aria-hidden
-          className={cn('text-muted-foreground/70', opening && 'animate-pulse')}
-          name="globe"
-          size="0.8rem"
-        />
+        <Codicon aria-hidden className={cn('text-muted-foreground/70', opening && 'animate-pulse')} name="globe" size="0.8rem" />
      }
      // Plain click opens the link in the browser; ⌘/Ctrl-click opens it in the
      // in-app preview pane instead. (isOpen still toggles the pane closed.)
--- a/apps/desktop/src/app/chat/composer/trigger-popover.test.tsx
+++ b/apps/desktop/src/app/chat/composer/trigger-popover.test.tsx
@@ -11,14 +11,7 @@ function renderPopover(kind: '@' | '/', loading = false) {

  const rendered = render(
    <I18nProvider configClient={null} initialLocale="zh">
-      <ComposerTriggerPopover
-        activeIndex={0}
-        items={[]}
-        kind={kind}
-        loading={loading}
-        onHover={onHover}
-        onPick={onPick}
-      />
+      <ComposerTriggerPopover activeIndex={0} items={[]} kind={kind} loading={loading} onHover={onHover} onPick={onPick} />
    </I18nProvider>
  )

--- a/apps/desktop/src/app/chat/composer/types.ts
+++ b/apps/desktop/src/app/chat/composer/types.ts
@@ -46,7 +46,7 @@ export interface ChatBarProps {
  onAddUrl?: (url: string) => void
  onAttachImageBlob?: (blob: Blob) => Promise<boolean | void> | boolean | void
  onAttachDroppedItems?: (candidates: DroppedFile[]) => Promise<boolean | void> | boolean | void
-  onPasteClipboardImage?: (opts?: { silent?: boolean }) => Promise<boolean> | void
+  onPasteClipboardImage?: () => void
  onPickFiles?: () => void
  onPickFolders?: () => void
  onPickImages?: () => void
--- a/apps/desktop/src/app/chat/hooks/use-composer-actions.ts
+++ b/apps/desktop/src/app/chat/hooks/use-composer-actions.ts
@@ -226,10 +226,9 @@ const attachToMain = (attachment: ComposerAttachment) => {
 export function useComposerActions({ activeSessionId, currentCwd, requestGateway }: ComposerActionsOptions) {
  const { t } = useI18n()
  const copy = t.desktop
-
  const addTextToDraft = useCallback((text: string) => {
    requestComposerInsert(text, { mode: 'block' })
-  }, [])
+  }, [])

  const addTerminalSelectionAttachment = useCallback((text: string, label = 'selection') => {
    const trimmed = text.trim()
@@ -330,38 +329,35 @@ export function useComposerActions({ activeSessionId, currentCwd, requestGateway
    [currentCwd]
  )

-  const attachImagePath = useCallback(
-    async (filePath: string) => {
-      if (!filePath) {
-        return false
+  const attachImagePath = useCallback(async (filePath: string) => {
+    if (!filePath) {
+      return false
+    }
+
+    const baseAttachment: ComposerAttachment = {
+      id: attachmentId('image', filePath),
+      kind: 'image',
+      label: pathLabel(filePath),
+      detail: filePath,
+      path: filePath
+    }
+
+    attachToMain(baseAttachment)
+
+    try {
+      const previewUrl = await window.hermesDesktop?.readFileDataUrl(filePath)
+
+      if (previewUrl) {
+        addComposerAttachment({ ...baseAttachment, previewUrl })
      }

-      const baseAttachment: ComposerAttachment = {
-        id: attachmentId('image', filePath),
-        kind: 'image',
-        label: pathLabel(filePath),
-        detail: filePath,
-        path: filePath
-      }
+      return true
+    } catch (err) {
+      notifyError(err, copy.imagePreviewFailed)

-      attachToMain(baseAttachment)
-
-      try {
-        const previewUrl = await window.hermesDesktop?.readFileDataUrl(filePath)
-
-        if (previewUrl) {
-          addComposerAttachment({ ...baseAttachment, previewUrl })
-        }
-
-        return true
-      } catch (err) {
-        notifyError(err, copy.imagePreviewFailed)
-
-        return true
-      }
-    },
-    [copy.imagePreviewFailed]
-  )
+      return true
+    }
+  }, [copy.imagePreviewFailed])

  const attachImageBlob = useCallback(
    async (blob: Blob) => {
@@ -415,36 +411,25 @@ export function useComposerActions({ activeSessionId, currentCwd, requestGateway
    }
  }, [attachImagePath, copy.attachImages, currentCwd, t.composer.images])

-  const pasteClipboardImage = useCallback(
-    async ({ silent = false }: { silent?: boolean } = {}) => {
-      try {
-        const path = await window.hermesDesktop?.saveClipboardImage()
+  const pasteClipboardImage = useCallback(async () => {
+    try {
+      const path = await window.hermesDesktop?.saveClipboardImage()

-        if (!path) {
-          if (!silent) {
-            notify({
-              kind: 'warning',
-              title: copy.clipboard,
-              message: copy.noClipboardImage
-            })
-          }
+      if (!path) {
+        notify({
+          kind: 'warning',
+          title: copy.clipboard,
+          message: copy.noClipboardImage
+        })

-          return false
-        }
-
-        await attachImagePath(path)
-
-        return true
-      } catch (err) {
-        if (!silent) {
-          notifyError(err, copy.clipboardPasteFailed)
-        }
-
-        return false
+        return
      }
-    },
-    [attachImagePath, copy.clipboard, copy.clipboardPasteFailed, copy.noClipboardImage]
-  )
+
+      await attachImagePath(path)
+    } catch (err) {
+      notifyError(err, copy.clipboardPasteFailed)
+    }
+  }, [attachImagePath, copy.clipboard, copy.clipboardPasteFailed, copy.noClipboardImage])

  const attachContextFolderPath = useCallback(
    (folderPath: string) => {
--- a/apps/desktop/src/app/chat/index.tsx
+++ b/apps/desktop/src/app/chat/index.tsx
@@ -75,7 +75,7 @@ interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
  maxVoiceRecordingSeconds?: number
  onAttachImageBlob: (blob: Blob) => Promise<boolean | void> | boolean | void
  onAttachDroppedItems: (candidates: DroppedFile[]) => Promise<boolean | void> | boolean | void
-  onPasteClipboardImage: (opts?: { silent?: boolean }) => Promise<boolean> | void
+  onPasteClipboardImage: () => void
  onPickFiles: () => void
  onPickFolders: () => void
  onPickImages: () => void
@@ -88,7 +88,10 @@ interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
  onThreadMessagesChange: (messages: readonly ThreadMessage[]) => void
  onEdit: (message: AppendMessage) => Promise<void>
  onReload: (parentId: string | null) => Promise<void>
-  onRestoreToMessage?: (messageId: string, target?: { text?: string; userOrdinal?: number | null }) => Promise<void>
+  onRestoreToMessage?: (
+    messageId: string,
+    target?: { text?: string; userOrdinal?: number | null }
+  ) => Promise<void>
  onRetryResume: (sessionId: string) => void
  onTranscribeAudio?: (audio: Blob) => Promise<string>
  onDismissError?: (messageId: string) => void
@@ -317,12 +320,7 @@ export function ChatView({
  // The compact new-session pop-out skips the wordmark/tagline intro — it's a
  // scratch window, not the full-height empty state.
  const showIntro =
-    !isSecondaryWindow() &&
-    freshDraftReady &&
-    !isRoutedSessionView &&
-    !selectedSessionId &&
-    !activeSessionId &&
-    messagesEmpty
+    !isSecondaryWindow() && freshDraftReady && !isRoutedSessionView && !selectedSessionId && !activeSessionId && messagesEmpty

  // Session is still loading if the route references a session we haven't
  // resumed yet. Once `activeSessionId` is set (runtime has resumed), the
--- a/apps/desktop/src/app/chat/right-rail/preview-file.tsx
+++ b/apps/desktop/src/app/chat/right-rail/preview-file.tsx
@@ -6,7 +6,7 @@ import type {
  MouseEvent as ReactMouseEvent,
  ReactNode
 } from 'react'
-import { Fragment, useCallback, useEffect, useMemo, useRef, useState } from 'react'
+import { Fragment, useEffect, useMemo, useState } from 'react'
 import ShikiHighlighter from 'react-shiki'
 import { Streamdown } from 'streamdown'

@@ -14,25 +14,15 @@ import { requestComposerFocus, requestComposerInsertRefs } from '@/app/chat/comp
 import { droppedFileInlineRef } from '@/app/chat/composer/inline-refs'
 import { HERMES_PATHS_MIME } from '@/app/chat/hooks/use-composer-actions'
 import { isAddSelectionShortcut } from '@/app/right-sidebar/terminal/selection'
-import { CodeEditor } from '@/components/chat/code-editor'
 import { FileDiffPanel } from '@/components/chat/diff-lines'
 import { chunkTextLines, useFixedRowWindow } from '@/components/chat/fixed-row-window'
 import { PageLoader } from '@/components/page-loader'
 import { translateNow, useI18n } from '@/i18n'
-import {
-  desktopFileDiff,
-  desktopGitRoot,
-  readDesktopFileDataUrl,
-  readDesktopFileText,
-  writeDesktopFileText
-} from '@/lib/desktop-fs'
-import { Check, Pencil, X } from '@/lib/icons'
+import { desktopFileDiff, desktopGitRoot, readDesktopFileDataUrl, readDesktopFileText } from '@/lib/desktop-fs'
 import { shikiLanguageForFilename } from '@/lib/markdown-code'
 import { cn } from '@/lib/utils'
 import type { PreviewTarget } from '@/store/preview'
-import { setPreviewDirty } from '@/store/preview-edit'
 import { $currentCwd } from '@/store/session'
-import { notifyWorkspaceChanged } from '@/store/workspace-events'

 const SHIKI_THEME = { dark: 'github-dark-default', light: 'github-light-default' } as const
 const TEXT_PREVIEW_MAX_BYTES = 512 * 1024
@@ -151,19 +141,6 @@ interface LocalPreviewState {
  truncated?: boolean
 }

-// True when focus is in a field that should swallow plain keystrokes (so the
-// bare-`e` edit shortcut never fires while the user is typing in the composer,
-// a search box, or the editor itself).
-function isTypableElement(el: Element | null): boolean {
-  if (!el) {
-    return false
-  }
-
-  const tag = el.tagName
-
-  return tag === 'INPUT' || tag === 'TEXTAREA' || tag === 'SELECT' || (el as HTMLElement).isContentEditable
-}
-
 function filePathForTarget(target: PreviewTarget) {
  if (target.path) {
    return target.path
@@ -333,20 +310,13 @@ function MarkdownPreview({ text }: { text: string }) {
 function PreviewModeSwitcher({
  active,
  modes,
-  onSelect,
-  trailing
+  onSelect
 }: {
  active: PreviewViewMode
  modes: PreviewViewMode[]
  onSelect: (mode: PreviewViewMode) => void
-  trailing?: ReactNode
 }) {
  const { t } = useI18n()
-  const showModes = modes.length > 1
-
-  if (!showModes && !trailing) {
-    return null
-  }

  const label: Record<PreviewViewMode, string> = {
    diff: t.preview.diff,
@@ -355,68 +325,26 @@ function PreviewModeSwitcher({
  }

  return (
-    // Fixed height so the header is byte-identical between read and edit modes —
-    // swapping the trailing controls must never move the body below it.
-    <div className="flex h-7 shrink-0 items-center justify-end gap-3 border-b border-border/40 px-3">
-      {showModes &&
-        modes.map(mode => (
-          <button
-            className={cn(
-              'text-[0.625rem] font-bold underline-offset-4 transition-colors',
-              mode === active
-                ? 'text-foreground underline decoration-current/30'
-                : 'text-muted-foreground hover:text-foreground'
-            )}
-            key={mode}
-            onClick={() => onSelect(mode)}
-            type="button"
-          >
-            {label[mode]}
-          </button>
-        ))}
-      {trailing && <div className="flex items-center gap-1.5">{trailing}</div>}
+    <div className="flex shrink-0 justify-end gap-3 border-b border-border/40 px-3 py-1">
+      {modes.map(mode => (
+        <button
+          className={cn(
+            'text-[0.625rem] font-bold underline-offset-4 transition-colors',
+            mode === active
+              ? 'text-foreground underline decoration-current/30'
+              : 'text-muted-foreground hover:text-foreground'
+          )}
+          key={mode}
+          onClick={() => onSelect(mode)}
+          type="button"
+        >
+          {label[mode]}
+        </button>
+      ))}
    </div>
  )
 }

-// Cancel / Save controls rendered as the header's trailing slot (not a bar of
-// their own) so edit mode reuses the read-mode header row verbatim.
-function EditControls({
-  dirty,
-  onCancel,
-  onSave,
-  saving
-}: {
-  dirty: boolean
-  onCancel: () => void
-  onSave: () => void
-  saving: boolean
-}) {
-  const { t } = useI18n()
-
-  return (
-    <>
-      <button
-        className="flex items-center gap-1 rounded-md px-1.5 text-[0.625rem] font-bold text-muted-foreground transition-colors hover:bg-accent hover:text-foreground"
-        onClick={onCancel}
-        type="button"
-      >
-        <X className="size-3" />
-        {t.common.cancel}
-      </button>
-      <button
-        className="flex items-center gap-1 rounded-md bg-primary px-2 py-0.5 text-[0.625rem] font-bold text-primary-foreground shadow-xs transition-opacity hover:opacity-90 disabled:opacity-50"
-        disabled={!dirty || saving}
-        onClick={onSave}
-        type="button"
-      >
-        <Check className="size-3" />
-        {saving ? t.common.saving : t.common.save}
-      </button>
-    </>
-  )
-}
-
 interface LineSelection {
  end: number
  start: number
@@ -503,7 +431,9 @@ function SourceView({ filePath, language, text }: { filePath: string; language:
  return (
    <div className="h-full overflow-auto" onScroll={onScroll} ref={scrollerRef}>
      <div className="grid min-w-max grid-cols-[auto_minmax(0,1fr)] font-mono text-[0.7rem] leading-relaxed">
-        {beforeRows > 0 && <div aria-hidden className="col-span-2" style={{ height: beforeRows * SOURCE_LINE_PX }} />}
+        {beforeRows > 0 && (
+          <div aria-hidden className="col-span-2" style={{ height: beforeRows * SOURCE_LINE_PX }} />
+        )}
        {visibleChunks.map(chunk => (
          <Fragment key={chunk.start}>
            <div className="select-none text-right text-muted-foreground/55">
@@ -545,7 +475,9 @@ function SourceView({ filePath, language, text }: { filePath: string; language:
            </div>
          </Fragment>
        ))}
-        {afterRows > 0 && <div aria-hidden className="col-span-2" style={{ height: afterRows * SOURCE_LINE_PX }} />}
+        {afterRows > 0 && (
+          <div aria-hidden className="col-span-2" style={{ height: afterRows * SOURCE_LINE_PX }} />
+        )}
      </div>
    </div>
  )
@@ -560,36 +492,11 @@ export function LocalFilePreview({ reloadKey, target }: { reloadKey: number; tar
  // User-picked view; null = auto (diff when changed, else rendered markdown,
  // else source). Reset when the previewed file changes.
  const [userMode, setUserMode] = useState<null | PreviewViewMode>(null)
-  // Spot-editor state. The editor owns its buffer (keyed by `editorKey`); the
-  // live draft + the snapshot the user started from live in refs so typing
-  // never re-renders this (large) component — `dirty` is the only render-worthy
-  // signal and it flips just once when crossing the clean↔dirty boundary.
-  // `selfReload` re-runs the load after a save without the parent.
-  const [editing, setEditing] = useState(false)
-  const draftRef = useRef('')
-  const baselineRef = useRef('')
-  const [dirty, setDirty] = useState(false)
-  const [editorKey, setEditorKey] = useState(0)
-  const [saving, setSaving] = useState(false)
-  const [saveError, setSaveError] = useState<null | string>(null)
-  const [conflict, setConflict] = useState(false)
-  const [selfReload, setSelfReload] = useState(0)
-  // For the bare-`e` shortcut: the read-view root (to detect focus-within) and a
-  // hover flag (no state — only the keydown handler reads it).
-  const readViewRef = useRef<HTMLDivElement>(null)
-  const hoverRef = useRef(false)
  const filePath = filePathForTarget(target)
  const isImage = target.previewKind === 'image'

  useEffect(() => {
    setUserMode(null)
-    setEditing(false)
-    setDirty(false)
-    setSaving(false)
-    setSaveError(null)
-    setConflict(false)
-    draftRef.current = ''
-    baselineRef.current = ''
  }, [filePath, reloadKey])

  // HTML files are rendered as source code, not in a webview - so they take
@@ -675,188 +582,7 @@ export function LocalFilePreview({ reloadKey, target }: { reloadKey: number; tar
    return () => {
      active = false
    }
-  }, [blockedByTarget, filePath, forcePreview, isImage, isText, reloadKey, selfReload, target.dataUrl, target.language])
-
-  // Editing is only offered for whole, readable text — never images, binaries,
-  // or files we only loaded the first 512 KB of (saving would drop the tail).
-  const canEdit =
-    isText && !isImage && !blockedByTarget && state.text !== undefined && !state.truncated && !state.binary
-
-  // Per-keystroke: update the draft ref (no render) and only set `dirty` when it
-  // actually changes — React bails on an identical value, so a long typing run
-  // triggers a single re-render at most.
-  const handleEditorChange = useCallback((value: string) => {
-    draftRef.current = value
-    const next = value !== baselineRef.current
-    setDirty(prev => (prev === next ? prev : next))
-  }, [])
-
-  // Publish the unsaved state to the rail so the tab can show a modified dot.
-  // Keyed by url; cleared on unmount/tab-change so a stale dot never lingers.
-  useEffect(() => {
-    setPreviewDirty(target.url, editing && dirty)
-
-    return () => setPreviewDirty(target.url, false)
-  }, [target.url, editing, dirty])
-
-  const beginEdit = () => {
-    const text = state.text ?? ''
-    baselineRef.current = text
-    draftRef.current = text
-    setDirty(false)
-    setEditorKey(key => key + 1)
-    setSaving(false)
-    setSaveError(null)
-    setConflict(false)
-    setEditing(true)
-  }
-
-  // Latest `beginEdit` for the keydown listener, so the listener can stay
-  // subscribed across renders without recreating itself or going stale.
-  const beginEditRef = useRef(beginEdit)
-  beginEditRef.current = beginEdit
-
-  // Bare `e` enters edit mode when the file pane is hovered or focused and no
-  // typable field has focus — a fast, button-free path (double-click felt laggy
-  // because of the browser's click-disambiguation delay).
-  useEffect(() => {
-    if (!canEdit || editing) {
-      return
-    }
-
-    const onKeyDown = (event: KeyboardEvent) => {
-      if (event.key !== 'e' || event.metaKey || event.ctrlKey || event.altKey) {
-        return
-      }
-
-      if (isTypableElement(document.activeElement)) {
-        return
-      }
-
-      const root = readViewRef.current
-      const focusWithin = Boolean(root && document.activeElement && root.contains(document.activeElement))
-
-      if (!hoverRef.current && !focusWithin) {
-        return
-      }
-
-      event.preventDefault()
-      beginEditRef.current()
-    }
-
-    window.addEventListener('keydown', onKeyDown)
-
-    return () => window.removeEventListener('keydown', onKeyDown)
-  }, [canEdit, editing])
-
-  const cancelEdit = () => {
-    setEditing(false)
-    setSaveError(null)
-    setConflict(false)
-  }
-
-  const discardAndReload = () => {
-    setEditing(false)
-    setConflict(false)
-    setSaveError(null)
-    setSelfReload(n => n + 1)
-  }
-
-  const saveEdit = async (force = false) => {
-    if (saving) {
-      return
-    }
-
-    setSaving(true)
-    setSaveError(null)
-
-    try {
-      // Stale-on-disk guard: re-read what's on disk now and compare to the
-      // snapshot the user started from. If something changed underneath (an
-      // agent edit, an external save), don't clobber it silently — surface the
-      // choice. `force` is the user picking "overwrite" from that banner.
-      if (!force) {
-        try {
-          const current = await readTextPreview(filePath)
-
-          if (!current.binary && (current.text ?? '') !== baselineRef.current) {
-            setConflict(true)
-            setSaving(false)
-
-            return
-          }
-        } catch {
-          // Couldn't re-read for the check — fall through and attempt the write.
-        }
-      }
-
-      await writeDesktopFileText(filePath, draftRef.current)
-      baselineRef.current = draftRef.current
-      setDirty(false)
-      setConflict(false)
-      setEditing(false)
-      notifyWorkspaceChanged()
-      setSelfReload(n => n + 1)
-    } catch (error) {
-      setSaveError(error instanceof Error ? error.message : String(error))
-    } finally {
-      setSaving(false)
-    }
-  }
-
-  // Rendered before the loading/error branches so a background re-read (file
-  // watcher, workspace tick) can't unmount the editor and drop the draft. Uses
-  // the SAME container + fixed-height header as the read view so entering edit
-  // never shifts the body — only the trailing controls and the body swap.
-  if (editing) {
-    return (
-      <div className="flex h-full flex-col overflow-hidden bg-transparent">
-        <PreviewModeSwitcher
-          active="source"
-          modes={[]}
-          onSelect={() => {}}
-          trailing={<EditControls dirty={dirty} onCancel={cancelEdit} onSave={() => void saveEdit()} saving={saving} />}
-        />
-        {conflict && (
-          <div className="shrink-0 border-b border-amber-400/40 bg-amber-50 px-3 py-2 text-[0.7rem] text-amber-900 dark:border-amber-300/30 dark:bg-amber-300/10 dark:text-amber-100">
-            <div className="font-semibold">{t.preview.diskChangedTitle}</div>
-            <div className="mt-0.5 leading-relaxed">{t.preview.diskChangedBody}</div>
-            <div className="mt-1.5 flex gap-3">
-              <button
-                className="font-bold underline underline-offset-4 transition-opacity hover:opacity-80"
-                onClick={() => void saveEdit(true)}
-                type="button"
-              >
-                {t.preview.overwrite}
-              </button>
-              <button
-                className="font-bold underline underline-offset-4 transition-opacity hover:opacity-80"
-                onClick={discardAndReload}
-                type="button"
-              >
-                {t.preview.discardReload}
-              </button>
-            </div>
-          </div>
-        )}
-        {saveError && (
-          <div className="shrink-0 border-b border-destructive/40 bg-destructive/10 px-3 py-1.5 text-[0.7rem] text-destructive">
-            {t.preview.saveFailed(saveError)}
-          </div>
-        )}
-        <div className="min-h-0 flex-1 overflow-hidden">
-          <CodeEditor
-            filePath={filePath}
-            initialValue={baselineRef.current}
-            key={editorKey}
-            onCancel={cancelEdit}
-            onChange={handleEditorChange}
-            onSave={() => void saveEdit()}
-          />
-        </div>
-      </div>
-    )
-  }
+  }, [blockedByTarget, filePath, forcePreview, isImage, isText, reloadKey, target.dataUrl, target.language])

  if (state.loading) {
    return <PageLoader label={t.preview.loading} />
@@ -876,7 +602,11 @@ export function LocalFilePreview({ reloadKey, target }: { reloadKey: number; tar

    return (
      <PreviewEmptyState
-        body={binary ? t.preview.binaryBody(target.label) : t.preview.largeBody(target.label, formatBytes(size))}
+        body={
+          binary
+            ? t.preview.binaryBody(target.label)
+            : t.preview.largeBody(target.label, formatBytes(size))
+        }
        primaryAction={{ label: t.preview.previewAnyway, onClick: () => setForcePreview(true) }}
        title={binary ? t.preview.binaryTitle : t.preview.largeTitle}
        tone="warning"
@@ -917,39 +647,13 @@ export function LocalFilePreview({ reloadKey, target }: { reloadKey: number; tar
    const mode = userMode && modes.includes(userMode) ? userMode : autoMode

    return (
-      <div
-        className="flex h-full flex-col overflow-hidden bg-transparent"
-        onMouseEnter={() => {
-          hoverRef.current = true
-        }}
-        onMouseLeave={() => {
-          hoverRef.current = false
-        }}
-        ref={readViewRef}
-      >
+      <div className="flex h-full flex-col overflow-hidden bg-transparent">
        {state.truncated && (
          <div className="border-b border-border/60 bg-muted/35 px-3 py-1.5 text-[0.68rem] text-muted-foreground">
            {t.preview.truncated}
          </div>
        )}
-        <PreviewModeSwitcher
-          active={mode}
-          modes={modes}
-          onSelect={setUserMode}
-          trailing={
-            canEdit ? (
-              <button
-                className="flex items-center gap-1 text-[0.625rem] font-bold text-muted-foreground underline-offset-4 transition-colors hover:text-foreground"
-                onClick={beginEdit}
-                title={`${t.preview.edit} (e)`}
-                type="button"
-              >
-                <Pencil className="size-3" />
-                {t.preview.edit}
-              </button>
-            ) : null
-          }
-        />
+        {modes.length > 1 && <PreviewModeSwitcher active={mode} modes={modes} onSelect={setUserMode} />}
        <div className="min-h-0 flex-1 overflow-auto">
          {mode === 'rendered' ? (
            <MarkdownPreview text={state.text} />
@@ -973,5 +677,10 @@ export function LocalFilePreview({ reloadKey, target }: { reloadKey: number; tar
    )
  }

-  return <PreviewEmptyState body={t.preview.noInlineBody(target.mimeType || '')} title={t.preview.noInlineTitle} />
+  return (
+    <PreviewEmptyState
+      body={t.preview.noInlineBody(target.mimeType || '')}
+      title={t.preview.noInlineTitle}
+    />
+  )
 }
--- a/apps/desktop/src/app/chat/right-rail/preview-pane.test.tsx
+++ b/apps/desktop/src/app/chat/right-rail/preview-pane.test.tsx
@@ -7,9 +7,7 @@ import { PreviewPane } from './preview-pane'

 describe('PreviewPane console state', () => {
  beforeEach(() => {
-    vi.stubGlobal('requestAnimationFrame', (callback: FrameRequestCallback) =>
-      window.setTimeout(() => callback(Date.now()), 0)
-    )
+    vi.stubGlobal('requestAnimationFrame', (callback: FrameRequestCallback) => window.setTimeout(() => callback(Date.now()), 0))
    vi.stubGlobal('cancelAnimationFrame', (id: number) => window.clearTimeout(id))
  })

--- a/apps/desktop/src/app/chat/right-rail/preview.tsx
+++ b/apps/desktop/src/app/chat/right-rail/preview.tsx
@@ -31,7 +31,6 @@ import {
  closeRightRailTabsToRight,
  type PreviewTarget
 } from '@/store/preview'
-import { $dirtyPreviewUrls } from '@/store/preview-edit'

 import { PreviewPane } from './preview-pane'

@@ -71,13 +70,10 @@ export function ChatPreviewRail({ onRestartServer, setTitlebarToolGroup }: ChatP
  const panesFlipped = useStore($panesFlipped)
  const filePreviewTabs = useStore($filePreviewTabs)
  const previewTarget = useStore($previewTarget)
-  const dirtyPreviewUrls = useStore($dirtyPreviewUrls)

  const tabs = useMemo<readonly RailTab[]>(
    () => [
-      ...(previewTarget
-        ? [{ id: RIGHT_RAIL_PREVIEW_TAB_ID, label: t.preview.tab, target: previewTarget } as RailTab]
-        : []),
+      ...(previewTarget ? [{ id: RIGHT_RAIL_PREVIEW_TAB_ID, label: t.preview.tab, target: previewTarget } as RailTab] : []),
      ...filePreviewTabs.map(({ id, target }) => ({ id, label: tabLabelFor(target), target }) as RailTab)
    ],
    [filePreviewTabs, previewTarget, t.preview.tab]
@@ -103,12 +99,6 @@ export function ChatPreviewRail({ onRestartServer, setTitlebarToolGroup }: ChatP
        'relative flex h-full w-full min-w-0 flex-col overflow-hidden border-(--ui-stroke-tertiary) bg-(--ui-editor-surface-background) text-(--ui-text-tertiary)',
        panesFlipped ? 'border-r' : 'border-l'
      )}
-      // Windows/WSLg paint Electron's Window Controls Overlay across our
-      // titlebar band, so the editor-style tab strip (which normally sits IN that
-      // band) would land under the fixed titlebar tools. --right-rail-top-inset
-      // (set by AppShell only when the overlay is present) drops the rail one
-      // titlebar-height so it opens below the band. 0px elsewhere → unchanged.
-      style={{ paddingTop: 'var(--right-rail-top-inset, 0px)' }}
    >
      <div className="group/rail-tabs flex h-(--titlebar-height) shrink-0 border-b border-(--ui-stroke-tertiary) bg-(--ui-sidebar-surface-background)">
        <div
@@ -119,7 +109,6 @@ export function ChatPreviewRail({ onRestartServer, setTitlebarToolGroup }: ChatP
            const active = tab.id === activeTab.id
            const hasOthers = tabs.length > 1
            const hasTabsToRight = index < tabs.length - 1
-            const dirty = Boolean(dirtyPreviewUrls[tab.target.url])

            return (
              <ContextMenu key={tab.id}>
@@ -166,16 +155,6 @@ export function ChatPreviewRail({ onRestartServer, setTitlebarToolGroup }: ChatP
                      aria-hidden="true"
                      className="pointer-events-none absolute inset-y-0 right-0 w-9 bg-[linear-gradient(to_right,transparent,var(--tab-bg)_55%)] opacity-0 transition-opacity group-hover/tab:opacity-100 group-focus-within/tab:opacity-100"
                    />
-                    {dirty && (
-                      <span
-                        aria-hidden="true"
-                        className="pointer-events-none absolute right-1.5 top-1/2 grid size-4 -translate-y-1/2 place-items-center opacity-100 transition-opacity group-hover/tab:opacity-0 group-focus-within/tab:opacity-0"
-                      >
-                        {/* Amber (our warn color); a tab-bg ring + soft drop keeps it
-                            legible where it overlaps the filename. */}
-                        <span className="size-2 rounded-full bg-amber-500 shadow-[0_0_0_2px_var(--tab-bg),0_1px_2px_rgba(0,0,0,0.45)] dark:bg-amber-400" />
-                      </span>
-                    )}
                    <button
                      aria-label={t.preview.closeTab(tab.label)}
                      className="pointer-events-none absolute right-1.5 top-1/2 grid size-4 -translate-y-1/2 place-items-center rounded-sm text-(--ui-text-tertiary) opacity-0 transition-[background-color,color,opacity] hover:bg-(--ui-bg-secondary) hover:text-foreground focus-visible:pointer-events-auto focus-visible:opacity-100 group-hover/tab:pointer-events-auto group-hover/tab:opacity-100 group-focus-within/tab:pointer-events-auto group-focus-within/tab:opacity-100"
--- a/apps/desktop/src/app/chat/sidebar/chrome.tsx
+++ b/apps/desktop/src/app/chat/sidebar/chrome.tsx
@@ -146,7 +146,10 @@ export function SidebarRowLeadGlyph({
 }) {
  return (
    <span
-      className={cn('grid size-full place-items-center text-(--ui-text-tertiary) [&_.codicon]:leading-none', className)}
+      className={cn(
+        'grid size-full place-items-center text-(--ui-text-tertiary) [&_.codicon]:leading-none',
+        className
+      )}
      style={style}
    >
      {children}
--- a/apps/desktop/src/app/chat/sidebar/index.tsx
+++ b/apps/desktop/src/app/chat/sidebar/index.tsx
@@ -77,7 +77,13 @@ import {
  toggleSidebarMessagingOpen,
  unpinSession
 } from '@/store/layout'
-import { $newChatProfile, $profiles, $profileScope, ALL_PROFILES, normalizeProfileKey } from '@/store/profile'
+import {
+  $newChatProfile,
+  $profiles,
+  $profileScope,
+  ALL_PROFILES,
+  normalizeProfileKey
+} from '@/store/profile'
 import {
  $activeProjectId,
  $projects,
@@ -241,12 +247,7 @@ function ReorderableList({
  }

  return (
-    <DndContext
-      autoScroll={reorderAutoScroll}
-      collisionDetection={closestCenter}
-      onDragEnd={handleDragEnd}
-      sensors={sensors}
-    >
+    <DndContext autoScroll={reorderAutoScroll} collisionDetection={closestCenter} onDragEnd={handleDragEnd} sensors={sensors}>
      <SortableContext items={ids} strategy={verticalListSortingStrategy}>
        {children}
      </SortableContext>
@@ -1118,7 +1119,9 @@ export function ChatSidebar({
  )

  const recentsVirtualizes =
-    !displayAgentGroups?.length && !agentProjectTree?.length && displayAgentSessions.length >= VIRTUALIZE_THRESHOLD
+    !displayAgentGroups?.length &&
+    !agentProjectTree?.length &&
+    displayAgentSessions.length >= VIRTUALIZE_THRESHOLD

  // Keep the persisted parent + worktree orders reconciled with what's on screen:
  // freshly-seen repos/worktrees surface at the top, vanished ones drop out of
@@ -1436,13 +1439,11 @@ export function ChatSidebar({
                }
                label={sessionsLabel}
                labelMeta={
-                  worktreeGroupingActive ? (
-                    reposScanning && !projectsSkeletonVisible ? (
-                      <GlyphSpinner ariaLabel={s.loading} className="text-[0.6875rem] text-(--ui-text-quaternary)" />
-                    ) : undefined
-                  ) : (
-                    recentsMeta
-                  )
+                  worktreeGroupingActive
+                    ? reposScanning && !projectsSkeletonVisible
+                      ? <GlyphSpinner ariaLabel={s.loading} className="text-[0.6875rem] text-(--ui-text-quaternary)" />
+                      : undefined
+                    : recentsMeta
                }
                liveSessions={inProject ? agentSessions : undefined}
                onArchiveSession={onArchiveSession}
@@ -1457,9 +1458,7 @@ export function ChatSidebar({
                onTogglePin={pinSession}
                open={agentsOpen}
                pinned={false}
-                projectBackRow={
-                  inProject ? <ProjectBackRow label={s.projects.back} onClick={exitProjectScope} /> : undefined
-                }
+                projectBackRow={inProject ? <ProjectBackRow label={s.projects.back} onClick={exitProjectScope} /> : undefined}
                projectContent={inProject ? enteredProjectContent : undefined}
                projectOverview={projectOverview}
                projectOverviewPreviews={overviewPreviews}
@@ -1563,15 +1562,7 @@ interface SidebarSectionHeaderProps {
  collapsible?: boolean
 }

-function SidebarSectionHeader({
-  label,
-  open,
-  onToggle,
-  action,
-  meta,
-  icon,
-  collapsible = true
-}: SidebarSectionHeaderProps) {
+function SidebarSectionHeader({ label, open, onToggle, action, meta, icon, collapsible = true }: SidebarSectionHeaderProps) {
  const labelBody = (
    <>
      {icon}
@@ -1606,10 +1597,7 @@ function SidebarSessionSkeletons() {
  return (
    <div aria-hidden="true" className="grid gap-px">
      {['w-32', 'w-40', 'w-28', 'w-36', 'w-24'].map((width, i) => (
-        <div
-          className="grid min-h-[1.625rem] grid-cols-[minmax(0,1fr)_1.375rem] items-center rounded-md pl-2"
-          key={`${width}-${i}`}
-        >
+        <div className="grid min-h-[1.625rem] grid-cols-[minmax(0,1fr)_1.375rem] items-center rounded-md pl-2" key={`${width}-${i}`}>
          <Skeleton className={cn('h-3 rounded-sm', width)} />
          <Skeleton className="mx-auto size-3.5 rounded-sm opacity-60" />
        </div>
@@ -1744,7 +1732,8 @@ function SidebarSessionsSection({
  const hasProjectContent = Boolean(projectContent && projectContent.sessionCount > 0)

  const showEmptyState =
-    forceEmptyState || (!hasGroupedSessions && !hasProjectOverview && !hasProjectContent && sessions.length === 0)
+    forceEmptyState ||
+    (!hasGroupedSessions && !hasProjectOverview && !hasProjectContent && sessions.length === 0)

  // The flat recents/pinned list is the only place sessions reorder by hand;
  // grouped/tree views always sort by creation date and never drag.
@@ -1839,11 +1828,7 @@ function SidebarSessionsSection({

    inner =
      projectsDraggable && onReorderProjects ? (
-        <ReorderableList
-          ids={projectOverview.map(project => project.id)}
-          onReorder={onReorderProjects}
-          sensors={dndSensors}
-        >
+        <ReorderableList ids={projectOverview.map(project => project.id)} onReorder={onReorderProjects} sensors={dndSensors}>
          {rows}
        </ReorderableList>
      ) : (
@@ -1852,12 +1837,7 @@ function SidebarSessionsSection({
  } else if (groups?.length) {
    // Profile/source groups never reorder; render them flat with static rows.
    inner = groups.map(group => (
-      <SidebarWorkspaceGroup
-        group={group}
-        key={group.id}
-        onNewSession={onNewSessionInWorkspace}
-        renderRows={renderRows}
-      />
+      <SidebarWorkspaceGroup group={group} key={group.id} onNewSession={onNewSessionInWorkspace} renderRows={renderRows} />
    ))
  } else if (flatVirtualized) {
    const virtual = (
--- a/apps/desktop/src/app/chat/sidebar/load-more-row.tsx
+++ b/apps/desktop/src/app/chat/sidebar/load-more-row.tsx
@@ -23,11 +23,7 @@ export function SidebarLoadMoreRow({ step, onClick, loading = false }: SidebarLo
      onClick={onClick}
      type="button"
    >
-      {loading ? (
-        <GlyphSpinner ariaLabel={label} className="text-[0.75rem]" />
-      ) : (
-        <Codicon name="ellipsis" size="0.75rem" />
-      )}
+      {loading ? <GlyphSpinner ariaLabel={label} className="text-[0.75rem]" /> : <Codicon name="ellipsis" size="0.75rem" />}
    </button>
  )
 }
--- a/apps/desktop/src/app/chat/sidebar/profile-switcher.tsx
+++ b/apps/desktop/src/app/chat/sidebar/profile-switcher.tsx
@@ -132,11 +132,7 @@ export function ProfileRail() {
  const defaultProfile = profiles.find(profile => profile.is_default)
  const onDefault = !isAll && activeKey === 'default'

-  const named = sortByProfileOrder(
-    profiles.filter(profile => !profile.is_default),
-    order
-  )
-
+  const named = sortByProfileOrder(profiles.filter(profile => !profile.is_default), order)
  const multiProfile = profiles.length > 1

  // distance constraint: a small drag reorders, a tap still selects the profile.
@@ -486,11 +482,7 @@ function ProfileSquare({ active, color, label, onDelete, onRecolor, onRename, on
            <Codicon name="edit" size="0.875rem" />
            <span>{p.rename}</span>
          </ContextMenuItem>
-          <ContextMenuItem
-            className="text-destructive focus:text-destructive"
-            onSelect={onDelete}
-            variant="destructive"
-          >
+          <ContextMenuItem className="text-destructive focus:text-destructive" onSelect={onDelete} variant="destructive">
            <Codicon name="trash" size="0.875rem" />
            <span>{t.common.delete}</span>
          </ContextMenuItem>
--- a/apps/desktop/src/app/chat/sidebar/project-dialog.tsx
+++ b/apps/desktop/src/app/chat/sidebar/project-dialog.tsx
@@ -3,14 +3,7 @@ import { useEffect, useRef, useState } from 'react'

 import { Button } from '@/components/ui/button'
 import { Codicon } from '@/components/ui/codicon'
-import {
-  Dialog,
-  DialogContent,
-  DialogDescription,
-  DialogFooter,
-  DialogHeader,
-  DialogTitle
-} from '@/components/ui/dialog'
+import { Dialog, DialogContent, DialogDescription, DialogFooter, DialogHeader, DialogTitle } from '@/components/ui/dialog'
 import { GenerateButton } from '@/components/ui/generate-button'
 import { Input } from '@/components/ui/input'
 import { Textarea } from '@/components/ui/textarea'
--- a/apps/desktop/src/app/chat/sidebar/projects/entered-content.tsx
+++ b/apps/desktop/src/app/chat/sidebar/projects/entered-content.tsx
@@ -4,14 +4,7 @@ import { useMemo, useState } from 'react'

 import { Button } from '@/components/ui/button'
 import { Codicon } from '@/components/ui/codicon'
-import {
-  Dialog,
-  DialogContent,
-  DialogDescription,
-  DialogFooter,
-  DialogHeader,
-  DialogTitle
-} from '@/components/ui/dialog'
+import { Dialog, DialogContent, DialogDescription, DialogFooter, DialogHeader, DialogTitle } from '@/components/ui/dialog'
 import type { HermesGitWorktree } from '@/global'
 import type { SessionInfo } from '@/hermes'
 import { useI18n } from '@/i18n'
@@ -129,8 +122,7 @@ function RepoFlatSection({
  // A live `git worktree list` hit wins over an old dismissal: if git says the
  // worktree exists again (or still exists after "hide from sidebar"), surface it.
  const ordered = overlaidGroups.filter(
-    group =>
-      group.isMain || !dismissedWorktrees.includes(group.id) || (group.path && discoveredWorktreePaths.has(group.path))
+    group => group.isMain || !dismissedWorktrees.includes(group.id) || (group.path && discoveredWorktreePaths.has(group.path))
  )

  const repoCount = ordered.reduce((sum, group) => sum + group.sessions.length, 0)
@@ -256,9 +248,7 @@ function RepoFlatSection({
    <SidebarRowStack>
      <WorkspaceHeader
        action={
-          onNewSession && (
-            <WorkspaceAddButton label={s.newSessionIn(repo.label)} onClick={() => onNewSession(repo.path)} />
-          )
+          onNewSession && <WorkspaceAddButton label={s.newSessionIn(repo.label)} onClick={() => onNewSession(repo.path)} />
        }
        count={repoCount}
        emphasis
--- a/apps/desktop/src/app/chat/sidebar/projects/model.ts
+++ b/apps/desktop/src/app/chat/sidebar/projects/model.ts
@@ -19,11 +19,7 @@ export const PROJECT_PREVIEW_COUNT = 3
 const WORKTREE_PROBE_CONCURRENCY = 4

 const pathListKey = (paths: string[]): string =>
-  paths
-    .map(path => path.trim())
-    .filter(Boolean)
-    .sort((a, b) => a.localeCompare(b))
-    .join('\n')
+  paths.map(path => path.trim()).filter(Boolean).sort((a, b) => a.localeCompare(b)).join('\n')

 // Every session in a project, across its repos/worktrees (order-agnostic).
 const projectSessions = (project: SidebarProjectTree): SessionInfo[] =>
@@ -67,10 +63,7 @@ export function sortProjectsForOverview(
      return aHasSessions ? -1 : 1
    }

-    return (
-      projectActivityTime(b) - projectActivityTime(a) ||
-      a.label.localeCompare(b.label, undefined, { sensitivity: 'base' })
-    )
+    return projectActivityTime(b) - projectActivityTime(a) || a.label.localeCompare(b.label, undefined, { sensitivity: 'base' })
  })
 }

--- a/apps/desktop/src/app/chat/sidebar/projects/overview-row.tsx
+++ b/apps/desktop/src/app/chat/sidebar/projects/overview-row.tsx
@@ -116,9 +116,7 @@ export function ProjectOverviewRow({
      <SidebarRowShell
        actions={
          <>
-            {onNewSession && (
-              <WorkspaceAddButton label={s.newSessionIn(project.label)} onClick={() => onNewSession(project.path)} />
-            )}
+            {onNewSession && <WorkspaceAddButton label={s.newSessionIn(project.label)} onClick={() => onNewSession(project.path)} />}
            <ProjectMenu anchorRef={rowRef} isActive={isActive} project={project} />
          </>
        }
--- a/Show More
+++ b/Show More