asdfasdf

wip ts-ify
fix(skills): replace string prefix check with strict path containment
2026-06-30 15:25:48 +08:00 · 2026-06-29 17:24:14 -04:00 · 2026-06-29 16:31:36 -04:00 · 2026-06-28 21:14:01 -07:00 · 2026-06-29 14:03:02 +10:00 · 2026-06-28 20:47:33 -07:00
600 changed files with 39610 additions and 7404 deletions
--- a/.envrc
+++ b/.envrc
@@ -1,5 +1,5 @@
 watch_file pyproject.toml uv.lock
 watch_file package-lock.json package.json web/package.json ui-tui/package.json website/package.json apps/shared/package.json apps/desktop/package.json ui-tui/packages/hermes-ink/package.json
-watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix
+watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix nix/hermes-agent.nix nix/desktop.nix

 use flake
--- a/.github/actions/hermes-smoke-test/action.yml
+++ b/.github/actions/hermes-smoke-test/action.yml
@@ -1,50 +0,0 @@
-name: Hermes smoke test
-description: >
-  Run the image's built-in entrypoint against `--help` and `dashboard --help`
-  to catch basic runtime regressions before publishing.  Requires the image
-  to already be loaded into the local Docker daemon under `image`.
-
-  Works identically on amd64 and arm64 runners.
-
-inputs:
-  image:
-    description: Fully-qualified image tag (e.g. nousresearch/hermes-agent:test)
-    required: true
-
-runs:
-  using: composite
-  steps:
-    - name: Ensure /tmp/hermes-test is hermes-writable
-      shell: bash
-      run: |
-        # The image runs as the hermes user (UID 10000).  GitHub Actions
-        # creates /tmp/hermes-test root-owned by default, which hermes
-        # can't write to — chown it to match the in-container UID before
-        # bind-mounting.  Real users doing `docker run -v ~/.hermes:...`
-        # with their own UID hit the same issue and have their own
-        # remediations (HERMES_UID env var, or chown locally).
-        mkdir -p /tmp/hermes-test
-        sudo chown -R 10000:10000 /tmp/hermes-test
-
-    - name: hermes --help
-      shell: bash
-      run: |
-        # Use the image's real ENTRYPOINT (/init + main-wrapper.sh) so
-        # this exercises the actual production startup path. PR #30136
-        # review caught that an --entrypoint override here had been
-        # silently neutered by the s6-overlay migration — stage2-hook
-        # ignores its CMD args, so the smoke test was a no-op.
-        docker run --rm \
-          -v /tmp/hermes-test:/opt/data \
-          "${{ inputs.image }}" --help
-
-    - name: hermes dashboard --help
-      shell: bash
-      run: |
-        # Regression guard for #9153: dashboard was present in source but
-        # missing from the published image.  If this fails, something in
-        # the Dockerfile is excluding the dashboard subcommand from the
-        # installed package.
-        docker run --rm \
-          -v /tmp/hermes-test:/opt/data \
-          "${{ inputs.image }}" dashboard --help
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,6 +20,7 @@ permissions:
  pull-requests: write # needed by lint (PR comment) + supply-chain (PR comment)
  actions: read # needed by osv-scanner (SARIF upload)
  security-events: write # needed by osv-scanner (SARIF upload)
+  packages: write # needed by docker build

 concurrency:
  group: ci-${{ github.ref }}
@@ -32,6 +33,7 @@ jobs:
  # (all lanes true) so post-merge validation is never weakened.
  # ─────────────────────────────────────────────────────────────────────
  detect:
+    name: Detect affected areas
    runs-on: ubuntu-latest
    outputs:
      python: ${{ steps.classify.outputs.python }}
@@ -53,11 +55,15 @@ jobs:
  # Skipped workflows (if condition is false) don't spin up runners.
  # ─────────────────────────────────────────────────────────────────────
  tests:
+    name: Python tests
    needs: detect
    if: needs.detect.outputs.python == 'true'
    uses: ./.github/workflows/tests.yml
+    with:
+      slice_count: 8

  lint:
+    name: Python lints
    needs: detect
    if: needs.detect.outputs.python == 'true'
    uses: ./.github/workflows/lint.yml
@@ -65,35 +71,49 @@ jobs:
      event_name: ${{ needs.detect.outputs.event_name }}

  typecheck:
+    name: TypeScript
    needs: detect
    if: needs.detect.outputs.frontend == 'true'
    uses: ./.github/workflows/typecheck.yml

  docs-site:
+    name: Docs Site
    needs: detect
    if: needs.detect.outputs.site == 'true'
    uses: ./.github/workflows/docs-site-checks.yml

  history-check:
+    name: Deny unrelated histories
    needs: detect
    if: needs.detect.outputs.event_name == 'pull_request'
    uses: ./.github/workflows/history-check.yml

  contributor-check:
+    name: Check contributors
    needs: detect
    if: needs.detect.outputs.python == 'true'
    uses: ./.github/workflows/contributor-check.yml

  uv-lockfile:
+    name: Check uv.lock
    needs: detect
    uses: ./.github/workflows/uv-lockfile-check.yml

  docker-lint:
+    name: Lint Docker scripts
    needs: detect
    if: needs.detect.outputs.docker_meta == 'true'
    uses: ./.github/workflows/docker-lint.yml

+  docker:
+    name: Build&Test Docker image
+    needs: detect
+    if: needs.detect.outputs.python == 'true' || needs.detect.outputs.frontend == 'true' || needs.detect.outputs.docker_meta == 'true'
+    uses: ./.github/workflows/docker.yml
+    secrets: inherit
+
  supply-chain:
+    name: Supply-chain scan
    needs: detect
    if: needs.detect.outputs.event_name == 'pull_request' && (needs.detect.outputs.scan == 'true' || needs.detect.outputs.deps == 'true' || needs.detect.outputs.mcp_catalog == 'true')
    uses: ./.github/workflows/supply-chain-audit.yml
@@ -104,7 +124,7 @@ jobs:
      mcp_catalog: ${{ needs.detect.outputs.mcp_catalog == 'true' }}

  osv-scanner:
-    needs: detect
+    name: OSV scan
    uses: ./.github/workflows/osv-scanner.yml

  # ─────────────────────────────────────────────────────────────────────
@@ -127,6 +147,8 @@ jobs:
      - docker-lint
      - supply-chain
      - osv-scanner
+      # docker is intentionally not a required check for now: the image build is too slow to gate merges on
+      # - docker
    if: always()
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/docker-lint.yml
+++ b/.github/workflows/docker-lint.yml
@@ -2,7 +2,7 @@ name: Docker / shell lint

 # Lints the container build inputs: Dockerfile (via hadolint) and any shell
 # scripts under docker/ (via shellcheck). These catch the class of regression
-# the behavioral docker-publish smoke test can't — unquoted variable
+# the behavioral docker integration tests can't — unquoted variable
 # expansions, silently-failing RUN commands, etc.
 #
 # Rules and ignores are documented in .hadolint.yaml at the repo root.
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -1,24 +1,9 @@
-name: Docker Build and Publish
+name: Docker Build, Test, and Publish

 on:
-  push:
-    branches: [main]
-    paths:
-      - '**/*.py'
-      - 'pyproject.toml'
-      - 'uv.lock'
-      - 'Dockerfile'
-      - 'docker/**'
-      - '.github/workflows/docker-publish.yml'
-      - '.github/actions/hermes-smoke-test/**'
-
-  # No paths filter — the job must always run so the required check
-  # reports a status (path-gated workflows leave checks "pending" forever
-  # when no matching files change, which blocks merge).
-  pull_request:
-
  release:
    types: [published]
+  workflow_call:

 permissions:
  contents: read
@@ -39,11 +24,7 @@ env:
  IMAGE_NAME: nousresearch/hermes-agent

 jobs:
-  # ---------------------------------------------------------------------------
-  # Build amd64 natively.  This job also runs the smoke tests (basic --help
-  # and the dashboard subcommand regression guard from #9153), because amd64
-  # is the only arch we can `load` into the local daemon on an amd64 runner.
-  # ---------------------------------------------------------------------------
+  # Build, test, and optionally push the amd64 image.
  build-amd64:
    # Only run on the upstream repository, not on forks
    if: github.repository == 'NousResearch/hermes-agent'
@@ -53,24 +34,19 @@ jobs:
      digest: ${{ steps.push.outputs.digest }}
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

-      # The image build + smoke test + integration tests run ONLY on
-      # push-to-main and release — never on PRs. They are the heaviest jobs
-      # in CI (~15-45 min) and a broken build surfaces on the main push (and
-      # is gated pre-merge by docker-lint + uv-lockfile-check). Every step
-      # below is skipped on PRs, so the job still reports green and the
-      # required check never hangs.
+      # The image build + integration tests run on every event
+      # (PRs, push-to-main, release). Publish steps below are gated to
+      # push-to-main / release only.
      - name: Set up Docker Buildx
-        if: github.event_name != 'pull_request'
-        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3

-      # Build once, load into the local daemon for smoke testing.  Cached
+      # Build once, load into the local daemon for testing.  Cached
      # to gha with a per-arch scope; the push step below reuses every
      # layer from this build.
-      - name: Build image (amd64, smoke test)
-        if: github.event_name != 'pull_request'
-        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f  # v7.1.0
+      - name: Build image (amd64)
+        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
        with:
          context: .
          file: Dockerfile
@@ -82,25 +58,12 @@ jobs:
          cache-from: type=gha,scope=docker-amd64
          cache-to: type=gha,mode=max,scope=docker-amd64

-      - name: Smoke test image
-        if: github.event_name != 'pull_request'
-        uses: ./.github/actions/hermes-smoke-test
-        with:
-          image: ${{ env.IMAGE_NAME }}:test
-
-      # ---------------------------------------------------------------------
      # Run the docker-integration test suite against the freshly-built
-      # image already loaded into the local daemon (`:test`).  These tests
-      # are excluded from the sharded `tests.yml :: test` matrix on purpose
-      # (see `_SKIP_PARTS` in scripts/run_tests_parallel.py) because each
-      # shard would otherwise reach the session-scoped ``built_image``
-      # fixture in ``tests/docker/conftest.py`` and start a 3-7min
-      # ``docker build`` — guaranteed to
-      # die in fixture setup.
+      # image already loaded into the local daemon (`:test`).
      #
-      # Piggybacking here avoids a second image build: the smoke test
-      # already proved the image loads + runs, so the daemon has it under
-      # `${IMAGE_NAME}:test` and we just point ``HERMES_TEST_IMAGE`` at
+      # Piggybacking here avoids a second image build: the build step
+      # already loaded the image into the daemon under
+      # `${IMAGE_NAME}:test`, so we just point ``HERMES_TEST_IMAGE`` at
      # that.  The fixture's ``HERMES_TEST_IMAGE`` branch (see
      # tests/docker/conftest.py:62-63) short-circuits the rebuild.
      #
@@ -110,26 +73,20 @@ jobs:
      # cheapest path to coverage on every PR that touches docker code.
      # ---------------------------------------------------------------------
      - name: Install uv (for docker tests)
-        if: github.event_name != 'pull_request'
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5
+        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0

      - name: Set up Python 3.11 (for docker tests)
-        if: github.event_name != 'pull_request'
        run: uv python install 3.11

      - name: Install Python dependencies (for docker tests)
-        if: github.event_name != 'pull_request'
        run: |
-          uv venv .venv --python 3.11
-          source .venv/bin/activate
          # ``dev`` extra pulls in pytest, pytest-asyncio —
          # everything tests/docker/ needs.  We deliberately avoid ``all``
          # here because the docker tests only drive the container via
          # subprocess and don't import hermes_agent's optional deps.
-          uv pip install -e ".[dev]"
+          uv sync --locked --python 3.11 --extra dev

      - name: Run docker integration tests
-        if: github.event_name != 'pull_request'
        env:
          # Skip rebuild; use the image already loaded by the build step.
          HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
@@ -139,12 +96,11 @@ jobs:
          OPENAI_API_KEY: ""
          NOUS_API_KEY: ""
        run: |
-          source .venv/bin/activate
-          python -m pytest tests/docker/ -v --tb=short
+          scripts/run_tests.sh tests/docker/ --file-timeout 600

      - name: Log in to Docker Hub
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121  # v4.1.0
+        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -155,7 +111,7 @@ jobs:
      - name: Push amd64 by digest
        id: push
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f  # v7.1.0
+        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
        with:
          context: .
          file: Dockerfile
@@ -179,7 +135,7 @@ jobs:

      - name: Upload digest artifact
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: digest-amd64
          path: /tmp/digests/*
@@ -187,10 +143,7 @@ jobs:
          retention-days: 1

  # ---------------------------------------------------------------------------
-  # Build arm64 natively on GitHub's free arm64 runner.  This replaces the
-  # previous QEMU-emulated arm64 build, which was ~5-10x slower and shared
-  # a cache scope with amd64.  Matches the amd64 job's shape: build+load,
-  # smoke test, then on push/release push by digest.
+  # Build, test, and optionally push the arm64 image.
  # ---------------------------------------------------------------------------
  build-arm64:
    if: github.repository == 'NousResearch/hermes-agent'
@@ -200,29 +153,26 @@ jobs:
      digest: ${{ steps.push.outputs.digest }}
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

-      # arm64 build runs only on push-to-main and release (see build-amd64).
      - name: Set up Docker Buildx
-        if: github.event_name != 'pull_request'
-        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3

      # Log in to ghcr.io so the registry-backed build cache below can be
      # read (cache-from) on every event and written (cache-to) on
      # push/release.  Uses the workflow's GITHUB_TOKEN, which is valid for
      # the whole job — unlike the gha cache backend's short-lived Azure SAS
      # token, which expired mid-build on slow cold-cache arm64 runs and
-      # crashed the build before the smoke test (the reason the gha cache
+      # crashed the build before the tests ran (the reason the gha cache
      # was removed from arm64 PRs in the first place).
      - name: Log in to ghcr.io (build cache)
-        if: github.event_name != 'pull_request'
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121  # v4.1.0
+        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

-      # Build once, load into the local daemon for smoke testing, then push
+      # Build once, load into the local daemon for testing, then push
      # by digest below. Reads AND writes the registry-backed cache so the
      # push reuses layers from this build and the next build starts warm.
      #
@@ -230,9 +180,8 @@ jobs:
      # cache that previously broke here: its credential is the job-lifetime
      # GITHUB_TOKEN, not a short-lived SAS token, so the cold-build-outlives-
      # token failure mode cannot recur.
-      - name: Build image (arm64, smoke test, cached publish)
-        if: github.event_name != 'pull_request'
-        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f  # v7.1.0
+      - name: Build image (arm64, cached publish)
+        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
        with:
          context: .
          file: Dockerfile
@@ -244,15 +193,29 @@ jobs:
          cache-from: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64
          cache-to: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64,mode=max

-      - name: Smoke test image
-        if: github.event_name != 'pull_request'
-        uses: ./.github/actions/hermes-smoke-test
-        with:
-          image: ${{ env.IMAGE_NAME }}:test
+      - name: Install uv for docker tests
+        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
+
+      - name: Set up Python 3.11 for docker tests
+        run: uv python install 3.11
+
+      - name: Install Python dependencies for docker tests
+        run: |
+          uv sync --locked --python 3.11 --extra dev
+
+      - name: Run docker tests
+        env:
+          # Skip rebuild; use the image already loaded by the build step.
+          HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
+          OPENROUTER_API_KEY: ""
+          OPENAI_API_KEY: ""
+          NOUS_API_KEY: ""
+        run: |
+          scripts/run_tests.sh tests/docker/ --file-timeout 600

      - name: Log in to Docker Hub
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121  # v4.1.0
+        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -260,7 +223,7 @@ jobs:
      - name: Push arm64 by digest
        id: push
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f  # v7.1.0
+        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
        with:
          context: .
          file: Dockerfile
@@ -282,7 +245,7 @@ jobs:

      - name: Upload digest artifact
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: digest-arm64
          path: /tmp/digests/*
@@ -304,17 +267,17 @@ jobs:
    timeout-minutes: 10
    steps:
      - name: Download digests
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
        with:
          path: /tmp/digests
          pattern: digest-*
          merge-multiple: true

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3

      - name: Log in to Docker Hub
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121  # v4.1.0
+        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -37,7 +37,7 @@ jobs:
          fetch-depth: 0 # need full history for merge-base + worktree

      - name: Install uv
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
+        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0

      - name: Install ruff + ty
        uses: ./.github/actions/retry
@@ -110,7 +110,7 @@ jobs:
          cat .lint-reports/summary.md >> "$GITHUB_STEP_SUMMARY"

      - name: Upload reports as artifact
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: lint-reports
          path: .lint-reports/
@@ -164,7 +164,7 @@ jobs:
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Install uv
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
+        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0

      - name: Install ruff
        uses: ./.github/actions/retry
--- a/.github/workflows/skills-index.yml
+++ b/.github/workflows/skills-index.yml
@@ -3,17 +3,17 @@ name: Build Skills Index
 on:
  schedule:
    # Run twice daily: 6 AM and 6 PM UTC
-    - cron: '0 6,18 * * *'
-  workflow_dispatch:  # Manual trigger
+    - cron: "0 6,18 * * *"
+  workflow_dispatch: # Manual trigger
  push:
    branches: [main]
    paths:
-      - 'scripts/build_skills_index.py'
-      - '.github/workflows/skills-index.yml'
+      - "scripts/build_skills_index.py"
+      - ".github/workflows/skills-index.yml"

 permissions:
  contents: read
-  actions: write   # to trigger deploy-site.yml on schedule
+  actions: write # to trigger deploy-site.yml on schedule

 jobs:
  build-index:
@@ -21,11 +21,11 @@ jobs:
    if: github.repository == 'NousResearch/hermes-agent'
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
-          python-version: '3.11'
+          python-version: "3.11"

      - name: Install dependencies
        run: pip install httpx==0.28.1 pyyaml==6.0.2
@@ -36,7 +36,7 @@ jobs:
        run: python scripts/build_skills_index.py

      - name: Upload index artifact
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: skills-index
          path: website/static/api/skills-index.json
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -2,6 +2,11 @@ name: Tests

 on:
  workflow_call:
+    inputs:
+      slice_count:
+        description: Number of parallel test slices
+        type: number
+        default: 8

 permissions:
  contents: read
@@ -12,13 +17,11 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  test:
+  generate:
+    name: "Generate slices"
    runs-on: ubuntu-latest
-    timeout-minutes: 30
-    strategy:
-      fail-fast: false
-      matrix:
-        slice: [1, 2, 3, 4, 5, 6]
+    outputs:
+      matrix: ${{ steps.matrix.outputs.matrix }}
    steps:
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -27,13 +30,26 @@ jobs:
        uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
        with:
          path: test_durations.json
-          # main always writes a new suffix, but jobs pick the latest one with the same prefix
-          # quote from https://docs.github.com/en/actions/reference/workflows-and-actions/dependency-caching#cache-hits-and-misses
-          # If you provide restore-keys, the cache action sequentially searches for any caches that match the list of restore-keys.
-          # If there are no exact matches, the action searches for partial matches of the restore keys.
-          # When the action finds a partial match, the most recent cache is restored to the path directory.
          key: test-durations

+      - name: Generate test slices
+        id: matrix
+        run: |
+          MATRIX=$(python3 scripts/run_tests_parallel.py --generate-slices ${{ inputs.slice_count }})
+          echo "matrix=$MATRIX" >> "$GITHUB_OUTPUT"
+
+  test:
+    name: Run tests slice ${{ matrix.slice.index }}/${{ inputs.slice_count }}
+    needs: generate
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    strategy:
+      fail-fast: false
+      matrix: ${{ fromJSON(needs.generate.outputs.matrix) }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
      - name: Install ripgrep (prebuilt binary)
        run: |
          set -euo pipefail
@@ -49,7 +65,7 @@ jobs:
          rg --version

      - name: Install uv
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
+        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
        with:
          # Persist uv's download/wheel cache (~/.cache/uv) across runs.
          # Keyed on the dependency manifests, so the cache is reused until
@@ -78,33 +94,19 @@ jobs:
        # re-download, keeping the persisted cache small and fast to restore.
        run: uv cache prune --ci

-      - name: Run tests (slice ${{ matrix.slice }}/6)
-        # Per-file isolation via scripts/run_tests_parallel.py: discovers
-        # every test_*.py file under tests/ (excluding integration/ + e2e/),
-        # then runs `python -m pytest <file>` in a freshly-spawned subprocess
+      - name: Run tests (slice ${{ matrix.slice.index }}/${{ inputs.slice_count }})
+        # Per-file isolation via scripts/run_tests.sh: each test file runs
+        # in its own freshly-spawned `python -m pytest <file>` subprocess
        # with bounded parallelism. No xdist, no shared workers, no
        # module-level state leakage between files.
        #
-        # Why per-file (not per-test): per-test spawn cost (~250ms × 17k
-        # tests = 70min CPU minimum) blew the wall-clock budget. Per-file
-        # spawn (~250ms × ~850 files = ~3.5min) fits while still giving
-        # every file a fresh interpreter — the only isolation boundary
-        # that matters in practice (cross-file leakage was the original
-        # flake source; intra-file is the test author's responsibility).
-        #
-        # Why drop xdist entirely: xdist's persistent workers accumulate
-        # state across files, which is exactly the leakage we wanted to
-        # fix. ThreadPoolExecutor + subprocess.run is ~60 lines and does
-        # the job with cleaner semantics.
-        #
-        # Matrix slicing (--slice I/N): files are distributed across 6
-        # jobs by cached duration (LPT algorithm) so each job gets
-        # roughly equal wall time. Without a cache, files default to 2s
-        # estimate and get split roughly evenly by count — still correct,
-        # just not perfectly balanced.
+        # File list is pre-computed by the generate job (--generate-slices)
+        # which runs LPT distribution once and passes the file list to each
+        # matrix job via --files. Previously each job re-discovered files and
+        # re-ran LPT independently — redundant N times.
        run: |
          source .venv/bin/activate
-          python scripts/run_tests_parallel.py --slice ${{ matrix.slice }}/6
+          scripts/run_tests.sh --files '${{ matrix.slice.files }}'
        env:
          # Ensure tests don't accidentally call real APIs
          OPENROUTER_API_KEY: ""
@@ -114,7 +116,7 @@ jobs:
      - name: Upload per-slice durations
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
-          name: test-durations-slice-${{ matrix.slice }}
+          name: test-durations-slice-${{ matrix.slice.index }}
          path: test_durations.json
          retention-days: 1

@@ -173,7 +175,7 @@ jobs:
          rg --version

      - name: Install uv
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
+        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
        with:
          # Persist uv's download/wheel cache (~/.cache/uv) across runs.
          # Keyed on the dependency manifests, so the cache is reused until
--- a/.github/workflows/typecheck.yml
+++ b/.github/workflows/typecheck.yml
@@ -6,6 +6,7 @@ on:

 jobs:
  typecheck:
+    name: Check TypeScript
    runs-on: ubuntu-latest
    strategy:
      matrix:
@@ -22,8 +23,7 @@ jobs:
      # native builds. Skipping install scripts drops node-pty's node-gyp
      # header fetch — the transient flake that killed this job pre-`tsc` — and
      # is faster. retry covers the remaining registry blips.
-      - 
-        uses: ./.github/actions/retry
+      - uses: ./.github/actions/retry
        with:
          command: npm ci --ignore-scripts
      - run: npm run --prefix ${{ matrix.package }} typecheck
@@ -35,6 +35,7 @@ jobs:
  # users build apps/desktop from source on install/update. Run the real
  # `vite build` here so that class of break fails in CI instead.
  desktop-build:
+    name: Build desktop app
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -44,8 +45,7 @@ jobs:
          cache: npm
      # Keep install scripts here: the production build may need node-pty's
      # native binary. retry handles the transient install-time fetch flakes.
-      - 
-        uses: ./.github/actions/retry
+      - uses: ./.github/actions/retry
        with:
          command: npm ci
      - run: npm run --prefix apps/desktop build
--- a/.github/workflows/upload_to_pypi.yml
+++ b/.github/workflows/upload_to_pypi.yml
@@ -5,11 +5,11 @@ name: Publish to PyPI
 on:
  push:
    tags:
-      - 'v20*'  # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
+      - "v20*" # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
  workflow_dispatch:
    inputs:
      confirm_tag:
-        description: 'Tag to publish (e.g. v2026.5.15). Must already exist.'
+        description: "Tag to publish (e.g. v2026.5.15). Must already exist."
        required: true
        type: string

@@ -27,7 +27,7 @@ jobs:
    name: Build distribution 📦
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false
          # On workflow_dispatch, check out the confirmed tag.
@@ -43,17 +43,17 @@ jobs:
          fi

      - name: Set up Python
-        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
-          python-version: '3.13'
+          python-version: "3.13"

      - name: Install uv
-        uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e  # v6
+        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0

      - name: Set up Node.js
-        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
+        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
        with:
-          node-version: '22'
+          node-version: "22"

      - name: Build web dashboard
        run: cd web && npm ci && npm run build
@@ -81,7 +81,7 @@ jobs:
        run: uv build --sdist --wheel

      - name: Upload distribution artifacts
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
        with:
          name: python-package-distributions
          path: dist/
@@ -94,17 +94,17 @@ jobs:
      name: pypi
      url: https://pypi.org/p/hermes-agent
    permissions:
-      id-token: write  # OIDC trusted publishing
+      id-token: write # OIDC trusted publishing

    steps:
      - name: Download distribution artifacts
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
        with:
          name: python-package-distributions
          path: dist/

      - name: Publish to PyPI
-        uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b  # v1.14.0
+        uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
        with:
          skip-existing: true

@@ -116,12 +116,12 @@ jobs:
    needs: publish
    runs-on: ubuntu-latest
    permissions:
-      contents: write   # attach assets to the existing release
-      id-token: write   # sigstore signing
+      contents: write # attach assets to the existing release
+      id-token: write # sigstore signing

    steps:
      - name: Download distribution artifacts
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
        with:
          name: python-package-distributions
          path: dist/
@@ -145,7 +145,7 @@ jobs:

      - name: Sign with Sigstore
        if: env.skip_sign != 'true'
-        uses: sigstore/gh-action-sigstore-python@04cffa1d795717b140764e8b640de88853c92acc  # v3.3.0
+        uses: sigstore/gh-action-sigstore-python@04cffa1d795717b140764e8b640de88853c92acc # v3.3.0
        with:
          inputs: >-
            ./dist/*.tar.gz
--- a/.github/workflows/uv-lockfile-check.yml
+++ b/.github/workflows/uv-lockfile-check.yml
@@ -4,7 +4,7 @@ name: uv.lock check
 # that modify pyproject.toml without regenerating uv.lock (or vice versa)
 # must not merge, because the Docker build's `uv sync --frozen` step will
 # fail on a stale lockfile and we'd rather catch it here than in the
-# docker-publish workflow on main.
+# docker workflow on main.
 #
 # ─────────────────────────────────────────────────────────────────────────
 # IMPORTANT: this check runs against the MERGED state, not just your branch
@@ -63,7 +63,7 @@ jobs:
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Install uv
-        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
+        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0

      # `uv lock --check` re-resolves the project from pyproject.toml and
      # compares the result to uv.lock, exiting non-zero if they disagree.
@@ -100,7 +100,7 @@ jobs:

          This check is blocking because the Docker image build uses
          `uv sync --frozen --extra all`, which rejects stale lockfiles
-          — catching it here avoids a ~15 min failed docker-publish run
+          — catching it here avoids a ~15 min failed docker run
          on `main` post-merge.
          EOF
            echo "::error title=uv.lock out of sync::Run \`uv lock\` locally and commit the result. If on a PR, sync with main first."
--- a/.gitignore
+++ b/.gitignore
@@ -137,3 +137,9 @@ RELEASE_v*.md
 # Desktop demo-run scratch output (hermes writes demo/*.txt during recorded
 # walkthroughs). Throwaway artifacts, never part of the app.
 apps/desktop/demo/
+
+# PR infographics are rendered locally and embedded in PR descriptions via the
+# image-provider (fal.media) URL — they are NEVER committed to the repo. The
+# PR body is the archive. See the hermes-agent-dev skill's
+# pr-infographic-workflow reference (storage rule + lapse #8 / #COMMIT-1).
+infographic/
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -123,6 +123,17 @@ conservative at the waist.
  without E2E proof, and plugins that touch core files.** Plugins live in their
  own directory and work within the ABCs/hooks we provide; if a plugin needs
  more, widen the generic plugin surface, don't special-case it in core.
+- **Third-party products / other people's projects integrated into the core
+  tree.** Observability backends, vendor SaaS integrations, analytics dashboards,
+  and similar "someone else's product" plugins do NOT land under `plugins/` in
+  this repo. They place an ongoing maintenance burden on us to keep them working
+  against a fast-moving core, for a backend we don't own. Ship them as a
+  **standalone plugin repo** users install into `~/.hermes/plugins/` (or via a
+  pip entry point), and promote them in the Nous Research Discord
+  (`#plugins-skills-and-skins`). This is a coupling-and-maintenance decision, not
+  a quality bar — the plugin can be excellent and still be closed. PRs that add
+  such a directory to the tree are closed with a pointer to publish it as its own
+  repo.

 ### Before you call it a bug — verify the premise (and when NOT to close)

@@ -783,6 +794,24 @@ landing in this tree. PRs that add a new directory under
 provider as its own repo. Existing in-tree providers stay; bug fixes
 to them are welcome.

+**No new third-party-product plugins in-tree (policy, June 2026):** the
+same rule applies beyond memory providers. Plugins that integrate
+someone else's product or project — observability/metrics backends,
+vendor SaaS connectors, analytics dashboards, paid-service tie-ins —
+must ship as **standalone plugin repos** that users install into
+`~/.hermes/plugins/` (or via pip entry points). They register through
+the existing plugin discovery path and use the ABCs/hooks/ctx surface
+we expose; nothing special is needed in core. The reason is
+maintenance load: every product we absorb into the tree becomes our
+burden to keep working against a fast-moving core, for a backend we
+don't own. Promote standalone plugins in the Nous Research Discord
+(`#plugins-skills-and-skins`). PRs that add such a directory under
+`plugins/` are closed with a pointer to publish it as its own repo —
+this is a coupling decision, not a quality judgment. (The
+`observability/`, `kanban/`, `disk-cleanup/`, etc. directories already
+in the tree are existing precedent, not an invitation to add more
+third-party-product plugins alongside them.)
+
 ### Model-provider plugins (`plugins/model-providers/<name>/`)

 Every inference backend (openrouter, anthropic, gmi, deepseek, nvidia, …)
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -85,6 +85,23 @@ This isn't a quality bar — it's a coupling-and-maintenance decision. Memory pr

 ---

+## Third-Party Product Integrations: Ship as a Standalone Plugin
+
+The same rule extends to **any plugin that integrates someone else's product or project** — observability/metrics backends, vendor SaaS connectors, analytics dashboards, paid-service tie-ins, and similar third-party integrations. **These do not land in this repo.**
+
+The reason is maintenance load, not quality. Every external product absorbed into the core tree becomes ours to keep working against a fast-moving codebase, for a backend we don't own and can't control. Hermes ships a lot and the core moves quickly; coupling third-party products into it creates an open-ended burden on the maintainers.
+
+Publish these as a **standalone plugin repo** instead:
+
+- Implement the relevant ABC and use the existing plugin discovery path (`~/.hermes/plugins/`, project `.hermes/plugins/`, or a pip entry point) — see [Build a Hermes Plugin](https://hermes-agent.nousresearch.com/docs/guides/build-a-hermes-plugin)
+- Register lifecycle hooks (`pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`, `on_session_start`, `on_session_end`), tools (`ctx.register_tool`), and CLI subcommands (`ctx.register_cli_command`) through the surface we already expose — no core changes needed
+- If your plugin needs a capability the framework doesn't expose, that's a feature request to **widen the generic plugin surface** (a new hook or `ctx` method) — never special-case your plugin in core
+- Promote it in the [Nous Research Discord](https://discord.gg/NousResearch) `#plugins-skills-and-skins` channel so users can find and install it
+
+A well-built third-party-product plugin can clear automated review and still be closed for this reason — it's a placement decision, not a verdict on the code. PRs that add such a directory under `plugins/` will be closed with a pointer to publish it as its own repo.
+
+---
+
 ## Development Setup

 ### Prerequisites
--- a/Dockerfile
+++ b/Dockerfile
@@ -189,7 +189,13 @@ RUN cd web && npm run build && \

 # ---------- Source code ----------
 # .dockerignore excludes node_modules, so the installs above survive.
-COPY . .
+# --link decouples this layer from parents for cache purposes; --chmod bakes
+# the final read-only permissions at copy time so we skip the separate
+# `chmod -R` pass that previously walked ~30k files across the venv +
+# node_modules + source (21s amd64 / 222s arm64 — #49113).  `a+rX,go-w`
+# gives the non-root hermes user read + traverse but no write; root retains
+# write so the build steps below don't need chmod u+w dances.
+COPY --link --chmod=a+rX,go-w . .

 # ---------- Permissions ----------
 # Link hermes-agent itself (editable). Deps are already installed in the
@@ -197,19 +203,15 @@ COPY . .
 # resolution or downloads.
 RUN uv pip install --no-cache-dir --no-deps -e "."

-# Keep /opt/hermes immutable for the runtime hermes user. Hosted/container
-# instances must not be able to self-edit the installed source or venv; user
-# data, skills, plugins, config, logs, and dashboard uploads live under
-# /opt/data instead. Root can still repair the image during build/boot, but
-# supervised Hermes processes drop to the non-root hermes user.
+# Wire the exec shim and install-method stamp.  Files under /opt/hermes are
+# already root-owned (COPY, uv sync, npm install all run as root) and
+# read-only for the hermes user (go-w from the --chmod above).
+
 USER root
 RUN mkdir -p /opt/hermes/bin && \
    cp /opt/hermes/docker/hermes-exec-shim.sh /opt/hermes/bin/hermes && \
    chmod 0755 /opt/hermes/bin/hermes && \
-    printf 'docker\n' > /opt/hermes/.install_method && \
-    chown -R root:root /opt/hermes && \
-    chmod -R a+rX /opt/hermes && \
-    chmod -R a-w /opt/hermes
+    printf 'docker\n' > /opt/hermes/.install_method
 # The ``.install_method`` stamp is baked next to the running code (the install
 # tree), NOT into $HERMES_HOME. $HERMES_HOME (/opt/data) is a shared data
 # volume that is commonly bind-mounted from the host and even shared with a
@@ -236,13 +238,11 @@ RUN mkdir -p /opt/hermes/bin && \
 #
 # The arg is optional — local `docker build` without --build-arg simply
 # omits the file, and the runtime falls back to live-git lookup.  CI
-# (.github/workflows/docker-publish.yml) passes ${{ github.sha }} so
+# (.github/workflows/docker.yml) passes ${{ github.sha }} so
 # every published image has it.
 ARG HERMES_GIT_SHA=
 RUN if [ -n "${HERMES_GIT_SHA}" ]; then \
-        chmod u+w /opt/hermes && \
-        printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha && \
-        chmod a-w /opt/hermes /opt/hermes/.hermes_build_sha; \
+        printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha; \
    fi

 # ---------- s6-overlay service wiring ----------
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@

 **The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.

-Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NovitaAI](https://novita.ai) (AI-native cloud for Model API, Agent Sandbox, and GPU Cloud), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
+Use any model you want — [Nous Portal](https://portal.nousresearch.com), OpenRouter, OpenAI, your own endpoint, and [many others](https://hermes-agent.nousresearch.com/docs/integrations/providers). Switch with `hermes model` — no code changes, no lock-in.

 <table>
 <tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@@ -722,10 +722,50 @@ def init_agent(
    elif agent.provider == "moa":
        from agent.moa_loop import MoAClient
        agent.api_mode = "chat_completions"
-        agent.client = MoAClient(agent.model or "default")
+
+        # Route reference-model outputs to the agent's tool_progress_callback so
+        # every surface that already consumes it (CLI spinner/scrollback, TUI,
+        # desktop, gateway) can show each reference's answer as a labelled block
+        # before the aggregator acts. The facade emits "moa.reference" and
+        # "moa.aggregating" events; we forward them through the same callback
+        # the tool lifecycle uses. Best-effort and cache-safe — these are
+        # display-only events, they never touch the message history.
+        def _moa_reference_relay(event: str, **kwargs: Any) -> None:
+            cb = getattr(agent, "tool_progress_callback", None)
+            if cb is None:
+                return
+            try:
+                if event == "moa.reference":
+                    label = str(kwargs.get("label") or "")
+                    text = str(kwargs.get("text") or "")
+                    idx = kwargs.get("index")
+                    count = kwargs.get("count")
+                    cb(
+                        "moa.reference",
+                        label,
+                        text,
+                        None,
+                        moa_index=idx,
+                        moa_count=count,
+                    )
+                elif event == "moa.aggregating":
+                    cb(
+                        "moa.aggregating",
+                        str(kwargs.get("aggregator") or ""),
+                        None,
+                        None,
+                        moa_ref_count=kwargs.get("ref_count"),
+                    )
+            except Exception:
+                pass
+
+        agent.client = MoAClient(
+            agent.model or "default",
+            reference_callback=_moa_reference_relay,
+        )
        agent._client_kwargs = {}
        agent.api_key = api_key or "moa-virtual-provider"
-        agent.base_url = base_url or "moa://local"
+        agent.base_url = "moa://local"
        if not agent.quiet_mode:
            print(f"🤖 AI Agent initialized with MoA preset: {agent.model}")
    elif agent.api_mode == "bedrock_converse":
@@ -1267,6 +1307,12 @@ def init_agent(
        _agent_section = {}
    agent._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")

+    # Intent-ack continuation config: "auto" (default — codex_responses only,
+    # the historical gate), true (all api_modes), false (never), or a list of
+    # model-name substrings.  Resolved against the active api_mode/model in the
+    # conversation loop's intent-ack block.
+    agent._intent_ack_continuation = _agent_section.get("intent_ack_continuation", "auto")
+
    # Universal task-completion guidance toggle.  Default True.  Surfaced
    # as a separate flag from tool_use_enforcement because the guidance
    # applies to ALL models, not just the model families enforcement
@@ -1630,8 +1676,10 @@ def init_agent(
            f"Model {agent.model} has a context window of {_ctx:,} tokens, "
            f"which is below the minimum {MINIMUM_CONTEXT_LENGTH:,} required "
            f"by Hermes Agent.  Choose a model with at least "
-            f"{MINIMUM_CONTEXT_LENGTH // 1000}K context, or set "
-            f"model.context_length in config.yaml to override."
+            f"{MINIMUM_CONTEXT_LENGTH // 1000}K context.  If your server "
+            f"reports a window smaller than the model's true window, set "
+            f"model.context_length in config.yaml to the real value "
+            f"(this must be at least {MINIMUM_CONTEXT_LENGTH // 1000}K)."
        )

    # Inject context engine tool schemas (e.g. lcm_grep, lcm_describe, lcm_expand).
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@@ -42,6 +42,14 @@ from utils import base_url_host_matches, base_url_hostname, env_var_enabled, ato
 logger = logging.getLogger(__name__)


+# Max consecutive successful credential-pool token refreshes of the SAME entry
+# on a persistent auth failure before we give up and let the fallback chain
+# activate. A single-entry OAuth pool can re-mint a fresh token indefinitely
+# even when the upstream keeps rejecting it, so without this cap the retry loop
+# spins forever and never reaches ``_try_activate_fallback``. See #26080.
+_MAX_AUTH_REFRESH_ATTEMPTS = 2
+
+
 def _ra():
    """Lazy ``run_agent`` reference for test-patch routing."""
    import run_agent
@@ -775,6 +783,30 @@ def recover_with_credential_pool(
            return False, has_retried_429
        refreshed = pool.try_refresh_current()
        if refreshed is not None:
+            # ``try_refresh_current()`` re-mints a fresh OAuth token and reports
+            # success even when the upstream keeps rejecting it — a single-entry
+            # pool (common for OAuth/Max subscribers) has nothing to rotate to,
+            # so a bare "refreshed → retry" loop spins forever on the same dead
+            # token and the configured fallback never activates. Cap consecutive
+            # same-entry refreshes and fall through to fallback once exceeded.
+            # See #26080.
+            refreshed_id = getattr(refreshed, "id", None)
+            if refreshed_id is not None:
+                refresh_counts = getattr(agent, "_auth_pool_refresh_counts", None)
+                if refresh_counts is None:
+                    refresh_counts = {}
+                    agent._auth_pool_refresh_counts = refresh_counts
+                refresh_key = (agent.provider, refreshed_id)
+                refresh_counts[refresh_key] = refresh_counts.get(refresh_key, 0) + 1
+                if refresh_counts[refresh_key] > _MAX_AUTH_REFRESH_ATTEMPTS:
+                    _ra().logger.warning(
+                        "Credential auth failure persists after %s refreshes for "
+                        "pool entry %s — treating as unrecoverable and allowing "
+                        "fallback to activate.",
+                        refresh_counts[refresh_key] - 1,
+                        refreshed_id,
+                    )
+                    return False, has_retried_429
            _ra().logger.info(f"Credential auth failure — refreshed pool entry {getattr(refreshed, 'id', '?')}")
            agent._swap_credential(refreshed)
            return True, has_retried_429
@@ -1046,6 +1078,34 @@ def restore_primary_runtime(agent) -> bool:
            api_mode=rt.get("compressor_api_mode", ""),
        )

+        # ── Re-select from the credential pool if one is available ──
+        # The snapshot's api_key was captured at construction time.  Across
+        # turns the pool may have rotated (token revocation, billing/rate-limit
+        # exhaustion, cooldown), leaving the snapshot key stale.  Restoring it
+        # blindly re-fails on the first request and burns through the remaining
+        # pool entries before cross-provider fallback even gets a chance.  Ask
+        # the pool for its current best entry and swap the live credential in.
+        # When the pool is absent, empty, or the entry has no usable key, we
+        # keep the snapshot key (the existing behavior).  Fixes #25205.
+        pool = getattr(agent, "_credential_pool", None)
+        if pool is not None and pool.has_available():
+            entry = pool.select()
+            if entry is not None:
+                entry_key = (
+                    getattr(entry, "runtime_api_key", None)
+                    or getattr(entry, "access_token", "")
+                )
+                if entry_key:
+                    # ``_swap_credential`` rebuilds the OpenAI/Anthropic client,
+                    # reapplies base-url-scoped headers, and carries the
+                    # accumulated base_url / OAuth-detection fixes (#33163).
+                    agent._swap_credential(entry)
+                    logger.info(
+                        "Restore re-selected pool entry %s (%s)",
+                        getattr(entry, "id", "?"),
+                        getattr(entry, "label", "?"),
+                    )
+
        # ── Reset fallback chain for the new turn ──
        agent._fallback_activated = False
        agent._fallback_index = 0
@@ -1420,6 +1480,15 @@ def create_openai_client(agent, client_kwargs: dict, *, reason: str, shared: boo
        keepalive_http = agent._build_keepalive_http_client(client_kwargs.get("base_url", ""))
        if keepalive_http is not None:
            client_kwargs["http_client"] = keepalive_http
+    # Delegate all rate-limit / 5xx retry to hermes's outer conversation loop,
+    # which honors Retry-After and applies adaptive/jittered backoff. The OpenAI
+    # SDK default (max_retries=2) uses its own 1-2s backoff that ignores
+    # Retry-After and double-retries inside our loop — the same deadlock the
+    # Anthropic clients hit (#26293). This is the single chokepoint every primary
+    # OpenAI/aggregator client passes through (init, switch_model, recovery,
+    # restore, request-scoped); auxiliary_client builds its own clients and keeps
+    # SDK retries because it is NOT wrapped by the conversation loop.
+    client_kwargs.setdefault("max_retries", 0)
    # Uses the module-level `OpenAI` name, resolved lazily on first
    # access via __getattr__ below. Tests patch via `run_agent.OpenAI`.
    client = _ra().OpenAI(**client_kwargs)
@@ -1499,6 +1568,10 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
    # _client_kwargs is a dict — snapshot a shallow copy so mutating the
    # live dict doesn't poison the rollback target.
    _snapshot["_client_kwargs"] = dict(getattr(agent, "_client_kwargs", {}) or {})
+    # Snapshot the credential pool reference so a failed client rebuild can
+    # restore the original pool (issue #52727: pool reload is part of this
+    # switch and must be reversible on rollback).
+    _snapshot["_credential_pool"] = getattr(agent, "_credential_pool", _MISSING)

    try:
        # Clear the per-config context_length override so the new model's
@@ -1523,8 +1596,36 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
        if api_key:
            agent.api_key = api_key

+        # ── Reload credential pool for the new provider (issue #52727) ──
+        # Without this, ``recover_with_credential_pool`` sees a
+        # ``pool.provider != agent.provider`` mismatch and short-circuits,
+        # leaving the new provider with no rotation/recovery on 401/429 and
+        # burning the original pool's entries. Only reload when the provider
+        # actually changed (or the pool was missing) — re-selecting the same
+        # provider must not churn the pool reference. A reload failure is
+        # logged + swallowed: the switch itself must still complete.
+        old_norm = (old_provider or "").strip().lower()
+        new_norm = (new_provider or "").strip().lower()
+        if old_norm != new_norm or getattr(agent, "_credential_pool", None) is None:
+            try:
+                from agent.credential_pool import load_pool
+                agent._credential_pool = load_pool(new_provider)
+            except Exception as _pool_exc:  # noqa: BLE001
+                logger.warning(
+                    "switch_model: credential pool reload failed for %s (%s); "
+                    "continuing without pool rotation this turn",
+                    new_provider, _pool_exc,
+                )
+
        # ── Build new client ──
-        if api_mode == "anthropic_messages":
+        if (new_provider or "").strip().lower() == "moa":
+            from agent.moa_loop import MoAClient
+
+            agent.api_key = api_key or "moa-virtual-provider"
+            agent.base_url = "moa://local"
+            agent._client_kwargs = {}
+            agent.client = MoAClient(agent.model or "default")
+        elif api_mode == "anthropic_messages":
            from agent.anthropic_adapter import (
                build_anthropic_client,
                resolve_anthropic_token,
@@ -2104,8 +2205,21 @@ def looks_like_codex_intermediate_ack(
    user_message: str,
    assistant_content: str,
    messages: List[Dict[str, Any]],
+    require_workspace: bool = True,
 ) -> bool:
-    """Detect a planning/ack message that should continue instead of ending the turn."""
+    """Detect a planning/ack message that should continue instead of ending the turn.
+
+    ``require_workspace`` (default True) keeps the original codex-coding scope:
+    the ack must reference a filesystem/repo workspace. The conversation loop
+    passes ``require_workspace=False`` when the user has explicitly opted into
+    intent-ack continuation for all api_modes (``agent.intent_ack_continuation``
+    is ``true`` or a model-list), so general autonomous workflows ("I'll run a
+    health check on the server", "I'll start the deployment") — which carry a
+    future-ack and an action verb but no filesystem reference — are caught too.
+    The future-ack + short-content + no-prior-tools + action-verb requirements
+    always apply, which is what keeps conversational "I'll help you brainstorm"
+    replies from tripping it.
+    """
    if any(isinstance(msg, dict) and msg.get("role") == "tool" for msg in messages):
        return False

@@ -2158,17 +2272,67 @@ def looks_like_codex_intermediate_ack(
        "path",
    )

+    assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
+    if not assistant_mentions_action:
+        return False
+
+    # Opted-in (all-api_mode) path: a future-ack + action verb + no prior tool
+    # call is enough — the user asked us to keep going when the model only
+    # announces intent, regardless of whether a filesystem is involved.
+    if not require_workspace:
+        return True
+
    user_text = (user_message or "").strip().lower()
    user_targets_workspace = (
        any(marker in user_text for marker in workspace_markers)
        or "~/" in user_text
        or "/" in user_text
    )
-    assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
    assistant_targets_workspace = any(
        marker in assistant_text for marker in workspace_markers
    )
-    return (user_targets_workspace or assistant_targets_workspace) and assistant_mentions_action
+    return user_targets_workspace or assistant_targets_workspace
+
+
+def intent_ack_continuation_mode(agent) -> str:
+    """Classify the resolved intent-ack continuation mode for this turn.
+
+    Returns one of:
+      * ``"off"``        — never continue.
+      * ``"codex_only"`` — historical scope: continue only on the
+        ``codex_responses`` api_mode, and only for codebase/workspace acks
+        (``require_workspace=True``).
+      * ``"all"``        — user opted in for every api_mode; continue on any
+        future-ack + action verb (``require_workspace=False``).
+
+    Mirrors the four-mode shape of ``agent.tool_use_enforcement``: ``"auto"``
+    (default) → codex_only; ``True``/"true"/"always"/"yes"/"on" → all;
+    ``False``/"false"/"never"/"no"/"off" → off; ``list`` → all when a substring
+    matches the active model name, else off.
+    """
+    mode = getattr(agent, "_intent_ack_continuation", "auto")
+
+    if mode is True or (isinstance(mode, str) and mode.lower() in {"true", "always", "yes", "on"}):
+        return "all"
+    if mode is False or (isinstance(mode, str) and mode.lower() in {"false", "never", "no", "off"}):
+        return "off"
+    if isinstance(mode, list):
+        model_lower = (agent.model or "").lower()
+        return "all" if any(p.lower() in model_lower for p in mode if isinstance(p, str)) else "off"
+    # "auto" or any unrecognised value — historical codex-only behavior.
+    return "codex_only" if agent.api_mode == "codex_responses" else "off"
+
+
+def intent_ack_continuation_enabled(agent) -> bool:
+    """Whether intent-ack continuation should fire at all for this turn.
+
+    The ``codex_ack_continuations < 2`` per-turn cap and the
+    ``looks_like_codex_intermediate_ack`` detector are applied by the caller;
+    this only decides the on/off gate. Callers that also need to know whether
+    the workspace requirement applies should use ``intent_ack_continuation_mode``
+    directly (``"codex_only"`` ⇒ require_workspace=True, ``"all"`` ⇒ False).
+    """
+    return intent_ack_continuation_mode(agent) != "off"



--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -673,6 +673,9 @@ def _build_anthropic_client_with_bearer_hook(
    kwargs = {
        "timeout": timeout_obj,
        "http_client": http_client,
+        # Delegate retry to hermes's outer loop (honors Retry-After); the SDK
+        # default max_retries=2 ignores it and double-retries. (#26293)
+        "max_retries": 0,
        # The SDK requires *something* for api_key/auth_token. Our
        # event hook overrides Authorization per request so this value
        # is never sent. The sentinel string makes accidental leaks
@@ -757,6 +760,12 @@ def build_anthropic_client(
    _read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
    kwargs = {
        "timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
+        # Delegate all rate-limit / 5xx retry to hermes's outer conversation
+        # loop, which honors Retry-After. The SDK default (max_retries=2) uses
+        # its own 1-2s backoff that ignores Retry-After and double-retries
+        # inside our loop — burning request slots against a bucket that won't
+        # refill for minutes. (#26293)
+        "max_retries": 0,
    }
    if normalized_base_url:
        # Azure Anthropic endpoints require an ``api-version`` query parameter.
@@ -852,6 +861,9 @@ def build_anthropic_bedrock_client(region: str):
    return _anthropic_sdk.AnthropicBedrock(
        aws_region=region,
        timeout=Timeout(timeout=900.0, connect=10.0),
+        # Delegate retry to hermes's outer loop (honors Retry-After); the SDK
+        # default max_retries=2 ignores it and double-retries. (#26293)
+        max_retries=0,
        default_headers={"anthropic-beta": ",".join([*_COMMON_BETAS, _CONTEXT_1M_BETA])},
    )

@@ -914,44 +926,72 @@ def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
    return None


+def _read_claude_code_credentials_from_file() -> Optional[Dict[str, Any]]:
+    """Read Claude Code OAuth credentials from ~/.claude/.credentials.json.
+
+    Best-effort: a missing, unreadable, undecodable, or unexpectedly shaped
+    file yields ``None`` instead of raising.
+
+    Returns:
+        Dict with ``accessToken``, ``refreshToken`` (``""`` when absent),
+        ``expiresAt`` (``0`` when absent) and ``source`` set to
+        ``"claude_code_credentials_file"``; or ``None`` when no usable
+        access token is found.
+    """
+    cred_path = Path.home() / ".claude" / ".credentials.json"
+    if not cred_path.exists():
+        return None
+    try:
+        data = json.loads(cred_path.read_text(encoding="utf-8"))
+    except (ValueError, OSError) as e:
+        # ValueError covers json.JSONDecodeError as well as the
+        # UnicodeDecodeError read_text() raises on non-UTF-8 bytes.
+        logger.debug("Failed to read ~/.claude/.credentials.json: %s", e)
+        return None
+
+    # Valid JSON can still have a non-object root (list, string, null);
+    # calling .get() on that would raise AttributeError.
+    if not isinstance(data, dict):
+        return None
+    oauth_data = data.get("claudeAiOauth")
+    if not isinstance(oauth_data, dict):
+        return None
+    access_token = oauth_data.get("accessToken", "")
+    if not access_token:
+        return None
+    return {
+        "accessToken": access_token,
+        "refreshToken": oauth_data.get("refreshToken", ""),
+        "expiresAt": oauth_data.get("expiresAt", 0),
+        "source": "claude_code_credentials_file",
+    }
+
+
 def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
    """Read refreshable Claude Code OAuth credentials.

-    Checks two sources in order:
+    Reads from two possible sources and reconciles them:
      1. macOS Keychain (Darwin only) — "Claude Code-credentials" entry
      2. ~/.claude/.credentials.json file

+    Selection rules when both are present:
+      - If exactly one is non-expired, prefer that one. (Handles the case
+        where Claude Code refreshes one source but not the other — observed
+        in the wild on Claude Code 2.1.x.)
+      - Otherwise, prefer the source with the later ``expiresAt`` so that
+        any subsequent refresh uses the most recent ``refreshToken``.
+        Ties (including both values missing or 0) go to the Keychain.
+
    This intentionally excludes ~/.claude.json primaryApiKey. Opencode's
    subscription flow is OAuth/setup-token based with refreshable credentials,
    and native direct Anthropic provider usage should follow that path rather
    than auto-detecting Claude's first-party managed key.

-    Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
+    Returns dict with {accessToken, refreshToken?, expiresAt?, source} or None.
    """
-    # Try macOS Keychain first (covers Claude Code >=2.1.114)
    kc_creds = _read_claude_code_credentials_from_keychain()
-    if kc_creds:
-        return kc_creds
+    file_creds = _read_claude_code_credentials_from_file()

-    # Fall back to JSON file
-    cred_path = Path.home() / ".claude" / ".credentials.json"
-    if cred_path.exists():
-        try:
-            data = json.loads(cred_path.read_text(encoding="utf-8"))
-            oauth_data = data.get("claudeAiOauth")
-            if oauth_data and isinstance(oauth_data, dict):
-                access_token = oauth_data.get("accessToken", "")
-                if access_token:
-                    return {
-                        "accessToken": access_token,
-                        "refreshToken": oauth_data.get("refreshToken", ""),
-                        "expiresAt": oauth_data.get("expiresAt", 0),
-                        "source": "claude_code_credentials_file",
-                    }
-        except (json.JSONDecodeError, OSError, IOError) as e:
-            logger.debug("Failed to read ~/.claude/.credentials.json: %s", e)
+    if kc_creds and file_creds:
+        kc_valid = is_claude_code_token_valid(kc_creds)
+        file_valid = is_claude_code_token_valid(file_creds)
+        if kc_valid and not file_valid:
+            return kc_creds
+        if file_valid and not kc_valid:
+            return file_creds
+        # Both valid or both expired: prefer the later expiresAt so the
+        # downstream refresh path uses the freshest refresh_token.
+        kc_exp = kc_creds.get("expiresAt", 0) or 0
+        file_exp = file_creds.get("expiresAt", 0) or 0
+        return kc_creds if kc_exp >= file_exp else file_creds

-    return None
+    # At most one source was found; return it, or None when neither exists.
+    return kc_creds or file_creds


 def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool:
@@ -1034,8 +1074,40 @@ def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False)


 def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]:
-    """Attempt to refresh an expired Claude Code OAuth token."""
-    refresh_token = creds.get("refreshToken", "")
+    """Attempt to refresh an expired Claude Code OAuth token.
+
+    Claude Code's OAuth refresh tokens are single-use: a successful refresh
+    rotates the pair and invalidates the old refresh token. Claude Code itself
+    also refreshes on its own schedule (IDE/CLI activity), so by the time
+    Hermes notices an expired token, Claude Code may have already rotated it.
+    POSTing our now-stale refresh token in that window races Claude Code and
+    fails with ``invalid_grant``.
+
+    So before refreshing, re-read the live credential sources. If Claude Code
+    has already produced a valid token, adopt it and skip the POST entirely.
+    Only fall back to refreshing ourselves when no fresh credential is found.
+    """
+    # Claude Code may have already refreshed — adopt its token rather than
+    # racing it with our (possibly already-rotated) refresh token. Only adopt
+    # when the live re-read produced a DIFFERENT token with a real future
+    # expiry: re-adopting the same credential we were just handed would be a
+    # no-op, and a 0/absent ``expiresAt`` means "managed key / unknown expiry"
+    # (see is_claude_code_token_valid) which must NOT be treated as a fresh
+    # refresh here.
+    current = read_claude_code_credentials()
+    if current:
+        current_token = current.get("accessToken", "")
+        current_exp = current.get("expiresAt", 0) or 0
+        if (
+            current_token
+            and current_token != creds.get("accessToken", "")
+            and current_exp > 0
+            and is_claude_code_token_valid(current)
+        ):
+            logger.debug("Adopted Claude Code's already-refreshed OAuth token")
+            return current_token
+
+    refresh_token = (current or {}).get("refreshToken", "") or creds.get("refreshToken", "")
    if not refresh_token:
        logger.debug("No refresh token available — cannot refresh")
        return None
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -102,6 +102,7 @@ OpenAI = _OpenAIProxy()  # module-level name, resolves lazily on call/isinstance

 from agent.credential_pool import load_pool
 from agent.model_metadata import MINIMUM_CONTEXT_LENGTH, get_model_context_length
+from agent.process_bootstrap import build_keepalive_http_client
 from hermes_cli.config import get_hermes_home
 from hermes_constants import OPENROUTER_BASE_URL
 from utils import base_url_host_matches, base_url_hostname, env_float, model_forces_max_completion_tokens, normalize_proxy_env_vars
@@ -109,6 +110,23 @@ from utils import base_url_host_matches, base_url_hostname, env_float, model_for
 logger = logging.getLogger(__name__)


+def _openai_http_client_kwargs(
+    base_url: Optional[str],
+    *,
+    async_mode: bool = False,
+) -> Dict[str, Any]:
+    """Build the ``http_client`` constructor kwarg for an OpenAI client.
+
+    Asks ``build_keepalive_http_client`` for a keepalive httpx client (env-only
+    proxy, not the macOS system proxy) for ``base_url``. Returns
+    ``{"http_client": client}`` when one is produced and an empty dict when
+    the builder returns ``None``.
+    """
+    target_url = str(base_url or "")
+    keepalive = build_keepalive_http_client(target_url, async_mode=async_mode)
+    return {} if keepalive is None else {"http_client": keepalive}
+
+
+def _create_openai_client(*, api_key: str, base_url: str, **kwargs: Any) -> Any:
+    """Construct an ``OpenAI`` client, defaulting to the keepalive HTTP client.
+
+    Args:
+        api_key: API key forwarded to ``OpenAI``.
+        base_url: Endpoint base URL; also handed to
+            ``_openai_http_client_kwargs`` to build the default HTTP client.
+        **kwargs: Extra ``OpenAI`` constructor arguments. A caller-supplied
+            ``http_client`` takes precedence over the keepalive default.
+
+    Returns:
+        Whatever ``OpenAI(...)`` returns.
+    """
+    # Only build the keepalive client when it will actually be used. Building
+    # one and then discarding it in favour of the caller's ``http_client``
+    # would leave an unused, never-closed HTTP client behind.
+    if "http_client" not in kwargs:
+        kwargs = {**_openai_http_client_kwargs(base_url), **kwargs}
+    return OpenAI(api_key=api_key, base_url=base_url, **kwargs)
+
+
 # ── Interrupt protection for atomic auxiliary tasks ──────────────────────
 # Some auxiliary tasks must NOT be aborted mid-flight by a gateway interrupt
 # (e.g. an incoming user message while the agent is busy). Context
@@ -1614,7 +1632,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            _merged_aux = _apply_user_default_headers(extra.get("default_headers"))
            if _merged_aux:
                extra["default_headers"] = _merged_aux
-            _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
+            _client = _create_openai_client(api_key=api_key, base_url=base_url, **extra)
            _client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
            return _client, model

@@ -1654,7 +1672,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
        _merged_aux2 = _apply_user_default_headers(extra.get("default_headers"))
        if _merged_aux2:
            extra["default_headers"] = _merged_aux2
-        _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
+        _client = _create_openai_client(api_key=api_key, base_url=base_url, **extra)
        _client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
        return _client, model

@@ -1669,20 +1687,21 @@ def _try_openrouter(explicit_api_key: str = None, model: str = None) -> Tuple[Op
    pool_present, entry = _select_pool_entry("openrouter")
    if pool_present:
        or_key = explicit_api_key or _pool_runtime_api_key(entry)
-        if not or_key:
-            _mark_provider_unhealthy("openrouter", ttl=60)
-            return None, None
-        base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
-        logger.debug("Auxiliary client: OpenRouter via pool")
-        return OpenAI(api_key=or_key, base_url=base_url,
-                       default_headers=build_or_headers()), model or _OPENROUTER_MODEL
+        if or_key:
+            base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
+            logger.debug("Auxiliary client: OpenRouter via pool")
+            return _create_openai_client(api_key=or_key, base_url=base_url,
+                           default_headers=build_or_headers()), model or _OPENROUTER_MODEL
+        # Pool exists but is exhausted (no usable runtime key) — fall through to
+        # the OPENROUTER_API_KEY env-var path rather than failing outright.
+        logger.debug("Auxiliary client: OpenRouter pool exhausted, trying OPENROUTER_API_KEY")

    or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY")
    if not or_key:
        _mark_provider_unhealthy("openrouter", ttl=60)
        return None, None
    logger.debug("Auxiliary client: OpenRouter")
-    return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
+    return _create_openai_client(api_key=or_key, base_url=OPENROUTER_BASE_URL,
                   default_headers=build_or_headers()), model or _OPENROUTER_MODEL


@@ -1775,7 +1794,7 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
            return None, None
        base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/")
    return (
-        OpenAI(
+        _create_openai_client(
            api_key=api_key,
            base_url=base_url,
        ),
@@ -2052,7 +2071,7 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
    if _custom_headers:
        _extra["default_headers"] = _custom_headers
    if custom_mode == "codex_responses":
-        real_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
+        real_client = _create_openai_client(api_key=custom_key, base_url=_clean_base, **_extra)
        return CodexAuxiliaryClient(real_client, model), model
    if custom_mode == "anthropic_messages":
        # Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
@@ -2066,14 +2085,14 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
                "Custom endpoint declares api_mode=anthropic_messages but the "
                "anthropic SDK is not installed — falling back to OpenAI-wire."
            )
-            return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
+            return _create_openai_client(api_key=custom_key, base_url=_clean_base, **_extra), model
        return (
            AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
            model,
        )
    # URL-based anthropic detection for custom endpoints that didn't set
    # api_mode explicitly (e.g. kimi.com/coding reached via custom config).
-    _fallback_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
+    _fallback_client = _create_openai_client(api_key=custom_key, base_url=_clean_base, **_extra)
    _fallback_client = _maybe_wrap_anthropic(
        _fallback_client, model, custom_key, custom_base, custom_mode,
    )
@@ -2102,7 +2121,7 @@ def _build_xai_oauth_aux_client(model: str) -> Tuple[Optional[Any], Optional[str
        return None, None
    api_key, base_url = resolved
    logger.debug("Auxiliary client: xAI OAuth (%s via Responses API)", model)
-    real_client = OpenAI(api_key=api_key, base_url=base_url)
+    real_client = _create_openai_client(api_key=api_key, base_url=base_url)
    return CodexAuxiliaryClient(real_client, model), model


@@ -2139,7 +2158,7 @@ def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
            return None, None
        base_url = _CODEX_AUX_BASE_URL
    logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", model)
-    real_client = OpenAI(
+    real_client = _create_openai_client(
        api_key=codex_token,
        base_url=base_url,
        default_headers=_codex_cloudflare_headers(codex_token),
@@ -2239,7 +2258,7 @@ def _try_azure_foundry(
    if _dq:
        extra["default_query"] = _dq

-    client = OpenAI(api_key=api_key, base_url=_clean_base, **extra)
+    client = _create_openai_client(api_key=api_key, base_url=_clean_base, **extra)

    if runtime_api_mode == "codex_responses":
        # GPT-5.x / o-series / codex models on Azure Foundry are
@@ -3624,6 +3643,37 @@ def _resolve_auto(
    # config.yaml (auxiliary.<task>.provider) still win over this.
    main_provider = str(runtime_provider or _read_main_provider() or "")
    main_model = str(runtime_model or _read_main_model() or "")
+
+    # MoA virtual provider: the "model" is a preset name (e.g. "opus-gpt") and
+    # there is no real "moa" HTTP endpoint, so resolving an aux client against
+    # provider="moa"/model=<preset> sends the preset name as the model id and
+    # the provider 400s ("opus-gpt is not a valid model ID"). Auxiliary tasks
+    # (title generation, compression, vision, …) don't need the reference
+    # fan-out — they should run on the aggregator, which is the preset's acting
+    # model. Resolve the MoA preset to its aggregator slot and continue Step 1
+    # with that real provider+model. Mirrors the MoA context-length resolution.
+    if main_provider == "moa":
+        try:
+            from hermes_cli.config import load_config
+            from hermes_cli.moa_config import resolve_moa_preset
+
+            _preset = resolve_moa_preset(load_config().get("moa") or {}, main_model)
+            _agg = _preset.get("aggregator") or {}
+            _agg_provider = str(_agg.get("provider") or "").strip()
+            _agg_model = str(_agg.get("model") or "").strip()
+            if _agg_provider and _agg_model and _agg_provider.lower() != "moa":
+                main_provider = _agg_provider
+                main_model = _agg_model
+                # The MoA virtual runtime carries a non-HTTP base_url
+                # ("moa://local") and a placeholder api_key; they belong to the
+                # facade, not the aggregator's real provider. Drop them so the
+                # aggregator resolves through its own provider credentials.
+                runtime_base_url = ""
+                runtime_api_key = ""
+                runtime_api_mode = ""
+        except Exception:
+            logger.debug("MoA aux resolution to aggregator failed", exc_info=True)
+
    if (main_provider and main_model
            and main_provider not in {"auto", ""}):
        resolved_provider = main_provider
@@ -3770,6 +3820,10 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
    _merged_async = _apply_user_default_headers(async_kwargs.get("default_headers"))
    if _merged_async:
        async_kwargs["default_headers"] = _merged_async
+    async_kwargs = {
+        **_openai_http_client_kwargs(sync_base_url, async_mode=True),
+        **async_kwargs,
+    }
    return AsyncOpenAI(**async_kwargs), model


@@ -3980,7 +4034,7 @@ def resolve_provider_client(
                               "but no Codex OAuth token found (run: hermes model)")
                return None, None
            final_model = _normalize_resolved_model(model, provider)
-            raw_client = OpenAI(
+            raw_client = _create_openai_client(
                api_key=codex_token,
                base_url=_CODEX_AUX_BASE_URL,
                default_headers=_codex_cloudflare_headers(codex_token),
@@ -4061,7 +4115,7 @@ def resolve_provider_client(
            _merged_custom = _apply_user_default_headers(extra.get("default_headers"))
            if _merged_custom:
                extra["default_headers"] = _merged_custom
-            client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
+            client = _create_openai_client(api_key=custom_key, base_url=_clean_base, **extra)
            client = _wrap_if_needed(client, final_model, custom_base, custom_key)
            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                    else (client, final_model))
@@ -4165,7 +4219,7 @@ def resolve_provider_client(
                        _fb_headers = _apply_user_default_headers(_fb_extra.get("default_headers"))
                        if _fb_headers:
                            _fb_extra["default_headers"] = _fb_headers
-                        client = OpenAI(api_key=custom_key, base_url=_fb_clean, **_fb_extra)
+                        client = _create_openai_client(api_key=custom_key, base_url=_fb_clean, **_fb_extra)
                        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                                else (client, final_model))
                    sync_anthropic = AnthropicAuxiliaryClient(
@@ -4174,7 +4228,7 @@ def resolve_provider_client(
                    if async_mode:
                        return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model
                    return sync_anthropic, final_model
-                client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
+                client = _create_openai_client(api_key=custom_key, base_url=_clean_base2, **_extra2)
                # codex_responses or inherited auto-detect (via _wrap_if_needed).
                # _wrap_if_needed reads the closed-over `api_mode` (the task-level
                # override). Named-provider entry api_mode=codex_responses also
@@ -4316,7 +4370,7 @@ def resolve_provider_client(
        _merged_main = _apply_user_default_headers(headers)
        if _merged_main:
            headers = _merged_main
-        client = OpenAI(api_key=api_key, base_url=base_url,
+        client = _create_openai_client(api_key=api_key, base_url=base_url,
                        **({"default_headers": headers} if headers else {}))

        # Copilot GPT-5+ models (except gpt-5-mini) require the Responses
@@ -4852,7 +4906,7 @@ def _refresh_nous_auxiliary_client(
        return None, model

    fresh_key, fresh_base_url = runtime
-    sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url)
+    sync_client = _create_openai_client(api_key=fresh_key, base_url=fresh_base_url)
    final_model = model

    current_loop = None
@@ -5962,8 +6016,17 @@ def call_llm(
        # When the provider returns a 429 rate-limit (not billing), fall
        # back to an alternative provider instead of exhausting retries
        # against the same rate-limited endpoint.
+        #
+        # ── Auth error fallback (#21165) ─────────────────────────────
+        # When the resolved provider returns 401 and neither the Nous
+        # refresh path nor explicit provider credential refresh applies,
+        # fall back to an alternative provider instead of dropping the
+        # auxiliary task on the floor (silent compression failure /
+        # message loss). Auth is NOT a capacity error: it only bypasses
+        # the explicit-provider gate when the user is in auto mode.
        should_fallback = (
-            _is_payment_error(first_err)
+            _is_auth_error(first_err)
+            or _is_payment_error(first_err)
            or _is_connection_error(first_err)
            or _is_rate_limit_error(first_err)
            or _is_model_incompatible_error(first_err)
@@ -5993,7 +6056,9 @@ def call_llm(
            or _is_invalid_aux_response_error(first_err)
        )
        if should_fallback and (is_auto or is_capacity_error):
-            if _is_payment_error(first_err):
+            if _is_auth_error(first_err):
+                reason = "auth error"
+            elif _is_payment_error(first_err):
                reason = "payment error"
                # Resolve the actual provider label (resolved_provider may be
                # "auto"; the client's base_url tells us which backend got the
@@ -6442,8 +6507,13 @@ async def async_call_llm(
                        raise

        # ── Payment / connection / rate-limit fallback (mirrors sync call_llm) ──
+        # Auth error fallback (#21165): a 401 that survived the refresh path
+        # falls back in auto mode just like the sync call_llm() path. Auth is
+        # NOT a capacity error, so on an explicit provider it still respects
+        # the user's choice (handled by the is_auto/is_capacity_error gate).
        should_fallback = (
-            _is_payment_error(first_err)
+            _is_auth_error(first_err)
+            or _is_payment_error(first_err)
            or _is_connection_error(first_err)
            or _is_rate_limit_error(first_err)
            or _is_model_incompatible_error(first_err)
@@ -6465,7 +6535,9 @@ async def async_call_llm(
            or _is_invalid_aux_response_error(first_err)
        )
        if should_fallback and (is_auto or is_capacity_error):
-            if _is_payment_error(first_err):
+            if _is_auth_error(first_err):
+                reason = "auth error"
+            elif _is_payment_error(first_err):
                reason = "payment error"
                _mark_provider_unhealthy(
                    _recoverable_pool_provider(resolved_provider, client) or resolved_provider
--- a/agent/chat_completion_helpers.py
+++ b/agent/chat_completion_helpers.py
@@ -37,6 +37,18 @@ from tools.terminal_tool import is_persistent_env
 from utils import base_url_host_matches, base_url_hostname, env_float, env_int

 logger = logging.getLogger(__name__)
+_OPENROUTER_PROVIDER_SORT_VALUES = {"throughput", "latency", "price"}
+
+# When the fallback chain is fully exhausted on a non-rate-limit failure
+# (e.g. every provider returns a non-retryable client error like HTTP 400),
+# arm a short cooldown so the NEXT turn's restore_primary_runtime stays gated
+# and does not reset _fallback_index=0 to replay the entire chain again.
+# Without this, a client/gateway that re-submits immediately would re-marshal
+# the full (potentially 80k-token) context once per provider every turn and
+# can drive a constrained host into memory/swap exhaustion.  Rate-limit /
+# billing reasons keep their own 60s cooldown (armed inside
+# try_activate_fallback); this is the narrower non-rate-limit case.  See
+# issue #24996.
+_FALLBACK_EXHAUSTED_COOLDOWN_S = 5.0


 def _ra():
@@ -115,6 +127,23 @@ def _is_openai_codex_backend(agent) -> bool:
    )


+def _validated_openrouter_provider_sort(raw_sort: Any) -> Optional[str]:
+    """Normalize an OpenRouter ``provider.sort`` setting.
+
+    Non-string or blank input yields ``None`` silently. A string is stripped
+    and lower-cased; when the result is one of the allowed values it is
+    returned, otherwise a warning is logged and ``None`` is returned.
+    """
+    normalized = raw_sort.strip().lower() if isinstance(raw_sort, str) else ""
+    if not normalized:
+        return None
+    if normalized not in _OPENROUTER_PROVIDER_SORT_VALUES:
+        allowed = ", ".join(sorted(_OPENROUTER_PROVIDER_SORT_VALUES))
+        logger.warning(
+            "Ignoring invalid OpenRouter provider.sort value %r (allowed: %s)",
+            raw_sort,
+            allowed,
+        )
+        return None
+    return normalized
+
+
 def _env_float(name: str, default: float) -> float:
    try:
        return float(os.getenv(name, str(default)))
@@ -229,6 +258,11 @@ def interruptible_api_call(agent, api_kwargs: dict):
                        invalidate_runtime_client(region)
                    raise
                result["response"] = normalize_converse_response(raw_response)
+            elif agent.provider == "moa":
+                # MoA is a virtual chat-completions provider backed by the
+                # in-process MoAClient facade. Do not rebuild a request-local
+                # OpenAI client from the virtual runtime metadata.
+                result["response"] = agent.client.chat.completions.create(**api_kwargs)
            else:
                request_client = _set_request_client(
                    agent._create_request_openai_client(
@@ -698,8 +732,9 @@ def build_api_kwargs(agent, api_messages: list) -> dict:
        _prefs["ignore"] = agent.providers_ignored
    if agent.providers_order:
        _prefs["order"] = agent.providers_order
-    if agent.provider_sort:
-        _prefs["sort"] = agent.provider_sort
+    _provider_sort = _validated_openrouter_provider_sort(agent.provider_sort)
+    if _provider_sort:
+        _prefs["sort"] = _provider_sort
    if agent.provider_require_parameters:
        _prefs["require_parameters"] = True
    if agent.provider_data_collection:
@@ -1015,18 +1050,23 @@ def build_assistant_message(agent, assistant_message, finish_reason: str) -> dic
                    "arguments": tool_call.function.arguments
                },
            }
-            # Defence-in-depth: redact credentials from tool call arguments
-            # before they enter conversation history. Tool execution uses the
-            # raw API response object, not this dict, so redacting the
-            # persisted shape is safe and only affects storage. Catches the
-            # case where a model accidentally inlines a secret into a tool
-            # call (e.g. `terminal(command="curl -H 'Authorization: Bearer
-            # sk-...'")`). (#19798)
-            if isinstance(tc_dict["function"]["arguments"], str):
-                from agent.redact import redact_sensitive_text
-                tc_dict["function"]["arguments"] = redact_sensitive_text(
-                    tc_dict["function"]["arguments"]
-                )
+            # Tool-call arguments are intentionally NOT redacted here. This
+            # dict enters the in-memory conversation history that is replayed
+            # to the model on every subsequent turn AND persisted to state.db,
+            # which is itself replayed verbatim on session resume
+            # (get_messages_as_conversation). Masking a credential to `***`
+            # here poisons that replay: the model reads back its own
+            # `PGPASSWORD='***' psql ...` call and copies the placeholder into
+            # the next tool call, breaking every credential-dependent command
+            # on the second turn (#43083). The masking also provided no real
+            # protection — the same secret still leaks verbatim through tool
+            # OUTPUT (file contents, command output, diffs, the compaction
+            # block), none of which this pass ever touched. Keeping secrets
+            # out of the replayable store is a separate tokenization/vault
+            # concern, not something arg-redaction can deliver without
+            # breaking replay. Storage-time redaction remains governed by the
+            # `security.redact_secrets` toggle. (#19798 introduced this;
+            # #43083 removed it.)
            # Preserve extra_content (e.g. Gemini thought_signature) so it
            # is sent back on subsequent API calls.  Without this, Gemini 3
            # thinking models reject the request with a 400 error.
@@ -1093,8 +1133,22 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
        if (not fallback_already_active) or (primary_provider and current_provider == primary_provider):
            agent._rate_limited_until = time.monotonic() + 60
    if agent._fallback_index >= len(agent._fallback_chain):
+        # Chain exhausted.  If we actually walked a non-empty chain and the
+        # failure was NOT a rate-limit/billing event (those already armed
+        # their own 60s cooldown above), arm a short cooldown so the next
+        # turn's restore_primary_runtime stays gated instead of resetting
+        # _fallback_index=0 and re-marshaling the whole context across every
+        # provider again.  Guards the cross-turn replay storm in #24996.
+        if (
+            len(agent._fallback_chain) > 0
+            and reason not in {FailoverReason.rate_limit, FailoverReason.billing}
+        ):
+            _existing_cooldown = getattr(agent, "_rate_limited_until", 0) or 0
+            agent._rate_limited_until = max(
+                _existing_cooldown,
+                time.monotonic() + _FALLBACK_EXHAUSTED_COOLDOWN_S,
+            )
        return False
-
    fb = agent._fallback_chain[agent._fallback_index]
    agent._fallback_index += 1
    fb_provider = (fb.get("provider") or "").strip().lower()
@@ -1210,14 +1264,16 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
            agent._transport_cache.clear()
        agent._fallback_activated = True

-        # Clear the credential pool when the fallback provider doesn't match
-        # the pool's provider.  The pool was seeded for the primary provider;
-        # leaving it attached means downstream recovery (rate_limit / billing /
-        # auth) calls ``_swap_credential`` with a primary entry which overwrites
-        # the agent's ``base_url`` back to the primary's endpoint — every
-        # fallback request then 404s against the wrong host.  See #33163.
+        # Rebind the credential pool to the fallback provider when the provider
+        # changes.  Keeping the primary pool attached would make downstream
+        # recovery (rate_limit / billing / auth) mutate the wrong credential
+        # set and can overwrite the fallback's base_url back to the primary
+        # endpoint.  See #33163.
+        #
        # When the fallback shares the pool's provider (e.g. both openrouter
-        # entries with different routing) the pool is preserved.
+        # entries with different routing) the pool is preserved.  When the
+        # providers differ, load the fallback provider's own pool if one exists
+        # so provider-specific rotation continues to work after the switch.
        _existing_pool = getattr(agent, "_credential_pool", None)
        if _existing_pool is not None:
            _pool_provider = (getattr(_existing_pool, "provider", "") or "").strip().lower()
@@ -1228,6 +1284,22 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
                    fb_provider, fb_model, _pool_provider,
                )
                agent._credential_pool = None
+        if getattr(agent, "_credential_pool", None) is None:
+            try:
+                from agent.credential_pool import load_pool
+
+                fallback_pool = load_pool(fb_provider)
+                if fallback_pool and fallback_pool.has_credentials():
+                    agent._credential_pool = fallback_pool
+                    logger.info(
+                        "Fallback to %s/%s: attached fallback credential pool",
+                        fb_provider, fb_model,
+                    )
+            except Exception as exc:
+                logger.debug(
+                    "Fallback to %s/%s: could not attach credential pool: %s",
+                    fb_provider, fb_model, exc,
+                )

        # Honor per-provider / per-model request_timeout_seconds for the
        # fallback target (same knob the primary client uses).  None = use
@@ -1458,8 +1530,9 @@ def handle_max_iterations(agent, messages: list, api_call_count: int) -> str:
                provider_preferences["ignore"] = agent.providers_ignored
            if agent.providers_order:
                provider_preferences["order"] = agent.providers_order
-            if agent.provider_sort:
-                provider_preferences["sort"] = agent.provider_sort
+            _provider_sort = _validated_openrouter_provider_sort(agent.provider_sort)
+            if _provider_sort:
+                provider_preferences["sort"] = _provider_sort
            if provider_preferences and (
                (agent.provider or "").strip().lower() == "openrouter"
                or agent._is_openrouter_url()
@@ -2246,7 +2319,15 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                                _fire_first_delta()
                                agent._fire_reasoning_delta(thinking_text)

-            # Return the native Anthropic Message for downstream processing
+            # Return the native Anthropic Message for downstream processing.
+            # If the stream was interrupted (the event loop broke out above on
+            # agent._interrupt_requested), do NOT call get_final_message() — on
+            # a partially-consumed stream the SDK may hang draining remaining
+            # events or return a Message with incomplete tool_use blocks (partial
+            # JSON in `input`). The outer poll loop raises InterruptedError, so
+            # this return value is discarded anyway.
+            if agent._interrupt_requested:
+                return None
            return stream.get_final_message()

    def _call():
@@ -2391,12 +2472,19 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                            diag=request_client_holder.get("diag"),
                        )
                        _close_request_client_once("stream_mid_tool_retry_cleanup")
-                        try:
-                            agent._replace_primary_openai_client(
-                                reason="stream_mid_tool_retry_pool_cleanup"
-                            )
-                        except Exception:
-                            pass
+                        if agent.api_mode == "anthropic_messages":
+                            try:
+                                agent._anthropic_client.close()
+                                agent._rebuild_anthropic_client()
+                            except Exception:
+                                pass
+                        else:
+                            try:
+                                agent._replace_primary_openai_client(
+                                    reason="stream_mid_tool_retry_pool_cleanup"
+                                )
+                            except Exception:
+                                pass
                        continue

                    # SSE error events from proxies (e.g. OpenRouter sends
@@ -2444,12 +2532,19 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                            _close_request_client_once("stream_retry_cleanup")
                            # Also rebuild the primary client to purge
                            # any dead connections from the pool.
-                            try:
-                                agent._replace_primary_openai_client(
-                                    reason="stream_retry_pool_cleanup"
-                                )
-                            except Exception:
-                                pass
+                            if agent.api_mode == "anthropic_messages":
+                                try:
+                                    agent._anthropic_client.close()
+                                    agent._rebuild_anthropic_client()
+                                except Exception:
+                                    pass
+                            else:
+                                try:
+                                    agent._replace_primary_openai_client(
+                                        reason="stream_retry_pool_cleanup"
+                                    )
+                                except Exception:
+                                    pass
                            continue
                        # Retries exhausted. Log the final failure with
                        # full diagnostic detail (chain, headers,
@@ -2620,10 +2715,17 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                pass
            # Rebuild the primary client too — its connection pool
            # may hold dead sockets from the same provider outage.
-            try:
-                agent._replace_primary_openai_client(reason="stale_stream_pool_cleanup")
-            except Exception:
-                pass
+            if agent.api_mode == "anthropic_messages":
+                try:
+                    agent._anthropic_client.close()
+                    agent._rebuild_anthropic_client()
+                except Exception:
+                    pass
+            else:
+                try:
+                    agent._replace_primary_openai_client(reason="stale_stream_pool_cleanup")
+                except Exception:
+                    pass
            # Reset the timer so we don't kill repeatedly while
            # the inner thread processes the closure.
            last_chunk_time["t"] = time.time()
@@ -2699,7 +2801,30 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                role="assistant", content=_partial_text, tool_calls=None,
                reasoning_content=None,
            )
-            return SimpleNamespace(
+            # Detect provider output-layer content filtering (e.g. MiniMax
+            # "output new_sensitive (1027)", Azure/OpenAI content_filter,
+            # Anthropic safety refusal).  The raw error is about to be
+            # swallowed into a finish_reason=length stub, so classify it HERE
+            # while we still have it and stamp the stub.  Retrying such a
+            # content-deterministic filter on the same primary just re-hits
+            # the filter — the conversation loop reads this tag and activates
+            # the fallback chain instead of burning continuation retries.
+            # error_classifier is the single source of truth for "what counts
+            # as a content filter" (#32421).
+            _content_filter_terminated = False
+            try:
+                from agent.error_classifier import classify_api_error, FailoverReason
+                _cls = classify_api_error(
+                    result["error"],
+                    provider=str(getattr(agent, "provider", "") or ""),
+                    model=str(getattr(agent, "model", "") or ""),
+                )
+                _content_filter_terminated = (
+                    _cls.reason == FailoverReason.content_policy_blocked
+                )
+            except Exception:
+                _content_filter_terminated = False
+            _stub = SimpleNamespace(
                id=PARTIAL_STREAM_STUB_ID,
                model=getattr(agent, "model", "unknown"),
                choices=[SimpleNamespace(
@@ -2708,6 +2833,9 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                usage=None,
                _dropped_tool_names=_partial_names or None,
            )
+            if _content_filter_terminated:
+                _stub._content_filter_terminated = True
+            return _stub
        raise result["error"]
    return result["response"]

--- a/agent/coding_context.py
+++ b/agent/coding_context.py
@@ -60,6 +60,8 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Optional

+from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags
+
 logger = logging.getLogger("hermes.coding_context")

 CODING_TOOLSET = "coding"
@@ -647,12 +649,14 @@ def _enabled_mcp_servers(config: Optional[dict[str, Any]]) -> list[str]:


 def _git(cwd: Path, *args: str) -> str:
+    _popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
    try:
        out = subprocess.run(
            ["git", "-C", str(cwd), *args],
            capture_output=True,
            text=True,
            timeout=_GIT_TIMEOUT,
+            **_popen_kwargs,
        )
    except (OSError, subprocess.SubprocessError):
        return ""
--- a/agent/context_references.py
+++ b/agent/context_references.py
@@ -12,6 +12,7 @@ from pathlib import Path
 from typing import Awaitable, Callable

 from agent.model_metadata import estimate_tokens_rough
+from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags

 _QUOTED_REFERENCE_VALUE = r'(?:`[^`\n]+`|"[^"\n]+"|\'[^\'\n]+\')'
 REFERENCE_PATTERN = re.compile(
@@ -290,6 +291,7 @@ def _expand_git_reference(
    args: list[str],
    label: str,
 ) -> tuple[str | None, str | None]:
+    _popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
    try:
        result = subprocess.run(
            ["git", *args],
@@ -298,6 +300,7 @@ def _expand_git_reference(
            text=True,
            timeout=30,
            stdin=subprocess.DEVNULL,
+            **_popen_kwargs,
        )
    except subprocess.TimeoutExpired:
        return f"{ref.raw}: git command timed out (30s)", None
@@ -483,6 +486,7 @@ def _iter_visible_entries(path: Path, cwd: Path, limit: int) -> list[Path]:


 def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
+    _popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
    try:
        result = subprocess.run(
            ["rg", "--files", str(path.relative_to(cwd))],
@@ -491,6 +495,7 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
            text=True,
            timeout=10,
            stdin=subprocess.DEVNULL,
+            **_popen_kwargs,
        )
    except (FileNotFoundError, OSError, subprocess.TimeoutExpired):
        return None
--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@@ -288,6 +288,29 @@ def replay_compression_warning(agent: Any) -> None:
            pass


+def conversation_history_after_compression(agent: Any, messages: list) -> Optional[list]:
+    """Pick the ``conversation_history`` baseline to use once compression ran.
+
+    Rotating (legacy) compression moves the turn onto a fresh child session
+    that has not yet received the compacted transcript through the normal
+    same-turn flush path, so the baseline is ``None`` and the next
+    persistence call writes the whole compacted list.
+
+    In-place compaction has ``archive_and_compact()`` soft-archive the old
+    active rows and insert ``messages`` as the live transcript under the
+    same session id. A ``None`` baseline would make the identity-based flush
+    treat those already-persisted dicts as new and append them a second
+    time, doubling the active context and retriggering compression.
+
+    Returns a shallow copy of ``messages`` when ``agent`` carries a truthy
+    ``_last_compaction_in_place`` attribute, otherwise ``None``. The copy
+    pins the current dict identities as history while later same-turn
+    appends to ``messages`` still count as new.
+    """
+    compacted_in_place = getattr(agent, "_last_compaction_in_place", False)
+    return list(messages) if compacted_in_place else None
+
+
 def compress_context(
    agent: Any,
    messages: list,
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -28,6 +28,7 @@ import uuid
 from typing import Any, Dict, List, Optional

 from agent.codex_responses_adapter import _summarize_user_message_for_log
+from agent.conversation_compression import conversation_history_after_compression
 from agent.display import KawaiiSpinner
 from agent.error_classifier import FailoverReason, classify_api_error
 from agent.iteration_budget import IterationBudget
@@ -587,6 +588,13 @@ def run_conversation(
    compression_attempts = 0
    _turn_exit_reason = "unknown"  # Diagnostic: why the loop ended

+    # Per-turn tally of consecutive successful credential-pool token refreshes,
+    # keyed by (provider, pool-entry-id). A persistent upstream 401 lets
+    # ``try_refresh_current()`` "succeed" forever on a single-entry OAuth pool,
+    # so this tally caps same-entry refreshes and lets the fallback chain take
+    # over instead of spinning. Reset here so each turn starts fresh. See #26080.
+    agent._auth_pool_refresh_counts = {}
+
    # Optional opt-in runtime: if api_mode == codex_app_server, hand the
    # turn to the codex app-server subprocess (terminal/file ops/patching
    # all run inside Codex). Default Hermes path is bypassed entirely.
@@ -827,7 +835,6 @@ def run_conversation(
                    aggregator=moa_config.get("aggregator") or {},
                    temperature=float(moa_config.get("reference_temperature", 0.6) or 0.6),
                    aggregator_temperature=float(moa_config.get("aggregator_temperature", 0.4) or 0.4),
-                    max_tokens=int(moa_config.get("max_tokens", 4096) or 4096),
                )
                if _moa_context:
                    for _msg in reversed(api_messages):
@@ -1692,6 +1699,56 @@ def run_conversation(

                    if agent.api_mode in {"chat_completions", "bedrock_converse", "anthropic_messages"}:
                        assistant_message = _trunc_msg
+                        # ── Content-filter stream stall → fallback (#32421) ──
+                        # When the provider's output-layer safety filter (e.g.
+                        # MiniMax "output new_sensitive (1027)", Azure
+                        # content_filter) kills the stream mid-delivery, the
+                        # raw error was classified at the swallow point and the
+                        # stub tagged ``_content_filter_terminated``.  This
+                        # filter is content-deterministic — continuation
+                        # retries against the SAME primary just re-hit it and
+                        # burn paid attempts (the loop used to give up with
+                        # "Response remained truncated after 3 continuation
+                        # attempts" and never consult the fallback chain).
+                        # Escalate to the configured fallback BEFORE retrying.
+                        _cf_terminated = getattr(
+                            response, "_content_filter_terminated", False
+                        )
+                        if (
+                            _cf_terminated
+                            and agent._fallback_index < len(agent._fallback_chain)
+                        ):
+                            agent._vprint(
+                                f"{agent.log_prefix}🛡️  Content filter terminated "
+                                f"stream — activating fallback provider...",
+                                force=True,
+                            )
+                            agent._emit_status(
+                                "Content filter terminated stream; switching to fallback..."
+                            )
+                            if agent._try_activate_fallback():
+                                # Roll the partial content (if any was already
+                                # appended in a prior continuation pass) back to
+                                # the last clean turn so the fallback provider
+                                # gets a coherent continuation point.
+                                if truncated_response_parts:
+                                    messages = agent._get_messages_up_to_last_assistant(messages)
+                                agent._session_messages = messages
+                                length_continue_retries = 0
+                                truncated_response_parts = []
+                                retry_count = 0
+                                compression_attempts = 0
+                                _retry.primary_recovery_attempted = False
+                                _retry.restart_with_rebuilt_messages = True
+                                break
+                            # Fallback activation failed — fall through to normal
+                            # continuation (best-effort, may loop).
+                            agent._vprint(
+                                f"{agent.log_prefix}⚠️  No fallback provider "
+                                f"configured — retrying with same provider "
+                                f"(may re-hit filter)...",
+                                force=True,
+                            )
                        if assistant_message is not None and not _trunc_has_tool_calls:
                            length_continue_retries += 1
                            interim_msg = agent._build_assistant_message(assistant_message, finish_reason)
@@ -2259,6 +2316,15 @@ def run_conversation(
                    # "unknown variant `image_url`, expected `text`".
                    "unknown variant `image_url`, expected `text`",
                    "unknown variant image_url, expected text",
+                    # OpenRouter routes a request to upstream endpoints and,
+                    # when none of the candidate endpoints for the model accept
+                    # image input, returns HTTP 404 "No endpoints found that
+                    # support image input". Without this phrase the agent never
+                    # strips the images, the retry loop re-sends the same
+                    # rejected request until exhaustion, and the gateway leaves
+                    # every subsequent message queued behind the stuck turn —
+                    # the P1 in issue #21160. The 404 passes the 4xx gate below.
+                    "no endpoints found that support image input",
                )
                _err_lower = _err_body.lower()
                _looks_like_image_rejection = any(
@@ -2830,10 +2896,9 @@ def run_conversation(
                            approx_tokens=approx_tokens,
                            task_id=effective_task_id,
                        )
-                        # Compression created a new session — clear history
-                        # so _flush_messages_to_session_db writes compressed
-                        # messages to the new session, not skipping them.
-                        conversation_history = None
+                        conversation_history = conversation_history_after_compression(
+                            agent, messages
+                        )
                        if len(messages) < original_len or old_ctx > _reduced_ctx:
                            agent._buffer_status(
                                f"🗜️ Context reduced to {_reduced_ctx:,} tokens "
@@ -2845,15 +2910,25 @@ def run_conversation(
                    # Fall through to normal error handling if compression
                    # is exhausted or didn't help.

-                # Eager fallback for rate-limit errors (429 or quota exhaustion).
-                # When a fallback model is configured, switch immediately instead
-                # of burning through retries with exponential backoff -- the
-                # primary provider won't recover within the retry window.
+                # Eager fallback for rate-limit errors (429 or quota exhaustion)
+                # and transport errors (connection failure / timeout / provider
+                # overloaded).  Rate limits and billing: switch immediately —
+                # the primary provider won't recover within the retry window.
+                # Transport errors: allow 1 retry first (transient hiccups
+                # recover), then fall back if the provider is truly unreachable.
                is_rate_limited = classified.reason in {
                    FailoverReason.rate_limit,
                    FailoverReason.billing,
                }
-                if is_rate_limited and agent._fallback_index < len(agent._fallback_chain):
+                _is_transport_failure = classified.reason in {
+                    FailoverReason.timeout,
+                    FailoverReason.overloaded,
+                }
+                _should_fallback = (
+                    is_rate_limited
+                    or (_is_transport_failure and retry_count >= 2)
+                )
+                if _should_fallback and agent._fallback_index < len(agent._fallback_chain):
                    # Don't eagerly fallback if credential pool rotation may
                    # still recover.  See _pool_may_recover_from_rate_limit
                    # for the single-credential-pool and CloudCode-quota
@@ -2868,6 +2943,10 @@ def run_conversation(
                            agent._buffer_status(
                                "⚠️ Billing or credits exhausted — switching to fallback provider..."
                            )
+                        elif _is_transport_failure:
+                            agent._buffer_status(
+                                "⚠️ Provider unreachable — switching to fallback provider..."
+                            )
                        else:
                            agent._buffer_status("⚠️ Rate limited — switching to fallback provider...")
                        if agent._try_activate_fallback(reason=classified.reason):
@@ -3042,10 +3121,9 @@ def run_conversation(
                        messages, system_message, approx_tokens=approx_tokens,
                        task_id=effective_task_id,
                    )
-                    # Compression created a new session — clear history
-                    # so _flush_messages_to_session_db writes compressed
-                    # messages to the new session, not skipping them.
-                    conversation_history = None
+                    conversation_history = conversation_history_after_compression(
+                        agent, messages
+                    )

                    # Re-estimate tokens after compression.  Same-message-count
                    # compression (tool-result pruning, in-place summarization)
@@ -3209,10 +3287,9 @@ def run_conversation(
                        messages, system_message, approx_tokens=approx_tokens,
                        task_id=effective_task_id,
                    )
-                    # Compression created a new session — clear history
-                    # so _flush_messages_to_session_db writes compressed
-                    # messages to the new session, not skipping them.
-                    conversation_history = None
+                    conversation_history = conversation_history_after_compression(
+                        agent, messages
+                    )

                    # Re-estimate tokens after compression.  Same-message-count
                    # compression (tool-result pruning, in-place summarization)
@@ -3474,6 +3551,13 @@ def run_conversation(
                    ):
                        _retry.primary_recovery_attempted = True
                        retry_count = 0
+                        # Primary transport recovery starts a fresh attempt
+                        # cycle. Re-open fallback state so a follow-on 429 can
+                        # still activate fallback_providers after stale
+                        # pre-recovery fallback/credential-pool bookkeeping.
+                        _retry.has_retried_429 = False
+                        agent._fallback_index = 0
+                        agent._fallback_activated = False
                        continue
                    # Try fallback before giving up entirely
                    if agent._has_pending_fallback():
@@ -3661,7 +3745,12 @@ def run_conversation(
                        _ra_raw = _resp_headers.get("retry-after") or _resp_headers.get("Retry-After")
                        if _ra_raw:
                            try:
-                                _retry_after = min(float(_ra_raw), 120)  # Cap at 2 minutes
+                                # Cap at 10 minutes. Anthropic Tier 1 input-token
+                                # buckets reset in ~171s, so a 120s cap caused us to
+                                # retry before the actual reset window and re-trip the
+                                # limit. 600s covers all realistic provider reset
+                                # windows while still rejecting pathological values. (#26293)
+                                _retry_after = min(float(_ra_raw), 600)
                            except (TypeError, ValueError):
                                pass
                wait_time = _retry_after if _retry_after else jittered_backoff(retry_count, base_delay=2.0, max_delay=60.0)
@@ -3742,6 +3831,17 @@ def run_conversation(
            _retry.restart_with_compressed_messages = False
            continue

+        if _retry.restart_with_rebuilt_messages:
+            # A content-filter stream stall (#32421) was escalated to the
+            # fallback chain and the partial content rolled back.  Re-issue
+            # the API call against the now-active fallback provider.  Refund
+            # the budget/count for the stalled attempt so the fallback gets a
+            # fair turn.
+            api_call_count -= 1
+            agent.iteration_budget.refund()
+            _retry.restart_with_rebuilt_messages = False
+            continue
+
        if _retry.restart_with_length_continuation:
            # Progressively boost the output token budget on each retry.
            # Retry 1 → 2× base, retry 2 → 3× base, capped at 32 768.
@@ -4316,10 +4416,9 @@ def run_conversation(
                        approx_tokens=agent.context_compressor.last_prompt_tokens,
                        task_id=effective_task_id,
                    )
-                    # Compression created a new session — clear history so
-                    # _flush_messages_to_session_db writes compressed messages
-                    # to the new session (see preflight compression comment).
-                    conversation_history = None
+                    conversation_history = conversation_history_after_compression(
+                        agent, messages
+                    )
                
                # Save session log incrementally (so progress is visible even if interrupted)
                agent._session_messages = messages
@@ -4361,7 +4460,11 @@ def run_conversation(
                            "as final response"
                        )
                        final_response = _recovered
-                        agent._response_was_previewed = True
+                        # Streaming delivered a fragment, not a confirmed
+                        # final preview. Leave response_previewed false so
+                        # gateway fallback delivery can send the recovered
+                        # text plus the abnormal-turn explanation.
+                        agent._response_was_previewed = False
                        break

                    # If the previous turn already delivered real content alongside
@@ -4606,14 +4709,20 @@ def run_conversation(
                # status from earlier failed attempts in this turn.
                agent._clear_status_buffer()

+                from agent.agent_runtime_helpers import (
+                    intent_ack_continuation_mode,
+                )
+
+                _ack_mode = intent_ack_continuation_mode(agent)
                if (
-                    agent.api_mode == "codex_responses"
+                    _ack_mode != "off"
                    and agent.valid_tool_names
                    and codex_ack_continuations < 2
                    and agent._looks_like_codex_intermediate_ack(
                        user_message=user_message,
                        assistant_content=final_response,
                        messages=messages,
+                        require_workspace=(_ack_mode == "codex_only"),
                    )
                ):
                    codex_ack_continuations += 1
--- a/agent/copilot_acp_client.py
+++ b/agent/copilot_acp_client.py
@@ -23,6 +23,7 @@ from typing import Any

 from agent.file_safety import get_read_block_error, is_write_denied
 from agent.redact import redact_sensitive_text
+from tools.environments.local import hermes_subprocess_env

 ACP_MARKER_BASE_URL = "acp://copilot"
 _DEFAULT_TIMEOUT_SECONDS = 900.0
@@ -94,7 +95,10 @@ def _resolve_home_dir() -> str:


 def _build_subprocess_env() -> dict[str, str]:
-    env = os.environ.copy()
+    # Copilot ACP is a model-driving CLI executor: it legitimately needs LLM
+    # provider credentials. Route through the central helper so Tier-1 secrets
+    # (gateway bot tokens, GitHub auth, infra) are still stripped (#29157).
+    env = hermes_subprocess_env(inherit_credentials=True)
    home = _resolve_home_dir()
    env["HOME"] = home
    from hermes_constants import apply_subprocess_home_env
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@@ -537,10 +537,11 @@ class CredentialPool:
                self._entries[idx] = new
                return

-    def _persist(self) -> None:
+    def _persist(self, *, removed_ids: Optional[List[str]] = None) -> None:
        write_credential_pool(
            self.provider,
            [entry.to_dict() for entry in self._entries],
+            removed_ids=removed_ids,
        )

    def _is_terminal_auth_failure(
@@ -1124,13 +1125,17 @@ class CredentialPool:
                        logger.debug(
                            "Failed to clear terminal xAI OAuth state: %s", clear_exc
                        )
+                    removed_ids = [
+                        item.id for item in self._entries
+                        if item.source == "loopback_pkce"
+                    ]
                    self._entries = [
                        item for item in self._entries
                        if item.source != "loopback_pkce"
                    ]
                    if self._current_id == entry.id:
                        self._current_id = None
-                    self._persist()
+                    self._persist(removed_ids=removed_ids)
                    return None
            # For openai-codex: same race as xAI/nous — another Hermes process
            # may have consumed the refresh token between our proactive sync
@@ -1190,13 +1195,17 @@ class CredentialPool:
                        logger.debug(
                            "Failed to clear terminal Codex OAuth state: %s", clear_exc
                        )
+                    removed_ids = [
+                        item.id for item in self._entries
+                        if item.source == "device_code"
+                    ]
                    self._entries = [
                        item for item in self._entries
                        if item.source != "device_code"
                    ]
                    if self._current_id == entry.id:
                        self._current_id = None
-                    self._persist()
+                    self._persist(removed_ids=removed_ids)
                    return None
            # For nous: another process may have consumed the refresh token
            # between our proactive sync and the HTTP call.  Re-sync from
@@ -1253,13 +1262,17 @@ class CredentialPool:
                        auth_mod.NOUS_DEVICE_CODE_SOURCE,
                        f"manual:{auth_mod.NOUS_DEVICE_CODE_SOURCE}",
                    }
+                    removed_ids = [
+                        item.id for item in self._entries
+                        if item.source in singleton_sources
+                    ]
                    self._entries = [
                        item for item in self._entries
                        if item.source not in singleton_sources
                    ]
                    if self._current_id == entry.id:
                        self._current_id = None
-                    self._persist()
+                    self._persist(removed_ids=removed_ids)
                    return None
            self._mark_exhausted(entry, None)
            return None
@@ -1421,7 +1434,7 @@ class CredentialPool:
            pruned_ids = set(entries_to_prune)
            self._entries = [e for e in self._entries if e.id not in pruned_ids]
        if cleared_any:
-            self._persist()
+            self._persist(removed_ids=entries_to_prune)
        return available

    def _select_unlocked(self) -> Optional[PooledCredential]:
@@ -1595,7 +1608,7 @@ class CredentialPool:
            replace(entry, priority=new_priority)
            for new_priority, entry in enumerate(self._entries)
        ]
-        self._persist()
+        self._persist(removed_ids=[removed.id])
        if self._current_id == removed.id:
            self._current_id = None
        return removed
@@ -2257,6 +2270,11 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
 def load_pool(provider: str) -> CredentialPool:
    provider = (provider or "").strip().lower()
    raw_entries = read_credential_pool(provider)
+    disk_ids = {
+        entry.get("id")
+        for entry in raw_entries
+        if isinstance(entry, dict) and entry.get("id")
+    }
    raw_needs_sanitization = any(
        isinstance(payload, dict)
        and sanitize_borrowed_credential_payload(payload, provider) != payload
@@ -2285,8 +2303,10 @@ def load_pool(provider: str) -> CredentialPool:
        changed |= _normalize_pool_priorities(provider, entries)

    if changed:
+        new_ids = {entry.id for entry in entries}
        write_credential_pool(
            provider,
            [entry.to_dict() for entry in sorted(entries, key=lambda item: item.priority)],
+            removed_ids=disk_ids - new_ids,
        )
    return CredentialPool(provider, entries)
--- a/agent/curator.py
+++ b/agent/curator.py
@@ -273,6 +273,21 @@ def should_run_now(now: Optional[datetime] = None) -> bool:
 # Automatic state transitions (pure function, no LLM)
 # ---------------------------------------------------------------------------

+def _cron_referenced_skills() -> Set[str]:
+    """Skill names referenced by any cron job (incl. paused/disabled).
+
+    Best-effort: a cron-module import error or corrupt jobs store must never
+    break the curator, so any failure yields an empty set (no protection,
+    but no crash).
+    """
+    try:
+        from cron.jobs import referenced_skill_names as _refs
+        return _refs()
+    except Exception as e:
+        logger.debug("Curator could not read cron skill references: %s", e, exc_info=True)
+        return set()
+
+
 def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int]:
    """Walk every curator-managed skill and move active/stale/archived based on
    the latest real activity timestamp. Pinned skills are never touched.
@@ -292,6 +307,8 @@ def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int
    stale_cutoff = now - timedelta(days=get_stale_after_days())
    archive_cutoff = now - timedelta(days=get_archive_after_days())

+    cron_referenced = _cron_referenced_skills()
+
    counts = {"marked_stale": 0, "archived": 0, "reactivated": 0, "checked": 0, "seeded": 0}

    for row in _u.agent_created_report():
@@ -300,6 +317,15 @@ def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int
        if row.get("pinned"):
            continue

+        # A skill referenced by any cron job (incl. paused/disabled) is in
+        # use by definition — resuming or the next fire must find it. The
+        # scheduler only bumps usage when a job actually fires, so jobs that
+        # fire less often than archive_after_days, paused jobs, and far-future
+        # one-shots would otherwise have their skills aged out from under
+        # them. Treat referenced skills like pinned: never auto-transition.
+        if name in cron_referenced:
+            continue
+
        # First sight of a curation-eligible skill with no persisted record
        # (e.g. a newly-eligible built-in): anchor its clock to now and defer.
        if not row.get("_persisted", True):
@@ -316,6 +342,18 @@ def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int

        current = row.get("state", _u.STATE_ACTIVE)

+        # Never-used skills (use_count == 0) get a grace floor: don't archive
+        # one until it is at least stale_after_days old. A use=0 skill is
+        # absence of evidence, not evidence of staleness — a skill created
+        # recently may simply not have had its trigger come up yet.
+        never_used = int(row.get("use_count", 0) or 0) == 0
+        if never_used and anchor > stale_cutoff:
+            # Younger than the stale window — leave it alone entirely.
+            if current == _u.STATE_STALE:
+                _u.set_state(name, _u.STATE_ACTIVE)
+                counts["reactivated"] += 1
+            continue
+
        if anchor <= archive_cutoff and current != _u.STATE_ARCHIVED:
            ok, _msg = _u.archive_skill(name)
            if ok:
@@ -390,10 +428,19 @@ CURATOR_REVIEW_PROMPT = (
    "back load-bearing UX (slash-command entry points referenced in docs and "
    "tips) and are filtered out of the candidate list below — never resurrect "
    "one as an archive or absorb target.\n"
+    "3c. DO NOT archive or prune any skill marked `cron=yes` in the candidate "
+    "list. A cron job depends on it and will fail to load it on its next "
+    "run. You MAY still consolidate it into an umbrella — but only because "
+    "the curator rewrites cron job skill references to follow consolidations; "
+    "never simply prune it.\n"
    "4. DO NOT use usage counters as a reason to skip consolidation. The "
    "counters are new and often mostly zero. Judge overlap on CONTENT, "
    "not on use_count. 'use=0' is not evidence a skill is valuable; it's "
-    "absence of evidence either way.\n"
+    "absence of evidence either way. Corollary: 'use=0' is ALSO not a "
+    "reason to PRUNE a skill. Never archive a never-used skill (use=0) "
+    "unless it is at least 30 days old (check last_activity / created date) "
+    "AND its content is genuinely obsolete or fully absorbed elsewhere — a "
+    "recently-created skill simply may not have had its trigger come up yet.\n"
    "5. DO NOT reject consolidation on the grounds that 'each skill has "
    "a distinct trigger'. Pairwise distinctness is the wrong bar. The "
    "right bar is: 'would a human maintainer write this as N separate "
@@ -1413,12 +1460,14 @@ def _render_candidate_list() -> str:
    rows = skill_usage.agent_created_report()
    if not rows:
        return "No agent-created skills to review."
+    cron_referenced = _cron_referenced_skills()
    lines = [f"Agent-created skills ({len(rows)}):\n"]
    for r in rows:
        lines.append(
            f"- {r['name']}  "
            f"state={r['state']}  "
            f"pinned={'yes' if r.get('pinned') else 'no'}  "
+            f"cron={'yes' if r['name'] in cron_referenced else 'no'}  "
            f"activity={r.get('activity_count', 0)}  "
            f"use={r.get('use_count', 0)}  "
            f"view={r.get('view_count', 0)}  "
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@@ -133,6 +133,31 @@ _RATE_LIMIT_PATTERNS = [
    "servicequotaexceededexception",
 ]

+# Patterns that indicate provider-side overload, NOT a per-credential rate
+# limit or billing problem.  The credential is valid — the server is just
+# busy — so the correct recovery is "back off and retry the same key", never
+# "rotate the credential" (rotating exhausts the pool while the endpoint is
+# still busy; a single-key user has nothing to rotate to).  Some providers
+# (notably Z.AI / Zhipu) reuse HTTP 429 for server-wide overload, so the 429
+# status path matches the body against this list before falling through to
+# the rate_limit default.  Phrases are kept narrow and overload-flavoured so a
+# normal rate-limit message ("you have been rate-limited") doesn't hit this
+# bucket. (#14038, #15297)
+_OVERLOADED_PATTERNS = [
+    "overloaded",
+    "temporarily overloaded",
+    "service is temporarily overloaded",
+    "service may be temporarily overloaded",
+    "server is overloaded",
+    "server overloaded",
+    "service overloaded",
+    "service is overloaded",
+    "upstream overloaded",
+    "currently overloaded",
+    "at capacity",
+    "over capacity",
+]
+
 # Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
 _USAGE_LIMIT_PATTERNS = [
    "usage limit",
@@ -330,6 +355,14 @@ _CONTENT_POLICY_BLOCKED_PATTERNS = [
    # echo back; the underscore form is provider-specific enough.
    "content_filter",
    "responsibleaipolicyviolation",
+    # MiniMax output-layer safety filter. The error string is surfaced
+    # verbatim by MiniMax SDK / OpenAI-compatible endpoints, usually in the
+    # form "output new_sensitive (1027)" when the model's *output* (often a
+    # large tool-call argument block) trips the upstream safety filter and
+    # the SSE stream is truncated mid-flight. ``new_sensitive`` is the
+    # filter name and is narrow enough that billing / format / auth error
+    # strings will not collide. See #32421.
+    "new_sensitive",
 ]

 # Auth patterns (non-status-code signals)
@@ -863,7 +896,19 @@ def _classify_by_status(
        )

    if status_code == 429:
-        # Already checked long_context_tier above; this is a normal rate limit
+        # Already checked long_context_tier above. Some providers (notably
+        # Z.AI / Zhipu) reuse HTTP 429 for server-wide overload — same status
+        # code as a true per-credential rate limit, but the credential is
+        # valid and the correct recovery is "back off and retry the same key",
+        # NOT "rotate the credential" (which exhausts the pool while the
+        # endpoint is still busy, and does nothing for a single-key user).
+        # Disambiguate on the error body so an overload 429 takes the
+        # transient-overload path instead of burning the pool. (#14038)
+        if any(p in error_msg for p in _OVERLOADED_PATTERNS):
+            return result_fn(
+                FailoverReason.overloaded,
+                retryable=True,
+            )
        return result_fn(
            FailoverReason.rate_limit,
            retryable=True,
@@ -1214,6 +1259,17 @@ def _classify_by_message(
            should_fallback=True,
        )

+    # Overloaded / server-busy patterns — must come BEFORE the rate_limit and
+    # billing checks so that a message-only "overloaded" (no 503/529 status,
+    # e.g. some Anthropic-compatible proxies) classifies as a transient
+    # overload (backoff + retry) instead of falling through to `unknown` or
+    # incorrectly triggering credential rotation.
+    if any(p in error_msg for p in _OVERLOADED_PATTERNS):
+        return result_fn(
+            FailoverReason.overloaded,
+            retryable=True,
+        )
+
    # Billing patterns
    if any(p in error_msg for p in _BILLING_PATTERNS):
        return result_fn(
@@ -1303,19 +1359,25 @@ def _extract_status_code(error: Exception) -> Optional[int]:


 def _extract_error_body(error: Exception) -> dict:
-    """Extract the structured error body from an SDK exception."""
-    body = getattr(error, "body", None)
-    if isinstance(body, dict):
-        return body
-    # Some errors have .response.json()
-    response = getattr(error, "response", None)
-    if response is not None:
-        try:
-            json_body = response.json()
-            if isinstance(json_body, dict):
-                return json_body
-        except Exception:
-            pass
+    """Extract the structured error body from an SDK exception or its cause chain."""
+    current = error
+    for _ in range(5):  # Match _extract_status_code() traversal depth.
+        body = getattr(current, "body", None)
+        if isinstance(body, dict):
+            return body
+        # Some errors have .response.json()
+        response = getattr(current, "response", None)
+        if response is not None:
+            try:
+                json_body = response.json()
+                if isinstance(json_body, dict):
+                    return json_body
+            except Exception:
+                pass
+        cause = getattr(current, "__cause__", None) or getattr(current, "__context__", None)
+        if cause is None or cause is current:
+            break
+        current = cause
    return {}


--- a/agent/image_routing.py
+++ b/agent/image_routing.py
@@ -388,14 +388,98 @@ def _sniff_mime_from_bytes(raw: bytes) -> Optional[str]:
    # BMP: "BM"
    if raw.startswith(b"BM"):
        return "image/bmp"
-    # HEIC/HEIF: ftypheic / ftypheix / ftypmif1 / ftypmsf1 etc.
-    if len(raw) >= 12 and raw[4:8] == b"ftyp" and raw[8:12] in {
-        b"heic", b"heix", b"hevc", b"hevx", b"mif1", b"msf1", b"heim", b"heis",
-    }:
-        return "image/heic"
+    # ISO-BMFF family (HEIC/HEIF/AVIF): bytes 4..8 == 'ftyp', major brand at 8..12
+    if len(raw) >= 12 and raw[4:8] == b"ftyp":
+        brand = raw[8:12]
+        if brand in {b"avif", b"avis"}:
+            return "image/avif"
+        if brand in {
+            b"heic", b"heix", b"hevc", b"hevx",
+            b"mif1", b"msf1", b"heim", b"heis",
+        }:
+            return "image/heic"
+    # TIFF: II*\0 (little-endian) or MM\0* (big-endian)
+    if raw[:4] in {b"II*\x00", b"MM\x00*"}:
+        return "image/tiff"
+    # ICO: 00 00 01 00 (reserved=0, type=1=icon)
+    if raw[:4] == b"\x00\x00\x01\x00":
+        return "image/x-icon"
+    # SVG: text-based; strip a UTF-8 BOM and ASCII whitespace, then look for an <svg tag
+    head = raw[:512].lstrip(b"\xef\xbb\xbf \t\n\r\x0b\x0c").lower()
+    if head.startswith(b"<?xml") or head.startswith(b"<svg"):
+        if b"<svg" in head:
+            return "image/svg+xml"
    return None


+# Formats every major vision provider (Anthropic, OpenAI, Gemini, Bedrock)
+# accepts natively. Anything outside this set has to be transcoded to PNG
+# before we declare media_type, otherwise the provider returns HTTP 400
+# ("Could not process image" / "Unsupported image media type") and the
+# whole turn fails with no salvage path.
+#
+# Discord (and a few other chat platforms) freely accept attachments in
+# formats outside this set -- AVIF screenshots from Chromium, HEIC from
+# iPhones, TIFF from scanners, BMP from old Windows tools, ICO -- so users
+# do hit this in practice. SVG is vector and Pillow cannot rasterize it;
+# it is skipped (logged) rather than transcoded.
+_UNIVERSALLY_SUPPORTED_MIMES = frozenset({
+    "image/png", "image/jpeg", "image/gif", "image/webp",
+})
+
+
+def _transcode_to_png(raw: bytes) -> Optional[bytes]:
+    """Decode arbitrary image bytes with Pillow and re-encode as PNG.
+
+    Returns None if Pillow isn't installed or can't decode the input
+    (rare formats, corrupted bytes, missing optional decoder plugin for
+    HEIC/AVIF, or vector formats like SVG). Caller falls back to skipping
+    the image so the rest of the turn still works.
+
+    HEIC/HEIF and AVIF need optional Pillow plugins; we try to register
+    them on demand and swallow ImportError so a missing plugin just
+    looks like 'Pillow can't decode this' rather than crashing.
+    """
+    try:
+        from PIL import Image
+    except ImportError:
+        logger.info(
+            "image_routing: Pillow not installed; cannot transcode "
+            "non-standard image format to PNG. Install with `pip install Pillow` "
+            "(and `pillow-heif` / `pillow-avif-plugin` for those formats)."
+        )
+        return None
+    # Optional plugin registration. Silent on failure: an unsupported
+    # format will just fall through to Image.open raising below.
+    try:
+        import pillow_heif  # type: ignore
+
+        pillow_heif.register_heif_opener()
+    except Exception:
+        pass
+    try:
+        import pillow_avif  # type: ignore  # noqa: F401  -- registers AVIF on import
+    except Exception:
+        pass
+    try:
+        from io import BytesIO
+
+        with Image.open(BytesIO(raw)) as im:
+            # Pick an output mode PNG can serialise. Anything other than
+            # the standard set gets normalised to RGBA so transparency is
+            # preserved where the source had it.
+            if im.mode not in {"RGB", "RGBA", "L", "LA", "P"}:
+                im = im.convert("RGBA")
+            buf = BytesIO()
+            im.save(buf, format="PNG", optimize=False)
+            return buf.getvalue()
+    except Exception as exc:
+        logger.info(
+            "image_routing: Pillow could not transcode image to PNG -- %s", exc
+        )
+        return None
+
+
 def _guess_mime(path: Path, raw: Optional[bytes] = None) -> str:
    """Return image MIME type for *path*.

@@ -431,8 +515,18 @@ def _file_to_data_url(path: Path) -> Optional[str]:
    accept large images (OpenAI 49 MB+, Gemini 100 MB) don't pay a silent
    quality tax just because one other provider is stricter.

-    Returns None only if the file can't be read (missing, permission
-    denied, etc.); the caller reports those paths in ``skipped``.
+    Format compatibility IS handled here: if the sniffed MIME isn't one
+    of ``_UNIVERSALLY_SUPPORTED_MIMES`` (i.e. it's something like AVIF,
+    HEIC, BMP, TIFF, or ICO that some providers reject outright), we
+    transcode to PNG with Pillow before declaring media_type. This fixes
+    the user-visible "Could not process image" HTTP 400 from Anthropic on
+    Discord-attached AVIF/HEIC/BMP files.
+
+    Returns None if the file can't be read OR if the format isn't
+    universally supported AND Pillow can't transcode it (Pillow missing,
+    HEIC/AVIF plugin missing, vector format like SVG, corrupt bytes). The
+    caller reports those paths in ``skipped`` and the rest of the turn
+    proceeds.
    """
    try:
        raw = path.read_bytes()
@@ -440,6 +534,22 @@ def _file_to_data_url(path: Path) -> Optional[str]:
        logger.warning("image_routing: failed to read %s — %s", path, exc)
        return None
    mime = _guess_mime(path, raw=raw)
+    if mime not in _UNIVERSALLY_SUPPORTED_MIMES:
+        transcoded = _transcode_to_png(raw)
+        if transcoded is None:
+            logger.warning(
+                "image_routing: %s is %s which is not accepted by all major "
+                "vision providers and could not be transcoded to PNG; "
+                "skipping this attachment.",
+                path, mime,
+            )
+            return None
+        logger.info(
+            "image_routing: transcoded %s (%s) -> image/png for provider compatibility",
+            path.name, mime,
+        )
+        raw = transcoded
+        mime = "image/png"
    b64 = base64.b64encode(raw).decode("ascii")
    return f"data:{mime};base64,{b64}"

--- a/agent/moa_loop.py
+++ b/agent/moa_loop.py
@@ -8,6 +8,7 @@ iteration.

 from __future__ import annotations

+import hashlib
 import logging
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any
@@ -25,20 +26,112 @@ logger = logging.getLogger(__name__)
 # opening dozens of sockets at once.
 _MAX_REFERENCE_WORKERS = 8

+# Per-tool-result character budget for the advisory reference view. Tool
+# results can be huge (a full diff, a 5000-line file dump); replaying them
+# verbatim per reference per tool-loop step would blow the reference model's
+# context window and cost. We keep the agent's *actions* (tool calls) in full —
+# they are cheap, high-signal, and tell the reference what the agent did — but
+# preview each tool *result* head+tail so the reference still sees what came
+# back without replaying megabytes. The acting aggregator always gets the full,
+# untrimmed transcript; this budget only shapes the advisory copy.
+_REFERENCE_TOOL_RESULT_BUDGET = 4000
+
+# System prompt prepended to every reference-model call. References are
+# advisory — they do NOT act, call tools, or own the task. Without this
+# framing a reference receives the bare trimmed conversation and assumes it is
+# the acting agent: it then refuses ("I can't access repositories / URLs from
+# here") or tries to call tools it doesn't have. The prompt reframes the model
+# as an analyst whose job is to reason about the presented state and hand its
+# best thinking to the aggregator/orchestrator that will actually act.
+_REFERENCE_SYSTEM_PROMPT = (
+    "You are a reference advisor in a Mixture of Agents (MoA) process. You are "
+    "NOT the acting agent and you do NOT execute anything: you cannot call "
+    "tools, run commands, browse, or access files, repositories, or URLs, and "
+    "you should not try to or apologize for being unable to. A separate "
+    "aggregator/orchestrator model holds those capabilities and will take the "
+    "actual actions.\n\n"
+    "The conversation below is the current state of a task handled by that "
+    "acting agent. Your job is to give your most intelligent analysis of that "
+    "state: understand the goal, reason about the problem, and advise on what "
+    "to do next. Surface the best approach, concrete next steps and tool-use "
+    "strategy, likely pitfalls and risks, and anything the acting agent may "
+    "have missed or gotten wrong. Assume any referenced files, URLs, or "
+    "systems exist and reason about them from the context given rather than "
+    "asking for access.\n\n"
+    "Respond with your advice directly — no preamble, no disclaimers about "
+    "tools or access. Your response is private guidance handed to the "
+    "aggregator, not an answer shown to the user."
+)
+
+

 def _slot_label(slot: dict[str, str]) -> str:
    return f"{slot.get('provider', '').strip()}:{slot.get('model', '').strip()}"


+def _slot_runtime(slot: dict[str, str]) -> dict[str, Any]:
+    """Resolve a reference/aggregator slot to real runtime call kwargs.
+
+    A MoA slot is just a model selection — it must be called the same way any
+    model is called elsewhere, not through a bare ``call_llm(provider=...,
+    model=...)`` that leaves base_url/api_key/api_mode unresolved and lets the
+    auxiliary auto-detector guess. We route the slot's provider through
+    ``resolve_runtime_provider`` (the canonical provider→api_mode/base_url/
+    api_key resolver the CLI, gateway, and delegate_task all use), so the slot
+    gets its provider's real API surface — e.g. MiniMax → anthropic_messages,
+    GPT-5/o-series → max_completion_tokens, custom endpoints → their base_url.
+
+    Returns the kwargs to pass through to ``call_llm`` (provider/model plus the
+    resolved base_url/api_key when available). Falls back to the bare
+    provider/model on any resolution error so a misconfigured slot still
+    attempts the call rather than aborting the whole MoA turn.
+    """
+    provider = str(slot.get("provider") or "").strip()
+    model = str(slot.get("model") or "").strip()
+    out: dict[str, Any] = {"provider": provider, "model": model}
+    try:
+        from hermes_cli.runtime_provider import resolve_runtime_provider
+
+        rt = resolve_runtime_provider(requested=provider, target_model=model)
+        resolved_provider = str(rt.get("provider") or provider).strip().lower()
+        # call_llm treats an explicit base_url as a custom endpoint. That is
+        # correct for ordinary OpenAI-compatible targets, but wrong for OAuth /
+        # provider-backed targets whose provider branch adds auth refresh,
+        # request metadata, or request-shape adapters. Keep those providers
+        # identified by name.
+        if resolved_provider in {"nous", "openai-codex", "xai-oauth"}:
+            return out
+        # Pass the resolved endpoint through so call_llm builds the request for
+        # the provider's actual API surface instead of auto-detecting. base_url
+        # routes call_llm to the right adapter (incl. anthropic_messages mode);
+        # api_key is the resolved credential for that provider.
+        if rt.get("base_url"):
+            out["base_url"] = rt["base_url"]
+        if rt.get("api_key"):
+            out["api_key"] = rt["api_key"]
+    except Exception as exc:  # pragma: no cover - defensive
+        logger.debug("MoA slot runtime resolution failed for %s:%s: %s", provider, model, exc)
+    return out
+
+
 def _run_reference(
    slot: dict[str, str],
    ref_messages: list[dict[str, Any]],
    *,
-    temperature: float,
-    max_tokens: int,
+    temperature: float | None = None,
+    max_tokens: int | None = None,
 ) -> tuple[str, str]:
    """Call one reference model and return ``(label, text)``.

+    The slot is resolved to its provider's real runtime (via ``_slot_runtime``)
+    and called through the same ``call_llm`` request-building path any model
+    uses, so per-model wire-format handling (anthropic_messages,
+    max_completion_tokens, fixed/forbidden temperature) applies identically to
+    a reference as it would if that model were the acting model. MoA imposes no
+    cap of its own (``max_tokens`` defaults to ``None`` → omitted → the model's
+    real maximum); ``temperature`` is only the user's configured preset value,
+    which call_llm may still override per model.
+
    Never raises: a failed reference becomes a labelled note so the aggregator
    can still act with partial context. Designed to run inside a thread pool —
    ``call_llm`` is synchronous/blocking, so threads (not asyncio) are the right
@@ -46,13 +139,17 @@ def _run_reference(
    """
    label = _slot_label(slot)
    try:
+        # Prepend the advisory-role system prompt so the reference understands
+        # it is analyzing state for an aggregator, not acting on the task. The
+        # trimmed view (_reference_messages) already strips the agent's own
+        # system prompt, so this is the only system message the reference sees.
+        messages = [{"role": "system", "content": _REFERENCE_SYSTEM_PROMPT}, *ref_messages]
        response = call_llm(
            task="moa_reference",
-            provider=slot["provider"],
-            model=slot["model"],
-            messages=ref_messages,
+            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
+            **_slot_runtime(slot),
        )
        return label, _extract_text(response) or "(empty response)"
    except Exception as exc:
@@ -64,8 +161,8 @@ def _run_references_parallel(
    reference_models: list[dict[str, str]],
    ref_messages: list[dict[str, Any]],
    *,
-    temperature: float,
-    max_tokens: int,
+    temperature: float | None = None,
+    max_tokens: int | None = None,
 ) -> list[tuple[str, str]]:
    """Fan out all reference models in parallel, returning outputs in order.

@@ -106,40 +203,140 @@ def _run_references_parallel(
    return [r for r in results if r is not None]


-def _reference_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
-    """Build an advisory-safe view of the conversation for reference models.
+def _truncate_tool_result(text: str, budget: int = _REFERENCE_TOOL_RESULT_BUDGET) -> str:
+    """Head+tail preview of a tool result for the advisory view.

-    Reference calls are advisory: they never call tools and never emit the
-    ``tool_calls`` the main model did. Replaying the full transcript verbatim
-    (a) re-bills the ~8K-token Hermes system prompt per reference per
-    iteration and (b) risks 400s from strict providers (Mistral, Fireworks)
-    that reject orphan ``tool`` messages or ``tool_calls`` the reference never
-    produced. We keep only the user/assistant *text* turns, dropping the
-    system prompt, any ``tool``-role messages, and any ``tool_calls`` payloads.
+    Keeps the first and last halves of the budget with a ``[... N chars
+    omitted ...]`` marker between them, so a reference sees both how the result
+    started and how it ended without replaying the whole payload.
    """
-    trimmed: list[dict[str, Any]] = []
+    if not text or len(text) <= budget:
+        return text
+    half = budget // 2
+    omitted = len(text) - 2 * half
+    return f"{text[:half]}\n[... {omitted} chars omitted ...]\n{text[-half:]}"
+
+
+def _render_tool_calls(tool_calls: Any) -> str:
+    """Render an assistant turn's tool_calls as readable text lines.
+
+    The advisory view cannot carry real ``tool_calls`` payloads (strict
+    providers reject tool_calls the reference never produced), so the agent's
+    actions are flattened to text the reference can read and reason about.
+    """
+    lines: list[str] = []
+    for tc in tool_calls or []:
+        fn = (tc.get("function") or {}) if isinstance(tc, dict) else {}
+        name = fn.get("name") or (tc.get("name") if isinstance(tc, dict) else "") or "tool"
+        args = fn.get("arguments")
+        if isinstance(args, str):
+            args_text = args
+        elif args is not None:
+            try:
+                import json
+
+                args_text = json.dumps(args, ensure_ascii=False)
+            except Exception:
+                args_text = str(args)
+        else:
+            args_text = ""
+        lines.append(f"[called tool: {name}({args_text})]" if args_text else f"[called tool: {name}]")
+    return "\n".join(lines)
+
+
+def _reference_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Build an advisory view of the conversation for reference models.
+
+    A reference gives an INFORMED judgement on the current state, so it must
+    see what the agent actually did — its tool calls AND the tool results that
+    came back — not just the agent's narration. We therefore preserve the whole
+    conversation flow, but flatten it into clean user/assistant *text* turns:
+
+      - system prompt: dropped (8K of Hermes boilerplate, not advisory signal).
+      - assistant turns: kept; any ``tool_calls`` are rendered inline as
+        ``[called tool: name(args)]`` text lines appended to the turn's text.
+      - ``tool``-role results: NOT dropped. Each is folded (head+tail preview,
+        see ``_truncate_tool_result``) into the *preceding* assistant turn as a
+        ``[tool result: ...]`` block, so the reference sees what came back.
+
+    This emits ZERO ``tool``-role messages and ZERO ``tool_calls`` arrays — only
+    plain user/assistant text — so strict providers (Mistral, Fireworks) that
+    reject orphan tool messages / unproduced tool_calls don't 400, while the
+    reference still has the full picture.
+
+    The view MUST end with a ``user`` turn. Anthropic (and OpenRouter→Anthropic)
+    interpret a trailing assistant turn as an assistant *prefill* to continue,
+    and no-prefill models (e.g. Claude Opus 4.8) reject it with
+    ``400 ... must end with a user message``. Rather than DELETE the agent's
+    latest context to satisfy that (which would blind the reference to the
+    current state), we APPEND a synthetic user turn asking the reference to
+    judge the state above. End-on-user is satisfied and no context is lost.
+
+    The acting aggregator always receives the full, untrimmed transcript; this
+    function only shapes the disposable advisory copy.
+    """
+    advisory_instruction = (
+        "[The conversation above is the current state of the task. Give your "
+        "most intelligent judgement: what is going on, what should happen next, "
+        "what risks or mistakes you see, and how the acting agent should "
+        "proceed.]"
+    )
+
+    rendered: list[dict[str, Any]] = []
+    last_user_content: str | None = None
    for msg in messages:
        role = msg.get("role")
-        if role not in ("user", "assistant"):
-            # Drop system prompt and tool-result messages.
-            continue
        content = msg.get("content")
-        if not isinstance(content, str):
-            # Skip non-text (multimodal/tool-call-only) assistant turns.
-            if not content:
-                continue
        text = content if isinstance(content, str) else ""
-        if role == "assistant" and not text.strip():
-            # Assistant turn that was purely tool calls — nothing advisory.
+
+        if role == "system":
            continue
-        trimmed.append({"role": role, "content": text})
-    if not trimmed:
-        # Degenerate case (e.g. first turn was stripped): fall back to a
-        # minimal user turn so the reference still has something to answer.
+        if role == "user":
+            if text.strip():
+                last_user_content = text
+            rendered.append({"role": "user", "content": text})
+        elif role == "assistant":
+            parts: list[str] = []
+            if text.strip():
+                parts.append(text.strip())
+            calls_text = _render_tool_calls(msg.get("tool_calls"))
+            if calls_text:
+                parts.append(calls_text)
+            # Empty assistant turns (no text, no calls) carry nothing advisory.
+            if parts:
+                rendered.append({"role": "assistant", "content": "\n".join(parts)})
+        elif role == "tool":
+            # Fold the tool result into the preceding assistant turn as text so
+            # the reference sees what came back, without emitting a tool-role
+            # message a reference never produced.
+            result_text = _truncate_tool_result(text)
+            block = f"[tool result: {result_text}]"
+            if rendered and rendered[-1].get("role") == "assistant":
+                rendered[-1]["content"] = rendered[-1]["content"] + "\n" + block
+            else:
+                # No assistant turn to attach to (e.g. a leading tool result);
+                # keep it as advisory context on its own assistant-role line.
+                rendered.append({"role": "assistant", "content": block})
+        # Any other role is ignored.
+
+    # End on a user turn: append a synthetic advisory request rather than
+    # deleting the agent's latest assistant context. This satisfies Anthropic's
+    # no-trailing-assistant-prefill rule while preserving full state.
+    if rendered and rendered[-1].get("role") == "assistant":
+        rendered.append({"role": "user", "content": advisory_instruction})
+    elif rendered and rendered[-1].get("role") == "user":
+        # Already ends on a user turn (fresh user prompt, no agent action yet).
+        # Leave it — the reference answers that prompt directly.
+        pass
+
+    if not rendered:
+        # Degenerate case: nothing rendered. Fall back to the latest user turn.
+        if last_user_content is not None:
+            return [{"role": "user", "content": last_user_content}]
        for msg in reversed(messages):
            if msg.get("role") == "user" and isinstance(msg.get("content"), str):
                return [{"role": "user", "content": msg["content"]}]
-    return trimmed
+    return rendered



@@ -169,12 +366,18 @@ def aggregate_moa_context(
    aggregator: dict[str, str],
    temperature: float = 0.6,
    aggregator_temperature: float = 0.4,
-    max_tokens: int = 4096,
+    max_tokens: int | None = None,
 ) -> str:
    """Run configured reference models and synthesize their advice.

    Failures are returned as model-specific notes instead of aborting the normal
    agent loop; the main model can still act with partial context.
+
+    ``max_tokens`` is ``None`` by default: MoA does not cap reference or
+    aggregator output, so each model uses its own maximum. ``call_llm`` omits
+    the parameter entirely when it is ``None`` (see its docstring), which also
+    sidesteps providers that reject ``max_tokens`` outright. A hardcoded cap
+    here previously truncated long aggregator syntheses.
    """
    reference_outputs: list[tuple[str, str]] = []
    ref_messages = _reference_messages(api_messages)
@@ -203,11 +406,10 @@ def aggregate_moa_context(
    try:
        response = call_llm(
            task="moa_aggregator",
-            provider=aggregator["provider"],
-            model=aggregator["model"],
            messages=[{"role": "user", "content": synth_prompt}],
            temperature=aggregator_temperature,
            max_tokens=max_tokens,
+            **_slot_runtime(aggregator),
        )
        synthesis = _extract_text(response)
    except Exception as exc:
@@ -230,8 +432,38 @@ def aggregate_moa_context(
 class MoAChatCompletions:
    """OpenAI-chat-compatible facade where the aggregator is the acting model."""

-    def __init__(self, preset_name: str):
+    def __init__(self, preset_name: str, reference_callback: Any = None):
        self.preset_name = preset_name or "default"
+        # Optional display hook. Called as reference outputs become available so
+        # frontends can show each reference model's answer as a labelled block
+        # before the aggregator acts. Signature:
+        #   reference_callback(event, **kwargs)
+        # where event is one of:
+        #   "moa.reference"   kwargs: index, count, label, text
+        #   "moa.aggregating" kwargs: aggregator (label), ref_count
+        # Never raises into the model call — display is best-effort.
+        self.reference_callback = reference_callback
+        # State-scoped reference cache. The agent loop calls create() once per
+        # tool-loop iteration; references should re-run whenever the task STATE
+        # advances — i.e. on every new user message AND every new tool result —
+        # so each reference judges the latest state. The advisory view
+        # (_reference_messages) now renders tool calls + results as text, so its
+        # signature changes on every new tool response; the cache key is that
+        # signature, so a new tool result is a cache MISS (references re-run)
+        # while a redundant create() call with identical state is a HIT (no
+        # re-run, no re-emit). This gives "fire on every user/tool response"
+        # for free, without re-firing on a pure no-op re-call.
+        self._ref_cache_key: tuple | None = None
+        self._ref_cache_outputs: list[tuple[str, str]] = []
+
+    def _emit(self, event: str, **kwargs: Any) -> None:
+        cb = self.reference_callback
+        if cb is None:
+            return
+        try:
+            cb(event, **kwargs)
+        except Exception as exc:  # pragma: no cover - display must never break the turn
+            logger.debug("MoA reference_callback failed for %s: %s", event, exc)

    def create(self, **api_kwargs: Any) -> Any:
        from hermes_cli.config import load_config
@@ -241,7 +473,10 @@ class MoAChatCompletions:
        messages = list(api_kwargs.get("messages") or [])
        reference_models = preset.get("reference_models") or []
        aggregator = preset.get("aggregator") or {}
-        max_tokens = int(preset.get("max_tokens", api_kwargs.get("max_tokens") or 4096) or 4096)
+        # MoA does not cap reference or aggregator output: each model uses its
+        # own maximum. Passing max_tokens=None makes call_llm omit the parameter
+        # (it never caps by default), so a long aggregator synthesis is never
+        # truncated and providers that reject max_tokens don't 400.
        temperature = float(preset.get("reference_temperature", 0.6) or 0.6)
        aggregator_temperature = float(preset.get("aggregator_temperature", api_kwargs.get("temperature") or 0.4) or 0.4)

@@ -253,12 +488,52 @@ class MoAChatCompletions:

        reference_outputs: list[tuple[str, str]] = []
        ref_messages = _reference_messages(messages)
-        reference_outputs = _run_references_parallel(
-            reference_models,
-            ref_messages,
-            temperature=temperature,
-            max_tokens=max_tokens,
-        )
+
+        # State-scoped cache: run + display references only when the advisory
+        # view changed. The view folds in tool calls and tool results, so it
+        # changes on every new user message AND every new tool result; only a
+        # repeated create() with identical state reuses the cached outputs.
+        _sig = hashlib.sha256(
+            "\u0000".join(
+                f"{m.get('role')}:{m.get('content')}" for m in ref_messages
+            ).encode("utf-8", "replace")
+        ).hexdigest()
+        _cache_key = (self.preset_name, _sig, tuple(_slot_label(s) for s in reference_models))
+        _refs_from_cache = _cache_key == self._ref_cache_key and bool(self._ref_cache_outputs)
+
+        if _refs_from_cache:
+            reference_outputs = list(self._ref_cache_outputs)
+        else:
+            reference_outputs = _run_references_parallel(
+                reference_models,
+                ref_messages,
+                temperature=temperature,
+                max_tokens=None,
+            )
+            self._ref_cache_key = _cache_key
+            self._ref_cache_outputs = list(reference_outputs)
+
+            # Surface each reference model's answer to the display BEFORE the
+            # aggregator acts — once per state change (only on the call that
+            # actually ran them). The user sees one labelled block per
+            # reference (rendered like a thinking block) so the MoA process is
+            # visible rather than a silent pause. Best-effort: never blocks the
+            # turn.
+            _ref_count = len(reference_outputs)
+            for _idx, (_label, _text) in enumerate(reference_outputs, start=1):
+                self._emit(
+                    "moa.reference",
+                    index=_idx,
+                    count=_ref_count,
+                    label=_label,
+                    text=_text,
+                )
+            if _ref_count:
+                self._emit(
+                    "moa.aggregating",
+                    aggregator=_slot_label(aggregator),
+                    ref_count=_ref_count,
+                )

        agg_messages = [dict(m) for m in messages]
        if reference_outputs:
@@ -286,21 +561,26 @@ class MoAChatCompletions:
            raise RuntimeError("MoA aggregator cannot be another MoA preset")
        agg_kwargs = dict(api_kwargs)
        agg_kwargs["messages"] = agg_messages
-        agg_kwargs["model"] = aggregator.get("model")
-        agg_kwargs["temperature"] = aggregator_temperature
+        # The aggregator is the acting model. Resolve its slot to the provider's
+        # real runtime (base_url/api_key/api_mode) and call it through the same
+        # request-building path any model uses — so per-model wire-format
+        # handling (anthropic_messages, max_completion_tokens, fixed/forbidden
+        # temperature) applies identically to it. MoA imposes no output cap:
+        # max_tokens is passed through from the caller (normally None → omitted
+        # → the model's real maximum). The preset's old hardcoded 4096 default
+        # is gone — it truncated long syntheses.
        return call_llm(
            task="moa_aggregator",
-            provider=aggregator.get("provider"),
-            model=aggregator.get("model"),
            messages=agg_messages,
            temperature=aggregator_temperature,
            max_tokens=agg_kwargs.get("max_tokens"),
            tools=agg_kwargs.get("tools"),
            extra_body=agg_kwargs.get("extra_body"),
+            **_slot_runtime(aggregator),
        )


 class MoAClient:
-    def __init__(self, preset_name: str):
+    def __init__(self, preset_name: str, reference_callback: Any = None):
        self.chat = type("_MoAChat", (), {})()
-        self.chat.completions = MoAChatCompletions(preset_name)
+        self.chat.completions = MoAChatCompletions(preset_name, reference_callback=reference_callback)
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -478,6 +478,16 @@ def _infer_provider_from_url(base_url: str) -> Optional[str]:
    return None


+def _lmstudio_server_root(base_url: str) -> str:
+    """Return the LM Studio server root for native ``/api/v1`` endpoints."""
+    root = _normalize_base_url(base_url).rstrip("/")
+    for suffix in ("/api/v1", "/api", "/v1"):
+        if root.endswith(suffix):
+            root = root[: -len(suffix)].rstrip("/")
+            break
+    return root
+
+
 def _is_known_provider_base_url(base_url: str) -> bool:
    return _infer_provider_from_url(base_url) is not None

@@ -549,6 +559,7 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
    server_url = normalized
    if server_url.endswith("/v1"):
        server_url = server_url[:-3]
+    lmstudio_url = _lmstudio_server_root(base_url)

    headers = _auth_headers(api_key)

@@ -556,7 +567,7 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
        with httpx.Client(timeout=2.0, headers=headers) as client:
            # LM Studio exposes /api/v1/models — check first (most specific)
            try:
-                r = client.get(f"{server_url}/api/v1/models")
+                r = client.get(f"{lmstudio_url}/api/v1/models")
                if r.status_code == 200:
                    return "lm-studio"
            except Exception:
@@ -774,7 +785,7 @@ def fetch_endpoint_model_metadata(
    if is_local_endpoint(normalized):
        try:
            if detect_local_server_type(normalized, api_key=api_key) == "lm-studio":
-                server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized
+                server_url = _lmstudio_server_root(normalized)
                response = requests.get(
                    server_url.rstrip("/") + "/api/v1/models",
                    headers=headers,
@@ -1297,6 +1308,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
    server_url = base_url.rstrip("/")
    if server_url.endswith("/v1"):
        server_url = server_url[:-3]
+    lmstudio_url = _lmstudio_server_root(base_url)

    headers = _auth_headers(api_key)

@@ -1340,7 +1352,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
            # Use _model_id_matches for fuzzy matching: LM Studio stores models as
            # "publisher/slug" but users configure only "slug" after "local:" prefix.
            if server_type == "lm-studio":
-                resp = client.get(f"{server_url}/api/v1/models")
+                resp = client.get(f"{lmstudio_url}/api/v1/models")
                if resp.status_code == 200:
                    data = resp.json()
                    for m in data.get("models", []):
@@ -1646,6 +1658,34 @@ def get_model_context_length(
    if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
        return config_context_length

+    # 0a. MoA virtual provider — ``model`` is a preset name, not a real model,
+    # and ``base_url`` is the local virtual endpoint, so every probe below would
+    # miss and fall through to the 256K default. The aggregator is the acting
+    # model, so resolve the context window from the aggregator slot's real
+    # provider+model instead. References are advisory-only and never bound the
+    # acting context, so they're ignored here.
+    if (provider or "").strip().lower() == "moa":
+        try:
+            from hermes_cli.config import load_config
+            from hermes_cli.moa_config import resolve_moa_preset
+            from hermes_cli.runtime_provider import resolve_runtime_provider
+
+            preset = resolve_moa_preset(load_config().get("moa") or {}, model)
+            agg = preset.get("aggregator") or {}
+            agg_provider = str(agg.get("provider") or "").strip()
+            agg_model = str(agg.get("model") or "").strip()
+            if agg_model and agg_provider and agg_provider.lower() != "moa":
+                rt = resolve_runtime_provider(requested=agg_provider, target_model=agg_model)
+                return get_model_context_length(
+                    agg_model,
+                    base_url=rt.get("base_url", "") or "",
+                    api_key=rt.get("api_key", "") or "",
+                    provider=agg_provider,
+                )
+        except Exception:
+            logger.debug("MoA aggregator context-length resolution failed", exc_info=True)
+        # Fall through to the generic default if aggregator resolution failed.
+
    # 0b. custom_providers per-model override — check before any probe.
    # This closes the gap where /model switch and display paths used to fall
    # back to 128K despite the user having a per-model context_length set.
--- a/agent/process_bootstrap.py
+++ b/agent/process_bootstrap.py
@@ -26,7 +26,7 @@ from __future__ import annotations
 import os
 import sys
 import urllib.request
-from typing import Optional
+from typing import Any, Optional

 from utils import base_url_hostname, normalize_proxy_url

@@ -142,6 +142,46 @@ def _get_proxy_for_base_url(base_url: Optional[str]) -> Optional[str]:
    return proxy


+def build_keepalive_http_client(
+    base_url: str = "",
+    *,
+    async_mode: bool = False,
+) -> Optional[Any]:
+    """Build an httpx client for OpenAI SDK calls with env-only proxy policy.
+
+    Uses explicit ``HTTPS_PROXY`` / ``NO_PROXY`` env vars via
+    ``_get_proxy_for_base_url``. A custom transport disables httpx's default
+    ``trust_env`` path, so macOS system proxy settings from
+    ``urllib.request.getproxies()`` (which omit the ExceptionsList) are not
+    applied. Mirrors ``AIAgent._build_keepalive_http_client``.
+    """
+    try:
+        import httpx
+        import socket
+
+        if "api.githubcopilot.com" in str(base_url or "").lower():
+            client_cls = httpx.AsyncClient if async_mode else httpx.Client
+            return client_cls()
+
+        sock_opts = [(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)]
+        if hasattr(socket, "TCP_KEEPIDLE"):
+            sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 30))
+            sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 10))
+            sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 3))
+        elif hasattr(socket, "TCP_KEEPALIVE"):
+            sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPALIVE, 30))
+
+        proxy = _get_proxy_for_base_url(base_url)
+        transport_cls = httpx.AsyncHTTPTransport if async_mode else httpx.HTTPTransport
+        client_cls = httpx.AsyncClient if async_mode else httpx.Client
+        return client_cls(
+            transport=transport_cls(socket_options=sock_opts),
+            proxy=proxy,
+        )
+    except Exception:
+        return None
+
+
 def _install_safe_stdio() -> None:
    """Wrap stdout/stderr so best-effort console output cannot crash the agent."""
    for stream_name in ("stdout", "stderr"):
@@ -164,4 +204,5 @@ __all__ = [
    "_install_safe_stdio",
    "_get_proxy_from_env",
    "_get_proxy_for_base_url",
+    "build_keepalive_http_client",
 ]
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -88,12 +88,15 @@ def _find_hermes_md(cwd: Path) -> Optional[Path]:
    stop_at = _find_git_root(cwd)
    current = cwd.resolve()

-    for directory in [current, *current.parents]:
+    # When there is no git root, only check cwd itself – walking parents
+    # could pick up a .hermes.md planted in /tmp, /home, etc.
+    search_dirs = [current, *current.parents] if stop_at else [current]
+
+    for directory in search_dirs:
        for name in _HERMES_MD_NAMES:
            candidate = directory / name
            if candidate.is_file():
                return candidate
-        # Stop walking at the git root (or filesystem root).
        if stop_at and directory == stop_at:
            break
    return None
@@ -617,7 +620,12 @@ DEVELOPER_ROLE_MODELS = ("gpt-5", "codex")
 PLATFORM_HINTS = {
    "whatsapp": (
        "You are on a text messaging communication platform, WhatsApp. "
-        "Please do not use markdown as it does not render. "
+        "Standard markdown (**bold**, *italic*, ~~strike~~, # headers, "
+        "`code`, ```code blocks```, [links](url)) is auto-converted to "
+        "WhatsApp's native syntax (*bold*, _italic_, ~strike~, monospace) — "
+        "feel free to write in markdown, and use bullet lists ('- item') "
+        "freely. Tables are NOT supported — prefer bullet lists or labeled "
+        "key:value pairs. "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. The file "
        "will be sent as a native WhatsApp attachment — images (.jpg, .png, "
@@ -682,7 +690,11 @@ PLATFORM_HINTS = {
    ),
    "signal": (
        "You are on a text messaging communication platform, Signal. "
-        "Please do not use markdown as it does not render. "
+        "Standard markdown (**bold**, *italic*, ~~strike~~, # headers, "
+        "`code`, ```code blocks```) is auto-converted to Signal's native "
+        "rich formatting — feel free to write in markdown, and use bullet "
+        "lists ('- item') freely (they render as • bullets). Tables are NOT "
+        "supported — prefer bullet lists or labeled key:value pairs. "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. Images "
        "(.png, .jpg, .webp) appear as photos, audio as attachments, and other "
@@ -917,8 +929,7 @@ def _probe_remote_backend(env_type: str) -> str | None:
    try:
        # Import locally: tools/ imports are heavy and only relevant when a
        # non-local backend is actually configured.
-        from tools.terminal_tool import _get_env_config  # type: ignore
-        from tools.environments import get_environment  # type: ignore
+        from tools.terminal_tool import _create_environment, _get_env_config  # type: ignore
    except Exception as e:
        logger.debug("Backend probe unavailable (import failed): %s", e)
        _BACKEND_PROBE_CACHE[cache_key] = ""
@@ -926,7 +937,59 @@ def _probe_remote_backend(env_type: str) -> str | None:

    try:
        config = _get_env_config()
-        env = get_environment(config)
+        # Build the environment the same way tools/terminal_tool.py does for a
+        # live command: select the backend image, then assemble ssh/container
+        # config from the env-derived dict. (There is no `get_environment`
+        # factory — the real entry point is `_create_environment`.)
+        if env_type == "docker":
+            image = config.get("docker_image", "")
+        elif env_type == "singularity":
+            image = config.get("singularity_image", "")
+        elif env_type == "modal":
+            image = config.get("modal_image", "")
+        elif env_type == "daytona":
+            image = config.get("daytona_image", "")
+        else:
+            image = ""
+
+        ssh_config = None
+        if env_type == "ssh":
+            ssh_config = {
+                "host": config.get("ssh_host", ""),
+                "user": config.get("ssh_user", ""),
+                "port": config.get("ssh_port", 22),
+                "key": config.get("ssh_key", ""),
+                "persistent": config.get("ssh_persistent", False),
+            }
+
+        container_config = None
+        if env_type in {"docker", "singularity", "modal", "daytona"}:
+            container_config = {
+                "container_cpu": config.get("container_cpu", 1),
+                "container_memory": config.get("container_memory", 5120),
+                "container_disk": config.get("container_disk", 51200),
+                "container_persistent": config.get("container_persistent", True),
+                "modal_mode": config.get("modal_mode", "auto"),
+                "docker_volumes": config.get("docker_volumes", []),
+                "docker_mount_cwd_to_workspace": config.get("docker_mount_cwd_to_workspace", False),
+                "docker_forward_env": config.get("docker_forward_env", []),
+                "docker_env": config.get("docker_env", {}),
+                "docker_run_as_host_user": config.get("docker_run_as_host_user", False),
+                "docker_extra_args": config.get("docker_extra_args", []),
+                "docker_persist_across_processes": config.get("docker_persist_across_processes", True),
+                "docker_orphan_reaper": config.get("docker_orphan_reaper", True),
+            }
+
+        env = _create_environment(
+            env_type=env_type,
+            image=image,
+            cwd=config.get("cwd", ""),
+            timeout=config.get("timeout", 180),
+            ssh_config=ssh_config,
+            container_config=container_config,
+            task_id="prompt-backend-probe",
+            host_cwd=config.get("host_cwd"),
+        )
        # Single-line POSIX probe — works on any Unixy backend. Wrapped in
        # `2>/dev/null` so a missing binary doesn't pollute the output.
        probe_cmd = (
--- a/agent/redact.py
+++ b/agent/redact.py
@@ -10,6 +10,7 @@ the first 6 and last 4 characters for debuggability.
 import logging
 import os
 import re
+import shlex

 logger = logging.getLogger(__name__)

@@ -107,12 +108,60 @@ _PREFIX_PATTERNS = [
    r"ntn_[A-Za-z0-9]{10,}",            # Notion internal integration token
 ]

-# ENV assignment patterns: KEY=value where KEY contains a secret-like name
+# ENV assignment patterns: KEY=value where KEY contains a secret-like name.
+# Uppercase keys tolerate spaces around "=" (e.g. ``FOO_SECRET = bar``) because
+# an all-caps key is almost never prose/code.
 _SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)"
 _ENV_ASSIGN_RE = re.compile(
    rf"([A-Z0-9_]{{0,50}}{_SECRET_ENV_NAMES}[A-Z0-9_]{{0,50}})\s*=\s*(['\"]?)(\S+)\2",
 )

+# Lowercase / dotted / hyphenated config keys from config files
+# (application.properties, .env, YAML-ish dumps): ``spring.datasource.password=secret``,
+# ``app.api.key=xyz``, ``password=secret``. The uppercase _ENV_ASSIGN_RE above
+# never matched these, so config-file passwords leaked verbatim (issue #16413).
+#
+# These run only in a config-file context, NOT in prose, code, or URLs — three
+# carve-outs preserved from the original design (#4367 + the documented
+# web-URL passthrough below):
+#   1. The value is bounded by ``[^\s&]`` (stops at whitespace AND ``&``) so
+#      form-urlencoded bodies are handled pair-by-pair (by _redact_form_body),
+#      not greedily swallowed.
+#   2. _CFG_DOTTED_RE only matches when the key is NAMESPACED (contains a dot),
+#      which is unambiguously a config key — never a prose word.
+#   3. _CFG_ANCHORED_RE matches a bare secret-word key only at line start
+#      (optionally after ``export``), so conversational ``I have password=foo``
+#      mid-sentence is left alone.
+# The colon-form URL guard (skip when ``://`` present) lives at the call site.
+_SECRET_CFG_NAMES = r"(?:api[ _.\-]?key|token|secret|passwd|password|credential|auth)"
+_CFG_VALUE = r"(['\"]?)([^\s&]+?)\2(?=[\s&]|$)"
+# Namespaced (dotted) key: the secret word may sit anywhere in a dotted path.
+_CFG_DOTTED_RE = re.compile(
+    rf"((?:[A-Za-z0-9_\-]+\.)+[A-Za-z0-9_.\-]*{_SECRET_CFG_NAMES}[A-Za-z0-9_.\-]*"
+    rf"|[A-Za-z0-9_.\-]*{_SECRET_CFG_NAMES}[A-Za-z0-9_.\-]*\.[A-Za-z0-9_.\-]+)"
+    rf"={_CFG_VALUE}",
+    re.IGNORECASE,
+)
+# Line-anchored bare key: ``password=…`` / ``export api_key=…`` at start of line.
+_CFG_ANCHORED_RE = re.compile(
+    rf"(^[ \t]*(?:export[ \t]+)?[A-Za-z0-9_\-]*{_SECRET_CFG_NAMES}[A-Za-z0-9_\-]*)={_CFG_VALUE}",
+    re.IGNORECASE | re.MULTILINE,
+)
+
+# Unquoted YAML / colon config (e.g. ``password: secret``,
+# ``spring.datasource.password: hunter2``). The secret keyword must be part of
+# the KEY (anchored to the start of the line/indent), and the value is a single
+# whitespace-free token — so prose like ``note: secret meeting`` (keyword in the
+# value) and ``error: token expired`` are left alone. Bare ``auth`` is excluded
+# from the key set so ``Authorization:`` / ``author:`` don't match (the former
+# is masked by _AUTH_HEADER_RE); ``auth_token``/``auth-token`` still match via
+# the ``token`` keyword. Quoted values defer to _JSON_FIELD_RE via the lookahead.
+_YAML_CFG_NAMES = r"(?:api[ _.\-]?key|token|secret|passwd|password|credential)"
+_YAML_ASSIGN_RE = re.compile(
+    rf"(^[ \t]*[A-Za-z0-9_.\-]*{_YAML_CFG_NAMES}[A-Za-z0-9_.\-]*)(:[ \t]*)(?!['\"])([^\s&]+)",
+    re.IGNORECASE | re.MULTILINE,
+)
+
 # JSON field patterns: "apiKey": "value", "token": "value", etc.
 _JSON_KEY_NAMES = r"(?:api_?[Kk]ey|token|secret|password|access_token|refresh_token|auth_token|bearer|secret_value|raw_secret|secret_input|key_material)"
 _JSON_FIELD_RE = re.compile(
@@ -125,8 +174,15 @@ _JSON_FIELD_RE = re.compile(
 # while the header name and scheme word are preserved for debuggability. The
 # previous rule only matched ``Bearer``, so ``Basic <base64 user:pass>`` and
 # ``token <pat>`` leaked verbatim into logs/transcripts.
+#
+# The credential class excludes quote characters (``"`` / ``'``): a token sitting
+# flush against a closing quote (``"Authorization: Bearer sk-..."``) must not pull
+# that quote into the match, or masking turns value corruption into *syntax*
+# corruption — the closing quote vanishes and the command/string no longer parses
+# (unterminated quote → shell EOF / Python SyntaxError). Real credentials never
+# contain ``"`` or ``'``, so excluding them is safe. See #43083.
 _AUTH_HEADER_RE = re.compile(
-    r"((?:Proxy-)?Authorization:\s*)([A-Za-z][\w.+-]*\s+)?(\S+)",
+    r"((?:Proxy-)?Authorization:\s*)([A-Za-z][\w.+-]*\s+)?([^\s\"']+)",
    re.IGNORECASE,
 )

@@ -154,9 +210,37 @@ _PRIVATE_KEY_RE = re.compile(
 )

 # Database connection strings: protocol://user:PASSWORD@host
-# Catches postgres, mysql, mongodb, redis, amqp URLs and redacts the password
+# Catches postgres, mysql, mongodb, redis, amqp URLs and redacts the password.
+# The userinfo and password groups forbid whitespace ([^:\s]+ / [^@\s]+) so the
+# match can never span a line break. A real DSN password never contains
+# whitespace; without this bound the greedy [^@]+ would scan past the end of a
+# code line to the next stray "@" (e.g. a Python decorator), swallowing
+# intervening lines and corrupting tool OUTPUT for any source containing a
+# postgresql:// f-string template. See issue #33801.
 _DB_CONNSTR_RE = re.compile(
-    r"((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:]+:)([^@]+)(@)",
+    r"((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:\s]+:)([^@\s]+)(@)",
+    re.IGNORECASE,
+)
+
+# Bare-token credential in a web/transport URL: ``scheme://TOKEN@host``.
+# This is the ``git remote set-url origin https://PASSWORD@github.com/...``
+# shape from issue #6396 — a single opaque credential in the userinfo position
+# with NO ``user:pass`` colon. It is unambiguously a secret: legitimate
+# round-trip URLs (OAuth callbacks, magic links, pre-signed shares — see the
+# "Web-URL redaction is intentionally OFF" note in redact_sensitive_text) carry
+# their tokens in the QUERY STRING, never in bare userinfo. The colon form
+# ``user:pass@`` is deliberately left to pass through (commit "pass web URLs
+# through unchanged", #34029) and is NOT matched here — the token class forbids
+# ``:``. DB schemes are handled by _DB_CONNSTR_RE above and excluded here.
+#
+# Guards against false positives:
+#   - 8+ char floor skips short usernames (git, admin, root, deploy, ubuntu).
+#   - The token class ``[^\s:@/]`` cannot cross ``/``, so an ``@`` sitting in a
+#     path or query (e.g. ``?q=user@example.com``) is never treated as userinfo.
+_URL_BARE_TOKEN_RE = re.compile(
+    r"((?:https?|wss?|git|ssh|ftp|ftps|sftp)://)"  # scheme
+    r"([^\s:@/]{8,})"                               # bare token (no colon/slash/@), 8+ chars
+    r"(@[^\s]+)",                                   # @host...
    re.IGNORECASE,
 )

@@ -340,7 +424,40 @@ def _redact_form_body(text: str) -> str:
    return _redact_query_string(text.strip())


-def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = False) -> str:
+def _mask_token_nonreusable(token: str) -> str:
+    """Replace a prefix-matched credential with a NON-REUSABLE sentinel.
+
+    :func:`_mask_token` keeps head/tail characters, which is fine for logs
+    that are never fed back into a config.  This variant instead emits a
+    marker that:
+
+    * contains none of the secret's characters (no head/tail is kept); and
+    * cannot be mistaken for a usable-but-truncated key, so an agent that
+      reads it from a config file and writes it back does not overwrite the
+      stored credential with a dead masked string (issue #35519).
+
+    Only the vendor prefix label (e.g. ``ghp_``, ``sk-``) survives, so the
+    reader can still tell which kind of credential is present.  The label is
+    the first entry of ``_PREFIX_SUBSTRINGS`` that ``token`` starts with.
+    """
+    # Empty input and tokens without a known prefix share the generic marker.
+    generic = "«redacted-secret»"
+    if not token:
+        return generic
+    # next() stops at the first matching prefix, like a loop with ``break``.
+    label = next((sub for sub in _PREFIX_SUBSTRINGS if token.startswith(sub)), "")
+    if not label:
+        return generic
+    return f"«redacted:{label}…»"
+
+
+def redact_sensitive_text(
+    text: str,
+    *,
+    force: bool = False,
+    code_file: bool = False,
+    file_read: bool = False,
+) -> str:
    """Apply all redaction patterns to a block of text.

    Safe to call on any string -- non-matching text passes through unchanged.
@@ -353,6 +470,17 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
    constants, "apiKey": "test" fixtures). Prefix patterns, auth headers,
    private keys, DB connstrings, JWTs, and URL secrets are still redacted.

+    Set file_read=True for file *content* returned to the agent (read_file /
+    search_files / cat). Secrets are STILL redacted — they are never exposed —
+    but prefix-matched credentials are replaced with a non-reusable sentinel
+    (``«redacted:ghp_…»``) instead of a head/tail-preserving mask
+    (``ghp_S1...Pn2T``). The old mask looked like a real-but-truncated key, so
+    an agent reading it from config.yaml and writing it back silently corrupted
+    the stored credential into a dead 13-char value → 401 (issue #35519). The
+    sentinel is syntactically invalid as a token, so it can't be mistaken for a
+    usable key or written back as one. Implies code_file=True (config/data
+    files shouldn't trigger the source-code ENV/JSON false-positive paths).
+
    Performance: each regex pattern is gated behind a cheap substring
    pre-check (e.g. ``"=" in text`` for ENV assignments, ``"://" in text``
    for URLs, ``"eyJ" in text`` for JWTs). On a typical hermes log line
@@ -371,9 +499,15 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
    if not (force or _REDACT_ENABLED):
        return text

+    # file_read content shouldn't hit the source-code ENV/JSON false-positive
+    # paths either (it's config/data, not log lines).
+    if file_read:
+        code_file = True
+
    # Known prefixes (sk-, ghp_, etc.) — gate on substring presence
    if _has_known_prefix_substring(text):
-        text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text)
+        _prefix_sub = _mask_token_nonreusable if file_read else _mask_token
+        text = _PREFIX_RE.sub(lambda m: _prefix_sub(m.group(1)), text)

    # ENV assignments: OPENAI_API_KEY=***  (skip for code files — false positives)
    if not code_file:
@@ -382,6 +516,13 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
                name, quote, value = m.group(1), m.group(2), m.group(3)
                return f"{name}={quote}{_mask_token(value)}{quote}"
            text = _ENV_ASSIGN_RE.sub(_redact_env, text)
+            # Lowercase/dotted config keys (issue #16413). Skip URLs entirely —
+            # web-URL query params are intentionally passed through (see note
+            # near the bottom of this function); _DB_CONNSTR_RE still guards
+            # connection-string passwords.
+            if "://" not in text:
+                text = _CFG_DOTTED_RE.sub(_redact_env, text)
+                text = _CFG_ANCHORED_RE.sub(_redact_env, text)

        # JSON fields: "apiKey": "***"  (skip for code files — false positives)
        if ":" in text and '"' in text:
@@ -390,6 +531,15 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
                return f'{key}: "{_mask_token(value)}"'
            text = _JSON_FIELD_RE.sub(_redact_json, text)

+        # Unquoted YAML / colon config: password: ***  (after JSON so quoted
+        # values are handled there; the lookahead in _YAML_ASSIGN_RE skips
+        # quotes). Skip URLs — web-URL query params pass through by design.
+        if ":" in text and "://" not in text:
+            def _redact_yaml(m):
+                key, sep, value = m.group(1), m.group(2), m.group(3)
+                return f"{key}{sep}{_mask_token(value)}"
+            text = _YAML_ASSIGN_RE.sub(_redact_yaml, text)
+
    # Authorization headers — _AUTH_HEADER_RE matches any scheme after
    # "[Proxy-]Authorization:" case-insensitively, so "uthorization" is the
    # cheapest substring gate that covers every casing without a casefold().
@@ -419,9 +569,32 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
    if "BEGIN" in text and "-----" in text:
        text = _PRIVATE_KEY_RE.sub("[REDACTED PRIVATE KEY]", text)

-    # Database connection string passwords
+    # Database connection string passwords. With code_file=True, a password
+    # group that is a pure ``{...}`` brace expression is an f-string template
+    # reference (e.g. f"postgresql://{user}:{pass}@{host}"), not a literal
+    # credential — preserve it. Literal passwords are still redacted. The regex
+    # forbids whitespace in the password group, so a single-line template's
+    # group(2) is exactly the brace expression. See issue #33801.
    if "://" in text:
-        text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
+        if code_file:
+            def _redact_db(m):
+                pw = m.group(2)
+                if pw.startswith("{") and pw.endswith("}"):
+                    return m.group(0)
+                return f"{m.group(1)}***{m.group(3)}"
+            text = _DB_CONNSTR_RE.sub(_redact_db, text)
+        else:
+            text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
+
+        # Bare-token userinfo in web/transport URLs: ``scheme://TOKEN@host``.
+        # The git-remote-with-embedded-password shape from #6396. Only the
+        # colon-less bare-token form is redacted — ``user:pass@`` and
+        # query-string tokens are left to pass through (see the web-URL note
+        # below). See _URL_BARE_TOKEN_RE for the false-positive guards.
+        text = _URL_BARE_TOKEN_RE.sub(
+            lambda m: f"{m.group(1)}{_mask_token(m.group(2))}{m.group(3)}",
+            text,
+        )

    # JWT tokens (eyJ... — base64-encoded JSON headers)
    if "eyJ" in text:
@@ -434,7 +607,12 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
    # blanket-redacting param values by name breaks those skills mid-flow.
    # Known credential shapes (sk-, ghp_, JWTs, etc.) inside URLs are still
    # caught by _PREFIX_RE and _JWT_RE above. DB connection-string passwords
-    # are still caught by _DB_CONNSTR_RE.
+    # are still caught by _DB_CONNSTR_RE. The ONE userinfo case still redacted
+    # is the colon-less bare-token form ``scheme://TOKEN@host`` (#6396, handled
+    # by _URL_BARE_TOKEN_RE in the ``://`` block above): a bare credential in
+    # userinfo is never a round-trip workflow token (those live in the query
+    # string), so masking it can't break a skill. The ``user:pass@`` form is
+    # left to pass through per #34029.

    # Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
    if "&" in text and "=" in text:
@@ -452,6 +630,66 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
    return text


+# Commands whose stdout is an environment-variable dump (KEY=value lines),
+# NOT source code. For these, terminal-output redaction must run the
+# ENV-assignment pass (code_file=False) so opaque tokens with no recognized
+# vendor prefix (e.g. ``MY_SERVICE_TOKEN=abc123randomstring``) are still
+# masked. For all other commands, code_file=True is used to avoid mangling
+# legitimate source/config dumps (``MAX_TOKENS=100``, ``"apiKey": "x"``
+# fixtures, ``postgresql://{user}`` f-string templates). See issue #43025.
+_ENV_DUMP_COMMANDS = frozenset({"env", "printenv", "set", "export", "declare"})
+
+
+def is_env_dump_command(command: str | None) -> bool:
+    """Return True if ``command`` dumps environment variables to stdout.
+
+    Detects ``env`` / ``printenv`` / ``set`` / ``export`` / ``declare`` as the
+    first token of any segment of a pipeline or sequence (``;`` / ``&&`` /
+    ``||`` / ``|`` / newline). A segment that ``shlex`` cannot parse is split
+    on plain whitespace instead. Empty, non-string, or unrecognized input
+    returns False (callers then fall back to the code_file=True path, which
+    still masks prefix-shaped keys).
+    """
+    if not command or not isinstance(command, str):
+        return False
+    # Join backslash-continued lines first, then split on shell separators.
+    # Newline is a separator too: otherwise "cd /tmp<newline>env" is a single
+    # segment whose first token is ``cd`` and the env dump goes undetected.
+    segments = re.split(r"[|;&\n]+", command.replace("\\\n", " "))
+    for seg in filter(None, map(str.strip, segments)):
+        try:
+            tokens = shlex.split(seg)
+        except ValueError:
+            tokens = seg.split()
+        if tokens and tokens[0] in _ENV_DUMP_COMMANDS:
+            return True
+    return False
+
+
+def redact_terminal_output(
+    output: str, command: str | None = None, *, force: bool = False
+) -> str:
+    """Mask secrets in stdout captured from a terminal or background process.
+
+    Every terminal-output surface (foreground ``terminal`` results and
+    background ``process(action=poll/log/wait)`` output) shares this single
+    policy so they cannot diverge.  ``command`` selects the ``code_file``
+    flag handed to ``redact_sensitive_text``:
+
+    - ``is_env_dump_command(command)`` is true → ``code_file=False``, so the
+      ENV-assignment pass also masks opaque ``KEY=value`` tokens.
+    - any other command, or no command at all → ``code_file=True``, which
+      avoids false positives on source/config dumps.
+
+    ``force`` is forwarded unchanged; ``force=True`` redacts even when the
+    global ``security.redact_secrets`` preference is switched off.
+    """
+    if output:
+        env_dump = is_env_dump_command(command or "")
+        return redact_sensitive_text(output, force=force, code_file=not env_dump)
+    return output
+
+
 # Substrings used to gate ``_PREFIX_RE`` execution. If none of these appear in
 # the input string, the prefix regex cannot match anything, so we skip it.
 # False positives are fine (they just run the regex, which then matches
--- a/agent/replay_cleanup.py
+++ b/agent/replay_cleanup.py
@@ -0,0 +1,140 @@
+"""Replay-history sanitization shared across resume code paths.
+
+When a session's last turn dies mid-tool-loop — the process is killed by a
+restart/shutdown command, a stale-timeout fires, or an interrupt lands before
+the tool result is written — the persisted transcript can end with a dangling
+``assistant(tool_calls)`` (no matching ``tool`` answer) or an interrupted
+``assistant→tool`` block.  On resume the model sees that broken tail and
+re-issues the unanswered call, producing an endless "thinking"/reboot loop
+(#49201, #29086).
+
+These pure helpers strip those tails before the history is replayed to the
+model.  They were originally local to ``gateway/run.py`` (which fixed the
+messaging-gateway path) and are extracted here so every resume surface — the
+messaging gateway AND the TUI/WebUI gateway — shares the same cleanup instead
+of the WebUI path silently skipping it.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, Dict, List
+
+logger = logging.getLogger(__name__)
+
+
+def is_interrupted_tool_result(content: Any) -> bool:
+    """Report whether ``content`` is the result text of an interrupted tool."""
+    if not isinstance(content, str):
+        return False
+    text = content.lower()
+    # The explicit marker is conclusive on its own.
+    if "[command interrupted]" in text:
+        return True
+    signal_exit = "exit_code" in text and ("130" in text or "-1" in text)
+    return signal_exit and "interrupt" in text
+
+
+def strip_interrupted_tool_tails(
+    agent_history: List[Dict[str, Any]],
+) -> List[Dict[str, Any]]:
+    """Drop interrupted assistant→tool blocks from the replay history.
+
+    An interrupted turn may be followed by a later, queued user message, so
+    the broken block can sit anywhere in the history, not only at the tail.
+    Each assistant(tool_calls) message whose directly following run of tool
+    results contains an interrupted result is removed together with that
+    run; blocks whose results all completed are kept intact.
+    """
+    if not agent_history:
+        return agent_history
+
+    kept: List[Dict[str, Any]] = []
+    total = len(agent_history)
+    pos = 0
+    while pos < total:
+        current = agent_history[pos]
+        role = current.get("role")
+        if role == "assistant" and "tool_calls" in current:
+            # Locate the end of the run of tool results that follows.
+            end = pos + 1
+            while end < total and agent_history[end].get("role") == "tool":
+                end += 1
+            answers = agent_history[pos + 1:end]
+            if any(is_interrupted_tool_result(a.get("content", "")) for a in answers):
+                logger.debug(
+                    "Stripping interrupted assistant→tool replay block "
+                    "(indices %d–%d, tool_results=%d)",
+                    pos, end - 1, len(answers),
+                )
+                pos = end
+                continue
+        elif role == "tool" and is_interrupted_tool_result(current.get("content", "")):
+            # An interrupted result outside any block handled above (orphan).
+            logger.debug("Stripping orphan interrupted tool result from replay history")
+            pos += 1
+            continue
+        # Everything else is replayed unchanged.
+        kept.append(current)
+        pos += 1
+
+    return kept
+
+
+def strip_dangling_tool_call_tail(
+    agent_history: List[Dict[str, Any]],
+) -> List[Dict[str, Any]]:
+    """Remove a final ``assistant(tool_calls)`` message that got no answers.
+
+    A tool call can kill the gateway process itself (``docker restart``,
+    ``systemctl restart``, ``kill``, ``hermes gateway restart``).  The process
+    then dies *mid-call*: no tool result is written and the orderly shutdown
+    rewind (``_drop_trailing_empty_response_scaffolding``) never runs, so the
+    last persisted message is the ``assistant`` one that issued the
+    ``tool_calls``, with no ``tool`` rows after it.
+
+    Replaying that tail makes the model re-issue the unanswered call, which
+    restarts the gateway again — the infinite reboot loop of #49201.
+    ``strip_interrupted_tool_tails`` cannot help, because there is no tool
+    result whose content could carry an interrupt marker.
+
+    Only the very last message is examined, and it is dropped only when it is
+    an ``assistant`` message with a non-empty ``tool_calls``.  If any ``tool``
+    answer follows the assistant message, the tail is a ``tool`` row and the
+    history is returned unchanged, so mid-progress tool loops still resume.
+    Returns the input list itself when nothing is stripped, otherwise a new
+    list without the last message.
+    """
+    if not agent_history:
+        return agent_history
+
+    tail = agent_history[-1]
+    is_dangling = (
+        isinstance(tail, dict)
+        and tail.get("role") == "assistant"
+        and bool(tail.get("tool_calls"))
+    )
+    if is_dangling:
+        logger.debug(
+            "Stripping dangling unanswered assistant(tool_calls) tail "
+            "(%d call(s)) — process likely killed mid-tool-call by a "
+            "restart/shutdown command (#49201)",
+            len(tail.get("tool_calls") or []),
+        )
+        return agent_history[:-1]
+    return agent_history
+
+
+def sanitize_replay_history(
+    agent_history: List[Dict[str, Any]],
+) -> List[Dict[str, Any]]:
+    """Apply both replay-tail strippers in the canonical order.
+
+    Drops interrupted assistant→tool blocks, then a dangling unanswered
+    ``assistant(tool_calls)`` tail.  Returns the input list itself when nothing
+    was stripped (the strippers only drop rows, so equal length means equal).
+    """
+    if not agent_history:
+        return agent_history
+    cleaned = strip_dangling_tool_call_tail(strip_interrupted_tool_tails(agent_history))
+    return cleaned if len(cleaned) != len(agent_history) else agent_history
--- a/agent/shell_hooks.py
+++ b/agent/shell_hooks.py
@@ -122,6 +122,8 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple

+from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags
+
 try:
    import fcntl  # POSIX only; Windows falls back to best-effort without flock.
 except ImportError:  # pragma: no cover
@@ -441,6 +443,7 @@ def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
        return result

    t0 = time.monotonic()
+    _popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
    try:
        proc = subprocess.run(
            argv,
@@ -449,6 +452,7 @@ def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
            timeout=spec.timeout,
            text=True,
            shell=False,
+            **_popen_kwargs,
        )
    except subprocess.TimeoutExpired:
        result["timed_out"] = True
--- a/agent/skill_preprocessing.py
+++ b/agent/skill_preprocessing.py
@@ -5,6 +5,8 @@ import re
 import subprocess
 from pathlib import Path

+from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags
+
 logger = logging.getLogger(__name__)

 # Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
@@ -66,6 +68,7 @@ def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
    Failures return a short ``[inline-shell error: ...]`` marker instead of
    raising, so one bad snippet can't wreck the whole skill message.
    """
+    _popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
    try:
        completed = subprocess.run(
            ["bash", "-c", command],
@@ -75,6 +78,7 @@ def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
            timeout=max(1, int(timeout)),
            check=False,
            stdin=subprocess.DEVNULL,
+            **_popen_kwargs,
        )
    except subprocess.TimeoutExpired:
        return f"[inline-shell timeout after {timeout}s: {command}]"
--- a/agent/turn_context.py
+++ b/agent/turn_context.py
@@ -28,6 +28,7 @@ import uuid
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional

+from agent.conversation_compression import conversation_history_after_compression
 from agent.iteration_budget import IterationBudget
 from agent.model_metadata import (
    estimate_messages_tokens_rough,
@@ -400,7 +401,9 @@ def build_turn_context(
                    _orig_len, len(messages), _orig_tokens, _preflight_tokens
                ):
                    break  # Cannot compress further: neither rows nor tokens moved
-                conversation_history = None
+                conversation_history = conversation_history_after_compression(
+                    agent, messages
+                )
                agent._empty_content_retries = 0
                agent._thinking_prefill_retries = 0
                agent._last_content_with_tools = None
--- a/agent/turn_finalizer.py
+++ b/agent/turn_finalizer.py
@@ -289,7 +289,14 @@ def finalize_turn(
                    and len(_stripped) <= 24
                    and _stripped[-1:] not in {".", "!", "?", "。", "！", "？", "`", ")"}
                )
-                if _is_empty_terminal or _is_partial_fragment:
+                _is_partial_stream_recovery = (
+                    str(_turn_exit_reason) == "partial_stream_recovery"
+                )
+                if (
+                    _is_empty_terminal
+                    or _is_partial_fragment
+                    or _is_partial_stream_recovery
+                ):
                    _explanation = agent._format_turn_completion_explanation(
                        _turn_exit_reason
                    )
--- a/agent/turn_retry_state.py
+++ b/agent/turn_retry_state.py
@@ -67,6 +67,11 @@ class TurnRetryState:
    # ── Restart signals (read by the outer loop after the attempt) ───────
    restart_with_compressed_messages: bool = False
    restart_with_length_continuation: bool = False
+    # Set when a content-filter stream stall (e.g. MiniMax "new_sensitive")
+    # has been escalated to the fallback chain: the partial-stream content
+    # was rolled back off ``messages`` and the loop should re-issue the API
+    # call against the newly-activated provider (#32421).
+    restart_with_rebuilt_messages: bool = False

    def __iter__(self):
        # Convenience for debugging / tests: iterate (name, value) pairs.
--- a/agent/verification_stop.py
+++ b/agent/verification_stop.py
@@ -15,6 +15,63 @@ from typing import Any, Iterable

 _MAX_CHANGED_PATHS_IN_NUDGE = 8

+# Non-code file extensions whose edits carry no verifiable runtime behavior:
+# documentation, prose, and data/markup that no test/build exercises. When a
+# turn touches ONLY these, verify-on-stop has nothing to check, so the nudge is
+# suppressed (this is fix "C" for the doc/markdown/skill false-positive — a
+# SKILL.md or README edit must never demand a /tmp verification script). A turn
+# that edits any non-listed path (a real source/code/config file) still nudges.
+_NON_CODE_VERIFY_EXTENSIONS = frozenset(
+    {
+        ".md",
+        ".markdown",
+        ".mdx",
+        ".rst",
+        ".txt",
+        ".text",
+        ".adoc",
+        ".asciidoc",
+        ".org",
+        ".log",
+        ".csv",
+        ".tsv",
+    }
+)
+
+# Filenames (case-insensitive, extension-less or otherwise) that are pure prose
+# even without a recognized doc extension.
+_NON_CODE_VERIFY_FILENAMES = frozenset(
+    {
+        "license",
+        "licence",
+        "notice",
+        "authors",
+        "contributors",
+        "changelog",
+        "codeowners",
+    }
+)
+
+
+def _is_non_code_path(raw: str) -> bool:
+    """Tell whether a changed path is documentation/prose with nothing to verify."""
+    try:
+        path = Path(str(raw))
+    except Exception:
+        # A value that cannot be turned into a path is treated as verifiable.
+        return False
+    ext = path.suffix.lower()
+    if ext:
+        return ext in _NON_CODE_VERIFY_EXTENSIONS
+    # No extension: fall back to well-known prose filenames (LICENSE, ...).
+    return path.name.lower() in _NON_CODE_VERIFY_FILENAMES
+
+
+def _filter_verifiable_paths(paths: Iterable[str]) -> list[str]:
+    """Keep, in input order, the non-empty paths that are not documentation/prose."""
+    return list(filter(lambda p: p and not _is_non_code_path(p), paths))
+
+
 # Session identities (platform or source) that are NOT human conversational
 # messaging surfaces: interactive coding surfaces (CLI, TUI, desktop, codex,
 # local, gateway) and programmatic callers (API server, webhooks, tools).
@@ -79,12 +136,13 @@ def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool:
    """Return whether edit -> verify-before-finish behavior is enabled.

    Precedence: an explicit ``HERMES_VERIFY_ON_STOP`` env var wins, then an
-    explicit boolean ``agent.verify_on_stop`` config value, then a surface-aware
-    default. The config default is the sentinel ``"auto"`` (see
-    ``DEFAULT_CONFIG``), which resolves to ON for interactive coding surfaces
+    explicit ``agent.verify_on_stop`` config value. The config default is
+    ``False`` (see ``DEFAULT_CONFIG``) — verify-on-stop is OFF unless the user
+    opts in. The legacy ``"auto"`` sentinel is still honored for anyone who
+    sets it explicitly: it resolves to ON for interactive coding surfaces
    (CLI, TUI, desktop) and programmatic callers, and OFF for conversational
-    messaging surfaces (Telegram, Discord, etc.) where the verification
-    narrative would otherwise reach a human as chat noise.
+    messaging surfaces (Telegram, Discord, etc.). A missing/unknown value
+    falls back to OFF.
    """
    env = os.environ.get("HERMES_VERIFY_ON_STOP")
    if env is not None:
@@ -106,8 +164,11 @@ def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool:
            return True
        if token in {"0", "false", "no", "off"}:
            return False
-    # "auto", missing, or any other value -> surface-aware default.
-    return not _session_is_messaging_surface()
+        if token == "auto":
+            # Explicit opt-in to the legacy surface-aware behavior.
+            return not _session_is_messaging_surface()
+    # Missing or unknown value -> OFF (the new default).
+    return False


 def _candidate_cwds(paths: Iterable[str]) -> list[Path]:
@@ -190,7 +251,10 @@ def build_verify_on_stop_nudge(
    max_attempts: int = 2,
 ) -> str | None:
    """Return a synthetic follow-up when edited code lacks fresh verification."""
-    paths = sorted({str(p) for p in changed_paths if p})
+    # Drop documentation/prose paths (markdown, skills, README, LICENSE, ...) —
+    # they carry no verifiable behavior, so a turn that touched only those has
+    # nothing to verify and must not nudge.
+    paths = sorted({str(p) for p in _filter_verifiable_paths(changed_paths)})
    if not paths or attempts >= max_attempts:
        return None

--- a/apps/desktop/electron/backend-env.test.cjs
+++ b/apps/desktop/electron/backend-env.test.cjs
@@ -1,15 +1,13 @@
-const test = require('node:test')
-const assert = require('node:assert/strict')
-const path = require('node:path')
+import assert from 'node:assert/strict'
+import path from 'node:path'
+import test from 'node:test'

-const {
-  POSIX_SANE_PATH_ENTRIES,
-  appendUniquePathEntries,
+import { appendUniquePathEntries,
  buildDesktopBackendEnv,
  buildDesktopBackendPath,
  normalizeHermesHomeRoot,
-  pathEnvKey
-} = require('./backend-env.cjs')
+  pathEnvKey,
+  POSIX_SANE_PATH_ENTRIES } from './backend-env'

 test('desktop backend PATH adds Hermes-managed bins and missing POSIX sane entries', () => {
  const result = buildDesktopBackendPath({
--- a/apps/desktop/electron/backend-env.cjs
+++ b/apps/desktop/electron/backend-env.cjs
@@ -1,4 +1,4 @@
-const path = require('node:path')
+import path from 'node:path'

 // Match the POSIX fallback surface used by the Python terminal environment.
 // macOS apps launched from Finder/Dock often inherit only /usr/bin:/bin:/usr/sbin:/sbin,
@@ -23,12 +23,14 @@ function pathModuleForPlatform(platform = process.platform) {
 }

 function pathEnvKey(env = process.env, platform = process.platform) {
-  if (platform !== 'win32') return 'PATH'
+  if (platform !== 'win32') {return 'PATH'}
+
  return Object.keys(env || {}).find(key => key.toUpperCase() === 'PATH') || 'PATH'
 }

 function currentPathValue(env = process.env, platform = process.platform) {
  const key = pathEnvKey(env, platform)
+
  return env?.[key] || ''
 }

@@ -37,10 +39,11 @@ function appendUniquePathEntries(entries, { delimiter = path.delimiter } = {}) {
  const ordered = []

  for (const entry of entries) {
-    if (!entry) continue
+    if (!entry) {continue}
    const parts = Array.isArray(entry) ? entry : String(entry).split(delimiter)
+
    for (const part of parts) {
-      if (!part || seen.has(part)) continue
+      if (!part || seen.has(part)) {continue}
      seen.add(part)
      ordered.push(part)
    }
@@ -55,7 +58,7 @@ function buildDesktopBackendPath({
  currentPath = '',
  platform = process.platform,
  pathModule = pathModuleForPlatform(platform)
-} = {}) {
+}: any = {}) {
  const delimiter = delimiterForPlatform(platform)
  const hermesNodeBin = hermesHome ? pathModule.join(hermesHome, 'node', 'bin') : null
  const venvBin = venvRoot ? pathModule.join(venvRoot, platform === 'win32' ? 'Scripts' : 'bin') : null
@@ -64,13 +67,15 @@ function buildDesktopBackendPath({
  return appendUniquePathEntries([hermesNodeBin, venvBin, currentPath, saneEntries], { delimiter })
 }

-function normalizeHermesHomeRoot(hermesHome, { pathModule = pathModuleForPlatform(process.platform) } = {}) {
-  if (!hermesHome) return hermesHome
+function normalizeHermesHomeRoot(hermesHome, { pathModule = pathModuleForPlatform(process.platform) }: any = {}) {
+  if (!hermesHome) {return hermesHome}
  const resolved = pathModule.resolve(String(hermesHome))
  const parent = pathModule.dirname(resolved)
+
  if (pathModule.basename(parent).toLowerCase() === 'profiles') {
    return pathModule.dirname(parent)
  }
+
  return resolved
 }

@@ -81,7 +86,7 @@ function buildDesktopBackendEnv({
  currentEnv = process.env,
  platform = process.platform,
  pathModule = pathModuleForPlatform(platform)
-} = {}) {
+}: any = {}) {
  const delimiter = delimiterForPlatform(platform)
  const currentPythonPath = currentEnv?.PYTHONPATH || ''
  const key = pathEnvKey(currentEnv, platform)
@@ -98,12 +103,10 @@ function buildDesktopBackendEnv({
  }
 }

-module.exports = {
-  POSIX_SANE_PATH_ENTRIES,
-  appendUniquePathEntries,
+export { appendUniquePathEntries,
  buildDesktopBackendEnv,
  buildDesktopBackendPath,
  delimiterForPlatform,
  normalizeHermesHomeRoot,
-  pathEnvKey
-}
+  pathEnvKey,
+  POSIX_SANE_PATH_ENTRIES }
--- a/apps/desktop/electron/backend-probes.test.cjs
+++ b/apps/desktop/electron/backend-probes.test.cjs
@@ -5,13 +5,13 @@
 * (Wired into npm test:desktop:platforms in package.json.)
 */

-const test = require('node:test')
-const assert = require('node:assert/strict')
-const fs = require('node:fs')
-const os = require('node:os')
-const path = require('node:path')
+import assert from 'node:assert/strict'
+import fs from 'node:fs'
+import os from 'node:os'
+import path from 'node:path'
+import test from 'node:test'

-const { canImportHermesCli, verifyHermesCli } = require('./backend-probes.cjs')
+import { canImportHermesCli, hermesRuntimeImportProbe, verifyHermesCli } from './backend-probes'

 // Resolve the host's own Node binary -- guaranteed to be on disk and
 // runnable. We use it as both a stand-in for "a python that doesn't
@@ -40,6 +40,12 @@ test('canImportHermesCli returns false when binary does not exist', () => {
  assert.equal(canImportHermesCli(ghost), false)
 })

+test('hermes runtime import probe checks config dependencies', () => {
+  const probe = hermesRuntimeImportProbe()
+  assert.match(probe, /\bimport yaml\b/)
+  assert.match(probe, /\bimport hermes_cli\.config\b/)
+})
+
 test('verifyHermesCli returns false when command is falsy', () => {
  assert.equal(verifyHermesCli(''), false)
  assert.equal(verifyHermesCli(null), false)
@@ -57,6 +63,7 @@ test('verifyHermesCli returns true when --version exits 0', () => {
  // verifyHermesCli only cares about the exit code.
  const scriptPath = path.join(os.tmpdir(), `hermes-probes-ok-${Date.now()}-${process.pid}.cjs`)
  fs.writeFileSync(scriptPath, 'process.exit(0)\n')
+
  try {
    // Use node as the launcher and our script as the "command". Pass
    // shell:false (default) -- node is a real binary, no shim.
--- a/apps/desktop/electron/backend-probes.cjs
+++ b/apps/desktop/electron/backend-probes.cjs
@@ -32,12 +32,23 @@
 * as bootstrap-platform.cjs and hardening.cjs).
 */

-const { execFileSync } = require('node:child_process')
+import { execFileSync } from 'node:child_process'

 const PROBE_TIMEOUT_MS = 5000

 /**
- * Return true iff `python -c "import hermes_cli"` exits 0.
+ * Return the Python snippet used to verify Hermes can import far enough to
+ * launch the CLI. Kept exported for tests so dependency regressions are
+ * caught without needing a real broken venv fixture.
+ *
+ * @returns {string}
+ */
+function hermesRuntimeImportProbe() {
+  return 'import yaml; import hermes_cli.config'
+}
+
+/**
+ * Return true iff the Hermes runtime import probe exits 0.
 *
 * Used to gate the "fallback to system Python with hermes_cli installed"
 * rung of resolveHermesBackend. Without this, a system Python 3.11-3.13
@@ -46,17 +57,25 @@ const PROBE_TIMEOUT_MS = 5000
 * site-packages -- and the resolver returns a backend that immediately
 * dies on spawn.
 *
+ * The probe intentionally imports hermes_cli.config, not just the top-level
+ * package: a broken/empty Windows launcher venv can still see the source tree
+ * through PYTHONPATH but lack PyYAML, then die on the first real CLI import.
+ *
 * @param {string} pythonPath - Absolute path to a python.exe / python.
+ * @param {{env?: Record<string, string>}} [opts] - Options; opts.env is merged over process.env for the probe.
 * @returns {boolean}
 */
-function canImportHermesCli(pythonPath) {
-  if (!pythonPath) return false
+function canImportHermesCli(pythonPath: string, opts:{env?: Record<string, string>} = {}) {
+  if (!pythonPath) {return false}
+
  try {
-    execFileSync(pythonPath, ['-c', 'import hermes_cli'], {
+    execFileSync(pythonPath, ['-c', hermesRuntimeImportProbe()], {
+      env: { ...process.env, ...(opts.env || {}) },
      stdio: 'ignore',
      timeout: PROBE_TIMEOUT_MS,
      windowsHide: true
    })
+
    return true
  } catch {
    return false
@@ -77,30 +96,30 @@ function canImportHermesCli(pythonPath) {
 *
 * @param {string} hermesCommand - Resolved absolute path to a hermes
 *   executable (or an interpreter+script wrapper).
- * @param {object} [opts]
 * @param {boolean} [opts.shell] - Whether to run through a shell. For
 *   .cmd/.bat shims on Windows execFileSync needs shell:true to find
 *   the cmd interpreter; mirrors the same flag isCommandScript() drives
 *   in resolveHermesBackend.
 * @returns {boolean}
 */
-function verifyHermesCli(hermesCommand, opts = {}) {
-  if (!hermesCommand) return false
+function verifyHermesCli(hermesCommand: string, opts?: {shell?: boolean}) {
+  if (!hermesCommand) {return false}
+
  try {
    execFileSync(hermesCommand, ['--version'], {
      stdio: 'ignore',
      timeout: PROBE_TIMEOUT_MS,
-      shell: Boolean(opts.shell),
+      shell: Boolean(opts?.shell),
      windowsHide: true
    })
+
    return true
  } catch {
    return false
  }
 }

-module.exports = {
-  canImportHermesCli,
-  verifyHermesCli,
-  PROBE_TIMEOUT_MS
-}
+export { canImportHermesCli,
+  hermesRuntimeImportProbe,
+  PROBE_TIMEOUT_MS,
+  verifyHermesCli }
--- a/apps/desktop/electron/backend-ready.test.cjs
+++ b/apps/desktop/electron/backend-ready.test.cjs
@@ -11,29 +11,32 @@
 * HERMES_DESKTOP_PORT_ANNOUNCE_TIMEOUT_MS, clamped to a 45s floor.
 */

-const test = require('node:test')
-const assert = require('node:assert/strict')
-const { EventEmitter } = require('node:events')
-const fs = require('node:fs')
-const os = require('node:os')
-const path = require('node:path')
+import assert from 'node:assert/strict'
+import { EventEmitter } from 'node:events'
+import fs from 'node:fs'
+import os from 'node:os'
+import path from 'node:path'
+import test from 'node:test'

-const {
+import { DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS,
+  MIN_PORT_ANNOUNCE_TIMEOUT_MS,
  readDashboardReadyFile,
+  resolvePortAnnounceTimeoutMs,
  waitForDashboardPort,
  waitForDashboardPortAnnouncement,
-  waitForDashboardReadyFile,
-  resolvePortAnnounceTimeoutMs,
-  DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS,
-  MIN_PORT_ANNOUNCE_TIMEOUT_MS
-} = require('./backend-ready.cjs')
+  waitForDashboardReadyFile } from './backend-ready'
+
+type FakeChildProcess = EventEmitter & {
+  stdout: EventEmitter
+}

 // A minimal stand-in for a spawned child process: an EventEmitter with a
 // stdout EventEmitter, matching the surface waitForDashboardPort consumes
 // (child.stdout.on('data'), child.on('exit'|'error') + the .off() teardown).
-function makeFakeChild() {
-  const child = new EventEmitter()
+function makeFakeChild(): FakeChildProcess {
+  const child = new EventEmitter() as FakeChildProcess
  child.stdout = new EventEmitter()
+
  return child
 }

@@ -132,6 +135,7 @@ test('a late announcement after timeout does not throw (listeners torn down)', a

 function mkTmpReadyFile() {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-ready-test-'))
+
  return {
    dir,
    file: path.join(dir, 'ready.json'),
@@ -141,6 +145,7 @@ function mkTmpReadyFile() {

 test('readDashboardReadyFile returns a valid port from JSON', () => {
  const tmp = mkTmpReadyFile()
+
  try {
    fs.writeFileSync(tmp.file, JSON.stringify({ port: 4567 }))
    assert.equal(readDashboardReadyFile(tmp.file), 4567)
@@ -151,6 +156,7 @@ test('readDashboardReadyFile returns a valid port from JSON', () => {

 test('readDashboardReadyFile ignores missing, malformed, or invalid files', () => {
  const tmp = mkTmpReadyFile()
+
  try {
    assert.equal(readDashboardReadyFile(tmp.file), null)
    fs.writeFileSync(tmp.file, '{')
@@ -165,6 +171,7 @@ test('readDashboardReadyFile ignores missing, malformed, or invalid files', () =
 test('waitForDashboardReadyFile resolves when the ready file appears', async () => {
  const tmp = mkTmpReadyFile()
  const child = makeFakeChild()
+
  try {
    const p = waitForDashboardReadyFile(tmp.file, child, 1000)
    setTimeout(() => fs.writeFileSync(tmp.file, JSON.stringify({ port: 8765 })), 20)
@@ -177,6 +184,7 @@ test('waitForDashboardReadyFile resolves when the ready file appears', async ()
 test('waitForDashboardPortAnnouncement uses ready file when provided', async () => {
  const tmp = mkTmpReadyFile()
  const child = makeFakeChild()
+
  try {
    const p = waitForDashboardPortAnnouncement(child, { readyFile: tmp.file, timeoutMs: 1000 })
    setTimeout(() => fs.writeFileSync(tmp.file, JSON.stringify({ port: 9876 })), 20)
@@ -189,6 +197,7 @@ test('waitForDashboardPortAnnouncement uses ready file when provided', async ()
 test('waitForDashboardReadyFile rejects when the child exits before file readiness', async () => {
  const tmp = mkTmpReadyFile()
  const child = makeFakeChild()
+
  try {
    const p = waitForDashboardReadyFile(tmp.file, child, 1000)
    child.emit('exit', 1, null)
--- a/apps/desktop/electron/backend-ready.cjs
+++ b/apps/desktop/electron/backend-ready.cjs
@@ -1,4 +1,4 @@
-const fs = require('node:fs')
+import fs from 'node:fs'

 const _READY_RE = /^HERMES_DASHBOARD_READY port=(\d+)/m

@@ -23,9 +23,11 @@ const MIN_PORT_ANNOUNCE_TIMEOUT_MS = 45_000
 */
 function resolvePortAnnounceTimeoutMs(env = process.env) {
  const parsed = Number(env.HERMES_DESKTOP_PORT_ANNOUNCE_TIMEOUT_MS)
+
  if (Number.isFinite(parsed) && parsed > 0) {
    return Math.max(MIN_PORT_ANNOUNCE_TIMEOUT_MS, Math.round(parsed))
  }
+
  return DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS
 }

@@ -52,7 +54,7 @@ function waitForDashboardPort(child, timeoutMs = resolvePortAnnounceTimeoutMs())
    let done = false

    function cleanup() {
-      if (done) return
+      if (done) {return}
      done = true
      clearTimeout(timer)
      child.stdout.off('data', onData)
@@ -63,13 +65,16 @@ function waitForDashboardPort(child, timeoutMs = resolvePortAnnounceTimeoutMs())
    function onData(chunk) {
      buf += chunk.toString()
      let nl
+
      while ((nl = buf.indexOf('\n')) !== -1) {
        const line = buf.slice(0, nl)
        buf = buf.slice(nl + 1)
        const m = line.match(_READY_RE)
+
        if (m) {
          cleanup()
          resolve(parseInt(m[1], 10))
+
          return
        }
      }
@@ -96,11 +101,13 @@ function waitForDashboardPort(child, timeoutMs = resolvePortAnnounceTimeoutMs())
  })
 }

-function readDashboardReadyFile(readyFile) {
-  if (!readyFile) return null
+function readDashboardReadyFile(readyFile: fs.PathOrFileDescriptor) {
+  if (!readyFile) {return null}
+
  try {
    const parsed = JSON.parse(fs.readFileSync(readyFile, 'utf8'))
    const port = Number(parsed?.port)
+
    return Number.isInteger(port) && port > 0 ? port : null
  } catch {
    return null
@@ -113,16 +120,18 @@ function waitForDashboardReadyFile(readyFile, child, timeoutMs = resolvePortAnno
    let interval = null

    function cleanup() {
-      if (done) return
+      if (done) {return}
      done = true
      clearTimeout(timer)
-      if (interval) clearInterval(interval)
+
+      if (interval) {clearInterval(interval)}
      child.off('exit', onExit)
      child.off('error', onError)
    }

    function check() {
      const port = readDashboardReadyFile(readyFile)
+
      if (port) {
        cleanup()
        resolve(port)
@@ -147,25 +156,29 @@ function waitForDashboardReadyFile(readyFile, child, timeoutMs = resolvePortAnno
    child.on('exit', onExit)
    child.on('error', onError)
    interval = setInterval(check, 50)
-    if (typeof interval.unref === 'function') interval.unref()
+
+    if (typeof interval.unref === 'function') {interval.unref()}
    check()
  })
 }

-function waitForDashboardPortAnnouncement(child, options = {}) {
+function waitForDashboardPortAnnouncement(child, options: {
+  readyFile?: fs.PathOrFileDescriptor,
+  timeoutMs?: number
+} = {}) {
  const timeoutMs = options.timeoutMs ?? resolvePortAnnounceTimeoutMs()
+
  if (options.readyFile) {
    return waitForDashboardReadyFile(options.readyFile, child, timeoutMs)
  }
+
  return waitForDashboardPort(child, timeoutMs)
 }

-module.exports = {
-  waitForDashboardPort,
-  waitForDashboardPortAnnouncement,
-  waitForDashboardReadyFile,
+export { DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS,
+  MIN_PORT_ANNOUNCE_TIMEOUT_MS,
  readDashboardReadyFile,
  resolvePortAnnounceTimeoutMs,
-  DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS,
-  MIN_PORT_ANNOUNCE_TIMEOUT_MS
-}
+  waitForDashboardPort,
+  waitForDashboardPortAnnouncement,
+  waitForDashboardReadyFile }
--- a/apps/desktop/electron/bootstrap-platform.test.cjs
+++ b/apps/desktop/electron/bootstrap-platform.test.cjs
@@ -1,14 +1,13 @@
-const assert = require('node:assert/strict')
-const fs = require('node:fs')
-const path = require('node:path')
-const test = require('node:test')
+import assert from 'node:assert/strict'
+import fs from 'node:fs'
+import path from 'node:path'
+import test from 'node:test'
+import { fileURLToPath } from 'node:url'

-const {
-  bundledRuntimeImportCheck,
+import { bundledRuntimeImportCheck,
  detectRemoteDisplay,
  isWindowsBinaryPathInWsl,
-  isWslEnvironment
-} = require('./bootstrap-platform.cjs')
+  isWslEnvironment } from './bootstrap-platform'

 test('isWslEnvironment detects WSL2 env vars on linux', () => {
  assert.equal(isWslEnvironment({ WSL_DISTRO_NAME: 'Ubuntu' }, 'linux'), true)
@@ -87,8 +86,8 @@ test('detectRemoteDisplay honors the HERMES_DESKTOP_DISABLE_GPU override both wa
 })

 test('packaged electron entrypoints do not require unpackaged npm modules', () => {
-  const electronDir = __dirname
-  const entrypoints = ['main.cjs', 'preload.cjs', 'bootstrap-platform.cjs']
+  const electronDir = path.dirname(fileURLToPath(import.meta.url))
+  const entrypoints = ['main.ts', 'preload.ts', 'bootstrap-platform.ts']
  // - electron: provided by the electron runtime, always resolvable in packaged builds.
  // - node-pty: hoisted by workspace dedup AND shipped via extraResources to
  //   resources/native-deps/node-pty (see scripts/stage-native-deps.cjs). main.cjs
@@ -100,6 +99,7 @@ test('packaged electron entrypoints do not require unpackaged npm modules', () =

  for (const entrypoint of entrypoints) {
    const source = fs.readFileSync(path.join(electronDir, entrypoint), 'utf8')
+
    const bareRequires = Array.from(source.matchAll(requirePattern))
      .map(match => match[1])
      .filter(specifier => !specifier.startsWith('node:'))
--- a/apps/desktop/electron/bootstrap-platform.cjs
+++ b/apps/desktop/electron/bootstrap-platform.cjs
@@ -1,20 +1,23 @@
-const fs = require('node:fs')
+import fs from 'node:fs'

 function isWslEnvironment(env = process.env, platform = process.platform, kernelRelease = null) {
-  if (platform !== 'linux') return false
-  if (env.WSL_DISTRO_NAME || env.WSL_INTEROP) return true
+  if (platform !== 'linux') {return false}
+
+  if (env.WSL_DISTRO_NAME || env.WSL_INTEROP) {return true}

  try {
    const release = kernelRelease ?? fs.readFileSync('/proc/sys/kernel/osrelease', 'utf8')
+
    return /microsoft|wsl/i.test(release)
  } catch {
    return false
  }
 }

-function isWindowsBinaryPathInWsl(filePath, options = {}) {
+function isWindowsBinaryPathInWsl(filePath, options: {isWsl?: boolean, env?: NodeJS.ProcessEnv, platform?: NodeJS.Platform} = {}) {
  const isWsl = options.isWsl ?? isWslEnvironment(options.env, options.platform)
-  if (!isWsl) return false
+
+  if (!isWsl) {return false}

  const normalized = String(filePath || '')
    .replace(/\\/g, '/')
@@ -48,19 +51,21 @@ const GPU_OVERRIDE_OFF = new Set(['0', 'false', 'no', 'off'])
 *
 * Pure + dependency-free so it can be unit-tested and called before app ready.
 */
-function detectRemoteDisplay(options = {}) {
+function detectRemoteDisplay(options: {env?: NodeJS.ProcessEnv, platform?: NodeJS.Platform} = {}) {
  const env = options.env ?? process.env
  const platform = options.platform ?? process.platform

  const override = String(env.HERMES_DESKTOP_DISABLE_GPU || '')
    .trim()
    .toLowerCase()
-  if (GPU_OVERRIDE_ON.has(override)) return 'override (HERMES_DESKTOP_DISABLE_GPU)'
-  if (GPU_OVERRIDE_OFF.has(override)) return null
+
+  if (GPU_OVERRIDE_ON.has(override)) {return 'override (HERMES_DESKTOP_DISABLE_GPU)'}
+
+  if (GPU_OVERRIDE_OFF.has(override)) {return null}

  // Launched from an SSH session → the display is X11-forwarded or otherwise
  // remote. Covers the common `ssh user@box` + GUI-forwarding case.
-  if (env.SSH_CONNECTION || env.SSH_CLIENT || env.SSH_TTY) return 'ssh-session'
+  if (env.SSH_CONNECTION || env.SSH_CLIENT || env.SSH_TTY) {return 'ssh-session'}

  if (platform === 'linux') {
    // X11 forwarding sets DISPLAY to "<host>:N" (e.g. "localhost:10.0"); a
@@ -68,6 +73,7 @@ function detectRemoteDisplay(options = {}) {
    // NB: WSLg deliberately isn't treated as remote — it reports
    // GPU-accelerated vGPU surfaces locally and doesn't show the flicker.
    const display = String(env.DISPLAY || '')
+
    if (display.includes(':') && display.split(':')[0]) {
      return `x11-forwarding (DISPLAY=${display})`
    }
@@ -77,15 +83,14 @@ function detectRemoteDisplay(options = {}) {
    // RDP sessions report SESSIONNAME like "RDP-Tcp#7"; the local console is
    // "Console".
    const sessionName = String(env.SESSIONNAME || '')
-    if (/^rdp-/i.test(sessionName)) return `rdp (SESSIONNAME=${sessionName})`
+
+    if (/^rdp-/i.test(sessionName)) {return `rdp (SESSIONNAME=${sessionName})`}
  }

  return null
 }

-module.exports = {
-  bundledRuntimeImportCheck,
+export { bundledRuntimeImportCheck,
  detectRemoteDisplay,
  isWindowsBinaryPathInWsl,
-  isWslEnvironment
-}
+  isWslEnvironment }
--- a/apps/desktop/electron/bootstrap-runner.test.cjs
+++ b/apps/desktop/electron/bootstrap-runner.test.cjs
@@ -1,15 +1,13 @@
-const assert = require('node:assert/strict')
-const test = require('node:test')
-const fs = require('node:fs')
-const os = require('node:os')
-const path = require('node:path')
+import assert from 'node:assert/strict'
+import fs from 'node:fs'
+import os from 'node:os'
+import path from 'node:path'
+import test from 'node:test'

-const {
-  runBootstrap,
-  resolveInstallScript,
+import { cachedScriptPath,
  installedAgentInstallScript,
-  cachedScriptPath
-} = require('./bootstrap-runner.cjs')
+  resolveInstallScript,
+  runBootstrap } from './bootstrap-runner'

 const SCRIPT_NAME = process.platform === 'win32' ? 'install.ps1' : 'install.sh'

@@ -22,6 +20,7 @@ test('runBootstrap bails immediately when the signal is already aborted', async
  controller.abort()

  const events = []
+
  const result = await runBootstrap({
    installStamp: null,
    activeRoot: '/tmp/hermes-runner-test',
@@ -42,6 +41,7 @@ test('runBootstrap bails immediately when the signal is already aborted', async

 test('installedAgentInstallScript resolves the installer in the agent checkout', () => {
  const home = mkTmpHome()
+
  try {
    assert.equal(installedAgentInstallScript(home), null, 'absent before the checkout exists')

@@ -59,6 +59,7 @@ test('installedAgentInstallScript resolves the installer in the agent checkout',

 test('resolveInstallScript prefers a cached script without touching the network', async () => {
  const home = mkTmpHome()
+
  try {
    const commit = 'a'.repeat(40)
    const cached = cachedScriptPath(home, commit)
@@ -66,6 +67,7 @@ test('resolveInstallScript prefers a cached script without touching the network'
    fs.writeFileSync(cached, '#!/bin/sh\necho cached\n')

    const logs = []
+
    const result = await resolveInstallScript({
      installStamp: { commit },
      sourceRepoRoot: null,
@@ -82,6 +84,7 @@ test('resolveInstallScript prefers a cached script without touching the network'

 test('resolveInstallScript falls back to the installed agent checkout on a 404', async () => {
  const home = mkTmpHome()
+
  try {
    const commit = 'a'.repeat(40)
    // Seed the installed agent checkout so the fallback has something to resolve.
@@ -91,6 +94,7 @@ test('resolveInstallScript falls back to the installed agent checkout on a 404',
    fs.writeFileSync(installed, '#!/bin/sh\necho fallback\n')

    const logs = []
+
    const result = await resolveInstallScript({
      installStamp: { commit },
      sourceRepoRoot: null,
@@ -117,6 +121,7 @@ test('resolveInstallScript falls back to the installed agent checkout on a 404',

 test('resolveInstallScript rethrows when the 404 fallback is unavailable', async () => {
  const home = mkTmpHome()
+
  try {
    const commit = 'a'.repeat(40)
    // No installed agent checkout seeded -> nothing to fall back to.
--- a/apps/desktop/electron/bootstrap-runner.cjs
+++ b/apps/desktop/electron/bootstrap-runner.cjs
@@ -1,5 +1,3 @@
-'use strict'
-
 /**
 * bootstrap-runner.cjs
 *
@@ -34,11 +32,11 @@
 *     no UI consumes them yet)
 */

-const fs = require('node:fs')
-const fsp = require('node:fs/promises')
-const path = require('node:path')
-const https = require('node:https')
-const { spawn } = require('node:child_process')
+import { spawn } from 'node:child_process'
+import fs from 'node:fs'
+import fsp from 'node:fs/promises'
+import https from 'node:https'
+import path from 'node:path'

 const IS_WINDOWS = process.platform === 'win32'

@@ -46,6 +44,7 @@ function hiddenWindowsChildOptions(options = {}) {
  if (!IS_WINDOWS || Object.prototype.hasOwnProperty.call(options, 'windowsHide')) {
    return options
  }
+
  return { ...options, windowsHide: true }
 }

@@ -71,10 +70,12 @@ function installScriptKind() {
 }

 function resolveLocalInstallScript(sourceRepoRoot) {
-  if (!sourceRepoRoot) return null
+  if (!sourceRepoRoot) {return null}
  const candidate = path.join(sourceRepoRoot, 'scripts', installScriptName())
+
  try {
    fs.accessSync(candidate, fs.constants.R_OK)
+
    return candidate
  } catch {
    return null
@@ -90,10 +91,12 @@ function bootstrapCacheDir(hermesHome) {
 // the pinned commit can't be fetched from GitHub (e.g. a locally-built desktop
 // app stamped to an unpushed HEAD).
 function installedAgentInstallScript(hermesHome) {
-  if (!hermesHome) return null
+  if (!hermesHome) {return null}
  const candidate = path.join(hermesHome, 'hermes-agent', 'scripts', installScriptName())
+
  try {
    fs.accessSync(candidate, fs.constants.R_OK)
+
    return candidate
  } catch {
    return null
@@ -110,6 +113,7 @@ function downloadInstallScript(commit, destPath) {
  // verification beyond "did the file we wrote pass a syntax probe."
  const scriptName = installScriptName()
  const url = `https://raw.githubusercontent.com/NousResearch/hermes-agent/${commit}/scripts/${scriptName}`
+
  return new Promise((resolve, reject) => {
    fs.mkdirSync(path.dirname(destPath), { recursive: true })
    const tmpPath = destPath + '.tmp'
@@ -129,8 +133,10 @@ function downloadInstallScript(commit, destPath) {
                    `Failed to download ${scriptName}: HTTP ${res2.statusCode} from redirect ${res.headers.location}`
                  )
                )
+
                return
              }
+
              const out2 = fs.createWriteStream(tmpPath)
              res2.pipe(out2)
              out2.on('finish', () => {
@@ -141,18 +147,24 @@ function downloadInstallScript(commit, destPath) {
              out2.on('error', reject)
            })
            .on('error', reject)
+
          return
        }
+
        if (res.statusCode !== 200) {
          out.close()
+
          try {
            fs.unlinkSync(tmpPath)
          } catch {
            void 0
          }
+
          reject(new Error(`Failed to download ${scriptName}: HTTP ${res.statusCode} from ${url}`))
+
          return
        }
+
        res.pipe(out)
        out.on('finish', () => {
          out.close()
@@ -165,6 +177,7 @@ function downloadInstallScript(commit, destPath) {
          } catch {
            void 0
          }
+
          reject(err)
        })
      })
@@ -174,6 +187,7 @@ function downloadInstallScript(commit, destPath) {
        } catch {
          void 0
        }
+
        reject(err)
      })
  })
@@ -190,8 +204,10 @@ async function resolveInstallScript({
  //    without pushing. SOURCE_REPO_ROOT comes from main.cjs (path.resolve
  //    of APP_ROOT/../..).
  const localScript = resolveLocalInstallScript(sourceRepoRoot)
+
  if (localScript) {
    emit({ type: 'log', line: `[bootstrap] using local ${installScriptName()} at ${localScript}` })
+
    return { path: localScript, source: 'local', kind: installScriptKind() }
  }

@@ -204,12 +220,14 @@ async function resolveInstallScript({
  }

  const cached = cachedScriptPath(hermesHome, installStamp.commit)
+
  try {
    await fsp.access(cached, fs.constants.R_OK)
    emit({
      type: 'log',
      line: `[bootstrap] using cached ${installScriptName()} for ${installStamp.commit.slice(0, 12)}`
    })
+
    return { path: cached, source: 'cache', commit: installStamp.commit, kind: installScriptKind() }
  } catch {
    // not cached; download
@@ -219,9 +237,11 @@ async function resolveInstallScript({
    type: 'log',
    line: `[bootstrap] fetching ${installScriptName()} for ${installStamp.commit.slice(0, 12)} from GitHub`
  })
+
  try {
    await _download(installStamp.commit, cached)
    emit({ type: 'log', line: `[bootstrap] saved to ${cached}` })
+
    return { path: cached, source: 'download', commit: installStamp.commit, kind: installScriptKind() }
  } catch (err) {
    // The pinned commit may not be fetchable from GitHub -- most commonly a
@@ -230,6 +250,7 @@ async function resolveInstallScript({
    // ships inside the already-installed agent checkout so dev/self-builds can
    // still bootstrap instead of dying with a fatal 404.
    const installed = installedAgentInstallScript(hermesHome)
+
    if (installed) {
      emit({
        type: 'log',
@@ -237,15 +258,18 @@ async function resolveInstallScript({
          `[bootstrap] GitHub fetch failed (${err.message}); ` +
          `falling back to installed agent ${installScriptName()} at ${installed}`
      })
+
      try {
        fs.mkdirSync(path.dirname(cached), { recursive: true })
        fs.copyFileSync(installed, cached)
+
        return { path: cached, source: 'installed-agent', commit: installStamp.commit, kind: installScriptKind() }
      } catch {
        // Cache copy failed (read-only FS, etc.) -- use the source path directly.
        return { path: installed, source: 'installed-agent', commit: installStamp.commit, kind: installScriptKind() }
      }
    }
+
    throw err
  }
 }
@@ -271,31 +295,37 @@ function powershellUnderRoot(root) {
 function resolveWindowsPowerShell() {
  for (const v of ['SystemRoot', 'windir']) {
    const root = process.env[v]
+
    if (root) {
      const candidate = powershellUnderRoot(root)
+
      try {
-        if (fs.statSync(candidate).isFile()) return candidate
+        if (fs.statSync(candidate).isFile()) {return candidate}
      } catch {
        void 0
      }
    }
  }
+
  const pathDirs = (process.env.PATH || process.env.Path || '').split(path.delimiter).filter(Boolean)
+
  for (const exe of ['powershell.exe', 'pwsh.exe']) {
    for (const dir of pathDirs) {
      const candidate = path.join(dir, exe)
+
      try {
-        if (fs.statSync(candidate).isFile()) return candidate
+        if (fs.statSync(candidate).isFile()) {return candidate}
      } catch {
        void 0
      }
    }
  }
+
  return 'powershell.exe'
 }

-function spawnPowerShell(scriptPath, args, { emit, stageName, abortSignal, hermesHome } = {}) {
-  return new Promise((resolve, reject) => {
+function spawnPowerShell(scriptPath, args, { emit, stageName, abortSignal, hermesHome }: any = {}) {
+  return new Promise<any>((resolve, reject) => {
    const ps = process.platform === 'win32' ? resolveWindowsPowerShell() : 'pwsh'
    const fullArgs = ['-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', scriptPath, ...args]

@@ -319,12 +349,14 @@ function spawnPowerShell(scriptPath, args, { emit, stageName, abortSignal, herme

    const onAbort = () => {
      killed = true
+
      try {
        child.kill('SIGTERM')
      } catch {
        void 0
      }
    }
+
    if (abortSignal) {
      if (abortSignal.aborted) {
        onAbort()
@@ -342,10 +374,12 @@ function spawnPowerShell(scriptPath, args, { emit, stageName, abortSignal, herme
      stdout += chunk
      stdoutBuf += chunk
      let nl
+
      while ((nl = stdoutBuf.indexOf('\n')) !== -1) {
        const line = stdoutBuf.slice(0, nl).replace(/\r$/, '')
        stdoutBuf = stdoutBuf.slice(nl + 1)
-        if (line) emit && emit({ type: 'log', stage: stageName, line, stream: 'stdout' })
+
+        if (line) {emit && emit({ type: 'log', stage: stageName, line, stream: 'stdout' })}
      }
    })

@@ -354,30 +388,34 @@ function spawnPowerShell(scriptPath, args, { emit, stageName, abortSignal, herme
      stderr += chunk
      stderrBuf += chunk
      let nl
+
      while ((nl = stderrBuf.indexOf('\n')) !== -1) {
        const line = stderrBuf.slice(0, nl).replace(/\r$/, '')
        stderrBuf = stderrBuf.slice(nl + 1)
-        if (line) emit && emit({ type: 'log', stage: stageName, line, stream: 'stderr' })
+
+        if (line) {emit && emit({ type: 'log', stage: stageName, line, stream: 'stderr' })}
      }
    })

    child.on('error', err => {
-      if (abortSignal) abortSignal.removeEventListener('abort', onAbort)
+      if (abortSignal) {abortSignal.removeEventListener('abort', onAbort)}
      reject(err)
    })

    child.on('close', (code, signal) => {
-      if (abortSignal) abortSignal.removeEventListener('abort', onAbort)
+      if (abortSignal) {abortSignal.removeEventListener('abort', onAbort)}
+
      // Flush any trailing bytes
-      if (stdoutBuf) emit && emit({ type: 'log', stage: stageName, line: stdoutBuf, stream: 'stdout' })
-      if (stderrBuf) emit && emit({ type: 'log', stage: stageName, line: stderrBuf, stream: 'stderr' })
-      resolve({ stdout, stderr, code, signal, killed })
+      if (stdoutBuf) {emit && emit({ type: 'log', stage: stageName, line: stdoutBuf, stream: 'stdout' } as any)}
+
+      if (stderrBuf) {emit && emit({ type: 'log', stage: stageName, line: stderrBuf, stream: 'stderr' } as any)}
+      resolve({ stdout, stderr, code, signal, killed } as any)
    })
  })
 }

-function spawnBash(scriptPath, args, { emit, stageName, abortSignal, hermesHome } = {}) {
-  return new Promise((resolve, reject) => {
+function spawnBash(scriptPath, args, { emit, stageName, abortSignal, hermesHome }: any = {}) {
+  return new Promise<any>((resolve, reject) => {
    const child = spawn('bash', [scriptPath, ...args], {
      stdio: ['ignore', 'pipe', 'pipe'],
      env: {
@@ -392,12 +430,14 @@ function spawnBash(scriptPath, args, { emit, stageName, abortSignal, hermesHome

    const onAbort = () => {
      killed = true
+
      try {
        child.kill('SIGTERM')
      } catch {
        void 0
      }
    }
+
    if (abortSignal) {
      if (abortSignal.aborted) {
        onAbort()
@@ -414,10 +454,12 @@ function spawnBash(scriptPath, args, { emit, stageName, abortSignal, hermesHome
      stdout += chunk
      stdoutBuf += chunk
      let nl
+
      while ((nl = stdoutBuf.indexOf('\n')) !== -1) {
        const line = stdoutBuf.slice(0, nl).replace(/\r$/, '')
        stdoutBuf = stdoutBuf.slice(nl + 1)
-        if (line) emit && emit({ type: 'log', stage: stageName, line, stream: 'stdout' })
+
+        if (line) {emit && emit({ type: 'log', stage: stageName, line, stream: 'stdout' })}
      }
    })

@@ -426,22 +468,26 @@ function spawnBash(scriptPath, args, { emit, stageName, abortSignal, hermesHome
      stderr += chunk
      stderrBuf += chunk
      let nl
+
      while ((nl = stderrBuf.indexOf('\n')) !== -1) {
        const line = stderrBuf.slice(0, nl).replace(/\r$/, '')
        stderrBuf = stderrBuf.slice(nl + 1)
-        if (line) emit && emit({ type: 'log', stage: stageName, line, stream: 'stderr' })
+
+        if (line) {emit && emit({ type: 'log', stage: stageName, line, stream: 'stderr' })}
      }
    })

    child.on('error', err => {
-      if (abortSignal) abortSignal.removeEventListener('abort', onAbort)
+      if (abortSignal) {abortSignal.removeEventListener('abort', onAbort)}
      reject(err)
    })

    child.on('close', (code, signal) => {
-      if (abortSignal) abortSignal.removeEventListener('abort', onAbort)
-      if (stdoutBuf) emit && emit({ type: 'log', stage: stageName, line: stdoutBuf, stream: 'stdout' })
-      if (stderrBuf) emit && emit({ type: 'log', stage: stageName, line: stderrBuf, stream: 'stderr' })
+      if (abortSignal) {abortSignal.removeEventListener('abort', onAbort)}
+
+      if (stdoutBuf) {emit && emit({ type: 'log', stage: stageName, line: stdoutBuf, stream: 'stdout' })}
+
+      if (stderrBuf) {emit && emit({ type: 'log', stage: stageName, line: stderrBuf, stream: 'stderr' })}
      resolve({ stdout, stderr, code, signal, killed })
    })
  })
@@ -456,48 +502,60 @@ function spawnBash(scriptPath, args, { emit, stageName, abortSignal, hermesHome
 // instead of falling back to install.ps1's default ($Branch = "main").
 function buildPinArgs(installStamp) {
  const args = []
+
  if (installStamp && installStamp.commit) {
    args.push('-Commit', installStamp.commit)
  }
+
  if (installStamp && installStamp.branch) {
    args.push('-Branch', installStamp.branch)
  }
+
  return args
 }

 function buildPosixPinArgs({ installStamp, activeRoot, hermesHome }) {
  const args = ['--dir', activeRoot, '--hermes-home', hermesHome]
+
  if (installStamp && installStamp.branch) {
    args.push('--branch', installStamp.branch)
  }
+
  if (installStamp && installStamp.commit) {
    args.push('--commit', installStamp.commit)
  }
+
  return args
 }

 async function fetchManifest({ scriptPath, installerKind, emit, hermesHome, activeRoot, installStamp }) {
  const isPosix = installerKind === 'posix'
+
  const args = isPosix
    ? ['--manifest', ...buildPosixPinArgs({ installStamp, activeRoot, hermesHome })]
    : ['-Manifest', ...buildPinArgs(installStamp)]
+
  const result = await (isPosix ? spawnBash : spawnPowerShell)(scriptPath, args, {
    emit,
    stageName: '__manifest__',
    hermesHome
  })
+
  if (result.code !== 0) {
    throw new Error(
      `${isPosix ? 'install.sh --manifest' : 'install.ps1 -Manifest'} failed: exit ${result.code}\n${result.stderr || result.stdout}`
    )
  }
+
  // The manifest is the LAST JSON line on stdout (install.ps1 may print
  // banner / info lines first depending on Console.OutputEncoding effects).
  // Find the last line that parses as JSON with a `stages` field.
  const lines = result.stdout.split(/\r?\n/).filter(Boolean)
+
  for (let i = lines.length - 1; i >= 0; i--) {
    try {
      const parsed = JSON.parse(lines[i])
+
      if (parsed && Array.isArray(parsed.stages)) {
        return parsed
      }
@@ -505,6 +563,7 @@ async function fetchManifest({ scriptPath, installerKind, emit, hermesHome, acti
      void 0
    }
  }
+
  throw new Error(
    `${isPosix ? 'install.sh --manifest' : 'install.ps1 -Manifest'} produced no parseable JSON payload\n${result.stdout}`
  )
@@ -515,9 +574,11 @@ async function fetchManifest({ scriptPath, installerKind, emit, hermesHome, acti
 // for the double-emit bug we addressed in the install.ps1 PR).
 function parseStageResult(stdout) {
  const lines = stdout.split(/\r?\n/).filter(Boolean)
+
  for (let i = lines.length - 1; i >= 0; i--) {
    try {
      const parsed = JSON.parse(lines[i])
+
      if (parsed && typeof parsed.ok === 'boolean' && typeof parsed.stage === 'string') {
        return parsed
      }
@@ -525,6 +586,7 @@ function parseStageResult(stdout) {
      void 0
    }
  }
+
  return null
 }

@@ -533,6 +595,7 @@ async function runStage({ scriptPath, installerKind, stage, emit, hermesHome, ac
  emit({ type: 'stage', name: stage.name, state: 'running' })

  const isPosix = installerKind === 'posix'
+
  const args = isPosix
    ? [
        '--stage',
@@ -542,6 +605,7 @@ async function runStage({ scriptPath, installerKind, stage, emit, hermesHome, ac
        ...buildPosixPinArgs({ installStamp, activeRoot, hermesHome })
      ]
    : ['-Stage', stage.name, '-NonInteractive', '-Json', ...buildPinArgs(installStamp)]
+
  const result = await (isPosix ? spawnBash : spawnPowerShell)(scriptPath, args, {
    emit,
    stageName: stage.name,
@@ -554,6 +618,7 @@ async function runStage({ scriptPath, installerKind, stage, emit, hermesHome, ac
  if (result.killed) {
    const ev = { type: 'stage', name: stage.name, state: 'failed', durationMs, error: 'cancelled by user' }
    emit(ev)
+
    return ev
  }

@@ -568,20 +633,26 @@ async function runStage({ scriptPath, installerKind, stage, emit, hermesHome, ac
      error: `${isPosix ? 'install.sh --stage' : 'install.ps1 -Stage'} ${stage.name} produced no JSON result frame (exit=${result.code})`,
      json: null
    }
+
    emit(ev)
+
    return ev
  }

  if (json.ok && json.skipped) {
    const ev = { type: 'stage', name: stage.name, state: 'skipped', durationMs, json }
    emit(ev)
+
    return ev
  }
+
  if (json.ok) {
    const ev = { type: 'stage', name: stage.name, state: 'succeeded', durationMs, json }
    emit(ev)
+
    return ev
  }
+
  const ev = {
    type: 'stage',
    name: stage.name,
@@ -590,7 +661,9 @@ async function runStage({ scriptPath, installerKind, stage, emit, hermesHome, ac
    json,
    error: json.reason || `exit code ${result.code}`
  }
+
  emit(ev)
+
  return ev
 }

@@ -603,6 +676,7 @@ function openRunLog(logRoot) {
  const ts = new Date().toISOString().replace(/[:.]/g, '-')
  const logPath = path.join(logRoot, `bootstrap-${ts}.log`)
  const stream = fs.createWriteStream(logPath, { flags: 'a' })
+
  return { path: logPath, stream }
 }

@@ -633,6 +707,7 @@ async function runBootstrap(opts) {
        void 0
      }
    }
+
    return { ok: false, cancelled: true }
  }

@@ -646,8 +721,9 @@ async function runBootstrap(opts) {
    } catch {
      void 0
    }
+
    try {
-      if (typeof onEvent === 'function') onEvent(ev)
+      if (typeof onEvent === 'function') {onEvent(ev)}
    } catch (err) {
      // Don't let a subscriber bug crash the bootstrap
      runLog.stream.write(`emit error: ${err && err.message}\n`)
@@ -677,6 +753,7 @@ async function runBootstrap(opts) {
      activeRoot,
      installStamp
    })
+
    emit({
      type: 'manifest',
      stages: manifest.stages,
@@ -690,8 +767,10 @@ async function runBootstrap(opts) {
    for (const stage of manifest.stages) {
      if (abortSignal && abortSignal.aborted) {
        emit({ type: 'failed', error: 'bootstrap cancelled by user' })
+
        return { ok: false, cancelled: true }
      }
+
      const ev = await runStage({
        scriptPath: scriptInfo.path,
        installerKind,
@@ -702,9 +781,11 @@ async function runBootstrap(opts) {
        abortSignal,
        installStamp
      })
+
      if (ev.state === 'failed') {
-        emit({ type: 'failed', stage: stage.name, error: ev.error || 'stage failed' })
-        return { ok: false, failedStage: stage.name, error: ev.error }
+        emit({ type: 'failed', stage: stage.name, error: (ev as any).error || 'stage failed' })
+
+        return { ok: false, failedStage: stage.name, error: (ev as any).error }
      }
    }

@@ -713,11 +794,14 @@ async function runBootstrap(opts) {
      pinnedCommit: installStamp ? installStamp.commit : null,
      pinnedBranch: installStamp ? installStamp.branch : null
    }
+
    const marker = typeof writeMarker === 'function' ? writeMarker(markerPayload) : markerPayload
    emit({ type: 'complete', marker })
+
    return { ok: true, marker }
  } catch (err) {
    emit({ type: 'failed', error: err.message || String(err) })
+
    return { ok: false, error: err.message || String(err) }
  } finally {
    try {
@@ -728,12 +812,10 @@ async function runBootstrap(opts) {
  }
 }

-module.exports = {
-  runBootstrap,
+export { cachedScriptPath,
+  installedAgentInstallScript,
  // Exposed for testability
  parseStageResult,
-  resolveLocalInstallScript,
  resolveInstallScript,
-  installedAgentInstallScript,
-  cachedScriptPath
-}
+  resolveLocalInstallScript,
+  runBootstrap }
--- a/apps/desktop/electron/build-mode.cjs
+++ b/apps/desktop/electron/build-mode.cjs
@@ -1,20 +0,0 @@
-'use strict'
-
-/**
- * build-mode.cjs — pure helper for the desktop's thin-vs-thick build mode.
- *
- * The desktop ships in two shapes:
- *   - thick (default): bundles the first-launch bootstrap installer, can
- *     spawn a local Hermes backend, and supports in-app self-update.
- *   - thin: no bootstrap, no local backend, no self-update. Connects ONLY
- *     to a remote gateway. Used for sandboxed/package-managed deployments
- *     (Flatpak, Snap, etc.) where the agent lives elsewhere.
- * 
- * The esbuild bundler bakes this env var into the source code, so it's read at build time, not runtime.
- */
-
-function isThinClient() {
-  return process.env.HERMES_DESKTOP_BUILD_MODE === 'thin'
-}
-
-module.exports = { isThinClient }
--- a/apps/desktop/electron/build-mode.test.cjs
+++ b/apps/desktop/electron/build-mode.test.cjs
@@ -1,41 +0,0 @@
-'use strict'
-
-const test = require('node:test')
-const assert = require('node:assert/strict')
-
-// We test build-mode.cjs by controlling process.env directly. The module
-// reads process.env.HERMES_DESKTOP_BUILD_MODE at call time (not import time),
-// so we can mutate the env and re-require to exercise both modes.
-
-function freshModule() {
-  // Bust the require cache so the module re-evaluates with the current env.
-  delete require.cache[require.resolve('./build-mode.cjs')]
-  return require('./build-mode.cjs')
-}
-
-test('isThinClient returns false by default (thick mode)', () => {
-  const prev = process.env.HERMES_DESKTOP_BUILD_MODE
-  delete process.env.HERMES_DESKTOP_BUILD_MODE
-  const { isThinClient } = freshModule()
-  assert.equal(isThinClient(), false)
-  process.env.HERMES_DESKTOP_BUILD_MODE = prev
-})
-
-test('isThinClient returns true when HERMES_DESKTOP_BUILD_MODE=thin', () => {
-  const prev = process.env.HERMES_DESKTOP_BUILD_MODE
-  process.env.HERMES_DESKTOP_BUILD_MODE = 'thin'
-  const { isThinClient } = freshModule()
-  assert.equal(isThinClient(), true)
-  process.env.HERMES_DESKTOP_BUILD_MODE = prev
-})
-
-test('isThinClient returns false for non-thin values', () => {
-  const prev = process.env.HERMES_DESKTOP_BUILD_MODE
-  process.env.HERMES_DESKTOP_BUILD_MODE = 'thick'
-  const { isThinClient } = freshModule()
-  assert.equal(isThinClient(), false)
-  process.env.HERMES_DESKTOP_BUILD_MODE = 'thick-client'
-  const { isThinClient: isThin2 } = freshModule()
-  assert.equal(isThin2(), false)
-  process.env.HERMES_DESKTOP_BUILD_MODE = prev
-})
--- a/apps/desktop/electron/connection-config.test.cjs
+++ b/apps/desktop/electron/connection-config.test.cjs
@@ -10,26 +10,24 @@
 * and the OAuth session-cookie detector.
 */

-const test = require('node:test')
-const assert = require('node:assert/strict')
+import assert from 'node:assert/strict'
+import test from 'node:test'

-const {
-  AT_COOKIE_VARIANTS,
-  RT_COOKIE_VARIANTS,
+import { AT_COOKIE_VARIANTS,
  authModeFromStatus,
  buildGatewayWsUrl,
  buildGatewayWsUrlWithTicket,
  connectionScopeKey,
-  cookiesHaveSession,
  cookiesHaveLiveSession,
-  normAuthMode,
+  cookiesHaveSession,
  normalizeRemoteBaseUrl,
+  normAuthMode,
  pathWithGlobalRemoteProfile,
  profileRemoteOverride,
  resolveAuthMode,
  resolveTestWsUrl,
-  tokenPreview
-} = require('./connection-config.cjs')
+  RT_COOKIE_VARIANTS,
+  tokenPreview } from './connection-config'

 // --- connectionScopeKey / normAuthMode ---

@@ -73,6 +71,7 @@ test('profileRemoteOverride returns the per-profile remote with defaulted auth m
      coder: { mode: 'remote', url: '  https://coder.example.com/hermes  ', token: { value: 'sek' } }
    }
  }
+
  assert.deepEqual(profileRemoteOverride(config, 'coder'), {
    url: 'https://coder.example.com/hermes',
    authMode: 'token',
@@ -365,6 +364,7 @@ test('resolveTestWsUrl (oauth, mint ok) builds a ?ticket= URL', async () => {
  const url = await resolveTestWsUrl('https://gw.example.com', 'oauth', null, {
    mintTicket: async () => 'tkt-9'
  })
+
  assert.equal(url, 'wss://gw.example.com/api/ws?ticket=tkt-9')
 })

@@ -376,13 +376,14 @@ test('resolveTestWsUrl (oauth, mint FAILS) throws — must NOT skip WS validatio
          throw new Error('401 ticket mint failed')
        }
      }),
-    err => {
+    (err: any) => {
      // Actionable, points the user at re-auth, and preserves the cause + flag
      // the boot overlay uses to offer a sign-in prompt.
      assert.match(err.message, /WebSocket ticket/i)
      assert.match(err.message, /sign in again/i)
      assert.equal(err.needsOauthLogin, true)
      assert.ok(err.cause instanceof Error)
+
      return true
    }
  )
--- a/apps/desktop/electron/connection-config.cjs
+++ b/apps/desktop/electron/connection-config.cjs
@@ -45,6 +45,7 @@ function normalizeRemoteBaseUrl(rawUrl) {
  }

  let parsed
+
  try {
    parsed = new URL(value)
  } catch (error) {
@@ -105,13 +106,16 @@ function buildGatewayWsUrlWithTicket(baseUrl, ticket) {
 * @param {{ mintTicket: (baseUrl: string) => Promise<string> }} deps
 * @returns {Promise<string|null>}
 */
-async function resolveTestWsUrl(baseUrl, authMode, token, deps = {}) {
+async function resolveTestWsUrl(baseUrl, authMode, token, deps: any = {}) {
  if (authMode === 'oauth') {
    const mintTicket = deps.mintTicket
+
    if (typeof mintTicket !== 'function') {
      throw new Error('resolveTestWsUrl: a mintTicket function is required in OAuth mode.')
    }
+
    let ticket
+
    try {
      ticket = await mintTicket(baseUrl)
    } catch (error) {
@@ -119,15 +123,19 @@ async function resolveTestWsUrl(baseUrl, authMode, token, deps = {}) {
        'Reached the gateway over HTTP, but could not mint a WebSocket ticket for the OAuth session ' +
          '(it may have expired). Open Settings → Gateway and sign in again.'
      )
-      err.needsOauthLogin = true
+
+      ;(err as any).needsOauthLogin = true
      err.cause = error
      throw err
    }
+
    return buildGatewayWsUrlWithTicket(baseUrl, ticket)
  }
+
  if (!token) {
    return null
  }
+
  return buildGatewayWsUrl(baseUrl, token)
 }

@@ -154,11 +162,13 @@ function normAuthMode(mode) {
 function profileRemoteOverride(config, profile) {
  const key = connectionScopeKey(profile)
  const entry = key ? config?.profiles?.[key] : null
+
  if (!entry || typeof entry !== 'object' || entry.mode !== 'remote') {
    return null
  }

  const url = String(entry.url || '').trim()
+
  if (!url) {
    return null
  }
@@ -172,18 +182,21 @@ function profileRemoteOverride(config, profile) {
 * query parameter. Local pooled backends and per-profile remote overrides do not
 * need this: they already run against a backend scoped to the target profile.
 */
-function pathWithGlobalRemoteProfile(path, profile, opts = {}) {
+function pathWithGlobalRemoteProfile(path, profile, opts: any = {}) {
  const scopedProfile = connectionScopeKey(profile)
+
  if (!scopedProfile || !opts.globalRemote || opts.profileRemoteOverride) {
    return path
  }

  const rawPath = String(path || '')
+
  if (!rawPath) {
    return path
  }

  let parsed
+
  try {
    parsed = new URL(rawPath, 'http://hermes.local')
  } catch {
@@ -224,9 +237,12 @@ function authModeFromStatus(statusBody) {
 * Returns 'oauth' | 'token'.
 */
 function resolveAuthMode(inputAuthMode, existingAuthMode) {
-  if (inputAuthMode === 'oauth') return 'oauth'
-  if (inputAuthMode === 'token') return 'token'
-  if (existingAuthMode === 'oauth') return 'oauth'
+  if (inputAuthMode === 'oauth') {return 'oauth'}
+
+  if (inputAuthMode === 'token') {return 'token'}
+
+  if (existingAuthMode === 'oauth') {return 'oauth'}
+
  return 'token'
 }

@@ -242,7 +258,8 @@ function resolveAuthMode(inputAuthMode, existingAuthMode) {
 * need to know whether an unexpired access token is present right now.
 */
 function cookiesHaveSession(cookies) {
-  if (!Array.isArray(cookies)) return false
+  if (!Array.isArray(cookies)) {return false}
+
  return cookies.some(c => c && AT_COOKIE_VARIANTS.includes(c.name) && c.value)
 }

@@ -260,24 +277,23 @@ function cookiesHaveSession(cookies) {
 * the RT is also dead/revoked).
 */
 function cookiesHaveLiveSession(cookies) {
-  if (!Array.isArray(cookies)) return false
+  if (!Array.isArray(cookies)) {return false}
+
  return cookies.some(c => c && c.value && (AT_COOKIE_VARIANTS.includes(c.name) || RT_COOKIE_VARIANTS.includes(c.name)))
 }

-module.exports = {
-  AT_COOKIE_VARIANTS,
-  RT_COOKIE_VARIANTS,
+export { AT_COOKIE_VARIANTS,
  authModeFromStatus,
  buildGatewayWsUrl,
  buildGatewayWsUrlWithTicket,
  connectionScopeKey,
-  cookiesHaveSession,
  cookiesHaveLiveSession,
-  normAuthMode,
+  cookiesHaveSession,
  normalizeRemoteBaseUrl,
+  normAuthMode,
  pathWithGlobalRemoteProfile,
  profileRemoteOverride,
  resolveAuthMode,
  resolveTestWsUrl,
-  tokenPreview
-}
+  RT_COOKIE_VARIANTS,
+  tokenPreview }
--- a/apps/desktop/electron/dashboard-token.test.cjs
+++ b/apps/desktop/electron/dashboard-token.test.cjs
@@ -5,17 +5,15 @@
 * (Wired into npm test:desktop:platforms in package.json.)
 */

-const test = require('node:test')
-const assert = require('node:assert/strict')
+import assert from 'node:assert/strict'
+import test from 'node:test'

-const {
-  adoptServedDashboardToken,
+import { adoptServedDashboardToken,
  dashboardIndexUrl,
  extractInjectedDashboardToken,
  fetchPublicText,
  isForeignBackendToken,
-  resolveServedDashboardToken
-} = require('./dashboard-token.cjs')
+  resolveServedDashboardToken } from './dashboard-token'

 test('extractInjectedDashboardToken reads the JSON-encoded dashboard token', () => {
  const html = '<script>window.__HERMES_SESSION_TOKEN__="served-token";window.__HERMES_BASE_PATH__=""</script>'
@@ -39,9 +37,11 @@ test('dashboardIndexUrl preserves dashboard path prefixes', () => {

 test('resolveServedDashboardToken uses the served token and logs when it differs', async () => {
  const logs = []
+
  const token = await resolveServedDashboardToken('http://127.0.0.1:9120', 'spawn-token', {
    fetchText: async url => {
      assert.equal(url, 'http://127.0.0.1:9120/')
+
      return '<script>window.__HERMES_SESSION_TOKEN__="served-token";</script>'
    },
    rememberLog: line => logs.push(line)
@@ -100,8 +100,9 @@ test('isForeignBackendToken only flags a mismatched token from a dead child', ()
    [{ servedToken: null, spawnToken: 'mine', childAlive: false }, false],
    [{ servedToken: '', spawnToken: 'mine', childAlive: false }, false]
  ]
+
  for (const [input, expected] of cases) {
-    assert.equal(isForeignBackendToken(input), expected, JSON.stringify(input))
+    assert.equal(isForeignBackendToken(input as any), expected, JSON.stringify(input))
  }
 })

@@ -128,6 +129,7 @@ test('adoptServedDashboardToken refuses a foreign token when our child is dead',

 test('adoptServedDashboardToken falls back to the spawn token when the fetch fails', async () => {
  const logs = []
+
  const token = await adoptServedDashboardToken('http://127.0.0.1:9120', 'spawn-token', {
    childAlive: () => true,
    fetchText: async () => {
--- a/apps/desktop/electron/dashboard-token.cjs
+++ b/apps/desktop/electron/dashboard-token.cjs
@@ -9,29 +9,35 @@

 const DEFAULT_TOKEN_FETCH_TIMEOUT_MS = 3_000

-async function fetchPublicText(url, options = {}) {
+async function fetchPublicText(url, options: any = {}) {
  const { protocol } = new URL(url)
+
  if (protocol !== 'http:' && protocol !== 'https:') {
    throw new Error(`Unsupported Hermes backend URL protocol: ${protocol}`)
  }

  const timeoutMs = options.timeoutMs ?? DEFAULT_TOKEN_FETCH_TIMEOUT_MS
+
  const res = await fetch(url, { signal: AbortSignal.timeout(timeoutMs) }).catch(error => {
    if (error.name === 'TimeoutError') {
      throw new Error(`Timed out connecting to Hermes backend after ${timeoutMs}ms`)
    }
+
    throw error
  })
+
  const text = await res.text()

-  if (!res.ok) throw new Error(`${res.status}: ${text || res.statusText}`)
+  if (!res.ok) {throw new Error(`${res.status}: ${text || res.statusText}`)}

  return text
 }

 function extractInjectedDashboardToken(html) {
  const match = /window\.__HERMES_SESSION_TOKEN__\s*=\s*("(?:\\.|[^"\\])*")/.exec(String(html || ''))
-  if (!match) return null
+
+  if (!match) {return null}
+
  try {
    return JSON.parse(match[1])
  } catch {
@@ -43,11 +49,13 @@ function dashboardIndexUrl(baseUrl) {
  return `${String(baseUrl || '').replace(/\/+$/, '')}/`
 }

-async function resolveServedDashboardToken(baseUrl, fallbackToken, options = {}) {
+async function resolveServedDashboardToken(baseUrl, fallbackToken, options: any = {}) {
  const fetchText = options.fetchText || fetchPublicText
+
  const html = await fetchText(dashboardIndexUrl(baseUrl), {
    timeoutMs: options.timeoutMs ?? DEFAULT_TOKEN_FETCH_TIMEOUT_MS
  })
+
  const servedToken = extractInjectedDashboardToken(html)

  if (servedToken && servedToken !== fallbackToken && typeof options.rememberLog === 'function') {
@@ -76,6 +84,7 @@ function isForeignBackendToken({ servedToken, spawnToken, childAlive }) {
 async function adoptServedDashboardToken(baseUrl, spawnToken, { childAlive, label = 'Hermes backend', ...options }) {
  const servedToken = await resolveServedDashboardToken(baseUrl, spawnToken, options).catch(error => {
    options.rememberLog?.(`[boot] could not read served dashboard token (${label}): ${error.message}`)
+
    return spawnToken
  })

@@ -88,12 +97,10 @@ async function adoptServedDashboardToken(baseUrl, spawnToken, { childAlive, labe
  return servedToken
 }

-module.exports = {
-  DEFAULT_TOKEN_FETCH_TIMEOUT_MS,
-  adoptServedDashboardToken,
+export { adoptServedDashboardToken,
  dashboardIndexUrl,
+  DEFAULT_TOKEN_FETCH_TIMEOUT_MS,
  extractInjectedDashboardToken,
  fetchPublicText,
  isForeignBackendToken,
-  resolveServedDashboardToken
-}
+  resolveServedDashboardToken }
--- a/apps/desktop/electron/desktop-uninstall.test.cjs
+++ b/apps/desktop/electron/desktop-uninstall.test.cjs
@@ -9,19 +9,17 @@
 * cleanup-script builders (POSIX + Windows).
 */

-const test = require('node:test')
-const assert = require('node:assert/strict')
+import assert from 'node:assert/strict'
+import test from 'node:test'

-const {
-  UNINSTALL_MODES,
-  buildPosixCleanupScript,
+import { buildPosixCleanupScript,
  buildWindowsCleanupScript,
  modeRemovesAgent,
  modeRemovesUserData,
  resolveRemovableAppPath,
  shouldRemoveAppBundle,
-  uninstallArgsForMode
-} = require('./desktop-uninstall.cjs')
+  UNINSTALL_MODES,
+  uninstallArgsForMode } from './desktop-uninstall'

 // --- uninstallArgsForMode ---

@@ -132,6 +130,7 @@ test('buildPosixCleanupScript waits for the PID, runs the uninstall module, remo
    appPath: '/opt/hermes/linux-unpacked',
    hermesHome: '/home/x/.hermes'
  })
+
  assert.match(script, /^#!\/bin\/bash/)
  assert.match(script, /pid=4321/)
  assert.match(script, /kill -0 "\$pid"/)
@@ -152,6 +151,7 @@ test('buildPosixCleanupScript exports PYTHONPATH when pythonPath is set (lite/fu
    appPath: null,
    hermesHome: '/home/x/.hermes'
  })
+
  // System python + source on PYTHONPATH so import hermes_cli works while the
  // venv is torn down.
  assert.match(script, /export PYTHONPATH='\/home\/x\/\.hermes\/hermes-agent'/)
@@ -168,6 +168,7 @@ test('buildPosixCleanupScript omits PYTHONPATH when pythonPath is null (gui)', (
    appPath: null,
    hermesHome: '/h'
  })
+
  assert.doesNotMatch(script, /export PYTHONPATH/)
 })

@@ -181,6 +182,7 @@ test('buildPosixCleanupScript omits the bundle rm when appPath is null', () => {
    appPath: null,
    hermesHome: '/h'
  })
+
  assert.doesNotMatch(script, /rm -rf '\//)
  // Still runs the uninstall.
  assert.match(script, /'-m' 'hermes_cli\.uninstall' '--mode' 'lite'/)
@@ -196,6 +198,7 @@ test('buildPosixCleanupScript single-quote-escapes paths with apostrophes', () =
    appPath: null,
    hermesHome: '/h'
  })
+
  // The apostrophe is closed-escaped-reopened so the shell sees the literal.
  assert.match(script, /'\/home\/o'\\''brien\/python'/)
 })
@@ -212,6 +215,7 @@ test('buildWindowsCleanupScript waits (bounded) for PID, runs uninstall, rmdir b
    appPath: 'C:\\Users\\x\\AppData\\Local\\Programs\\Hermes',
    hermesHome: 'C:\\Users\\x\\AppData\\Local\\hermes'
  })
+
  assert.match(script, /@echo off/)
  assert.match(script, /set "PID=9988"/)
  // PYTHONPATH set so a system python can import hermes_cli from source.
@@ -238,6 +242,7 @@ test('buildWindowsCleanupScript omits PYTHONPATH + rmdir when not needed (gui, n
    appPath: null,
    hermesHome: 'C:\\h'
  })
+
  assert.doesNotMatch(script, /rmdir/)
  assert.doesNotMatch(script, /set "PYTHONPATH=/)
 })
--- a/apps/desktop/electron/desktop-uninstall.cjs
+++ b/apps/desktop/electron/desktop-uninstall.cjs
@@ -26,7 +26,7 @@
 * shape as the self-update swap-and-relaunch flow already in main.cjs.
 */

-const path = require('node:path')
+import path from 'node:path'

 const UNINSTALL_MODES = ['gui', 'lite', 'full']

@@ -41,6 +41,7 @@ function uninstallArgsForMode(mode) {
  if (!UNINSTALL_MODES.includes(mode)) {
    throw new Error(`Unknown uninstall mode: ${mode}`)
  }
+
  return ['-m', 'hermes_cli.uninstall', '--mode', mode]
 }

@@ -65,9 +66,10 @@ function modeRemovesUserData(mode) {
 * Returns null when we can't confidently identify a removable bundle (e.g.
 * running from a dev checkout, or a system-package install we must not rmtree).
 */
-function resolveRemovableAppPath(execPath, platform, env = {}) {
+function resolveRemovableAppPath(execPath, platform, env: any = {}) {
  const exe = String(execPath || '')
-  if (!exe) return null
+
+  if (!exe) {return null}

  // Use the path flavor that matches the TARGET platform, not the host running
  // this code — so the Windows branch parses backslash paths correctly even
@@ -79,22 +81,28 @@ function resolveRemovableAppPath(execPath, platform, env = {}) {
    const macOsDir = p.dirname(exe) // …/Contents/MacOS
    const contents = p.dirname(macOsDir) // …/Contents
    const appBundle = p.dirname(contents) // …/Hermes.app
-    if (appBundle.endsWith('.app')) return appBundle
+
+    if (appBundle.endsWith('.app')) {return appBundle}
+
    return null
  }

  if (platform === 'win32') {
    // NSIS per-user installs Hermes.exe directly in the install dir.
    const dir = p.dirname(exe)
-    if (/[\\/]Hermes$/i.test(dir) || /[\\/]hermes-desktop$/i.test(dir)) return dir
+
+    if (/[\\/]Hermes$/i.test(dir) || /[\\/]hermes-desktop$/i.test(dir)) {return dir}
+
    return null
  }

  // Linux: an AppImage exposes its own path via the APPIMAGE env var.
-  if (env.APPIMAGE) return env.APPIMAGE
+  if (env.APPIMAGE) {return env.APPIMAGE}
  // Unpacked electron-builder tree: …/linux-unpacked/hermes
  const dir = p.dirname(exe)
-  if (/-unpacked$/.test(dir)) return dir
+
+  if (/-unpacked$/.test(dir)) {return dir}
+
  return null
 }

@@ -121,6 +129,7 @@ function shouldRemoveAppBundle(isPackaged, appPath) {
 */
 function buildPosixCleanupScript({ desktopPid, pythonExe, pythonPath, agentRoot, uninstallArgs, appPath, hermesHome }) {
  const q = s => `'${String(s).replace(/'/g, `'\\''`)}'`
+
  const lines = [
    '#!/bin/bash',
    'set -u',
@@ -135,16 +144,21 @@ function buildPosixCleanupScript({ desktopPid, pythonExe, pythonPath, agentRoot,
    'fi',
    `export HERMES_HOME=${q(hermesHome)}`
  ]
+
  if (pythonPath) {
    lines.push(`export PYTHONPATH=${q(pythonPath)}\${PYTHONPATH:+:$PYTHONPATH}`)
  }
+
  lines.push(`cd ${q(agentRoot)} 2>/dev/null || true`, `${q(pythonExe)} ${uninstallArgs.map(q).join(' ')} || true`)
+
  if (appPath) {
    lines.push(`rm -rf ${q(appPath)} || true`)
  }
+
  // Self-delete the script.
  lines.push('rm -f "$0" 2>/dev/null || true')
  lines.push('')
+
  return lines.join('\n')
 }

@@ -180,15 +194,18 @@ function buildWindowsCleanupScript({
  // under %LOCALAPPDATA% never contain them). `&`/`^` in a path would still be
  // a problem, but Hermes install paths don't use them.
  const q = s => `"${String(s).replace(/"/g, '')}"`
+
  const lines = [
    '@echo off',
    'setlocal enableextensions',
    `set "HERMES_HOME=${String(hermesHome).replace(/"/g, '')}"`,
    `set "PID=${pid}"`
  ]
+
  if (pythonPath) {
    lines.push(`set "PYTHONPATH=${String(pythonPath).replace(/"/g, '')};%PYTHONPATH%"`)
  }
+
  lines.push(
    'set /a waited=0',
    ':waitloop',
@@ -206,6 +223,7 @@ function buildWindowsCleanupScript({
    `cd /d ${q(agentRoot)}`,
    `${q(pythonExe)} ${uninstallArgs.map(q).join(' ')}`
  )
+
  if (appPath) {
    lines.push(
      'set /a tries=0',
@@ -220,18 +238,18 @@ function buildWindowsCleanupScript({
      ':rmdone'
    )
  }
+
  lines.push('del "%~f0"')
  lines.push('')
+
  return lines.join('\r\n')
 }

-module.exports = {
-  UNINSTALL_MODES,
-  buildPosixCleanupScript,
+export { buildPosixCleanupScript,
  buildWindowsCleanupScript,
  modeRemovesAgent,
  modeRemovesUserData,
  resolveRemovableAppPath,
  shouldRemoveAppBundle,
-  uninstallArgsForMode
-}
+  UNINSTALL_MODES,
+  uninstallArgsForMode }
--- a/apps/desktop/electron/embed-referer.cjs
+++ b/apps/desktop/electron/embed-referer.cjs
@@ -1,9 +1,8 @@
-'use strict'
-
-const { session } = require('electron')
+import { session } from 'electron'

 const EMBED_SESSION_PARTITION = 'persist:hermes-embed'
 const EMBED_REFERER = 'https://www.youtube.com/'
+
 const YOUTUBE_REFERER_HOST_RE =
  /(^|\.)(youtube\.com|youtube-nocookie\.com|googlevideo\.com|ytimg\.com|youtubei\.googleapis\.com)$/i

@@ -23,6 +22,7 @@ function installEmbedRefererForSession(embedSession) {

    if (!YOUTUBE_REFERER_HOST_RE.test(host)) {
      callback({ requestHeaders: details.requestHeaders })
+
      return
    }

@@ -45,4 +45,4 @@ function installEmbedReferer() {
  }
 }

-module.exports = { installEmbedReferer }
+export { installEmbedReferer }
--- a/apps/desktop/electron/fs-read-dir.test.cjs
+++ b/apps/desktop/electron/fs-read-dir.test.cjs
@@ -1,19 +1,17 @@
-'use strict'
+import assert from 'node:assert/strict'
+import fs from 'node:fs'
+import os from 'node:os'
+import path from 'node:path'
+import test from 'node:test'
+import { pathToFileURL } from 'node:url'

-const assert = require('node:assert/strict')
-const fs = require('node:fs')
-const os = require('node:os')
-const path = require('node:path')
-const test = require('node:test')
-const { pathToFileURL } = require('node:url')
-
-const { readDirForIpc } = require('./fs-read-dir.cjs')
+import { readDirForIpc } from './fs-read-dir'

 function mkTmpDir() {
  return fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-fs-read-dir-'))
 }

-function fakeDirent(name, flags = {}) {
+function fakeDirent(name, flags: any = {}) {
  return {
    name,
    isDirectory: () => Boolean(flags.directory),
@@ -109,10 +107,12 @@ test('readDirForIpc accepts file URLs for directories', async () => {

 test('readDirForIpc returns invalid-path for blank or non-string input', async () => {
  let readdirCalls = 0
+
  const fsImpl = {
    promises: {
      readdir: async () => {
        readdirCalls += 1
+
        return []
      }
    }
@@ -126,10 +126,12 @@ test('readDirForIpc returns invalid-path for blank or non-string input', async (

 test('readDirForIpc rejects Windows device paths before readdir', async () => {
  let readdirCalls = 0
+
  const fsImpl = {
    promises: {
      readdir: async () => {
        readdirCalls += 1
+
        return []
      }
    }
@@ -224,6 +226,7 @@ test('readDirForIpc allows expanding symlink or junction directories outside the
    fs.writeFileSync(path.join(outside, 'outside.txt'), 'ok')

    const linkPath = path.join(root, 'outside-link')
+
    try {
      fs.symlinkSync(outside, linkPath, process.platform === 'win32' ? 'junction' : 'dir')
    } catch (error) {
@@ -252,6 +255,7 @@ test('readDirForIpc stats symbolic links and unknown entries without dropping th
  const input = path.join('virtual-root')
  const resolved = path.resolve(input)
  const statCalls = []
+
  const fsImpl = {
    promises: {
      readdir: async () => [
@@ -266,9 +270,11 @@ test('readDirForIpc stats symbolic links and unknown entries without dropping th
        }

        statCalls.push(fullPath)
+
        if (fullPath.endsWith(`${path.sep}linked-dir`)) {
          return { isDirectory: () => true }
        }
+
        throw Object.assign(new Error('gone'), { code: 'ENOENT' })
      }
    }
@@ -301,12 +307,15 @@ test('readDirForIpc bounds concurrent stats while preserving complete sorted out
  let peak = 0
  let releaseStats
  let markFirstStatStarted
+
  const statsReleased = new Promise(resolve => {
    releaseStats = resolve
  })
+
  const firstStatStarted = new Promise(resolve => {
    markFirstStatStarted = resolve
  })
+
  const fsImpl = {
    promises: {
      readdir: async () => [
@@ -326,6 +335,7 @@ test('readDirForIpc bounds concurrent stats while preserving complete sorted out
        active -= 1

        const name = path.basename(fullPath)
+
        if (name === failedName) {
          throw Object.assign(new Error('gone'), { code: 'ENOENT' })
        }
--- a/apps/desktop/electron/fs-read-dir.cjs
+++ b/apps/desktop/electron/fs-read-dir.cjs
@@ -1,8 +1,7 @@
-'use strict'
+import fs from 'node:fs'
+import path from 'node:path'

-const fs = require('node:fs')
-const path = require('node:path')
-const { resolveDirectoryForIpc } = require('./hardening.cjs')
+import { resolveDirectoryForIpc } from './hardening'

 const FS_READDIR_STAT_CONCURRENCY = 16

@@ -37,7 +36,7 @@ function direntIsSymbolicLink(dirent) {
 }

 function shouldStatDirent(dirent) {
-  if (direntIsDirectory(dirent)) return false
+  if (direntIsDirectory(dirent)) {return false}

  return direntIsSymbolicLink(dirent) || !direntIsFile(dirent)
 }
@@ -70,13 +69,13 @@ async function mapWithStatConcurrency(items, mapper) {
  }

  const workerCount = Math.min(FS_READDIR_STAT_CONCURRENCY, items.length)
-  const workers = Array.from({ length: workerCount }, () => runWorker())
+  const workers = Array.from({ length: workerCount } as any, () => runWorker())
  await Promise.all(workers)

  return results
 }

-async function readDirForIpc(dirPath, options = {}) {
+async function readDirForIpc(dirPath, options: any = {}) {
  const fsImpl = options.fs || fs
  let resolved

@@ -102,6 +101,4 @@ async function readDirForIpc(dirPath, options = {}) {
  }
 }

-module.exports = {
-  readDirForIpc
-}
+export { readDirForIpc }
--- a/apps/desktop/electron/gateway-ws-probe.test.cjs
+++ b/apps/desktop/electron/gateway-ws-probe.test.cjs
@@ -9,16 +9,20 @@
 * outcome (open, frame, error, early close, never-opens) without a network.
 */

-const test = require('node:test')
-const assert = require('node:assert/strict')
+import assert from 'node:assert/strict'
+import test from 'node:test'

-const { probeGatewayWebSocket } = require('./gateway-ws-probe.cjs')
+import { probeGatewayWebSocket } from './gateway-ws-probe'

 // Minimal WebSocket double: records listeners synchronously (the probe attaches
 // them in its executor) and exposes emit() so the test can replay events.
-function makeFakeWs() {
+function makeFakeWs(): { FakeWs: new (url: string) => any; instances: any[] } {
  const instances = []
+
  class FakeWs {
+    url: string
+    closed = false
+    listeners: Record<string, any[]> = {}
    constructor(url) {
      this.url = url
      this.listeners = {}
@@ -32,9 +36,12 @@ function makeFakeWs() {
      this.closed = true
    }
    emit(type, event) {
-      for (const fn of this.listeners[type] || []) fn(event)
+      for (const fn of this.listeners[type] || []) {
+        fn(event)
+      }
    }
  }
+
  return { FakeWs, instances }
 }

@@ -51,11 +58,13 @@ test('probe resolves ok when the socket opens and stays open', async () => {

 test('probe resolves ok immediately when a frame arrives', async () => {
  const { FakeWs, instances } = makeFakeWs()
+
  const promise = probeGatewayWebSocket('ws://host/api/ws?token=t', {
    WebSocketImpl: FakeWs,
    connectTimeoutMs: 1_000,
    readyGraceMs: 10_000 // long grace: success must come from the frame, not the timer
  })
+
  instances[0].emit('open')
  instances[0].emit('message', { data: '{"jsonrpc":"2.0"}' })
  const result = await promise
@@ -95,11 +104,13 @@ test('probe fails when the gateway accepts then immediately closes (auth rejecte

 test('probe times out when the socket never opens', async () => {
  const { FakeWs } = makeFakeWs()
+
  const result = await probeGatewayWebSocket('ws://host/api/ws?token=t', {
    WebSocketImpl: FakeWs,
    connectTimeoutMs: 20,
    readyGraceMs: 10
  })
+
  assert.equal(result.ok, false)
  assert.match(result.reason, /Timed out/)
 })
--- a/apps/desktop/electron/gateway-ws-probe.cjs
+++ b/apps/desktop/electron/gateway-ws-probe.cjs
@@ -36,13 +36,13 @@ const DEFAULT_READY_GRACE_MS = 750
 * Attempt a live WebSocket connection and classify the outcome.
 *
 * @param {string} wsUrl - Fully-formed ws(s):// URL including the credential.
- * @param {object} [options]
- * @param {new (url: string) => any} [options.WebSocketImpl] - WebSocket ctor.
- * @param {number} [options.connectTimeoutMs]
- * @param {number} [options.readyGraceMs]
 * @returns {Promise<{ ok: boolean, reason?: string }>}
 */
-function probeGatewayWebSocket(wsUrl, options = {}) {
+function probeGatewayWebSocket<T>(wsUrl: string, options:{
+  WebSocketImpl?: any,
+  connectTimeoutMs?: number
+  readyGraceMs?: number
+} = {}) {
  const WebSocketImpl = options.WebSocketImpl
  const connectTimeoutMs = options.connectTimeoutMs ?? DEFAULT_CONNECT_TIMEOUT_MS
  const readyGraceMs = options.readyGraceMs ?? DEFAULT_READY_GRACE_MS
@@ -54,7 +54,7 @@ function probeGatewayWebSocket(wsUrl, options = {}) {
    })
  }

-  return new Promise(resolve => {
+  return new Promise<any>(resolve => {
    let settled = false
    let opened = false
    let connectTimer = null
@@ -66,6 +66,7 @@ function probeGatewayWebSocket(wsUrl, options = {}) {
        clearTimeout(connectTimer)
        connectTimer = null
      }
+
      if (graceTimer !== null) {
        clearTimeout(graceTimer)
        graceTimer = null
@@ -73,14 +74,16 @@ function probeGatewayWebSocket(wsUrl, options = {}) {
    }

    const finish = result => {
-      if (settled) return
+      if (settled) {return}
      settled = true
      clearTimers()
+
      try {
        socket?.close?.()
      } catch {
        // ignore — best effort teardown
      }
+
      resolve(result)
    }

@@ -91,11 +94,12 @@ function probeGatewayWebSocket(wsUrl, options = {}) {
        ok: false,
        reason: error instanceof Error ? error.message : String(error)
      })
+
      return
    }

    const onOpen = () => {
-      if (settled) return
+      if (settled) {return}
      opened = true
      // Upgrade accepted. Give the server a brief window to reject the
      // credential post-handshake (early close) before declaring success.
@@ -118,7 +122,8 @@ function probeGatewayWebSocket(wsUrl, options = {}) {
    }

    const onClose = event => {
-      if (settled) return
+      if (settled) {return}
+
      if (opened) {
        // Opened, then closed inside the grace window: the upgrade was accepted
        // but the session was refused (e.g. ws-ticket/token rejected, or a
@@ -127,8 +132,10 @@ function probeGatewayWebSocket(wsUrl, options = {}) {
          ok: false,
          reason: closeReason(event, 'The gateway accepted the connection then closed it (credential rejected?).')
        })
+
        return
      }
+
      finish({
        ok: false,
        reason: closeReason(event, 'The gateway closed the WebSocket before it opened.')
@@ -154,8 +161,10 @@ function probeGatewayWebSocket(wsUrl, options = {}) {
 function addListener(socket, type, handler) {
  if (typeof socket.addEventListener === 'function') {
    socket.addEventListener(type, handler)
+
    return
  }
+
  // Node's global WebSocket implements addEventListener; this fallback keeps the
  // helper usable with the `ws` package's EventEmitter shape too.
  if (typeof socket.on === 'function') {
@@ -164,25 +173,31 @@ function addListener(socket, type, handler) {
 }

 function extractErrorReason(event) {
-  if (!event) return ''
-  if (event instanceof Error) return event.message
+  if (!event) {return ''}
+
+  if (event instanceof Error) {return event.message}
  const err = event.error || event.message
-  if (err instanceof Error) return err.message
-  if (typeof err === 'string') return err
+
+  if (err instanceof Error) {return err.message}
+
+  if (typeof err === 'string') {return err}
+
  return ''
 }

 function closeReason(event, fallback) {
  const code = event && typeof event.code === 'number' ? event.code : null
  const reason = event && typeof event.reason === 'string' ? event.reason.trim() : ''
-  if (code && reason) return `${fallback} (code ${code}: ${reason})`
-  if (code) return `${fallback} (code ${code})`
-  if (reason) return `${fallback} (${reason})`
+
+  if (code && reason) {return `${fallback} (code ${code}: ${reason})`}
+
+  if (code) {return `${fallback} (code ${code})`}
+
+  if (reason) {return `${fallback} (${reason})`}
+
  return fallback
 }

-module.exports = {
-  DEFAULT_CONNECT_TIMEOUT_MS,
+export { DEFAULT_CONNECT_TIMEOUT_MS,
  DEFAULT_READY_GRACE_MS,
-  probeGatewayWebSocket
-}
+  probeGatewayWebSocket }
--- a/apps/desktop/electron/git-repo-scan.cjs
+++ b/apps/desktop/electron/git-repo-scan.cjs
@@ -1,14 +1,12 @@
-'use strict'
-
 // Repo-first discovery: walk bounded roots for git repos using only Node's `fs`
 // — no native addon, so it just works for anyone who pulls main (no
 // electron-rebuild). Mirrors how GitHub Desktop scans: stop at the first `.git`
 // (don't descend into a repo), cap depth, and skip heavy non-repo trees so the
 // first scan stays fast. Results are cached by the backend after the first run.

-const fs = require('node:fs')
-const os = require('node:os')
-const path = require('node:path')
+import fs from 'node:fs'
+import os from 'node:os'
+import path from 'node:path'

 const fsp = fs.promises

@@ -36,14 +34,14 @@ async function mapLimit(items, limit, fn) {
    }
  }

-  await Promise.all(Array.from({ length: Math.min(limit, items.length) }, worker))
+  await Promise.all(Array.from({ length: Math.min(limit, items.length) } as any, worker))
 }

 /**
 * Scan `roots` (default: the home dir) for git repositories. Returns deduped
 * `{ root, label }` entries. `options.maxDepth` caps recursion (default 3).
 */
-async function scanGitRepos(roots, options = {}) {
+async function scanGitRepos(roots, options: any = {}) {
  const maxDepth = Number(options.maxDepth) || DEFAULT_MAX_DEPTH
  const searchRoots = Array.isArray(roots) && roots.length > 0 ? roots : [os.homedir()]
  const found = new Map()
@@ -54,6 +52,7 @@ async function scanGitRepos(roots, options = {}) {
    }

    let entries
+
    try {
      entries = await fsp.readdir(dir, { withFileTypes: true })
    } catch {
@@ -73,6 +72,7 @@ async function scanGitRepos(roots, options = {}) {
    }

    const subdirs = []
+
    for (const entry of entries) {
      // Real directories only (skip symlinks to avoid loops), no hidden dirs, no
      // known heavy trees.
@@ -93,4 +93,4 @@ async function scanGitRepos(roots, options = {}) {
  return [...found.entries()].map(([root, label]) => ({ label, root }))
 }

-module.exports = { scanGitRepos }
+export { scanGitRepos }
--- a/apps/desktop/electron/git-review-ops.test.cjs
+++ b/apps/desktop/electron/git-review-ops.test.cjs
@@ -1,9 +1,7 @@
-'use strict'
+import assert from 'node:assert/strict'
+import test from 'node:test'

-const assert = require('node:assert/strict')
-const test = require('node:test')
-
-const { resolveRenamePath } = require('./git-review-ops.cjs')
+import { resolveRenamePath } from './git-review-ops'

 test('resolveRenamePath: plain path is unchanged', () => {
  assert.equal(resolveRenamePath('src/a.ts'), 'src/a.ts')
--- a/apps/desktop/electron/git-review-ops.cjs
+++ b/apps/desktop/electron/git-review-ops.cjs
@@ -1,18 +1,38 @@
-'use strict'
-
 // Git ops backing the coding rail + Codex-style review pane. Built on `simple-git`
 // (a maintained wrapper around the system git binary — same git the rest of the
 // app shells to, no native build) so we read structured status()/diffSummary()
 // results instead of hand-parsing porcelain. Reads degrade to null/empty on a
 // non-repo / remote backend; mutations reject so the renderer can toast.

-const { execFile } = require('node:child_process')
-const fs = require('node:fs/promises')
-const path = require('node:path')
+import { execFile } from 'node:child_process'
+import fs from 'node:fs/promises'
+import path from 'node:path'

-const simpleGit = require('simple-git')
+import simpleGitFn from 'simple-git'

-const { resolveRequestedPathForIpc } = require('./hardening.cjs')
+import { resolveRequestedPathForIpc } from './hardening'
+
+// `simple-git` is a pure-JS runtime dep that workspace dedup hoists into the repo-root
+// node_modules.  Packaged builds set `files:` in package.json, which excludes node_modules
+// from the asar, so a normal import fails at launch (issue #52735: "Cannot find module
+// 'simple-git'").  We ship the dep's closure under resources/native-deps/vendor/node_modules/
+// via extraResources + scripts/stage-native-deps.mjs, and resolve from there when the hoisted
+// import isn't reachable.  The `vendor/` nesting matters: electron-builder drops a node_modules
+// dir at the root of an extraResources copy but keeps a nested one.  Dev mode never hits the
+// fallback -- Node's normal lookup finds the hoisted copy.
+// NOTE(review): a failed static `import` throws while this module loads, before the `!simpleGit`
+// check below can run, and bare `require` only exists in CommonJS output -- confirm reachability.
+let simpleGit = simpleGitFn
+
+if (!simpleGit) {
+  const resourcesPath = (process as any).resourcesPath
+
+  if (!resourcesPath) {
+    throw new Error("git-review IPC: 'simple-git' not found and no resourcesPath to fall back to")
+  }
+
+  simpleGit = require(path.join(resourcesPath, 'native-deps', 'vendor', 'node_modules', 'simple-git'))
+}

 const COMMIT_CONTEXT_DIFF_MAX_CHARS = 120_000
 const COMMIT_CONTEXT_UNTRACKED_MAX = 80
@@ -33,7 +53,7 @@ function ghEnv(ghBin) {

 // Run the `gh` CLI in a repo. Resolves { ok, stdout } so callers branch on
 // availability/auth without a throw. gh missing/unauthed → ok:false.
-function runGh(args, cwd, ghBin) {
+function runGh(args, cwd, ghBin): Promise<{ok: boolean, stdout: string}> {
  return new Promise(resolve => {
    execFile(
      ghBin || 'gh',
@@ -241,10 +261,11 @@ async function reviewList(repoPath, scope, baseRef, gitBin) {

      const range = scope === 'branch' ? `${base}...HEAD` : base
      const summary = await git.diffSummary([range])
+
      const files = summary.files.map(file => ({
        path: resolveRenamePath(file.file),
-        added: file.binary ? 0 : file.insertions,
-        removed: file.binary ? 0 : file.deletions,
+        added: 'insertions' in file ? file.insertions : 0 ,
+        removed: 'deletions' in file ? file.deletions : 0 ,
        status: 'M',
        staged: false
      }))
@@ -272,6 +293,7 @@ async function reviewList(repoPath, scope, baseRef, gitBin) {
      git.diffSummary(['--cached']),
      git.diffSummary([])
    ])
+
    const stagedCounts = countsByPath(staged)
    const unstagedCounts = countsByPath(unstaged)

@@ -476,6 +498,7 @@ async function reviewCommitContext(repoPath, gitBin) {
  const safe = args => git.diff(args).catch(() => '')

  let status
+
  try {
    status = await git.status()
  } catch {
@@ -491,9 +514,11 @@ async function reviewCommitContext(repoPath, gitBin) {

  // Untracked files have no diff — list them so new files aren't invisible.
  const untracked = status.not_added || []
+
  if (untracked.length > 0) {
    const visible = untracked.slice(0, COMMIT_CONTEXT_UNTRACKED_MAX)
    const omitted = untracked.length - visible.length
+
    const note =
      `\n# New (untracked) files:\n${visible.map(p => `#   ${p}`).join('\n')}\n` +
      (omitted > 0 ? `#   ... ${omitted} more omitted\n` : '')
@@ -588,6 +613,7 @@ async function repoStatus(repoPath, gitBin) {
  // fail soft and hide the coding rail instead of spamming IPC handler errors.
  try {
    const stat = await fs.stat(cwd)
+
    if (!stat.isDirectory()) {
      return null
    }
@@ -596,11 +622,13 @@ async function repoStatus(repoPath, gitBin) {
  }

  let git
+
  try {
    git = gitFor(cwd, gitBin)
  } catch {
    return null
  }
+
  let status

  try {
@@ -611,6 +639,7 @@ async function repoStatus(repoPath, gitBin) {
  }

  const detached = typeof status.detached === 'boolean' ? status.detached : !status.current
+
  const files = status.files.map(file => ({
    path: file.path,
    staged: isStaged(file),
@@ -652,10 +681,12 @@ async function repoStatus(repoPath, gitBin) {
  // can't stall the probe.
  try {
    const untracked = status.not_added.slice(0, 500)
+
    for (let i = 0; i < untracked.length; i += UNTRACKED_LINE_COUNT_CONCURRENCY) {
      const batch = await Promise.all(
        untracked.slice(i, i + UNTRACKED_LINE_COUNT_CONCURRENCY).map(path => untrackedInsertions(cwd, path))
      )
+
      result.added += batch.reduce((sum, n) => sum + n, 0)
    }
  } catch {
@@ -665,8 +696,7 @@ async function repoStatus(repoPath, gitBin) {
  return result
 }

-module.exports = {
-  branchBase,
+export { branchBase,
  fileDiffVsHead,
  repoStatus,
  resolveRenamePath,
@@ -676,9 +706,8 @@ module.exports = {
  reviewDiff,
  reviewList,
  reviewPush,
-  reviewRevParse,
  reviewRevert,
+  reviewRevParse,
  reviewShipInfo,
  reviewStage,
-  reviewUnstage
-}
+  reviewUnstage }
--- a/apps/desktop/electron/git-root.test.cjs
+++ b/apps/desktop/electron/git-root.test.cjs
@@ -1,13 +1,11 @@
-'use strict'
+import assert from 'node:assert/strict'
+import fs from 'node:fs'
+import os from 'node:os'
+import path from 'node:path'
+import test from 'node:test'
+import { pathToFileURL } from 'node:url'

-const assert = require('node:assert/strict')
-const fs = require('node:fs')
-const os = require('node:os')
-const path = require('node:path')
-const test = require('node:test')
-const { pathToFileURL } = require('node:url')
-
-const { gitRootForIpc } = require('./git-root.cjs')
+import { gitRootForIpc } from './git-root'

 function mkTmpDir() {
  return fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-git-root-'))
--- a/apps/desktop/electron/git-root.cjs
+++ b/apps/desktop/electron/git-root.cjs
@@ -1,8 +1,7 @@
-'use strict'
+import fs from 'node:fs'
+import path from 'node:path'

-const fs = require('node:fs')
-const path = require('node:path')
-const { resolveRequestedPathForIpc } = require('./hardening.cjs')
+import { resolveRequestedPathForIpc } from './hardening'

 function findGitRoot(start, fsImpl = fs) {
  let dir = start
@@ -28,7 +27,7 @@ function findGitRoot(start, fsImpl = fs) {
  return null
 }

-async function gitRootForIpc(startPath, options = {}) {
+async function gitRootForIpc(startPath, options: {fs?: typeof fs} = {}) {
  const fsImpl = options.fs || fs
  let resolved

@@ -48,7 +47,5 @@ async function gitRootForIpc(startPath, options = {}) {
  }
 }

-module.exports = {
-  findGitRoot,
-  gitRootForIpc
-}
+export { findGitRoot,
+  gitRootForIpc }
--- a/apps/desktop/electron/git-worktree-ops.test.cjs
+++ b/apps/desktop/electron/git-worktree-ops.test.cjs
@@ -1,20 +1,16 @@
-'use strict'
+import assert from 'node:assert/strict'
+import { execFileSync } from 'node:child_process'
+import fs from 'node:fs'
+import os from 'node:os'
+import path from 'node:path'
+import test from 'node:test'

-const assert = require('node:assert/strict')
-const { execFileSync } = require('node:child_process')
-const fs = require('node:fs')
-const os = require('node:os')
-const path = require('node:path')
-const test = require('node:test')
-
-const {
-  addWorktree,
+import { addWorktree,
  ensureGitRepo,
  listBranches,
  parseWorktrees,
  sanitizeBranch,
-  switchBranch
-} = require('./git-worktree-ops.cjs')
+  switchBranch } from './git-worktree-ops'

 test('sanitizeBranch: spaces → hyphens, forbidden chars dropped, edges trimmed', () => {
  assert.equal(sanitizeBranch('beach vibes'), 'beach-vibes')
--- a/apps/desktop/electron/git-worktree-ops.cjs
+++ b/apps/desktop/electron/git-worktree-ops.cjs
@@ -1,16 +1,14 @@
-'use strict'
-
 // Git-driven worktree operations for the desktop "Start work" flow: spin up a
 // fresh worktree the lightest way (`git worktree add -b`), list real worktrees,
 // and remove them. Git is the source of truth; the renderer just drives these.

-const path = require('node:path')
-const fs = require('node:fs')
-const { execFile } = require('node:child_process')
+import { execFile } from 'node:child_process'
+import fs from 'node:fs'
+import path from 'node:path'

-const { resolveRequestedPathForIpc } = require('./hardening.cjs')
+import { resolveRequestedPathForIpc } from './hardening'

-function runGit(gitBin, args, cwd) {
+function runGit(gitBin, args, cwd): Promise<string> {
  return new Promise((resolve, reject) => {
    execFile(
      gitBin,
@@ -306,6 +304,7 @@ async function listBranches(repoPath, gitBin) {
      ['for-each-ref', '--format=%(refname:short)', '--sort=-committerdate', 'refs/heads'],
      resolved
    )
+
    const trees = await listWorktrees(resolved, gitBin)
    const pathByBranch = new Map(trees.filter(tree => tree.branch).map(tree => [tree.branch, tree.path]))
    const trunk = await defaultBranch(gitBin, resolved)
@@ -338,13 +337,11 @@ async function switchBranch(repoPath, branch, gitBin) {
  return { branch: target }
 }

-module.exports = {
-  addWorktree,
+export { addWorktree,
  ensureGitRepo,
  listBranches,
  listWorktrees,
  parseWorktrees,
  removeWorktree,
  sanitizeBranch,
-  switchBranch
-}
+  switchBranch }
--- a/apps/desktop/electron/hardening.test.cjs
+++ b/apps/desktop/electron/hardening.test.cjs
@@ -1,23 +1,22 @@
-const assert = require('node:assert/strict')
-const fs = require('node:fs')
-const os = require('node:os')
-const path = require('node:path')
-const test = require('node:test')
-const { pathToFileURL } = require('node:url')
+import assert from 'node:assert/strict'
+import fs from 'node:fs'
+import os from 'node:os'
+import path from 'node:path'
+import test from 'node:test'
+import { pathToFileURL } from 'node:url'

-const {
-  DEFAULT_FETCH_TIMEOUT_MS,
+import { DEFAULT_FETCH_TIMEOUT_MS,
  encryptDesktopSecret,
  resolveDirectoryForIpc,
  resolveReadableFileForIpc,
  resolveRequestedPathForIpc,
  resolveTimeoutMs,
-  sensitiveFileBlockReason
-} = require('./hardening.cjs')
+  sensitiveFileBlockReason } from './hardening'

-async function rejectsWithCode(promise, code) {
-  await assert.rejects(promise, error => {
+async function rejectsWithCode(promise, code: string) {
+  await assert.rejects(promise, (error: any) => {
    assert.equal(error?.code, code)
+
    return true
  })
 }
@@ -76,8 +75,9 @@ test('path helpers reject blank non-string NUL and Windows device syntax', async
  for (const devicePath of devicePaths) {
    assert.throws(
      () => resolveRequestedPathForIpc(devicePath, { purpose: 'File preview' }),
-      error => {
+      (error: any) => {
        assert.equal(error?.code, 'device-path')
+
        return true
      }
    )
@@ -86,8 +86,9 @@ test('path helpers reject blank non-string NUL and Windows device syntax', async

  assert.throws(
    () => resolveRequestedPathForIpc('file:///%E0%A4%A', { purpose: 'File preview' }),
-    error => {
+    (error: any) => {
      assert.equal(error?.code, 'invalid-path')
+
      return true
    }
  )
@@ -131,19 +132,23 @@ test('resolveReadableFileForIpc validates existence type size and sensitivity',
    maxBytes: 256,
    purpose: 'File preview'
  })
+
  assert.equal(fromRelative.resolvedPath, textPath)
  assert.equal(fromRelative.stat.size, 11)

  const fromFileUrl = await resolveReadableFileForIpc(pathToFileURL(textPath).toString(), {
    purpose: 'File preview'
  })
+
  assert.equal(fromFileUrl.resolvedPath, textPath)

  const spacedPath = path.join(tempDir, 'notes with spaces.txt')
  fs.writeFileSync(spacedPath, 'space ok', 'utf8')
+
  const fromSpacedFileUrl = await resolveReadableFileForIpc(pathToFileURL(spacedPath).toString(), {
    purpose: 'File preview'
  })
+
  assert.equal(fromSpacedFileUrl.resolvedPath, spacedPath)

  await assert.rejects(
@@ -184,9 +189,11 @@ test('resolveReadableFileForIpc validates existence type size and sensitivity',

  const envTemplatePath = path.join(tempDir, '.env.example')
  fs.writeFileSync(envTemplatePath, 'EXAMPLE_TOKEN=value', 'utf8')
+
  const envTemplate = await resolveReadableFileForIpc(envTemplatePath, {
    purpose: 'File preview'
  })
+
  assert.equal(envTemplate.resolvedPath, envTemplatePath)
 })

@@ -229,8 +236,10 @@ test('resolveReadableFileForIpc blocks symlinks whose realpath is sensitive', as
  } catch (error) {
    if (error?.code === 'EPERM' || error?.code === 'EACCES') {
      t.skip(`symlink creation is not permitted on this platform (${error.code})`)
+
      return
    }
+
    throw error
  }

@@ -268,8 +277,10 @@ test('resolveDirectoryForIpc accepts directory symlinks or junctions', async t =
  } catch (error) {
    if (error?.code === 'EPERM' || error?.code === 'EACCES') {
      t.skip(`directory symlink creation is not permitted on this platform (${error.code})`)
+
      return
    }
+
    throw error
  }

--- a/apps/desktop/electron/hardening.cjs
+++ b/apps/desktop/electron/hardening.cjs
@@ -1,7 +1,7 @@
-const fs = require('node:fs')
-const os = require('node:os')
-const path = require('node:path')
-const { fileURLToPath } = require('node:url')
+import fs from 'node:fs'
+import os from 'node:os'
+import path from 'node:path'
+import { fileURLToPath } from 'node:url'

 const DEFAULT_FETCH_TIMEOUT_MS = 15_000
 const DATA_URL_READ_MAX_BYTES = 16 * 1024 * 1024
@@ -13,6 +13,7 @@ const SENSITIVE_EXTENSIONS = new Set(['.kdbx', '.p12', '.pem', '.pfx'])
 function resolveTimeoutMs(timeoutMs, fallbackMs = DEFAULT_FETCH_TIMEOUT_MS) {
  const fallback =
    Number.isFinite(fallbackMs) && Number(fallbackMs) > 0 ? Math.round(Number(fallbackMs)) : DEFAULT_FETCH_TIMEOUT_MS
+
  const parsed = Number(timeoutMs)

  if (Number.isFinite(parsed) && parsed > 0) {
@@ -62,6 +63,7 @@ function sensitiveFileBlockReason(filePath) {
  const normalized = String(filePath || '')
    .replace(/\\/g, '/')
    .toLowerCase()
+
  const basename = path.basename(normalized)
  const ext = path.extname(basename)

@@ -87,6 +89,7 @@ function sensitiveFileBlockReason(filePath) {

  if (basename.startsWith('.env.')) {
    const suffix = basename.slice('.env.'.length)
+
    if (!SAFE_ENV_SUFFIXES.has(suffix)) {
      return `${basename} is blocked because it appears to contain environment secrets.`
    }
@@ -107,9 +110,10 @@ function sensitiveFileBlockReason(filePath) {
  return null
 }

-function ipcPathError(code, message) {
-  const error = new Error(message)
-  error.code = code
+function ipcPathError(code: any, message: string): Error & {code: any} {
+  const error = new Error(message) as Error & {code: any}
+  error.code = code // `code` is already on the asserted type; no `(`-led statement after a semicolon-less line
+
   return error
 }

@@ -129,6 +133,7 @@ function rejectUnsafePathSyntax(filePath, purpose = 'File read') {
  }

  const normalized = raw.replace(/\\/g, '/').toLowerCase()
+
  if (
    normalized.startsWith('//?/') ||
    normalized.startsWith('//./') ||
@@ -141,7 +146,7 @@ function rejectUnsafePathSyntax(filePath, purpose = 'File read') {
  return raw
 }

-function resolveRequestedPathForIpc(filePath, options = {}) {
+function resolveRequestedPathForIpc(filePath, options: {purpose?: string, baseDir?: fs.PathOrFileDescriptor} = {}) {
  const purpose = String(options.purpose || 'File read')
  let raw = rejectUnsafePathSyntax(filePath, purpose)

@@ -154,17 +159,21 @@ function resolveRequestedPathForIpc(filePath, options = {}) {

  if (/^file:/i.test(raw)) {
    let resolvedPath
+
    try {
      const parsed = new URL(raw)
+
      if (parsed.protocol !== 'file:') {
        throw new Error('not a file URL')
      }
+
      resolvedPath = fileURLToPath(parsed)
    } catch {
      throw ipcPathError('invalid-path', `${purpose} failed: file URL is invalid.`)
    }

    rejectUnsafePathSyntax(resolvedPath, purpose)
+
    return path.resolve(resolvedPath)
  }

@@ -178,14 +187,16 @@ function resolveRequestedPathForIpc(filePath, options = {}) {
  return resolvedPath
 }

-async function statForIpc(fsImpl, resolvedPath, purpose, typeLabel) {
+async function statForIpc(fsImpl: {promises: {stat: typeof fs.promises.stat}}, resolvedPath, purpose, typeLabel) {
  try {
    return await fsImpl.promises.stat(resolvedPath)
  } catch (error) {
    const code = error && typeof error === 'object' ? error.code : ''
+
    if (code === 'ENOENT' || code === 'ENOTDIR') {
      throw ipcPathError(code || 'ENOENT', `${purpose} failed: ${typeLabel} does not exist.`)
    }
+
    throw ipcPathError(
      code || 'read-error',
      `${purpose} failed: ${error instanceof Error ? error.message : String(error)}`
@@ -201,6 +212,7 @@ async function realpathForIpc(fsImpl, resolvedPath, purpose) {
  try {
    const realPath = await fsImpl.promises.realpath(resolvedPath)
    rejectUnsafePathSyntax(realPath, purpose)
+
    return realPath
  } catch (error) {
    const code = error && typeof error === 'object' ? error.code : ''
@@ -213,12 +225,13 @@ async function realpathForIpc(fsImpl, resolvedPath, purpose) {

 function rejectSensitiveFilePath(filePath, purpose) {
  const blockReason = sensitiveFileBlockReason(filePath)
+
  if (blockReason) {
    throw ipcPathError('sensitive-file', `${purpose} blocked for sensitive file: ${blockReason}`)
  }
 }

-async function resolveDirectoryForIpc(dirPath, options = {}) {
+async function resolveDirectoryForIpc(dirPath, options: {purpose?: string , baseDir?: fs.PathOrFileDescriptor, fs?: {promises:{stat: typeof fs.promises.stat}}} = {}) {
  const purpose = String(options.purpose || 'Directory read')
  const fsImpl = options.fs || fs
  const resolvedPath = resolveRequestedPathForIpc(dirPath, { baseDir: options.baseDir, purpose })
@@ -233,7 +246,7 @@ async function resolveDirectoryForIpc(dirPath, options = {}) {
  return { realPath, resolvedPath, stat }
 }

-async function resolveReadableFileForIpc(filePath, options = {}) {
+async function resolveReadableFileForIpc(filePath, options: {purpose?: string , baseDir?: fs.PathOrFileDescriptor, fs?: typeof fs, blockSensitive?: boolean, maxBytes?: number} = {}) {
  const purpose = String(options.purpose || 'File read')
  const fsImpl = options.fs || fs
  const resolvedPath = resolveRequestedPathForIpc(filePath, { baseDir: options.baseDir, purpose })
@@ -253,11 +266,13 @@ async function resolveReadableFileForIpc(filePath, options = {}) {
  }

  const realPath = await realpathForIpc(fsImpl, resolvedPath, purpose)
+
  if (options.blockSensitive !== false) {
    rejectSensitiveFilePath(realPath, purpose)
  }

  const maxBytes = Number.isFinite(options.maxBytes) && Number(options.maxBytes) > 0 ? Number(options.maxBytes) : null
+
  if (maxBytes && stat.size > maxBytes) {
    throw ipcPathError('EFBIG', `${purpose} failed: file is too large (${stat.size} bytes; limit ${maxBytes} bytes).`)
  }
@@ -271,15 +286,13 @@ async function resolveReadableFileForIpc(filePath, options = {}) {
  return { realPath, resolvedPath, stat }
 }

-module.exports = {
-  DATA_URL_READ_MAX_BYTES,
+export { DATA_URL_READ_MAX_BYTES,
  DEFAULT_FETCH_TIMEOUT_MS,
-  TEXT_PREVIEW_SOURCE_MAX_BYTES,
  encryptDesktopSecret,
  rejectUnsafePathSyntax,
  resolveDirectoryForIpc,
  resolveReadableFileForIpc,
  resolveRequestedPathForIpc,
  resolveTimeoutMs,
-  sensitiveFileBlockReason
-}
+  sensitiveFileBlockReason,
+  TEXT_PREVIEW_SOURCE_MAX_BYTES }
--- a/apps/desktop/electron/link-title-window.test.cjs
+++ b/apps/desktop/electron/link-title-window.test.cjs
@@ -1,10 +1,11 @@
-const assert = require('node:assert/strict')
-const test = require('node:test')
+import assert from 'node:assert/strict'
+import test from 'node:test'

-const { createLinkTitleWindow, linkTitleWindowOptions } = require('./link-title-window.cjs')
+import { createLinkTitleWindow, linkTitleWindowOptions } from './link-title-window'

 function makeFakeBrowserWindow() {
  const calls = { audioMuted: [] }
+
  const FakeBrowserWindow = function (options) {
    this.options = options
    this.webContents = {
--- a/apps/desktop/electron/link-title-window.cjs
+++ b/apps/desktop/electron/link-title-window.cjs
@@ -1,5 +1,3 @@
-'use strict'
-
 // Hidden BrowserWindow used by tier-2 link-title resolution: when curl can't
 // read a page <title> (bot walls, JS-rendered pages), we briefly load the URL
 // in an offscreen window and read its title. That window loads arbitrary
@@ -39,4 +37,4 @@ function createLinkTitleWindow(BrowserWindow, partitionSession) {
  return window
 }

-module.exports = { createLinkTitleWindow, linkTitleWindowOptions }
+export { createLinkTitleWindow, linkTitleWindowOptions }
--- a/apps/desktop/electron/main.cjs
+++ b/apps/desktop/electron/main.cjs
--- a/apps/desktop/electron/oauth-net-request.test.cjs
+++ b/apps/desktop/electron/oauth-net-request.test.cjs
@@ -4,10 +4,10 @@
 * Run with: node --test electron/oauth-net-request.test.cjs
 */

-const test = require('node:test')
-const assert = require('node:assert/strict')
+import assert from 'node:assert/strict'
+import test from 'node:test'

-const { serializeJsonBody, setJsonRequestHeaders } = require('./oauth-net-request.cjs')
+import { serializeJsonBody, setJsonRequestHeaders } from './oauth-net-request'

 test('serializeJsonBody returns undefined for absent bodies', () => {
  assert.equal(serializeJsonBody(undefined), undefined)
@@ -21,6 +21,7 @@ test('serializeJsonBody JSON-encodes request bodies', () => {

 test('setJsonRequestHeaders does not set Electron-restricted Content-Length', () => {
  const headers = []
+
  const request = {
    setHeader(name, value) {
      headers.push([name, value])
--- a/apps/desktop/electron/oauth-net-request.cjs
+++ b/apps/desktop/electron/oauth-net-request.cjs
@@ -14,7 +14,5 @@ function setJsonRequestHeaders(request) {
  request.setHeader('Content-Type', 'application/json')
 }

-module.exports = {
-  serializeJsonBody,
-  setJsonRequestHeaders
-}
+export { serializeJsonBody,
+  setJsonRequestHeaders }
--- a/apps/desktop/electron/preload.cjs
+++ b/apps/desktop/electron/preload.cjs
@@ -1,4 +1,4 @@
-const { contextBridge, ipcRenderer, webUtils } = require('electron')
+import { contextBridge, ipcRenderer, webUtils } from 'electron'

 contextBridge.exposeInMainWorld('hermesDesktop', {
  getConnection: profile => ipcRenderer.invoke('hermes:connection', profile),
@@ -24,12 +24,14 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
    onState: callback => {
      const listener = (_event, payload) => callback(payload)
      ipcRenderer.on('hermes:pet-overlay:state', listener)
+
      return () => ipcRenderer.removeListener('hermes:pet-overlay:state', listener)
    },
    // Main renderer subscribes to overlay control messages.
    onControl: callback => {
      const listener = (_event, payload) => callback(payload)
      ipcRenderer.on('hermes:pet-overlay:control', listener)
+
      return () => ipcRenderer.removeListener('hermes:pet-overlay:control', listener)
    }
  },
@@ -120,64 +122,76 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
      const channel = `hermes:terminal:${id}:data`
      const listener = (_event, payload) => callback(payload)
      ipcRenderer.on(channel, listener)
+
      return () => ipcRenderer.removeListener(channel, listener)
    },
    onExit: (id, callback) => {
      const channel = `hermes:terminal:${id}:exit`
      const listener = (_event, payload) => callback(payload)
      ipcRenderer.on(channel, listener)
+
      return () => ipcRenderer.removeListener(channel, listener)
    }
  },
  onClosePreviewRequested: callback => {
    const listener = () => callback()
    ipcRenderer.on('hermes:close-preview-requested', listener)
+
    return () => ipcRenderer.removeListener('hermes:close-preview-requested', listener)
  },
  onOpenUpdatesRequested: callback => {
    const listener = () => callback()
    ipcRenderer.on('hermes:open-updates', listener)
+
    return () => ipcRenderer.removeListener('hermes:open-updates', listener)
  },
  onDeepLink: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:deep-link', listener)
+
    return () => ipcRenderer.removeListener('hermes:deep-link', listener)
  },
  signalDeepLinkReady: () => ipcRenderer.invoke('hermes:deep-link-ready'),
  onWindowStateChanged: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:window-state-changed', listener)
+
    return () => ipcRenderer.removeListener('hermes:window-state-changed', listener)
  },
  onFocusSession: callback => {
    const listener = (_event, sessionId) => callback(sessionId)
    ipcRenderer.on('hermes:focus-session', listener)
+
    return () => ipcRenderer.removeListener('hermes:focus-session', listener)
  },
  onNotificationAction: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:notification-action', listener)
+
    return () => ipcRenderer.removeListener('hermes:notification-action', listener)
  },
  onPreviewFileChanged: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:preview-file-changed', listener)
+
    return () => ipcRenderer.removeListener('hermes:preview-file-changed', listener)
  },
  onBackendExit: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:backend-exit', listener)
+
    return () => ipcRenderer.removeListener('hermes:backend-exit', listener)
  },
  onPowerResume: callback => {
    const listener = () => callback()
    ipcRenderer.on('hermes:power-resume', listener)
+
    return () => ipcRenderer.removeListener('hermes:power-resume', listener)
  },
  onBootProgress: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:boot-progress', listener)
+
    return () => ipcRenderer.removeListener('hermes:boot-progress', listener)
  },
  // First-launch bootstrap progress -- emitted by the install.ps1 stage
@@ -192,6 +206,7 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
  onBootstrapEvent: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:bootstrap:event', listener)
+
    return () => ipcRenderer.removeListener('hermes:bootstrap:event', listener)
  },
  getVersion: () => ipcRenderer.invoke('hermes:version'),
@@ -208,6 +223,7 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
    onProgress: callback => {
      const listener = (_event, payload) => callback(payload)
      ipcRenderer.on('hermes:updates:progress', listener)
+
      return () => ipcRenderer.removeListener('hermes:updates:progress', listener)
    }
  },
--- a/apps/desktop/electron/session-windows.test.cjs
+++ b/apps/desktop/electron/session-windows.test.cjs
@@ -1,11 +1,9 @@
-const assert = require('node:assert/strict')
-const test = require('node:test')
+import assert from 'node:assert/strict'
+import test from 'node:test'

-const {
-  buildSessionWindowUrl,
+import { buildSessionWindowUrl,
  chatWindowWebPreferences,
-  createSessionWindowRegistry
-} = require('./session-windows.cjs')
+  createSessionWindowRegistry } from './session-windows'

 // A minimal fake BrowserWindow: tracks listeners + destroyed state and lets a
 // test fire the 'closed' event, mirroring the slice of the Electron API the
@@ -96,6 +94,7 @@ test('registry opens one window per session and focuses on re-open', () => {
  const registry = createSessionWindowRegistry()
  let built = 0
  const win = makeFakeWindow()
+
  const factory = () => {
    built += 1

@@ -145,6 +144,7 @@ test('registry rebuilds a fresh window after the previous one was destroyed', ()

  let built = 0
  const second = makeFakeWindow()
+
  const result = registry.openOrFocus('s1', () => {
    built += 1

@@ -158,6 +158,7 @@ test('registry rebuilds a fresh window after the previous one was destroyed', ()
 test('registry ignores empty / non-string session ids', () => {
  const registry = createSessionWindowRegistry()
  let built = 0
+
  const factory = () => {
    built += 1

--- a/apps/desktop/electron/session-windows.cjs
+++ b/apps/desktop/electron/session-windows.cjs
@@ -3,7 +3,7 @@
 // here so they can be unit-tested with node --test (mirroring how the rest of
 // electron/*.cjs splits testable logic out of the main.cjs monolith).

-const { pathToFileURL } = require('node:url')
+import { pathToFileURL } from 'node:url'

 // Secondary windows open at the minimum usable size — a compact side panel for
 // subagent watch / cmd-click session pop-out, not a second full desktop.
@@ -42,7 +42,7 @@ function chatWindowWebPreferences(preloadPath) {
 // scratch window; `watch=1` marks a spectator window (e.g. a running subagent's
 // session): the renderer resumes it lazily so the gateway never builds an agent
 // just to stream into it.
-function buildSessionWindowUrl(sessionId, { devServer, rendererIndexPath, watch, newSession } = {}) {
+function buildSessionWindowUrl(sessionId: string, { devServer, rendererIndexPath, watch, newSession }: any = {}) {
  const query = `?win=secondary${newSession ? '&new=1' : ''}${watch ? '&watch=1' : ''}`
  const route = newSession ? '#/' : `#/${encodeURIComponent(sessionId)}`

@@ -115,10 +115,8 @@ function createSessionWindowRegistry() {
  }
 }

-module.exports = {
-  buildSessionWindowUrl,
+export { buildSessionWindowUrl,
  chatWindowWebPreferences,
  createSessionWindowRegistry,
  SESSION_WINDOW_MIN_HEIGHT,
-  SESSION_WINDOW_MIN_WIDTH
-}
+  SESSION_WINDOW_MIN_WIDTH }
--- a/apps/desktop/electron/titlebar-overlay-width.cjs
+++ b/apps/desktop/electron/titlebar-overlay-width.cjs
@@ -1,11 +0,0 @@
-// Pre-layout fallback for WCO right-edge reservation (--titlebar-tools-right).
-// Live width comes from navigator.windowControlsOverlay in the renderer.
-
-const OVERLAY_FALLBACK_WIDTH = 144
-
-/** @param {{ isWindows?: boolean, isWsl?: boolean }} opts */
-function nativeOverlayWidth({ isWindows = false, isWsl = false } = {}) {
-  return isWindows || isWsl ? OVERLAY_FALLBACK_WIDTH : 0
-}
-
-module.exports = { OVERLAY_FALLBACK_WIDTH, nativeOverlayWidth }
--- a/apps/desktop/electron/titlebar-overlay-width.test.cjs
+++ b/apps/desktop/electron/titlebar-overlay-width.test.cjs
@@ -1,7 +1,7 @@
-const assert = require('node:assert/strict')
-const test = require('node:test')
+import assert from 'node:assert/strict'
+import test from 'node:test'

-const { OVERLAY_FALLBACK_WIDTH, nativeOverlayWidth } = require('./titlebar-overlay-width.cjs')
+import { nativeOverlayWidth, OVERLAY_FALLBACK_WIDTH } from './titlebar-overlay-width'

 // This static reservation is only the pre-layout FALLBACK. Once laid out the
 // renderer reads the exact width from navigator.windowControlsOverlay
@@ -18,10 +18,17 @@ test('WSLg paints the same WCO, so it reserves the same fallback width', () => {
  assert.equal(nativeOverlayWidth({ isWsl: true }), OVERLAY_FALLBACK_WIDTH)
 })

-test('plain Linux and macOS reserve nothing', () => {
-  assert.equal(nativeOverlayWidth({ isWindows: false, isWsl: false }), 0)
-  assert.equal(nativeOverlayWidth(), 0)
-  assert.equal(nativeOverlayWidth({}), 0)
+test('plain Linux paints the WCO too, so it reserves the fallback width', () => {
+  // Regression #53185: re-enabling the overlay on plain Linux (KDE/GNOME)
+  // without reserving its width left the native min/max/close buttons painting
+  // on top of the app's right-edge titlebar tools.
+  assert.equal(nativeOverlayWidth({ isWindows: false, isWsl: false }), OVERLAY_FALLBACK_WIDTH)
+  assert.equal(nativeOverlayWidth(), OVERLAY_FALLBACK_WIDTH)
+  assert.equal(nativeOverlayWidth({}), OVERLAY_FALLBACK_WIDTH)
+})
+
+test('macOS uses traffic lights, not a WCO overlay, so it reserves nothing', () => {
+  assert.equal(nativeOverlayWidth({ isMac: true }), 0)
 })

 test('the fallback width is a sane positive pixel value', () => {
--- a/apps/desktop/electron/titlebar-overlay-width.ts
+++ b/apps/desktop/electron/titlebar-overlay-width.ts
@@ -0,0 +1,23 @@
+const OVERLAY_FALLBACK_WIDTH = 144
+
+/**
+ * Static pre-layout reservation (px) for the right-side native window-controls
+ * overlay (min/max/close). Only a FALLBACK — once laid out the renderer reads
+ * the exact width from navigator.windowControlsOverlay
+ * (use-window-controls-overlay-width.ts) and uses this value only when the WCO
+ * API is unavailable.
+ *
+ * macOS uses traffic lights positioned via trafficLightPosition, not a WCO
+ * overlay, so it reserves nothing here. Every other desktop platform now paints
+ * the Electron overlay (Windows, WSLg, and plain Linux KDE/GNOME), so they all
+ * reserve the fallback width.
+ *
+ * @param {{ isWindows?: boolean, isWsl?: boolean, isMac?: boolean }} opts
+ */
+function nativeOverlayWidth({ isWindows = false, isWsl = false, isMac = false } = {}) {
+  if (isMac) {return 0}
+
+  return OVERLAY_FALLBACK_WIDTH
+}
+
+export { nativeOverlayWidth, OVERLAY_FALLBACK_WIDTH }
--- a/apps/desktop/electron/update-count.test.cjs
+++ b/apps/desktop/electron/update-count.test.cjs
@@ -1,7 +1,7 @@
-'use strict'
-const test = require('node:test')
-const assert = require('node:assert/strict')
-const { resolveBehindCount, shouldCountCommits } = require('./update-count.cjs')
+import assert from 'node:assert/strict'
+import test from 'node:test'
+
+import { resolveBehindCount, shouldCountCommits } from './update-count'

 // FAIL-BEFORE: pre-fix the function did `Number.parseInt(countStr) || 0`
 // unconditionally, so a shallow checkout with no merge-base surfaced the bogus
--- a/apps/desktop/electron/update-count.cjs
+++ b/apps/desktop/electron/update-count.cjs
@@ -1,5 +1,3 @@
-'use strict'
-
 // Whether `git rev-list HEAD..origin/<branch> --count` produces a meaningful
 // number worth computing. On a SHALLOW checkout (installer clones with
 // --depth 1) the local history often shares no merge-base with the freshly
@@ -19,10 +17,12 @@ function shouldCountCommits({ isShallow, hasMergeBase }) {
 // (developers / Docker dev images) keep the exact count path unchanged.
 function resolveBehindCount({ countStr, currentSha, targetSha, isShallow, hasMergeBase }) {
  if (!shouldCountCommits({ isShallow, hasMergeBase })) {
-    if (currentSha && targetSha && currentSha === targetSha) return 0
+    if (currentSha && targetSha && currentSha === targetSha) {return 0}
+
    return 1 // behind by an unknown amount — show a generic "update available"
  }
+
  return Number.parseInt(countStr, 10) || 0
 }

-module.exports = { resolveBehindCount, shouldCountCommits }
+export { resolveBehindCount, shouldCountCommits }
--- a/apps/desktop/electron/update-marker.test.cjs
+++ b/apps/desktop/electron/update-marker.test.cjs
@@ -12,16 +12,17 @@
 * strand future launches, and (c) self-heal by deleting a stale marker file.
 */

-const test = require('node:test')
-const assert = require('node:assert/strict')
-const fs = require('fs')
-const os = require('os')
-const path = require('path')
+import fs from 'fs'
+import assert from 'node:assert/strict'
+import test from 'node:test'
+import os from 'os'
+import path from 'path'

-const { markerPath, isPidAlive, readLiveUpdateMarker, UPDATE_MARKER_MAX_AGE_MS } = require('./update-marker.cjs')
+import { isPidAlive, markerPath, readLiveUpdateMarker, UPDATE_MARKER_MAX_AGE_MS } from './update-marker'

 function tmpHome(tag) {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), `hermes-marker-${tag}-`))
+
  return dir
 }

@@ -29,10 +30,11 @@ function writeMarker(home, pid, startedAtSec) {
  fs.writeFileSync(markerPath(home), `${pid}\n${startedAtSec}`)
 }

-const ALIVE = () => true // injected kill that "succeeds" => pid alive
-const DEAD = () => {
-  const err = new Error('no such process')
-  err.code = 'ESRCH'
+const ALIVE: typeof process.kill = () => true // injected kill that "succeeds" => pid alive
+
+const DEAD: typeof process.kill = () => {
+  const err = new Error('no such process');
+  (err as any).code = 'ESRCH' // ESRCH is the "no such process" code, i.e. a dead pid
  throw err
 }

@@ -84,9 +86,10 @@ test('isPidAlive: own pid is alive, impossible pid is dead', () => {

 test('isPidAlive: EPERM counts as alive (process owned by another user)', () => {
  const eperm = () => {
-    const err = new Error('operation not permitted')
-    err.code = 'EPERM'
+    const err = new Error('operation not permitted');
+    (err as any).code = 'EPERM'
    throw err
  }
+
  assert.equal(isPidAlive(4242, eperm), true)
 })
--- a/apps/desktop/electron/update-marker.cjs
+++ b/apps/desktop/electron/update-marker.cjs
@@ -20,8 +20,8 @@
 * log sinks are.
 */

-const fs = require('fs')
-const path = require('path')
+import fs from 'fs'
+import path from 'path'

 // Even with a live-looking PID, never treat a marker older than this as a live
 // update. A full update (git pull + pip + desktop rebuild) is minutes, not tens
@@ -37,10 +37,12 @@ function markerPath(hermesHome) {
 // not deliver a signal — it just probes existence/permission. ESRCH => dead;
 // EPERM => alive but owned by another user (still "alive" for our purposes).
 // Injectable `kill` keeps it unit-testable.
-function isPidAlive(pid, kill = process.kill.bind(process)) {
-  if (!Number.isInteger(pid) || pid <= 0) return false
+function isPidAlive(pid, kill: typeof process.kill = process.kill.bind(process)) {
+  if (!Number.isInteger(pid) || pid <= 0) {return false}
+
  try {
    kill(pid, 0)
+
    return true
  } catch (err) {
    return Boolean(err && err.code === 'EPERM')
@@ -59,9 +61,12 @@ function isPidAlive(pid, kill = process.kill.bind(process)) {
 * Pure-ish: file I/O against the given path, plus an injectable pid probe and
 * clock for tests.
 */
-function readLiveUpdateMarker(hermesHome, { kill, now = Date.now, maxAgeMs = UPDATE_MARKER_MAX_AGE_MS } = {}) {
+function readLiveUpdateMarker(hermesHome, { kill, now = Date.now, maxAgeMs = UPDATE_MARKER_MAX_AGE_MS }: {
+  now?: () => number, maxAgeMs?: number, kill?: typeof process.kill
+} = {}) {
  const file = markerPath(hermesHome)
  let raw
+
  try {
    raw = fs.readFileSync(file, 'utf8')
  } catch {
@@ -80,14 +85,14 @@ function readLiveUpdateMarker(hermesHome, { kill, now = Date.now, maxAgeMs = UPD
    } catch {
      void 0
    }
+
    return null
  }
+
  return { pid, ageMs }
 }

-module.exports = {
-  UPDATE_MARKER_MAX_AGE_MS,
+export { isPidAlive,
  markerPath,
-  isPidAlive,
-  readLiveUpdateMarker
-}
+  readLiveUpdateMarker,
+  UPDATE_MARKER_MAX_AGE_MS }
--- a/apps/desktop/electron/update-rebuild.test.cjs
+++ b/apps/desktop/electron/update-rebuild.test.cjs
@@ -12,10 +12,10 @@
 * success, and must run at most twice.
 */

-const test = require('node:test')
-const assert = require('node:assert/strict')
+import assert from 'node:assert/strict'
+import test from 'node:test'

-const { shouldRetryRebuild, runRebuildWithRetry } = require('./update-rebuild.cjs')
+import { runRebuildWithRetry, shouldRetryRebuild } from './update-rebuild'

 test('shouldRetryRebuild retries only on a non-success exit', () => {
  assert.equal(shouldRetryRebuild(0), false)
@@ -25,30 +25,39 @@ test('shouldRetryRebuild retries only on a non-success exit', () => {

 test('a clean first rebuild runs once and does not retry', async () => {
  const codes = []
+
  const result = await runRebuildWithRetry(attempt => {
    codes.push(attempt)
+
    return Promise.resolve({ code: 0 })
  })
+
  assert.deepEqual(codes, [0])
  assert.equal(result.code, 0)
 })

 test('a failed first rebuild retries once and succeeds', async () => {
  const codes = []
+
  const result = await runRebuildWithRetry(attempt => {
    codes.push(attempt)
+
    return Promise.resolve({ code: attempt === 0 ? 1 : 0 })
  })
+
  assert.deepEqual(codes, [0, 1])
  assert.equal(result.code, 0)
 })

 test('a rebuild that keeps failing runs at most twice and reports the failure', async () => {
  const codes = []
+
  const result = await runRebuildWithRetry(attempt => {
    codes.push(attempt)
+
    return Promise.resolve({ code: 1, error: 'rebuild-failed' })
  })
+
  assert.deepEqual(codes, [0, 1])
  assert.equal(result.code, 1)
  assert.equal(result.error, 'rebuild-failed')
--- a/apps/desktop/electron/update-rebuild.cjs
+++ b/apps/desktop/electron/update-rebuild.cjs
@@ -1,5 +1,3 @@
-'use strict'
-
 /**
 * Retry-once policy for the desktop `--build-only` rebuild during self-update.
 *
@@ -20,10 +18,12 @@ function shouldRetryRebuild(code) {
 */
 async function runRebuildWithRetry(rebuild) {
  let result = await rebuild(0)
+
  if (shouldRetryRebuild(result.code)) {
    result = await rebuild(1)
  }
+
  return result
 }

-module.exports = { shouldRetryRebuild, runRebuildWithRetry }
+export { runRebuildWithRetry, shouldRetryRebuild }
--- a/apps/desktop/electron/update-relaunch.test.cjs
+++ b/apps/desktop/electron/update-relaunch.test.cjs
@@ -17,24 +17,22 @@
 *      (keep a working window) unless a non-interactive fallback applies.
 */

-const test = require('node:test')
-const assert = require('node:assert/strict')
-const fs = require('node:fs')
-const os = require('node:os')
-const path = require('node:path')
-const { execFileSync } = require('node:child_process')
+import assert from 'node:assert/strict'
+import { execFileSync } from 'node:child_process'
+import fs from 'node:fs'
+import os from 'node:os'
+import path from 'node:path'
+import test from 'node:test'

-const {
-  unpackedDirName,
-  resolveUnpackedRelease,
-  decideRelaunchOutcome,
-  sandboxPreflight,
-  sandboxFallbackFromEnv,
+import { buildRelaunchScript,
  collectRelaunchArgs,
  collectRelaunchEnv,
-  buildRelaunchScript,
-  shellQuote
-} = require('./update-relaunch.cjs')
+  decideRelaunchOutcome,
+  resolveUnpackedRelease,
+  sandboxFallbackFromEnv,
+  sandboxPreflight,
+  shellQuote,
+  unpackedDirName } from './update-relaunch'

 const ROOT = '/home/u/.hermes/hermes-agent'
 const UNPACKED = path.join(ROOT, 'apps', 'desktop', 'release', 'linux-unpacked')
@@ -91,6 +89,7 @@ test('decideRelaunchOutcome: only under-unpacked + sandbox-ok relaunches', () =>
 // ---------------------------------------------------------------------------

 const fakeStat = (uid, mode) => () => ({ uid, mode })
+
 const throwStat = () => {
  throw Object.assign(new Error('ENOENT'), { code: 'ENOENT' })
 }
@@ -150,6 +149,7 @@ test('collectRelaunchArgs drops Electron internals, keeps user/launcher args', (
    '--profile=work', // app flag — keep
    '--remote-debugging-port=9222' // internal — drop
  ]
+
  assert.deepEqual(collectRelaunchArgs(argv), ['--no-sandbox', 'hermes://open/agent/42', '--profile=work'])
  assert.deepEqual(collectRelaunchArgs(undefined), [])
 })
@@ -165,6 +165,7 @@ test('collectRelaunchEnv preserves HERMES_HOME + HERMES_DESKTOP_* + sandbox opt-
    HOME: '/home/u', // not preserved
    UNRELATED: 'x'
  }
+
  assert.deepEqual(collectRelaunchEnv(env), {
    HERMES_HOME: '/home/u/.hermes',
    HERMES_DESKTOP_REMOTE_URL: 'http://box:9119',
@@ -207,6 +208,7 @@ test('buildRelaunchScript embeds pid/exec/args/env/cwd and is valid bash', () =>
  // It must be syntactically valid bash (`bash -n`). Write to a temp file and lint.
  const tmp = path.join(os.tmpdir(), `hermes-relaunch-test-${Date.now()}.sh`)
  fs.writeFileSync(tmp, script)
+
  try {
    execFileSync('bash', ['-n', tmp], { stdio: 'pipe' })
  } finally {
@@ -222,13 +224,16 @@ test('buildRelaunchScript with no args/env still lints clean', () => {
    env: {},
    cwd: ''
  })
+
  const tmp = path.join(os.tmpdir(), `hermes-relaunch-test2-${Date.now()}.sh`)
  fs.writeFileSync(tmp, script)
+
  try {
    execFileSync('bash', ['-n', tmp], { stdio: 'pipe' })
  } finally {
    fs.rmSync(tmp, { force: true })
  }
+
  // exec line has no trailing args.
  assert.match(script, /exec '\/opt\/Hermes\/Hermes'\n/)
 })
--- a/apps/desktop/electron/update-relaunch.cjs
+++ b/apps/desktop/electron/update-relaunch.cjs
@@ -1,5 +1,3 @@
-'use strict'
-
 /**
 * update-relaunch.cjs — pure decision + script-generation helpers for the
 * Linux in-app update relaunch (#45205).
@@ -37,12 +35,14 @@
 * the closeable manual-restart terminal state instead.
 */

-const path = require('node:path')
+import path from 'node:path'

 // Map process.platform → electron-builder's `release/<dir>-unpacked` name.
 function unpackedDirName(platform) {
-  if (platform === 'darwin') return 'mac-unpacked' // not used (mac swaps bundles)
-  if (platform === 'win32') return 'win-unpacked'
+  if (platform === 'darwin') {return 'mac-unpacked'} // not used (mac swaps bundles)
+
+  if (platform === 'win32') {return 'win-unpacked'}
+
  return 'linux-unpacked'
 }

@@ -56,15 +56,17 @@ function unpackedDirName(platform) {
 * `.../release/linux-unpacked-evil` can't masquerade as `.../release/linux-unpacked`.
 */
 function resolveUnpackedRelease(execPath, updateRoot, platform) {
-  if (!execPath || !updateRoot) return null
+  if (!execPath || !updateRoot) {return null}
  const releaseDir = path.join(updateRoot, 'apps', 'desktop', 'release')
  const unpacked = path.join(releaseDir, unpackedDirName(platform))
  const normalizedExec = path.resolve(String(execPath))
  // execPath must be the unpacked dir itself or a descendant of it.
  const withSep = unpacked.endsWith(path.sep) ? unpacked : unpacked + path.sep
+
  if (normalizedExec === unpacked || normalizedExec.startsWith(withSep)) {
    return unpacked
  }
+
  return null
 }

@@ -81,8 +83,10 @@ function resolveUnpackedRelease(execPath, updateRoot, platform) {
 *                app. Closeable manual-restart terminal state.
 */
 function decideRelaunchOutcome({ underUnpacked, sandboxOk }) {
-  if (!underUnpacked) return 'guiSkew'
-  if (!sandboxOk) return 'manual'
+  if (!underUnpacked) {return 'guiSkew'}
+
+  if (!sandboxOk) {return 'manual'}
+
  return 'relaunch'
 }

@@ -99,9 +103,10 @@ function decideRelaunchOutcome({ underUnpacked, sandboxOk }) {
 * `statSync` is injectable so this is testable without a real setuid file.
 */
 function sandboxPreflight(unpackedDir, statSync) {
-  if (!unpackedDir) return { ok: false, reason: 'no-unpacked-dir', path: null }
+  if (!unpackedDir) {return { ok: false, reason: 'no-unpacked-dir', path: null }}
  const sandboxPath = path.join(unpackedDir, 'chrome-sandbox')
  let st
+
  try {
    st = statSync(sandboxPath)
  } catch {
@@ -109,15 +114,20 @@ function sandboxPreflight(unpackedDir, statSync) {
    // sandbox; nothing to block the relaunch.
    return { ok: true, reason: 'no-sandbox-helper', path: sandboxPath }
  }
+
  const ownedByRoot = st.uid === 0
  const hasSetuid = (st.mode & 0o4000) !== 0
+
  if (ownedByRoot && hasSetuid) {
    return { ok: true, reason: 'launchable', path: sandboxPath }
  }
+
  if (!ownedByRoot && !hasSetuid) {
    return { ok: false, reason: 'not-root-not-setuid', path: sandboxPath }
  }
-  if (!ownedByRoot) return { ok: false, reason: 'not-root', path: sandboxPath }
+
+  if (!ownedByRoot) {return { ok: false, reason: 'not-root', path: sandboxPath }}
+
  return { ok: false, reason: 'not-setuid', path: sandboxPath }
 }

@@ -137,8 +147,11 @@ function sandboxPreflight(unpackedDir, statSync) {
 */
 function sandboxFallbackFromEnv(env, launchArgs) {
  const disable = String((env && env.ELECTRON_DISABLE_SANDBOX) || '').trim()
-  if (disable === '1' || disable.toLowerCase() === 'true') return true
-  if (Array.isArray(launchArgs) && launchArgs.some(a => a === '--no-sandbox')) return true
+
+  if (disable === '1' || disable.toLowerCase() === 'true') {return true}
+
+  if (Array.isArray(launchArgs) && launchArgs.some(a => a === '--no-sandbox')) {return true}
+
  return false
 }

@@ -176,9 +189,11 @@ const INTERNAL_ARG_PREFIXES = [
 * the exec path itself; there is no entry-script arg as in a dev run).
 */
 function collectRelaunchArgs(argv) {
-  if (!Array.isArray(argv)) return []
+  if (!Array.isArray(argv)) {return []}
+
  return argv.filter(arg => {
-    if (typeof arg !== 'string' || arg.length === 0) return false
+    if (typeof arg !== 'string' || arg.length === 0) {return false}
+
    return !INTERNAL_ARG_PREFIXES.some(prefix =>
      prefix.endsWith('=') ? arg.startsWith(prefix) : arg === prefix || arg.startsWith(prefix + '=')
    )
@@ -197,13 +212,17 @@ const PRESERVED_ENV_PREFIXES = ['HERMES_DESKTOP_']

 function collectRelaunchEnv(env) {
  const out = {}
-  if (!env || typeof env !== 'object') return out
+
+  if (!env || typeof env !== 'object') {return out}
+
  for (const [key, value] of Object.entries(env)) {
-    if (value == null) continue
+    if (value == null) {continue}
+
    if (PRESERVED_ENV_KEYS.includes(key) || PRESERVED_ENV_PREFIXES.some(p => key.startsWith(p))) {
      out[key] = String(value)
    }
  }
+
  return out
 }

@@ -223,8 +242,10 @@ function buildRelaunchScript({ pid, execPath, args, env, cwd }) {
  const exports = Object.entries(env || {})
    .map(([k, v]) => `export ${k}=${shellQuote(v)}`)
    .join('\n')
+
  const quotedArgs = (args || []).map(shellQuote).join(' ')
  const cwdLine = cwd ? `cd ${shellQuote(cwd)} 2>/dev/null || true` : ''
+
  // NOTE: `exec` replaces the watcher process with the relaunched app, so the
  // re-exec inherits exactly the env/cwd we set above.
  return `#!/bin/bash
@@ -249,17 +270,15 @@ exec ${shellQuote(execPath)}${quotedArgs ? ' ' + quotedArgs : ''}
 `
 }

-module.exports = {
-  unpackedDirName,
-  resolveUnpackedRelease,
-  decideRelaunchOutcome,
-  sandboxPreflight,
-  sandboxFallbackFromEnv,
+export { buildRelaunchScript,
  collectRelaunchArgs,
  collectRelaunchEnv,
-  buildRelaunchScript,
-  shellQuote,
+  decideRelaunchOutcome,
  INTERNAL_ARG_PREFIXES,
  PRESERVED_ENV_KEYS,
-  PRESERVED_ENV_PREFIXES
-}
+  PRESERVED_ENV_PREFIXES,
+  resolveUnpackedRelease,
+  sandboxFallbackFromEnv,
+  sandboxPreflight,
+  shellQuote,
+  unpackedDirName }
--- a/apps/desktop/electron/update-remote.test.cjs
+++ b/apps/desktop/electron/update-remote.test.cjs
@@ -15,16 +15,14 @@
 * never prompts and should keep the normal fetch path).
 */

-const test = require('node:test')
-const assert = require('node:assert/strict')
+import assert from 'node:assert/strict'
+import test from 'node:test'

-const {
-  OFFICIAL_REPO_HTTPS_URL,
-  OFFICIAL_REPO_CANONICAL,
-  canonicalGitHubRemote,
+import { canonicalGitHubRemote,
+  isOfficialSshRemote,
  isSshRemote,
-  isOfficialSshRemote
-} = require('./update-remote.cjs')
+  OFFICIAL_REPO_CANONICAL,
+  OFFICIAL_REPO_HTTPS_URL } from './update-remote'

 test('canonicalGitHubRemote normalizes SSH and HTTPS forms to the same value', () => {
  assert.equal(canonicalGitHubRemote('git@github.com:NousResearch/hermes-agent.git'), OFFICIAL_REPO_CANONICAL)
--- a/apps/desktop/electron/update-remote.cjs
+++ b/apps/desktop/electron/update-remote.cjs
@@ -19,8 +19,9 @@ const OFFICIAL_REPO_CANONICAL = 'github.com/nousresearch/hermes-agent'
 // no trailing slash, no .git suffix) so SSH and HTTPS forms of the same repo
 // compare equal.
 function canonicalGitHubRemote(url) {
-  if (!url) return ''
+  if (!url) {return ''}
  let value = String(url).trim()
+
  if (value.startsWith('git@github.com:')) {
    value = `github.com/${value.slice('git@github.com:'.length)}`
  } else if (value.startsWith('ssh://git@github.com/')) {
@@ -28,13 +29,17 @@ function canonicalGitHubRemote(url) {
  } else {
    try {
      const parsed = new URL(value)
-      if (parsed.hostname && parsed.pathname) value = `${parsed.hostname}${parsed.pathname}`
+
+      if (parsed.hostname && parsed.pathname) {value = `${parsed.hostname}${parsed.pathname}`}
    } catch {
      // Leave non-URL forms unchanged.
    }
  }
+
  value = value.trim().replace(/\/+$/, '')
-  if (value.endsWith('.git')) value = value.slice(0, -4)
+
+  if (value.endsWith('.git')) {value = value.slice(0, -4)}
+
  return value.toLowerCase()
 }

@@ -42,6 +47,7 @@ function isSshRemote(url) {
  const value = String(url || '')
    .trim()
    .toLowerCase()
+
  return value.startsWith('git@') || value.startsWith('ssh://')
 }

@@ -49,10 +55,8 @@ function isOfficialSshRemote(url) {
  return isSshRemote(url) && canonicalGitHubRemote(url) === OFFICIAL_REPO_CANONICAL
 }

-module.exports = {
-  OFFICIAL_REPO_HTTPS_URL,
-  OFFICIAL_REPO_CANONICAL,
-  canonicalGitHubRemote,
+export { canonicalGitHubRemote,
+  isOfficialSshRemote,
  isSshRemote,
-  isOfficialSshRemote
-}
+  OFFICIAL_REPO_CANONICAL,
+  OFFICIAL_REPO_HTTPS_URL }
--- a/apps/desktop/electron/vscode-marketplace.test.cjs
+++ b/apps/desktop/electron/vscode-marketplace.test.cjs
@@ -1,9 +1,7 @@
-'use strict'
+import assert from 'node:assert'
+import test from 'node:test'

-const assert = require('node:assert')
-const test = require('node:test')
-
-const { __testing, extractThemes, readCentralDirectory } = require('./vscode-marketplace.cjs')
+import { __testing, extractThemes, readCentralDirectory } from './vscode-marketplace'

 // Build a minimal zip with stored (uncompressed) entries so the test controls
 // the bytes exactly — exercises the central-directory reader + theme extraction
@@ -72,6 +70,7 @@ test('extractThemes reads contributed color themes (resolving ./ paths)', () =>
      themes: [{ label: 'Dracula', uiTheme: 'vs-dark', path: './themes/dracula.json' }]
    }
  })
+
  const themeJson = JSON.stringify({ name: 'Dracula', type: 'dark', colors: { 'editor.background': '#282a36' } })

  const zip = makeZip([
--- a/apps/desktop/electron/vscode-marketplace.cjs
+++ b/apps/desktop/electron/vscode-marketplace.cjs
@@ -1,5 +1,3 @@
-'use strict'
-
 /**
 * VS Code Marketplace color-theme fetcher (main process).
 *
@@ -14,8 +12,8 @@
 * zip library into the desktop bundle for a feature this small.
 */

-const https = require('node:https')
-const zlib = require('node:zlib')
+import https from 'node:https'
+import zlib from 'node:zlib'

 const GALLERY_QUERY_URL = 'https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery'
 const VSIX_ASSET_TYPE = 'Microsoft.VisualStudio.Services.VSIXPackage'
@@ -30,7 +28,7 @@ function request(
  url,
  { method = 'GET', headers = {}, body = null, maxBytes = MAX_VSIX_BYTES } = {},
  redirectsLeft = MAX_REDIRECTS
-) {
+): Promise<Buffer<ArrayBuffer>> {
  return new Promise((resolve, reject) => {
    const req = https.request(url, { method, headers }, res => {
      const status = res.statusCode ?? 0
@@ -102,6 +100,7 @@ async function resolveExtension(id) {
    // IncludeCategoryAndTags | IncludeLatestVersionOnly = 914.
    flags: 914
  })
+
  const extension = json?.results?.[0]?.extensions?.[0]

  if (!extension) {
@@ -127,6 +126,7 @@ async function resolveExtension(id) {
 /** POST an ExtensionQuery payload and return the parsed gallery response. */
 async function queryGallery(payload, { maxBytes = 4 * 1024 * 1024 } = {}) {
  const body = JSON.stringify(payload)
+
  const raw = await request(GALLERY_QUERY_URL, {
    method: 'POST',
    headers: {
@@ -332,10 +332,12 @@ async function fetchMarketplaceThemes(id) {
  return { extensionId: trimmed, displayName, themes }
 }

-module.exports = {
-  fetchMarketplaceThemes,
-  searchMarketplaceThemes,
+const __testing = { themeEntryName, looksLikeIconTheme }
+
+export {
+  __testing,
  extractThemes,
+  fetchMarketplaceThemes,
  readCentralDirectory,
-  __testing: { themeEntryName, looksLikeIconTheme }
+  searchMarketplaceThemes
 }
--- a/apps/desktop/electron/window-state.test.cjs
+++ b/apps/desktop/electron/window-state.test.cjs
@@ -4,19 +4,17 @@
 * clamping, and the debounce that collapses mid-drag write storms.
 */

-const test = require('node:test')
-const assert = require('node:assert/strict')
+import assert from 'node:assert/strict'
+import test from 'node:test'

-const {
-  DEFAULT_WIDTH,
+import { computeWindowOptions,
+  debounce,
  DEFAULT_HEIGHT,
-  MIN_WIDTH,
+  DEFAULT_WIDTH,
  MIN_HEIGHT,
-  sanitizeWindowState,
+  MIN_WIDTH,
  onScreen,
-  computeWindowOptions,
-  debounce
-} = require('./window-state.cjs')
+  sanitizeWindowState } from './window-state'

 // A single 1920×1080 monitor (work area trimmed for the taskbar).
 const PRIMARY = [{ workArea: { x: 0, y: 0, width: 1920, height: 1040 } }]
@@ -121,6 +119,7 @@ test('computeWindowOptions does not clamp when displays are unknown', () => {
 test('debounce coalesces a burst into one trailing run', t => {
  t.mock.timers.enable({ apis: ['setTimeout'] })
  let calls = 0
+
  const d = debounce(() => {
    calls += 1
  }, 250)
@@ -138,6 +137,7 @@ test('debounce coalesces a burst into one trailing run', t => {
 test('debounce.flush runs now and cancels the pending timer', t => {
  t.mock.timers.enable({ apis: ['setTimeout'] })
  let calls = 0
+
  const d = debounce(() => {
    calls += 1
  }, 250)
--- a/apps/desktop/electron/window-state.cjs
+++ b/apps/desktop/electron/window-state.cjs
@@ -21,41 +21,59 @@ const MIN_VISIBLE = 48
 const finite = v => typeof v === 'number' && Number.isFinite(v)
 const clamp = (v, lo, hi) => Math.max(lo, Math.min(v, hi))

+interface SanitizedWindowState{
+  width: number, height: number, isMaximized: boolean, x?: number,y?: number
+}
+
 // Parse raw JSON → clean state, or null if garbage. width/height are required
 // and floored; x/y survive only as a finite pair; isMaximized is strict.
-function sanitizeWindowState(raw) {
-  if (!raw || typeof raw !== 'object' || !finite(raw.width) || !finite(raw.height)) return null
+function sanitizeWindowState(raw?: any): SanitizedWindowState | null

-  const state = {
+
+ {
+  if (!raw || typeof raw !== 'object' || !finite(raw.width) || !finite(raw.height)) {return null}
+
+  const state: SanitizedWindowState = {
    width: Math.max(MIN_WIDTH, Math.round(raw.width)),
    height: Math.max(MIN_HEIGHT, Math.round(raw.height)),
-    isMaximized: raw.isMaximized === true
+    isMaximized: raw.isMaximized === true,
  }
+
  if (finite(raw.x) && finite(raw.y)) {
-    state.x = Math.round(raw.x)
+    state.x = Math.round(raw.x);
    state.y = Math.round(raw.y)
  }
+
  return state
 }

 // True when `bounds` overlaps some display's work area by ≥ MIN_VISIBLE on both
 // axes. `displays` is Electron's screen.getAllDisplays() shape.
 function onScreen(bounds, displays) {
-  if (!Array.isArray(displays)) return false
+  if (!Array.isArray(displays)) {return false}
+
  return displays.some(({ workArea: a } = {}) => {
-    if (!a) return false
+    if (!a) {return false}
    const x = Math.min(bounds.x + bounds.width, a.x + a.width) - Math.max(bounds.x, a.x)
    const y = Math.min(bounds.y + bounds.height, a.y + a.height) - Math.max(bounds.y, a.y)
+
    return x >= MIN_VISIBLE && y >= MIN_VISIBLE
  })
 }

+interface WindowOptions {
+  width: number
+  height: number
+  x?: number
+  y?: number
+}
+
 // Sanitized state (or null) → BrowserWindow size/position options. Always sets
 // width/height, capped to the largest current display so a size saved on a
 // since-disconnected bigger monitor can't exceed any screen the user now has.
 // Sets x/y only when still on-screen; otherwise Electron centers the window.
-function computeWindowOptions(state, displays) {
-  const opts = {
+function computeWindowOptions(state, displays): WindowOptions {
+  const opts: WindowOptions = {
    width: finite(state?.width) ? state.width : DEFAULT_WIDTH,
    height: finite(state?.height) ? state.height : DEFAULT_HEIGHT
  }
@@ -67,6 +85,7 @@ function computeWindowOptions(state, displays) {
        : m,
    { width: 0, height: 0 }
  )
+
  if (cap.width && cap.height) {
    opts.width = clamp(opts.width, MIN_WIDTH, cap.width)
    opts.height = clamp(opts.height, MIN_HEIGHT, cap.height)
@@ -78,9 +97,10 @@ function computeWindowOptions(state, displays) {
    finite(state.y) &&
    onScreen({ x: state.x, y: state.y, width: opts.width, height: opts.height }, displays)
  ) {
-    opts.x = state.x
+    opts.x = state.x;
    opts.y = state.y
  }
+
  return opts
 }

@@ -89,6 +109,7 @@ function computeWindowOptions(state, displays) {
 // cancels the pending timer — used on close, before the window is gone.
 function debounce(fn, delayMs) {
  let timer = null
+
  const debounced = () => {
    clearTimeout(timer)
    timer = setTimeout(() => {
@@ -96,22 +117,22 @@ function debounce(fn, delayMs) {
      fn()
    }, delayMs)
  }
+
  debounced.flush = () => {
    clearTimeout(timer)
    timer = null
    fn()
  }
+
  return debounced
 }

-module.exports = {
-  DEFAULT_WIDTH,
+export { computeWindowOptions,
+  debounce,
  DEFAULT_HEIGHT,
-  MIN_WIDTH,
+  DEFAULT_WIDTH,
  MIN_HEIGHT,
  MIN_VISIBLE,
-  sanitizeWindowState,
+  MIN_WIDTH,
  onScreen,
-  computeWindowOptions,
-  debounce
-}
+  sanitizeWindowState }
--- a/apps/desktop/electron/windows-child-process.test.cjs
+++ b/apps/desktop/electron/windows-child-process.test.cjs
@@ -1,11 +1,10 @@
-'use strict'
+import assert from 'node:assert/strict'
+import fs from 'node:fs'
+import path from 'node:path'
+import test from 'node:test'
+import { fileURLToPath } from 'node:url'

-const test = require('node:test')
-const assert = require('node:assert/strict')
-const fs = require('node:fs')
-const path = require('node:path')
-
-const ELECTRON_DIR = __dirname
+const ELECTRON_DIR = path.dirname(fileURLToPath(import.meta.url))

 function readElectronFile(name) {
  return fs.readFileSync(path.join(ELECTRON_DIR, name), 'utf8').replace(/\r\n/g, '\n')
@@ -24,7 +23,7 @@ function requireHiddenChildOptions(source, needle) {
 }

 test('desktop background child processes opt into hidden Windows consoles', () => {
-  const source = readElectronFile('main.cjs')
+  const source = readElectronFile('main.ts')

  assert.match(source, /function hiddenWindowsChildOptions\(options = \{\}\)/)

@@ -53,8 +52,25 @@ test('desktop background child processes opt into hidden Windows consoles', () =
  assert.match(source, /args: \['-m', 'hermes_cli\.main', \.\.\.dashboardArgs\]/)
 })

+test('getNoConsoleVenvPython prefers base pythonw over the uv re-exec shim', () => {
+  const source = readElectronFile('main.ts')
+
+  const body = source.slice(
+    source.indexOf('function getNoConsoleVenvPython(venvRoot)'),
+    source.indexOf('function getVenvSitePackagesEntries(venvRoot)')
+  )
+
+  // The venv Scripts\pythonw.exe re-execs a console python.exe (flashes a
+  // conhost); the base pythonw must be resolved first so it never runs.
+  const baseIdx = body.indexOf('basePythonw')
+  const shimIdx = body.indexOf("'Scripts', 'pythonw.exe'")
+  assert.notEqual(baseIdx, -1, 'base pythonw resolution missing')
+  assert.notEqual(shimIdx, -1, 'venv shim fallback missing')
+  assert.ok(baseIdx < shimIdx, 'base pythonw must be preferred before the venv Scripts shim')
+})
+
 test('intentional or interactive desktop child processes stay documented', () => {
-  const source = readElectronFile('main.cjs')
+  const source = readElectronFile('main.ts')

  assert.match(source, /windowsHide: false/)
  assert.match(source, /handOffWindowsBootstrapRecovery/)
@@ -65,7 +81,7 @@ test('intentional or interactive desktop child processes stay documented', () =>
 })

 test('bootstrap PowerShell runner hides Windows console children', () => {
-  const source = readElectronFile('bootstrap-runner.cjs')
+  const source = readElectronFile('bootstrap-runner.ts')

  assert.match(source, /function hiddenWindowsChildOptions\(options = \{\}\)/)
  requireHiddenChildOptions(source, 'spawn(ps, fullArgs')
--- a/Show More
+++ b/Show More