WIPipw wipwip

wip thin client
2026-06-30 23:35:25 +08:00 · 2026-06-26 22:07:54 -04:00 · 2026-06-26 19:30:29 -04:00
600 changed files with 7371 additions and 39577 deletions
--- a/.envrc
+++ b/.envrc
@@ -1,5 +1,5 @@
 watch_file pyproject.toml uv.lock
 watch_file package-lock.json package.json web/package.json ui-tui/package.json website/package.json apps/shared/package.json apps/desktop/package.json ui-tui/packages/hermes-ink/package.json
-watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix nix/hermes-agent.nix nix/desktop.nix
+watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix

 use flake
--- a/.github/actions/hermes-smoke-test/action.yml
+++ b/.github/actions/hermes-smoke-test/action.yml
@@ -0,0 +1,50 @@
+name: Hermes smoke test
+description: >
+  Run the image's built-in entrypoint against `--help` and `dashboard --help`
+  to catch basic runtime regressions before publishing.  Requires the image
+  to already be loaded into the local Docker daemon under `image`.
+
+  Works identically on amd64 and arm64 runners.
+
+inputs:
+  image:
+    description: Fully-qualified image tag (e.g. nousresearch/hermes-agent:test)
+    required: true
+
+runs:
+  using: composite
+  steps:
+    - name: Ensure /tmp/hermes-test is hermes-writable
+      shell: bash
+      run: |
+        # The image runs as the hermes user (UID 10000).  The directory
+        # created below is owned by whoever runs this step, not by UID
+        # 10000, so hermes can't write to it — chown it to match the
+        # in-container UID before bind-mounting.  Real users doing
+        # `docker run -v ~/.hermes:...` with their own UID hit the same issue
+        # and have their own remediations (HERMES_UID env var, or chown locally).
+        mkdir -p /tmp/hermes-test
+        sudo chown -R 10000:10000 /tmp/hermes-test
+
+    - name: hermes --help
+      shell: bash
+      run: |
+        # Use the image's real ENTRYPOINT (/init + main-wrapper.sh) so
+        # this exercises the actual production startup path. PR #30136
+        # review caught that an --entrypoint override here had been
+        # silently neutered by the s6-overlay migration — stage2-hook
+        # ignores its CMD args, so the smoke test was a no-op.
+        docker run --rm \
+          -v /tmp/hermes-test:/opt/data \
+          "${{ inputs.image }}" --help
+
+    - name: hermes dashboard --help
+      shell: bash
+      run: |
+        # Regression guard for #9153: dashboard was present in source but
+        # missing from the published image.  If this fails, something in
+        # the Dockerfile is excluding the dashboard subcommand from the
+        # installed package.
+        docker run --rm \
+          -v /tmp/hermes-test:/opt/data \
+          "${{ inputs.image }}" dashboard --help
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,7 +20,6 @@ permissions:
  pull-requests: write # needed by lint (PR comment) + supply-chain (PR comment)
  actions: read # needed by osv-scanner (SARIF upload)
  security-events: write # needed by osv-scanner (SARIF upload)
-  packages: write # needed by docker build

 concurrency:
  group: ci-${{ github.ref }}
@@ -33,7 +32,6 @@ jobs:
  # (all lanes true) so post-merge validation is never weakened.
  # ─────────────────────────────────────────────────────────────────────
  detect:
-    name: Detect affected areas
    runs-on: ubuntu-latest
    outputs:
      python: ${{ steps.classify.outputs.python }}
@@ -55,15 +53,11 @@ jobs:
  # Skipped workflows (if condition is false) don't spin up runners.
  # ─────────────────────────────────────────────────────────────────────
  tests:
-    name: Python tests
    needs: detect
    if: needs.detect.outputs.python == 'true'
    uses: ./.github/workflows/tests.yml
-    with:
-      slice_count: 8

  lint:
-    name: Python lints
    needs: detect
    if: needs.detect.outputs.python == 'true'
    uses: ./.github/workflows/lint.yml
@@ -71,49 +65,35 @@ jobs:
      event_name: ${{ needs.detect.outputs.event_name }}

  typecheck:
-    name: TypeScript
    needs: detect
    if: needs.detect.outputs.frontend == 'true'
    uses: ./.github/workflows/typecheck.yml

  docs-site:
-    name: Docs Site
    needs: detect
    if: needs.detect.outputs.site == 'true'
    uses: ./.github/workflows/docs-site-checks.yml

  history-check:
-    name: Deny unrelated histories
    needs: detect
    if: needs.detect.outputs.event_name == 'pull_request'
    uses: ./.github/workflows/history-check.yml

  contributor-check:
-    name: Check contributors
    needs: detect
    if: needs.detect.outputs.python == 'true'
    uses: ./.github/workflows/contributor-check.yml

  uv-lockfile:
-    name: Check uv.lock
    needs: detect
    uses: ./.github/workflows/uv-lockfile-check.yml

  docker-lint:
-    name: Lint Docker scripts
    needs: detect
    if: needs.detect.outputs.docker_meta == 'true'
    uses: ./.github/workflows/docker-lint.yml

-  docker:
-    name: Build&Test Docker image
-    needs: detect
-    if: needs.detect.outputs.python == 'true' || needs.detect.outputs.frontend == 'true' || needs.detect.outputs.docker_meta == 'true'
-    uses: ./.github/workflows/docker.yml
-    secrets: inherit
-
  supply-chain:
-    name: Supply-chain scan
    needs: detect
    if: needs.detect.outputs.event_name == 'pull_request' && (needs.detect.outputs.scan == 'true' || needs.detect.outputs.deps == 'true' || needs.detect.outputs.mcp_catalog == 'true')
    uses: ./.github/workflows/supply-chain-audit.yml
@@ -124,7 +104,7 @@ jobs:
      mcp_catalog: ${{ needs.detect.outputs.mcp_catalog == 'true' }}

  osv-scanner:
-    name: OSV scan
+    needs: detect
    uses: ./.github/workflows/osv-scanner.yml

  # ─────────────────────────────────────────────────────────────────────
@@ -147,8 +127,6 @@ jobs:
      - docker-lint
      - supply-chain
      - osv-scanner
-      # we don't require docker to pass rn because it's so slow lol
-      # - docker
    if: always()
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/docker-lint.yml
+++ b/.github/workflows/docker-lint.yml
@@ -2,7 +2,7 @@ name: Docker / shell lint

 # Lints the container build inputs: Dockerfile (via hadolint) and any shell
 # scripts under docker/ (via shellcheck). These catch the class of regression
-# the behavioral docker smoke test can't — unquoted variable
+# the behavioral docker-publish smoke test can't — unquoted variable
 # expansions, silently-failing RUN commands, etc.
 #
 # Rules and ignores are documented in .hadolint.yaml at the repo root.
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -1,9 +1,24 @@
-name: Docker Build, Test, and Publish
+name: Docker Build and Publish

 on:
+  push:
+    branches: [main]
+    paths:
+      - '**/*.py'
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - 'Dockerfile'
+      - 'docker/**'
+      - '.github/workflows/docker-publish.yml'
+      - '.github/actions/hermes-smoke-test/**'
+
+  # No paths filter — the job must always run so the required check
+  # reports a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
+  pull_request:
+
  release:
    types: [published]
-  workflow_call:

 permissions:
  contents: read
@@ -24,7 +39,11 @@ env:
  IMAGE_NAME: nousresearch/hermes-agent

 jobs:
-  # Build, test, and optionally push the amd64 image.
+  # ---------------------------------------------------------------------------
+  # Build amd64 natively.  This job also runs the smoke tests (basic --help
+  # and the dashboard subcommand regression guard from #9153), because amd64
+  # is the only arch we can `load` into the local daemon on an amd64 runner.
+  # ---------------------------------------------------------------------------
  build-amd64:
    # Only run on the upstream repository, not on forks
    if: github.repository == 'NousResearch/hermes-agent'
@@ -34,19 +53,24 @@ jobs:
      digest: ${{ steps.push.outputs.digest }}
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

-      # The image build + integration tests run on every event
-      # (PRs, push-to-main, release). Publish steps below are gated to
-      # push-to-main / release only.
+      # The image build + smoke test + integration tests run ONLY on
+      # push-to-main and release — never on PRs. They are the heaviest jobs
+      # in CI (~15-45 min) and a broken build surfaces on the main push (and
+      # is gated pre-merge by docker-lint + uv-lockfile-check). Every step
+      # below is skipped on PRs, so the job still reports green and the
+      # required check never hangs.
      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
+        if: github.event_name != 'pull_request'
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3

-      # Build once, load into the local daemon for testing.  Cached
+      # Build once, load into the local daemon for smoke testing.  Cached
      # to gha with a per-arch scope; the push step below reuses every
      # layer from this build.
-      - name: Build image (amd64)
-        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
+      - name: Build image (amd64, smoke test)
+        if: github.event_name != 'pull_request'
+        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f  # v7.1.0
        with:
          context: .
          file: Dockerfile
@@ -58,12 +82,25 @@ jobs:
          cache-from: type=gha,scope=docker-amd64
          cache-to: type=gha,mode=max,scope=docker-amd64

+      - name: Smoke test image
+        if: github.event_name != 'pull_request'
+        uses: ./.github/actions/hermes-smoke-test
+        with:
+          image: ${{ env.IMAGE_NAME }}:test
+
+      # ---------------------------------------------------------------------
      # Run the docker-integration test suite against the freshly-built
-      # image already loaded into the local daemon (`:test`).
+      # image already loaded into the local daemon (`:test`).  These tests
+      # are excluded from the sharded `tests.yml :: test` matrix on purpose
+      # (see `_SKIP_PARTS` in scripts/run_tests_parallel.py) because each
+      # shard would otherwise reach the session-scoped ``built_image``
+      # fixture in ``tests/docker/conftest.py`` and start a 3-7min
+      # ``docker build`` of its own — which is guaranteed to die in
+      # fixture setup.
      #
-      # Piggybacking here avoids a second image build: the build step
-      # already loaded the image into the daemon under
-      # `${IMAGE_NAME}:test`, so we just point ``HERMES_TEST_IMAGE`` at
+      # Piggybacking here avoids a second image build: the smoke test
+      # already proved the image loads + runs, so the daemon has it under
+      # `${IMAGE_NAME}:test` and we just point ``HERMES_TEST_IMAGE`` at
      # that.  The fixture's ``HERMES_TEST_IMAGE`` branch (see
      # tests/docker/conftest.py:62-63) short-circuits the rebuild.
      #
@@ -73,20 +110,26 @@ jobs:
      # cheapest path to coverage on every PR that touches docker code.
      # ---------------------------------------------------------------------
      - name: Install uv (for docker tests)
-        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
+        if: github.event_name != 'pull_request'
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5

      - name: Set up Python 3.11 (for docker tests)
+        if: github.event_name != 'pull_request'
        run: uv python install 3.11

      - name: Install Python dependencies (for docker tests)
+        if: github.event_name != 'pull_request'
        run: |
+          uv venv .venv --python 3.11
+          source .venv/bin/activate
          # ``dev`` extra pulls in pytest, pytest-asyncio —
          # everything tests/docker/ needs.  We deliberately avoid ``all``
          # here because the docker tests only drive the container via
          # subprocess and don't import hermes_agent's optional deps.
-          uv sync --locked --python 3.11 --extra dev
+          uv pip install -e ".[dev]"

      - name: Run docker integration tests
+        if: github.event_name != 'pull_request'
        env:
          # Skip rebuild; use the image already loaded by the build step.
          HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
@@ -96,11 +139,12 @@ jobs:
          OPENAI_API_KEY: ""
          NOUS_API_KEY: ""
        run: |
-          scripts/run_tests.sh tests/docker/ --file-timeout 600
+          source .venv/bin/activate
+          python -m pytest tests/docker/ -v --tb=short

      - name: Log in to Docker Hub
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
+        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121  # v4.1.0
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -111,7 +155,7 @@ jobs:
      - name: Push amd64 by digest
        id: push
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
+        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f  # v7.1.0
        with:
          context: .
          file: Dockerfile
@@ -135,7 +179,7 @@ jobs:

      - name: Upload digest artifact
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
        with:
          name: digest-amd64
          path: /tmp/digests/*
@@ -143,7 +187,10 @@ jobs:
          retention-days: 1

  # ---------------------------------------------------------------------------
-  # Build, test, and optionally push the arm64 image.
+  # Build arm64 natively on GitHub's free arm64 runner.  This replaces the
+  # previous QEMU-emulated arm64 build, which was ~5-10x slower and shared
+  # a cache scope with amd64.  Matches the amd64 job's shape: build+load,
+  # smoke test, then on push/release push by digest.
  # ---------------------------------------------------------------------------
  build-arm64:
    if: github.repository == 'NousResearch/hermes-agent'
@@ -153,26 +200,29 @@ jobs:
      digest: ${{ steps.push.outputs.digest }}
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

+      # arm64 build runs only on push-to-main and release (see build-amd64).
      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
+        if: github.event_name != 'pull_request'
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3

      # Log in to ghcr.io so the registry-backed build cache below can be
      # read (cache-from) on every event and written (cache-to) on
      # push/release.  Uses the workflow's GITHUB_TOKEN, which is valid for
      # the whole job — unlike the gha cache backend's short-lived Azure SAS
      # token, which expired mid-build on slow cold-cache arm64 runs and
-      # crashed the build before the tests ran (the reason the gha cache
+      # crashed the build before the smoke test (the reason the gha cache
      # was removed from arm64 PRs in the first place).
      - name: Log in to ghcr.io (build cache)
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121  # v4.1.0
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

-      # Build once, load into the local daemon for testing, then push
+      # Build once, load into the local daemon for smoke testing, then push
      # by digest below. Reads AND writes the registry-backed cache so the
      # push reuses layers from this build and the next build starts warm.
      #
@@ -180,8 +230,9 @@ jobs:
      # cache that previously broke here: its credential is the job-lifetime
      # GITHUB_TOKEN, not a short-lived SAS token, so the cold-build-outlives-
      # token failure mode cannot recur.
-      - name: Build image (arm64, cached publish)
-        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
+      - name: Build image (arm64, smoke test, cached publish)
+        if: github.event_name != 'pull_request'
+        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f  # v7.1.0
        with:
          context: .
          file: Dockerfile
@@ -193,29 +244,15 @@ jobs:
          cache-from: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64
          cache-to: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64,mode=max

-      - name: Install uv for docker tests
-        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
-
-      - name: Set up Python 3.11 for docker tests
-        run: uv python install 3.11
-
-      - name: Install Python dependencies for docker tests
-        run: |
-          uv sync --locked --python 3.11 --extra dev
-
-      - name: Run docker tests
-        env:
-          # Skip rebuild; use the image already loaded by the build step.
-          HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
-          OPENROUTER_API_KEY: ""
-          OPENAI_API_KEY: ""
-          NOUS_API_KEY: ""
-        run: |
-          scripts/run_tests.sh tests/docker/ --file-timeout 600
+      - name: Smoke test image
+        if: github.event_name != 'pull_request'
+        uses: ./.github/actions/hermes-smoke-test
+        with:
+          image: ${{ env.IMAGE_NAME }}:test

      - name: Log in to Docker Hub
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
+        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121  # v4.1.0
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -223,7 +260,7 @@ jobs:
      - name: Push arm64 by digest
        id: push
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
+        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f  # v7.1.0
        with:
          context: .
          file: Dockerfile
@@ -245,7 +282,7 @@ jobs:

      - name: Upload digest artifact
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
        with:
          name: digest-arm64
          path: /tmp/digests/*
@@ -267,17 +304,17 @@ jobs:
    timeout-minutes: 10
    steps:
      - name: Download digests
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4
        with:
          path: /tmp/digests
          pattern: digest-*
          merge-multiple: true

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3

      - name: Log in to Docker Hub
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
+        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121  # v4.1.0
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -37,7 +37,7 @@ jobs:
          fetch-depth: 0 # need full history for merge-base + worktree

      - name: Install uv
-        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5

      - name: Install ruff + ty
        uses: ./.github/actions/retry
@@ -110,7 +110,7 @@ jobs:
          cat .lint-reports/summary.md >> "$GITHUB_STEP_SUMMARY"

      - name: Upload reports as artifact
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
        with:
          name: lint-reports
          path: .lint-reports/
@@ -164,7 +164,7 @@ jobs:
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Install uv
-        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5

      - name: Install ruff
        uses: ./.github/actions/retry
--- a/.github/workflows/skills-index.yml
+++ b/.github/workflows/skills-index.yml
@@ -3,17 +3,17 @@ name: Build Skills Index
 on:
  schedule:
    # Run twice daily: 6 AM and 6 PM UTC
-    - cron: "0 6,18 * * *"
-  workflow_dispatch: # Manual trigger
+    - cron: '0 6,18 * * *'
+  workflow_dispatch:  # Manual trigger
  push:
    branches: [main]
    paths:
-      - "scripts/build_skills_index.py"
-      - ".github/workflows/skills-index.yml"
+      - 'scripts/build_skills_index.py'
+      - '.github/workflows/skills-index.yml'

 permissions:
  contents: read
-  actions: write # to trigger deploy-site.yml on schedule
+  actions: write   # to trigger deploy-site.yml on schedule

 jobs:
  build-index:
@@ -21,11 +21,11 @@ jobs:
    if: github.repository == 'NousResearch/hermes-agent'
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
        with:
-          python-version: "3.11"
+          python-version: '3.11'

      - name: Install dependencies
        run: pip install httpx==0.28.1 pyyaml==6.0.2
@@ -36,7 +36,7 @@ jobs:
        run: python scripts/build_skills_index.py

      - name: Upload index artifact
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
        with:
          name: skills-index
          path: website/static/api/skills-index.json
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -2,11 +2,6 @@ name: Tests

 on:
  workflow_call:
-    inputs:
-      slice_count:
-        description: Number of parallel test slices
-        type: number
-        default: 8

 permissions:
  contents: read
@@ -17,11 +12,13 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  generate:
-    name: "Generate slices"
+  test:
    runs-on: ubuntu-latest
-    outputs:
-      matrix: ${{ steps.matrix.outputs.matrix }}
+    timeout-minutes: 30
+    strategy:
+      fail-fast: false
+      matrix:
+        slice: [1, 2, 3, 4, 5, 6]
    steps:
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -30,26 +27,13 @@ jobs:
        uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
        with:
          path: test_durations.json
+          # main always saves under a key with a fresh suffix; jobs restore the most recent cache whose key shares this prefix.
+          # Quoted from the GitHub dependency-caching docs: https://docs.github.com/en/actions/reference/workflows-and-actions/dependency-caching#cache-hits-and-misses
+          #   "If you provide restore-keys, the cache action sequentially searches for any caches that match the list of restore-keys.
+          #   If there are no exact matches, the action searches for partial matches of the restore keys.
+          #   When the action finds a partial match, the most recent cache is restored to the path directory."
          key: test-durations

-      - name: Generate test slices
-        id: matrix
-        run: |
-          MATRIX=$(python3 scripts/run_tests_parallel.py --generate-slices ${{ inputs.slice_count }})
-          echo "matrix=$MATRIX" >> "$GITHUB_OUTPUT"
-
-  test:
-    name: Run tests slice ${{ matrix.slice.index }}/${{ inputs.slice_count }}
-    needs: generate
-    runs-on: ubuntu-latest
-    timeout-minutes: 30
-    strategy:
-      fail-fast: false
-      matrix: ${{ fromJSON(needs.generate.outputs.matrix) }}
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-
      - name: Install ripgrep (prebuilt binary)
        run: |
          set -euo pipefail
@@ -65,7 +49,7 @@ jobs:
          rg --version

      - name: Install uv
-        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
        with:
          # Persist uv's download/wheel cache (~/.cache/uv) across runs.
          # Keyed on the dependency manifests, so the cache is reused until
@@ -94,19 +78,33 @@ jobs:
        # re-download, keeping the persisted cache small and fast to restore.
        run: uv cache prune --ci

-      - name: Run tests (slice ${{ matrix.slice.index }}/${{ inputs.slice_count }})
-        # Per-file isolation via scripts/run_tests.sh: each test file runs
-        # in its own freshly-spawned `python -m pytest <file>` subprocess
+      - name: Run tests (slice ${{ matrix.slice }}/6)
+        # Per-file isolation via scripts/run_tests_parallel.py: discovers
+        # every test_*.py file under tests/ (excluding integration/ + e2e/),
+        # then runs `python -m pytest <file>` in a freshly-spawned subprocess
        # with bounded parallelism. No xdist, no shared workers, no
        # module-level state leakage between files.
        #
-        # File list is pre-computed by the generate job (--generate-slices)
-        # which runs LPT distribution once and passes the file list to each
-        # matrix job via --files. Previously each job re-discovered files and
-        # re-ran LPT independently — redundant N times.
+        # Why per-file (not per-test): per-test spawn cost (~250ms × 17k
+        # tests = 70min CPU minimum) blew the wall-clock budget. Per-file
+        # spawn (~250ms × ~850 files = ~3.5min) fits while still giving
+        # every file a fresh interpreter — the only isolation boundary
+        # that matters in practice (cross-file leakage was the original
+        # flake source; intra-file is the test author's responsibility).
+        #
+        # Why drop xdist entirely: xdist's persistent workers accumulate
+        # state across files, which is exactly the leakage we wanted to
+        # fix. ThreadPoolExecutor + subprocess.run is ~60 lines and does
+        # the job with cleaner semantics.
+        #
+        # Matrix slicing (--slice I/N): files are distributed across 6
+        # jobs by cached duration (LPT algorithm) so each job gets
+        # roughly equal wall time. Without a cache, files default to a 2s
+        # estimate and get split roughly evenly by count — still correct,
+        # just not perfectly balanced.
        run: |
          source .venv/bin/activate
-          scripts/run_tests.sh --files '${{ matrix.slice.files }}'
+          python scripts/run_tests_parallel.py --slice ${{ matrix.slice }}/6
        env:
          # Ensure tests don't accidentally call real APIs
          OPENROUTER_API_KEY: ""
@@ -116,7 +114,7 @@ jobs:
      - name: Upload per-slice durations
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
-          name: test-durations-slice-${{ matrix.slice.index }}
+          name: test-durations-slice-${{ matrix.slice }}
          path: test_durations.json
          retention-days: 1

@@ -175,7 +173,7 @@ jobs:
          rg --version

      - name: Install uv
-        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
        with:
          # Persist uv's download/wheel cache (~/.cache/uv) across runs.
          # Keyed on the dependency manifests, so the cache is reused until
--- a/.github/workflows/typecheck.yml
+++ b/.github/workflows/typecheck.yml
@@ -6,7 +6,6 @@ on:

 jobs:
  typecheck:
-    name: Check TypeScript
    runs-on: ubuntu-latest
    strategy:
      matrix:
@@ -23,7 +22,8 @@ jobs:
      # native builds. Skipping install scripts drops node-pty's node-gyp
      # header fetch — the transient flake that killed this job pre-`tsc` — and
      # is faster. retry covers the remaining registry blips.
-      - uses: ./.github/actions/retry
+      - 
+        uses: ./.github/actions/retry
        with:
          command: npm ci --ignore-scripts
      - run: npm run --prefix ${{ matrix.package }} typecheck
@@ -35,7 +35,6 @@ jobs:
  # users build apps/desktop from source on install/update. Run the real
  # `vite build` here so that class of break fails in CI instead.
  desktop-build:
-    name: Build desktop app
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -45,7 +44,8 @@ jobs:
          cache: npm
      # Keep install scripts here: the production build may need node-pty's
      # native binary. retry handles the transient install-time fetch flakes.
-      - uses: ./.github/actions/retry
+      - 
+        uses: ./.github/actions/retry
        with:
          command: npm ci
      - run: npm run --prefix apps/desktop build
--- a/.github/workflows/upload_to_pypi.yml
+++ b/.github/workflows/upload_to_pypi.yml
@@ -5,11 +5,11 @@ name: Publish to PyPI
 on:
  push:
    tags:
-      - "v20*" # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
+      - 'v20*'  # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
  workflow_dispatch:
    inputs:
      confirm_tag:
-        description: "Tag to publish (e.g. v2026.5.15). Must already exist."
+        description: 'Tag to publish (e.g. v2026.5.15). Must already exist.'
        required: true
        type: string

@@ -27,7 +27,7 @@ jobs:
    name: Build distribution 📦
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          persist-credentials: false
          # On workflow_dispatch, check out the confirmed tag.
@@ -43,17 +43,17 @@ jobs:
          fi

      - name: Set up Python
-        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
        with:
-          python-version: "3.13"
+          python-version: '3.13'

      - name: Install uv
-        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
+        uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e  # v6

      - name: Set up Node.js
-        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
+        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
-          node-version: "22"
+          node-version: '22'

      - name: Build web dashboard
        run: cd web && npm ci && npm run build
@@ -81,7 +81,7 @@ jobs:
        run: uv build --sdist --wheel

      - name: Upload distribution artifacts
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
        with:
          name: python-package-distributions
          path: dist/
@@ -94,17 +94,17 @@ jobs:
      name: pypi
      url: https://pypi.org/p/hermes-agent
    permissions:
-      id-token: write # OIDC trusted publishing
+      id-token: write  # OIDC trusted publishing

    steps:
      - name: Download distribution artifacts
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4
        with:
          name: python-package-distributions
          path: dist/

      - name: Publish to PyPI
-        uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
+        uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b  # v1.14.0
        with:
          skip-existing: true

@@ -116,12 +116,12 @@ jobs:
    needs: publish
    runs-on: ubuntu-latest
    permissions:
-      contents: write # attach assets to the existing release
-      id-token: write # sigstore signing
+      contents: write   # attach assets to the existing release
+      id-token: write   # sigstore signing

    steps:
      - name: Download distribution artifacts
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4
        with:
          name: python-package-distributions
          path: dist/
@@ -145,7 +145,7 @@ jobs:

      - name: Sign with Sigstore
        if: env.skip_sign != 'true'
-        uses: sigstore/gh-action-sigstore-python@04cffa1d795717b140764e8b640de88853c92acc # v3.3.0
+        uses: sigstore/gh-action-sigstore-python@04cffa1d795717b140764e8b640de88853c92acc  # v3.3.0
        with:
          inputs: >-
            ./dist/*.tar.gz
--- a/.github/workflows/uv-lockfile-check.yml
+++ b/.github/workflows/uv-lockfile-check.yml
@@ -4,7 +4,7 @@ name: uv.lock check
 # that modify pyproject.toml without regenerating uv.lock (or vice versa)
 # must not merge, because the Docker build's `uv sync --frozen` step will
 # fail on a stale lockfile and we'd rather catch it here than in the
-# docker workflow on main.
+# docker-publish workflow on main.
 #
 # ─────────────────────────────────────────────────────────────────────────
 # IMPORTANT: this check runs against the MERGED state, not just your branch
@@ -63,7 +63,7 @@ jobs:
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Install uv
-        uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5

      # `uv lock --check` re-resolves the project from pyproject.toml and
      # compares the result to uv.lock, exiting non-zero if they disagree.
@@ -100,7 +100,7 @@ jobs:

          This check is blocking because the Docker image build uses
          `uv sync --frozen --extra all`, which rejects stale lockfiles
-          — catching it here avoids a ~15 min failed docker run
+          — catching it here avoids a ~15 min failed docker-publish run
          on `main` post-merge.
          EOF
            echo "::error title=uv.lock out of sync::Run \`uv lock\` locally and commit the result. If on a PR, sync with main first."
--- a/.gitignore
+++ b/.gitignore
@@ -137,9 +137,3 @@ RELEASE_v*.md
 # Desktop demo-run scratch output (hermes writes demo/*.txt during recorded
 # walkthroughs). Throwaway artifacts, never part of the app.
 apps/desktop/demo/
-
-# PR infographics are rendered locally and embedded in PR descriptions via the
-# image-provider (fal.media) URL — they are NEVER committed to the repo. The
-# PR body is the archive. See the hermes-agent-dev skill's
-# pr-infographic-workflow reference (storage rule + lapse #8 / #COMMIT-1).
-infographic/
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -123,17 +123,6 @@ conservative at the waist.
  without E2E proof, and plugins that touch core files.** Plugins live in their
  own directory and work within the ABCs/hooks we provide; if a plugin needs
  more, widen the generic plugin surface, don't special-case it in core.
- **Third-party products / other people's projects integrated into the core
-  tree.** Observability backends, vendor SaaS integrations, analytics dashboards,
-  and similar "someone else's product" plugins do NOT land under `plugins/` in
-  this repo. They place an ongoing maintenance burden on us to keep them working
-  against a fast-moving core, for a backend we don't own. Ship them as a
-  **standalone plugin repo** users install into `~/.hermes/plugins/` (or via a
-  pip entry point), and promote them in the Nous Research Discord
-  (`#plugins-skills-and-skins`). This is a coupling-and-maintenance decision, not
-  a quality bar — the plugin can be excellent and still be a close. PRs that add
-  such a directory to the tree are closed with a pointer to publish it as its own
-  repo.

 ### Before you call it a bug — verify the premise (and when NOT to close)

@@ -794,24 +783,6 @@ landing in this tree. PRs that add a new directory under
 provider as its own repo. Existing in-tree providers stay; bug fixes
 to them are welcome.

-**No new third-party-product plugins in-tree (policy, June 2026):** the
-same rule applies beyond memory providers. Plugins that integrate
-someone else's product or project — observability/metrics backends,
-vendor SaaS connectors, analytics dashboards, paid-service tie-ins —
-must ship as **standalone plugin repos** that users install into
-`~/.hermes/plugins/` (or via pip entry points). They register through
-the existing plugin discovery path and use the ABCs/hooks/ctx surface
-we expose; nothing special is needed in core. The reason is
-maintenance load: every product we absorb into the tree becomes our
-burden to keep working against a fast-moving core, for a backend we
-don't own. Promote standalone plugins in the Nous Research Discord
-(`#plugins-skills-and-skins`). PRs that add such a directory under
-`plugins/` are closed with a pointer to publish it as its own repo —
-this is a coupling decision, not a quality judgment. (The
-`observability/`, `kanban/`, `disk-cleanup/`, etc. directories already
-in the tree are existing precedent, not an invitation to add more
-third-party-product plugins alongside them.)
-
 ### Model-provider plugins (`plugins/model-providers/<name>/`)

 Every inference backend (openrouter, anthropic, gmi, deepseek, nvidia, …)
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -85,23 +85,6 @@ This isn't a quality bar — it's a coupling-and-maintenance decision. Memory pr

 ---

-## Third-Party Product Integrations: Ship as a Standalone Plugin
-
-The same rule extends to **any plugin that integrates someone else's product or project** — observability/metrics backends, vendor SaaS connectors, analytics dashboards, paid-service tie-ins, and similar third-party integrations. **These do not land in this repo.**
-
-The reason is maintenance load, not quality. Every external product absorbed into the core tree becomes ours to keep working against a fast-moving codebase, for a backend we don't own and can't control. Hermes ships a lot and the core moves quickly; coupling third-party products into it creates an open-ended burden on the maintainers.
-
-Publish these as a **standalone plugin repo** instead:
-
- Implement the relevant ABC and use the existing plugin discovery path (`~/.hermes/plugins/`, project `.hermes/plugins/`, or a pip entry point) — see [Build a Hermes Plugin](https://hermes-agent.nousresearch.com/docs/guides/build-a-hermes-plugin)
- Register lifecycle hooks (`pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`, `on_session_start`, `on_session_end`), tools (`ctx.register_tool`), and CLI subcommands (`ctx.register_cli_command`) through the surface we already expose — no core changes needed
- If your plugin needs a capability the framework doesn't expose, that's a feature request to **widen the generic plugin surface** (a new hook or `ctx` method) — never special-case your plugin in core
- Promote it in the [Nous Research Discord](https://discord.gg/NousResearch) `#plugins-skills-and-skins` channel so users can find and install it
-
-A well-built third-party-product plugin can clear automated review and still be closed for this reason — it's a placement decision, not a verdict on the code. PRs that add such a directory under `plugins/` will be closed with a pointer to publish it as its own repo.
-
---
-
 ## Development Setup

 ### Prerequisites
--- a/Dockerfile
+++ b/Dockerfile
@@ -189,13 +189,7 @@ RUN cd web && npm run build && \

 # ---------- Source code ----------
 # .dockerignore excludes node_modules, so the installs above survive.
-# --link decouples this layer from parents for cache purposes; --chmod bakes
-# the final read-only permissions at copy time so we skip the separate
-# `chmod -R` pass that previously walked ~30k files across the venv +
-# node_modules + source (21s amd64 / 222s arm64 — #49113).  `a+rX,go-w`
-# gives the non-root hermes user read + traverse but no write; root retains
-# write so the build steps below don't need chmod u+w dances.
-COPY --link --chmod=a+rX,go-w . .
+COPY . .

 # ---------- Permissions ----------
 # Link hermes-agent itself (editable). Deps are already installed in the
@@ -203,15 +197,19 @@ COPY --link --chmod=a+rX,go-w . .
 # resolution or downloads.
 RUN uv pip install --no-cache-dir --no-deps -e "."

-# Wire the exec shim and install-method stamp.  Files under /opt/hermes are
-# already root-owned (COPY, uv sync, npm install all run as root) and
-# read-only for the hermes user (go-w from the --chmod above).
-
+# Keep /opt/hermes immutable for the runtime hermes user. Hosted/container
+# instances must not be able to self-edit the installed source or venv; user
+# data, skills, plugins, config, logs, and dashboard uploads live under
+# /opt/data instead. Root can still repair the image during build/boot, but
+# supervised Hermes processes drop to the non-root hermes user.
 USER root
 RUN mkdir -p /opt/hermes/bin && \
    cp /opt/hermes/docker/hermes-exec-shim.sh /opt/hermes/bin/hermes && \
    chmod 0755 /opt/hermes/bin/hermes && \
-    printf 'docker\n' > /opt/hermes/.install_method
+    printf 'docker\n' > /opt/hermes/.install_method && \
+    chown -R root:root /opt/hermes && \
+    chmod -R a+rX /opt/hermes && \
+    chmod -R a-w /opt/hermes
 # The ``.install_method`` stamp is baked next to the running code (the install
 # tree), NOT into $HERMES_HOME. $HERMES_HOME (/opt/data) is a shared data
 # volume that is commonly bind-mounted from the host and even shared with a
@@ -238,11 +236,13 @@ RUN mkdir -p /opt/hermes/bin && \
 #
 # The arg is optional — local `docker build` without --build-arg simply
 # omits the file, and the runtime falls back to live-git lookup.  CI
-# (.github/workflows/docker.yml) passes ${{ github.sha }} so
+# (.github/workflows/docker-publish.yml) passes ${{ github.sha }} so
 # every published image has it.
 ARG HERMES_GIT_SHA=
 RUN if [ -n "${HERMES_GIT_SHA}" ]; then \
-        printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha; \
+        chmod u+w /opt/hermes && \
+        printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha && \
+        chmod a-w /opt/hermes /opt/hermes/.hermes_build_sha; \
    fi

 # ---------- s6-overlay service wiring ----------
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@

 **The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.

-Use any model you want — [Nous Portal](https://portal.nousresearch.com), OpenRouter, OpenAI, your own endpoint, and [many others](https://hermes-agent.nousresearch.com/docs/integrations/providers). Switch with `hermes model` — no code changes, no lock-in.
+Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NovitaAI](https://novita.ai) (AI-native cloud for Model API, Agent Sandbox, and GPU Cloud), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.

 <table>
 <tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@@ -722,50 +722,10 @@ def init_agent(
    elif agent.provider == "moa":
        from agent.moa_loop import MoAClient
        agent.api_mode = "chat_completions"
-
-        # Route reference-model outputs to the agent's tool_progress_callback so
-        # every surface that already consumes it (CLI spinner/scrollback, TUI,
-        # desktop, gateway) can show each reference's answer as a labelled block
-        # before the aggregator acts. The facade emits "moa.reference" and
-        # "moa.aggregating" events; we forward them through the same callback
-        # the tool lifecycle uses. Best-effort and cache-safe — these are
-        # display-only events, they never touch the message history.
-        def _moa_reference_relay(event: str, **kwargs: Any) -> None:
-            cb = getattr(agent, "tool_progress_callback", None)
-            if cb is None:
-                return
-            try:
-                if event == "moa.reference":
-                    label = str(kwargs.get("label") or "")
-                    text = str(kwargs.get("text") or "")
-                    idx = kwargs.get("index")
-                    count = kwargs.get("count")
-                    cb(
-                        "moa.reference",
-                        label,
-                        text,
-                        None,
-                        moa_index=idx,
-                        moa_count=count,
-                    )
-                elif event == "moa.aggregating":
-                    cb(
-                        "moa.aggregating",
-                        str(kwargs.get("aggregator") or ""),
-                        None,
-                        None,
-                        moa_ref_count=kwargs.get("ref_count"),
-                    )
-            except Exception:
-                pass
-
-        agent.client = MoAClient(
-            agent.model or "default",
-            reference_callback=_moa_reference_relay,
-        )
+        agent.client = MoAClient(agent.model or "default")
        agent._client_kwargs = {}
        agent.api_key = api_key or "moa-virtual-provider"
-        agent.base_url = "moa://local"
+        agent.base_url = base_url or "moa://local"
        if not agent.quiet_mode:
            print(f"🤖 AI Agent initialized with MoA preset: {agent.model}")
    elif agent.api_mode == "bedrock_converse":
@@ -1307,12 +1267,6 @@ def init_agent(
        _agent_section = {}
    agent._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")

-    # Intent-ack continuation config: "auto" (default — codex_responses only,
-    # the historical gate), true (all api_modes), false (never), or a list of
-    # model-name substrings.  Resolved against the active api_mode/model in the
-    # conversation loop's intent-ack block.
-    agent._intent_ack_continuation = _agent_section.get("intent_ack_continuation", "auto")
-
    # Universal task-completion guidance toggle.  Default True.  Surfaced
    # as a separate flag from tool_use_enforcement because the guidance
    # applies to ALL models, not just the model families enforcement
@@ -1676,10 +1630,8 @@ def init_agent(
            f"Model {agent.model} has a context window of {_ctx:,} tokens, "
            f"which is below the minimum {MINIMUM_CONTEXT_LENGTH:,} required "
            f"by Hermes Agent.  Choose a model with at least "
-            f"{MINIMUM_CONTEXT_LENGTH // 1000}K context.  If your server "
-            f"reports a window smaller than the model's true window, set "
-            f"model.context_length in config.yaml to the real value "
-            f"(this must be at least {MINIMUM_CONTEXT_LENGTH // 1000}K)."
+            f"{MINIMUM_CONTEXT_LENGTH // 1000}K context, or set "
+            f"model.context_length in config.yaml to override."
        )

    # Inject context engine tool schemas (e.g. lcm_grep, lcm_describe, lcm_expand).
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@@ -42,14 +42,6 @@ from utils import base_url_host_matches, base_url_hostname, env_var_enabled, ato
 logger = logging.getLogger(__name__)


-# Max consecutive successful credential-pool token refreshes of the SAME entry
-# on a persistent auth failure before we give up and let the fallback chain
-# activate. A single-entry OAuth pool can re-mint a fresh token indefinitely
-# even when the upstream keeps rejecting it, so without this cap the retry loop
-# spins forever and never reaches ``_try_activate_fallback``. See #26080.
-_MAX_AUTH_REFRESH_ATTEMPTS = 2
-
-
 def _ra():
    """Lazy ``run_agent`` reference for test-patch routing."""
    import run_agent
@@ -783,30 +775,6 @@ def recover_with_credential_pool(
            return False, has_retried_429
        refreshed = pool.try_refresh_current()
        if refreshed is not None:
-            # ``try_refresh_current()`` re-mints a fresh OAuth token and reports
-            # success even when the upstream keeps rejecting it — a single-entry
-            # pool (common for OAuth/Max subscribers) has nothing to rotate to,
-            # so a bare "refreshed → retry" loop spins forever on the same dead
-            # token and the configured fallback never activates. Cap consecutive
-            # same-entry refreshes and fall through to fallback once exceeded.
-            # See #26080.
-            refreshed_id = getattr(refreshed, "id", None)
-            if refreshed_id is not None:
-                refresh_counts = getattr(agent, "_auth_pool_refresh_counts", None)
-                if refresh_counts is None:
-                    refresh_counts = {}
-                    agent._auth_pool_refresh_counts = refresh_counts
-                refresh_key = (agent.provider, refreshed_id)
-                refresh_counts[refresh_key] = refresh_counts.get(refresh_key, 0) + 1
-                if refresh_counts[refresh_key] > _MAX_AUTH_REFRESH_ATTEMPTS:
-                    _ra().logger.warning(
-                        "Credential auth failure persists after %s refreshes for "
-                        "pool entry %s — treating as unrecoverable and allowing "
-                        "fallback to activate.",
-                        refresh_counts[refresh_key] - 1,
-                        refreshed_id,
-                    )
-                    return False, has_retried_429
            _ra().logger.info(f"Credential auth failure — refreshed pool entry {getattr(refreshed, 'id', '?')}")
            agent._swap_credential(refreshed)
            return True, has_retried_429
@@ -1078,34 +1046,6 @@ def restore_primary_runtime(agent) -> bool:
            api_mode=rt.get("compressor_api_mode", ""),
        )

-        # ── Re-select from the credential pool if one is available ──
-        # The snapshot's api_key was captured at construction time.  Across
-        # turns the pool may have rotated (token revocation, billing/rate-limit
-        # exhaustion, cooldown), leaving the snapshot key stale.  Restoring it
-        # blindly re-fails on the first request and burns through the remaining
-        # pool entries before cross-provider fallback even gets a chance.  Ask
-        # the pool for its current best entry and swap the live credential in.
-        # When the pool is absent, empty, or the entry has no usable key, we
-        # keep the snapshot key (the existing behavior).  Fixes #25205.
-        pool = getattr(agent, "_credential_pool", None)
-        if pool is not None and pool.has_available():
-            entry = pool.select()
-            if entry is not None:
-                entry_key = (
-                    getattr(entry, "runtime_api_key", None)
-                    or getattr(entry, "access_token", "")
-                )
-                if entry_key:
-                    # ``_swap_credential`` rebuilds the OpenAI/Anthropic client,
-                    # reapplies base-url-scoped headers, and carries the
-                    # accumulated base_url / OAuth-detection fixes (#33163).
-                    agent._swap_credential(entry)
-                    logger.info(
-                        "Restore re-selected pool entry %s (%s)",
-                        getattr(entry, "id", "?"),
-                        getattr(entry, "label", "?"),
-                    )
-
        # ── Reset fallback chain for the new turn ──
        agent._fallback_activated = False
        agent._fallback_index = 0
@@ -1480,15 +1420,6 @@ def create_openai_client(agent, client_kwargs: dict, *, reason: str, shared: boo
        keepalive_http = agent._build_keepalive_http_client(client_kwargs.get("base_url", ""))
        if keepalive_http is not None:
            client_kwargs["http_client"] = keepalive_http
-    # Delegate all rate-limit / 5xx retry to hermes's outer conversation loop,
-    # which honors Retry-After and applies adaptive/jittered backoff. The OpenAI
-    # SDK default (max_retries=2) uses its own 1-2s backoff that ignores
-    # Retry-After and double-retries inside our loop — the same deadlock the
-    # Anthropic clients hit (#26293). This is the single chokepoint every primary
-    # OpenAI/aggregator client passes through (init, switch_model, recovery,
-    # restore, request-scoped); auxiliary_client builds its own clients and keeps
-    # SDK retries because it is NOT wrapped by the conversation loop.
-    client_kwargs.setdefault("max_retries", 0)
    # Uses the module-level `OpenAI` name, resolved lazily on first
    # access via __getattr__ below. Tests patch via `run_agent.OpenAI`.
    client = _ra().OpenAI(**client_kwargs)
@@ -1568,10 +1499,6 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
    # _client_kwargs is a dict — snapshot a shallow copy so mutating the
    # live dict doesn't poison the rollback target.
    _snapshot["_client_kwargs"] = dict(getattr(agent, "_client_kwargs", {}) or {})
-    # Snapshot the credential pool reference so a failed client rebuild can
-    # restore the original pool (issue #52727: pool reload is part of this
-    # switch and must be reversible on rollback).
-    _snapshot["_credential_pool"] = getattr(agent, "_credential_pool", _MISSING)

    try:
        # Clear the per-config context_length override so the new model's
@@ -1596,36 +1523,8 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
        if api_key:
            agent.api_key = api_key

-        # ── Reload credential pool for the new provider (issue #52727) ──
-        # Without this, ``recover_with_credential_pool`` sees a
-        # ``pool.provider != agent.provider`` mismatch and short-circuits,
-        # leaving the new provider with no rotation/recovery on 401/429 and
-        # burning the original pool's entries. Only reload when the provider
-        # actually changed (or the pool was missing) — re-selecting the same
-        # provider must not churn the pool reference. A reload failure is
-        # logged + swallowed: the switch itself must still complete.
-        old_norm = (old_provider or "").strip().lower()
-        new_norm = (new_provider or "").strip().lower()
-        if old_norm != new_norm or getattr(agent, "_credential_pool", None) is None:
-            try:
-                from agent.credential_pool import load_pool
-                agent._credential_pool = load_pool(new_provider)
-            except Exception as _pool_exc:  # noqa: BLE001
-                logger.warning(
-                    "switch_model: credential pool reload failed for %s (%s); "
-                    "continuing without pool rotation this turn",
-                    new_provider, _pool_exc,
-                )
-
        # ── Build new client ──
-        if (new_provider or "").strip().lower() == "moa":
-            from agent.moa_loop import MoAClient
-
-            agent.api_key = api_key or "moa-virtual-provider"
-            agent.base_url = "moa://local"
-            agent._client_kwargs = {}
-            agent.client = MoAClient(agent.model or "default")
-        elif api_mode == "anthropic_messages":
+        if api_mode == "anthropic_messages":
            from agent.anthropic_adapter import (
                build_anthropic_client,
                resolve_anthropic_token,
@@ -2205,21 +2104,8 @@ def looks_like_codex_intermediate_ack(
    user_message: str,
    assistant_content: str,
    messages: List[Dict[str, Any]],
-    require_workspace: bool = True,
 ) -> bool:
-    """Detect a planning/ack message that should continue instead of ending the turn.
-
-    ``require_workspace`` (default True) keeps the original codex-coding scope:
-    the ack must reference a filesystem/repo workspace. The conversation loop
-    passes ``require_workspace=False`` when the user has explicitly opted into
-    intent-ack continuation for all api_modes (``agent.intent_ack_continuation``
-    is ``true`` or a model-list), so general autonomous workflows ("I'll run a
-    health check on the server", "I'll start the deployment") — which carry a
-    future-ack and an action verb but no filesystem reference — are caught too.
-    The future-ack + short-content + no-prior-tools + action-verb requirements
-    always apply, which is what keeps conversational "I'll help you brainstorm"
-    replies from tripping it.
-    """
+    """Detect a planning/ack message that should continue instead of ending the turn."""
    if any(isinstance(msg, dict) and msg.get("role") == "tool" for msg in messages):
        return False

@@ -2272,67 +2158,17 @@ def looks_like_codex_intermediate_ack(
        "path",
    )

-    assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
-    if not assistant_mentions_action:
-        return False
-
-    # Opted-in (all-api_mode) path: a future-ack + action verb + no prior tool
-    # call is enough — the user asked us to keep going when the model only
-    # announces intent, regardless of whether a filesystem is involved.
-    if not require_workspace:
-        return True
-
    user_text = (user_message or "").strip().lower()
    user_targets_workspace = (
        any(marker in user_text for marker in workspace_markers)
        or "~/" in user_text
        or "/" in user_text
    )
+    assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
    assistant_targets_workspace = any(
        marker in assistant_text for marker in workspace_markers
    )
-    return user_targets_workspace or assistant_targets_workspace
-
-
-def intent_ack_continuation_mode(agent) -> str:
-    """Classify the resolved intent-ack continuation mode for this turn.
-
-    Returns one of:
-      * ``"off"``        — never continue.
-      * ``"codex_only"`` — historical scope: continue only on the
-        ``codex_responses`` api_mode, and only for codebase/workspace acks
-        (``require_workspace=True``).
-      * ``"all"``        — user opted in for every api_mode; continue on any
-        future-ack + action verb (``require_workspace=False``).
-
-    Mirrors the four-mode shape of ``agent.tool_use_enforcement``: ``"auto"``
-    (default) → codex_only; ``True``/"true"/"always"/"yes"/"on" → all;
-    ``False``/"false"/"never"/"no"/"off" → off; ``list`` → all when a substring
-    matches the active model name, else off.
-    """
-    mode = getattr(agent, "_intent_ack_continuation", "auto")
-
-    if mode is True or (isinstance(mode, str) and mode.lower() in {"true", "always", "yes", "on"}):
-        return "all"
-    if mode is False or (isinstance(mode, str) and mode.lower() in {"false", "never", "no", "off"}):
-        return "off"
-    if isinstance(mode, list):
-        model_lower = (agent.model or "").lower()
-        return "all" if any(p.lower() in model_lower for p in mode if isinstance(p, str)) else "off"
-    # "auto" or any unrecognised value — historical codex-only behavior.
-    return "codex_only" if agent.api_mode == "codex_responses" else "off"
-
-
-def intent_ack_continuation_enabled(agent) -> bool:
-    """Whether intent-ack continuation should fire at all for this turn.
-
-    The ``codex_ack_continuations < 2`` per-turn cap and the
-    ``looks_like_codex_intermediate_ack`` detector are applied by the caller;
-    this only decides the on/off gate. Callers that also need to know whether
-    the workspace requirement applies should use ``intent_ack_continuation_mode``
-    directly (``"codex_only"`` ⇒ require_workspace=True, ``"all"`` ⇒ False).
-    """
-    return intent_ack_continuation_mode(agent) != "off"
+    return (user_targets_workspace or assistant_targets_workspace) and assistant_mentions_action



--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -673,9 +673,6 @@ def _build_anthropic_client_with_bearer_hook(
    kwargs = {
        "timeout": timeout_obj,
        "http_client": http_client,
-        # Delegate retry to hermes's outer loop (honors Retry-After); the SDK
-        # default max_retries=2 ignores it and double-retries. (#26293)
-        "max_retries": 0,
        # The SDK requires *something* for api_key/auth_token. Our
        # event hook overrides Authorization per request so this value
        # is never sent. The sentinel string makes accidental leaks
@@ -760,12 +757,6 @@ def build_anthropic_client(
    _read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
    kwargs = {
        "timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
-        # Delegate all rate-limit / 5xx retry to hermes's outer conversation
-        # loop, which honors Retry-After. The SDK default (max_retries=2) uses
-        # its own 1-2s backoff that ignores Retry-After and double-retries
-        # inside our loop — burning request slots against a bucket that won't
-        # refill for minutes. (#26293)
-        "max_retries": 0,
    }
    if normalized_base_url:
        # Azure Anthropic endpoints require an ``api-version`` query parameter.
@@ -861,9 +852,6 @@ def build_anthropic_bedrock_client(region: str):
    return _anthropic_sdk.AnthropicBedrock(
        aws_region=region,
        timeout=Timeout(timeout=900.0, connect=10.0),
-        # Delegate retry to hermes's outer loop (honors Retry-After); the SDK
-        # default max_retries=2 ignores it and double-retries. (#26293)
-        max_retries=0,
        default_headers={"anthropic-beta": ",".join([*_COMMON_BETAS, _CONTEXT_1M_BETA])},
    )

@@ -926,72 +914,44 @@ def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
    return None


-def _read_claude_code_credentials_from_file() -> Optional[Dict[str, Any]]:
-    """Read Claude Code OAuth credentials from ~/.claude/.credentials.json.
-
-    Returns dict with {accessToken, refreshToken?, expiresAt?, source} or None.
-    """
-    cred_path = Path.home() / ".claude" / ".credentials.json"
-    if not cred_path.exists():
-        return None
-    try:
-        data = json.loads(cred_path.read_text(encoding="utf-8"))
-    except (json.JSONDecodeError, OSError, IOError) as e:
-        logger.debug("Failed to read ~/.claude/.credentials.json: %s", e)
-        return None
-
-    oauth_data = data.get("claudeAiOauth")
-    if not (oauth_data and isinstance(oauth_data, dict)):
-        return None
-    access_token = oauth_data.get("accessToken", "")
-    if not access_token:
-        return None
-    return {
-        "accessToken": access_token,
-        "refreshToken": oauth_data.get("refreshToken", ""),
-        "expiresAt": oauth_data.get("expiresAt", 0),
-        "source": "claude_code_credentials_file",
-    }
-
-
 def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
    """Read refreshable Claude Code OAuth credentials.

-    Reads from two possible sources and reconciles them:
+    Checks two sources in order and returns the first usable one:
      1. macOS Keychain (Darwin only) — "Claude Code-credentials" entry
      2. ~/.claude/.credentials.json file

-    Selection rules when both are present:
-      - If exactly one is non-expired, prefer that one. (Handles the case
-        where Claude Code refreshes one source but not the other — observed
-        in the wild on Claude Code 2.1.x.)
-      - Otherwise, prefer the source with the later ``expiresAt`` so that
-        any subsequent refresh uses the most recent ``refreshToken``.
-
    This intentionally excludes ~/.claude.json primaryApiKey. Opencode's
    subscription flow is OAuth/setup-token based with refreshable credentials,
    and native direct Anthropic provider usage should follow that path rather
    than auto-detecting Claude's first-party managed key.

-    Returns dict with {accessToken, refreshToken?, expiresAt?, source} or None.
+    Returns dict with {accessToken, refreshToken?, expiresAt?, source?} or None.
    """
+    # Try macOS Keychain first (covers Claude Code >=2.1.114)
    kc_creds = _read_claude_code_credentials_from_keychain()
-    file_creds = _read_claude_code_credentials_from_file()
+    if kc_creds:
+        return kc_creds

-    if kc_creds and file_creds:
-        kc_valid = is_claude_code_token_valid(kc_creds)
-        file_valid = is_claude_code_token_valid(file_creds)
-        if kc_valid and not file_valid:
-            return kc_creds
-        if file_valid and not kc_valid:
-            return file_creds
-        # Both valid or both expired: prefer the later expiresAt so the
-        # downstream refresh path uses the freshest refresh_token.
-        kc_exp = kc_creds.get("expiresAt", 0) or 0
-        file_exp = file_creds.get("expiresAt", 0) or 0
-        return kc_creds if kc_exp >= file_exp else file_creds
+    # Fall back to the JSON file; a payload that is not a JSON object is ignored
+    cred_path = Path.home() / ".claude" / ".credentials.json"
+    if cred_path.exists():
+        try:
+            data = json.loads(cred_path.read_text(encoding="utf-8"))
+            oauth_data = data.get("claudeAiOauth") if isinstance(data, dict) else None
+            if oauth_data and isinstance(oauth_data, dict):
+                access_token = oauth_data.get("accessToken", "")
+                if access_token:
+                    return {
+                        "accessToken": access_token,
+                        "refreshToken": oauth_data.get("refreshToken", ""),
+                        "expiresAt": oauth_data.get("expiresAt", 0),
+                        "source": "claude_code_credentials_file",
+                    }
+        except (json.JSONDecodeError, OSError, IOError) as e:
+            logger.debug("Failed to read ~/.claude/.credentials.json: %s", e)

-    return kc_creds or file_creds
+    return None


 def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool:
@@ -1074,40 +1034,8 @@ def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False)


 def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]:
-    """Attempt to refresh an expired Claude Code OAuth token.
-
-    Claude Code's OAuth refresh tokens are single-use: a successful refresh
-    rotates the pair and invalidates the old refresh token. Claude Code itself
-    also refreshes on its own schedule (IDE/CLI activity), so by the time
-    Hermes notices an expired token, Claude Code may have already rotated it.
-    POSTing our now-stale refresh token in that window races Claude Code and
-    fails with ``invalid_grant``.
-
-    So before refreshing, re-read the live credential sources. If Claude Code
-    has already produced a valid token, adopt it and skip the POST entirely.
-    Only fall back to refreshing ourselves when no fresh credential is found.
-    """
-    # Claude Code may have already refreshed — adopt its token rather than
-    # racing it with our (possibly already-rotated) refresh token. Only adopt
-    # when the live re-read produced a DIFFERENT token with a real future
-    # expiry: re-adopting the same credential we were just handed would be a
-    # no-op, and a 0/absent ``expiresAt`` means "managed key / unknown expiry"
-    # (see is_claude_code_token_valid) which must NOT be treated as a fresh
-    # refresh here.
-    current = read_claude_code_credentials()
-    if current:
-        current_token = current.get("accessToken", "")
-        current_exp = current.get("expiresAt", 0) or 0
-        if (
-            current_token
-            and current_token != creds.get("accessToken", "")
-            and current_exp > 0
-            and is_claude_code_token_valid(current)
-        ):
-            logger.debug("Adopted Claude Code's already-refreshed OAuth token")
-            return current_token
-
-    refresh_token = (current or {}).get("refreshToken", "") or creds.get("refreshToken", "")
+    """Attempt to refresh an expired Claude Code OAuth token."""
+    refresh_token = creds.get("refreshToken", "")
    if not refresh_token:
        logger.debug("No refresh token available — cannot refresh")
        return None
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -102,7 +102,6 @@ OpenAI = _OpenAIProxy()  # module-level name, resolves lazily on call/isinstance

 from agent.credential_pool import load_pool
 from agent.model_metadata import MINIMUM_CONTEXT_LENGTH, get_model_context_length
-from agent.process_bootstrap import build_keepalive_http_client
 from hermes_cli.config import get_hermes_home
 from hermes_constants import OPENROUTER_BASE_URL
 from utils import base_url_host_matches, base_url_hostname, env_float, model_forces_max_completion_tokens, normalize_proxy_env_vars
@@ -110,23 +109,6 @@ from utils import base_url_host_matches, base_url_hostname, env_float, model_for
 logger = logging.getLogger(__name__)


-def _openai_http_client_kwargs(
-    base_url: Optional[str],
-    *,
-    async_mode: bool = False,
-) -> Dict[str, Any]:
-    """Inject keepalive httpx client with env-only proxy (not macOS system proxy)."""
-    client = build_keepalive_http_client(str(base_url or ""), async_mode=async_mode)
-    if client is None:
-        return {}
-    return {"http_client": client}
-
-
-def _create_openai_client(*, api_key: str, base_url: str, **kwargs: Any) -> Any:
-    kwargs = {**_openai_http_client_kwargs(base_url), **kwargs}
-    return OpenAI(api_key=api_key, base_url=base_url, **kwargs)
-
-
 # ── Interrupt protection for atomic auxiliary tasks ──────────────────────
 # Some auxiliary tasks must NOT be aborted mid-flight by a gateway interrupt
 # (e.g. an incoming user message while the agent is busy). Context
@@ -1632,7 +1614,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            _merged_aux = _apply_user_default_headers(extra.get("default_headers"))
            if _merged_aux:
                extra["default_headers"] = _merged_aux
-            _client = _create_openai_client(api_key=api_key, base_url=base_url, **extra)
+            _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
            _client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
            return _client, model

@@ -1672,7 +1654,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
        _merged_aux2 = _apply_user_default_headers(extra.get("default_headers"))
        if _merged_aux2:
            extra["default_headers"] = _merged_aux2
-        _client = _create_openai_client(api_key=api_key, base_url=base_url, **extra)
+        _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
        _client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
        return _client, model

@@ -1687,21 +1669,20 @@ def _try_openrouter(explicit_api_key: str = None, model: str = None) -> Tuple[Op
    pool_present, entry = _select_pool_entry("openrouter")
    if pool_present:
        or_key = explicit_api_key or _pool_runtime_api_key(entry)
-        if or_key:
-            base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
-            logger.debug("Auxiliary client: OpenRouter via pool")
-            return _create_openai_client(api_key=or_key, base_url=base_url,
-                           default_headers=build_or_headers()), model or _OPENROUTER_MODEL
-        # Pool exists but is exhausted (no usable runtime key) — fall through to
-        # the OPENROUTER_API_KEY env-var path rather than failing outright.
-        logger.debug("Auxiliary client: OpenRouter pool exhausted, trying OPENROUTER_API_KEY")
+        if not or_key:
+            _mark_provider_unhealthy("openrouter", ttl=60)
+            return None, None
+        base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
+        logger.debug("Auxiliary client: OpenRouter via pool")
+        return OpenAI(api_key=or_key, base_url=base_url,
+                       default_headers=build_or_headers()), model or _OPENROUTER_MODEL

    or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY")
    if not or_key:
        _mark_provider_unhealthy("openrouter", ttl=60)
        return None, None
    logger.debug("Auxiliary client: OpenRouter")
-    return _create_openai_client(api_key=or_key, base_url=OPENROUTER_BASE_URL,
+    return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
                   default_headers=build_or_headers()), model or _OPENROUTER_MODEL


@@ -1794,7 +1775,7 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
            return None, None
        base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/")
    return (
-        _create_openai_client(
+        OpenAI(
            api_key=api_key,
            base_url=base_url,
        ),
@@ -2071,7 +2052,7 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
    if _custom_headers:
        _extra["default_headers"] = _custom_headers
    if custom_mode == "codex_responses":
-        real_client = _create_openai_client(api_key=custom_key, base_url=_clean_base, **_extra)
+        real_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
        return CodexAuxiliaryClient(real_client, model), model
    if custom_mode == "anthropic_messages":
        # Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
@@ -2085,14 +2066,14 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
                "Custom endpoint declares api_mode=anthropic_messages but the "
                "anthropic SDK is not installed — falling back to OpenAI-wire."
            )
-            return _create_openai_client(api_key=custom_key, base_url=_clean_base, **_extra), model
+            return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
        return (
            AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
            model,
        )
    # URL-based anthropic detection for custom endpoints that didn't set
    # api_mode explicitly (e.g. kimi.com/coding reached via custom config).
-    _fallback_client = _create_openai_client(api_key=custom_key, base_url=_clean_base, **_extra)
+    _fallback_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
    _fallback_client = _maybe_wrap_anthropic(
        _fallback_client, model, custom_key, custom_base, custom_mode,
    )
@@ -2121,7 +2102,7 @@ def _build_xai_oauth_aux_client(model: str) -> Tuple[Optional[Any], Optional[str
        return None, None
    api_key, base_url = resolved
    logger.debug("Auxiliary client: xAI OAuth (%s via Responses API)", model)
-    real_client = _create_openai_client(api_key=api_key, base_url=base_url)
+    real_client = OpenAI(api_key=api_key, base_url=base_url)
    return CodexAuxiliaryClient(real_client, model), model


@@ -2158,7 +2139,7 @@ def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
            return None, None
        base_url = _CODEX_AUX_BASE_URL
    logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", model)
-    real_client = _create_openai_client(
+    real_client = OpenAI(
        api_key=codex_token,
        base_url=base_url,
        default_headers=_codex_cloudflare_headers(codex_token),
@@ -2258,7 +2239,7 @@ def _try_azure_foundry(
    if _dq:
        extra["default_query"] = _dq

-    client = _create_openai_client(api_key=api_key, base_url=_clean_base, **extra)
+    client = OpenAI(api_key=api_key, base_url=_clean_base, **extra)

    if runtime_api_mode == "codex_responses":
        # GPT-5.x / o-series / codex models on Azure Foundry are
@@ -3643,37 +3624,6 @@ def _resolve_auto(
    # config.yaml (auxiliary.<task>.provider) still win over this.
    main_provider = str(runtime_provider or _read_main_provider() or "")
    main_model = str(runtime_model or _read_main_model() or "")
-
-    # MoA virtual provider: the "model" is a preset name (e.g. "opus-gpt") and
-    # there is no real "moa" HTTP endpoint, so resolving an aux client against
-    # provider="moa"/model=<preset> sends the preset name as the model id and
-    # the provider 400s ("opus-gpt is not a valid model ID"). Auxiliary tasks
-    # (title generation, compression, vision, …) don't need the reference
-    # fan-out — they should run on the aggregator, which is the preset's acting
-    # model. Resolve the MoA preset to its aggregator slot and continue Step 1
-    # with that real provider+model. Mirrors the MoA context-length resolution.
-    if main_provider == "moa":
-        try:
-            from hermes_cli.config import load_config
-            from hermes_cli.moa_config import resolve_moa_preset
-
-            _preset = resolve_moa_preset(load_config().get("moa") or {}, main_model)
-            _agg = _preset.get("aggregator") or {}
-            _agg_provider = str(_agg.get("provider") or "").strip()
-            _agg_model = str(_agg.get("model") or "").strip()
-            if _agg_provider and _agg_model and _agg_provider.lower() != "moa":
-                main_provider = _agg_provider
-                main_model = _agg_model
-                # The MoA virtual runtime carries a non-HTTP base_url
-                # ("moa://local") and a placeholder api_key; they belong to the
-                # facade, not the aggregator's real provider. Drop them so the
-                # aggregator resolves through its own provider credentials.
-                runtime_base_url = ""
-                runtime_api_key = ""
-                runtime_api_mode = ""
-        except Exception:
-            logger.debug("MoA aux resolution to aggregator failed", exc_info=True)
-
    if (main_provider and main_model
            and main_provider not in {"auto", ""}):
        resolved_provider = main_provider
@@ -3820,10 +3770,6 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
    _merged_async = _apply_user_default_headers(async_kwargs.get("default_headers"))
    if _merged_async:
        async_kwargs["default_headers"] = _merged_async
-    async_kwargs = {
-        **_openai_http_client_kwargs(sync_base_url, async_mode=True),
-        **async_kwargs,
-    }
    return AsyncOpenAI(**async_kwargs), model


@@ -4034,7 +3980,7 @@ def resolve_provider_client(
                               "but no Codex OAuth token found (run: hermes model)")
                return None, None
            final_model = _normalize_resolved_model(model, provider)
-            raw_client = _create_openai_client(
+            raw_client = OpenAI(
                api_key=codex_token,
                base_url=_CODEX_AUX_BASE_URL,
                default_headers=_codex_cloudflare_headers(codex_token),
@@ -4115,7 +4061,7 @@ def resolve_provider_client(
            _merged_custom = _apply_user_default_headers(extra.get("default_headers"))
            if _merged_custom:
                extra["default_headers"] = _merged_custom
-            client = _create_openai_client(api_key=custom_key, base_url=_clean_base, **extra)
+            client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
            client = _wrap_if_needed(client, final_model, custom_base, custom_key)
            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                    else (client, final_model))
@@ -4219,7 +4165,7 @@ def resolve_provider_client(
                        _fb_headers = _apply_user_default_headers(_fb_extra.get("default_headers"))
                        if _fb_headers:
                            _fb_extra["default_headers"] = _fb_headers
-                        client = _create_openai_client(api_key=custom_key, base_url=_fb_clean, **_fb_extra)
+                        client = OpenAI(api_key=custom_key, base_url=_fb_clean, **_fb_extra)
                        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                                else (client, final_model))
                    sync_anthropic = AnthropicAuxiliaryClient(
@@ -4228,7 +4174,7 @@ def resolve_provider_client(
                    if async_mode:
                        return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model
                    return sync_anthropic, final_model
-                client = _create_openai_client(api_key=custom_key, base_url=_clean_base2, **_extra2)
+                client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
                # codex_responses or inherited auto-detect (via _wrap_if_needed).
                # _wrap_if_needed reads the closed-over `api_mode` (the task-level
                # override). Named-provider entry api_mode=codex_responses also
@@ -4370,7 +4316,7 @@ def resolve_provider_client(
        _merged_main = _apply_user_default_headers(headers)
        if _merged_main:
            headers = _merged_main
-        client = _create_openai_client(api_key=api_key, base_url=base_url,
+        client = OpenAI(api_key=api_key, base_url=base_url,
                        **({"default_headers": headers} if headers else {}))

        # Copilot GPT-5+ models (except gpt-5-mini) require the Responses
@@ -4906,7 +4852,7 @@ def _refresh_nous_auxiliary_client(
        return None, model

    fresh_key, fresh_base_url = runtime
-    sync_client = _create_openai_client(api_key=fresh_key, base_url=fresh_base_url)
+    sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url)
    final_model = model

    current_loop = None
@@ -6016,17 +5962,8 @@ def call_llm(
        # When the provider returns a 429 rate-limit (not billing), fall
        # back to an alternative provider instead of exhausting retries
        # against the same rate-limited endpoint.
-        #
-        # ── Auth error fallback (#21165) ─────────────────────────────
-        # When the resolved provider returns 401 and neither the Nous
-        # refresh path nor explicit provider credential refresh applies,
-        # fall back to an alternative provider instead of dropping the
-        # auxiliary task on the floor (silent compression failure /
-        # message loss). Auth is NOT a capacity error: it only bypasses
-        # the explicit-provider gate when the user is in auto mode.
        should_fallback = (
-            _is_auth_error(first_err)
-            or _is_payment_error(first_err)
+            _is_payment_error(first_err)
            or _is_connection_error(first_err)
            or _is_rate_limit_error(first_err)
            or _is_model_incompatible_error(first_err)
@@ -6056,9 +5993,7 @@ def call_llm(
            or _is_invalid_aux_response_error(first_err)
        )
        if should_fallback and (is_auto or is_capacity_error):
-            if _is_auth_error(first_err):
-                reason = "auth error"
-            elif _is_payment_error(first_err):
+            if _is_payment_error(first_err):
                reason = "payment error"
                # Resolve the actual provider label (resolved_provider may be
                # "auto"; the client's base_url tells us which backend got the
@@ -6507,13 +6442,8 @@ async def async_call_llm(
                        raise

        # ── Payment / connection / rate-limit fallback (mirrors sync call_llm) ──
-        # Auth error fallback (#21165): a 401 that survived the refresh path
-        # falls back in auto mode just like the sync call_llm() path. Auth is
-        # NOT a capacity error, so on an explicit provider it still respects
-        # the user's choice (handled by the is_auto/is_capacity_error gate).
        should_fallback = (
-            _is_auth_error(first_err)
-            or _is_payment_error(first_err)
+            _is_payment_error(first_err)
            or _is_connection_error(first_err)
            or _is_rate_limit_error(first_err)
            or _is_model_incompatible_error(first_err)
@@ -6535,9 +6465,7 @@ async def async_call_llm(
            or _is_invalid_aux_response_error(first_err)
        )
        if should_fallback and (is_auto or is_capacity_error):
-            if _is_auth_error(first_err):
-                reason = "auth error"
-            elif _is_payment_error(first_err):
+            if _is_payment_error(first_err):
                reason = "payment error"
                _mark_provider_unhealthy(
                    _recoverable_pool_provider(resolved_provider, client) or resolved_provider
--- a/agent/chat_completion_helpers.py
+++ b/agent/chat_completion_helpers.py
@@ -37,18 +37,6 @@ from tools.terminal_tool import is_persistent_env
 from utils import base_url_host_matches, base_url_hostname, env_float, env_int

 logger = logging.getLogger(__name__)
-_OPENROUTER_PROVIDER_SORT_VALUES = {"throughput", "latency", "price"}
-
-# When the fallback chain is fully exhausted on a non-rate-limit failure
-# (e.g. every provider returns a non-retryable client error like HTTP 400),
-# arm a short cooldown so the NEXT turn's restore_primary_runtime stays gated
-# and does not reset _fallback_index=0 to replay the entire chain again.
-# Without this, a client/gateway that re-submits immediately would re-marshal
-# the full (potentially 80k-token) context once per provider every turn and
-# can drive a constrained host into memory/swap exhaustion.  Rate-limit /
-# billing reasons keep their own 60s cooldown (set above); this is the
-# narrower non-rate-limit case.  See issue #24996.
-_FALLBACK_EXHAUSTED_COOLDOWN_S = 5.0


 def _ra():
@@ -127,23 +115,6 @@ def _is_openai_codex_backend(agent) -> bool:
    )


-def _validated_openrouter_provider_sort(raw_sort: Any) -> Optional[str]:
-    """Return a normalized OpenRouter provider.sort value or None."""
-    if not isinstance(raw_sort, str):
-        return None
-    sort_value = raw_sort.strip().lower()
-    if not sort_value:
-        return None
-    if sort_value in _OPENROUTER_PROVIDER_SORT_VALUES:
-        return sort_value
-    logger.warning(
-        "Ignoring invalid OpenRouter provider.sort value %r (allowed: %s)",
-        raw_sort,
-        ", ".join(sorted(_OPENROUTER_PROVIDER_SORT_VALUES)),
-    )
-    return None
-
-
 def _env_float(name: str, default: float) -> float:
    try:
        return float(os.getenv(name, str(default)))
@@ -258,11 +229,6 @@ def interruptible_api_call(agent, api_kwargs: dict):
                        invalidate_runtime_client(region)
                    raise
                result["response"] = normalize_converse_response(raw_response)
-            elif agent.provider == "moa":
-                # MoA is a virtual chat-completions provider backed by the
-                # in-process MoAClient facade. Do not rebuild a request-local
-                # OpenAI client from the virtual runtime metadata.
-                result["response"] = agent.client.chat.completions.create(**api_kwargs)
            else:
                request_client = _set_request_client(
                    agent._create_request_openai_client(
@@ -732,9 +698,8 @@ def build_api_kwargs(agent, api_messages: list) -> dict:
        _prefs["ignore"] = agent.providers_ignored
    if agent.providers_order:
        _prefs["order"] = agent.providers_order
-    _provider_sort = _validated_openrouter_provider_sort(agent.provider_sort)
-    if _provider_sort:
-        _prefs["sort"] = _provider_sort
+    if agent.provider_sort:
+        _prefs["sort"] = agent.provider_sort
    if agent.provider_require_parameters:
        _prefs["require_parameters"] = True
    if agent.provider_data_collection:
@@ -1050,23 +1015,18 @@ def build_assistant_message(agent, assistant_message, finish_reason: str) -> dic
                    "arguments": tool_call.function.arguments
                },
            }
-            # Tool-call arguments are intentionally NOT redacted here. This
-            # dict enters the in-memory conversation history that is replayed
-            # to the model on every subsequent turn AND persisted to state.db,
-            # which is itself replayed verbatim on session resume
-            # (get_messages_as_conversation). Masking a credential to `***`
-            # here poisons that replay: the model reads back its own
-            # `PGPASSWORD='***' psql ...` call and copies the placeholder into
-            # the next tool call, breaking every credential-dependent command
-            # on the second turn (#43083). The masking also provided no real
-            # protection — the same secret still leaks verbatim through tool
-            # OUTPUT (file contents, command output, diffs, the compaction
-            # block), none of which this pass ever touched. Keeping secrets
-            # out of the replayable store is a separate tokenization/vault
-            # concern, not something arg-redaction can deliver without
-            # breaking replay. Storage-time redaction remains governed by the
-            # `security.redact_secrets` toggle. (#19798 introduced this;
-            # #43083 removed it.)
+            # Defence-in-depth: redact credentials from tool call arguments
+            # before they enter conversation history, in case a model inlines
+            # a secret into a tool call (e.g. `terminal(command="curl -H
+            # 'Authorization: Bearer sk-...'")`). Tool execution uses the raw
+            # API response object, not this dict, so the call in flight is
+            # unaffected. NOTE(review): later turns replay the redacted text;
+            # confirm this does not reintroduce #43083. (#19798)
+            if isinstance(tc_dict["function"]["arguments"], str):
+                from agent.redact import redact_sensitive_text
+                tc_dict["function"]["arguments"] = redact_sensitive_text(
+                    tc_dict["function"]["arguments"]
+                )
            # Preserve extra_content (e.g. Gemini thought_signature) so it
            # is sent back on subsequent API calls.  Without this, Gemini 3
            # thinking models reject the request with a 400 error.
@@ -1133,22 +1093,8 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
        if (not fallback_already_active) or (primary_provider and current_provider == primary_provider):
            agent._rate_limited_until = time.monotonic() + 60
    if agent._fallback_index >= len(agent._fallback_chain):
-        # Chain exhausted.  If we actually walked a non-empty chain and the
-        # failure was NOT a rate-limit/billing event (those already armed
-        # their own 60s cooldown above), arm a short cooldown so the next
-        # turn's restore_primary_runtime stays gated instead of resetting
-        # _fallback_index=0 and re-marshaling the whole context across every
-        # provider again.  Guards the cross-turn replay storm in #24996.
-        if (
-            len(agent._fallback_chain) > 0
-            and reason not in {FailoverReason.rate_limit, FailoverReason.billing}
-        ):
-            _existing_cooldown = getattr(agent, "_rate_limited_until", 0) or 0
-            agent._rate_limited_until = max(
-                _existing_cooldown,
-                time.monotonic() + _FALLBACK_EXHAUSTED_COOLDOWN_S,
-            )
        return False
+
    fb = agent._fallback_chain[agent._fallback_index]
    agent._fallback_index += 1
    fb_provider = (fb.get("provider") or "").strip().lower()
@@ -1264,16 +1210,14 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
            agent._transport_cache.clear()
        agent._fallback_activated = True

-        # Rebind the credential pool to the fallback provider when the provider
-        # changes.  Keeping the primary pool attached would make downstream
-        # recovery (rate_limit / billing / auth) mutate the wrong credential
-        # set and can overwrite the fallback's base_url back to the primary
-        # endpoint.  See #33163.
-        #
+        # Clear the credential pool when the fallback provider doesn't match
+        # the pool's provider.  The pool was seeded for the primary provider;
+        # leaving it attached means downstream recovery (rate_limit / billing /
+        # auth) calls ``_swap_credential`` with a primary entry which overwrites
+        # the agent's ``base_url`` back to the primary's endpoint — every
+        # fallback request then 404s against the wrong host.  See #33163.
        # When the fallback shares the pool's provider (e.g. both openrouter
-        # entries with different routing) the pool is preserved.  When the
-        # providers differ, load the fallback provider's own pool if one exists
-        # so provider-specific rotation continues to work after the switch.
+        # entries with different routing) the pool is preserved.
        _existing_pool = getattr(agent, "_credential_pool", None)
        if _existing_pool is not None:
            _pool_provider = (getattr(_existing_pool, "provider", "") or "").strip().lower()
@@ -1284,22 +1228,6 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
                    fb_provider, fb_model, _pool_provider,
                )
                agent._credential_pool = None
-        if getattr(agent, "_credential_pool", None) is None:
-            try:
-                from agent.credential_pool import load_pool
-
-                fallback_pool = load_pool(fb_provider)
-                if fallback_pool and fallback_pool.has_credentials():
-                    agent._credential_pool = fallback_pool
-                    logger.info(
-                        "Fallback to %s/%s: attached fallback credential pool",
-                        fb_provider, fb_model,
-                    )
-            except Exception as exc:
-                logger.debug(
-                    "Fallback to %s/%s: could not attach credential pool: %s",
-                    fb_provider, fb_model, exc,
-                )

        # Honor per-provider / per-model request_timeout_seconds for the
        # fallback target (same knob the primary client uses).  None = use
@@ -1530,9 +1458,8 @@ def handle_max_iterations(agent, messages: list, api_call_count: int) -> str:
                provider_preferences["ignore"] = agent.providers_ignored
            if agent.providers_order:
                provider_preferences["order"] = agent.providers_order
-            _provider_sort = _validated_openrouter_provider_sort(agent.provider_sort)
-            if _provider_sort:
-                provider_preferences["sort"] = _provider_sort
+            if agent.provider_sort:
+                provider_preferences["sort"] = agent.provider_sort
            if provider_preferences and (
                (agent.provider or "").strip().lower() == "openrouter"
                or agent._is_openrouter_url()
@@ -2319,15 +2246,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                                _fire_first_delta()
                                agent._fire_reasoning_delta(thinking_text)

-            # Return the native Anthropic Message for downstream processing.
-            # If the stream was interrupted (the event loop broke out above on
-            # agent._interrupt_requested), do NOT call get_final_message() — on
-            # a partially-consumed stream the SDK may hang draining remaining
-            # events or return a Message with incomplete tool_use blocks (partial
-            # JSON in `input`). The outer poll loop raises InterruptedError, so
-            # this return value is discarded anyway.
-            if agent._interrupt_requested:
-                return None
+            # Return the native Anthropic Message for downstream processing
            return stream.get_final_message()

    def _call():
@@ -2472,19 +2391,12 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                            diag=request_client_holder.get("diag"),
                        )
                        _close_request_client_once("stream_mid_tool_retry_cleanup")
-                        if agent.api_mode == "anthropic_messages":
-                            try:
-                                agent._anthropic_client.close()
-                                agent._rebuild_anthropic_client()
-                            except Exception:
-                                pass
-                        else:
-                            try:
-                                agent._replace_primary_openai_client(
-                                    reason="stream_mid_tool_retry_pool_cleanup"
-                                )
-                            except Exception:
-                                pass
+                        try:
+                            agent._replace_primary_openai_client(
+                                reason="stream_mid_tool_retry_pool_cleanup"
+                            )
+                        except Exception:
+                            pass
                        continue

                    # SSE error events from proxies (e.g. OpenRouter sends
@@ -2532,19 +2444,12 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                            _close_request_client_once("stream_retry_cleanup")
                            # Also rebuild the primary client to purge
                            # any dead connections from the pool.
-                            if agent.api_mode == "anthropic_messages":
-                                try:
-                                    agent._anthropic_client.close()
-                                    agent._rebuild_anthropic_client()
-                                except Exception:
-                                    pass
-                            else:
-                                try:
-                                    agent._replace_primary_openai_client(
-                                        reason="stream_retry_pool_cleanup"
-                                    )
-                                except Exception:
-                                    pass
+                            try:
+                                agent._replace_primary_openai_client(
+                                    reason="stream_retry_pool_cleanup"
+                                )
+                            except Exception:
+                                pass
                            continue
                        # Retries exhausted. Log the final failure with
                        # full diagnostic detail (chain, headers,
@@ -2715,17 +2620,10 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                pass
            # Rebuild the primary client too — its connection pool
            # may hold dead sockets from the same provider outage.
-            if agent.api_mode == "anthropic_messages":
-                try:
-                    agent._anthropic_client.close()
-                    agent._rebuild_anthropic_client()
-                except Exception:
-                    pass
-            else:
-                try:
-                    agent._replace_primary_openai_client(reason="stale_stream_pool_cleanup")
-                except Exception:
-                    pass
+            try:
+                agent._replace_primary_openai_client(reason="stale_stream_pool_cleanup")
+            except Exception:
+                pass
            # Reset the timer so we don't kill repeatedly while
            # the inner thread processes the closure.
            last_chunk_time["t"] = time.time()
@@ -2801,30 +2699,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                role="assistant", content=_partial_text, tool_calls=None,
                reasoning_content=None,
            )
-            # Detect provider output-layer content filtering (e.g. MiniMax
-            # "output new_sensitive (1027)", Azure/OpenAI content_filter,
-            # Anthropic safety refusal).  The raw error is about to be
-            # swallowed into a finish_reason=length stub, so classify it HERE
-            # while we still have it and stamp the stub.  Retrying such a
-            # content-deterministic filter on the same primary just re-hits
-            # the filter — the conversation loop reads this tag and activates
-            # the fallback chain instead of burning continuation retries.
-            # error_classifier is the single source of truth for "what counts
-            # as a content filter" (#32421).
-            _content_filter_terminated = False
-            try:
-                from agent.error_classifier import classify_api_error, FailoverReason
-                _cls = classify_api_error(
-                    result["error"],
-                    provider=str(getattr(agent, "provider", "") or ""),
-                    model=str(getattr(agent, "model", "") or ""),
-                )
-                _content_filter_terminated = (
-                    _cls.reason == FailoverReason.content_policy_blocked
-                )
-            except Exception:
-                _content_filter_terminated = False
-            _stub = SimpleNamespace(
+            return SimpleNamespace(
                id=PARTIAL_STREAM_STUB_ID,
                model=getattr(agent, "model", "unknown"),
                choices=[SimpleNamespace(
@@ -2833,9 +2708,6 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                usage=None,
                _dropped_tool_names=_partial_names or None,
            )
-            if _content_filter_terminated:
-                _stub._content_filter_terminated = True
-            return _stub
        raise result["error"]
    return result["response"]

--- a/agent/coding_context.py
+++ b/agent/coding_context.py
@@ -60,8 +60,6 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Optional

-from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags
-
 logger = logging.getLogger("hermes.coding_context")

 CODING_TOOLSET = "coding"
@@ -649,14 +647,12 @@ def _enabled_mcp_servers(config: Optional[dict[str, Any]]) -> list[str]:


 def _git(cwd: Path, *args: str) -> str:
-    _popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
    try:
        out = subprocess.run(
            ["git", "-C", str(cwd), *args],
            capture_output=True,
            text=True,
            timeout=_GIT_TIMEOUT,
-            **_popen_kwargs,
        )
    except (OSError, subprocess.SubprocessError):
        return ""
--- a/agent/context_references.py
+++ b/agent/context_references.py
@@ -12,7 +12,6 @@ from pathlib import Path
 from typing import Awaitable, Callable

 from agent.model_metadata import estimate_tokens_rough
-from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags

 _QUOTED_REFERENCE_VALUE = r'(?:`[^`\n]+`|"[^"\n]+"|\'[^\'\n]+\')'
 REFERENCE_PATTERN = re.compile(
@@ -291,7 +290,6 @@ def _expand_git_reference(
    args: list[str],
    label: str,
 ) -> tuple[str | None, str | None]:
-    _popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
    try:
        result = subprocess.run(
            ["git", *args],
@@ -300,7 +298,6 @@ def _expand_git_reference(
            text=True,
            timeout=30,
            stdin=subprocess.DEVNULL,
-            **_popen_kwargs,
        )
    except subprocess.TimeoutExpired:
        return f"{ref.raw}: git command timed out (30s)", None
@@ -486,7 +483,6 @@ def _iter_visible_entries(path: Path, cwd: Path, limit: int) -> list[Path]:


 def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
-    _popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
    try:
        result = subprocess.run(
            ["rg", "--files", str(path.relative_to(cwd))],
@@ -495,7 +491,6 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
            text=True,
            timeout=10,
            stdin=subprocess.DEVNULL,
-            **_popen_kwargs,
        )
    except (FileNotFoundError, OSError, subprocess.TimeoutExpired):
        return None
--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@@ -288,29 +288,6 @@ def replay_compression_warning(agent: Any) -> None:
            pass


-def conversation_history_after_compression(agent: Any, messages: list) -> Optional[list]:
-    """Return the correct flush baseline after a compression boundary.
-
-    Legacy compression rotates to a fresh child session. That child has not
-    seen the compacted transcript through the normal same-turn flush path yet,
-    so callers must clear ``conversation_history`` to ``None`` and let the next
-    persistence call write the whole compacted list.
-
-    In-place compaction is different: ``archive_and_compact()`` has already
-    soft-archived the previous active rows and inserted ``messages`` as the new
-    active live transcript under the same session id. If the same agent turn
-    continues with ``conversation_history=None``, the identity-based flush path
-    treats those already-persisted compacted dicts as new and appends them a
-    second time, doubling the active context and retriggering compression.
-
-    A shallow copy is intentional: it captures the current compacted dict
-    identities as history while allowing later same-turn appends to remain new.
-    """
-    if bool(getattr(agent, "_last_compaction_in_place", False)):
-        return list(messages)
-    return None
-
-
 def compress_context(
    agent: Any,
    messages: list,
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -28,7 +28,6 @@ import uuid
 from typing import Any, Dict, List, Optional

 from agent.codex_responses_adapter import _summarize_user_message_for_log
-from agent.conversation_compression import conversation_history_after_compression
 from agent.display import KawaiiSpinner
 from agent.error_classifier import FailoverReason, classify_api_error
 from agent.iteration_budget import IterationBudget
@@ -588,13 +587,6 @@ def run_conversation(
    compression_attempts = 0
    _turn_exit_reason = "unknown"  # Diagnostic: why the loop ended

-    # Per-turn tally of consecutive successful credential-pool token refreshes,
-    # keyed by (provider, pool-entry-id). A persistent upstream 401 lets
-    # ``try_refresh_current()`` "succeed" forever on a single-entry OAuth pool,
-    # so this tally caps same-entry refreshes and lets the fallback chain take
-    # over instead of spinning. Reset here so each turn starts fresh. See #26080.
-    agent._auth_pool_refresh_counts = {}
-
    # Optional opt-in runtime: if api_mode == codex_app_server, hand the
    # turn to the codex app-server subprocess (terminal/file ops/patching
    # all run inside Codex). Default Hermes path is bypassed entirely.
@@ -835,6 +827,7 @@ def run_conversation(
                    aggregator=moa_config.get("aggregator") or {},
                    temperature=float(moa_config.get("reference_temperature", 0.6) or 0.6),
                    aggregator_temperature=float(moa_config.get("aggregator_temperature", 0.4) or 0.4),
+                    max_tokens=int(moa_config.get("max_tokens", 4096) or 4096),
                )
                if _moa_context:
                    for _msg in reversed(api_messages):
@@ -1699,56 +1692,6 @@ def run_conversation(

                    if agent.api_mode in {"chat_completions", "bedrock_converse", "anthropic_messages"}:
                        assistant_message = _trunc_msg
-                        # ── Content-filter stream stall → fallback (#32421) ──
-                        # When the provider's output-layer safety filter (e.g.
-                        # MiniMax "output new_sensitive (1027)", Azure
-                        # content_filter) kills the stream mid-delivery, the
-                        # raw error was classified at the swallow point and the
-                        # stub tagged ``_content_filter_terminated``.  This
-                        # filter is content-deterministic — continuation
-                        # retries against the SAME primary just re-hit it and
-                        # burn paid attempts (the loop used to give up with
-                        # "Response remained truncated after 3 continuation
-                        # attempts" and never consult the fallback chain).
-                        # Escalate to the configured fallback BEFORE retrying.
-                        _cf_terminated = getattr(
-                            response, "_content_filter_terminated", False
-                        )
-                        if (
-                            _cf_terminated
-                            and agent._fallback_index < len(agent._fallback_chain)
-                        ):
-                            agent._vprint(
-                                f"{agent.log_prefix}🛡️  Content filter terminated "
-                                f"stream — activating fallback provider...",
-                                force=True,
-                            )
-                            agent._emit_status(
-                                "Content filter terminated stream; switching to fallback..."
-                            )
-                            if agent._try_activate_fallback():
-                                # Roll the partial content (if any was already
-                                # appended in a prior continuation pass) back to
-                                # the last clean turn so the fallback provider
-                                # gets a coherent continuation point.
-                                if truncated_response_parts:
-                                    messages = agent._get_messages_up_to_last_assistant(messages)
-                                agent._session_messages = messages
-                                length_continue_retries = 0
-                                truncated_response_parts = []
-                                retry_count = 0
-                                compression_attempts = 0
-                                _retry.primary_recovery_attempted = False
-                                _retry.restart_with_rebuilt_messages = True
-                                break
-                            # No fallback available — fall through to normal
-                            # continuation (best-effort, may loop).
-                            agent._vprint(
-                                f"{agent.log_prefix}⚠️  No fallback provider "
-                                f"configured — retrying with same provider "
-                                f"(may re-hit filter)...",
-                                force=True,
-                            )
                        if assistant_message is not None and not _trunc_has_tool_calls:
                            length_continue_retries += 1
                            interim_msg = agent._build_assistant_message(assistant_message, finish_reason)
@@ -2316,15 +2259,6 @@ def run_conversation(
                    # "unknown variant `image_url`, expected `text`".
                    "unknown variant `image_url`, expected `text`",
                    "unknown variant image_url, expected text",
-                    # OpenRouter routes a request to upstream endpoints and,
-                    # when none of the candidate endpoints for the model accept
-                    # image input, returns HTTP 404 "No endpoints found that
-                    # support image input". Without this phrase the agent never
-                    # strips the images, the retry loop re-sends the same
-                    # rejected request until exhaustion, and the gateway leaves
-                    # every subsequent message queued behind the stuck turn —
-                    # the P1 in issue #21160. The 404 passes the 4xx gate below.
-                    "no endpoints found that support image input",
                )
                _err_lower = _err_body.lower()
                _looks_like_image_rejection = any(
@@ -2896,9 +2830,10 @@ def run_conversation(
                            approx_tokens=approx_tokens,
                            task_id=effective_task_id,
                        )
-                        conversation_history = conversation_history_after_compression(
-                            agent, messages
-                        )
+                        # Compression created a new session — clear history
+                        # so _flush_messages_to_session_db writes compressed
+                        # messages to the new session, not skipping them.
+                        conversation_history = None
                        if len(messages) < original_len or old_ctx > _reduced_ctx:
                            agent._buffer_status(
                                f"🗜️ Context reduced to {_reduced_ctx:,} tokens "
@@ -2910,25 +2845,15 @@ def run_conversation(
                    # Fall through to normal error handling if compression
                    # is exhausted or didn't help.

-                # Eager fallback for rate-limit errors (429 or quota exhaustion)
-                # and transport errors (connection failure / timeout / provider
-                # overloaded).  Rate limits and billing: switch immediately —
-                # the primary provider won't recover within the retry window.
-                # Transport errors: allow 1 retry first (transient hiccups
-                # recover), then fall back if the provider is truly unreachable.
+                # Eager fallback for rate-limit errors (429 or quota exhaustion).
+                # When a fallback model is configured, switch immediately instead
+                # of burning through retries with exponential backoff -- the
+                # primary provider won't recover within the retry window.
                is_rate_limited = classified.reason in {
                    FailoverReason.rate_limit,
                    FailoverReason.billing,
                }
-                _is_transport_failure = classified.reason in {
-                    FailoverReason.timeout,
-                    FailoverReason.overloaded,
-                }
-                _should_fallback = (
-                    is_rate_limited
-                    or (_is_transport_failure and retry_count >= 2)
-                )
-                if _should_fallback and agent._fallback_index < len(agent._fallback_chain):
+                if is_rate_limited and agent._fallback_index < len(agent._fallback_chain):
                    # Don't eagerly fallback if credential pool rotation may
                    # still recover.  See _pool_may_recover_from_rate_limit
                    # for the single-credential-pool and CloudCode-quota
@@ -2943,10 +2868,6 @@ def run_conversation(
                            agent._buffer_status(
                                "⚠️ Billing or credits exhausted — switching to fallback provider..."
                            )
-                        elif _is_transport_failure:
-                            agent._buffer_status(
-                                "⚠️ Provider unreachable — switching to fallback provider..."
-                            )
                        else:
                            agent._buffer_status("⚠️ Rate limited — switching to fallback provider...")
                        if agent._try_activate_fallback(reason=classified.reason):
@@ -3121,9 +3042,10 @@ def run_conversation(
                        messages, system_message, approx_tokens=approx_tokens,
                        task_id=effective_task_id,
                    )
-                    conversation_history = conversation_history_after_compression(
-                        agent, messages
-                    )
+                    # Compression created a new session — clear history
+                    # so _flush_messages_to_session_db writes compressed
+                    # messages to the new session, not skipping them.
+                    conversation_history = None

                    # Re-estimate tokens after compression.  Same-message-count
                    # compression (tool-result pruning, in-place summarization)
@@ -3287,9 +3209,10 @@ def run_conversation(
                        messages, system_message, approx_tokens=approx_tokens,
                        task_id=effective_task_id,
                    )
-                    conversation_history = conversation_history_after_compression(
-                        agent, messages
-                    )
+                    # Compression created a new session — clear history
+                    # so _flush_messages_to_session_db writes compressed
+                    # messages to the new session, not skipping them.
+                    conversation_history = None

                    # Re-estimate tokens after compression.  Same-message-count
                    # compression (tool-result pruning, in-place summarization)
@@ -3551,13 +3474,6 @@ def run_conversation(
                    ):
                        _retry.primary_recovery_attempted = True
                        retry_count = 0
-                        # Primary transport recovery starts a fresh attempt
-                        # cycle. Re-open fallback state so a follow-on 429 can
-                        # still activate fallback_providers after stale
-                        # pre-recovery fallback/credential-pool bookkeeping.
-                        _retry.has_retried_429 = False
-                        agent._fallback_index = 0
-                        agent._fallback_activated = False
                        continue
                    # Try fallback before giving up entirely
                    if agent._has_pending_fallback():
@@ -3745,12 +3661,7 @@ def run_conversation(
                        _ra_raw = _resp_headers.get("retry-after") or _resp_headers.get("Retry-After")
                        if _ra_raw:
                            try:
-                                # Cap at 10 minutes. Anthropic Tier 1 input-token
-                                # buckets reset in ~171s, so a 120s cap caused us to
-                                # retry before the actual reset window and re-trip the
-                                # limit. 600s covers all realistic provider reset
-                                # windows while still rejecting pathological values. (#26293)
-                                _retry_after = min(float(_ra_raw), 600)
+                                _retry_after = min(float(_ra_raw), 120)  # Cap at 2 minutes
                            except (TypeError, ValueError):
                                pass
                wait_time = _retry_after if _retry_after else jittered_backoff(retry_count, base_delay=2.0, max_delay=60.0)
@@ -3831,17 +3742,6 @@ def run_conversation(
            _retry.restart_with_compressed_messages = False
            continue

-        if _retry.restart_with_rebuilt_messages:
-            # A content-filter stream stall (#32421) was escalated to the
-            # fallback chain and the partial content rolled back.  Re-issue
-            # the API call against the now-active fallback provider.  Refund
-            # the budget/count for the stalled attempt so the fallback gets a
-            # fair turn.
-            api_call_count -= 1
-            agent.iteration_budget.refund()
-            _retry.restart_with_rebuilt_messages = False
-            continue
-
        if _retry.restart_with_length_continuation:
            # Progressively boost the output token budget on each retry.
            # Retry 1 → 2× base, retry 2 → 3× base, capped at 32 768.
@@ -4416,9 +4316,10 @@ def run_conversation(
                        approx_tokens=agent.context_compressor.last_prompt_tokens,
                        task_id=effective_task_id,
                    )
-                    conversation_history = conversation_history_after_compression(
-                        agent, messages
-                    )
+                    # Compression created a new session — clear history so
+                    # _flush_messages_to_session_db writes compressed messages
+                    # to the new session (see preflight compression comment).
+                    conversation_history = None
                
                # Save session log incrementally (so progress is visible even if interrupted)
                agent._session_messages = messages
@@ -4460,11 +4361,7 @@ def run_conversation(
                            "as final response"
                        )
                        final_response = _recovered
-                        # Streaming delivered a fragment, not a confirmed
-                        # final preview. Leave response_previewed false so
-                        # gateway fallback delivery can send the recovered
-                        # text plus the abnormal-turn explanation.
-                        agent._response_was_previewed = False
+                        agent._response_was_previewed = True
                        break

                    # If the previous turn already delivered real content alongside
@@ -4709,20 +4606,14 @@ def run_conversation(
                # status from earlier failed attempts in this turn.
                agent._clear_status_buffer()

-                from agent.agent_runtime_helpers import (
-                    intent_ack_continuation_mode,
-                )
-
-                _ack_mode = intent_ack_continuation_mode(agent)
                if (
-                    _ack_mode != "off"
+                    agent.api_mode == "codex_responses"
                    and agent.valid_tool_names
                    and codex_ack_continuations < 2
                    and agent._looks_like_codex_intermediate_ack(
                        user_message=user_message,
                        assistant_content=final_response,
                        messages=messages,
-                        require_workspace=(_ack_mode == "codex_only"),
                    )
                ):
                    codex_ack_continuations += 1
--- a/agent/copilot_acp_client.py
+++ b/agent/copilot_acp_client.py
@@ -23,7 +23,6 @@ from typing import Any

 from agent.file_safety import get_read_block_error, is_write_denied
 from agent.redact import redact_sensitive_text
-from tools.environments.local import hermes_subprocess_env

 ACP_MARKER_BASE_URL = "acp://copilot"
 _DEFAULT_TIMEOUT_SECONDS = 900.0
@@ -95,10 +94,7 @@ def _resolve_home_dir() -> str:


 def _build_subprocess_env() -> dict[str, str]:
-    # Copilot ACP is a model-driving CLI executor: it legitimately needs LLM
-    # provider credentials. Route through the central helper so Tier-1 secrets
-    # (gateway bot tokens, GitHub auth, infra) are still stripped (#29157).
-    env = hermes_subprocess_env(inherit_credentials=True)
+    env = os.environ.copy()
    home = _resolve_home_dir()
    env["HOME"] = home
    from hermes_constants import apply_subprocess_home_env
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@@ -537,11 +537,10 @@ class CredentialPool:
                self._entries[idx] = new
                return

-    def _persist(self, *, removed_ids: Optional[List[str]] = None) -> None:
+    def _persist(self) -> None:
        write_credential_pool(
            self.provider,
            [entry.to_dict() for entry in self._entries],
-            removed_ids=removed_ids,
        )

    def _is_terminal_auth_failure(
@@ -1125,17 +1124,13 @@ class CredentialPool:
                        logger.debug(
                            "Failed to clear terminal xAI OAuth state: %s", clear_exc
                        )
-                    removed_ids = [
-                        item.id for item in self._entries
-                        if item.source == "loopback_pkce"
-                    ]
                    self._entries = [
                        item for item in self._entries
                        if item.source != "loopback_pkce"
                    ]
                    if self._current_id == entry.id:
                        self._current_id = None
-                    self._persist(removed_ids=removed_ids)
+                    self._persist()
                    return None
            # For openai-codex: same race as xAI/nous — another Hermes process
            # may have consumed the refresh token between our proactive sync
@@ -1195,17 +1190,13 @@ class CredentialPool:
                        logger.debug(
                            "Failed to clear terminal Codex OAuth state: %s", clear_exc
                        )
-                    removed_ids = [
-                        item.id for item in self._entries
-                        if item.source == "device_code"
-                    ]
                    self._entries = [
                        item for item in self._entries
                        if item.source != "device_code"
                    ]
                    if self._current_id == entry.id:
                        self._current_id = None
-                    self._persist(removed_ids=removed_ids)
+                    self._persist()
                    return None
            # For nous: another process may have consumed the refresh token
            # between our proactive sync and the HTTP call.  Re-sync from
@@ -1262,17 +1253,13 @@ class CredentialPool:
                        auth_mod.NOUS_DEVICE_CODE_SOURCE,
                        f"manual:{auth_mod.NOUS_DEVICE_CODE_SOURCE}",
                    }
-                    removed_ids = [
-                        item.id for item in self._entries
-                        if item.source in singleton_sources
-                    ]
                    self._entries = [
                        item for item in self._entries
                        if item.source not in singleton_sources
                    ]
                    if self._current_id == entry.id:
                        self._current_id = None
-                    self._persist(removed_ids=removed_ids)
+                    self._persist()
                    return None
            self._mark_exhausted(entry, None)
            return None
@@ -1434,7 +1421,7 @@ class CredentialPool:
            pruned_ids = set(entries_to_prune)
            self._entries = [e for e in self._entries if e.id not in pruned_ids]
        if cleared_any:
-            self._persist(removed_ids=entries_to_prune)
+            self._persist()
        return available

    def _select_unlocked(self) -> Optional[PooledCredential]:
@@ -1608,11 +1595,7 @@ class CredentialPool:
            replace(entry, priority=new_priority)
            for new_priority, entry in enumerate(self._entries)
        ]
-        write_credential_pool(
-            self.provider,
-            [entry.to_dict() for entry in self._entries],
-            removed_ids=[removed.id],
-        )
+        self._persist()
        if self._current_id == removed.id:
            self._current_id = None
        return removed
@@ -2274,11 +2257,6 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
 def load_pool(provider: str) -> CredentialPool:
    provider = (provider or "").strip().lower()
    raw_entries = read_credential_pool(provider)
-    disk_ids = {
-        entry.get("id")
-        for entry in raw_entries
-        if isinstance(entry, dict) and entry.get("id")
-    }
    raw_needs_sanitization = any(
        isinstance(payload, dict)
        and sanitize_borrowed_credential_payload(payload, provider) != payload
@@ -2307,10 +2285,8 @@ def load_pool(provider: str) -> CredentialPool:
        changed |= _normalize_pool_priorities(provider, entries)

    if changed:
-        new_ids = {entry.id for entry in entries}
        write_credential_pool(
            provider,
            [entry.to_dict() for entry in sorted(entries, key=lambda item: item.priority)],
-            removed_ids=disk_ids - new_ids,
        )
    return CredentialPool(provider, entries)
--- a/agent/curator.py
+++ b/agent/curator.py
@@ -273,21 +273,6 @@ def should_run_now(now: Optional[datetime] = None) -> bool:
 # Automatic state transitions (pure function, no LLM)
 # ---------------------------------------------------------------------------

-def _cron_referenced_skills() -> Set[str]:
-    """Skill names referenced by any cron job (incl. paused/disabled).
-
-    Best-effort: a cron-module import error or corrupt jobs store must never
-    break the curator, so any failure yields an empty set (no protection,
-    but no crash).
-    """
-    try:
-        from cron.jobs import referenced_skill_names as _refs
-        return _refs()
-    except Exception as e:
-        logger.debug("Curator could not read cron skill references: %s", e, exc_info=True)
-        return set()
-
-
 def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int]:
    """Walk every curator-managed skill and move active/stale/archived based on
    the latest real activity timestamp. Pinned skills are never touched.
@@ -307,8 +292,6 @@ def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int
    stale_cutoff = now - timedelta(days=get_stale_after_days())
    archive_cutoff = now - timedelta(days=get_archive_after_days())

-    cron_referenced = _cron_referenced_skills()
-
    counts = {"marked_stale": 0, "archived": 0, "reactivated": 0, "checked": 0, "seeded": 0}

    for row in _u.agent_created_report():
@@ -317,15 +300,6 @@ def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int
        if row.get("pinned"):
            continue

-        # A skill referenced by any cron job (incl. paused/disabled) is in
-        # use by definition — resuming or the next fire must find it. The
-        # scheduler only bumps usage when a job actually fires, so jobs that
-        # fire less often than archive_after_days, paused jobs, and far-future
-        # one-shots would otherwise have their skills aged out from under
-        # them. Treat referenced skills like pinned: never auto-transition.
-        if name in cron_referenced:
-            continue
-
        # First sight of a curation-eligible skill with no persisted record
        # (e.g. a newly-eligible built-in): anchor its clock to now and defer.
        if not row.get("_persisted", True):
@@ -342,18 +316,6 @@ def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int

        current = row.get("state", _u.STATE_ACTIVE)

-        # Never-used skills (use_count == 0) get a grace floor: don't archive
-        # one until it is at least stale_after_days old. A use=0 skill is
-        # absence of evidence, not evidence of staleness — a skill created
-        # recently may simply not have had its trigger come up yet.
-        never_used = int(row.get("use_count", 0) or 0) == 0
-        if never_used and anchor > stale_cutoff:
-            # Younger than the stale window — leave it alone entirely.
-            if current == _u.STATE_STALE:
-                _u.set_state(name, _u.STATE_ACTIVE)
-                counts["reactivated"] += 1
-            continue
-
        if anchor <= archive_cutoff and current != _u.STATE_ARCHIVED:
            ok, _msg = _u.archive_skill(name)
            if ok:
@@ -428,19 +390,10 @@ CURATOR_REVIEW_PROMPT = (
    "back load-bearing UX (slash-command entry points referenced in docs and "
    "tips) and are filtered out of the candidate list below — never resurrect "
    "one as an archive or absorb target.\n"
-    "3c. DO NOT archive or prune any skill marked `cron=yes` in the candidate "
-    "list. A cron job depends on it and will fail to load it on its next "
-    "run. You MAY still consolidate it into an umbrella — but only because "
-    "the curator rewrites cron job skill references to follow consolidations; "
-    "never simply prune it.\n"
    "4. DO NOT use usage counters as a reason to skip consolidation. The "
    "counters are new and often mostly zero. Judge overlap on CONTENT, "
    "not on use_count. 'use=0' is not evidence a skill is valuable; it's "
-    "absence of evidence either way. Corollary: 'use=0' is ALSO not a "
-    "reason to PRUNE a skill. Never archive a never-used skill (use=0) "
-    "unless it is at least 30 days old (check last_activity / created date) "
-    "AND its content is genuinely obsolete or fully absorbed elsewhere — a "
-    "recently-created skill simply may not have had its trigger come up yet.\n"
+    "absence of evidence either way.\n"
    "5. DO NOT reject consolidation on the grounds that 'each skill has "
    "a distinct trigger'. Pairwise distinctness is the wrong bar. The "
    "right bar is: 'would a human maintainer write this as N separate "
@@ -1460,14 +1413,12 @@ def _render_candidate_list() -> str:
    rows = skill_usage.agent_created_report()
    if not rows:
        return "No agent-created skills to review."
-    cron_referenced = _cron_referenced_skills()
    lines = [f"Agent-created skills ({len(rows)}):\n"]
    for r in rows:
        lines.append(
            f"- {r['name']}  "
            f"state={r['state']}  "
            f"pinned={'yes' if r.get('pinned') else 'no'}  "
-            f"cron={'yes' if r['name'] in cron_referenced else 'no'}  "
            f"activity={r.get('activity_count', 0)}  "
            f"use={r.get('use_count', 0)}  "
            f"view={r.get('view_count', 0)}  "
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@@ -133,31 +133,6 @@ _RATE_LIMIT_PATTERNS = [
    "servicequotaexceededexception",
 ]

-# Patterns that indicate provider-side overload, NOT a per-credential rate
-# limit or billing problem.  The credential is valid — the server is just
-# busy — so the correct recovery is "back off and retry the same key", never
-# "rotate the credential" (rotating exhausts the pool while the endpoint is
-# still busy; a single-key user has nothing to rotate to).  Some providers
-# (notably Z.AI / Zhipu) reuse HTTP 429 for server-wide overload, so the 429
-# status path matches the body against this list before falling through to
-# the rate_limit default.  Phrases are kept narrow and overload-flavoured so a
-# normal rate-limit message ("you have been rate-limited") doesn't hit this
-# bucket. (#14038, #15297)
-_OVERLOADED_PATTERNS = [
-    "overloaded",
-    "temporarily overloaded",
-    "service is temporarily overloaded",
-    "service may be temporarily overloaded",
-    "server is overloaded",
-    "server overloaded",
-    "service overloaded",
-    "service is overloaded",
-    "upstream overloaded",
-    "currently overloaded",
-    "at capacity",
-    "over capacity",
-]
-
 # Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
 _USAGE_LIMIT_PATTERNS = [
    "usage limit",
@@ -355,14 +330,6 @@ _CONTENT_POLICY_BLOCKED_PATTERNS = [
    # echo back; the underscore form is provider-specific enough.
    "content_filter",
    "responsibleaipolicyviolation",
-    # MiniMax output-layer safety filter. The error string is surfaced
-    # verbatim by MiniMax SDK / OpenAI-compatible endpoints, usually in the
-    # form "output new_sensitive (1027)" when the model's *output* (often a
-    # large tool-call argument block) trips the upstream safety filter and
-    # the SSE stream is truncated mid-flight. ``new_sensitive`` is the
-    # filter name and is narrow enough that billing / format / auth error
-    # strings will not collide. See #32421.
-    "new_sensitive",
 ]

 # Auth patterns (non-status-code signals)
@@ -896,19 +863,7 @@ def _classify_by_status(
        )

    if status_code == 429:
-        # Already checked long_context_tier above. Some providers (notably
-        # Z.AI / Zhipu) reuse HTTP 429 for server-wide overload — same status
-        # code as a true per-credential rate limit, but the credential is
-        # valid and the correct recovery is "back off and retry the same key",
-        # NOT "rotate the credential" (which exhausts the pool while the
-        # endpoint is still busy, and does nothing for a single-key user).
-        # Disambiguate on the error body so an overload 429 takes the
-        # transient-overload path instead of burning the pool. (#14038)
-        if any(p in error_msg for p in _OVERLOADED_PATTERNS):
-            return result_fn(
-                FailoverReason.overloaded,
-                retryable=True,
-            )
+        # Already checked long_context_tier above; this is a normal rate limit
        return result_fn(
            FailoverReason.rate_limit,
            retryable=True,
@@ -1259,17 +1214,6 @@ def _classify_by_message(
            should_fallback=True,
        )

-    # Overloaded / server-busy patterns — must come BEFORE the rate_limit and
-    # billing checks so that a message-only "overloaded" (no 503/529 status,
-    # e.g. some Anthropic-compatible proxies) classifies as a transient
-    # overload (backoff + retry) instead of falling through to `unknown` or
-    # incorrectly triggering credential rotation.
-    if any(p in error_msg for p in _OVERLOADED_PATTERNS):
-        return result_fn(
-            FailoverReason.overloaded,
-            retryable=True,
-        )
-
    # Billing patterns
    if any(p in error_msg for p in _BILLING_PATTERNS):
        return result_fn(
@@ -1359,25 +1303,19 @@ def _extract_status_code(error: Exception) -> Optional[int]:


 def _extract_error_body(error: Exception) -> dict:
-    """Extract the structured error body from an SDK exception or its cause chain."""
-    current = error
-    for _ in range(5):  # Match _extract_status_code() traversal depth.
-        body = getattr(current, "body", None)
-        if isinstance(body, dict):
-            return body
-        # Some errors have .response.json()
-        response = getattr(current, "response", None)
-        if response is not None:
-            try:
-                json_body = response.json()
-                if isinstance(json_body, dict):
-                    return json_body
-            except Exception:
-                pass
-        cause = getattr(current, "__cause__", None) or getattr(current, "__context__", None)
-        if cause is None or cause is current:
-            break
-        current = cause
+    """Extract the structured error body from an SDK exception."""
+    body = getattr(error, "body", None)
+    if isinstance(body, dict):
+        return body
+    # Some errors have .response.json()
+    response = getattr(error, "response", None)
+    if response is not None:
+        try:
+            json_body = response.json()
+            if isinstance(json_body, dict):
+                return json_body
+        except Exception:
+            pass
    return {}


--- a/agent/image_routing.py
+++ b/agent/image_routing.py
@@ -388,98 +388,14 @@ def _sniff_mime_from_bytes(raw: bytes) -> Optional[str]:
    # BMP: "BM"
    if raw.startswith(b"BM"):
        return "image/bmp"
-    # ISO-BMFF family (HEIC/HEIF/AVIF): bytes 4..8 == 'ftyp', major brand at 8..12
-    if len(raw) >= 12 and raw[4:8] == b"ftyp":
-        brand = raw[8:12]
-        if brand in {b"avif", b"avis"}:
-            return "image/avif"
-        if brand in {
-            b"heic", b"heix", b"hevc", b"hevx",
-            b"mif1", b"msf1", b"heim", b"heis",
-        }:
-            return "image/heic"
-    # TIFF: II*\0 (little-endian) or MM\0* (big-endian)
-    if raw[:4] in {b"II*\x00", b"MM\x00*"}:
-        return "image/tiff"
-    # ICO: 00 00 01 00 (reserved=0, type=1=icon)
-    if raw[:4] == b"\x00\x00\x01\x00":
-        return "image/x-icon"
-    # SVG: text-based, look for an <svg tag near the start (skip BOM/whitespace)
-    head = raw[:512].lstrip().lower()
-    if head.startswith(b"<?xml") or head.startswith(b"<svg"):
-        if b"<svg" in head:
-            return "image/svg+xml"
+    # HEIC/HEIF: ftypheic / ftypheix / ftypmif1 / ftypmsf1 etc.
+    if len(raw) >= 12 and raw[4:8] == b"ftyp" and raw[8:12] in {
+        b"heic", b"heix", b"hevc", b"hevx", b"mif1", b"msf1", b"heim", b"heis",
+    }:
+        return "image/heic"
    return None


-# Formats every major vision provider (Anthropic, OpenAI, Gemini, Bedrock)
-# accepts natively. Anything outside this set has to be transcoded to PNG
-# before we declare media_type, otherwise the provider returns HTTP 400
-# ("Could not process image" / "Unsupported image media type") and the
-# whole turn fails with no salvage path.
-#
-# Discord (and a few other chat platforms) freely accept attachments in
-# formats outside this set -- AVIF screenshots from Chromium, HEIC from
-# iPhones, TIFF from scanners, BMP from old Windows tools, ICO -- so users
-# do hit this in practice. SVG is vector and Pillow cannot rasterize it;
-# it is skipped (logged) rather than transcoded.
-_UNIVERSALLY_SUPPORTED_MIMES = frozenset({
-    "image/png", "image/jpeg", "image/gif", "image/webp",
-})
-
-
-def _transcode_to_png(raw: bytes) -> Optional[bytes]:
-    """Decode arbitrary image bytes with Pillow and re-encode as PNG.
-
-    Returns None if Pillow isn't installed or can't decode the input
-    (rare formats, corrupted bytes, missing optional decoder plugin for
-    HEIC/AVIF, or vector formats like SVG). Caller falls back to skipping
-    the image so the rest of the turn still works.
-
-    HEIC/HEIF and AVIF need optional Pillow plugins; we try to register
-    them on demand and swallow ImportError so a missing plugin just
-    looks like 'Pillow can't decode this' rather than crashing.
-    """
-    try:
-        from PIL import Image
-    except ImportError:
-        logger.info(
-            "image_routing: Pillow not installed; cannot transcode "
-            "non-standard image format to PNG. Install with `pip install Pillow` "
-            "(and `pillow-heif` / `pillow-avif-plugin` for those formats)."
-        )
-        return None
-    # Optional plugin registration. Silent on failure: an unsupported
-    # format will just fall through to Image.open raising below.
-    try:
-        import pillow_heif  # type: ignore
-
-        pillow_heif.register_heif_opener()
-    except Exception:
-        pass
-    try:
-        import pillow_avif  # type: ignore  # noqa: F401  -- registers AVIF on import
-    except Exception:
-        pass
-    try:
-        from io import BytesIO
-
-        with Image.open(BytesIO(raw)) as im:
-            # Pick an output mode PNG can serialise. Anything other than
-            # the standard set gets normalised to RGBA so transparency is
-            # preserved where the source had it.
-            if im.mode not in {"RGB", "RGBA", "L", "LA", "P"}:
-                im = im.convert("RGBA")
-            buf = BytesIO()
-            im.save(buf, format="PNG", optimize=False)
-            return buf.getvalue()
-    except Exception as exc:
-        logger.info(
-            "image_routing: Pillow could not transcode image to PNG -- %s", exc
-        )
-        return None
-
-
 def _guess_mime(path: Path, raw: Optional[bytes] = None) -> str:
    """Return image MIME type for *path*.

@@ -515,18 +431,8 @@ def _file_to_data_url(path: Path) -> Optional[str]:
    accept large images (OpenAI 49 MB+, Gemini 100 MB) don't pay a silent
    quality tax just because one other provider is stricter.

-    Format compatibility IS handled here: if the sniffed MIME isn't one
-    of ``_UNIVERSALLY_SUPPORTED_MIMES`` (i.e. it's something like AVIF,
-    HEIC, BMP, TIFF, or ICO that some providers reject outright), we
-    transcode to PNG with Pillow before declaring media_type. This fixes
-    the user-visible "Could not process image" HTTP 400 from Anthropic on
-    Discord-attached AVIF/HEIC/BMP files.
-
-    Returns None if the file can't be read OR if the format isn't
-    universally supported AND Pillow can't transcode it (Pillow missing,
-    HEIC/AVIF plugin missing, vector format like SVG, corrupt bytes). The
-    caller reports those paths in ``skipped`` and the rest of the turn
-    proceeds.
+    Returns None only if the file can't be read (missing, permission
+    denied, etc.); the caller reports those paths in ``skipped``.
    """
    try:
        raw = path.read_bytes()
@@ -534,22 +440,6 @@ def _file_to_data_url(path: Path) -> Optional[str]:
        logger.warning("image_routing: failed to read %s — %s", path, exc)
        return None
    mime = _guess_mime(path, raw=raw)
-    if mime not in _UNIVERSALLY_SUPPORTED_MIMES:
-        transcoded = _transcode_to_png(raw)
-        if transcoded is None:
-            logger.warning(
-                "image_routing: %s is %s which is not accepted by all major "
-                "vision providers and could not be transcoded to PNG; "
-                "skipping this attachment.",
-                path, mime,
-            )
-            return None
-        logger.info(
-            "image_routing: transcoded %s (%s) -> image/png for provider compatibility",
-            path.name, mime,
-        )
-        raw = transcoded
-        mime = "image/png"
    b64 = base64.b64encode(raw).decode("ascii")
    return f"data:{mime};base64,{b64}"

--- a/agent/moa_loop.py
+++ b/agent/moa_loop.py
@@ -8,7 +8,6 @@ iteration.

 from __future__ import annotations

-import hashlib
 import logging
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any
@@ -26,112 +25,20 @@ logger = logging.getLogger(__name__)
 # opening dozens of sockets at once.
 _MAX_REFERENCE_WORKERS = 8

-# Per-tool-result character budget for the advisory reference view. Tool
-# results can be huge (a full diff, a 5000-line file dump); replaying them
-# verbatim per reference per tool-loop step would blow the reference model's
-# context window and cost. We keep the agent's *actions* (tool calls) in full —
-# they are cheap, high-signal, and tell the reference what the agent did — but
-# preview each tool *result* head+tail so the reference still sees what came
-# back without replaying megabytes. The acting aggregator always gets the full,
-# untrimmed transcript; this budget only shapes the advisory copy.
-_REFERENCE_TOOL_RESULT_BUDGET = 4000
-
-# System prompt prepended to every reference-model call. References are
-# advisory — they do NOT act, call tools, or own the task. Without this
-# framing a reference receives the bare trimmed conversation and assumes it is
-# the acting agent: it then refuses ("I can't access repositories / URLs from
-# here") or tries to call tools it doesn't have. The prompt reframes the model
-# as an analyst whose job is to reason about the presented state and hand its
-# best thinking to the aggregator/orchestrator that will actually act.
-_REFERENCE_SYSTEM_PROMPT = (
-    "You are a reference advisor in a Mixture of Agents (MoA) process. You are "
-    "NOT the acting agent and you do NOT execute anything: you cannot call "
-    "tools, run commands, browse, or access files, repositories, or URLs, and "
-    "you should not try to or apologize for being unable to. A separate "
-    "aggregator/orchestrator model holds those capabilities and will take the "
-    "actual actions.\n\n"
-    "The conversation below is the current state of a task handled by that "
-    "acting agent. Your job is to give your most intelligent analysis of that "
-    "state: understand the goal, reason about the problem, and advise on what "
-    "to do next. Surface the best approach, concrete next steps and tool-use "
-    "strategy, likely pitfalls and risks, and anything the acting agent may "
-    "have missed or gotten wrong. Assume any referenced files, URLs, or "
-    "systems exist and reason about them from the context given rather than "
-    "asking for access.\n\n"
-    "Respond with your advice directly — no preamble, no disclaimers about "
-    "tools or access. Your response is private guidance handed to the "
-    "aggregator, not an answer shown to the user."
-)
-
-

 def _slot_label(slot: dict[str, str]) -> str:
    return f"{slot.get('provider', '').strip()}:{slot.get('model', '').strip()}"


-def _slot_runtime(slot: dict[str, str]) -> dict[str, Any]:
-    """Resolve a reference/aggregator slot to real runtime call kwargs.
-
-    A MoA slot is just a model selection — it must be called the same way any
-    model is called elsewhere, not through a bare ``call_llm(provider=...,
-    model=...)`` that leaves base_url/api_key/api_mode unresolved and lets the
-    auxiliary auto-detector guess. We route the slot's provider through
-    ``resolve_runtime_provider`` (the canonical provider→api_mode/base_url/
-    api_key resolver the CLI, gateway, and delegate_task all use), so the slot
-    gets its provider's real API surface — e.g. MiniMax → anthropic_messages,
-    GPT-5/o-series → max_completion_tokens, custom endpoints → their base_url.
-
-    Returns the kwargs to pass through to ``call_llm`` (provider/model plus the
-    resolved base_url/api_key when available). Falls back to the bare
-    provider/model on any resolution error so a misconfigured slot still
-    attempts the call rather than aborting the whole MoA turn.
-    """
-    provider = str(slot.get("provider") or "").strip()
-    model = str(slot.get("model") or "").strip()
-    out: dict[str, Any] = {"provider": provider, "model": model}
-    try:
-        from hermes_cli.runtime_provider import resolve_runtime_provider
-
-        rt = resolve_runtime_provider(requested=provider, target_model=model)
-        resolved_provider = str(rt.get("provider") or provider).strip().lower()
-        # call_llm treats an explicit base_url as a custom endpoint. That is
-        # correct for ordinary OpenAI-compatible targets, but wrong for OAuth /
-        # provider-backed targets whose provider branch adds auth refresh,
-        # request metadata, or request-shape adapters. Keep those providers
-        # identified by name.
-        if resolved_provider in {"nous", "openai-codex", "xai-oauth"}:
-            return out
-        # Pass the resolved endpoint through so call_llm builds the request for
-        # the provider's actual API surface instead of auto-detecting. base_url
-        # routes call_llm to the right adapter (incl. anthropic_messages mode);
-        # api_key is the resolved credential for that provider.
-        if rt.get("base_url"):
-            out["base_url"] = rt["base_url"]
-        if rt.get("api_key"):
-            out["api_key"] = rt["api_key"]
-    except Exception as exc:  # pragma: no cover - defensive
-        logger.debug("MoA slot runtime resolution failed for %s: %s", _slot_label(slot), exc)
-    return out
-
-
 def _run_reference(
    slot: dict[str, str],
    ref_messages: list[dict[str, Any]],
    *,
-    temperature: float | None = None,
-    max_tokens: int | None = None,
+    temperature: float,
+    max_tokens: int,
 ) -> tuple[str, str]:
    """Call one reference model and return ``(label, text)``.

-    The slot is resolved to its provider's real runtime (via ``_slot_runtime``)
-    and called through the same ``call_llm`` request-building path any model
-    uses, so per-model wire-format handling (anthropic_messages,
-    max_completion_tokens, fixed/forbidden temperature) applies identically to
-    a reference as it would if that model were the acting model. MoA imposes no
-    cap of its own (``max_tokens`` defaults to ``None`` → omitted → the model's
-    real maximum); ``temperature`` is only the user's configured preset value,
-    which call_llm may still override per model.
-
    Never raises: a failed reference becomes a labelled note so the aggregator
    can still act with partial context. Designed to run inside a thread pool —
    ``call_llm`` is synchronous/blocking, so threads (not asyncio) are the right
@@ -139,17 +46,13 @@ def _run_reference(
    """
    label = _slot_label(slot)
    try:
-        # Prepend the advisory-role system prompt so the reference understands
-        # it is analyzing state for an aggregator, not acting on the task. The
-        # trimmed view (_reference_messages) already strips the agent's own
-        # system prompt, so this is the only system message the reference sees.
-        messages = [{"role": "system", "content": _REFERENCE_SYSTEM_PROMPT}, *ref_messages]
        response = call_llm(
            task="moa_reference",
-            messages=messages,
+            provider=slot["provider"],
+            model=slot["model"],
+            messages=ref_messages,
            temperature=temperature,
            max_tokens=max_tokens,
-            **_slot_runtime(slot),
        )
        return label, _extract_text(response) or "(empty response)"
    except Exception as exc:
@@ -161,8 +64,8 @@ def _run_references_parallel(
    reference_models: list[dict[str, str]],
    ref_messages: list[dict[str, Any]],
    *,
-    temperature: float | None = None,
-    max_tokens: int | None = None,
+    temperature: float,
+    max_tokens: int,
 ) -> list[tuple[str, str]]:
    """Fan out all reference models in parallel, returning outputs in order.

@@ -203,140 +106,40 @@ def _run_references_parallel(
    return [r for r in results if r is not None]


-def _truncate_tool_result(text: str, budget: int = _REFERENCE_TOOL_RESULT_BUDGET) -> str:
-    """Head+tail preview of a tool result for the advisory view.
-
-    Keeps the first and last halves of the budget with a ``[... N chars
-    omitted ...]`` marker between them, so a reference sees both how the result
-    started and how it ended without replaying the whole payload.
-    """
-    if not text or len(text) <= budget:
-        return text
-    half = budget // 2
-    omitted = len(text) - 2 * half
-    return f"{text[:half]}\n[... {omitted} chars omitted ...]\n{text[-half:]}"
-
-
-def _render_tool_calls(tool_calls: Any) -> str:
-    """Render an assistant turn's tool_calls as readable text lines.
-
-    The advisory view cannot carry real ``tool_calls`` payloads (strict
-    providers reject tool_calls the reference never produced), so the agent's
-    actions are flattened to text the reference can read and reason about.
-    """
-    lines: list[str] = []
-    for tc in tool_calls or []:
-        fn = (tc.get("function") or {}) if isinstance(tc, dict) else {}
-        name = fn.get("name") or (tc.get("name") if isinstance(tc, dict) else "") or "tool"
-        args = fn.get("arguments")
-        if isinstance(args, str):
-            args_text = args
-        elif args is not None:
-            try:
-                import json
-
-                args_text = json.dumps(args, ensure_ascii=False)
-            except Exception:
-                args_text = str(args)
-        else:
-            args_text = ""
-        lines.append(f"[called tool: {name}({args_text})]" if args_text else f"[called tool: {name}]")
-    return "\n".join(lines)
-
-
 def _reference_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
-    """Build an advisory view of the conversation for reference models.
+    """Build an advisory-safe view of the conversation for reference models.

-    A reference gives an INFORMED judgement on the current state, so it must
-    see what the agent actually did — its tool calls AND the tool results that
-    came back — not just the agent's narration. We therefore preserve the whole
-    conversation flow, but flatten it into clean user/assistant *text* turns:
-
-      - system prompt: dropped (8K of Hermes boilerplate, not advisory signal).
-      - assistant turns: kept; any ``tool_calls`` are rendered inline as
-        ``[called tool: name(args)]`` text lines appended to the turn's text.
-      - ``tool``-role results: NOT dropped. Each is folded (head+tail preview,
-        see ``_truncate_tool_result``) into the *preceding* assistant turn as a
-        ``[tool result: ...]`` block, so the reference sees what came back.
-
-    This emits ZERO ``tool``-role messages and ZERO ``tool_calls`` arrays — only
-    plain user/assistant text — so strict providers (Mistral, Fireworks) that
-    reject orphan tool messages / unproduced tool_calls don't 400, while the
-    reference still has the full picture.
-
-    The view MUST end with a ``user`` turn. Anthropic (and OpenRouter→Anthropic)
-    interpret a trailing assistant turn as an assistant *prefill* to continue,
-    and no-prefill models (e.g. Claude Opus 4.8) reject it with
-    ``400 ... must end with a user message``. Rather than DELETE the agent's
-    latest context to satisfy that (which would blind the reference to the
-    current state), we APPEND a synthetic user turn asking the reference to
-    judge the state above. End-on-user is satisfied and no context is lost.
-
-    The acting aggregator always receives the full, untrimmed transcript; this
-    function only shapes the disposable advisory copy.
+    Reference calls are advisory: they never call tools and never emit the
+    ``tool_calls`` the main model did. Replaying the full transcript verbatim
+    (a) re-bills the ~8K-token Hermes system prompt per reference per
+    iteration and (b) risks 400s from strict providers (Mistral, Fireworks)
+    that reject orphan ``tool`` messages or ``tool_calls`` the reference never
+    produced. We keep only the user/assistant *text* turns, dropping the
+    system prompt, any ``tool``-role messages, and any ``tool_calls`` payloads.
    """
-    advisory_instruction = (
-        "[The conversation above is the current state of the task. Give your "
-        "most intelligent judgement: what is going on, what should happen next, "
-        "what risks or mistakes you see, and how the acting agent should "
-        "proceed.]"
-    )
-
-    rendered: list[dict[str, Any]] = []
-    last_user_content: str | None = None
+    trimmed: list[dict[str, Any]] = []
    for msg in messages:
        role = msg.get("role")
-        content = msg.get("content")
-        text = content if isinstance(content, str) else ""
-
-        if role == "system":
+        if role not in ("user", "assistant"):
+            # Drop system prompt and tool-result messages.
            continue
-        if role == "user":
-            if text.strip():
-                last_user_content = text
-            rendered.append({"role": "user", "content": text})
-        elif role == "assistant":
-            parts: list[str] = []
-            if text.strip():
-                parts.append(text.strip())
-            calls_text = _render_tool_calls(msg.get("tool_calls"))
-            if calls_text:
-                parts.append(calls_text)
-            # Empty assistant turns (no text, no calls) carry nothing advisory.
-            if parts:
-                rendered.append({"role": "assistant", "content": "\n".join(parts)})
-        elif role == "tool":
-            # Fold the tool result into the preceding assistant turn as text so
-            # the reference sees what came back, without emitting a tool-role
-            # message a reference never produced.
-            result_text = _truncate_tool_result(text)
-            block = f"[tool result: {result_text}]"
-            if rendered and rendered[-1].get("role") == "assistant":
-                rendered[-1]["content"] = rendered[-1]["content"] + "\n" + block
-            else:
-                # No assistant turn to attach to (e.g. a leading tool result);
-                # keep it as advisory context on its own assistant-role line.
-                rendered.append({"role": "assistant", "content": block})
-        # Any other role is ignored.
-
-    # End on a user turn: append a synthetic advisory request rather than
-    # deleting the agent's latest assistant context. This satisfies Anthropic's
-    # no-trailing-assistant-prefill rule while preserving full state.
-    if rendered and rendered[-1].get("role") == "assistant":
-        rendered.append({"role": "user", "content": advisory_instruction})
-    elif rendered and rendered[-1].get("role") == "user":
-        # Already ends on a user turn (fresh user prompt, no agent action yet).
-        # Leave it — the reference answers that prompt directly.
-        pass
-
-    if not rendered:
-        # Degenerate case: nothing rendered. Fall back to the latest user turn.
-        if last_user_content is not None:
-            return [{"role": "user", "content": last_user_content}]
+        content = msg.get("content")
+        if not isinstance(content, str):
+            # Skip any turn (user or assistant) whose non-string content is empty or None.
+            if not content:
+                continue
+        text = content if isinstance(content, str) else ""
+        if role == "assistant" and not text.strip():
+            # Assistant turn that was purely tool calls — nothing advisory.
+            continue
+        trimmed.append({"role": role, "content": text})
+    if not trimmed:
+        # Degenerate case (e.g. first turn was stripped): fall back to a
+        # minimal user turn so the reference still has something to answer.
        for msg in reversed(messages):
            if msg.get("role") == "user" and isinstance(msg.get("content"), str):
                return [{"role": "user", "content": msg["content"]}]
-    return rendered
+    return trimmed



@@ -366,18 +169,12 @@ def aggregate_moa_context(
    aggregator: dict[str, str],
    temperature: float = 0.6,
    aggregator_temperature: float = 0.4,
-    max_tokens: int | None = None,
+    max_tokens: int = 4096,
 ) -> str:
    """Run configured reference models and synthesize their advice.

    Failures are returned as model-specific notes instead of aborting the normal
    agent loop; the main model can still act with partial context.
-
-    ``max_tokens`` is ``None`` by default: MoA does not cap reference or
-    aggregator output, so each model uses its own maximum. ``call_llm`` omits
-    the parameter entirely when it is ``None`` (see its docstring), which also
-    sidesteps providers that reject ``max_tokens`` outright. A hardcoded cap
-    here previously truncated long aggregator syntheses.
    """
    reference_outputs: list[tuple[str, str]] = []
    ref_messages = _reference_messages(api_messages)
@@ -406,10 +203,11 @@ def aggregate_moa_context(
    try:
        response = call_llm(
            task="moa_aggregator",
+            provider=aggregator["provider"],
+            model=aggregator["model"],
            messages=[{"role": "user", "content": synth_prompt}],
            temperature=aggregator_temperature,
            max_tokens=max_tokens,
-            **_slot_runtime(aggregator),
        )
        synthesis = _extract_text(response)
    except Exception as exc:
@@ -432,38 +230,8 @@ def aggregate_moa_context(
 class MoAChatCompletions:
    """OpenAI-chat-compatible facade where the aggregator is the acting model."""

-    def __init__(self, preset_name: str, reference_callback: Any = None):
+    def __init__(self, preset_name: str):
        self.preset_name = preset_name or "default"
-        # Optional display hook. Called as reference outputs become available so
-        # frontends can show each reference model's answer as a labelled block
-        # before the aggregator acts. Signature:
-        #   reference_callback(event, **kwargs)
-        # where event is one of:
-        #   "moa.reference"   kwargs: index, count, label, text
-        #   "moa.aggregating" kwargs: aggregator (label), ref_count
-        # Never raises into the model call — display is best-effort.
-        self.reference_callback = reference_callback
-        # State-scoped reference cache. The agent loop calls create() once per
-        # tool-loop iteration; references should re-run whenever the task STATE
-        # advances — i.e. on every new user message AND every new tool result —
-        # so each reference judges the latest state. The advisory view
-        # (_reference_messages) now renders tool calls + results as text, so its
-        # signature changes on every new tool response; the cache key is that
-        # signature, so a new tool result is a cache MISS (references re-run)
-        # while a redundant create() call with identical state is a HIT (no
-        # re-run, no re-emit). This gives "fire on every user/tool response"
-        # for free, without re-firing on a pure no-op re-call.
-        self._ref_cache_key: tuple | None = None
-        self._ref_cache_outputs: list[tuple[str, str]] = []
-
-    def _emit(self, event: str, **kwargs: Any) -> None:
-        cb = self.reference_callback
-        if cb is None:
-            return
-        try:
-            cb(event, **kwargs)
-        except Exception as exc:  # pragma: no cover - display must never break the turn
-            logger.debug("MoA reference_callback failed for %s: %s", event, exc)

    def create(self, **api_kwargs: Any) -> Any:
        from hermes_cli.config import load_config
@@ -473,10 +241,7 @@ class MoAChatCompletions:
        messages = list(api_kwargs.get("messages") or [])
        reference_models = preset.get("reference_models") or []
        aggregator = preset.get("aggregator") or {}
-        # MoA does not cap reference or aggregator output: each model uses its
-        # own maximum. Passing max_tokens=None makes call_llm omit the parameter
-        # (it never caps by default), so a long aggregator synthesis is never
-        # truncated and providers that reject max_tokens don't 400.
+        max_tokens = int(preset.get("max_tokens", api_kwargs.get("max_tokens") or 4096) or 4096)
        temperature = float(preset.get("reference_temperature", 0.6) or 0.6)
        aggregator_temperature = float(preset.get("aggregator_temperature", api_kwargs.get("temperature") or 0.4) or 0.4)

@@ -488,52 +253,12 @@ class MoAChatCompletions:

        reference_outputs: list[tuple[str, str]] = []
        ref_messages = _reference_messages(messages)
-
-        # Turn-scoped cache: only run + display references when the advisory
-        # view changed (i.e. a new user turn). Within one turn the agent loop
-        # calls create() once per tool iteration with the same advisory view;
-        # reuse the cached outputs and skip both the re-run and the re-emit.
-        _sig = hashlib.sha256(
-            "\u0000".join(
-                f"{m.get('role')}:{m.get('content')}" for m in ref_messages
-            ).encode("utf-8", "replace")
-        ).hexdigest()
-        _cache_key = (self.preset_name, _sig, tuple(_slot_label(s) for s in reference_models))
-        _refs_from_cache = _cache_key == self._ref_cache_key and bool(self._ref_cache_outputs)
-
-        if _refs_from_cache:
-            reference_outputs = list(self._ref_cache_outputs)
-        else:
-            reference_outputs = _run_references_parallel(
-                reference_models,
-                ref_messages,
-                temperature=temperature,
-                max_tokens=None,
-            )
-            self._ref_cache_key = _cache_key
-            self._ref_cache_outputs = list(reference_outputs)
-
-            # Surface each reference model's answer to the display BEFORE the
-            # aggregator acts — once per turn (only on the iteration that
-            # actually ran them). The user sees one labelled block per
-            # reference (rendered like a thinking block) so the MoA process is
-            # visible rather than a silent pause. Best-effort: never blocks the
-            # turn.
-            _ref_count = len(reference_outputs)
-            for _idx, (_label, _text) in enumerate(reference_outputs, start=1):
-                self._emit(
-                    "moa.reference",
-                    index=_idx,
-                    count=_ref_count,
-                    label=_label,
-                    text=_text,
-                )
-            if _ref_count:
-                self._emit(
-                    "moa.aggregating",
-                    aggregator=_slot_label(aggregator),
-                    ref_count=_ref_count,
-                )
+        reference_outputs = _run_references_parallel(
+            reference_models,
+            ref_messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+        )

        agg_messages = [dict(m) for m in messages]
        if reference_outputs:
@@ -561,26 +286,21 @@ class MoAChatCompletions:
            raise RuntimeError("MoA aggregator cannot be another MoA preset")
        agg_kwargs = dict(api_kwargs)
        agg_kwargs["messages"] = agg_messages
-        # The aggregator is the acting model. Resolve its slot to the provider's
-        # real runtime (base_url/api_key/api_mode) and call it through the same
-        # request-building path any model uses — so per-model wire-format
-        # handling (anthropic_messages, max_completion_tokens, fixed/forbidden
-        # temperature) applies identically to it. MoA imposes no output cap:
-        # max_tokens is passed through from the caller (normally None → omitted
-        # → the model's real maximum). The preset's old hardcoded 4096 default
-        # is gone — it truncated long syntheses.
+        agg_kwargs["model"] = aggregator.get("model")
+        agg_kwargs["temperature"] = aggregator_temperature
        return call_llm(
            task="moa_aggregator",
+            provider=aggregator.get("provider"),
+            model=aggregator.get("model"),
            messages=agg_messages,
            temperature=aggregator_temperature,
            max_tokens=agg_kwargs.get("max_tokens"),
            tools=agg_kwargs.get("tools"),
            extra_body=agg_kwargs.get("extra_body"),
-            **_slot_runtime(aggregator),
        )


 class MoAClient:
-    def __init__(self, preset_name: str, reference_callback: Any = None):
+    def __init__(self, preset_name: str):
        self.chat = type("_MoAChat", (), {})()
-        self.chat.completions = MoAChatCompletions(preset_name, reference_callback=reference_callback)
+        self.chat.completions = MoAChatCompletions(preset_name)
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -478,16 +478,6 @@ def _infer_provider_from_url(base_url: str) -> Optional[str]:
    return None


-def _lmstudio_server_root(base_url: str) -> str:
-    """Return the LM Studio server root for native ``/api/v1`` endpoints."""
-    root = _normalize_base_url(base_url).rstrip("/")
-    for suffix in ("/api/v1", "/api", "/v1"):
-        if root.endswith(suffix):
-            root = root[: -len(suffix)].rstrip("/")
-            break
-    return root
-
-
 def _is_known_provider_base_url(base_url: str) -> bool:
    return _infer_provider_from_url(base_url) is not None

@@ -559,7 +549,6 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
    server_url = normalized
    if server_url.endswith("/v1"):
        server_url = server_url[:-3]
-    lmstudio_url = _lmstudio_server_root(base_url)

    headers = _auth_headers(api_key)

@@ -567,7 +556,7 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
        with httpx.Client(timeout=2.0, headers=headers) as client:
            # LM Studio exposes /api/v1/models — check first (most specific)
            try:
-                r = client.get(f"{lmstudio_url}/api/v1/models")
+                r = client.get(f"{server_url}/api/v1/models")
                if r.status_code == 200:
                    return "lm-studio"
            except Exception:
@@ -785,7 +774,7 @@ def fetch_endpoint_model_metadata(
    if is_local_endpoint(normalized):
        try:
            if detect_local_server_type(normalized, api_key=api_key) == "lm-studio":
-                server_url = _lmstudio_server_root(normalized)
+                server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized
                response = requests.get(
                    server_url.rstrip("/") + "/api/v1/models",
                    headers=headers,
@@ -1308,7 +1297,6 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
    server_url = base_url.rstrip("/")
    if server_url.endswith("/v1"):
        server_url = server_url[:-3]
-    lmstudio_url = _lmstudio_server_root(base_url)

    headers = _auth_headers(api_key)

@@ -1352,7 +1340,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
            # Use _model_id_matches for fuzzy matching: LM Studio stores models as
            # "publisher/slug" but users configure only "slug" after "local:" prefix.
            if server_type == "lm-studio":
-                resp = client.get(f"{lmstudio_url}/api/v1/models")
+                resp = client.get(f"{server_url}/api/v1/models")
                if resp.status_code == 200:
                    data = resp.json()
                    for m in data.get("models", []):
@@ -1658,34 +1646,6 @@ def get_model_context_length(
    if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
        return config_context_length

-    # 0a. MoA virtual provider — ``model`` is a preset name, not a real model,
-    # and ``base_url`` is the local virtual endpoint, so every probe below would
-    # miss and fall through to the 256K default. The aggregator is the acting
-    # model, so resolve the context window from the aggregator slot's real
-    # provider+model instead. References are advisory-only and never bound the
-    # acting context, so they're ignored here.
-    if (provider or "").strip().lower() == "moa":
-        try:
-            from hermes_cli.config import load_config
-            from hermes_cli.moa_config import resolve_moa_preset
-            from hermes_cli.runtime_provider import resolve_runtime_provider
-
-            preset = resolve_moa_preset(load_config().get("moa") or {}, model)
-            agg = preset.get("aggregator") or {}
-            agg_provider = str(agg.get("provider") or "").strip()
-            agg_model = str(agg.get("model") or "").strip()
-            if agg_model and agg_provider and agg_provider.lower() != "moa":
-                rt = resolve_runtime_provider(requested=agg_provider, target_model=agg_model)
-                return get_model_context_length(
-                    agg_model,
-                    base_url=rt.get("base_url", "") or "",
-                    api_key=rt.get("api_key", "") or "",
-                    provider=agg_provider,
-                )
-        except Exception:
-            logger.debug("MoA aggregator context-length resolution failed", exc_info=True)
-        # Fall through to the generic default if aggregator resolution failed.
-
    # 0b. custom_providers per-model override — check before any probe.
    # This closes the gap where /model switch and display paths used to fall
    # back to 128K despite the user having a per-model context_length set.
--- a/agent/process_bootstrap.py
+++ b/agent/process_bootstrap.py
@@ -26,7 +26,7 @@ from __future__ import annotations
 import os
 import sys
 import urllib.request
-from typing import Any, Optional
+from typing import Optional

 from utils import base_url_hostname, normalize_proxy_url

@@ -142,46 +142,6 @@ def _get_proxy_for_base_url(base_url: Optional[str]) -> Optional[str]:
    return proxy


-def build_keepalive_http_client(
-    base_url: str = "",
-    *,
-    async_mode: bool = False,
-) -> Optional[Any]:
-    """Build an httpx client for OpenAI SDK calls with env-only proxy policy.
-
-    Uses explicit ``HTTPS_PROXY`` / ``NO_PROXY`` env vars via
-    ``_get_proxy_for_base_url``. A custom transport disables httpx's default
-    ``trust_env`` path, so macOS system proxy settings from
-    ``urllib.request.getproxies()`` (which omit the ExceptionsList) are not
-    applied. Mirrors ``AIAgent._build_keepalive_http_client``.
-    """
-    try:
-        import httpx
-        import socket
-
-        if "api.githubcopilot.com" in str(base_url or "").lower():
-            client_cls = httpx.AsyncClient if async_mode else httpx.Client
-            return client_cls()
-
-        sock_opts = [(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)]
-        if hasattr(socket, "TCP_KEEPIDLE"):
-            sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 30))
-            sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 10))
-            sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 3))
-        elif hasattr(socket, "TCP_KEEPALIVE"):
-            sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPALIVE, 30))
-
-        proxy = _get_proxy_for_base_url(base_url)
-        transport_cls = httpx.AsyncHTTPTransport if async_mode else httpx.HTTPTransport
-        client_cls = httpx.AsyncClient if async_mode else httpx.Client
-        return client_cls(
-            transport=transport_cls(socket_options=sock_opts),
-            proxy=proxy,
-        )
-    except Exception:
-        return None
-
-
 def _install_safe_stdio() -> None:
    """Wrap stdout/stderr so best-effort console output cannot crash the agent."""
    for stream_name in ("stdout", "stderr"):
@@ -204,5 +164,4 @@ __all__ = [
    "_install_safe_stdio",
    "_get_proxy_from_env",
    "_get_proxy_for_base_url",
-    "build_keepalive_http_client",
 ]
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -88,15 +88,12 @@ def _find_hermes_md(cwd: Path) -> Optional[Path]:
    stop_at = _find_git_root(cwd)
    current = cwd.resolve()

-    # When there is no git root, only check cwd itself – walking parents
-    # could pick up a .hermes.md planted in /tmp, /home, etc.
-    search_dirs = [current, *current.parents] if stop_at else [current]
-
-    for directory in search_dirs:
+    for directory in [current, *current.parents]:
        for name in _HERMES_MD_NAMES:
            candidate = directory / name
            if candidate.is_file():
                return candidate
+        # Stop walking at the git root (or filesystem root).
        if stop_at and directory == stop_at:
            break
    return None
@@ -620,12 +617,7 @@ DEVELOPER_ROLE_MODELS = ("gpt-5", "codex")
 PLATFORM_HINTS = {
    "whatsapp": (
        "You are on a text messaging communication platform, WhatsApp. "
-        "Standard markdown (**bold**, *italic*, ~~strike~~, # headers, "
-        "`code`, ```code blocks```, [links](url)) is auto-converted to "
-        "WhatsApp's native syntax (*bold*, _italic_, ~strike~, monospace) — "
-        "feel free to write in markdown, and use bullet lists ('- item') "
-        "freely. Tables are NOT supported — prefer bullet lists or labeled "
-        "key:value pairs. "
+        "Please do not use markdown as it does not render. "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. The file "
        "will be sent as a native WhatsApp attachment — images (.jpg, .png, "
@@ -690,11 +682,7 @@ PLATFORM_HINTS = {
    ),
    "signal": (
        "You are on a text messaging communication platform, Signal. "
-        "Standard markdown (**bold**, *italic*, ~~strike~~, # headers, "
-        "`code`, ```code blocks```) is auto-converted to Signal's native "
-        "rich formatting — feel free to write in markdown, and use bullet "
-        "lists ('- item') freely (they render as • bullets). Tables are NOT "
-        "supported — prefer bullet lists or labeled key:value pairs. "
+        "Please do not use markdown as it does not render. "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. Images "
        "(.png, .jpg, .webp) appear as photos, audio as attachments, and other "
@@ -929,7 +917,8 @@ def _probe_remote_backend(env_type: str) -> str | None:
    try:
        # Import locally: tools/ imports are heavy and only relevant when a
        # non-local backend is actually configured.
-        from tools.terminal_tool import _create_environment, _get_env_config  # type: ignore
+        from tools.terminal_tool import _get_env_config  # type: ignore
+        from tools.environments import get_environment  # type: ignore
    except Exception as e:
        logger.debug("Backend probe unavailable (import failed): %s", e)
        _BACKEND_PROBE_CACHE[cache_key] = ""
@@ -937,59 +926,7 @@ def _probe_remote_backend(env_type: str) -> str | None:

    try:
        config = _get_env_config()
-        # Build the environment the same way tools/terminal_tool.py does for a
-        # live command: select the backend image, then assemble ssh/container
-        # config from the env-derived dict. (There is no `get_environment`
-        # factory — the real entry point is `_create_environment`.)
-        if env_type == "docker":
-            image = config.get("docker_image", "")
-        elif env_type == "singularity":
-            image = config.get("singularity_image", "")
-        elif env_type == "modal":
-            image = config.get("modal_image", "")
-        elif env_type == "daytona":
-            image = config.get("daytona_image", "")
-        else:
-            image = ""
-
-        ssh_config = None
-        if env_type == "ssh":
-            ssh_config = {
-                "host": config.get("ssh_host", ""),
-                "user": config.get("ssh_user", ""),
-                "port": config.get("ssh_port", 22),
-                "key": config.get("ssh_key", ""),
-                "persistent": config.get("ssh_persistent", False),
-            }
-
-        container_config = None
-        if env_type in {"docker", "singularity", "modal", "daytona"}:
-            container_config = {
-                "container_cpu": config.get("container_cpu", 1),
-                "container_memory": config.get("container_memory", 5120),
-                "container_disk": config.get("container_disk", 51200),
-                "container_persistent": config.get("container_persistent", True),
-                "modal_mode": config.get("modal_mode", "auto"),
-                "docker_volumes": config.get("docker_volumes", []),
-                "docker_mount_cwd_to_workspace": config.get("docker_mount_cwd_to_workspace", False),
-                "docker_forward_env": config.get("docker_forward_env", []),
-                "docker_env": config.get("docker_env", {}),
-                "docker_run_as_host_user": config.get("docker_run_as_host_user", False),
-                "docker_extra_args": config.get("docker_extra_args", []),
-                "docker_persist_across_processes": config.get("docker_persist_across_processes", True),
-                "docker_orphan_reaper": config.get("docker_orphan_reaper", True),
-            }
-
-        env = _create_environment(
-            env_type=env_type,
-            image=image,
-            cwd=config.get("cwd", ""),
-            timeout=config.get("timeout", 180),
-            ssh_config=ssh_config,
-            container_config=container_config,
-            task_id="prompt-backend-probe",
-            host_cwd=config.get("host_cwd"),
-        )
+        env = get_environment(config)
        # Single-line POSIX probe — works on any Unixy backend. Wrapped in
        # `2>/dev/null` so a missing binary doesn't pollute the output.
        probe_cmd = (
--- a/agent/redact.py
+++ b/agent/redact.py
@@ -10,7 +10,6 @@ the first 6 and last 4 characters for debuggability.
 import logging
 import os
 import re
-import shlex

 logger = logging.getLogger(__name__)

@@ -108,60 +107,12 @@ _PREFIX_PATTERNS = [
    r"ntn_[A-Za-z0-9]{10,}",            # Notion internal integration token
 ]

-# ENV assignment patterns: KEY=value where KEY contains a secret-like name.
-# Uppercase keys tolerate spaces around "=" (e.g. ``FOO_SECRET = bar``) because
-# an all-caps key is almost never prose/code.
+# ENV assignment patterns: KEY=value where KEY contains a secret-like name
 _SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)"
 _ENV_ASSIGN_RE = re.compile(
    rf"([A-Z0-9_]{{0,50}}{_SECRET_ENV_NAMES}[A-Z0-9_]{{0,50}})\s*=\s*(['\"]?)(\S+)\2",
 )

-# Lowercase / dotted / hyphenated config keys from config files
-# (application.properties, .env, YAML-ish dumps): ``spring.datasource.password=secret``,
-# ``app.api.key=xyz``, ``password=secret``. The uppercase _ENV_ASSIGN_RE above
-# never matched these, so config-file passwords leaked verbatim (issue #16413).
-#
-# These run only in a config-file context, NOT in prose, code, or URLs — three
-# carve-outs preserved from the original design (#4367 + the documented
-# web-URL passthrough below):
-#   1. The value is bounded by ``[^\s&]`` (stops at whitespace AND ``&``) so
-#      form-urlencoded bodies are handled pair-by-pair (by _redact_form_body),
-#      not greedily swallowed.
-#   2. _CFG_DOTTED_RE only matches when the key is NAMESPACED (contains a dot),
-#      which is unambiguously a config key — never a prose word.
-#   3. _CFG_ANCHORED_RE matches a bare secret-word key only at line start
-#      (optionally after ``export``), so conversational ``I have password=foo``
-#      mid-sentence is left alone.
-# The colon-form URL guard (skip when ``://`` present) lives at the call site.
-_SECRET_CFG_NAMES = r"(?:api[ _.\-]?key|token|secret|passwd|password|credential|auth)"
-_CFG_VALUE = r"(['\"]?)([^\s&]+?)\2(?=[\s&]|$)"
-# Namespaced (dotted) key: the secret word may sit anywhere in a dotted path.
-_CFG_DOTTED_RE = re.compile(
-    rf"((?:[A-Za-z0-9_\-]+\.)+[A-Za-z0-9_.\-]*{_SECRET_CFG_NAMES}[A-Za-z0-9_.\-]*"
-    rf"|[A-Za-z0-9_.\-]*{_SECRET_CFG_NAMES}[A-Za-z0-9_.\-]*\.[A-Za-z0-9_.\-]+)"
-    rf"={_CFG_VALUE}",
-    re.IGNORECASE,
-)
-# Line-anchored bare key: ``password=…`` / ``export api_key=…`` at start of line.
-_CFG_ANCHORED_RE = re.compile(
-    rf"(^[ \t]*(?:export[ \t]+)?[A-Za-z0-9_\-]*{_SECRET_CFG_NAMES}[A-Za-z0-9_\-]*)={_CFG_VALUE}",
-    re.IGNORECASE | re.MULTILINE,
-)
-
-# Unquoted YAML / colon config (e.g. ``password: secret``,
-# ``spring.datasource.password: hunter2``). The secret keyword must be part of
-# the KEY (anchored to the start of the line/indent), and the value is a single
-# whitespace-free token — so prose like ``note: secret meeting`` (keyword in the
-# value) and ``error: token expired`` are left alone. Bare ``auth`` is excluded
-# from the key set so ``Authorization:`` / ``author:`` don't match (the former
-# is masked by _AUTH_HEADER_RE); ``auth_token``/``auth-token`` still match via
-# the ``token`` keyword. Quoted values defer to _JSON_FIELD_RE via the lookahead.
-_YAML_CFG_NAMES = r"(?:api[ _.\-]?key|token|secret|passwd|password|credential)"
-_YAML_ASSIGN_RE = re.compile(
-    rf"(^[ \t]*[A-Za-z0-9_.\-]*{_YAML_CFG_NAMES}[A-Za-z0-9_.\-]*)(:[ \t]*)(?!['\"])([^\s&]+)",
-    re.IGNORECASE | re.MULTILINE,
-)
-
 # JSON field patterns: "apiKey": "value", "token": "value", etc.
 _JSON_KEY_NAMES = r"(?:api_?[Kk]ey|token|secret|password|access_token|refresh_token|auth_token|bearer|secret_value|raw_secret|secret_input|key_material)"
 _JSON_FIELD_RE = re.compile(
@@ -174,15 +125,8 @@ _JSON_FIELD_RE = re.compile(
 # while the header name and scheme word are preserved for debuggability. The
 # previous rule only matched ``Bearer``, so ``Basic <base64 user:pass>`` and
 # ``token <pat>`` leaked verbatim into logs/transcripts.
-#
-# The credential class excludes quote characters (``"`` / ``'``): a token sitting
-# flush against a closing quote (``"Authorization: Bearer sk-..."``) must not pull
-# that quote into the match, or masking turns value corruption into *syntax*
-# corruption — the closing quote vanishes and the command/string no longer parses
-# (unterminated quote → shell EOF / Python SyntaxError). Real credentials never
-# contain ``"`` or ``'``, so excluding them is safe. See #43083.
 _AUTH_HEADER_RE = re.compile(
-    r"((?:Proxy-)?Authorization:\s*)([A-Za-z][\w.+-]*\s+)?([^\s\"']+)",
+    r"((?:Proxy-)?Authorization:\s*)([A-Za-z][\w.+-]*\s+)?(\S+)",
    re.IGNORECASE,
 )

@@ -210,37 +154,9 @@ _PRIVATE_KEY_RE = re.compile(
 )

 # Database connection strings: protocol://user:PASSWORD@host
-# Catches postgres, mysql, mongodb, redis, amqp URLs and redacts the password.
-# The userinfo and password groups forbid whitespace ([^:\s]+ / [^@\s]+) so the
-# match can never span a line break. A real DSN password never contains
-# whitespace; without this bound the greedy [^@]+ would scan past the end of a
-# code line to the next stray "@" (e.g. a Python decorator), swallowing
-# intervening lines and corrupting tool OUTPUT for any source containing a
-# postgresql:// f-string template. See issue #33801.
+# Catches postgres, mysql, mongodb, redis, amqp URLs and redacts the password
 _DB_CONNSTR_RE = re.compile(
-    r"((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:\s]+:)([^@\s]+)(@)",
-    re.IGNORECASE,
-)
-
-# Bare-token credential in a web/transport URL: ``scheme://TOKEN@host``.
-# This is the ``git remote set-url origin https://PASSWORD@github.com/...``
-# shape from issue #6396 — a single opaque credential in the userinfo position
-# with NO ``user:pass`` colon. It is unambiguously a secret: legitimate
-# round-trip URLs (OAuth callbacks, magic links, pre-signed shares — see the
-# "Web-URL redaction is intentionally OFF" note in redact_sensitive_text) carry
-# their tokens in the QUERY STRING, never in bare userinfo. The colon form
-# ``user:pass@`` is deliberately left to pass through (commit "pass web URLs
-# through unchanged", #34029) and is NOT matched here — the token class forbids
-# ``:``. DB schemes are handled by _DB_CONNSTR_RE above and excluded here.
-#
-# Guards against false positives:
-#   - 8+ char floor skips short usernames (git, admin, root, deploy, ubuntu).
-#   - The token class ``[^\s:@/]`` cannot cross ``/``, so an ``@`` sitting in a
-#     path or query (e.g. ``?q=user@example.com``) is never treated as userinfo.
-_URL_BARE_TOKEN_RE = re.compile(
-    r"((?:https?|wss?|git|ssh|ftp|ftps|sftp)://)"  # scheme
-    r"([^\s:@/]{8,})"                               # bare token (no colon/slash/@), 8+ chars
-    r"(@[^\s]+)",                                   # @host...
+    r"((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:]+:)([^@]+)(@)",
    re.IGNORECASE,
 )

@@ -424,40 +340,7 @@ def _redact_form_body(text: str) -> str:
    return _redact_query_string(text.strip())


-def _mask_token_nonreusable(token: str) -> str:
-    """Redact a prefix-matched credential to a NON-REUSABLE sentinel.
-
-    Unlike :func:`_mask_token` (which keeps head/tail chars — fine for logs
-    that are never fed back into a config), this emits a marker that:
-
-    * cannot be mistaken for a usable-but-truncated key, so an agent that
-      reads it from a config file and writes it back does NOT corrupt the
-      stored credential into a dead 13-char string (issue #35519); and
-    * still does not leak the secret material (no head/tail chars).
-
-    The vendor prefix label is preserved for debuggability so the agent can
-    still tell *which* credential is present (e.g. a GitHub PAT vs an OpenAI
-    key) without seeing any of its bytes.
-    """
-    if not token:
-        return "«redacted-secret»"
-    # Preserve only the recognizable vendor prefix label (e.g. "ghp_", "sk-"),
-    # never any of the random secret body.
-    label = ""
-    for sub in _PREFIX_SUBSTRINGS:
-        if token.startswith(sub):
-            label = sub
-            break
-    return f"«redacted:{label}…»" if label else "«redacted-secret»"
-
-
-def redact_sensitive_text(
-    text: str,
-    *,
-    force: bool = False,
-    code_file: bool = False,
-    file_read: bool = False,
-) -> str:
+def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = False) -> str:
    """Apply all redaction patterns to a block of text.

    Safe to call on any string -- non-matching text passes through unchanged.
@@ -470,17 +353,6 @@ def redact_sensitive_text(
    constants, "apiKey": "test" fixtures). Prefix patterns, auth headers,
    private keys, DB connstrings, JWTs, and URL secrets are still redacted.

-    Set file_read=True for file *content* returned to the agent (read_file /
-    search_files / cat). Secrets are STILL redacted — they are never exposed —
-    but prefix-matched credentials are replaced with a non-reusable sentinel
-    (``«redacted:ghp_…»``) instead of a head/tail-preserving mask
-    (``ghp_S1...Pn2T``). The old mask looked like a real-but-truncated key, so
-    an agent reading it from config.yaml and writing it back silently corrupted
-    the stored credential into a dead 13-char value → 401 (issue #35519). The
-    sentinel is syntactically invalid as a token, so it can't be mistaken for a
-    usable key or written back as one. Implies code_file=True (config/data
-    files shouldn't trigger the source-code ENV/JSON false-positive paths).
-
    Performance: each regex pattern is gated behind a cheap substring
    pre-check (e.g. ``"=" in text`` for ENV assignments, ``"://" in text``
    for URLs, ``"eyJ" in text`` for JWTs). On a typical hermes log line
@@ -499,15 +371,9 @@ def redact_sensitive_text(
    if not (force or _REDACT_ENABLED):
        return text

-    # file_read content shouldn't hit the source-code ENV/JSON false-positive
-    # paths either (it's config/data, not log lines).
-    if file_read:
-        code_file = True
-
    # Known prefixes (sk-, ghp_, etc.) — gate on substring presence
    if _has_known_prefix_substring(text):
-        _prefix_sub = _mask_token_nonreusable if file_read else _mask_token
-        text = _PREFIX_RE.sub(lambda m: _prefix_sub(m.group(1)), text)
+        text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text)

    # ENV assignments: OPENAI_API_KEY=***  (skip for code files — false positives)
    if not code_file:
@@ -516,13 +382,6 @@ def redact_sensitive_text(
                name, quote, value = m.group(1), m.group(2), m.group(3)
                return f"{name}={quote}{_mask_token(value)}{quote}"
            text = _ENV_ASSIGN_RE.sub(_redact_env, text)
-            # Lowercase/dotted config keys (issue #16413). Skip URLs entirely —
-            # web-URL query params are intentionally passed through (see note
-            # near the bottom of this function); _DB_CONNSTR_RE still guards
-            # connection-string passwords.
-            if "://" not in text:
-                text = _CFG_DOTTED_RE.sub(_redact_env, text)
-                text = _CFG_ANCHORED_RE.sub(_redact_env, text)

        # JSON fields: "apiKey": "***"  (skip for code files — false positives)
        if ":" in text and '"' in text:
@@ -531,15 +390,6 @@ def redact_sensitive_text(
                return f'{key}: "{_mask_token(value)}"'
            text = _JSON_FIELD_RE.sub(_redact_json, text)

-        # Unquoted YAML / colon config: password: ***  (after JSON so quoted
-        # values are handled there; the lookahead in _YAML_ASSIGN_RE skips
-        # quotes). Skip URLs — web-URL query params pass through by design.
-        if ":" in text and "://" not in text:
-            def _redact_yaml(m):
-                key, sep, value = m.group(1), m.group(2), m.group(3)
-                return f"{key}{sep}{_mask_token(value)}"
-            text = _YAML_ASSIGN_RE.sub(_redact_yaml, text)
-
    # Authorization headers — _AUTH_HEADER_RE matches any scheme after
    # "[Proxy-]Authorization:" case-insensitively, so "uthorization" is the
    # cheapest substring gate that covers every casing without a casefold().
@@ -569,32 +419,9 @@ def redact_sensitive_text(
    if "BEGIN" in text and "-----" in text:
        text = _PRIVATE_KEY_RE.sub("[REDACTED PRIVATE KEY]", text)

-    # Database connection string passwords. With code_file=True, a password
-    # group that is a pure ``{...}`` brace expression is an f-string template
-    # reference (e.g. f"postgresql://{user}:{pass}@{host}"), not a literal
-    # credential — preserve it. Literal passwords are still redacted. The regex
-    # forbids whitespace in the password group, so a single-line template's
-    # group(2) is exactly the brace expression. See issue #33801.
+    # Database connection string passwords
    if "://" in text:
-        if code_file:
-            def _redact_db(m):
-                pw = m.group(2)
-                if pw.startswith("{") and pw.endswith("}"):
-                    return m.group(0)
-                return f"{m.group(1)}***{m.group(3)}"
-            text = _DB_CONNSTR_RE.sub(_redact_db, text)
-        else:
-            text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
-
-        # Bare-token userinfo in web/transport URLs: ``scheme://TOKEN@host``.
-        # The git-remote-with-embedded-password shape from #6396. Only the
-        # colon-less bare-token form is redacted — ``user:pass@`` and
-        # query-string tokens are left to pass through (see the web-URL note
-        # below). See _URL_BARE_TOKEN_RE for the false-positive guards.
-        text = _URL_BARE_TOKEN_RE.sub(
-            lambda m: f"{m.group(1)}{_mask_token(m.group(2))}{m.group(3)}",
-            text,
-        )
+        text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)

    # JWT tokens (eyJ... — base64-encoded JSON headers)
    if "eyJ" in text:
@@ -607,12 +434,7 @@ def redact_sensitive_text(
    # blanket-redacting param values by name breaks those skills mid-flow.
    # Known credential shapes (sk-, ghp_, JWTs, etc.) inside URLs are still
    # caught by _PREFIX_RE and _JWT_RE above. DB connection-string passwords
-    # are still caught by _DB_CONNSTR_RE. The ONE userinfo case still redacted
-    # is the colon-less bare-token form ``scheme://TOKEN@host`` (#6396, handled
-    # by _URL_BARE_TOKEN_RE in the ``://`` block above): a bare credential in
-    # userinfo is never a round-trip workflow token (those live in the query
-    # string), so masking it can't break a skill. The ``user:pass@`` form is
-    # left to pass through per #34029.
+    # are still caught by _DB_CONNSTR_RE.

    # Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
    if "&" in text and "=" in text:
@@ -630,66 +452,6 @@ def redact_sensitive_text(
    return text


-# Commands whose stdout is an environment-variable dump (KEY=value lines),
-# NOT source code. For these, terminal-output redaction must run the
-# ENV-assignment pass (code_file=False) so opaque tokens with no recognized
-# vendor prefix (e.g. ``MY_SERVICE_TOKEN=abc123randomstring``) are still
-# masked. For all other commands, code_file=True is used to avoid mangling
-# legitimate source/config dumps (``MAX_TOKENS=100``, ``"apiKey": "x"``
-# fixtures, ``postgresql://{user}`` f-string templates). See issue #43025.
-_ENV_DUMP_COMMANDS = frozenset({"env", "printenv", "set", "export", "declare"})
-
-
-def is_env_dump_command(command: str | None) -> bool:
-    """Return True if ``command`` dumps environment variables to stdout.
-
-    Detects ``env`` / ``printenv`` / ``set`` / ``export`` / ``declare`` as the
-    first token of any segment in a pipeline or sequence (``;`` / ``&&`` /
-    ``||`` / ``|``). Conservative: a parse failure or anything unrecognized
-    returns False (callers then fall back to the safer code_file=True path,
-    which still masks prefix-shaped keys).
-    """
-    if not command or not isinstance(command, str):
-        return False
-    # Split on shell separators, then inspect the first token of each segment.
-    segments = re.split(r"[|;&]+", command)
-    for seg in segments:
-        seg = seg.strip()
-        if not seg:
-            continue
-        try:
-            tokens = shlex.split(seg)
-        except ValueError:
-            tokens = seg.split()
-        if tokens and tokens[0] in _ENV_DUMP_COMMANDS:
-            return True
-    return False
-
-
-def redact_terminal_output(
-    output: str, command: str | None = None, *, force: bool = False
-) -> str:
-    """Redact secrets from terminal/process stdout.
-
-    Single redaction policy for ALL terminal-output surfaces — foreground
-    ``terminal`` results AND background ``process(action=poll/log/wait)``
-    output — so they can't diverge. Picks ``code_file`` based on whether
-    ``command`` is an environment dump:
-
-    - env-dump command (``env``/``printenv``/``set``/``export``/``declare``)
-      → ``code_file=False`` so the ENV-assignment pass masks opaque tokens.
-    - anything else (or unknown command) → ``code_file=True`` to avoid
-      false positives on source/config dumps.
-
-    ``force=True`` bypasses the global ``security.redact_secrets`` preference
-    for safety boundaries that must never emit raw credentials.
-    """
-    if not output:
-        return output
-    code_file = not is_env_dump_command(command or "")
-    return redact_sensitive_text(output, force=force, code_file=code_file)
-
-
 # Substrings used to gate ``_PREFIX_RE`` execution. If none of these appear in
 # the input string, the prefix regex cannot match anything, so we skip it.
 # False positives are fine (they just run the regex, which then matches
--- a/agent/replay_cleanup.py
+++ b/agent/replay_cleanup.py
@@ -1,140 +0,0 @@
-"""Replay-history sanitization shared across resume code paths.
-
-When a session's last turn dies mid-tool-loop — the process is killed by a
-restart/shutdown command, a stale-timeout fires, or an interrupt lands before
-the tool result is written — the persisted transcript can end with a dangling
-``assistant(tool_calls)`` (no matching ``tool`` answer) or an interrupted
-``assistant→tool`` block.  On resume the model sees that broken tail and
-re-issues the unanswered call, producing an endless "thinking"/reboot loop
-(#49201, #29086).
-
-These pure helpers strip those tails before the history is replayed to the
-model.  They were originally local to ``gateway/run.py`` (which fixed the
-messaging-gateway path) and are extracted here so every resume surface — the
-messaging gateway AND the TUI/WebUI gateway — shares the same cleanup instead
-of the WebUI path silently skipping it.
-"""
-
-from __future__ import annotations
-
-import logging
-from typing import Any, Dict, List
-
-logger = logging.getLogger(__name__)
-
-
-def is_interrupted_tool_result(content: Any) -> bool:
-    """Return True if a tool result indicates the tool was interrupted."""
-    if not isinstance(content, str):
-        return False
-    lowered = content.lower()
-    if "[command interrupted]" in lowered:
-        return True
-    if "exit_code" in lowered and ("130" in lowered or "-1" in lowered):
-        return "interrupt" in lowered
-    return False
-
-
-def strip_interrupted_tool_tails(
-    agent_history: List[Dict[str, Any]],
-) -> List[Dict[str, Any]]:
-    """Strip interrupted assistant→tool sequences from replay history.
-
-    Older interrupted gateway turns can be followed by a queued real user
-    message, so the interrupted assistant/tool block is not necessarily the
-    final tail by the time we rebuild replay history.  Remove any contiguous
-    assistant(tool_calls) + tool-result block that contains an interrupted tool
-    result, while preserving successful tool-call sequences intact.
-    """
-    if not agent_history:
-        return agent_history
-
-    cleaned: List[Dict[str, Any]] = []
-    i = 0
-    n = len(agent_history)
-    while i < n:
-        msg = agent_history[i]
-        if msg.get("role") == "assistant" and "tool_calls" in msg:
-            j = i + 1
-            tool_results: List[Dict[str, Any]] = []
-            while j < n and agent_history[j].get("role") == "tool":
-                tool_results.append(agent_history[j])
-                j += 1
-            if tool_results and any(
-                is_interrupted_tool_result(m.get("content", ""))
-                for m in tool_results
-            ):
-                logger.debug(
-                    "Stripping interrupted assistant→tool replay block "
-                    "(indices %d–%d, tool_results=%d)",
-                    i, j - 1, len(tool_results),
-                )
-                i = j
-                continue
-        if msg.get("role") == "tool" and is_interrupted_tool_result(msg.get("content", "")):
-            logger.debug("Stripping orphan interrupted tool result from replay history")
-            i += 1
-            continue
-        cleaned.append(msg)
-        i += 1
-
-    return cleaned
-
-
-def strip_dangling_tool_call_tail(
-    agent_history: List[Dict[str, Any]],
-) -> List[Dict[str, Any]]:
-    """Strip a trailing ``assistant(tool_calls)`` block left with NO answers.
-
-    When a tool call itself kills the gateway process (``docker restart``,
-    ``systemctl restart``, ``kill``, ``hermes gateway restart``), the process
-    is terminated by SIGKILL *mid-call* — before the tool result is ever
-    written and before the orderly shutdown rewind
-    (``_drop_trailing_empty_response_scaffolding``) can run.  The last thing
-    persisted is the ``assistant`` message that issued the ``tool_calls``,
-    with zero matching ``tool`` rows.
-
-    On resume the model sees an unanswered tool call at the tail and naturally
-    re-issues it — which restarts the gateway again, producing the infinite
-    reboot loop in #49201.  ``strip_interrupted_tool_tails`` does not catch
-    this because there is no tool result to inspect for an interrupt marker.
-
-    This strips that dangling tail at the source so there is nothing for the
-    model to re-execute.  It only acts when the tail is an
-    ``assistant(tool_calls)`` whose calls have NO corresponding ``tool``
-    results — a completed assistant→tool pair (any tool answers present) is
-    left untouched so genuine mid-progress tool loops still resume.
-    """
-    if not agent_history:
-        return agent_history
-
-    last = agent_history[-1]
-    if not (
-        isinstance(last, dict)
-        and last.get("role") == "assistant"
-        and last.get("tool_calls")
-    ):
-        return agent_history
-
-    logger.debug(
-        "Stripping dangling unanswered assistant(tool_calls) tail "
-        "(%d call(s)) — process likely killed mid-tool-call by a "
-        "restart/shutdown command (#49201)",
-        len(last.get("tool_calls") or []),
-    )
-    return agent_history[:-1]
-
-
-def sanitize_replay_history(
-    agent_history: List[Dict[str, Any]],
-) -> List[Dict[str, Any]]:
-    """Apply both replay-tail strippers in the canonical order.
-
-    Convenience entry point for resume code paths: removes interrupted
-    assistant→tool blocks anywhere in the history, then removes a dangling
-    unanswered ``assistant(tool_calls)`` tail.  Returns the same list object
-    when there is nothing to strip.
-    """
-    if not agent_history:
-        return agent_history
-    return strip_dangling_tool_call_tail(strip_interrupted_tool_tails(agent_history))
--- a/agent/shell_hooks.py
+++ b/agent/shell_hooks.py
@@ -122,8 +122,6 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple

-from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags
-
 try:
    import fcntl  # POSIX only; Windows falls back to best-effort without flock.
 except ImportError:  # pragma: no cover
@@ -443,7 +441,6 @@ def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
        return result

    t0 = time.monotonic()
-    _popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
    try:
        proc = subprocess.run(
            argv,
@@ -452,7 +449,6 @@ def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
            timeout=spec.timeout,
            text=True,
            shell=False,
-            **_popen_kwargs,
        )
    except subprocess.TimeoutExpired:
        result["timed_out"] = True
--- a/agent/skill_preprocessing.py
+++ b/agent/skill_preprocessing.py
@@ -5,8 +5,6 @@ import re
 import subprocess
 from pathlib import Path

-from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags
-
 logger = logging.getLogger(__name__)

 # Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
@@ -68,7 +66,6 @@ def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
    Failures return a short ``[inline-shell error: ...]`` marker instead of
    raising, so one bad snippet can't wreck the whole skill message.
    """
-    _popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
    try:
        completed = subprocess.run(
            ["bash", "-c", command],
@@ -78,7 +75,6 @@ def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
            timeout=max(1, int(timeout)),
            check=False,
            stdin=subprocess.DEVNULL,
-            **_popen_kwargs,
        )
    except subprocess.TimeoutExpired:
        return f"[inline-shell timeout after {timeout}s: {command}]"
--- a/agent/turn_context.py
+++ b/agent/turn_context.py
@@ -28,7 +28,6 @@ import uuid
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional

-from agent.conversation_compression import conversation_history_after_compression
 from agent.iteration_budget import IterationBudget
 from agent.model_metadata import (
    estimate_messages_tokens_rough,
@@ -401,9 +400,7 @@ def build_turn_context(
                    _orig_len, len(messages), _orig_tokens, _preflight_tokens
                ):
                    break  # Cannot compress further: neither rows nor tokens moved
-                conversation_history = conversation_history_after_compression(
-                    agent, messages
-                )
+                conversation_history = None
                agent._empty_content_retries = 0
                agent._thinking_prefill_retries = 0
                agent._last_content_with_tools = None
--- a/agent/turn_finalizer.py
+++ b/agent/turn_finalizer.py
@@ -289,14 +289,7 @@ def finalize_turn(
                    and len(_stripped) <= 24
                    and _stripped[-1:] not in {".", "!", "?", "。", "！", "？", "`", ")"}
                )
-                _is_partial_stream_recovery = (
-                    str(_turn_exit_reason) == "partial_stream_recovery"
-                )
-                if (
-                    _is_empty_terminal
-                    or _is_partial_fragment
-                    or _is_partial_stream_recovery
-                ):
+                if _is_empty_terminal or _is_partial_fragment:
                    _explanation = agent._format_turn_completion_explanation(
                        _turn_exit_reason
                    )
--- a/agent/turn_retry_state.py
+++ b/agent/turn_retry_state.py
@@ -67,11 +67,6 @@ class TurnRetryState:
    # ── Restart signals (read by the outer loop after the attempt) ───────
    restart_with_compressed_messages: bool = False
    restart_with_length_continuation: bool = False
-    # Set when a content-filter stream stall (e.g. MiniMax "new_sensitive")
-    # has been escalated to the fallback chain: the partial-stream content
-    # was rolled back off ``messages`` and the loop should re-issue the API
-    # call against the newly-activated provider (#32421).
-    restart_with_rebuilt_messages: bool = False

    def __iter__(self):
        # Convenience for debugging / tests: iterate (name, value) pairs.
--- a/agent/verification_stop.py
+++ b/agent/verification_stop.py
@@ -15,63 +15,6 @@ from typing import Any, Iterable

 _MAX_CHANGED_PATHS_IN_NUDGE = 8

-# Non-code file extensions whose edits carry no verifiable runtime behavior:
-# documentation, prose, and data/markup that no test/build exercises. When a
-# turn touches ONLY these, verify-on-stop has nothing to check, so the nudge is
-# suppressed (this is fix "C" for the doc/markdown/skill false-positive — a
-# SKILL.md or README edit must never demand a /tmp verification script). A turn
-# that edits any non-listed path (a real source/code/config file) still nudges.
-_NON_CODE_VERIFY_EXTENSIONS = frozenset(
-    {
-        ".md",
-        ".markdown",
-        ".mdx",
-        ".rst",
-        ".txt",
-        ".text",
-        ".adoc",
-        ".asciidoc",
-        ".org",
-        ".log",
-        ".csv",
-        ".tsv",
-    }
-)
-
-# Filenames (case-insensitive, extension-less or otherwise) that are pure prose
-# even without a recognized doc extension.
-_NON_CODE_VERIFY_FILENAMES = frozenset(
-    {
-        "license",
-        "licence",
-        "notice",
-        "authors",
-        "contributors",
-        "changelog",
-        "codeowners",
-    }
-)
-
-
-def _is_non_code_path(raw: str) -> bool:
-    """Return True when a changed path is documentation/prose with nothing to verify."""
-    try:
-        p = Path(str(raw))
-    except Exception:
-        return False
-    suffix = p.suffix.lower()
-    if suffix in _NON_CODE_VERIFY_EXTENSIONS:
-        return True
-    if not suffix and p.name.lower() in _NON_CODE_VERIFY_FILENAMES:
-        return True
-    return False
-
-
-def _filter_verifiable_paths(paths: Iterable[str]) -> list[str]:
-    """Drop documentation/prose paths; keep paths that could have verifiable behavior."""
-    return [p for p in paths if p and not _is_non_code_path(p)]
-
-
 # Session identities (platform or source) that are NOT human conversational
 # messaging surfaces: interactive coding surfaces (CLI, TUI, desktop, codex,
 # local, gateway) and programmatic callers (API server, webhooks, tools).
@@ -136,13 +79,12 @@ def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool:
    """Return whether edit -> verify-before-finish behavior is enabled.

    Precedence: an explicit ``HERMES_VERIFY_ON_STOP`` env var wins, then an
-    explicit ``agent.verify_on_stop`` config value. The config default is
-    ``False`` (see ``DEFAULT_CONFIG``) — verify-on-stop is OFF unless the user
-    opts in. The legacy ``"auto"`` sentinel is still honored for anyone who
-    sets it explicitly: it resolves to ON for interactive coding surfaces
+    explicit boolean ``agent.verify_on_stop`` config value, then a surface-aware
+    default. The config default is the sentinel ``"auto"`` (see
+    ``DEFAULT_CONFIG``), which resolves to ON for interactive coding surfaces
    (CLI, TUI, desktop) and programmatic callers, and OFF for conversational
-    messaging surfaces (Telegram, Discord, etc.). A missing/unknown value
-    falls back to OFF.
+    messaging surfaces (Telegram, Discord, etc.) where the verification
+    narrative would otherwise reach a human as chat noise.
    """
    env = os.environ.get("HERMES_VERIFY_ON_STOP")
    if env is not None:
@@ -164,11 +106,8 @@ def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool:
            return True
        if token in {"0", "false", "no", "off"}:
            return False
-        if token == "auto":
-            # Explicit opt-in to the legacy surface-aware behavior.
-            return not _session_is_messaging_surface()
-    # Missing or unknown value -> OFF (the new default).
-    return False
+    # "auto", missing, or any other value -> surface-aware default.
+    return not _session_is_messaging_surface()


 def _candidate_cwds(paths: Iterable[str]) -> list[Path]:
@@ -251,10 +190,7 @@ def build_verify_on_stop_nudge(
    max_attempts: int = 2,
 ) -> str | None:
    """Return a synthetic follow-up when edited code lacks fresh verification."""
-    # Drop documentation/prose paths (markdown, skills, README, LICENSE, ...) —
-    # they carry no verifiable behavior, so a turn that touched only those has
-    # nothing to verify and must not nudge.
-    paths = sorted({str(p) for p in _filter_verifiable_paths(changed_paths)})
+    paths = sorted({str(p) for p in changed_paths if p})
    if not paths or attempts >= max_attempts:
        return None

--- a/apps/desktop/electron/backend-env.cjs
+++ b/apps/desktop/electron/backend-env.cjs
@@ -1,4 +1,4 @@
-import path from 'node:path'
+const path = require('node:path')

 // Match the POSIX fallback surface used by the Python terminal environment.
 // macOS apps launched from Finder/Dock often inherit only /usr/bin:/bin:/usr/sbin:/sbin,
@@ -23,14 +23,12 @@ function pathModuleForPlatform(platform = process.platform) {
 }

 function pathEnvKey(env = process.env, platform = process.platform) {
-  if (platform !== 'win32') {return 'PATH'}
-
+  if (platform !== 'win32') return 'PATH'
  return Object.keys(env || {}).find(key => key.toUpperCase() === 'PATH') || 'PATH'
 }

 function currentPathValue(env = process.env, platform = process.platform) {
  const key = pathEnvKey(env, platform)
-
  return env?.[key] || ''
 }

@@ -39,11 +37,10 @@ function appendUniquePathEntries(entries, { delimiter = path.delimiter } = {}) {
  const ordered = []

  for (const entry of entries) {
-    if (!entry) {continue}
+    if (!entry) continue
    const parts = Array.isArray(entry) ? entry : String(entry).split(delimiter)
-
    for (const part of parts) {
-      if (!part || seen.has(part)) {continue}
+      if (!part || seen.has(part)) continue
      seen.add(part)
      ordered.push(part)
    }
@@ -58,7 +55,7 @@ function buildDesktopBackendPath({
  currentPath = '',
  platform = process.platform,
  pathModule = pathModuleForPlatform(platform)
-}: any = {}) {
+} = {}) {
  const delimiter = delimiterForPlatform(platform)
  const hermesNodeBin = hermesHome ? pathModule.join(hermesHome, 'node', 'bin') : null
  const venvBin = venvRoot ? pathModule.join(venvRoot, platform === 'win32' ? 'Scripts' : 'bin') : null
@@ -67,15 +64,13 @@ function buildDesktopBackendPath({
  return appendUniquePathEntries([hermesNodeBin, venvBin, currentPath, saneEntries], { delimiter })
 }

-function normalizeHermesHomeRoot(hermesHome, { pathModule = pathModuleForPlatform(process.platform) }: any = {}) {
-  if (!hermesHome) {return hermesHome}
+function normalizeHermesHomeRoot(hermesHome, { pathModule = pathModuleForPlatform(process.platform) } = {}) {
+  if (!hermesHome) return hermesHome
  const resolved = pathModule.resolve(String(hermesHome))
  const parent = pathModule.dirname(resolved)
-
  if (pathModule.basename(parent).toLowerCase() === 'profiles') {
    return pathModule.dirname(parent)
  }
-
  return resolved
 }

@@ -86,7 +81,7 @@ function buildDesktopBackendEnv({
  currentEnv = process.env,
  platform = process.platform,
  pathModule = pathModuleForPlatform(platform)
-}: any = {}) {
+} = {}) {
  const delimiter = delimiterForPlatform(platform)
  const currentPythonPath = currentEnv?.PYTHONPATH || ''
  const key = pathEnvKey(currentEnv, platform)
@@ -103,10 +98,12 @@ function buildDesktopBackendEnv({
  }
 }

-export { appendUniquePathEntries,
+module.exports = {
+  POSIX_SANE_PATH_ENTRIES,
+  appendUniquePathEntries,
  buildDesktopBackendEnv,
  buildDesktopBackendPath,
  delimiterForPlatform,
  normalizeHermesHomeRoot,
-  pathEnvKey,
-  POSIX_SANE_PATH_ENTRIES }
+  pathEnvKey
+}
--- a/apps/desktop/electron/backend-env.test.cjs
+++ b/apps/desktop/electron/backend-env.test.cjs
@@ -1,13 +1,15 @@
-import assert from 'node:assert/strict'
-import path from 'node:path'
-import test from 'node:test'
+const test = require('node:test')
+const assert = require('node:assert/strict')
+const path = require('node:path')

-import { appendUniquePathEntries,
+const {
+  POSIX_SANE_PATH_ENTRIES,
+  appendUniquePathEntries,
  buildDesktopBackendEnv,
  buildDesktopBackendPath,
  normalizeHermesHomeRoot,
-  pathEnvKey,
-  POSIX_SANE_PATH_ENTRIES } from './backend-env'
+  pathEnvKey
+} = require('./backend-env.cjs')

 test('desktop backend PATH adds Hermes-managed bins and missing POSIX sane entries', () => {
  const result = buildDesktopBackendPath({
--- a/apps/desktop/electron/backend-probes.cjs
+++ b/apps/desktop/electron/backend-probes.cjs
@@ -32,23 +32,12 @@
 * as bootstrap-platform.cjs and hardening.cjs).
 */

-import { execFileSync } from 'node:child_process'
+const { execFileSync } = require('node:child_process')

 const PROBE_TIMEOUT_MS = 5000

 /**
- * Return the Python snippet used to verify Hermes can import far enough to
- * launch the CLI. Kept exported for tests so dependency regressions are
- * caught without needing a real broken venv fixture.
- *
- * @returns {string}
- */
-function hermesRuntimeImportProbe() {
-  return 'import yaml; import hermes_cli.config'
-}
-
-/**
- * Return true iff the Hermes runtime import probe exits 0.
+ * Return true iff `python -c "import hermes_cli"` exits 0.
 *
 * Used to gate the "fallback to system Python with hermes_cli installed"
 * rung of resolveHermesBackend. Without this, a system Python 3.11-3.13
@@ -57,25 +46,17 @@ function hermesRuntimeImportProbe() {
 * site-packages -- and the resolver returns a backend that immediately
 * dies on spawn.
 *
- * The probe intentionally imports hermes_cli.config, not just the top-level
- * package: a broken/empty Windows launcher venv can still see the source tree
- * through PYTHONPATH but lack PyYAML, then die on the first real CLI import.
- *
 * @param {string} pythonPath - Absolute path to a python.exe / python.
- * @param {object} [opts.env] - Additional environment for the probe.
 * @returns {boolean}
 */
-function canImportHermesCli(pythonPath: string, opts:{env?: Record<string, string>} = {}) {
-  if (!pythonPath) {return false}
-
+function canImportHermesCli(pythonPath) {
+  if (!pythonPath) return false
  try {
-    execFileSync(pythonPath, ['-c', hermesRuntimeImportProbe()], {
-      env: { ...process.env, ...(opts.env || {}) },
+    execFileSync(pythonPath, ['-c', 'import hermes_cli'], {
      stdio: 'ignore',
      timeout: PROBE_TIMEOUT_MS,
      windowsHide: true
    })
-
    return true
  } catch {
    return false
@@ -96,30 +77,30 @@ function canImportHermesCli(pythonPath: string, opts:{env?: Record<string, strin
 *
 * @param {string} hermesCommand - Resolved absolute path to a hermes
 *   executable (or an interpreter+script wrapper).
+ * @param {object} [opts]
 * @param {boolean} [opts.shell] - Whether to run through a shell. For
 *   .cmd/.bat shims on Windows execFileSync needs shell:true to find
 *   the cmd interpreter; mirrors the same flag isCommandScript() drives
 *   in resolveHermesBackend.
 * @returns {boolean}
 */
-function verifyHermesCli(hermesCommand: string, opts?: {shell?: boolean}) {
-  if (!hermesCommand) {return false}
-
+function verifyHermesCli(hermesCommand, opts = {}) {
+  if (!hermesCommand) return false
  try {
    execFileSync(hermesCommand, ['--version'], {
      stdio: 'ignore',
      timeout: PROBE_TIMEOUT_MS,
-      shell: Boolean(opts?.shell),
+      shell: Boolean(opts.shell),
      windowsHide: true
    })
-
    return true
  } catch {
    return false
  }
 }

-export { canImportHermesCli,
-  hermesRuntimeImportProbe,
-  PROBE_TIMEOUT_MS,
-  verifyHermesCli }
+module.exports = {
+  canImportHermesCli,
+  verifyHermesCli,
+  PROBE_TIMEOUT_MS
+}
--- a/apps/desktop/electron/backend-probes.test.cjs
+++ b/apps/desktop/electron/backend-probes.test.cjs
@@ -5,13 +5,13 @@
 * (Wired into npm test:desktop:platforms in package.json.)
 */

-import assert from 'node:assert/strict'
-import fs from 'node:fs'
-import os from 'node:os'
-import path from 'node:path'
-import test from 'node:test'
+const test = require('node:test')
+const assert = require('node:assert/strict')
+const fs = require('node:fs')
+const os = require('node:os')
+const path = require('node:path')

-import { canImportHermesCli, hermesRuntimeImportProbe, verifyHermesCli } from './backend-probes'
+const { canImportHermesCli, verifyHermesCli } = require('./backend-probes.cjs')

 // Resolve the host's own Node binary -- guaranteed to be on disk and
 // runnable. We use it as both a stand-in for "a python that doesn't
@@ -40,12 +40,6 @@ test('canImportHermesCli returns false when binary does not exist', () => {
  assert.equal(canImportHermesCli(ghost), false)
 })

-test('hermes runtime import probe checks config dependencies', () => {
-  const probe = hermesRuntimeImportProbe()
-  assert.match(probe, /\bimport yaml\b/)
-  assert.match(probe, /\bimport hermes_cli\.config\b/)
-})
-
 test('verifyHermesCli returns false when command is falsy', () => {
  assert.equal(verifyHermesCli(''), false)
  assert.equal(verifyHermesCli(null), false)
@@ -63,7 +57,6 @@ test('verifyHermesCli returns true when --version exits 0', () => {
  // verifyHermesCli only cares about the exit code.
  const scriptPath = path.join(os.tmpdir(), `hermes-probes-ok-${Date.now()}-${process.pid}.cjs`)
  fs.writeFileSync(scriptPath, 'process.exit(0)\n')
-
  try {
    // Use node as the launcher and our script as the "command". Pass
    // shell:false (default) -- node is a real binary, no shim.
--- a/apps/desktop/electron/backend-ready.cjs
+++ b/apps/desktop/electron/backend-ready.cjs
@@ -1,4 +1,4 @@
-import fs from 'node:fs'
+const fs = require('node:fs')

 const _READY_RE = /^HERMES_DASHBOARD_READY port=(\d+)/m

@@ -23,11 +23,9 @@ const MIN_PORT_ANNOUNCE_TIMEOUT_MS = 45_000
 */
 function resolvePortAnnounceTimeoutMs(env = process.env) {
  const parsed = Number(env.HERMES_DESKTOP_PORT_ANNOUNCE_TIMEOUT_MS)
-
  if (Number.isFinite(parsed) && parsed > 0) {
    return Math.max(MIN_PORT_ANNOUNCE_TIMEOUT_MS, Math.round(parsed))
  }
-
  return DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS
 }

@@ -54,7 +52,7 @@ function waitForDashboardPort(child, timeoutMs = resolvePortAnnounceTimeoutMs())
    let done = false

    function cleanup() {
-      if (done) {return}
+      if (done) return
      done = true
      clearTimeout(timer)
      child.stdout.off('data', onData)
@@ -65,16 +63,13 @@ function waitForDashboardPort(child, timeoutMs = resolvePortAnnounceTimeoutMs())
    function onData(chunk) {
      buf += chunk.toString()
      let nl
-
      while ((nl = buf.indexOf('\n')) !== -1) {
        const line = buf.slice(0, nl)
        buf = buf.slice(nl + 1)
        const m = line.match(_READY_RE)
-
        if (m) {
          cleanup()
          resolve(parseInt(m[1], 10))
-
          return
        }
      }
@@ -101,13 +96,11 @@ function waitForDashboardPort(child, timeoutMs = resolvePortAnnounceTimeoutMs())
  })
 }

-function readDashboardReadyFile(readyFile: fs.PathOrFileDescriptor) {
-  if (!readyFile) {return null}
-
+function readDashboardReadyFile(readyFile) {
+  if (!readyFile) return null
  try {
    const parsed = JSON.parse(fs.readFileSync(readyFile, 'utf8'))
    const port = Number(parsed?.port)
-
    return Number.isInteger(port) && port > 0 ? port : null
  } catch {
    return null
@@ -120,18 +113,16 @@ function waitForDashboardReadyFile(readyFile, child, timeoutMs = resolvePortAnno
    let interval = null

    function cleanup() {
-      if (done) {return}
+      if (done) return
      done = true
      clearTimeout(timer)
-
-      if (interval) {clearInterval(interval)}
+      if (interval) clearInterval(interval)
      child.off('exit', onExit)
      child.off('error', onError)
    }

    function check() {
      const port = readDashboardReadyFile(readyFile)
-
      if (port) {
        cleanup()
        resolve(port)
@@ -156,29 +147,25 @@ function waitForDashboardReadyFile(readyFile, child, timeoutMs = resolvePortAnno
    child.on('exit', onExit)
    child.on('error', onError)
    interval = setInterval(check, 50)
-
-    if (typeof interval.unref === 'function') {interval.unref()}
+    if (typeof interval.unref === 'function') interval.unref()
    check()
  })
 }

-function waitForDashboardPortAnnouncement(child, options: {
-  readyFile?: fs.PathOrFileDescriptor,
-  timeoutMs?: number
-} = {}) {
+function waitForDashboardPortAnnouncement(child, options = {}) {
  const timeoutMs = options.timeoutMs ?? resolvePortAnnounceTimeoutMs()
-
  if (options.readyFile) {
    return waitForDashboardReadyFile(options.readyFile, child, timeoutMs)
  }
-
  return waitForDashboardPort(child, timeoutMs)
 }

-export { DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS,
-  MIN_PORT_ANNOUNCE_TIMEOUT_MS,
-  readDashboardReadyFile,
-  resolvePortAnnounceTimeoutMs,
+module.exports = {
  waitForDashboardPort,
  waitForDashboardPortAnnouncement,
-  waitForDashboardReadyFile }
+  waitForDashboardReadyFile,
+  readDashboardReadyFile,
+  resolvePortAnnounceTimeoutMs,
+  DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS,
+  MIN_PORT_ANNOUNCE_TIMEOUT_MS
+}
--- a/apps/desktop/electron/backend-ready.test.cjs
+++ b/apps/desktop/electron/backend-ready.test.cjs
@@ -11,32 +11,29 @@
 * HERMES_DESKTOP_PORT_ANNOUNCE_TIMEOUT_MS, clamped to a 45s floor.
 */

-import assert from 'node:assert/strict'
-import { EventEmitter } from 'node:events'
-import fs from 'node:fs'
-import os from 'node:os'
-import path from 'node:path'
-import test from 'node:test'
+const test = require('node:test')
+const assert = require('node:assert/strict')
+const { EventEmitter } = require('node:events')
+const fs = require('node:fs')
+const os = require('node:os')
+const path = require('node:path')

-import { DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS,
-  MIN_PORT_ANNOUNCE_TIMEOUT_MS,
+const {
  readDashboardReadyFile,
-  resolvePortAnnounceTimeoutMs,
  waitForDashboardPort,
  waitForDashboardPortAnnouncement,
-  waitForDashboardReadyFile } from './backend-ready'
-
-type FakeChildProcess = EventEmitter & {
-  stdout: EventEmitter
-}
+  waitForDashboardReadyFile,
+  resolvePortAnnounceTimeoutMs,
+  DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS,
+  MIN_PORT_ANNOUNCE_TIMEOUT_MS
+} = require('./backend-ready.cjs')

 // A minimal stand-in for a spawned child process: an EventEmitter with a
 // stdout EventEmitter, matching the surface waitForDashboardPort consumes
 // (child.stdout.on('data'), child.on('exit'|'error') + the .off() teardown).
-function makeFakeChild(): FakeChildProcess {
-  const child = new EventEmitter() as FakeChildProcess
+function makeFakeChild() {
+  const child = new EventEmitter()
  child.stdout = new EventEmitter()
-
  return child
 }

@@ -135,7 +132,6 @@ test('a late announcement after timeout does not throw (listeners torn down)', a

 function mkTmpReadyFile() {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-ready-test-'))
-
  return {
    dir,
    file: path.join(dir, 'ready.json'),
@@ -145,7 +141,6 @@ function mkTmpReadyFile() {

 test('readDashboardReadyFile returns a valid port from JSON', () => {
  const tmp = mkTmpReadyFile()
-
  try {
    fs.writeFileSync(tmp.file, JSON.stringify({ port: 4567 }))
    assert.equal(readDashboardReadyFile(tmp.file), 4567)
@@ -156,7 +151,6 @@ test('readDashboardReadyFile returns a valid port from JSON', () => {

 test('readDashboardReadyFile ignores missing, malformed, or invalid files', () => {
  const tmp = mkTmpReadyFile()
-
  try {
    assert.equal(readDashboardReadyFile(tmp.file), null)
    fs.writeFileSync(tmp.file, '{')
@@ -171,7 +165,6 @@ test('readDashboardReadyFile ignores missing, malformed, or invalid files', () =
 test('waitForDashboardReadyFile resolves when the ready file appears', async () => {
  const tmp = mkTmpReadyFile()
  const child = makeFakeChild()
-
  try {
    const p = waitForDashboardReadyFile(tmp.file, child, 1000)
    setTimeout(() => fs.writeFileSync(tmp.file, JSON.stringify({ port: 8765 })), 20)
@@ -184,7 +177,6 @@ test('waitForDashboardReadyFile resolves when the ready file appears', async ()
 test('waitForDashboardPortAnnouncement uses ready file when provided', async () => {
  const tmp = mkTmpReadyFile()
  const child = makeFakeChild()
-
  try {
    const p = waitForDashboardPortAnnouncement(child, { readyFile: tmp.file, timeoutMs: 1000 })
    setTimeout(() => fs.writeFileSync(tmp.file, JSON.stringify({ port: 9876 })), 20)
@@ -197,7 +189,6 @@ test('waitForDashboardPortAnnouncement uses ready file when provided', async ()
 test('waitForDashboardReadyFile rejects when the child exits before file readiness', async () => {
  const tmp = mkTmpReadyFile()
  const child = makeFakeChild()
-
  try {
    const p = waitForDashboardReadyFile(tmp.file, child, 1000)
    child.emit('exit', 1, null)
--- a/apps/desktop/electron/bootstrap-platform.cjs
+++ b/apps/desktop/electron/bootstrap-platform.cjs
@@ -1,23 +1,20 @@
-import fs from 'node:fs'
+const fs = require('node:fs')

 function isWslEnvironment(env = process.env, platform = process.platform, kernelRelease = null) {
-  if (platform !== 'linux') {return false}
-
-  if (env.WSL_DISTRO_NAME || env.WSL_INTEROP) {return true}
+  if (platform !== 'linux') return false
+  if (env.WSL_DISTRO_NAME || env.WSL_INTEROP) return true

  try {
    const release = kernelRelease ?? fs.readFileSync('/proc/sys/kernel/osrelease', 'utf8')
-
    return /microsoft|wsl/i.test(release)
  } catch {
    return false
  }
 }

-function isWindowsBinaryPathInWsl(filePath, options: {isWsl?: boolean, env?: NodeJS.ProcessEnv, platform?: NodeJS.Platform} = {}) {
+function isWindowsBinaryPathInWsl(filePath, options = {}) {
  const isWsl = options.isWsl ?? isWslEnvironment(options.env, options.platform)
-
-  if (!isWsl) {return false}
+  if (!isWsl) return false

  const normalized = String(filePath || '')
    .replace(/\\/g, '/')
@@ -51,21 +48,19 @@ const GPU_OVERRIDE_OFF = new Set(['0', 'false', 'no', 'off'])
 *
 * Pure + dependency-free so it can be unit-tested and called before app ready.
 */
-function detectRemoteDisplay(options: {env?: NodeJS.ProcessEnv, platform?: NodeJS.Platform} = {}) {
+function detectRemoteDisplay(options = {}) {
  const env = options.env ?? process.env
  const platform = options.platform ?? process.platform

  const override = String(env.HERMES_DESKTOP_DISABLE_GPU || '')
    .trim()
    .toLowerCase()
-
-  if (GPU_OVERRIDE_ON.has(override)) {return 'override (HERMES_DESKTOP_DISABLE_GPU)'}
-
-  if (GPU_OVERRIDE_OFF.has(override)) {return null}
+  if (GPU_OVERRIDE_ON.has(override)) return 'override (HERMES_DESKTOP_DISABLE_GPU)'
+  if (GPU_OVERRIDE_OFF.has(override)) return null

  // Launched from an SSH session → the display is X11-forwarded or otherwise
  // remote. Covers the common `ssh user@box` + GUI-forwarding case.
-  if (env.SSH_CONNECTION || env.SSH_CLIENT || env.SSH_TTY) {return 'ssh-session'}
+  if (env.SSH_CONNECTION || env.SSH_CLIENT || env.SSH_TTY) return 'ssh-session'

  if (platform === 'linux') {
    // X11 forwarding sets DISPLAY to "<host>:N" (e.g. "localhost:10.0"); a
@@ -73,7 +68,6 @@ function detectRemoteDisplay(options: {env?: NodeJS.ProcessEnv, platform?: NodeJ
    // NB: WSLg deliberately isn't treated as remote — it reports
    // GPU-accelerated vGPU surfaces locally and doesn't show the flicker.
    const display = String(env.DISPLAY || '')
-
    if (display.includes(':') && display.split(':')[0]) {
      return `x11-forwarding (DISPLAY=${display})`
    }
@@ -83,14 +77,15 @@ function detectRemoteDisplay(options: {env?: NodeJS.ProcessEnv, platform?: NodeJ
    // RDP sessions report SESSIONNAME like "RDP-Tcp#7"; the local console is
    // "Console".
    const sessionName = String(env.SESSIONNAME || '')
-
-    if (/^rdp-/i.test(sessionName)) {return `rdp (SESSIONNAME=${sessionName})`}
+    if (/^rdp-/i.test(sessionName)) return `rdp (SESSIONNAME=${sessionName})`
  }

  return null
 }

-export { bundledRuntimeImportCheck,
+module.exports = {
+  bundledRuntimeImportCheck,
  detectRemoteDisplay,
  isWindowsBinaryPathInWsl,
-  isWslEnvironment }
+  isWslEnvironment
+}
--- a/apps/desktop/electron/bootstrap-platform.test.cjs
+++ b/apps/desktop/electron/bootstrap-platform.test.cjs
@@ -1,13 +1,14 @@
-import assert from 'node:assert/strict'
-import fs from 'node:fs'
-import path from 'node:path'
-import test from 'node:test'
-import { fileURLToPath } from 'node:url'
+const assert = require('node:assert/strict')
+const fs = require('node:fs')
+const path = require('node:path')
+const test = require('node:test')

-import { bundledRuntimeImportCheck,
+const {
+  bundledRuntimeImportCheck,
  detectRemoteDisplay,
  isWindowsBinaryPathInWsl,
-  isWslEnvironment } from './bootstrap-platform'
+  isWslEnvironment
+} = require('./bootstrap-platform.cjs')

 test('isWslEnvironment detects WSL2 env vars on linux', () => {
  assert.equal(isWslEnvironment({ WSL_DISTRO_NAME: 'Ubuntu' }, 'linux'), true)
@@ -86,8 +87,8 @@ test('detectRemoteDisplay honors the HERMES_DESKTOP_DISABLE_GPU override both wa
 })

 test('packaged electron entrypoints do not require unpackaged npm modules', () => {
-  const electronDir = path.dirname(fileURLToPath(import.meta.url))
-  const entrypoints = ['main.ts', 'preload.ts', 'bootstrap-platform.ts']
+  const electronDir = __dirname
+  const entrypoints = ['main.cjs', 'preload.cjs', 'bootstrap-platform.cjs']
  // - electron: provided by the electron runtime, always resolvable in packaged builds.
  // - node-pty: hoisted by workspace dedup AND shipped via extraResources to
  //   resources/native-deps/node-pty (see scripts/stage-native-deps.cjs). main.cjs
@@ -99,7 +100,6 @@ test('packaged electron entrypoints do not require unpackaged npm modules', () =

  for (const entrypoint of entrypoints) {
    const source = fs.readFileSync(path.join(electronDir, entrypoint), 'utf8')
-
    const bareRequires = Array.from(source.matchAll(requirePattern))
      .map(match => match[1])
      .filter(specifier => !specifier.startsWith('node:'))
--- a/apps/desktop/electron/bootstrap-runner.cjs
+++ b/apps/desktop/electron/bootstrap-runner.cjs
@@ -1,3 +1,5 @@
+'use strict'
+
 /**
 * bootstrap-runner.cjs
 *
@@ -32,11 +34,11 @@
 *     no UI consumes them yet)
 */

-import { spawn } from 'node:child_process'
-import fs from 'node:fs'
-import fsp from 'node:fs/promises'
-import https from 'node:https'
-import path from 'node:path'
+const fs = require('node:fs')
+const fsp = require('node:fs/promises')
+const path = require('node:path')
+const https = require('node:https')
+const { spawn } = require('node:child_process')

 const IS_WINDOWS = process.platform === 'win32'

@@ -44,7 +46,6 @@ function hiddenWindowsChildOptions(options = {}) {
  if (!IS_WINDOWS || Object.prototype.hasOwnProperty.call(options, 'windowsHide')) {
    return options
  }
-
  return { ...options, windowsHide: true }
 }

@@ -70,12 +71,10 @@ function installScriptKind() {
 }

 function resolveLocalInstallScript(sourceRepoRoot) {
-  if (!sourceRepoRoot) {return null}
+  if (!sourceRepoRoot) return null
  const candidate = path.join(sourceRepoRoot, 'scripts', installScriptName())
-
  try {
    fs.accessSync(candidate, fs.constants.R_OK)
-
    return candidate
  } catch {
    return null
@@ -91,12 +90,10 @@ function bootstrapCacheDir(hermesHome) {
 // the pinned commit can't be fetched from GitHub (e.g. a locally-built desktop
 // app stamped to an unpushed HEAD).
 function installedAgentInstallScript(hermesHome) {
-  if (!hermesHome) {return null}
+  if (!hermesHome) return null
  const candidate = path.join(hermesHome, 'hermes-agent', 'scripts', installScriptName())
-
  try {
    fs.accessSync(candidate, fs.constants.R_OK)
-
    return candidate
  } catch {
    return null
@@ -113,7 +110,6 @@ function downloadInstallScript(commit, destPath) {
  // verification beyond "did the file we wrote pass a syntax probe."
  const scriptName = installScriptName()
  const url = `https://raw.githubusercontent.com/NousResearch/hermes-agent/${commit}/scripts/${scriptName}`
-
  return new Promise((resolve, reject) => {
    fs.mkdirSync(path.dirname(destPath), { recursive: true })
    const tmpPath = destPath + '.tmp'
@@ -133,10 +129,8 @@ function downloadInstallScript(commit, destPath) {
                    `Failed to download ${scriptName}: HTTP ${res2.statusCode} from redirect ${res.headers.location}`
                  )
                )
-
                return
              }
-
              const out2 = fs.createWriteStream(tmpPath)
              res2.pipe(out2)
              out2.on('finish', () => {
@@ -147,24 +141,18 @@ function downloadInstallScript(commit, destPath) {
              out2.on('error', reject)
            })
            .on('error', reject)
-
          return
        }
-
        if (res.statusCode !== 200) {
          out.close()
-
          try {
            fs.unlinkSync(tmpPath)
          } catch {
            void 0
          }
-
          reject(new Error(`Failed to download ${scriptName}: HTTP ${res.statusCode} from ${url}`))
-
          return
        }
-
        res.pipe(out)
        out.on('finish', () => {
          out.close()
@@ -177,7 +165,6 @@ function downloadInstallScript(commit, destPath) {
          } catch {
            void 0
          }
-
          reject(err)
        })
      })
@@ -187,7 +174,6 @@ function downloadInstallScript(commit, destPath) {
        } catch {
          void 0
        }
-
        reject(err)
      })
  })
@@ -204,10 +190,8 @@ async function resolveInstallScript({
  //    without pushing. SOURCE_REPO_ROOT comes from main.cjs (path.resolve
  //    of APP_ROOT/../..).
  const localScript = resolveLocalInstallScript(sourceRepoRoot)
-
  if (localScript) {
    emit({ type: 'log', line: `[bootstrap] using local ${installScriptName()} at ${localScript}` })
-
    return { path: localScript, source: 'local', kind: installScriptKind() }
  }

@@ -220,14 +204,12 @@ async function resolveInstallScript({
  }

  const cached = cachedScriptPath(hermesHome, installStamp.commit)
-
  try {
    await fsp.access(cached, fs.constants.R_OK)
    emit({
      type: 'log',
      line: `[bootstrap] using cached ${installScriptName()} for ${installStamp.commit.slice(0, 12)}`
    })
-
    return { path: cached, source: 'cache', commit: installStamp.commit, kind: installScriptKind() }
  } catch {
    // not cached; download
@@ -237,11 +219,9 @@ async function resolveInstallScript({
    type: 'log',
    line: `[bootstrap] fetching ${installScriptName()} for ${installStamp.commit.slice(0, 12)} from GitHub`
  })
-
  try {
    await _download(installStamp.commit, cached)
    emit({ type: 'log', line: `[bootstrap] saved to ${cached}` })
-
    return { path: cached, source: 'download', commit: installStamp.commit, kind: installScriptKind() }
  } catch (err) {
    // The pinned commit may not be fetchable from GitHub -- most commonly a
@@ -250,7 +230,6 @@ async function resolveInstallScript({
    // ships inside the already-installed agent checkout so dev/self-builds can
    // still bootstrap instead of dying with a fatal 404.
    const installed = installedAgentInstallScript(hermesHome)
-
    if (installed) {
      emit({
        type: 'log',
@@ -258,18 +237,15 @@ async function resolveInstallScript({
          `[bootstrap] GitHub fetch failed (${err.message}); ` +
          `falling back to installed agent ${installScriptName()} at ${installed}`
      })
-
      try {
        fs.mkdirSync(path.dirname(cached), { recursive: true })
        fs.copyFileSync(installed, cached)
-
        return { path: cached, source: 'installed-agent', commit: installStamp.commit, kind: installScriptKind() }
      } catch {
        // Cache copy failed (read-only FS, etc.) -- use the source path directly.
        return { path: installed, source: 'installed-agent', commit: installStamp.commit, kind: installScriptKind() }
      }
    }
-
    throw err
  }
 }
@@ -295,37 +271,31 @@ function powershellUnderRoot(root) {
 function resolveWindowsPowerShell() {
  for (const v of ['SystemRoot', 'windir']) {
    const root = process.env[v]
-
    if (root) {
      const candidate = powershellUnderRoot(root)
-
      try {
-        if (fs.statSync(candidate).isFile()) {return candidate}
+        if (fs.statSync(candidate).isFile()) return candidate
      } catch {
        void 0
      }
    }
  }
-
  const pathDirs = (process.env.PATH || process.env.Path || '').split(path.delimiter).filter(Boolean)
-
  for (const exe of ['powershell.exe', 'pwsh.exe']) {
    for (const dir of pathDirs) {
      const candidate = path.join(dir, exe)
-
      try {
-        if (fs.statSync(candidate).isFile()) {return candidate}
+        if (fs.statSync(candidate).isFile()) return candidate
      } catch {
        void 0
      }
    }
  }
-
  return 'powershell.exe'
 }

-function spawnPowerShell(scriptPath, args, { emit, stageName, abortSignal, hermesHome }: any = {}) {
-  return new Promise<any>((resolve, reject) => {
+function spawnPowerShell(scriptPath, args, { emit, stageName, abortSignal, hermesHome } = {}) {
+  return new Promise((resolve, reject) => {
    const ps = process.platform === 'win32' ? resolveWindowsPowerShell() : 'pwsh'
    const fullArgs = ['-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', scriptPath, ...args]

@@ -349,14 +319,12 @@ function spawnPowerShell(scriptPath, args, { emit, stageName, abortSignal, herme

    const onAbort = () => {
      killed = true
-
      try {
        child.kill('SIGTERM')
      } catch {
        void 0
      }
    }
-
    if (abortSignal) {
      if (abortSignal.aborted) {
        onAbort()
@@ -374,12 +342,10 @@ function spawnPowerShell(scriptPath, args, { emit, stageName, abortSignal, herme
      stdout += chunk
      stdoutBuf += chunk
      let nl
-
      while ((nl = stdoutBuf.indexOf('\n')) !== -1) {
        const line = stdoutBuf.slice(0, nl).replace(/\r$/, '')
        stdoutBuf = stdoutBuf.slice(nl + 1)
-
-        if (line) {emit && emit({ type: 'log', stage: stageName, line, stream: 'stdout' })}
+        if (line) emit && emit({ type: 'log', stage: stageName, line, stream: 'stdout' })
      }
    })

@@ -388,34 +354,30 @@ function spawnPowerShell(scriptPath, args, { emit, stageName, abortSignal, herme
      stderr += chunk
      stderrBuf += chunk
      let nl
-
      while ((nl = stderrBuf.indexOf('\n')) !== -1) {
        const line = stderrBuf.slice(0, nl).replace(/\r$/, '')
        stderrBuf = stderrBuf.slice(nl + 1)
-
-        if (line) {emit && emit({ type: 'log', stage: stageName, line, stream: 'stderr' })}
+        if (line) emit && emit({ type: 'log', stage: stageName, line, stream: 'stderr' })
      }
    })

    child.on('error', err => {
-      if (abortSignal) {abortSignal.removeEventListener('abort', onAbort)}
+      if (abortSignal) abortSignal.removeEventListener('abort', onAbort)
      reject(err)
    })

    child.on('close', (code, signal) => {
-      if (abortSignal) {abortSignal.removeEventListener('abort', onAbort)}
-
+      if (abortSignal) abortSignal.removeEventListener('abort', onAbort)
      // Flush any trailing bytes
-      if (stdoutBuf) {emit && emit({ type: 'log', stage: stageName, line: stdoutBuf, stream: 'stdout' } as any)}
-
-      if (stderrBuf) {emit && emit({ type: 'log', stage: stageName, line: stderrBuf, stream: 'stderr' } as any)}
-      resolve({ stdout, stderr, code, signal, killed } as any)
+      if (stdoutBuf) emit && emit({ type: 'log', stage: stageName, line: stdoutBuf, stream: 'stdout' })
+      if (stderrBuf) emit && emit({ type: 'log', stage: stageName, line: stderrBuf, stream: 'stderr' })
+      resolve({ stdout, stderr, code, signal, killed })
    })
  })
 }

-function spawnBash(scriptPath, args, { emit, stageName, abortSignal, hermesHome }: any = {}) {
-  return new Promise<any>((resolve, reject) => {
+function spawnBash(scriptPath, args, { emit, stageName, abortSignal, hermesHome } = {}) {
+  return new Promise((resolve, reject) => {
    const child = spawn('bash', [scriptPath, ...args], {
      stdio: ['ignore', 'pipe', 'pipe'],
      env: {
@@ -430,14 +392,12 @@ function spawnBash(scriptPath, args, { emit, stageName, abortSignal, hermesHome

    const onAbort = () => {
      killed = true
-
      try {
        child.kill('SIGTERM')
      } catch {
        void 0
      }
    }
-
    if (abortSignal) {
      if (abortSignal.aborted) {
        onAbort()
@@ -454,12 +414,10 @@ function spawnBash(scriptPath, args, { emit, stageName, abortSignal, hermesHome
      stdout += chunk
      stdoutBuf += chunk
      let nl
-
      while ((nl = stdoutBuf.indexOf('\n')) !== -1) {
        const line = stdoutBuf.slice(0, nl).replace(/\r$/, '')
        stdoutBuf = stdoutBuf.slice(nl + 1)
-
-        if (line) {emit && emit({ type: 'log', stage: stageName, line, stream: 'stdout' })}
+        if (line) emit && emit({ type: 'log', stage: stageName, line, stream: 'stdout' })
      }
    })

@@ -468,26 +426,22 @@ function spawnBash(scriptPath, args, { emit, stageName, abortSignal, hermesHome
      stderr += chunk
      stderrBuf += chunk
      let nl
-
      while ((nl = stderrBuf.indexOf('\n')) !== -1) {
        const line = stderrBuf.slice(0, nl).replace(/\r$/, '')
        stderrBuf = stderrBuf.slice(nl + 1)
-
-        if (line) {emit && emit({ type: 'log', stage: stageName, line, stream: 'stderr' })}
+        if (line) emit && emit({ type: 'log', stage: stageName, line, stream: 'stderr' })
      }
    })

    child.on('error', err => {
-      if (abortSignal) {abortSignal.removeEventListener('abort', onAbort)}
+      if (abortSignal) abortSignal.removeEventListener('abort', onAbort)
      reject(err)
    })

    child.on('close', (code, signal) => {
-      if (abortSignal) {abortSignal.removeEventListener('abort', onAbort)}
-
-      if (stdoutBuf) {emit && emit({ type: 'log', stage: stageName, line: stdoutBuf, stream: 'stdout' })}
-
-      if (stderrBuf) {emit && emit({ type: 'log', stage: stageName, line: stderrBuf, stream: 'stderr' })}
+      if (abortSignal) abortSignal.removeEventListener('abort', onAbort)
+      if (stdoutBuf) emit && emit({ type: 'log', stage: stageName, line: stdoutBuf, stream: 'stdout' })
+      if (stderrBuf) emit && emit({ type: 'log', stage: stageName, line: stderrBuf, stream: 'stderr' })
      resolve({ stdout, stderr, code, signal, killed })
    })
  })
@@ -502,60 +456,48 @@ function spawnBash(scriptPath, args, { emit, stageName, abortSignal, hermesHome
 // instead of falling back to install.ps1's default ($Branch = "main").
 function buildPinArgs(installStamp) {
  const args = []
-
  if (installStamp && installStamp.commit) {
    args.push('-Commit', installStamp.commit)
  }
-
  if (installStamp && installStamp.branch) {
    args.push('-Branch', installStamp.branch)
  }
-
  return args
 }

 function buildPosixPinArgs({ installStamp, activeRoot, hermesHome }) {
  const args = ['--dir', activeRoot, '--hermes-home', hermesHome]
-
  if (installStamp && installStamp.branch) {
    args.push('--branch', installStamp.branch)
  }
-
  if (installStamp && installStamp.commit) {
    args.push('--commit', installStamp.commit)
  }
-
  return args
 }

 async function fetchManifest({ scriptPath, installerKind, emit, hermesHome, activeRoot, installStamp }) {
  const isPosix = installerKind === 'posix'
-
  const args = isPosix
    ? ['--manifest', ...buildPosixPinArgs({ installStamp, activeRoot, hermesHome })]
    : ['-Manifest', ...buildPinArgs(installStamp)]
-
  const result = await (isPosix ? spawnBash : spawnPowerShell)(scriptPath, args, {
    emit,
    stageName: '__manifest__',
    hermesHome
  })
-
  if (result.code !== 0) {
    throw new Error(
      `${isPosix ? 'install.sh --manifest' : 'install.ps1 -Manifest'} failed: exit ${result.code}\n${result.stderr || result.stdout}`
    )
  }
-
  // The manifest is the LAST JSON line on stdout (install.ps1 may print
  // banner / info lines first depending on Console.OutputEncoding effects).
  // Find the last line that parses as JSON with a `stages` field.
  const lines = result.stdout.split(/\r?\n/).filter(Boolean)
-
  for (let i = lines.length - 1; i >= 0; i--) {
    try {
      const parsed = JSON.parse(lines[i])
-
      if (parsed && Array.isArray(parsed.stages)) {
        return parsed
      }
@@ -563,7 +505,6 @@ async function fetchManifest({ scriptPath, installerKind, emit, hermesHome, acti
      void 0
    }
  }
-
  throw new Error(
    `${isPosix ? 'install.sh --manifest' : 'install.ps1 -Manifest'} produced no parseable JSON payload\n${result.stdout}`
  )
@@ -574,11 +515,9 @@ async function fetchManifest({ scriptPath, installerKind, emit, hermesHome, acti
 // for the double-emit bug we addressed in the install.ps1 PR).
 function parseStageResult(stdout) {
  const lines = stdout.split(/\r?\n/).filter(Boolean)
-
  for (let i = lines.length - 1; i >= 0; i--) {
    try {
      const parsed = JSON.parse(lines[i])
-
      if (parsed && typeof parsed.ok === 'boolean' && typeof parsed.stage === 'string') {
        return parsed
      }
@@ -586,7 +525,6 @@ function parseStageResult(stdout) {
      void 0
    }
  }
-
  return null
 }

@@ -595,7 +533,6 @@ async function runStage({ scriptPath, installerKind, stage, emit, hermesHome, ac
  emit({ type: 'stage', name: stage.name, state: 'running' })

  const isPosix = installerKind === 'posix'
-
  const args = isPosix
    ? [
        '--stage',
@@ -605,7 +542,6 @@ async function runStage({ scriptPath, installerKind, stage, emit, hermesHome, ac
        ...buildPosixPinArgs({ installStamp, activeRoot, hermesHome })
      ]
    : ['-Stage', stage.name, '-NonInteractive', '-Json', ...buildPinArgs(installStamp)]
-
  const result = await (isPosix ? spawnBash : spawnPowerShell)(scriptPath, args, {
    emit,
    stageName: stage.name,
@@ -618,7 +554,6 @@ async function runStage({ scriptPath, installerKind, stage, emit, hermesHome, ac
  if (result.killed) {
    const ev = { type: 'stage', name: stage.name, state: 'failed', durationMs, error: 'cancelled by user' }
    emit(ev)
-
    return ev
  }

@@ -633,26 +568,20 @@ async function runStage({ scriptPath, installerKind, stage, emit, hermesHome, ac
      error: `${isPosix ? 'install.sh --stage' : 'install.ps1 -Stage'} ${stage.name} produced no JSON result frame (exit=${result.code})`,
      json: null
    }
-
    emit(ev)
-
    return ev
  }

  if (json.ok && json.skipped) {
    const ev = { type: 'stage', name: stage.name, state: 'skipped', durationMs, json }
    emit(ev)
-
    return ev
  }
-
  if (json.ok) {
    const ev = { type: 'stage', name: stage.name, state: 'succeeded', durationMs, json }
    emit(ev)
-
    return ev
  }
-
  const ev = {
    type: 'stage',
    name: stage.name,
@@ -661,9 +590,7 @@ async function runStage({ scriptPath, installerKind, stage, emit, hermesHome, ac
    json,
    error: json.reason || `exit code ${result.code}`
  }
-
  emit(ev)
-
  return ev
 }

@@ -676,7 +603,6 @@ function openRunLog(logRoot) {
  const ts = new Date().toISOString().replace(/[:.]/g, '-')
  const logPath = path.join(logRoot, `bootstrap-${ts}.log`)
  const stream = fs.createWriteStream(logPath, { flags: 'a' })
-
  return { path: logPath, stream }
 }

@@ -707,7 +633,6 @@ async function runBootstrap(opts) {
        void 0
      }
    }
-
    return { ok: false, cancelled: true }
  }

@@ -721,9 +646,8 @@ async function runBootstrap(opts) {
    } catch {
      void 0
    }
-
    try {
-      if (typeof onEvent === 'function') {onEvent(ev)}
+      if (typeof onEvent === 'function') onEvent(ev)
    } catch (err) {
      // Don't let a subscriber bug crash the bootstrap
      runLog.stream.write(`emit error: ${err && err.message}\n`)
@@ -753,7 +677,6 @@ async function runBootstrap(opts) {
      activeRoot,
      installStamp
    })
-
    emit({
      type: 'manifest',
      stages: manifest.stages,
@@ -767,10 +690,8 @@ async function runBootstrap(opts) {
    for (const stage of manifest.stages) {
      if (abortSignal && abortSignal.aborted) {
        emit({ type: 'failed', error: 'bootstrap cancelled by user' })
-
        return { ok: false, cancelled: true }
      }
-
      const ev = await runStage({
        scriptPath: scriptInfo.path,
        installerKind,
@@ -781,11 +702,9 @@ async function runBootstrap(opts) {
        abortSignal,
        installStamp
      })
-
      if (ev.state === 'failed') {
-        emit({ type: 'failed', stage: stage.name, error: (ev as any).error || 'stage failed' })
-
-        return { ok: false, failedStage: stage.name, error: (ev as any).error }
+        emit({ type: 'failed', stage: stage.name, error: ev.error || 'stage failed' })
+        return { ok: false, failedStage: stage.name, error: ev.error }
      }
    }

@@ -794,14 +713,11 @@ async function runBootstrap(opts) {
      pinnedCommit: installStamp ? installStamp.commit : null,
      pinnedBranch: installStamp ? installStamp.branch : null
    }
-
    const marker = typeof writeMarker === 'function' ? writeMarker(markerPayload) : markerPayload
    emit({ type: 'complete', marker })
-
    return { ok: true, marker }
  } catch (err) {
    emit({ type: 'failed', error: err.message || String(err) })
-
    return { ok: false, error: err.message || String(err) }
  } finally {
    try {
@@ -812,10 +728,12 @@ async function runBootstrap(opts) {
  }
 }

-export { cachedScriptPath,
-  installedAgentInstallScript,
+module.exports = {
+  runBootstrap,
  // Exposed for testability
  parseStageResult,
-  resolveInstallScript,
  resolveLocalInstallScript,
-  runBootstrap }
+  resolveInstallScript,
+  installedAgentInstallScript,
+  cachedScriptPath
+}
--- a/apps/desktop/electron/bootstrap-runner.test.cjs
+++ b/apps/desktop/electron/bootstrap-runner.test.cjs
@@ -1,13 +1,15 @@
-import assert from 'node:assert/strict'
-import fs from 'node:fs'
-import os from 'node:os'
-import path from 'node:path'
-import test from 'node:test'
+const assert = require('node:assert/strict')
+const test = require('node:test')
+const fs = require('node:fs')
+const os = require('node:os')
+const path = require('node:path')

-import { cachedScriptPath,
-  installedAgentInstallScript,
+const {
+  runBootstrap,
  resolveInstallScript,
-  runBootstrap } from './bootstrap-runner'
+  installedAgentInstallScript,
+  cachedScriptPath
+} = require('./bootstrap-runner.cjs')

 const SCRIPT_NAME = process.platform === 'win32' ? 'install.ps1' : 'install.sh'

@@ -20,7 +22,6 @@ test('runBootstrap bails immediately when the signal is already aborted', async
  controller.abort()

  const events = []
-
  const result = await runBootstrap({
    installStamp: null,
    activeRoot: '/tmp/hermes-runner-test',
@@ -41,7 +42,6 @@ test('runBootstrap bails immediately when the signal is already aborted', async

 test('installedAgentInstallScript resolves the installer in the agent checkout', () => {
  const home = mkTmpHome()
-
  try {
    assert.equal(installedAgentInstallScript(home), null, 'absent before the checkout exists')

@@ -59,7 +59,6 @@ test('installedAgentInstallScript resolves the installer in the agent checkout',

 test('resolveInstallScript prefers a cached script without touching the network', async () => {
  const home = mkTmpHome()
-
  try {
    const commit = 'a'.repeat(40)
    const cached = cachedScriptPath(home, commit)
@@ -67,7 +66,6 @@ test('resolveInstallScript prefers a cached script without touching the network'
    fs.writeFileSync(cached, '#!/bin/sh\necho cached\n')

    const logs = []
-
    const result = await resolveInstallScript({
      installStamp: { commit },
      sourceRepoRoot: null,
@@ -84,7 +82,6 @@ test('resolveInstallScript prefers a cached script without touching the network'

 test('resolveInstallScript falls back to the installed agent checkout on a 404', async () => {
  const home = mkTmpHome()
-
  try {
    const commit = 'a'.repeat(40)
    // Seed the installed agent checkout so the fallback has something to resolve.
@@ -94,7 +91,6 @@ test('resolveInstallScript falls back to the installed agent checkout on a 404',
    fs.writeFileSync(installed, '#!/bin/sh\necho fallback\n')

    const logs = []
-
    const result = await resolveInstallScript({
      installStamp: { commit },
      sourceRepoRoot: null,
@@ -121,7 +117,6 @@ test('resolveInstallScript falls back to the installed agent checkout on a 404',

 test('resolveInstallScript rethrows when the 404 fallback is unavailable', async () => {
  const home = mkTmpHome()
-
  try {
    const commit = 'a'.repeat(40)
    // No installed agent checkout seeded -> nothing to fall back to.
--- a/apps/desktop/electron/build-mode.cjs
+++ b/apps/desktop/electron/build-mode.cjs
@@ -0,0 +1,20 @@
+'use strict'
+
+/**
+ * build-mode.cjs — pure helper for the desktop's thin-vs-thick build mode.
+ *
+ * The desktop ships in two shapes:
+ *   - thick (default): bundles the first-launch bootstrap installer, can
+ *     spawn a local Hermes backend, and supports in-app self-update.
+ *   - thin: no bootstrap, no local backend, no self-update. Connects ONLY
+ *     to a remote gateway. Used for sandboxed/package-managed deployments
+ *     (Flatpak, Snap, etc.) where the agent lives elsewhere.
+ *
+ * When bundled, esbuild bakes this env var into the output, so the mode is fixed at build time, not read at runtime.
+ */
+
+function isThinClient() { // true only for a thin build
+  return process.env.HERMES_DESKTOP_BUILD_MODE === 'thin' // exact match: unset or any other value yields false
+}
+
+module.exports = { isThinClient }
--- a/apps/desktop/electron/build-mode.test.cjs
+++ b/apps/desktop/electron/build-mode.test.cjs
@@ -0,0 +1,41 @@
+'use strict'
+
+const test = require('node:test')
+const assert = require('node:assert/strict')
+
+// build-mode.cjs reads process.env.HERMES_DESKTOP_BUILD_MODE at call time (not
+// import time). Each case sets the variable, evaluates a freshly required
+// module, and restores the previous value even if an assertion throws.
+
+function freshModule() {
+  // Bust the require cache so the module re-evaluates with the current env.
+  delete require.cache[require.resolve('./build-mode.cjs')]
+  return require('./build-mode.cjs')
+}
+
+// Returns isThinClient() with the env var set to `value` (undefined = unset).
+function isThinClientWith(value) {
+  const prev = process.env.HERMES_DESKTOP_BUILD_MODE
+  try {
+    if (value === undefined) delete process.env.HERMES_DESKTOP_BUILD_MODE
+    else process.env.HERMES_DESKTOP_BUILD_MODE = value
+    return freshModule().isThinClient()
+  } finally {
+    // process.env stringifies values, so assigning undefined would leak 'undefined'.
+    if (prev === undefined) delete process.env.HERMES_DESKTOP_BUILD_MODE
+    else process.env.HERMES_DESKTOP_BUILD_MODE = prev
+  }
+}
+
+test('isThinClient returns false by default (thick mode)', () => {
+  assert.equal(isThinClientWith(undefined), false)
+})
+
+test('isThinClient returns true when HERMES_DESKTOP_BUILD_MODE=thin', () => {
+  assert.equal(isThinClientWith('thin'), true)
+})
+
+test('isThinClient returns false for non-thin values', () => {
+  assert.equal(isThinClientWith('thick'), false)
+  assert.equal(isThinClientWith('thick-client'), false)
+})
--- a/apps/desktop/electron/connection-config.cjs
+++ b/apps/desktop/electron/connection-config.cjs
@@ -45,7 +45,6 @@ function normalizeRemoteBaseUrl(rawUrl) {
  }

  let parsed
-
  try {
    parsed = new URL(value)
  } catch (error) {
@@ -106,16 +105,13 @@ function buildGatewayWsUrlWithTicket(baseUrl, ticket) {
 * @param {{ mintTicket: (baseUrl: string) => Promise<string> }} deps
 * @returns {Promise<string|null>}
 */
-async function resolveTestWsUrl(baseUrl, authMode, token, deps: any = {}) {
+async function resolveTestWsUrl(baseUrl, authMode, token, deps = {}) {
  if (authMode === 'oauth') {
    const mintTicket = deps.mintTicket
-
    if (typeof mintTicket !== 'function') {
      throw new Error('resolveTestWsUrl: a mintTicket function is required in OAuth mode.')
    }
-
    let ticket
-
    try {
      ticket = await mintTicket(baseUrl)
    } catch (error) {
@@ -123,19 +119,15 @@ async function resolveTestWsUrl(baseUrl, authMode, token, deps: any = {}) {
        'Reached the gateway over HTTP, but could not mint a WebSocket ticket for the OAuth session ' +
          '(it may have expired). Open Settings → Gateway and sign in again.'
      )
-
-      ;(err as any).needsOauthLogin = true
+      err.needsOauthLogin = true
      err.cause = error
      throw err
    }
-
    return buildGatewayWsUrlWithTicket(baseUrl, ticket)
  }
-
  if (!token) {
    return null
  }
-
  return buildGatewayWsUrl(baseUrl, token)
 }

@@ -162,13 +154,11 @@ function normAuthMode(mode) {
 function profileRemoteOverride(config, profile) {
  const key = connectionScopeKey(profile)
  const entry = key ? config?.profiles?.[key] : null
-
  if (!entry || typeof entry !== 'object' || entry.mode !== 'remote') {
    return null
  }

  const url = String(entry.url || '').trim()
-
  if (!url) {
    return null
  }
@@ -182,21 +172,18 @@ function profileRemoteOverride(config, profile) {
 * query parameter. Local pooled backends and per-profile remote overrides do not
 * need this: they already run against a backend scoped to the target profile.
 */
-function pathWithGlobalRemoteProfile(path, profile, opts: any = {}) {
+function pathWithGlobalRemoteProfile(path, profile, opts = {}) {
  const scopedProfile = connectionScopeKey(profile)
-
  if (!scopedProfile || !opts.globalRemote || opts.profileRemoteOverride) {
    return path
  }

  const rawPath = String(path || '')
-
  if (!rawPath) {
    return path
  }

  let parsed
-
  try {
    parsed = new URL(rawPath, 'http://hermes.local')
  } catch {
@@ -237,12 +224,9 @@ function authModeFromStatus(statusBody) {
 * Returns 'oauth' | 'token'.
 */
 function resolveAuthMode(inputAuthMode, existingAuthMode) {
-  if (inputAuthMode === 'oauth') {return 'oauth'}
-
-  if (inputAuthMode === 'token') {return 'token'}
-
-  if (existingAuthMode === 'oauth') {return 'oauth'}
-
+  if (inputAuthMode === 'oauth') return 'oauth'
+  if (inputAuthMode === 'token') return 'token'
+  if (existingAuthMode === 'oauth') return 'oauth'
  return 'token'
 }

@@ -258,8 +242,7 @@ function resolveAuthMode(inputAuthMode, existingAuthMode) {
 * need to know whether an unexpired access token is present right now.
 */
 function cookiesHaveSession(cookies) {
-  if (!Array.isArray(cookies)) {return false}
-
+  if (!Array.isArray(cookies)) return false
  return cookies.some(c => c && AT_COOKIE_VARIANTS.includes(c.name) && c.value)
 }

@@ -277,23 +260,24 @@ function cookiesHaveSession(cookies) {
 * the RT is also dead/revoked).
 */
 function cookiesHaveLiveSession(cookies) {
-  if (!Array.isArray(cookies)) {return false}
-
+  if (!Array.isArray(cookies)) return false
  return cookies.some(c => c && c.value && (AT_COOKIE_VARIANTS.includes(c.name) || RT_COOKIE_VARIANTS.includes(c.name)))
 }

-export { AT_COOKIE_VARIANTS,
+module.exports = {
+  AT_COOKIE_VARIANTS,
+  RT_COOKIE_VARIANTS,
  authModeFromStatus,
  buildGatewayWsUrl,
  buildGatewayWsUrlWithTicket,
  connectionScopeKey,
-  cookiesHaveLiveSession,
  cookiesHaveSession,
-  normalizeRemoteBaseUrl,
+  cookiesHaveLiveSession,
  normAuthMode,
+  normalizeRemoteBaseUrl,
  pathWithGlobalRemoteProfile,
  profileRemoteOverride,
  resolveAuthMode,
  resolveTestWsUrl,
-  RT_COOKIE_VARIANTS,
-  tokenPreview }
+  tokenPreview
+}
--- a/apps/desktop/electron/connection-config.test.cjs
+++ b/apps/desktop/electron/connection-config.test.cjs
@@ -10,24 +10,26 @@
 * and the OAuth session-cookie detector.
 */

-import assert from 'node:assert/strict'
-import test from 'node:test'
+const test = require('node:test')
+const assert = require('node:assert/strict')

-import { AT_COOKIE_VARIANTS,
+const {
+  AT_COOKIE_VARIANTS,
+  RT_COOKIE_VARIANTS,
  authModeFromStatus,
  buildGatewayWsUrl,
  buildGatewayWsUrlWithTicket,
  connectionScopeKey,
-  cookiesHaveLiveSession,
  cookiesHaveSession,
-  normalizeRemoteBaseUrl,
+  cookiesHaveLiveSession,
  normAuthMode,
+  normalizeRemoteBaseUrl,
  pathWithGlobalRemoteProfile,
  profileRemoteOverride,
  resolveAuthMode,
  resolveTestWsUrl,
-  RT_COOKIE_VARIANTS,
-  tokenPreview } from './connection-config'
+  tokenPreview
+} = require('./connection-config.cjs')

 // --- connectionScopeKey / normAuthMode ---

@@ -71,7 +73,6 @@ test('profileRemoteOverride returns the per-profile remote with defaulted auth m
      coder: { mode: 'remote', url: '  https://coder.example.com/hermes  ', token: { value: 'sek' } }
    }
  }
-
  assert.deepEqual(profileRemoteOverride(config, 'coder'), {
    url: 'https://coder.example.com/hermes',
    authMode: 'token',
@@ -364,7 +365,6 @@ test('resolveTestWsUrl (oauth, mint ok) builds a ?ticket= URL', async () => {
  const url = await resolveTestWsUrl('https://gw.example.com', 'oauth', null, {
    mintTicket: async () => 'tkt-9'
  })
-
  assert.equal(url, 'wss://gw.example.com/api/ws?ticket=tkt-9')
 })

@@ -376,14 +376,13 @@ test('resolveTestWsUrl (oauth, mint FAILS) throws — must NOT skip WS validatio
          throw new Error('401 ticket mint failed')
        }
      }),
-    (err: any) => {
+    err => {
      // Actionable, points the user at re-auth, and preserves the cause + flag
      // the boot overlay uses to offer a sign-in prompt.
      assert.match(err.message, /WebSocket ticket/i)
      assert.match(err.message, /sign in again/i)
      assert.equal(err.needsOauthLogin, true)
      assert.ok(err.cause instanceof Error)
-
      return true
    }
  )
--- a/apps/desktop/electron/dashboard-token.cjs
+++ b/apps/desktop/electron/dashboard-token.cjs
@@ -9,35 +9,29 @@

 const DEFAULT_TOKEN_FETCH_TIMEOUT_MS = 3_000

-async function fetchPublicText(url, options: any = {}) {
+async function fetchPublicText(url, options = {}) {
  const { protocol } = new URL(url)
-
  if (protocol !== 'http:' && protocol !== 'https:') {
    throw new Error(`Unsupported Hermes backend URL protocol: ${protocol}`)
  }

  const timeoutMs = options.timeoutMs ?? DEFAULT_TOKEN_FETCH_TIMEOUT_MS
-
  const res = await fetch(url, { signal: AbortSignal.timeout(timeoutMs) }).catch(error => {
    if (error.name === 'TimeoutError') {
      throw new Error(`Timed out connecting to Hermes backend after ${timeoutMs}ms`)
    }
-
    throw error
  })
-
  const text = await res.text()

-  if (!res.ok) {throw new Error(`${res.status}: ${text || res.statusText}`)}
+  if (!res.ok) throw new Error(`${res.status}: ${text || res.statusText}`)

  return text
 }

 function extractInjectedDashboardToken(html) {
  const match = /window\.__HERMES_SESSION_TOKEN__\s*=\s*("(?:\\.|[^"\\])*")/.exec(String(html || ''))
-
-  if (!match) {return null}
-
+  if (!match) return null
  try {
    return JSON.parse(match[1])
  } catch {
@@ -49,13 +43,11 @@ function dashboardIndexUrl(baseUrl) {
  return `${String(baseUrl || '').replace(/\/+$/, '')}/`
 }

-async function resolveServedDashboardToken(baseUrl, fallbackToken, options: any = {}) {
+async function resolveServedDashboardToken(baseUrl, fallbackToken, options = {}) {
  const fetchText = options.fetchText || fetchPublicText
-
  const html = await fetchText(dashboardIndexUrl(baseUrl), {
    timeoutMs: options.timeoutMs ?? DEFAULT_TOKEN_FETCH_TIMEOUT_MS
  })
-
  const servedToken = extractInjectedDashboardToken(html)

  if (servedToken && servedToken !== fallbackToken && typeof options.rememberLog === 'function') {
@@ -84,7 +76,6 @@ function isForeignBackendToken({ servedToken, spawnToken, childAlive }) {
 async function adoptServedDashboardToken(baseUrl, spawnToken, { childAlive, label = 'Hermes backend', ...options }) {
  const servedToken = await resolveServedDashboardToken(baseUrl, spawnToken, options).catch(error => {
    options.rememberLog?.(`[boot] could not read served dashboard token (${label}): ${error.message}`)
-
    return spawnToken
  })

@@ -97,10 +88,12 @@ async function adoptServedDashboardToken(baseUrl, spawnToken, { childAlive, labe
  return servedToken
 }

-export { adoptServedDashboardToken,
-  dashboardIndexUrl,
+module.exports = {
  DEFAULT_TOKEN_FETCH_TIMEOUT_MS,
+  adoptServedDashboardToken,
+  dashboardIndexUrl,
  extractInjectedDashboardToken,
  fetchPublicText,
  isForeignBackendToken,
-  resolveServedDashboardToken }
+  resolveServedDashboardToken
+}
--- a/apps/desktop/electron/dashboard-token.test.cjs
+++ b/apps/desktop/electron/dashboard-token.test.cjs
@@ -5,15 +5,17 @@
 * (Wired into npm test:desktop:platforms in package.json.)
 */

-import assert from 'node:assert/strict'
-import test from 'node:test'
+const test = require('node:test')
+const assert = require('node:assert/strict')

-import { adoptServedDashboardToken,
+const {
+  adoptServedDashboardToken,
  dashboardIndexUrl,
  extractInjectedDashboardToken,
  fetchPublicText,
  isForeignBackendToken,
-  resolveServedDashboardToken } from './dashboard-token'
+  resolveServedDashboardToken
+} = require('./dashboard-token.cjs')

 test('extractInjectedDashboardToken reads the JSON-encoded dashboard token', () => {
  const html = '<script>window.__HERMES_SESSION_TOKEN__="served-token";window.__HERMES_BASE_PATH__=""</script>'
@@ -37,11 +39,9 @@ test('dashboardIndexUrl preserves dashboard path prefixes', () => {

 test('resolveServedDashboardToken uses the served token and logs when it differs', async () => {
  const logs = []
-
  const token = await resolveServedDashboardToken('http://127.0.0.1:9120', 'spawn-token', {
    fetchText: async url => {
      assert.equal(url, 'http://127.0.0.1:9120/')
-
      return '<script>window.__HERMES_SESSION_TOKEN__="served-token";</script>'
    },
    rememberLog: line => logs.push(line)
@@ -100,9 +100,8 @@ test('isForeignBackendToken only flags a mismatched token from a dead child', ()
    [{ servedToken: null, spawnToken: 'mine', childAlive: false }, false],
    [{ servedToken: '', spawnToken: 'mine', childAlive: false }, false]
  ]
-
  for (const [input, expected] of cases) {
-    assert.equal(isForeignBackendToken(input as any), expected, JSON.stringify(input))
+    assert.equal(isForeignBackendToken(input), expected, JSON.stringify(input))
  }
 })

@@ -129,7 +128,6 @@ test('adoptServedDashboardToken refuses a foreign token when our child is dead',

 test('adoptServedDashboardToken falls back to the spawn token when the fetch fails', async () => {
  const logs = []
-
  const token = await adoptServedDashboardToken('http://127.0.0.1:9120', 'spawn-token', {
    childAlive: () => true,
    fetchText: async () => {
--- a/apps/desktop/electron/desktop-uninstall.cjs
+++ b/apps/desktop/electron/desktop-uninstall.cjs
@@ -26,7 +26,7 @@
 * shape as the self-update swap-and-relaunch flow already in main.cjs.
 */

-import path from 'node:path'
+const path = require('node:path')

 const UNINSTALL_MODES = ['gui', 'lite', 'full']

@@ -41,7 +41,6 @@ function uninstallArgsForMode(mode) {
  if (!UNINSTALL_MODES.includes(mode)) {
    throw new Error(`Unknown uninstall mode: ${mode}`)
  }
-
  return ['-m', 'hermes_cli.uninstall', '--mode', mode]
 }

@@ -66,10 +65,9 @@ function modeRemovesUserData(mode) {
 * Returns null when we can't confidently identify a removable bundle (e.g.
 * running from a dev checkout, or a system-package install we must not rmtree).
 */
-function resolveRemovableAppPath(execPath, platform, env: any = {}) {
+function resolveRemovableAppPath(execPath, platform, env = {}) {
  const exe = String(execPath || '')
-
-  if (!exe) {return null}
+  if (!exe) return null

  // Use the path flavor that matches the TARGET platform, not the host running
  // this code — so the Windows branch parses backslash paths correctly even
@@ -81,28 +79,22 @@ function resolveRemovableAppPath(execPath, platform, env: any = {}) {
    const macOsDir = p.dirname(exe) // …/Contents/MacOS
    const contents = p.dirname(macOsDir) // …/Contents
    const appBundle = p.dirname(contents) // …/Hermes.app
-
-    if (appBundle.endsWith('.app')) {return appBundle}
-
+    if (appBundle.endsWith('.app')) return appBundle
    return null
  }

  if (platform === 'win32') {
    // NSIS per-user installs Hermes.exe directly in the install dir.
    const dir = p.dirname(exe)
-
-    if (/[\\/]Hermes$/i.test(dir) || /[\\/]hermes-desktop$/i.test(dir)) {return dir}
-
+    if (/[\\/]Hermes$/i.test(dir) || /[\\/]hermes-desktop$/i.test(dir)) return dir
    return null
  }

  // Linux: an AppImage exposes its own path via the APPIMAGE env var.
-  if (env.APPIMAGE) {return env.APPIMAGE}
+  if (env.APPIMAGE) return env.APPIMAGE
  // Unpacked electron-builder tree: …/linux-unpacked/hermes
  const dir = p.dirname(exe)
-
-  if (/-unpacked$/.test(dir)) {return dir}
-
+  if (/-unpacked$/.test(dir)) return dir
  return null
 }

@@ -129,7 +121,6 @@ function shouldRemoveAppBundle(isPackaged, appPath) {
 */
 function buildPosixCleanupScript({ desktopPid, pythonExe, pythonPath, agentRoot, uninstallArgs, appPath, hermesHome }) {
  const q = s => `'${String(s).replace(/'/g, `'\\''`)}'`
-
  const lines = [
    '#!/bin/bash',
    'set -u',
@@ -144,21 +135,16 @@ function buildPosixCleanupScript({ desktopPid, pythonExe, pythonPath, agentRoot,
    'fi',
    `export HERMES_HOME=${q(hermesHome)}`
  ]
-
  if (pythonPath) {
    lines.push(`export PYTHONPATH=${q(pythonPath)}\${PYTHONPATH:+:$PYTHONPATH}`)
  }
-
  lines.push(`cd ${q(agentRoot)} 2>/dev/null || true`, `${q(pythonExe)} ${uninstallArgs.map(q).join(' ')} || true`)
-
  if (appPath) {
    lines.push(`rm -rf ${q(appPath)} || true`)
  }
-
  // Self-delete the script.
  lines.push('rm -f "$0" 2>/dev/null || true')
  lines.push('')
-
  return lines.join('\n')
 }

@@ -194,18 +180,15 @@ function buildWindowsCleanupScript({
  // under %LOCALAPPDATA% never contain them). `&`/`^` in a path would still be
  // a problem, but Hermes install paths don't use them.
  const q = s => `"${String(s).replace(/"/g, '')}"`
-
  const lines = [
    '@echo off',
    'setlocal enableextensions',
    `set "HERMES_HOME=${String(hermesHome).replace(/"/g, '')}"`,
    `set "PID=${pid}"`
  ]
-
  if (pythonPath) {
    lines.push(`set "PYTHONPATH=${String(pythonPath).replace(/"/g, '')};%PYTHONPATH%"`)
  }
-
  lines.push(
    'set /a waited=0',
    ':waitloop',
@@ -223,7 +206,6 @@ function buildWindowsCleanupScript({
    `cd /d ${q(agentRoot)}`,
    `${q(pythonExe)} ${uninstallArgs.map(q).join(' ')}`
  )
-
  if (appPath) {
    lines.push(
      'set /a tries=0',
@@ -238,18 +220,18 @@ function buildWindowsCleanupScript({
      ':rmdone'
    )
  }
-
  lines.push('del "%~f0"')
  lines.push('')
-
  return lines.join('\r\n')
 }

-export { buildPosixCleanupScript,
+module.exports = {
+  UNINSTALL_MODES,
+  buildPosixCleanupScript,
  buildWindowsCleanupScript,
  modeRemovesAgent,
  modeRemovesUserData,
  resolveRemovableAppPath,
  shouldRemoveAppBundle,
-  UNINSTALL_MODES,
-  uninstallArgsForMode }
+  uninstallArgsForMode
+}
--- a/apps/desktop/electron/desktop-uninstall.test.cjs
+++ b/apps/desktop/electron/desktop-uninstall.test.cjs
@@ -9,17 +9,19 @@
 * cleanup-script builders (POSIX + Windows).
 */

-import assert from 'node:assert/strict'
-import test from 'node:test'
+const test = require('node:test')
+const assert = require('node:assert/strict')

-import { buildPosixCleanupScript,
+const {
+  UNINSTALL_MODES,
+  buildPosixCleanupScript,
  buildWindowsCleanupScript,
  modeRemovesAgent,
  modeRemovesUserData,
  resolveRemovableAppPath,
  shouldRemoveAppBundle,
-  UNINSTALL_MODES,
-  uninstallArgsForMode } from './desktop-uninstall'
+  uninstallArgsForMode
+} = require('./desktop-uninstall.cjs')

 // --- uninstallArgsForMode ---

@@ -130,7 +132,6 @@ test('buildPosixCleanupScript waits for the PID, runs the uninstall module, remo
    appPath: '/opt/hermes/linux-unpacked',
    hermesHome: '/home/x/.hermes'
  })
-
  assert.match(script, /^#!\/bin\/bash/)
  assert.match(script, /pid=4321/)
  assert.match(script, /kill -0 "\$pid"/)
@@ -151,7 +152,6 @@ test('buildPosixCleanupScript exports PYTHONPATH when pythonPath is set (lite/fu
    appPath: null,
    hermesHome: '/home/x/.hermes'
  })
-
  // System python + source on PYTHONPATH so import hermes_cli works while the
  // venv is torn down.
  assert.match(script, /export PYTHONPATH='\/home\/x\/\.hermes\/hermes-agent'/)
@@ -168,7 +168,6 @@ test('buildPosixCleanupScript omits PYTHONPATH when pythonPath is null (gui)', (
    appPath: null,
    hermesHome: '/h'
  })
-
  assert.doesNotMatch(script, /export PYTHONPATH/)
 })

@@ -182,7 +181,6 @@ test('buildPosixCleanupScript omits the bundle rm when appPath is null', () => {
    appPath: null,
    hermesHome: '/h'
  })
-
  assert.doesNotMatch(script, /rm -rf '\//)
  // Still runs the uninstall.
  assert.match(script, /'-m' 'hermes_cli\.uninstall' '--mode' 'lite'/)
@@ -198,7 +196,6 @@ test('buildPosixCleanupScript single-quote-escapes paths with apostrophes', () =
    appPath: null,
    hermesHome: '/h'
  })
-
  // The apostrophe is closed-escaped-reopened so the shell sees the literal.
  assert.match(script, /'\/home\/o'\\''brien\/python'/)
 })
@@ -215,7 +212,6 @@ test('buildWindowsCleanupScript waits (bounded) for PID, runs uninstall, rmdir b
    appPath: 'C:\\Users\\x\\AppData\\Local\\Programs\\Hermes',
    hermesHome: 'C:\\Users\\x\\AppData\\Local\\hermes'
  })
-
  assert.match(script, /@echo off/)
  assert.match(script, /set "PID=9988"/)
  // PYTHONPATH set so a system python can import hermes_cli from source.
@@ -242,7 +238,6 @@ test('buildWindowsCleanupScript omits PYTHONPATH + rmdir when not needed (gui, n
    appPath: null,
    hermesHome: 'C:\\h'
  })
-
  assert.doesNotMatch(script, /rmdir/)
  assert.doesNotMatch(script, /set "PYTHONPATH=/)
 })
--- a/apps/desktop/electron/embed-referer.cjs
+++ b/apps/desktop/electron/embed-referer.cjs
@@ -1,8 +1,9 @@
-import { session } from 'electron'
+'use strict'
+
+const { session } = require('electron')

 const EMBED_SESSION_PARTITION = 'persist:hermes-embed'
 const EMBED_REFERER = 'https://www.youtube.com/'
-
 const YOUTUBE_REFERER_HOST_RE =
  /(^|\.)(youtube\.com|youtube-nocookie\.com|googlevideo\.com|ytimg\.com|youtubei\.googleapis\.com)$/i

@@ -22,7 +23,6 @@ function installEmbedRefererForSession(embedSession) {

    if (!YOUTUBE_REFERER_HOST_RE.test(host)) {
      callback({ requestHeaders: details.requestHeaders })
-
      return
    }

@@ -45,4 +45,4 @@ function installEmbedReferer() {
  }
 }

-export { installEmbedReferer }
+module.exports = { installEmbedReferer }
--- a/apps/desktop/electron/fs-read-dir.cjs
+++ b/apps/desktop/electron/fs-read-dir.cjs
@@ -1,7 +1,8 @@
-import fs from 'node:fs'
-import path from 'node:path'
+'use strict'

-import { resolveDirectoryForIpc } from './hardening'
+const fs = require('node:fs')
+const path = require('node:path')
+const { resolveDirectoryForIpc } = require('./hardening.cjs')

 const FS_READDIR_STAT_CONCURRENCY = 16

@@ -36,7 +37,7 @@ function direntIsSymbolicLink(dirent) {
 }

 function shouldStatDirent(dirent) {
-  if (direntIsDirectory(dirent)) {return false}
+  if (direntIsDirectory(dirent)) return false

  return direntIsSymbolicLink(dirent) || !direntIsFile(dirent)
 }
@@ -69,13 +70,13 @@ async function mapWithStatConcurrency(items, mapper) {
  }

  const workerCount = Math.min(FS_READDIR_STAT_CONCURRENCY, items.length)
-  const workers = Array.from({ length: workerCount } as any, () => runWorker())
+  const workers = Array.from({ length: workerCount }, () => runWorker())
  await Promise.all(workers)

  return results
 }

-async function readDirForIpc(dirPath, options: any = {}) {
+async function readDirForIpc(dirPath, options = {}) {
  const fsImpl = options.fs || fs
  let resolved

@@ -101,4 +102,6 @@ async function readDirForIpc(dirPath, options: any = {}) {
  }
 }

-export { readDirForIpc }
+module.exports = {
+  readDirForIpc
+}
--- a/apps/desktop/electron/fs-read-dir.test.cjs
+++ b/apps/desktop/electron/fs-read-dir.test.cjs
@@ -1,17 +1,19 @@
-import assert from 'node:assert/strict'
-import fs from 'node:fs'
-import os from 'node:os'
-import path from 'node:path'
-import test from 'node:test'
-import { pathToFileURL } from 'node:url'
+'use strict'

-import { readDirForIpc } from './fs-read-dir'
+const assert = require('node:assert/strict')
+const fs = require('node:fs')
+const os = require('node:os')
+const path = require('node:path')
+const test = require('node:test')
+const { pathToFileURL } = require('node:url')
+
+const { readDirForIpc } = require('./fs-read-dir.cjs')

 function mkTmpDir() {
  return fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-fs-read-dir-'))
 }

-function fakeDirent(name, flags: any = {}) {
+function fakeDirent(name, flags = {}) {
  return {
    name,
    isDirectory: () => Boolean(flags.directory),
@@ -107,12 +109,10 @@ test('readDirForIpc accepts file URLs for directories', async () => {

 test('readDirForIpc returns invalid-path for blank or non-string input', async () => {
  let readdirCalls = 0
-
  const fsImpl = {
    promises: {
      readdir: async () => {
        readdirCalls += 1
-
        return []
      }
    }
@@ -126,12 +126,10 @@ test('readDirForIpc returns invalid-path for blank or non-string input', async (

 test('readDirForIpc rejects Windows device paths before readdir', async () => {
  let readdirCalls = 0
-
  const fsImpl = {
    promises: {
      readdir: async () => {
        readdirCalls += 1
-
        return []
      }
    }
@@ -226,7 +224,6 @@ test('readDirForIpc allows expanding symlink or junction directories outside the
    fs.writeFileSync(path.join(outside, 'outside.txt'), 'ok')

    const linkPath = path.join(root, 'outside-link')
-
    try {
      fs.symlinkSync(outside, linkPath, process.platform === 'win32' ? 'junction' : 'dir')
    } catch (error) {
@@ -255,7 +252,6 @@ test('readDirForIpc stats symbolic links and unknown entries without dropping th
  const input = path.join('virtual-root')
  const resolved = path.resolve(input)
  const statCalls = []
-
  const fsImpl = {
    promises: {
      readdir: async () => [
@@ -270,11 +266,9 @@ test('readDirForIpc stats symbolic links and unknown entries without dropping th
        }

        statCalls.push(fullPath)
-
        if (fullPath.endsWith(`${path.sep}linked-dir`)) {
          return { isDirectory: () => true }
        }
-
        throw Object.assign(new Error('gone'), { code: 'ENOENT' })
      }
    }
@@ -307,15 +301,12 @@ test('readDirForIpc bounds concurrent stats while preserving complete sorted out
  let peak = 0
  let releaseStats
  let markFirstStatStarted
-
  const statsReleased = new Promise(resolve => {
    releaseStats = resolve
  })
-
  const firstStatStarted = new Promise(resolve => {
    markFirstStatStarted = resolve
  })
-
  const fsImpl = {
    promises: {
      readdir: async () => [
@@ -335,7 +326,6 @@ test('readDirForIpc bounds concurrent stats while preserving complete sorted out
        active -= 1

        const name = path.basename(fullPath)
-
        if (name === failedName) {
          throw Object.assign(new Error('gone'), { code: 'ENOENT' })
        }
--- a/apps/desktop/electron/gateway-ws-probe.cjs
+++ b/apps/desktop/electron/gateway-ws-probe.cjs
@@ -36,13 +36,13 @@ const DEFAULT_READY_GRACE_MS = 750
 * Attempt a live WebSocket connection and classify the outcome.
 *
 * @param {string} wsUrl - Fully-formed ws(s):// URL including the credential.
+ * @param {object} [options]
+ * @param {new (url: string) => any} [options.WebSocketImpl] - WebSocket ctor.
+ * @param {number} [options.connectTimeoutMs]
+ * @param {number} [options.readyGraceMs]
 * @returns {Promise<{ ok: boolean, reason?: string }>}
 */
-function probeGatewayWebSocket<T>(wsUrl: string, options:{
-  WebSocketImpl?: any,
-  connectTimeoutMs?: number
-  readyGraceMs?: number
-} = {}) {
+function probeGatewayWebSocket(wsUrl, options = {}) {
  const WebSocketImpl = options.WebSocketImpl
  const connectTimeoutMs = options.connectTimeoutMs ?? DEFAULT_CONNECT_TIMEOUT_MS
  const readyGraceMs = options.readyGraceMs ?? DEFAULT_READY_GRACE_MS
@@ -54,7 +54,7 @@ function probeGatewayWebSocket<T>(wsUrl: string, options:{
    })
  }

-  return new Promise<any>(resolve => {
+  return new Promise(resolve => {
    let settled = false
    let opened = false
    let connectTimer = null
@@ -66,7 +66,6 @@ function probeGatewayWebSocket<T>(wsUrl: string, options:{
        clearTimeout(connectTimer)
        connectTimer = null
      }
-
      if (graceTimer !== null) {
        clearTimeout(graceTimer)
        graceTimer = null
@@ -74,16 +73,14 @@ function probeGatewayWebSocket<T>(wsUrl: string, options:{
    }

    const finish = result => {
-      if (settled) {return}
+      if (settled) return
      settled = true
      clearTimers()
-
      try {
        socket?.close?.()
      } catch {
        // ignore — best effort teardown
      }
-
      resolve(result)
    }

@@ -94,12 +91,11 @@ function probeGatewayWebSocket<T>(wsUrl: string, options:{
        ok: false,
        reason: error instanceof Error ? error.message : String(error)
      })
-
      return
    }

    const onOpen = () => {
-      if (settled) {return}
+      if (settled) return
      opened = true
      // Upgrade accepted. Give the server a brief window to reject the
      // credential post-handshake (early close) before declaring success.
@@ -122,8 +118,7 @@ function probeGatewayWebSocket<T>(wsUrl: string, options:{
    }

    const onClose = event => {
-      if (settled) {return}
-
+      if (settled) return
      if (opened) {
        // Opened, then closed inside the grace window: the upgrade was accepted
        // but the session was refused (e.g. ws-ticket/token rejected, or a
@@ -132,10 +127,8 @@ function probeGatewayWebSocket<T>(wsUrl: string, options:{
          ok: false,
          reason: closeReason(event, 'The gateway accepted the connection then closed it (credential rejected?).')
        })
-
        return
      }
-
      finish({
        ok: false,
        reason: closeReason(event, 'The gateway closed the WebSocket before it opened.')
@@ -161,10 +154,8 @@ function probeGatewayWebSocket<T>(wsUrl: string, options:{
 function addListener(socket, type, handler) {
  if (typeof socket.addEventListener === 'function') {
    socket.addEventListener(type, handler)
-
    return
  }
-
  // Node's global WebSocket implements addEventListener; this fallback keeps the
  // helper usable with the `ws` package's EventEmitter shape too.
  if (typeof socket.on === 'function') {
@@ -173,31 +164,25 @@ function addListener(socket, type, handler) {
 }

 function extractErrorReason(event) {
-  if (!event) {return ''}
-
-  if (event instanceof Error) {return event.message}
+  if (!event) return ''
+  if (event instanceof Error) return event.message
  const err = event.error || event.message
-
-  if (err instanceof Error) {return err.message}
-
-  if (typeof err === 'string') {return err}
-
+  if (err instanceof Error) return err.message
+  if (typeof err === 'string') return err
  return ''
 }

 function closeReason(event, fallback) {
  const code = event && typeof event.code === 'number' ? event.code : null
  const reason = event && typeof event.reason === 'string' ? event.reason.trim() : ''
-
-  if (code && reason) {return `${fallback} (code ${code}: ${reason})`}
-
-  if (code) {return `${fallback} (code ${code})`}
-
-  if (reason) {return `${fallback} (${reason})`}
-
+  if (code && reason) return `${fallback} (code ${code}: ${reason})`
+  if (code) return `${fallback} (code ${code})`
+  if (reason) return `${fallback} (${reason})`
  return fallback
 }

-export { DEFAULT_CONNECT_TIMEOUT_MS,
+module.exports = {
+  DEFAULT_CONNECT_TIMEOUT_MS,
  DEFAULT_READY_GRACE_MS,
-  probeGatewayWebSocket }
+  probeGatewayWebSocket
+}
--- a/apps/desktop/electron/gateway-ws-probe.test.cjs
+++ b/apps/desktop/electron/gateway-ws-probe.test.cjs
@@ -9,20 +9,16 @@
 * outcome (open, frame, error, early close, never-opens) without a network.
 */

-import assert from 'node:assert/strict'
-import test from 'node:test'
+const test = require('node:test')
+const assert = require('node:assert/strict')

-import { probeGatewayWebSocket } from './gateway-ws-probe'
+const { probeGatewayWebSocket } = require('./gateway-ws-probe.cjs')

 // Minimal WebSocket double: records listeners synchronously (the probe attaches
 // them in its executor) and exposes emit() so the test can replay events.
-function makeFakeWs(): { FakeWs: new (url: string) => any; instances: any[] } {
+function makeFakeWs() {
  const instances = []
-
  class FakeWs {
-    url: string
-    closed = false
-    listeners: Record<string, any[]> = {}
    constructor(url) {
      this.url = url
      this.listeners = {}
@@ -36,12 +32,9 @@ function makeFakeWs(): { FakeWs: new (url: string) => any; instances: any[] } {
      this.closed = true
    }
    emit(type, event) {
-      for (const fn of this.listeners[type] || []) {
-        fn(event)
-      }
+      for (const fn of this.listeners[type] || []) fn(event)
    }
  }
-
  return { FakeWs, instances }
 }

@@ -58,13 +51,11 @@ test('probe resolves ok when the socket opens and stays open', async () => {

 test('probe resolves ok immediately when a frame arrives', async () => {
  const { FakeWs, instances } = makeFakeWs()
-
  const promise = probeGatewayWebSocket('ws://host/api/ws?token=t', {
    WebSocketImpl: FakeWs,
    connectTimeoutMs: 1_000,
    readyGraceMs: 10_000 // long grace: success must come from the frame, not the timer
  })
-
  instances[0].emit('open')
  instances[0].emit('message', { data: '{"jsonrpc":"2.0"}' })
  const result = await promise
@@ -104,13 +95,11 @@ test('probe fails when the gateway accepts then immediately closes (auth rejecte

 test('probe times out when the socket never opens', async () => {
  const { FakeWs } = makeFakeWs()
-
  const result = await probeGatewayWebSocket('ws://host/api/ws?token=t', {
    WebSocketImpl: FakeWs,
    connectTimeoutMs: 20,
    readyGraceMs: 10
  })
-
  assert.equal(result.ok, false)
  assert.match(result.reason, /Timed out/)
 })
--- a/apps/desktop/electron/git-repo-scan.cjs
+++ b/apps/desktop/electron/git-repo-scan.cjs
@@ -1,12 +1,14 @@
+'use strict'
+
 // Repo-first discovery: walk bounded roots for git repos using only Node's `fs`
 // — no native addon, so it just works for anyone who pulls main (no
 // electron-rebuild). Mirrors how GitHub Desktop scans: stop at the first `.git`
 // (don't descend into a repo), cap depth, and skip heavy non-repo trees so the
 // first scan stays fast. Results are cached by the backend after the first run.

-import fs from 'node:fs'
-import os from 'node:os'
-import path from 'node:path'
+const fs = require('node:fs')
+const os = require('node:os')
+const path = require('node:path')

 const fsp = fs.promises

@@ -34,14 +36,14 @@ async function mapLimit(items, limit, fn) {
    }
  }

-  await Promise.all(Array.from({ length: Math.min(limit, items.length) } as any, worker))
+  await Promise.all(Array.from({ length: Math.min(limit, items.length) }, worker))
 }

 /**
 * Scan `roots` (default: the home dir) for git repositories. Returns deduped
 * `{ root, label }` entries. `options.maxDepth` caps recursion (default 3).
 */
-async function scanGitRepos(roots, options: any = {}) {
+async function scanGitRepos(roots, options = {}) {
  const maxDepth = Number(options.maxDepth) || DEFAULT_MAX_DEPTH
  const searchRoots = Array.isArray(roots) && roots.length > 0 ? roots : [os.homedir()]
  const found = new Map()
@@ -52,7 +54,6 @@ async function scanGitRepos(roots, options: any = {}) {
    }

    let entries
-
    try {
      entries = await fsp.readdir(dir, { withFileTypes: true })
    } catch {
@@ -72,7 +73,6 @@ async function scanGitRepos(roots, options: any = {}) {
    }

    const subdirs = []
-
    for (const entry of entries) {
      // Real directories only (skip symlinks to avoid loops), no hidden dirs, no
      // known heavy trees.
@@ -93,4 +93,4 @@ async function scanGitRepos(roots, options: any = {}) {
  return [...found.entries()].map(([root, label]) => ({ label, root }))
 }

-export { scanGitRepos }
+module.exports = { scanGitRepos }
--- a/apps/desktop/electron/git-review-ops.cjs
+++ b/apps/desktop/electron/git-review-ops.cjs
@@ -1,38 +1,18 @@
+'use strict'
+
 // Git ops backing the coding rail + Codex-style review pane. Built on `simple-git`
 // (a maintained wrapper around the system git binary — same git the rest of the
 // app shells to, no native build) so we read structured status()/diffSummary()
 // results instead of hand-parsing porcelain. Reads degrade to null/empty on a
 // non-repo / remote backend; mutations reject so the renderer can toast.

-import { execFile } from 'node:child_process'
-import fs from 'node:fs/promises'
-import path from 'node:path'
+const { execFile } = require('node:child_process')
+const fs = require('node:fs/promises')
+const path = require('node:path')

-import simpleGitFn from 'simple-git'
+const simpleGit = require('simple-git')

-import { resolveRequestedPathForIpc } from './hardening'
-
-// `simple-git` is a pure-JS runtime dep that workspace dedup hoists into the
-// repo-root node_modules.  Packaged builds set `files:` in package.json, which
-// excludes node_modules from the asar, so a normal import fails at launch
-// (issue #52735: "Cannot find module 'simple-git'").  We ship the dep's
-// closure under resources/native-deps/vendor/node_modules/ via extraResources
-// + scripts/stage-native-deps.mjs, and resolve from there when the hoisted
-// import isn't reachable.  The `vendor/` nesting matters: electron-builder
-// drops a node_modules dir at the root of an extraResources copy but keeps a
-// nested one.  Dev mode never hits the fallback -- Node's normal lookup finds
-// the hoisted copy.
-let simpleGit = simpleGitFn
-
-if (!simpleGit) {
-  const resourcesPath = (process as any).resourcesPath
-
-  if (!resourcesPath) {
-    throw new Error("git-review IPC: 'simple-git' not found and no resourcesPath to fall back to")
-  }
-
-  simpleGit = require(path.join(resourcesPath, 'native-deps', 'vendor', 'node_modules', 'simple-git'))
-}
+const { resolveRequestedPathForIpc } = require('./hardening.cjs')

 const COMMIT_CONTEXT_DIFF_MAX_CHARS = 120_000
 const COMMIT_CONTEXT_UNTRACKED_MAX = 80
@@ -53,7 +33,7 @@ function ghEnv(ghBin) {

 // Run the `gh` CLI in a repo. Resolves { ok, stdout } so callers branch on
 // availability/auth without a throw. gh missing/unauthed → ok:false.
-function runGh(args, cwd, ghBin): Promise<{ok: boolean, stdout: string}> {
+function runGh(args, cwd, ghBin) {
  return new Promise(resolve => {
    execFile(
      ghBin || 'gh',
@@ -261,11 +241,10 @@ async function reviewList(repoPath, scope, baseRef, gitBin) {

      const range = scope === 'branch' ? `${base}...HEAD` : base
      const summary = await git.diffSummary([range])
-
      const files = summary.files.map(file => ({
        path: resolveRenamePath(file.file),
-        added: 'insertions' in file ? file.insertions : 0 ,
-        removed: 'deletions' in file ? file.deletions : 0 ,
+        added: file.binary ? 0 : file.insertions,
+        removed: file.binary ? 0 : file.deletions,
        status: 'M',
        staged: false
      }))
@@ -293,7 +272,6 @@ async function reviewList(repoPath, scope, baseRef, gitBin) {
      git.diffSummary(['--cached']),
      git.diffSummary([])
    ])
-
    const stagedCounts = countsByPath(staged)
    const unstagedCounts = countsByPath(unstaged)

@@ -498,7 +476,6 @@ async function reviewCommitContext(repoPath, gitBin) {
  const safe = args => git.diff(args).catch(() => '')

  let status
-
  try {
    status = await git.status()
  } catch {
@@ -514,11 +491,9 @@ async function reviewCommitContext(repoPath, gitBin) {

  // Untracked files have no diff — list them so new files aren't invisible.
  const untracked = status.not_added || []
-
  if (untracked.length > 0) {
    const visible = untracked.slice(0, COMMIT_CONTEXT_UNTRACKED_MAX)
    const omitted = untracked.length - visible.length
-
    const note =
      `\n# New (untracked) files:\n${visible.map(p => `#   ${p}`).join('\n')}\n` +
      (omitted > 0 ? `#   ... ${omitted} more omitted\n` : '')
@@ -613,7 +588,6 @@ async function repoStatus(repoPath, gitBin) {
  // fail soft and hide the coding rail instead of spamming IPC handler errors.
  try {
    const stat = await fs.stat(cwd)
-
    if (!stat.isDirectory()) {
      return null
    }
@@ -622,13 +596,11 @@ async function repoStatus(repoPath, gitBin) {
  }

  let git
-
  try {
    git = gitFor(cwd, gitBin)
  } catch {
    return null
  }
-
  let status

  try {
@@ -639,7 +611,6 @@ async function repoStatus(repoPath, gitBin) {
  }

  const detached = typeof status.detached === 'boolean' ? status.detached : !status.current
-
  const files = status.files.map(file => ({
    path: file.path,
    staged: isStaged(file),
@@ -681,12 +652,10 @@ async function repoStatus(repoPath, gitBin) {
  // can't stall the probe.
  try {
    const untracked = status.not_added.slice(0, 500)
-
    for (let i = 0; i < untracked.length; i += UNTRACKED_LINE_COUNT_CONCURRENCY) {
      const batch = await Promise.all(
        untracked.slice(i, i + UNTRACKED_LINE_COUNT_CONCURRENCY).map(path => untrackedInsertions(cwd, path))
      )
-
      result.added += batch.reduce((sum, n) => sum + n, 0)
    }
  } catch {
@@ -696,7 +665,8 @@ async function repoStatus(repoPath, gitBin) {
  return result
 }

-export { branchBase,
+module.exports = {
+  branchBase,
  fileDiffVsHead,
  repoStatus,
  resolveRenamePath,
@@ -706,8 +676,9 @@ export { branchBase,
  reviewDiff,
  reviewList,
  reviewPush,
-  reviewRevert,
  reviewRevParse,
+  reviewRevert,
  reviewShipInfo,
  reviewStage,
-  reviewUnstage }
+  reviewUnstage
+}
--- a/apps/desktop/electron/git-review-ops.test.cjs
+++ b/apps/desktop/electron/git-review-ops.test.cjs
@@ -1,7 +1,9 @@
-import assert from 'node:assert/strict'
-import test from 'node:test'
+'use strict'

-import { resolveRenamePath } from './git-review-ops'
+const assert = require('node:assert/strict')
+const test = require('node:test')
+
+const { resolveRenamePath } = require('./git-review-ops.cjs')

 test('resolveRenamePath: plain path is unchanged', () => {
  assert.equal(resolveRenamePath('src/a.ts'), 'src/a.ts')
--- a/apps/desktop/electron/git-root.cjs
+++ b/apps/desktop/electron/git-root.cjs
@@ -1,7 +1,8 @@
-import fs from 'node:fs'
-import path from 'node:path'
+'use strict'

-import { resolveRequestedPathForIpc } from './hardening'
+const fs = require('node:fs')
+const path = require('node:path')
+const { resolveRequestedPathForIpc } = require('./hardening.cjs')

 function findGitRoot(start, fsImpl = fs) {
  let dir = start
@@ -27,7 +28,7 @@ function findGitRoot(start, fsImpl = fs) {
  return null
 }

-async function gitRootForIpc(startPath, options: {fs?: typeof fs} = {}) {
+async function gitRootForIpc(startPath, options = {}) {
  const fsImpl = options.fs || fs
  let resolved

@@ -47,5 +48,7 @@ async function gitRootForIpc(startPath, options: {fs?: typeof fs} = {}) {
  }
 }

-export { findGitRoot,
-  gitRootForIpc }
+module.exports = {
+  findGitRoot,
+  gitRootForIpc
+}
--- a/apps/desktop/electron/git-root.test.cjs
+++ b/apps/desktop/electron/git-root.test.cjs
@@ -1,11 +1,13 @@
-import assert from 'node:assert/strict'
-import fs from 'node:fs'
-import os from 'node:os'
-import path from 'node:path'
-import test from 'node:test'
-import { pathToFileURL } from 'node:url'
+'use strict'

-import { gitRootForIpc } from './git-root'
+const assert = require('node:assert/strict')
+const fs = require('node:fs')
+const os = require('node:os')
+const path = require('node:path')
+const test = require('node:test')
+const { pathToFileURL } = require('node:url')
+
+const { gitRootForIpc } = require('./git-root.cjs')

 function mkTmpDir() {
  return fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-git-root-'))
--- a/apps/desktop/electron/git-worktree-ops.cjs
+++ b/apps/desktop/electron/git-worktree-ops.cjs
@@ -1,14 +1,16 @@
+'use strict'
+
 // Git-driven worktree operations for the desktop "Start work" flow: spin up a
 // fresh worktree the lightest way (`git worktree add -b`), list real worktrees,
 // and remove them. Git is the source of truth; the renderer just drives these.

-import { execFile } from 'node:child_process'
-import fs from 'node:fs'
-import path from 'node:path'
+const path = require('node:path')
+const fs = require('node:fs')
+const { execFile } = require('node:child_process')

-import { resolveRequestedPathForIpc } from './hardening'
+const { resolveRequestedPathForIpc } = require('./hardening.cjs')

-function runGit(gitBin, args, cwd): Promise<string> {
+function runGit(gitBin, args, cwd) {
  return new Promise((resolve, reject) => {
    execFile(
      gitBin,
@@ -304,7 +306,6 @@ async function listBranches(repoPath, gitBin) {
      ['for-each-ref', '--format=%(refname:short)', '--sort=-committerdate', 'refs/heads'],
      resolved
    )
-
    const trees = await listWorktrees(resolved, gitBin)
    const pathByBranch = new Map(trees.filter(tree => tree.branch).map(tree => [tree.branch, tree.path]))
    const trunk = await defaultBranch(gitBin, resolved)
@@ -337,11 +338,13 @@ async function switchBranch(repoPath, branch, gitBin) {
  return { branch: target }
 }

-export { addWorktree,
+module.exports = {
+  addWorktree,
  ensureGitRepo,
  listBranches,
  listWorktrees,
  parseWorktrees,
  removeWorktree,
  sanitizeBranch,
-  switchBranch }
+  switchBranch
+}
--- a/apps/desktop/electron/git-worktree-ops.test.cjs
+++ b/apps/desktop/electron/git-worktree-ops.test.cjs
@@ -1,16 +1,20 @@
-import assert from 'node:assert/strict'
-import { execFileSync } from 'node:child_process'
-import fs from 'node:fs'
-import os from 'node:os'
-import path from 'node:path'
-import test from 'node:test'
+'use strict'

-import { addWorktree,
+const assert = require('node:assert/strict')
+const { execFileSync } = require('node:child_process')
+const fs = require('node:fs')
+const os = require('node:os')
+const path = require('node:path')
+const test = require('node:test')
+
+const {
+  addWorktree,
  ensureGitRepo,
  listBranches,
  parseWorktrees,
  sanitizeBranch,
-  switchBranch } from './git-worktree-ops'
+  switchBranch
+} = require('./git-worktree-ops.cjs')

 test('sanitizeBranch: spaces → hyphens, forbidden chars dropped, edges trimmed', () => {
  assert.equal(sanitizeBranch('beach vibes'), 'beach-vibes')
--- a/apps/desktop/electron/hardening.cjs
+++ b/apps/desktop/electron/hardening.cjs
@@ -1,7 +1,7 @@
-import fs from 'node:fs'
-import os from 'node:os'
-import path from 'node:path'
-import { fileURLToPath } from 'node:url'
+const fs = require('node:fs')
+const os = require('node:os')
+const path = require('node:path')
+const { fileURLToPath } = require('node:url')

 const DEFAULT_FETCH_TIMEOUT_MS = 15_000
 const DATA_URL_READ_MAX_BYTES = 16 * 1024 * 1024
@@ -13,7 +13,6 @@ const SENSITIVE_EXTENSIONS = new Set(['.kdbx', '.p12', '.pem', '.pfx'])
 function resolveTimeoutMs(timeoutMs, fallbackMs = DEFAULT_FETCH_TIMEOUT_MS) {
  const fallback =
    Number.isFinite(fallbackMs) && Number(fallbackMs) > 0 ? Math.round(Number(fallbackMs)) : DEFAULT_FETCH_TIMEOUT_MS
-
  const parsed = Number(timeoutMs)

  if (Number.isFinite(parsed) && parsed > 0) {
@@ -63,7 +62,6 @@ function sensitiveFileBlockReason(filePath) {
  const normalized = String(filePath || '')
    .replace(/\\/g, '/')
    .toLowerCase()
-
  const basename = path.basename(normalized)
  const ext = path.extname(basename)

@@ -89,7 +87,6 @@ function sensitiveFileBlockReason(filePath) {

  if (basename.startsWith('.env.')) {
    const suffix = basename.slice('.env.'.length)
-
    if (!SAFE_ENV_SUFFIXES.has(suffix)) {
      return `${basename} is blocked because it appears to contain environment secrets.`
    }
@@ -110,10 +107,9 @@ function sensitiveFileBlockReason(filePath) {
  return null
 }

-function ipcPathError(code: any, message: string): Error & {code: any} {
-  const error = new Error(message) as Error & {code: any}
-  (error as any).code = code
-
+function ipcPathError(code, message) {
+  const error = new Error(message)
+  error.code = code
  return error
 }

@@ -133,7 +129,6 @@ function rejectUnsafePathSyntax(filePath, purpose = 'File read') {
  }

  const normalized = raw.replace(/\\/g, '/').toLowerCase()
-
  if (
    normalized.startsWith('//?/') ||
    normalized.startsWith('//./') ||
@@ -146,7 +141,7 @@ function rejectUnsafePathSyntax(filePath, purpose = 'File read') {
  return raw
 }

-function resolveRequestedPathForIpc(filePath, options: {purpose?: string, baseDir?: fs.PathOrFileDescriptor} = {}) {
+function resolveRequestedPathForIpc(filePath, options = {}) {
  const purpose = String(options.purpose || 'File read')
  let raw = rejectUnsafePathSyntax(filePath, purpose)

@@ -159,21 +154,17 @@ function resolveRequestedPathForIpc(filePath, options: {purpose?: string, baseDi

  if (/^file:/i.test(raw)) {
    let resolvedPath
-
    try {
      const parsed = new URL(raw)
-
      if (parsed.protocol !== 'file:') {
        throw new Error('not a file URL')
      }
-
      resolvedPath = fileURLToPath(parsed)
    } catch {
      throw ipcPathError('invalid-path', `${purpose} failed: file URL is invalid.`)
    }

    rejectUnsafePathSyntax(resolvedPath, purpose)
-
    return path.resolve(resolvedPath)
  }

@@ -187,16 +178,14 @@ function resolveRequestedPathForIpc(filePath, options: {purpose?: string, baseDi
  return resolvedPath
 }

-async function statForIpc(fsImpl: {promises: {stat: typeof fs.promises.stat}}, resolvedPath, purpose, typeLabel) {
+async function statForIpc(fsImpl, resolvedPath, purpose, typeLabel) {
  try {
    return await fsImpl.promises.stat(resolvedPath)
  } catch (error) {
    const code = error && typeof error === 'object' ? error.code : ''
-
    if (code === 'ENOENT' || code === 'ENOTDIR') {
      throw ipcPathError(code || 'ENOENT', `${purpose} failed: ${typeLabel} does not exist.`)
    }
-
    throw ipcPathError(
      code || 'read-error',
      `${purpose} failed: ${error instanceof Error ? error.message : String(error)}`
@@ -212,7 +201,6 @@ async function realpathForIpc(fsImpl, resolvedPath, purpose) {
  try {
    const realPath = await fsImpl.promises.realpath(resolvedPath)
    rejectUnsafePathSyntax(realPath, purpose)
-
    return realPath
  } catch (error) {
    const code = error && typeof error === 'object' ? error.code : ''
@@ -225,13 +213,12 @@ async function realpathForIpc(fsImpl, resolvedPath, purpose) {

 function rejectSensitiveFilePath(filePath, purpose) {
  const blockReason = sensitiveFileBlockReason(filePath)
-
  if (blockReason) {
    throw ipcPathError('sensitive-file', `${purpose} blocked for sensitive file: ${blockReason}`)
  }
 }

-async function resolveDirectoryForIpc(dirPath, options: {purpose?: string , baseDir?: fs.PathOrFileDescriptor, fs?: {promises:{stat: typeof fs.promises.stat}}} = {}) {
+async function resolveDirectoryForIpc(dirPath, options = {}) {
  const purpose = String(options.purpose || 'Directory read')
  const fsImpl = options.fs || fs
  const resolvedPath = resolveRequestedPathForIpc(dirPath, { baseDir: options.baseDir, purpose })
@@ -246,7 +233,7 @@ async function resolveDirectoryForIpc(dirPath, options: {purpose?: string , base
  return { realPath, resolvedPath, stat }
 }

-async function resolveReadableFileForIpc(filePath, options: {purpose?: string , baseDir?: fs.PathOrFileDescriptor, fs?: typeof fs, blockSensitive?: boolean, maxBytes?: number} = {}) {
+async function resolveReadableFileForIpc(filePath, options = {}) {
  const purpose = String(options.purpose || 'File read')
  const fsImpl = options.fs || fs
  const resolvedPath = resolveRequestedPathForIpc(filePath, { baseDir: options.baseDir, purpose })
@@ -266,13 +253,11 @@ async function resolveReadableFileForIpc(filePath, options: {purpose?: string ,
  }

  const realPath = await realpathForIpc(fsImpl, resolvedPath, purpose)
-
  if (options.blockSensitive !== false) {
    rejectSensitiveFilePath(realPath, purpose)
  }

  const maxBytes = Number.isFinite(options.maxBytes) && Number(options.maxBytes) > 0 ? Number(options.maxBytes) : null
-
  if (maxBytes && stat.size > maxBytes) {
    throw ipcPathError('EFBIG', `${purpose} failed: file is too large (${stat.size} bytes; limit ${maxBytes} bytes).`)
  }
@@ -286,13 +271,15 @@ async function resolveReadableFileForIpc(filePath, options: {purpose?: string ,
  return { realPath, resolvedPath, stat }
 }

-export { DATA_URL_READ_MAX_BYTES,
+module.exports = {
+  DATA_URL_READ_MAX_BYTES,
  DEFAULT_FETCH_TIMEOUT_MS,
+  TEXT_PREVIEW_SOURCE_MAX_BYTES,
  encryptDesktopSecret,
  rejectUnsafePathSyntax,
  resolveDirectoryForIpc,
  resolveReadableFileForIpc,
  resolveRequestedPathForIpc,
  resolveTimeoutMs,
-  sensitiveFileBlockReason,
-  TEXT_PREVIEW_SOURCE_MAX_BYTES }
+  sensitiveFileBlockReason
+}
--- a/apps/desktop/electron/hardening.test.cjs
+++ b/apps/desktop/electron/hardening.test.cjs
@@ -1,22 +1,23 @@
-import assert from 'node:assert/strict'
-import fs from 'node:fs'
-import os from 'node:os'
-import path from 'node:path'
-import test from 'node:test'
-import { pathToFileURL } from 'node:url'
+const assert = require('node:assert/strict')
+const fs = require('node:fs')
+const os = require('node:os')
+const path = require('node:path')
+const test = require('node:test')
+const { pathToFileURL } = require('node:url')

-import { DEFAULT_FETCH_TIMEOUT_MS,
+const {
+  DEFAULT_FETCH_TIMEOUT_MS,
  encryptDesktopSecret,
  resolveDirectoryForIpc,
  resolveReadableFileForIpc,
  resolveRequestedPathForIpc,
  resolveTimeoutMs,
-  sensitiveFileBlockReason } from './hardening'
+  sensitiveFileBlockReason
+} = require('./hardening.cjs')

-async function rejectsWithCode(promise, code: string) {
-  await assert.rejects(promise, (error: any) => {
+async function rejectsWithCode(promise, code) {
+  await assert.rejects(promise, error => {
    assert.equal(error?.code, code)
-
    return true
  })
 }
@@ -75,9 +76,8 @@ test('path helpers reject blank non-string NUL and Windows device syntax', async
  for (const devicePath of devicePaths) {
    assert.throws(
      () => resolveRequestedPathForIpc(devicePath, { purpose: 'File preview' }),
-      (error: any) => {
+      error => {
        assert.equal(error?.code, 'device-path')
-
        return true
      }
    )
@@ -86,9 +86,8 @@ test('path helpers reject blank non-string NUL and Windows device syntax', async

  assert.throws(
    () => resolveRequestedPathForIpc('file:///%E0%A4%A', { purpose: 'File preview' }),
-    (error: any) => {
+    error => {
      assert.equal(error?.code, 'invalid-path')
-
      return true
    }
  )
@@ -132,23 +131,19 @@ test('resolveReadableFileForIpc validates existence type size and sensitivity',
    maxBytes: 256,
    purpose: 'File preview'
  })
-
  assert.equal(fromRelative.resolvedPath, textPath)
  assert.equal(fromRelative.stat.size, 11)

  const fromFileUrl = await resolveReadableFileForIpc(pathToFileURL(textPath).toString(), {
    purpose: 'File preview'
  })
-
  assert.equal(fromFileUrl.resolvedPath, textPath)

  const spacedPath = path.join(tempDir, 'notes with spaces.txt')
  fs.writeFileSync(spacedPath, 'space ok', 'utf8')
-
  const fromSpacedFileUrl = await resolveReadableFileForIpc(pathToFileURL(spacedPath).toString(), {
    purpose: 'File preview'
  })
-
  assert.equal(fromSpacedFileUrl.resolvedPath, spacedPath)

  await assert.rejects(
@@ -189,11 +184,9 @@ test('resolveReadableFileForIpc validates existence type size and sensitivity',

  const envTemplatePath = path.join(tempDir, '.env.example')
  fs.writeFileSync(envTemplatePath, 'EXAMPLE_TOKEN=value', 'utf8')
-
  const envTemplate = await resolveReadableFileForIpc(envTemplatePath, {
    purpose: 'File preview'
  })
-
  assert.equal(envTemplate.resolvedPath, envTemplatePath)
 })

@@ -236,10 +229,8 @@ test('resolveReadableFileForIpc blocks symlinks whose realpath is sensitive', as
  } catch (error) {
    if (error?.code === 'EPERM' || error?.code === 'EACCES') {
      t.skip(`symlink creation is not permitted on this platform (${error.code})`)
-
      return
    }
-
    throw error
  }

@@ -277,10 +268,8 @@ test('resolveDirectoryForIpc accepts directory symlinks or junctions', async t =
  } catch (error) {
    if (error?.code === 'EPERM' || error?.code === 'EACCES') {
      t.skip(`directory symlink creation is not permitted on this platform (${error.code})`)
-
      return
    }
-
    throw error
  }

--- a/apps/desktop/electron/link-title-window.cjs
+++ b/apps/desktop/electron/link-title-window.cjs
@@ -1,3 +1,5 @@
+'use strict'
+
 // Hidden BrowserWindow used by tier-2 link-title resolution: when curl can't
 // read a page <title> (bot walls, JS-rendered pages), we briefly load the URL
 // in an offscreen window and read its title. That window loads arbitrary
@@ -37,4 +39,4 @@ function createLinkTitleWindow(BrowserWindow, partitionSession) {
  return window
 }

-export { createLinkTitleWindow, linkTitleWindowOptions }
+module.exports = { createLinkTitleWindow, linkTitleWindowOptions }
--- a/apps/desktop/electron/link-title-window.test.cjs
+++ b/apps/desktop/electron/link-title-window.test.cjs
@@ -1,11 +1,10 @@
-import assert from 'node:assert/strict'
-import test from 'node:test'
+const assert = require('node:assert/strict')
+const test = require('node:test')

-import { createLinkTitleWindow, linkTitleWindowOptions } from './link-title-window'
+const { createLinkTitleWindow, linkTitleWindowOptions } = require('./link-title-window.cjs')

 function makeFakeBrowserWindow() {
  const calls = { audioMuted: [] }
-
  const FakeBrowserWindow = function (options) {
    this.options = options
    this.webContents = {
--- a/apps/desktop/electron/main.cjs
+++ b/apps/desktop/electron/main.cjs
--- a/apps/desktop/electron/oauth-net-request.cjs
+++ b/apps/desktop/electron/oauth-net-request.cjs
@@ -14,5 +14,7 @@ function setJsonRequestHeaders(request) {
  request.setHeader('Content-Type', 'application/json')
 }

-export { serializeJsonBody,
-  setJsonRequestHeaders }
+module.exports = {
+  serializeJsonBody,
+  setJsonRequestHeaders
+}
--- a/apps/desktop/electron/oauth-net-request.test.cjs
+++ b/apps/desktop/electron/oauth-net-request.test.cjs
@@ -4,10 +4,10 @@
 * Run with: node --test electron/oauth-net-request.test.cjs
 */

-import assert from 'node:assert/strict'
-import test from 'node:test'
+const test = require('node:test')
+const assert = require('node:assert/strict')

-import { serializeJsonBody, setJsonRequestHeaders } from './oauth-net-request'
+const { serializeJsonBody, setJsonRequestHeaders } = require('./oauth-net-request.cjs')

 test('serializeJsonBody returns undefined for absent bodies', () => {
  assert.equal(serializeJsonBody(undefined), undefined)
@@ -21,7 +21,6 @@ test('serializeJsonBody JSON-encodes request bodies', () => {

 test('setJsonRequestHeaders does not set Electron-restricted Content-Length', () => {
  const headers = []
-
  const request = {
    setHeader(name, value) {
      headers.push([name, value])
--- a/apps/desktop/electron/preload.cjs
+++ b/apps/desktop/electron/preload.cjs
@@ -1,4 +1,4 @@
-import { contextBridge, ipcRenderer, webUtils } from 'electron'
+const { contextBridge, ipcRenderer, webUtils } = require('electron')

 contextBridge.exposeInMainWorld('hermesDesktop', {
  getConnection: profile => ipcRenderer.invoke('hermes:connection', profile),
@@ -24,14 +24,12 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
    onState: callback => {
      const listener = (_event, payload) => callback(payload)
      ipcRenderer.on('hermes:pet-overlay:state', listener)
-
      return () => ipcRenderer.removeListener('hermes:pet-overlay:state', listener)
    },
    // Main renderer subscribes to overlay control messages.
    onControl: callback => {
      const listener = (_event, payload) => callback(payload)
      ipcRenderer.on('hermes:pet-overlay:control', listener)
-
      return () => ipcRenderer.removeListener('hermes:pet-overlay:control', listener)
    }
  },
@@ -122,76 +120,64 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
      const channel = `hermes:terminal:${id}:data`
      const listener = (_event, payload) => callback(payload)
      ipcRenderer.on(channel, listener)
-
      return () => ipcRenderer.removeListener(channel, listener)
    },
    onExit: (id, callback) => {
      const channel = `hermes:terminal:${id}:exit`
      const listener = (_event, payload) => callback(payload)
      ipcRenderer.on(channel, listener)
-
      return () => ipcRenderer.removeListener(channel, listener)
    }
  },
  onClosePreviewRequested: callback => {
    const listener = () => callback()
    ipcRenderer.on('hermes:close-preview-requested', listener)
-
    return () => ipcRenderer.removeListener('hermes:close-preview-requested', listener)
  },
  onOpenUpdatesRequested: callback => {
    const listener = () => callback()
    ipcRenderer.on('hermes:open-updates', listener)
-
    return () => ipcRenderer.removeListener('hermes:open-updates', listener)
  },
  onDeepLink: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:deep-link', listener)
-
    return () => ipcRenderer.removeListener('hermes:deep-link', listener)
  },
  signalDeepLinkReady: () => ipcRenderer.invoke('hermes:deep-link-ready'),
  onWindowStateChanged: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:window-state-changed', listener)
-
    return () => ipcRenderer.removeListener('hermes:window-state-changed', listener)
  },
  onFocusSession: callback => {
    const listener = (_event, sessionId) => callback(sessionId)
    ipcRenderer.on('hermes:focus-session', listener)
-
    return () => ipcRenderer.removeListener('hermes:focus-session', listener)
  },
  onNotificationAction: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:notification-action', listener)
-
    return () => ipcRenderer.removeListener('hermes:notification-action', listener)
  },
  onPreviewFileChanged: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:preview-file-changed', listener)
-
    return () => ipcRenderer.removeListener('hermes:preview-file-changed', listener)
  },
  onBackendExit: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:backend-exit', listener)
-
    return () => ipcRenderer.removeListener('hermes:backend-exit', listener)
  },
  onPowerResume: callback => {
    const listener = () => callback()
    ipcRenderer.on('hermes:power-resume', listener)
-
    return () => ipcRenderer.removeListener('hermes:power-resume', listener)
  },
  onBootProgress: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:boot-progress', listener)
-
    return () => ipcRenderer.removeListener('hermes:boot-progress', listener)
  },
  // First-launch bootstrap progress -- emitted by the install.ps1 stage
@@ -206,7 +192,6 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
  onBootstrapEvent: callback => {
    const listener = (_event, payload) => callback(payload)
    ipcRenderer.on('hermes:bootstrap:event', listener)
-
    return () => ipcRenderer.removeListener('hermes:bootstrap:event', listener)
  },
  getVersion: () => ipcRenderer.invoke('hermes:version'),
@@ -223,7 +208,6 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
    onProgress: callback => {
      const listener = (_event, payload) => callback(payload)
      ipcRenderer.on('hermes:updates:progress', listener)
-
      return () => ipcRenderer.removeListener('hermes:updates:progress', listener)
    }
  },
--- a/apps/desktop/electron/session-windows.cjs
+++ b/apps/desktop/electron/session-windows.cjs
@@ -3,7 +3,7 @@
 // here so they can be unit-tested with node --test (mirroring how the rest of
 // electron/*.cjs splits testable logic out of the main.cjs monolith).

-import { pathToFileURL } from 'node:url'
+const { pathToFileURL } = require('node:url')

 // Secondary windows open at the minimum usable size — a compact side panel for
 // subagent watch / cmd-click session pop-out, not a second full desktop.
@@ -42,7 +42,7 @@ function chatWindowWebPreferences(preloadPath) {
 // scratch window; `watch=1` marks a spectator window (e.g. a running subagent's
 // session): the renderer resumes it lazily so the gateway never builds an agent
 // just to stream into it.
-function buildSessionWindowUrl(sessionId: string, { devServer, rendererIndexPath, watch, newSession }: any = {}) {
+function buildSessionWindowUrl(sessionId, { devServer, rendererIndexPath, watch, newSession } = {}) {
  const query = `?win=secondary${newSession ? '&new=1' : ''}${watch ? '&watch=1' : ''}`
  const route = newSession ? '#/' : `#/${encodeURIComponent(sessionId)}`

@@ -115,8 +115,10 @@ function createSessionWindowRegistry() {
  }
 }

-export { buildSessionWindowUrl,
+module.exports = {
+  buildSessionWindowUrl,
  chatWindowWebPreferences,
  createSessionWindowRegistry,
  SESSION_WINDOW_MIN_HEIGHT,
-  SESSION_WINDOW_MIN_WIDTH }
+  SESSION_WINDOW_MIN_WIDTH
+}
--- a/apps/desktop/electron/session-windows.test.cjs
+++ b/apps/desktop/electron/session-windows.test.cjs
@@ -1,9 +1,11 @@
-import assert from 'node:assert/strict'
-import test from 'node:test'
+const assert = require('node:assert/strict')
+const test = require('node:test')

-import { buildSessionWindowUrl,
+const {
+  buildSessionWindowUrl,
  chatWindowWebPreferences,
-  createSessionWindowRegistry } from './session-windows'
+  createSessionWindowRegistry
+} = require('./session-windows.cjs')

 // A minimal fake BrowserWindow: tracks listeners + destroyed state and lets a
 // test fire the 'closed' event, mirroring the slice of the Electron API the
@@ -94,7 +96,6 @@ test('registry opens one window per session and focuses on re-open', () => {
  const registry = createSessionWindowRegistry()
  let built = 0
  const win = makeFakeWindow()
-
  const factory = () => {
    built += 1

@@ -144,7 +145,6 @@ test('registry rebuilds a fresh window after the previous one was destroyed', ()

  let built = 0
  const second = makeFakeWindow()
-
  const result = registry.openOrFocus('s1', () => {
    built += 1

@@ -158,7 +158,6 @@ test('registry rebuilds a fresh window after the previous one was destroyed', ()
 test('registry ignores empty / non-string session ids', () => {
  const registry = createSessionWindowRegistry()
  let built = 0
-
  const factory = () => {
    built += 1

--- a/apps/desktop/electron/titlebar-overlay-width.cjs
+++ b/apps/desktop/electron/titlebar-overlay-width.cjs
@@ -0,0 +1,11 @@
+// Pre-layout fallback for WCO right-edge reservation (--titlebar-tools-right).
+// Live width comes from navigator.windowControlsOverlay in the renderer.
+
+const OVERLAY_FALLBACK_WIDTH = 144
+
+/** @param {{ isWindows?: boolean, isWsl?: boolean }} opts */
+function nativeOverlayWidth({ isWindows = false, isWsl = false } = {}) {
+  return isWindows || isWsl ? OVERLAY_FALLBACK_WIDTH : 0
+}
+
+module.exports = { OVERLAY_FALLBACK_WIDTH, nativeOverlayWidth }
--- a/apps/desktop/electron/titlebar-overlay-width.test.cjs
+++ b/apps/desktop/electron/titlebar-overlay-width.test.cjs
@@ -1,7 +1,7 @@
-import assert from 'node:assert/strict'
-import test from 'node:test'
+const assert = require('node:assert/strict')
+const test = require('node:test')

-import { nativeOverlayWidth, OVERLAY_FALLBACK_WIDTH } from './titlebar-overlay-width'
+const { OVERLAY_FALLBACK_WIDTH, nativeOverlayWidth } = require('./titlebar-overlay-width.cjs')

 // This static reservation is only the pre-layout FALLBACK. Once laid out the
 // renderer reads the exact width from navigator.windowControlsOverlay
@@ -18,17 +18,10 @@ test('WSLg paints the same WCO, so it reserves the same fallback width', () => {
  assert.equal(nativeOverlayWidth({ isWsl: true }), OVERLAY_FALLBACK_WIDTH)
 })

-test('plain Linux paints the WCO too, so it reserves the fallback width', () => {
-  // Regression #53185: re-enabling the overlay on plain Linux (KDE/GNOME)
-  // without reserving its width left the native min/max/close buttons painting
-  // on top of the app's right-edge titlebar tools.
-  assert.equal(nativeOverlayWidth({ isWindows: false, isWsl: false }), OVERLAY_FALLBACK_WIDTH)
-  assert.equal(nativeOverlayWidth(), OVERLAY_FALLBACK_WIDTH)
-  assert.equal(nativeOverlayWidth({}), OVERLAY_FALLBACK_WIDTH)
-})
-
-test('macOS uses traffic lights, not a WCO overlay, so it reserves nothing', () => {
-  assert.equal(nativeOverlayWidth({ isMac: true }), 0)
+test('plain Linux and macOS reserve nothing', () => {
+  assert.equal(nativeOverlayWidth({ isWindows: false, isWsl: false }), 0)
+  assert.equal(nativeOverlayWidth(), 0)
+  assert.equal(nativeOverlayWidth({}), 0)
 })

 test('the fallback width is a sane positive pixel value', () => {
--- a/apps/desktop/electron/titlebar-overlay-width.ts
+++ b/apps/desktop/electron/titlebar-overlay-width.ts
@@ -1,23 +0,0 @@
-const OVERLAY_FALLBACK_WIDTH = 144
-
-/**
- * Static pre-layout reservation (px) for the right-side native window-controls
- * overlay (min/max/close). Only a FALLBACK — once laid out the renderer reads
- * the exact width from navigator.windowControlsOverlay
- * (use-window-controls-overlay-width.ts) and uses this value only when the WCO
- * API is unavailable.
- *
- * macOS uses traffic lights positioned via trafficLightPosition, not a WCO
- * overlay, so it reserves nothing here. Every other desktop platform now paints
- * the Electron overlay (Windows, WSLg, and plain Linux KDE/GNOME), so they all
- * reserve the fallback width.
- *
- * @param {{ isWindows?: boolean, isWsl?: boolean, isMac?: boolean }} opts
- */
-function nativeOverlayWidth({ isWindows = false, isWsl = false, isMac = false } = {}) {
-  if (isMac) {return 0}
-
-  return OVERLAY_FALLBACK_WIDTH
-}
-
-export { nativeOverlayWidth, OVERLAY_FALLBACK_WIDTH }
--- a/apps/desktop/electron/update-count.cjs
+++ b/apps/desktop/electron/update-count.cjs
@@ -1,3 +1,5 @@
+'use strict'
+
 // Whether `git rev-list HEAD..origin/<branch> --count` produces a meaningful
 // number worth computing. On a SHALLOW checkout (installer clones with
 // --depth 1) the local history often shares no merge-base with the freshly
@@ -17,12 +19,10 @@ function shouldCountCommits({ isShallow, hasMergeBase }) {
 // (developers / Docker dev images) keep the exact count path unchanged.
 function resolveBehindCount({ countStr, currentSha, targetSha, isShallow, hasMergeBase }) {
  if (!shouldCountCommits({ isShallow, hasMergeBase })) {
-    if (currentSha && targetSha && currentSha === targetSha) {return 0}
-
+    if (currentSha && targetSha && currentSha === targetSha) return 0
    return 1 // behind by an unknown amount — show a generic "update available"
  }
-
  return Number.parseInt(countStr, 10) || 0
 }

-export { resolveBehindCount, shouldCountCommits }
+module.exports = { resolveBehindCount, shouldCountCommits }
--- a/apps/desktop/electron/update-count.test.cjs
+++ b/apps/desktop/electron/update-count.test.cjs
@@ -1,7 +1,7 @@
-import assert from 'node:assert/strict'
-import test from 'node:test'
-
-import { resolveBehindCount, shouldCountCommits } from './update-count'
+'use strict'
+const test = require('node:test')
+const assert = require('node:assert/strict')
+const { resolveBehindCount, shouldCountCommits } = require('./update-count.cjs')

 // FAIL-BEFORE: pre-fix the function did `Number.parseInt(countStr) || 0`
 // unconditionally, so a shallow checkout with no merge-base surfaced the bogus
--- a/apps/desktop/electron/update-marker.cjs
+++ b/apps/desktop/electron/update-marker.cjs
@@ -20,8 +20,8 @@
 * log sinks are.
 */

-import fs from 'fs'
-import path from 'path'
+const fs = require('fs')
+const path = require('path')

 // Even with a live-looking PID, never treat a marker older than this as a live
 // update. A full update (git pull + pip + desktop rebuild) is minutes, not tens
@@ -37,12 +37,10 @@ function markerPath(hermesHome) {
 // not deliver a signal — it just probes existence/permission. ESRCH => dead;
 // EPERM => alive but owned by another user (still "alive" for our purposes).
 // Injectable `kill` keeps it unit-testable.
-function isPidAlive(pid, kill: typeof process.kill = process.kill.bind(process)) {
-  if (!Number.isInteger(pid) || pid <= 0) {return false}
-
+function isPidAlive(pid, kill = process.kill.bind(process)) {
+  if (!Number.isInteger(pid) || pid <= 0) return false
  try {
    kill(pid, 0)
-
    return true
  } catch (err) {
    return Boolean(err && err.code === 'EPERM')
@@ -61,12 +59,9 @@ function isPidAlive(pid, kill: typeof process.kill = process.kill.bind(process))
 * Pure-ish: file I/O against the given path, plus an injectable pid probe and
 * clock for tests.
 */
-function readLiveUpdateMarker(hermesHome, { kill, now = Date.now, maxAgeMs = UPDATE_MARKER_MAX_AGE_MS }: {
-  now?: () => number, maxAgeMs?: number, kill?: typeof process.kill
-} = {}) {
+function readLiveUpdateMarker(hermesHome, { kill, now = Date.now, maxAgeMs = UPDATE_MARKER_MAX_AGE_MS } = {}) {
  const file = markerPath(hermesHome)
  let raw
-
  try {
    raw = fs.readFileSync(file, 'utf8')
  } catch {
@@ -85,14 +80,14 @@ function readLiveUpdateMarker(hermesHome, { kill, now = Date.now, maxAgeMs = UPD
    } catch {
      void 0
    }
-
    return null
  }
-
  return { pid, ageMs }
 }

-export { isPidAlive,
+module.exports = {
+  UPDATE_MARKER_MAX_AGE_MS,
  markerPath,
-  readLiveUpdateMarker,
-  UPDATE_MARKER_MAX_AGE_MS }
+  isPidAlive,
+  readLiveUpdateMarker
+}
--- a/apps/desktop/electron/update-marker.test.cjs
+++ b/apps/desktop/electron/update-marker.test.cjs
@@ -12,17 +12,16 @@
 * strand future launches, and (c) self-heal by deleting a stale marker file.
 */

-import fs from 'fs'
-import assert from 'node:assert/strict'
-import test from 'node:test'
-import os from 'os'
-import path from 'path'
+const test = require('node:test')
+const assert = require('node:assert/strict')
+const fs = require('fs')
+const os = require('os')
+const path = require('path')

-import { isPidAlive, markerPath, readLiveUpdateMarker, UPDATE_MARKER_MAX_AGE_MS } from './update-marker'
+const { markerPath, isPidAlive, readLiveUpdateMarker, UPDATE_MARKER_MAX_AGE_MS } = require('./update-marker.cjs')

 function tmpHome(tag) {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), `hermes-marker-${tag}-`))
-
  return dir
 }

@@ -30,11 +29,10 @@ function writeMarker(home, pid, startedAtSec) {
  fs.writeFileSync(markerPath(home), `${pid}\n${startedAtSec}`)
 }

-const ALIVE: typeof process.kill = () => true // injected kill that "succeeds" => pid alive
-
-const DEAD : typeof process.kill= () => {
-  const err = new Error('no such process');
-  (err as any).code = 'ESRCH'
+const ALIVE = () => true // injected kill that "succeeds" => pid alive
+const DEAD = () => {
+  const err = new Error('no such process')
+  err.code = 'ESRCH'
  throw err
 }

@@ -86,10 +84,9 @@ test('isPidAlive: own pid is alive, impossible pid is dead', () => {

 test('isPidAlive: EPERM counts as alive (process owned by another user)', () => {
  const eperm = () => {
-    const err = new Error('operation not permitted');
-    (err as any).code = 'EPERM'
+    const err = new Error('operation not permitted')
+    err.code = 'EPERM'
    throw err
  }
-
  assert.equal(isPidAlive(4242, eperm), true)
 })
--- a/apps/desktop/electron/update-rebuild.cjs
+++ b/apps/desktop/electron/update-rebuild.cjs
@@ -1,3 +1,5 @@
+'use strict'
+
 /**
 * Retry-once policy for the desktop `--build-only` rebuild during self-update.
 *
@@ -18,12 +20,10 @@ function shouldRetryRebuild(code) {
 */
 async function runRebuildWithRetry(rebuild) {
  let result = await rebuild(0)
-
  if (shouldRetryRebuild(result.code)) {
    result = await rebuild(1)
  }
-
  return result
 }

-export { runRebuildWithRetry, shouldRetryRebuild }
+module.exports = { shouldRetryRebuild, runRebuildWithRetry }
--- a/apps/desktop/electron/update-rebuild.test.cjs
+++ b/apps/desktop/electron/update-rebuild.test.cjs
@@ -12,10 +12,10 @@
 * success, and must run at most twice.
 */

-import assert from 'node:assert/strict'
-import test from 'node:test'
+const test = require('node:test')
+const assert = require('node:assert/strict')

-import { runRebuildWithRetry, shouldRetryRebuild } from './update-rebuild'
+const { shouldRetryRebuild, runRebuildWithRetry } = require('./update-rebuild.cjs')

 test('shouldRetryRebuild retries only on a non-success exit', () => {
  assert.equal(shouldRetryRebuild(0), false)
@@ -25,39 +25,30 @@ test('shouldRetryRebuild retries only on a non-success exit', () => {

 test('a clean first rebuild runs once and does not retry', async () => {
  const codes = []
-
  const result = await runRebuildWithRetry(attempt => {
    codes.push(attempt)
-
    return Promise.resolve({ code: 0 })
  })
-
  assert.deepEqual(codes, [0])
  assert.equal(result.code, 0)
 })

 test('a failed first rebuild retries once and succeeds', async () => {
  const codes = []
-
  const result = await runRebuildWithRetry(attempt => {
    codes.push(attempt)
-
    return Promise.resolve({ code: attempt === 0 ? 1 : 0 })
  })
-
  assert.deepEqual(codes, [0, 1])
  assert.equal(result.code, 0)
 })

 test('a rebuild that keeps failing runs at most twice and reports the failure', async () => {
  const codes = []
-
  const result = await runRebuildWithRetry(attempt => {
    codes.push(attempt)
-
    return Promise.resolve({ code: 1, error: 'rebuild-failed' })
  })
-
  assert.deepEqual(codes, [0, 1])
  assert.equal(result.code, 1)
  assert.equal(result.error, 'rebuild-failed')
--- a/apps/desktop/electron/update-relaunch.cjs
+++ b/apps/desktop/electron/update-relaunch.cjs
@@ -1,3 +1,5 @@
+'use strict'
+
 /**
 * update-relaunch.cjs — pure decision + script-generation helpers for the
 * Linux in-app update relaunch (#45205).
@@ -35,14 +37,12 @@
 * the closeable manual-restart terminal state instead.
 */

-import path from 'node:path'
+const path = require('node:path')

 // Map process.platform → electron-builder's `release/<dir>-unpacked` name.
 function unpackedDirName(platform) {
-  if (platform === 'darwin') {return 'mac-unpacked'} // not used (mac swaps bundles)
-
-  if (platform === 'win32') {return 'win-unpacked'}
-
+  if (platform === 'darwin') return 'mac-unpacked' // not used (mac swaps bundles)
+  if (platform === 'win32') return 'win-unpacked'
  return 'linux-unpacked'
 }

@@ -56,17 +56,15 @@ function unpackedDirName(platform) {
 * `.../release/linux-unpacked-evil` can't masquerade as `.../release/linux-unpacked`.
 */
 function resolveUnpackedRelease(execPath, updateRoot, platform) {
-  if (!execPath || !updateRoot) {return null}
+  if (!execPath || !updateRoot) return null
  const releaseDir = path.join(updateRoot, 'apps', 'desktop', 'release')
  const unpacked = path.join(releaseDir, unpackedDirName(platform))
  const normalizedExec = path.resolve(String(execPath))
  // execPath must be the unpacked dir itself or a descendant of it.
  const withSep = unpacked.endsWith(path.sep) ? unpacked : unpacked + path.sep
-
  if (normalizedExec === unpacked || normalizedExec.startsWith(withSep)) {
    return unpacked
  }
-
  return null
 }

@@ -83,10 +81,8 @@ function resolveUnpackedRelease(execPath, updateRoot, platform) {
 *                app. Closeable manual-restart terminal state.
 */
 function decideRelaunchOutcome({ underUnpacked, sandboxOk }) {
-  if (!underUnpacked) {return 'guiSkew'}
-
-  if (!sandboxOk) {return 'manual'}
-
+  if (!underUnpacked) return 'guiSkew'
+  if (!sandboxOk) return 'manual'
  return 'relaunch'
 }

@@ -103,10 +99,9 @@ function decideRelaunchOutcome({ underUnpacked, sandboxOk }) {
 * `statSync` is injectable so this is testable without a real setuid file.
 */
 function sandboxPreflight(unpackedDir, statSync) {
-  if (!unpackedDir) {return { ok: false, reason: 'no-unpacked-dir', path: null }}
+  if (!unpackedDir) return { ok: false, reason: 'no-unpacked-dir', path: null }
  const sandboxPath = path.join(unpackedDir, 'chrome-sandbox')
  let st
-
  try {
    st = statSync(sandboxPath)
  } catch {
@@ -114,20 +109,15 @@ function sandboxPreflight(unpackedDir, statSync) {
    // sandbox; nothing to block the relaunch.
    return { ok: true, reason: 'no-sandbox-helper', path: sandboxPath }
  }
-
  const ownedByRoot = st.uid === 0
  const hasSetuid = (st.mode & 0o4000) !== 0
-
  if (ownedByRoot && hasSetuid) {
    return { ok: true, reason: 'launchable', path: sandboxPath }
  }
-
  if (!ownedByRoot && !hasSetuid) {
    return { ok: false, reason: 'not-root-not-setuid', path: sandboxPath }
  }
-
-  if (!ownedByRoot) {return { ok: false, reason: 'not-root', path: sandboxPath }}
-
+  if (!ownedByRoot) return { ok: false, reason: 'not-root', path: sandboxPath }
  return { ok: false, reason: 'not-setuid', path: sandboxPath }
 }

@@ -147,11 +137,8 @@ function sandboxPreflight(unpackedDir, statSync) {
 */
 function sandboxFallbackFromEnv(env, launchArgs) {
  const disable = String((env && env.ELECTRON_DISABLE_SANDBOX) || '').trim()
-
-  if (disable === '1' || disable.toLowerCase() === 'true') {return true}
-
-  if (Array.isArray(launchArgs) && launchArgs.some(a => a === '--no-sandbox')) {return true}
-
+  if (disable === '1' || disable.toLowerCase() === 'true') return true
+  if (Array.isArray(launchArgs) && launchArgs.some(a => a === '--no-sandbox')) return true
  return false
 }

@@ -189,11 +176,9 @@ const INTERNAL_ARG_PREFIXES = [
 * the exec path itself; there is no entry-script arg as in a dev run).
 */
 function collectRelaunchArgs(argv) {
-  if (!Array.isArray(argv)) {return []}
-
+  if (!Array.isArray(argv)) return []
  return argv.filter(arg => {
-    if (typeof arg !== 'string' || arg.length === 0) {return false}
-
+    if (typeof arg !== 'string' || arg.length === 0) return false
    return !INTERNAL_ARG_PREFIXES.some(prefix =>
      prefix.endsWith('=') ? arg.startsWith(prefix) : arg === prefix || arg.startsWith(prefix + '=')
    )
@@ -212,17 +197,13 @@ const PRESERVED_ENV_PREFIXES = ['HERMES_DESKTOP_']

 function collectRelaunchEnv(env) {
  const out = {}
-
-  if (!env || typeof env !== 'object') {return out}
-
+  if (!env || typeof env !== 'object') return out
  for (const [key, value] of Object.entries(env)) {
-    if (value == null) {continue}
-
+    if (value == null) continue
    if (PRESERVED_ENV_KEYS.includes(key) || PRESERVED_ENV_PREFIXES.some(p => key.startsWith(p))) {
      out[key] = String(value)
    }
  }
-
  return out
 }

@@ -242,10 +223,8 @@ function buildRelaunchScript({ pid, execPath, args, env, cwd }) {
  const exports = Object.entries(env || {})
    .map(([k, v]) => `export ${k}=${shellQuote(v)}`)
    .join('\n')
-
  const quotedArgs = (args || []).map(shellQuote).join(' ')
  const cwdLine = cwd ? `cd ${shellQuote(cwd)} 2>/dev/null || true` : ''
-
  // NOTE: `exec` replaces the watcher process with the relaunched app, so the
  // re-exec inherits exactly the env/cwd we set above.
  return `#!/bin/bash
@@ -270,15 +249,17 @@ exec ${shellQuote(execPath)}${quotedArgs ? ' ' + quotedArgs : ''}
 `
 }

-export { buildRelaunchScript,
+module.exports = {
+  unpackedDirName,
+  resolveUnpackedRelease,
+  decideRelaunchOutcome,
+  sandboxPreflight,
+  sandboxFallbackFromEnv,
  collectRelaunchArgs,
  collectRelaunchEnv,
-  decideRelaunchOutcome,
+  buildRelaunchScript,
+  shellQuote,
  INTERNAL_ARG_PREFIXES,
  PRESERVED_ENV_KEYS,
-  PRESERVED_ENV_PREFIXES,
-  resolveUnpackedRelease,
-  sandboxFallbackFromEnv,
-  sandboxPreflight,
-  shellQuote,
-  unpackedDirName }
+  PRESERVED_ENV_PREFIXES
+}
--- a/apps/desktop/electron/update-relaunch.test.cjs
+++ b/apps/desktop/electron/update-relaunch.test.cjs
@@ -17,22 +17,24 @@
 *      (keep a working window) unless a non-interactive fallback applies.
 */

-import assert from 'node:assert/strict'
-import { execFileSync } from 'node:child_process'
-import fs from 'node:fs'
-import os from 'node:os'
-import path from 'node:path'
-import test from 'node:test'
+const test = require('node:test')
+const assert = require('node:assert/strict')
+const fs = require('node:fs')
+const os = require('node:os')
+const path = require('node:path')
+const { execFileSync } = require('node:child_process')

-import { buildRelaunchScript,
+const {
+  unpackedDirName,
+  resolveUnpackedRelease,
+  decideRelaunchOutcome,
+  sandboxPreflight,
+  sandboxFallbackFromEnv,
  collectRelaunchArgs,
  collectRelaunchEnv,
-  decideRelaunchOutcome,
-  resolveUnpackedRelease,
-  sandboxFallbackFromEnv,
-  sandboxPreflight,
-  shellQuote,
-  unpackedDirName } from './update-relaunch'
+  buildRelaunchScript,
+  shellQuote
+} = require('./update-relaunch.cjs')

 const ROOT = '/home/u/.hermes/hermes-agent'
 const UNPACKED = path.join(ROOT, 'apps', 'desktop', 'release', 'linux-unpacked')
@@ -89,7 +91,6 @@ test('decideRelaunchOutcome: only under-unpacked + sandbox-ok relaunches', () =>
 // ---------------------------------------------------------------------------

 const fakeStat = (uid, mode) => () => ({ uid, mode })
-
 const throwStat = () => {
  throw Object.assign(new Error('ENOENT'), { code: 'ENOENT' })
 }
@@ -149,7 +150,6 @@ test('collectRelaunchArgs drops Electron internals, keeps user/launcher args', (
    '--profile=work', // app flag — keep
    '--remote-debugging-port=9222' // internal — drop
  ]
-
  assert.deepEqual(collectRelaunchArgs(argv), ['--no-sandbox', 'hermes://open/agent/42', '--profile=work'])
  assert.deepEqual(collectRelaunchArgs(undefined), [])
 })
@@ -165,7 +165,6 @@ test('collectRelaunchEnv preserves HERMES_HOME + HERMES_DESKTOP_* + sandbox opt-
    HOME: '/home/u', // not preserved
    UNRELATED: 'x'
  }
-
  assert.deepEqual(collectRelaunchEnv(env), {
    HERMES_HOME: '/home/u/.hermes',
    HERMES_DESKTOP_REMOTE_URL: 'http://box:9119',
@@ -208,7 +207,6 @@ test('buildRelaunchScript embeds pid/exec/args/env/cwd and is valid bash', () =>
  // It must be syntactically valid bash (`bash -n`). Write to a temp file and lint.
  const tmp = path.join(os.tmpdir(), `hermes-relaunch-test-${Date.now()}.sh`)
  fs.writeFileSync(tmp, script)
-
  try {
    execFileSync('bash', ['-n', tmp], { stdio: 'pipe' })
  } finally {
@@ -224,16 +222,13 @@ test('buildRelaunchScript with no args/env still lints clean', () => {
    env: {},
    cwd: ''
  })
-
  const tmp = path.join(os.tmpdir(), `hermes-relaunch-test2-${Date.now()}.sh`)
  fs.writeFileSync(tmp, script)
-
  try {
    execFileSync('bash', ['-n', tmp], { stdio: 'pipe' })
  } finally {
    fs.rmSync(tmp, { force: true })
  }
-
  // exec line has no trailing args.
  assert.match(script, /exec '\/opt\/Hermes\/Hermes'\n/)
 })
--- a/apps/desktop/electron/update-remote.cjs
+++ b/apps/desktop/electron/update-remote.cjs
@@ -19,9 +19,8 @@ const OFFICIAL_REPO_CANONICAL = 'github.com/nousresearch/hermes-agent'
 // no trailing slash, no .git suffix) so SSH and HTTPS forms of the same repo
 // compare equal.
 function canonicalGitHubRemote(url) {
-  if (!url) {return ''}
+  if (!url) return ''
  let value = String(url).trim()
-
  if (value.startsWith('git@github.com:')) {
    value = `github.com/${value.slice('git@github.com:'.length)}`
  } else if (value.startsWith('ssh://git@github.com/')) {
@@ -29,17 +28,13 @@ function canonicalGitHubRemote(url) {
  } else {
    try {
      const parsed = new URL(value)
-
-      if (parsed.hostname && parsed.pathname) {value = `${parsed.hostname}${parsed.pathname}`}
+      if (parsed.hostname && parsed.pathname) value = `${parsed.hostname}${parsed.pathname}`
    } catch {
      // Leave non-URL forms unchanged.
    }
  }
-
  value = value.trim().replace(/\/+$/, '')
-
-  if (value.endsWith('.git')) {value = value.slice(0, -4)}
-
+  if (value.endsWith('.git')) value = value.slice(0, -4)
  return value.toLowerCase()
 }

@@ -47,7 +42,6 @@ function isSshRemote(url) {
  const value = String(url || '')
    .trim()
    .toLowerCase()
-
  return value.startsWith('git@') || value.startsWith('ssh://')
 }

@@ -55,8 +49,10 @@ function isOfficialSshRemote(url) {
  return isSshRemote(url) && canonicalGitHubRemote(url) === OFFICIAL_REPO_CANONICAL
 }

-export { canonicalGitHubRemote,
-  isOfficialSshRemote,
-  isSshRemote,
+module.exports = {
+  OFFICIAL_REPO_HTTPS_URL,
  OFFICIAL_REPO_CANONICAL,
-  OFFICIAL_REPO_HTTPS_URL }
+  canonicalGitHubRemote,
+  isSshRemote,
+  isOfficialSshRemote
+}
--- a/apps/desktop/electron/update-remote.test.cjs
+++ b/apps/desktop/electron/update-remote.test.cjs
@@ -15,14 +15,16 @@
 * never prompts and should keep the normal fetch path).
 */

-import assert from 'node:assert/strict'
-import test from 'node:test'
+const test = require('node:test')
+const assert = require('node:assert/strict')

-import { canonicalGitHubRemote,
-  isOfficialSshRemote,
-  isSshRemote,
+const {
+  OFFICIAL_REPO_HTTPS_URL,
  OFFICIAL_REPO_CANONICAL,
-  OFFICIAL_REPO_HTTPS_URL } from './update-remote'
+  canonicalGitHubRemote,
+  isSshRemote,
+  isOfficialSshRemote
+} = require('./update-remote.cjs')

 test('canonicalGitHubRemote normalizes SSH and HTTPS forms to the same value', () => {
  assert.equal(canonicalGitHubRemote('git@github.com:NousResearch/hermes-agent.git'), OFFICIAL_REPO_CANONICAL)
--- a/apps/desktop/electron/vscode-marketplace.cjs
+++ b/apps/desktop/electron/vscode-marketplace.cjs
@@ -1,3 +1,5 @@
+'use strict'
+
 /**
 * VS Code Marketplace color-theme fetcher (main process).
 *
@@ -12,8 +14,8 @@
 * zip library into the desktop bundle for a feature this small.
 */

-import https from 'node:https'
-import zlib from 'node:zlib'
+const https = require('node:https')
+const zlib = require('node:zlib')

 const GALLERY_QUERY_URL = 'https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery'
 const VSIX_ASSET_TYPE = 'Microsoft.VisualStudio.Services.VSIXPackage'
@@ -28,7 +30,7 @@ function request(
  url,
  { method = 'GET', headers = {}, body = null, maxBytes = MAX_VSIX_BYTES } = {},
  redirectsLeft = MAX_REDIRECTS
-): Promise<Buffer<ArrayBuffer>> {
+) {
  return new Promise((resolve, reject) => {
    const req = https.request(url, { method, headers }, res => {
      const status = res.statusCode ?? 0
@@ -100,7 +102,6 @@ async function resolveExtension(id) {
    // IncludeCategoryAndTags | IncludeLatestVersionOnly = 914.
    flags: 914
  })
-
  const extension = json?.results?.[0]?.extensions?.[0]

  if (!extension) {
@@ -126,7 +127,6 @@ async function resolveExtension(id) {
 /** POST an ExtensionQuery payload and return the parsed gallery response. */
 async function queryGallery(payload, { maxBytes = 4 * 1024 * 1024 } = {}) {
  const body = JSON.stringify(payload)
-
  const raw = await request(GALLERY_QUERY_URL, {
    method: 'POST',
    headers: {
@@ -332,12 +332,10 @@ async function fetchMarketplaceThemes(id) {
  return { extensionId: trimmed, displayName, themes }
 }

-const __testing = { themeEntryName, looksLikeIconTheme }
-
-export {
-  __testing,
-  extractThemes,
+module.exports = {
  fetchMarketplaceThemes,
+  searchMarketplaceThemes,
+  extractThemes,
  readCentralDirectory,
-  searchMarketplaceThemes
+  __testing: { themeEntryName, looksLikeIconTheme }
 }
--- a/apps/desktop/electron/vscode-marketplace.test.cjs
+++ b/apps/desktop/electron/vscode-marketplace.test.cjs
@@ -1,7 +1,9 @@
-import assert from 'node:assert'
-import test from 'node:test'
+'use strict'

-import { __testing, extractThemes, readCentralDirectory } from './vscode-marketplace'
+const assert = require('node:assert')
+const test = require('node:test')
+
+const { __testing, extractThemes, readCentralDirectory } = require('./vscode-marketplace.cjs')

 // Build a minimal zip with stored (uncompressed) entries so the test controls
 // the bytes exactly — exercises the central-directory reader + theme extraction
@@ -70,7 +72,6 @@ test('extractThemes reads contributed color themes (resolving ./ paths)', () =>
      themes: [{ label: 'Dracula', uiTheme: 'vs-dark', path: './themes/dracula.json' }]
    }
  })
-
  const themeJson = JSON.stringify({ name: 'Dracula', type: 'dark', colors: { 'editor.background': '#282a36' } })

  const zip = makeZip([
--- a/apps/desktop/electron/window-state.cjs
+++ b/apps/desktop/electron/window-state.cjs
@@ -21,59 +21,41 @@ const MIN_VISIBLE = 48
 const finite = v => typeof v === 'number' && Number.isFinite(v)
 const clamp = (v, lo, hi) => Math.max(lo, Math.min(v, hi))

-interface SanitizedWindowState{
-  width: number, height: number, isMaximized: boolean, x?: number,y?: number
-}
-
 // Parse raw JSON → clean state, or null if garbage. width/height are required
 // and floored; x/y survive only as a finite pair; isMaximized is strict.
-function sanitizeWindowState(raw?: any): SanitizedWindowState | null
+function sanitizeWindowState(raw) {
+  if (!raw || typeof raw !== 'object' || !finite(raw.width) || !finite(raw.height)) return null

-
- {
-  if (!raw || typeof raw !== 'object' || !finite(raw.width) || !finite(raw.height)) {return null}
-
-  const state: SanitizedWindowState = {
+  const state = {
    width: Math.max(MIN_WIDTH, Math.round(raw.width)),
    height: Math.max(MIN_HEIGHT, Math.round(raw.height)),
-    isMaximized: raw.isMaximized === true,
+    isMaximized: raw.isMaximized === true
  }
-
  if (finite(raw.x) && finite(raw.y)) {
-    state.x = Math.round(raw.x);
+    state.x = Math.round(raw.x)
    state.y = Math.round(raw.y)
  }
-
  return state
 }

 // True when `bounds` overlaps some display's work area by ≥ MIN_VISIBLE on both
 // axes. `displays` is Electron's screen.getAllDisplays() shape.
 function onScreen(bounds, displays) {
-  if (!Array.isArray(displays)) {return false}
-
+  if (!Array.isArray(displays)) return false
  return displays.some(({ workArea: a } = {}) => {
-    if (!a) {return false}
+    if (!a) return false
    const x = Math.min(bounds.x + bounds.width, a.x + a.width) - Math.max(bounds.x, a.x)
    const y = Math.min(bounds.y + bounds.height, a.y + a.height) - Math.max(bounds.y, a.y)
-
    return x >= MIN_VISIBLE && y >= MIN_VISIBLE
  })
 }

-interface WindowOptions {
-  width: number
-  height: number
-  x?: number
-  y?: number
-}
-
 // Sanitized state (or null) → BrowserWindow size/position options. Always sets
 // width/height, capped to the largest current display so a size saved on a
 // since-disconnected bigger monitor can't exceed any screen the user now has.
 // Sets x/y only when still on-screen; otherwise Electron centers the window.
-function computeWindowOptions(state, displays): WindowOptions {
-  const opts: WindowOptions = {
+function computeWindowOptions(state, displays) {
+  const opts = {
    width: finite(state?.width) ? state.width : DEFAULT_WIDTH,
    height: finite(state?.height) ? state.height : DEFAULT_HEIGHT
  }
@@ -85,7 +67,6 @@ function computeWindowOptions(state, displays): WindowOptions {
        : m,
    { width: 0, height: 0 }
  )
-
  if (cap.width && cap.height) {
    opts.width = clamp(opts.width, MIN_WIDTH, cap.width)
    opts.height = clamp(opts.height, MIN_HEIGHT, cap.height)
@@ -97,10 +78,9 @@ function computeWindowOptions(state, displays): WindowOptions {
    finite(state.y) &&
    onScreen({ x: state.x, y: state.y, width: opts.width, height: opts.height }, displays)
  ) {
-    opts.x = state.x;
+    opts.x = state.x
    opts.y = state.y
  }
-
  return opts
 }

@@ -109,7 +89,6 @@ function computeWindowOptions(state, displays): WindowOptions {
 // cancels the pending timer — used on close, before the window is gone.
 function debounce(fn, delayMs) {
  let timer = null
-
  const debounced = () => {
    clearTimeout(timer)
    timer = setTimeout(() => {
@@ -117,22 +96,22 @@ function debounce(fn, delayMs) {
      fn()
    }, delayMs)
  }
-
  debounced.flush = () => {
    clearTimeout(timer)
    timer = null
    fn()
  }
-
  return debounced
 }

-export { computeWindowOptions,
-  debounce,
-  DEFAULT_HEIGHT,
+module.exports = {
  DEFAULT_WIDTH,
+  DEFAULT_HEIGHT,
+  MIN_WIDTH,
  MIN_HEIGHT,
  MIN_VISIBLE,
-  MIN_WIDTH,
+  sanitizeWindowState,
  onScreen,
-  sanitizeWindowState }
+  computeWindowOptions,
+  debounce
+}
--- a/apps/desktop/electron/window-state.test.cjs
+++ b/apps/desktop/electron/window-state.test.cjs
@@ -4,17 +4,19 @@
 * clamping, and the debounce that collapses mid-drag write storms.
 */

-import assert from 'node:assert/strict'
-import test from 'node:test'
+const test = require('node:test')
+const assert = require('node:assert/strict')

-import { computeWindowOptions,
-  debounce,
-  DEFAULT_HEIGHT,
+const {
  DEFAULT_WIDTH,
-  MIN_HEIGHT,
+  DEFAULT_HEIGHT,
  MIN_WIDTH,
+  MIN_HEIGHT,
+  sanitizeWindowState,
  onScreen,
-  sanitizeWindowState } from './window-state'
+  computeWindowOptions,
+  debounce
+} = require('./window-state.cjs')

 // A single 1920×1080 monitor (work area trimmed for the taskbar).
 const PRIMARY = [{ workArea: { x: 0, y: 0, width: 1920, height: 1040 } }]
@@ -119,7 +121,6 @@ test('computeWindowOptions does not clamp when displays are unknown', () => {
 test('debounce coalesces a burst into one trailing run', t => {
  t.mock.timers.enable({ apis: ['setTimeout'] })
  let calls = 0
-
  const d = debounce(() => {
    calls += 1
  }, 250)
@@ -137,7 +138,6 @@ test('debounce coalesces a burst into one trailing run', t => {
 test('debounce.flush runs now and cancels the pending timer', t => {
  t.mock.timers.enable({ apis: ['setTimeout'] })
  let calls = 0
-
  const d = debounce(() => {
    calls += 1
  }, 250)
--- a/apps/desktop/electron/windows-child-process.test.cjs
+++ b/apps/desktop/electron/windows-child-process.test.cjs
@@ -1,10 +1,11 @@
-import assert from 'node:assert/strict'
-import fs from 'node:fs'
-import path from 'node:path'
-import test from 'node:test'
-import { fileURLToPath } from 'node:url'
+'use strict'

-const ELECTRON_DIR = path.dirname(fileURLToPath(import.meta.url))
+const test = require('node:test')
+const assert = require('node:assert/strict')
+const fs = require('node:fs')
+const path = require('node:path')
+
+const ELECTRON_DIR = __dirname

 function readElectronFile(name) {
  return fs.readFileSync(path.join(ELECTRON_DIR, name), 'utf8').replace(/\r\n/g, '\n')
@@ -23,7 +24,7 @@ function requireHiddenChildOptions(source, needle) {
 }

 test('desktop background child processes opt into hidden Windows consoles', () => {
-  const source = readElectronFile('main.ts')
+  const source = readElectronFile('main.cjs')

  assert.match(source, /function hiddenWindowsChildOptions\(options = \{\}\)/)

@@ -52,25 +53,8 @@ test('desktop background child processes opt into hidden Windows consoles', () =
  assert.match(source, /args: \['-m', 'hermes_cli\.main', \.\.\.dashboardArgs\]/)
 })

-test('getNoConsoleVenvPython prefers base pythonw over the uv re-exec shim', () => {
-  const source = readElectronFile('main.ts')
-
-  const body = source.slice(
-    source.indexOf('function getNoConsoleVenvPython(venvRoot)'),
-    source.indexOf('function getVenvSitePackagesEntries(venvRoot)')
-  )
-
-  // The venv Scripts\pythonw.exe re-execs a console python.exe (flashes a
-  // conhost); the base pythonw must be resolved first so it never runs.
-  const baseIdx = body.indexOf('basePythonw')
-  const shimIdx = body.indexOf("'Scripts', 'pythonw.exe'")
-  assert.notEqual(baseIdx, -1, 'base pythonw resolution missing')
-  assert.notEqual(shimIdx, -1, 'venv shim fallback missing')
-  assert.ok(baseIdx < shimIdx, 'base pythonw must be preferred before the venv Scripts shim')
-})
-
 test('intentional or interactive desktop child processes stay documented', () => {
-  const source = readElectronFile('main.ts')
+  const source = readElectronFile('main.cjs')

  assert.match(source, /windowsHide: false/)
  assert.match(source, /handOffWindowsBootstrapRecovery/)
@@ -81,7 +65,7 @@ test('intentional or interactive desktop child processes stay documented', () =>
 })

 test('bootstrap PowerShell runner hides Windows console children', () => {
-  const source = readElectronFile('bootstrap-runner.ts')
+  const source = readElectronFile('bootstrap-runner.cjs')

  assert.match(source, /function hiddenWindowsChildOptions\(options = \{\}\)/)
  requireHiddenChildOptions(source, 'spawn(ps, fullArgs')
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
ethernet	0a8d4da69a	WIPipw wipwip	2026-06-26 22:07:54 -04:00
ethernet	4dce531189	wip thin client	2026-06-26 19:30:29 -04:00