Compare commits

..

1 Commits

Author SHA1 Message Date
teknium1
9f361cdca6 fix(cron): action='run' executes inline when no gateway ticker is running
Closes #16612.

cronjob(action='run') and POST /api/jobs/{id}/run previously only called
trigger_job(), which just sets next_run_at=now. When no gateway process
was running, there was no ticker to pick the job up, so last_run_at
stayed null forever — the tool returned success=true with nothing
actually executing.

- cron.scheduler: extract the per-job run+save+deliver+mark pipeline
  from tick()'s closure into a module-level _execute_and_record helper,
  and add run_job_now() which acquires the shared tick file-lock and
  runs one job inline (advancing next_run_at first for recurring jobs
  to preserve at-most-once semantics).
- tools/cronjob_tools: branch action='run' on gateway presence via
  find_gateway_pids(). Ticker up → defer to next tick (≤60s) with a
  clear message. Ticker down → execute inline via run_job_now() and
  return the updated job including last_run_at/last_status. Tool
  schema description updated to reflect the two modes.
- gateway/platforms/api_server: same branching on the HTTP endpoint,
  running the inline call off the event loop via run_in_executor.
- hermes_cli/cron: CLI 'hermes cron run' now surfaces the message,
  last_run_at, and last_status from the tool result instead of
  always printing 'It will run on the next scheduler tick.'
- docs: update cli-commands and cron-troubleshooting to describe the
  inline-when-no-gateway behaviour.

Tests: 11 new unit tests (tests/cron/test_run_job_now.py) covering
run_job_now's inline path, _execute_and_record's success/empty/error
paths, and the tool/API gateway-presence branching. Plus a new
TestRunJob::test_run_job_executes_inline_when_no_gateway on the
api_server suite. E2E verified in a temp HERMES_HOME against the
real file-based job store.
2026-04-29 08:18:11 -07:00
1306 changed files with 8704 additions and 221580 deletions

View File

@@ -9,12 +9,6 @@ node_modules
.venv
**/.venv
# Built artifacts that are regenerated inside the image. Excluded so local
# rebuilds on the developer's machine don't invalidate the npm-install layer
# that now depends on the full ui-tui/packages/hermes-ink/ tree being present.
ui-tui/dist/
ui-tui/packages/hermes-ink/dist/
# CI/CD
.github
@@ -25,7 +19,3 @@ ui-tui/packages/hermes-ink/dist/
# Runtime data (bind-mounted at /opt/data; must not leak into build context)
data/
# Compose/profile runtime state (bind-mounted; avoid ownership/secret issues)
hermes-config/
runtime/

View File

@@ -244,15 +244,6 @@ BROWSERBASE_PROXIES=true
# Uses custom Chromium build to avoid bot detection altogether
BROWSERBASE_ADVANCED_STEALTH=false
# Browser engine for local mode (default: auto = Chrome)
# "auto" — use Chrome (don't pass --engine flag)
# "lightpanda" — use Lightpanda (1.3-5.8x faster navigation, no screenshots)
# "chrome" — explicitly request Chrome
# Requires agent-browser v0.25.3+. Lightpanda commands that fail or return
# empty results are automatically retried with Chrome.
# Also configurable via browser.engine in config.yaml.
# AGENT_BROWSER_ENGINE=auto
# Browser session timeout in seconds (default: 300)
# Sessions are cleaned up after this duration of inactivity
BROWSER_SESSION_TIMEOUT=300
@@ -393,9 +384,9 @@ IMAGE_TOOLS_DEBUG=false
# Default STT provider is "local" (faster-whisper) — runs on your machine, no API key needed.
# Install with: pip install faster-whisper
# Model downloads automatically on first use (~150 MB for "base").
# To use cloud providers instead, set GROQ_API_KEY, VOICE_TOOLS_OPENAI_KEY, or ELEVENLABS_API_KEY above.
# Provider priority: local > groq > openai > mistral > xai > elevenlabs
# Configure in config.yaml: stt.provider: local | groq | openai | mistral | xai | elevenlabs
# To use cloud providers instead, set GROQ_API_KEY or VOICE_TOOLS_OPENAI_KEY above.
# Provider priority: local > groq > openai
# Configure in config.yaml: stt.provider: local | groq | openai
# =============================================================================
# STT ADVANCED OVERRIDES (optional)
@@ -403,25 +394,7 @@ IMAGE_TOOLS_DEBUG=false
# Override default STT models per provider (normally set via stt.model in config.yaml)
# STT_GROQ_MODEL=whisper-large-v3-turbo
# STT_OPENAI_MODEL=whisper-1
# STT_ELEVENLABS_MODEL=scribe_v2
# Override STT provider endpoints (for proxies or self-hosted instances)
# GROQ_BASE_URL=https://api.groq.com/openai/v1
# STT_OPENAI_BASE_URL=https://api.openai.com/v1
# ELEVENLABS_STT_BASE_URL=https://api.elevenlabs.io/v1
# =============================================================================
# MICROSOFT TEAMS INTEGRATION
# =============================================================================
# Register a Bot in Azure: https://dev.botframework.com/ → "Register a bot"
# Or use Azure Portal: Azure Active Directory → App registrations → New registration
# Then add the bot to Teams via the Bot Framework or App Studio.
#
# TEAMS_CLIENT_ID= # Azure AD App (client) ID
# TEAMS_CLIENT_SECRET= # Azure AD client secret value
# TEAMS_TENANT_ID= # Azure AD tenant ID (or "common" for multi-tenant)
# TEAMS_ALLOWED_USERS= # Comma-separated AAD object IDs or UPNs
# TEAMS_ALLOW_ALL_USERS=false # Set true to skip the allowlist
# TEAMS_HOME_CHANNEL= # Default channel/chat ID for cron delivery
# TEAMS_HOME_CHANNEL_NAME= # Display name for the home channel
# TEAMS_PORT=3978 # Webhook listen port (Bot Framework default)

View File

@@ -1,18 +1,8 @@
name: 'Setup Nix'
description: 'Install Nix and configure Cachix binary cache'
inputs:
cachix-auth-token:
description: 'Cachix auth token (enables push). Omit for read-only.'
required: false
default: ''
description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
runs:
using: composite
steps:
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
- uses: cachix/cachix-action@1eb2ef646ac0255473d23a5907ad7b04ce94065c # v17
with:
name: hermes-agent
authToken: ${{ inputs.cachix-auth-token }}
continue-on-error: true
- uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13

View File

@@ -1,44 +0,0 @@
# Dependabot configuration for hermes-agent.
#
# Deliberately scoped to github-actions only.
#
# We do NOT enable Dependabot for pip / npm / any source-dependency ecosystem
# because we pin source dependencies exactly (uv.lock, package-lock.json) as
# part of our supply-chain posture. Automatic version-bump PRs against those
# pins would undermine the strategy — pins are moved deliberately, after
# review, not on a schedule.
#
# github-actions is the exception: action pins (we use full commit SHAs per
# supply-chain policy) must be updated when upstream actions publish
# patches — usually themselves security fixes. Dependabot opens a PR with
# the new SHA and release notes; we review and merge like any other PR.
#
# Security-update PRs for source dependencies (opened ONLY when a CVE is
# published affecting a currently-pinned version) are enabled separately
# via the repo's Dependabot security updates setting
# (Settings → Code security → Dependabot → Dependabot security updates).
# Those are CVE-only, not schedule-driven, and do not conflict with our
# pinning strategy — they fire when a pinned version becomes known-bad,
# which is exactly when we want to move the pin.
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
day: "monday"
open-pull-requests-limit: 5
labels:
- "dependencies"
- "github-actions"
commit-message:
prefix: "chore(actions)"
include: "scope"
groups:
# Batch routine action bumps into one PR per week to reduce noise.
# Security updates still open individually and bypass grouping.
actions-minor-patch:
update-types:
- "minor"
- "patch"

View File

@@ -76,16 +76,6 @@ jobs:
run: |
mkdir -p _site/docs
cp -r website/build/* _site/docs/
# llms.txt / llms-full.txt are also published at the site root
# (https://hermes-agent.nousresearch.com/llms.txt) because some
# agents and IDE plugins probe the classic root-level path rather
# than /docs/llms.txt. Same file, two URLs, one source of truth.
if [ -f website/build/llms.txt ]; then
cp website/build/llms.txt _site/llms.txt
fi
if [ -f website/build/llms-full.txt ]; then
cp website/build/llms-full.txt _site/llms-full.txt
fi
- name: Upload artifact
uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3

View File

@@ -1,341 +0,0 @@
name: Desktop Release
on:
push:
branches: [main]
release:
types: [published]
workflow_dispatch:
inputs:
channel:
description: Release channel to build
required: true
default: nightly
type: choice
options:
- nightly
- stable
release_tag:
description: "Required when channel=stable (example: v2026.5.5)"
required: false
type: string
permissions:
contents: write
concurrency:
group: desktop-release-${{ github.ref }}
cancel-in-progress: false
jobs:
prepare:
if: github.repository == 'NousResearch/hermes-agent'
runs-on: ubuntu-latest
outputs:
channel: ${{ steps.meta.outputs.channel }}
release_name: ${{ steps.meta.outputs.release_name }}
release_tag: ${{ steps.meta.outputs.release_tag }}
version: ${{ steps.meta.outputs.version }}
is_stable: ${{ steps.meta.outputs.is_stable }}
steps:
- id: meta
env:
EVENT_NAME: ${{ github.event_name }}
INPUT_CHANNEL: ${{ github.event.inputs.channel }}
INPUT_RELEASE_TAG: ${{ github.event.inputs.release_tag }}
RELEASE_TAG_FROM_EVENT: ${{ github.event.release.tag_name }}
GITHUB_SHA: ${{ github.sha }}
run: |
set -euo pipefail
channel="nightly"
release_tag="desktop-nightly"
is_stable="false"
if [[ "$EVENT_NAME" == "release" ]]; then
channel="stable"
release_tag="$RELEASE_TAG_FROM_EVENT"
is_stable="true"
elif [[ "$EVENT_NAME" == "workflow_dispatch" && "$INPUT_CHANNEL" == "stable" ]]; then
channel="stable"
release_tag="$INPUT_RELEASE_TAG"
is_stable="true"
fi
if [[ "$channel" == "stable" ]]; then
if [[ -z "$release_tag" ]]; then
echo "Stable desktop releases require a release tag." >&2
exit 1
fi
version="${release_tag#v}"
release_name="Hermes Desktop ${release_tag}"
else
stamp="$(date -u +%Y%m%d)"
short_sha="${GITHUB_SHA::7}"
version="0.0.0-nightly.${stamp}.${short_sha}"
release_name="Hermes Desktop Nightly ${stamp}-${short_sha}"
fi
{
echo "channel=$channel"
echo "release_name=$release_name"
echo "release_tag=$release_tag"
echo "version=$version"
echo "is_stable=$is_stable"
} >> "$GITHUB_OUTPUT"
build:
if: github.repository == 'NousResearch/hermes-agent'
needs: prepare
strategy:
fail-fast: false
matrix:
include:
- platform: mac
runner: macos-latest
build_args: --mac dmg zip
- platform: win
runner: windows-latest
build_args: --win nsis msi
runs-on: ${{ matrix.runner }}
env:
DESKTOP_CHANNEL: ${{ needs.prepare.outputs.channel }}
DESKTOP_VERSION: ${{ needs.prepare.outputs.version }}
MAC_CSC_LINK: ${{ secrets.CSC_LINK }}
MAC_CSC_KEY_PASSWORD: ${{ secrets.CSC_KEY_PASSWORD }}
APPLE_API_KEY: ${{ secrets.APPLE_API_KEY }}
APPLE_API_KEY_ID: ${{ secrets.APPLE_API_KEY_ID }}
APPLE_API_ISSUER: ${{ secrets.APPLE_API_ISSUER }}
WIN_CSC_LINK: ${{ secrets.WIN_CSC_LINK }}
WIN_CSC_KEY_PASSWORD: ${{ secrets.WIN_CSC_KEY_PASSWORD }}
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
with:
node-version: 20
cache: npm
cache-dependency-path: package-lock.json
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
with:
python-version: "3.11"
- name: Enforce signing gates for stable releases
if: needs.prepare.outputs.is_stable == 'true'
shell: bash
run: |
set -euo pipefail
missing=()
if [[ "${{ matrix.platform }}" == "mac" ]]; then
[[ -z "${MAC_CSC_LINK:-}" ]] && missing+=("CSC_LINK")
[[ -z "${MAC_CSC_KEY_PASSWORD:-}" ]] && missing+=("CSC_KEY_PASSWORD")
[[ -z "${APPLE_API_KEY:-}" ]] && missing+=("APPLE_API_KEY")
[[ -z "${APPLE_API_KEY_ID:-}" ]] && missing+=("APPLE_API_KEY_ID")
[[ -z "${APPLE_API_ISSUER:-}" ]] && missing+=("APPLE_API_ISSUER")
else
[[ -z "${WIN_CSC_LINK:-}" ]] && missing+=("WIN_CSC_LINK")
[[ -z "${WIN_CSC_KEY_PASSWORD:-}" ]] && missing+=("WIN_CSC_KEY_PASSWORD")
fi
if (( ${#missing[@]} > 0 )); then
echo "::error::Stable desktop release missing required secrets: ${missing[*]}"
exit 1
fi
- name: Install workspace dependencies
run: npm ci
- name: Build bundled TUI payload
run: npm --prefix ui-tui run build
- name: Build desktop renderer
run: npm --prefix apps/desktop run build
- name: Stage Hermes payload
run: npm --prefix apps/desktop run stage:hermes
- name: Map macOS signing credentials
if: matrix.platform == 'mac'
shell: bash
run: |
set -euo pipefail
has_link=0
has_pass=0
[[ -n "${MAC_CSC_LINK:-}" ]] && has_link=1
[[ -n "${MAC_CSC_KEY_PASSWORD:-}" ]] && has_pass=1
if [[ $has_link -eq 1 && $has_pass -eq 1 ]]; then
echo "CSC_LINK=${MAC_CSC_LINK}" >> "$GITHUB_ENV"
echo "CSC_KEY_PASSWORD=${MAC_CSC_KEY_PASSWORD}" >> "$GITHUB_ENV"
elif [[ $has_link -eq 1 || $has_pass -eq 1 ]]; then
echo "::error::macOS signing secrets are partially configured. Set both CSC_LINK and CSC_KEY_PASSWORD."
exit 1
fi
- name: Map Windows signing credentials
if: matrix.platform == 'win'
shell: bash
run: |
set -euo pipefail
has_link=0
has_pass=0
[[ -n "${WIN_CSC_LINK:-}" ]] && has_link=1
[[ -n "${WIN_CSC_KEY_PASSWORD:-}" ]] && has_pass=1
if [[ $has_link -eq 1 && $has_pass -eq 1 ]]; then
echo "CSC_LINK=${WIN_CSC_LINK}" >> "$GITHUB_ENV"
echo "CSC_KEY_PASSWORD=${WIN_CSC_KEY_PASSWORD}" >> "$GITHUB_ENV"
echo "CSC_FOR_PULL_REQUEST=true" >> "$GITHUB_ENV"
elif [[ $has_link -eq 1 || $has_pass -eq 1 ]]; then
echo "::error::Windows signing secrets are partially configured. Set both WIN_CSC_LINK and WIN_CSC_KEY_PASSWORD."
exit 1
fi
- name: Build desktop installers
shell: bash
run: |
set -euo pipefail
npm --prefix apps/desktop exec electron-builder -- \
${{ matrix.build_args }} \
--publish never \
--config.extraMetadata.version="${DESKTOP_VERSION}" \
--config.extraMetadata.desktopChannel="${DESKTOP_CHANNEL}" \
'--config.artifactName=Hermes-${version}-${env.DESKTOP_CHANNEL}-${os}-${arch}.${ext}'
- name: Notarize and staple macOS DMG
if: matrix.platform == 'mac' && needs.prepare.outputs.is_stable == 'true'
shell: bash
run: |
set -euo pipefail
dmg_path="$(ls apps/desktop/release/*.dmg | head -n 1)"
node apps/desktop/scripts/notarize-artifact.cjs "$dmg_path"
- name: Validate macOS notarization and Gatekeeper trust
if: matrix.platform == 'mac' && needs.prepare.outputs.is_stable == 'true'
shell: bash
run: |
set -euo pipefail
app_path="$(ls -d apps/desktop/release/mac*/Hermes.app | head -n 1)"
dmg_path="$(ls apps/desktop/release/*.dmg | head -n 1)"
xcrun stapler validate "$app_path"
xcrun stapler validate "$dmg_path"
spctl --assess --type execute --verbose=4 "$app_path"
- name: Generate desktop checksums
shell: bash
run: |
set -euo pipefail
node <<'EOF'
const crypto = require('node:crypto')
const fs = require('node:fs')
const path = require('node:path')
const releaseDir = path.resolve('apps/desktop/release')
const platform = process.env.PLATFORM
const extensions = platform === 'mac' ? ['.dmg', '.zip'] : ['.exe', '.msi']
const files = fs
.readdirSync(releaseDir)
.filter(name => extensions.some(ext => name.endsWith(ext)))
.sort()
if (!files.length) {
throw new Error(`No release artifacts were produced for ${platform}`)
}
const lines = files.map(name => {
const full = path.join(releaseDir, name)
const hash = crypto.createHash('sha256').update(fs.readFileSync(full)).digest('hex')
return `${hash} ${name}`
})
fs.writeFileSync(path.join(releaseDir, `SHA256SUMS-${platform}.txt`), `${lines.join('\n')}\n`)
EOF
env:
PLATFORM: ${{ matrix.platform }}
- name: Upload packaged desktop artifacts
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: desktop-${{ matrix.platform }}
path: |
apps/desktop/release/*.dmg
apps/desktop/release/*.zip
apps/desktop/release/*.exe
apps/desktop/release/*.msi
apps/desktop/release/SHA256SUMS-${{ matrix.platform }}.txt
if-no-files-found: error
publish:
if: github.repository == 'NousResearch/hermes-agent'
needs: [prepare, build]
runs-on: ubuntu-latest
env:
GH_TOKEN: ${{ github.token }}
CHANNEL: ${{ needs.prepare.outputs.channel }}
RELEASE_NAME: ${{ needs.prepare.outputs.release_name }}
RELEASE_TAG: ${{ needs.prepare.outputs.release_tag }}
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
- uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
with:
pattern: desktop-*
merge-multiple: true
path: dist/desktop
- name: Publish desktop assets to GitHub release
shell: bash
run: |
set -euo pipefail
shopt -s globstar nullglob
files=(
dist/desktop/**/*.dmg
dist/desktop/**/*.zip
dist/desktop/**/*.exe
dist/desktop/**/*.msi
dist/desktop/**/SHA256SUMS-*.txt
)
if (( ${#files[@]} == 0 )); then
echo "No desktop artifacts were downloaded for publishing." >&2
exit 1
fi
if [[ "$CHANNEL" == "nightly" ]]; then
git tag -f "$RELEASE_TAG" "$GITHUB_SHA"
git push origin "refs/tags/$RELEASE_TAG" --force
notes="Automated nightly desktop build from main. This prerelease is replaced on each new run."
if gh release view "$RELEASE_TAG" >/dev/null 2>&1; then
while IFS= read -r asset_name; do
gh release delete-asset "$RELEASE_TAG" "$asset_name" --yes
done < <(gh release view "$RELEASE_TAG" --json assets -q '.assets[].name')
gh release edit "$RELEASE_TAG" \
--title "$RELEASE_NAME" \
--prerelease \
--notes "$notes"
else
gh release create "$RELEASE_TAG" \
--target "$GITHUB_SHA" \
--title "$RELEASE_NAME" \
--notes "$notes" \
--prerelease
fi
else
if ! gh release view "$RELEASE_TAG" >/dev/null 2>&1; then
notes="Automated desktop artifacts attached by desktop-release workflow."
gh release create "$RELEASE_TAG" \
--target "$GITHUB_SHA" \
--title "$RELEASE_NAME" \
--notes "$notes"
fi
fi
gh release upload "$RELEASE_TAG" "${files[@]}" --clobber

View File

@@ -1,151 +0,0 @@
name: Lint (ruff + ty)
# Surface ruff and ty diagnostics as a diff vs the target branch.
# This check is advisory only ATM it always exits zero and never blocks merge.
# It posts a Markdown summary to the workflow run and, for pull requests,
# comments the same summary on the PR.
on:
push:
branches: [main]
paths-ignore:
- "**/*.md"
- "docs/**"
- "website/**"
pull_request:
branches: [main]
paths-ignore:
- "**/*.md"
- "docs/**"
- "website/**"
permissions:
contents: read
pull-requests: write # needed to post/update PR comments
concurrency:
group: lint-${{ github.ref }}
cancel-in-progress: true
jobs:
lint-diff:
name: ruff + ty diff
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout code
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0 # need full history for merge-base + worktree
- name: Install uv
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
- name: Install ruff + ty
run: |
uv tool install ruff
uv tool install ty
- name: Determine base ref
id: base
run: |
# For PRs, diff against the merge base with the target branch.
# For pushes to main, diff against the previous commit on main.
if [ "${{ github.event_name }}" = "pull_request" ]; then
BASE_SHA=$(git merge-base "origin/${{ github.base_ref }}" HEAD)
BASE_REF="origin/${{ github.base_ref }}"
else
BASE_SHA=$(git rev-parse HEAD~1 2>/dev/null || git rev-parse HEAD)
BASE_REF="HEAD~1"
fi
echo "sha=${BASE_SHA}" >> "$GITHUB_OUTPUT"
echo "ref=${BASE_REF}" >> "$GITHUB_OUTPUT"
echo "Base SHA: ${BASE_SHA}"
echo "Base ref: ${BASE_REF}"
- name: Run ruff + ty on HEAD
run: |
mkdir -p .lint-reports/head
ruff check --output-format json --exit-zero \
> .lint-reports/head/ruff.json || true
ty check --output-format gitlab --exit-zero \
> .lint-reports/head/ty.json || true
echo "HEAD ruff: $(wc -c < .lint-reports/head/ruff.json) bytes"
echo "HEAD ty: $(wc -c < .lint-reports/head/ty.json) bytes"
- name: Run ruff + ty on base (via git worktree)
run: |
mkdir -p .lint-reports/base
# Use a worktree so we don't clobber the main checkout. If the base
# SHA is identical to HEAD (e.g. first commit), skip and leave the
# base reports empty — the diff script handles missing files.
HEAD_SHA=$(git rev-parse HEAD)
BASE_SHA="${{ steps.base.outputs.sha }}"
if [ "$BASE_SHA" = "$HEAD_SHA" ]; then
echo "Base SHA == HEAD SHA, skipping base scan."
echo '[]' > .lint-reports/base/ruff.json
echo '[]' > .lint-reports/base/ty.json
else
git worktree add --detach /tmp/lint-base "$BASE_SHA"
(
cd /tmp/lint-base
ruff check --output-format json --exit-zero \
> "$GITHUB_WORKSPACE/.lint-reports/base/ruff.json" || true
ty check --output-format gitlab --exit-zero \
> "$GITHUB_WORKSPACE/.lint-reports/base/ty.json" || true
)
git worktree remove --force /tmp/lint-base
fi
echo "base ruff: $(wc -c < .lint-reports/base/ruff.json) bytes"
echo "base ty: $(wc -c < .lint-reports/base/ty.json) bytes"
- name: Generate diff summary
run: |
python scripts/lint_diff.py \
--base-ruff .lint-reports/base/ruff.json \
--head-ruff .lint-reports/head/ruff.json \
--base-ty .lint-reports/base/ty.json \
--head-ty .lint-reports/head/ty.json \
--base-ref "${{ steps.base.outputs.ref }}" \
--head-ref "${{ github.event_name == 'pull_request' && github.head_ref || github.ref_name }}" \
--output .lint-reports/summary.md
cat .lint-reports/summary.md >> "$GITHUB_STEP_SUMMARY"
- name: Upload reports as artifact
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: lint-reports
path: .lint-reports/
retention-days: 14
- name: Post / update PR comment
if: github.event_name == 'pull_request'
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7
with:
script: |
const fs = require('fs');
const body = fs.readFileSync('.lint-reports/summary.md', 'utf8');
const marker = '<!-- lint-diff-summary -->';
const fullBody = marker + '\n' + body;
const { data: comments } = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
});
const existing = comments.find(c => c.body && c.body.includes(marker));
if (existing) {
await github.rest.issues.updateComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: existing.id,
body: fullBody,
});
} else {
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
body: fullBody,
});
}

View File

@@ -0,0 +1,74 @@
name: Nix Lockfile Check
on:
pull_request:
workflow_dispatch:
permissions:
contents: read
pull-requests: write
concurrency:
group: nix-lockfile-check-${{ github.ref }}
cancel-in-progress: true
jobs:
nix-lockfile-check:
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: ./.github/actions/nix-setup
- name: Resolve head SHA
id: sha
shell: bash
run: |
FULL="${{ github.event.pull_request.head.sha || github.sha }}"
echo "full=$FULL" >> "$GITHUB_OUTPUT"
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
- name: Check lockfile hashes
id: check
continue-on-error: true
env:
LINK_SHA: ${{ steps.sha.outputs.full }}
run: nix run .#fix-lockfiles -- --check
- name: Fail if check crashed without reporting
if: steps.check.outputs.stale != 'true' && steps.check.outputs.stale != 'false'
run: |
echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
exit 1
- name: Post sticky PR comment (stale)
if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
message: |
### ⚠️ npm lockfile hash out of date
Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
${{ steps.check.outputs.report }}
#### Apply the fix
- [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
- Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
- Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
- name: Clear sticky PR comment (resolved)
if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
delete: true
- name: Fail if stale
if: steps.check.outputs.stale == 'true'
run: exit 1

View File

@@ -6,8 +6,8 @@ on:
paths:
- 'ui-tui/package-lock.json'
- 'ui-tui/package.json'
- 'apps/dashboard/package-lock.json'
- 'apps/dashboard/package.json'
- 'web/package-lock.json'
- 'web/package.json'
workflow_dispatch:
inputs:
pr_number:
@@ -28,7 +28,7 @@ concurrency:
jobs:
# ── Auto-fix on main ───────────────────────────────────────────────
# Fires when a push to main touches package.json or package-lock.json
# in ui-tui/ or apps/dashboard/. Runs fix-lockfiles and pushes the hash
# in ui-tui/ or web/. Runs fix-lockfiles --apply and pushes the hash
# update commit directly to main so Nix builds never stay broken.
#
# Safety invariants:
@@ -62,8 +62,6 @@ jobs:
token: ${{ steps.app-token.outputs.token }}
- uses: ./.github/actions/nix-setup
with:
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
- name: Apply lockfile hashes
id: apply
@@ -110,7 +108,7 @@ jobs:
# run recompute from the correct package-lock state.
pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
'ui-tui/package-lock.json' 'ui-tui/package.json' \
'apps/dashboard/package-lock.json' 'apps/dashboard/package.json' || true)"
'web/package-lock.json' 'web/package.json' || true)"
if [ -n "$pkg_changed" ]; then
echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
exit 0
@@ -202,12 +200,10 @@ jobs:
fetch-depth: 0
- uses: ./.github/actions/nix-setup
with:
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
- name: Apply lockfile hashes
id: apply
run: nix run .#fix-lockfiles
run: nix run .#fix-lockfiles -- --apply
- name: Commit & push
if: steps.apply.outputs.changed == 'true'

View File

@@ -7,7 +7,6 @@ on:
permissions:
contents: read
pull-requests: write
concurrency:
group: nix-${{ github.ref }}
@@ -23,95 +22,12 @@ jobs:
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: ./.github/actions/nix-setup
with:
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
- name: Resolve head SHA
if: github.event_name == 'pull_request'
id: sha
shell: bash
run: |
FULL="${{ github.event.pull_request.head.sha || github.sha }}"
echo "full=$FULL" >> "$GITHUB_OUTPUT"
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
- name: Check flake
id: flake
if: runner.os == 'Linux'
continue-on-error: true
run: nix flake check --print-build-logs
- name: Build package
id: build
if: runner.os == 'Linux'
continue-on-error: true
run: nix build --print-build-logs
# When the real Nix build fails, run a targeted diagnostic to see if
# the failure is specifically a stale npm lockfile hash in one of the
# known npm subpackages (tui / web). This avoids surfacing a generic
# "build failed" message when the fix is a single known command.
- name: Diagnose npm lockfile hashes
id: hash_check
if: (steps.flake.outcome == 'failure' || steps.build.outcome == 'failure') && runner.os == 'Linux'
continue-on-error: true
env:
LINK_SHA: ${{ steps.sha.outputs.full }}
run: nix run .#fix-lockfiles -- --check
# If fix-lockfiles itself crashes (infrastructure blip, cache throttle,
# etc.) it won't set stale=true/false. Treat that as a distinct failure
# mode rather than silently ignoring it.
- name: Fail if hash check crashed without reporting
if: steps.hash_check.outcome == 'failure' && steps.hash_check.outputs.stale != 'true' && steps.hash_check.outputs.stale != 'false'
run: |
echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
exit 1
- name: Post sticky PR comment (stale hashes)
if: steps.hash_check.outputs.stale == 'true' && github.event_name == 'pull_request'
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
message: |
### ⚠️ npm lockfile hash out of date
Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
${{ steps.hash_check.outputs.report }}
#### Apply the fix
- [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
- Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
- Or locally: `nix run .#fix-lockfiles` and commit the diff
# Clear the sticky comment when either the build passed outright (no
# hash check needed) or the hash check explicitly returned stale=false
# (build failed for a non-hash reason).
- name: Clear sticky PR comment (resolved)
if: |
github.event_name == 'pull_request' &&
runner.os == 'Linux' &&
(steps.hash_check.outputs.stale == 'false' ||
(steps.flake.outcome == 'success' && steps.build.outcome == 'success'))
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
delete: true
- name: Final fail if build or flake failed
if: steps.flake.outcome == 'failure' || steps.build.outcome == 'failure'
run: |
if [ "${{ steps.hash_check.outputs.stale }}" == "true" ]; then
echo "::error::Nix build failed due to stale npm lockfile hash. Run: nix run .#fix-lockfiles"
else
echo "::error::Nix build/flake check failed. See logs above."
fi
exit 1
- name: Evaluate flake (macOS)
if: runner.os == 'macOS'
run: nix flake show --json > /dev/null

View File

@@ -1,67 +0,0 @@
name: OSV-Scanner
# Scans lockfiles (uv.lock, package-lock.json) against the OSV vulnerability
# database. Runs on every PR that touches a lockfile and on a weekly schedule
# against main.
#
# This is detection-only — OSV-Scanner does NOT open PRs or modify pins.
# It reports known CVEs in currently-pinned dependency versions so we can
# decide when and how to patch on our own schedule. Our pinning strategy
# (full SHA / exact version) is preserved; only the notification signal
# is added.
#
# Complements the existing supply-chain-audit.yml workflow (which scans
# for malicious code patterns in PR diffs) by covering the orthogonal
# "currently-pinned dep became known-vulnerable" case.
#
# Uses Google's officially-recommended reusable workflow, pinned by SHA.
# Findings land in the repo's Security tab (Code Scanning > OSV-Scanner).
# fail-on-vuln is disabled so the job does not block merges on pre-existing
# vulnerabilities in pinned deps that we may need to patch deliberately.
on:
pull_request:
branches: [main]
paths:
- 'uv.lock'
- 'pyproject.toml'
- 'package.json'
- 'package-lock.json'
- 'ui-tui/package.json'
- 'ui-tui/package-lock.json'
- 'website/package.json'
- 'website/package-lock.json'
- '.github/workflows/osv-scanner.yml'
push:
branches: [main]
paths:
- 'uv.lock'
- 'pyproject.toml'
- 'package.json'
- 'package-lock.json'
- 'ui-tui/package-lock.json'
- 'website/package-lock.json'
schedule:
# Weekly scan against main — catches CVEs published after merge for
# deps that haven't changed since.
- cron: '0 9 * * 1'
workflow_dispatch:
permissions:
# Required by the reusable workflow to upload SARIF to the Security tab.
actions: read
contents: read
security-events: write
jobs:
scan:
name: Scan lockfiles
uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@c51854704019a247608d928f370c98740469d4b5 # v2.3.5
with:
# Scan explicit lockfiles rather than recursing, so we only look at
# the three sources of truth and skip vendored / test / worktree dirs.
scan-args: |-
--lockfile=uv.lock
--lockfile=ui-tui/package-lock.json
--lockfile=website/package-lock.json
fail-on-vuln: false

13
.gitignore vendored
View File

@@ -54,10 +54,6 @@ environments/benchmarks/evals/
# Web UI build output
hermes_cli/web_dist/
apps/desktop/build/
apps/desktop/dist/
apps/desktop/release/
apps/desktop/*.tsbuildinfo
# Web UI assets — synced from @nous-research/ui at build time via
# `npm run sync-assets` (see web/package.json).
@@ -74,12 +70,3 @@ mini-swe-agent/
result
website/static/api/skills-index.json
models-dev-upstream/
# Local editor / agent tooling (machine-specific; keep in global config, not the repo)
.codex/
.cursor/
.gemini/
.zed/
.mcp.json
opencode.json
config/mcporter.json

268
AGENTS.md
View File

@@ -2,8 +2,6 @@
Instructions for AI coding assistants and developers working on the hermes-agent codebase.
**Never give up on the right solution.**
## Development Environment
```bash
@@ -39,18 +37,12 @@ hermes-agent/
│ ├── platforms/ # Adapter per platform (telegram, discord, slack, whatsapp,
│ │ # homeassistant, signal, matrix, mattermost, email, sms,
│ │ # dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
│ │ # yuanbao, webhook, api_server, ...). See ADDING_A_PLATFORM.md.
│ │ # webhook, api_server, ...). See ADDING_A_PLATFORM.md.
│ └── builtin_hooks/ # Extension point for always-registered gateway hooks (none shipped)
├── plugins/ # Plugin system (see "Plugins" section below)
│ ├── memory/ # Memory-provider plugins (honcho, mem0, supermemory, ...)
│ ├── context_engine/ # Context-engine plugins
│   ├── model-providers/ # Inference backend plugins (openrouter, anthropic, gmi, ...)
│ ├── kanban/ # Multi-agent board dispatcher + worker plugin
│ ├── hermes-achievements/ # Gamified achievement tracking
│ ├── observability/ # Metrics / traces / logs plugin
│ ├── image_gen/ # Image-generation providers
│ └── <others>/ # disk-cleanup, example-dashboard, google_meet, platforms,
│ # spotify, strike-freedom-cockpit, ...
│   └── <others>/       # Dashboard, image-gen, disk-cleanup, examples, ...
├── optional-skills/ # Heavier/niche skills shipped but NOT active by default
├── skills/ # Built-in skills bundled with the repo
├── ui-tui/ # Ink (React) terminal UI — `hermes --tui`
@@ -61,7 +53,7 @@ hermes-agent/
├── environments/ # RL training environments (Atropos)
├── scripts/ # run_tests.sh, release.py, auxiliary scripts
├── website/ # Docusaurus docs site
└── tests/ # Pytest suite (~17k tests across ~900 files as of May 2026)
└── tests/ # Pytest suite (~15k tests across ~700 files as of Apr 2026)
```
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only).
@@ -69,29 +61,6 @@ hermes-agent/
`gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`.
Browse with `hermes logs [--follow] [--level ...] [--session ...]`.
## TypeScript Style
Applies to TypeScript across Hermes: desktop, TUI, website, and future TS packages.
- Prefer small nanostores over component state when state is shared, reused, or read by distant UI.
- Let each feature own its atoms. Chat state belongs near chat, shell state near shell, shared state in `src/store`.
- Components that render from an atom should use `useStore`. Non-rendering actions should read with `$atom.get()`.
- Do not pass state through three components when the leaf can subscribe to the atom.
- Keep persistence beside the atom that owns it.
- Keep route roots thin. They compose routes and shell; they should not become controllers.
- No monolithic hooks. A hook should own one narrow job.
- Prefer colocated action modules over hidden god hooks.
- If a callback is pure side effect, use the terse void form:
`onState={st => void setGatewayState(st)}`.
- Async UI handlers should make intent explicit:
`onClick={() => void save()}`.
- Prefer interfaces for public props and shared object shapes. Avoid `type X = { ... }` for object props.
- Extend React primitives for props: `React.ComponentProps<'button'>`, `React.ComponentProps<typeof Dialog>`, `Omit<...>`, `Pick<...>`.
- Table-driven beats condition ladders when mapping ids, routes, or views.
- `src/app` owns routes, pages, and page-specific components.
- `src/store` owns shared atoms.
- `src/lib` owns shared pure helpers.
## File Dependency Chain
```
@@ -275,7 +244,7 @@ npm test # vitest
The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.
- Browser loads `apps/dashboard/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
- `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
- Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`.
@@ -288,16 +257,7 @@ The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes
## Adding New Tools
For most custom or local-only tools, do **not** edit Hermes core. Use the plugin
route instead: create `~/.hermes/plugins/<name>/plugin.yaml` and
`~/.hermes/plugins/<name>/__init__.py`, then register tools with
`ctx.register_tool(...)`. Plugin toolsets are discovered automatically and can be
enabled or disabled without touching `tools/` or `toolsets.py`.
Use the built-in route below only when the user is explicitly contributing a new
core Hermes tool that should ship in the base system.
Built-in/core tools require changes in **2 files**:
Requires changes in **2 files**:
**1. Create `tools/your_tool.py`:**
```python
@@ -320,9 +280,9 @@ registry.register(
)
```
**2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset. **This step is required:** auto-discovery imports the tool and registers its schema, but the tool is only *exposed to an agent* if its name appears in a toolset. `_HERMES_CORE_TOOLS` is not dead code — it's the default bundle every platform's base toolset inherits from.
**2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.
Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain. Wiring into a toolset is still a deliberate, manual step.
Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain.
The registry handles schema collection, dispatch, availability checking, and error wrapping. All handlers MUST return a JSON string.
@@ -344,22 +304,6 @@ The registry handles schema collection, dispatch, availability checking, and err
section is handled automatically by the deep-merge and does NOT require
a version bump.
### Top-level `config.yaml` sections (non-exhaustive):
`model`, `agent`, `terminal`, `compression`, `display`, `stt`, `tts`,
`memory`, `security`, `delegation`, `smart_model_routing`, `checkpoints`,
`auxiliary`, `curator`, `skills`, `gateway`, `logging`, `cron`, `profiles`,
`plugins`, `honcho`.
`auxiliary` holds per-task overrides for side-LLM work (curator, vision,
embedding, title generation, session_search, etc.) — each task can pin
its own provider/model/base_url/max_tokens/reasoning_effort. See
`agent/auxiliary_client.py::_resolve_auto` for resolution order.
`curator` holds the background skill-maintenance config —
`enabled`, `interval_hours`, `min_idle_hours`, `stale_after_days`,
`archive_after_days`, `backup` (nested).
### .env variables (SECRETS ONLY — API keys, tokens, passwords):
1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
```python
@@ -538,31 +482,6 @@ generic plugin surface (new hook, new ctx method) — never hardcode
plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
honcho argparse from `main.py` for exactly this reason.
### Model-provider plugins (`plugins/model-providers/<name>/`)
Every inference backend (openrouter, anthropic, gmi, deepseek, nvidia, …)
ships as a plugin here. Each plugin's `__init__.py` calls
`providers.register_provider(ProviderProfile(...))` at module load.
`providers/__init__.py._discover_providers()` is a **lazy, separate
discovery system** — scanned on first `get_provider_profile()` or
`list_providers()` call, NOT by the general PluginManager.
Scan order:
1. Bundled: `<repo>/plugins/model-providers/<name>/`
2. User: `$HERMES_HOME/plugins/model-providers/<name>/`
3. Legacy: `<repo>/providers/<name>.py` (back-compat)
User plugins of the same name override bundled ones — `register_provider()`
is last-writer-wins. This lets third parties swap out any built-in
profile without a repo patch.
The general PluginManager records `kind: model-provider` manifests but does
NOT import them (would double-instantiate `ProviderProfile`). Plugins
without an explicit `kind:` get auto-coerced via a source-text heuristic
(`register_provider` + `ProviderProfile` in `__init__.py`).
Full authoring guide: `website/docs/developer-guide/model-provider-plugin.md`.
### Dashboard / context-engine / image-gen plugin directories
`plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`,
@@ -591,176 +510,11 @@ niche skills belong in `optional-skills/`.
### SKILL.md frontmatter
Standard fields: `name`, `description`, `version`, `author`, `license`,
`platforms` (OS-gating list: `[macos]`, `[linux, macos]`, ...),
Standard fields: `name`, `description`, `version`, `platforms`
(OS-gating list: `[macos]`, `[linux, macos]`, ...),
`metadata.hermes.tags`, `metadata.hermes.category`,
`metadata.hermes.related_skills`, `metadata.hermes.config` (config.yaml
settings the skill needs — stored under `skills.config.<key>`, prompted
during setup, injected at load time).
Top-level `tags:` and `category:` are also accepted and mirrored from
`metadata.hermes.*` by the loader.
---
## Toolsets
All toolsets are defined in `toolsets.py` as a single `TOOLSETS` dict.
Each platform's adapter picks a base toolset (e.g. Telegram uses
`"messaging"`); `_HERMES_CORE_TOOLS` is the default bundle most
platforms inherit from.
Current toolset keys: `browser`, `clarify`, `code_execution`, `cronjob`,
`debugging`, `delegation`, `discord`, `discord_admin`, `feishu_doc`,
`feishu_drive`, `file`, `homeassistant`, `image_gen`, `kanban`, `memory`,
`messaging`, `moa`, `rl`, `safe`, `search`, `session_search`, `skills`,
`spotify`, `terminal`, `todo`, `tts`, `video`, `vision`, `web`, `yuanbao`.
Enable/disable per platform via `hermes tools` (the curses UI) or the
`tools.<platform>.enabled` / `tools.<platform>.disabled` lists in
`config.yaml`.
---
## Delegation (`delegate_task`)
`tools/delegate_tool.py` spawns a subagent with an isolated
context + terminal session. Synchronous: the parent waits for the
child's summary before continuing its own loop — if the parent is
interrupted, the child is cancelled.
Two shapes:
- **Single:** pass `goal` (+ optional `context`, `toolsets`).
- **Batch (parallel):** pass `tasks: [...]` — each gets its own subagent
running concurrently. Concurrency is capped by
`delegation.max_concurrent_children` (default 3).
Roles:
- `role="leaf"` (default) — focused worker. Cannot call `delegate_task`,
`clarify`, `memory`, `send_message`, `execute_code`.
- `role="orchestrator"` — retains `delegate_task` so it can spawn its
own workers. Gated by `delegation.orchestrator_enabled` (default true)
and bounded by `delegation.max_spawn_depth` (default 2).
Key config knobs (under `delegation:` in `config.yaml`):
`max_concurrent_children`, `max_spawn_depth`, `child_timeout_seconds`,
`orchestrator_enabled`, `subagent_auto_approve`, `inherit_mcp_toolsets`,
`max_iterations`.
Synchronicity rule: delegate_task is **not** durable. For long-running
work that must outlive the current turn, use `cronjob` or
`terminal(background=True, notify_on_complete=True)` instead.
---
## Curator (skill lifecycle)
Background skill-maintenance system that tracks usage on agent-created
skills and auto-archives stale ones. Users never lose skills; archives
go to `~/.hermes/skills/.archive/` and are restorable.
- **Core:** `agent/curator.py` (review loop, auto-transitions, LLM review
prompt) + `agent/curator_backup.py` (pre-run tar.gz snapshots).
- **CLI:** `hermes_cli/curator.py` wires `hermes curator <verb>` where
verbs are: `status`, `run`, `pause`, `resume`, `pin`, `unpin`,
`archive`, `restore`, `prune`, `backup`, `rollback`.
- **Telemetry:** `tools/skill_usage.py` owns the sidecar
`~/.hermes/skills/.usage.json` — per-skill `use_count`, `view_count`,
`patch_count`, `last_activity_at`, `state` (active / stale /
archived), `pinned`.
Invariants:
- Curator only touches skills with `created_by: "agent"` provenance —
bundled + hub-installed skills are off-limits.
- Never deletes; max destructive action is archive.
- Pinned skills are exempt from every auto-transition and from the
LLM review pass.
- `skill_manage(action="delete")` refuses pinned skills; patch/edit/
write_file/remove_file go through so the agent can keep improving
pinned skills.
Config section (`curator:` in `config.yaml`):
`enabled`, `interval_hours`, `min_idle_hours`, `stale_after_days`,
`archive_after_days`, `backup.*`.
Full user-facing docs: `website/docs/user-guide/features/curator.md`.
---
## Cron (scheduled jobs)
`cron/jobs.py` (job store) + `cron/scheduler.py` (tick loop). Agents
schedule jobs via the `cronjob` tool; users via `hermes cron <verb>`
(`list`, `add`, `edit`, `pause`, `resume`, `run`, `remove`) or the
`/cron` slash command.
Supported schedule formats:
- Duration: `"30m"`, `"2h"`, `"1d"`
- "every" phrase: `"every 2h"`, `"every monday 9am"`
- 5-field cron expression: `"0 9 * * *"`
- ISO timestamp (one-shot): `"2026-06-01T09:00:00Z"`
Per-job fields include `skills` (load specific skills), `model` /
`provider` overrides, `script` (pre-run data-collection script whose
stdout is injected into the prompt; `no_agent=True` turns the script
into the entire job), `context_from` (chain job A's last output into
job B's prompt), `workdir` (run in a specific directory with its
`AGENTS.md`/`CLAUDE.md` loaded), and multi-platform delivery.
Hardening invariants:
- **3-minute hard interrupt** on cron sessions — runaway agent loops
cannot monopolize the scheduler.
- Catchup window: half the job's period, clamped to 120s–2h.
- Grace window: 120s for one-shot jobs whose fire time was missed.
- File lock at `~/.hermes/cron/.tick.lock` prevents duplicate ticks
across processes.
- Cron sessions pass `skip_memory=True` by default; memory providers
intentionally do not run during cron.
Cron deliveries are **not** mirrored into the target gateway session —
they land in their own cron session with a header/footer frame so the
main conversation's message-role alternation stays intact.
---
## Kanban (multi-agent work queue)
Durable SQLite-backed board that lets multiple profiles / workers
collaborate on shared tasks. Users drive it via `hermes kanban <verb>`;
workers spawned by the dispatcher drive it via a dedicated `kanban_*`
toolset so their schema footprint is zero when they're not inside a
kanban task.
- **CLI:** `hermes_cli/kanban.py` wires `hermes kanban` with verbs
`init`, `create`, `list` (alias `ls`), `show`, `assign`, `link`,
`unlink`, `comment`, `complete`, `block`, `unblock`, `archive`,
`tail`, plus less-commonly-used `watch`, `stats`, `runs`, `log`,
`assignees`, `heartbeat`, `notify-*`, `dispatch`, `daemon`, `gc`.
- **Worker toolset:** `tools/kanban_tools.py` exposes `kanban_show`,
`kanban_complete`, `kanban_block`, `kanban_heartbeat`, `kanban_comment`,
`kanban_create`, `kanban_link` — gated by `HERMES_KANBAN_TASK` so
the schema only appears for processes actually running as a worker.
- **Dispatcher:** long-lived loop that (default every 60s) reclaims
stale claims, promotes ready tasks, atomically claims, and spawns
assigned profiles. Runs **inside the gateway** by default via
`kanban.dispatch_in_gateway: true`.
- **Plugin assets:** `plugins/kanban/dashboard/` (web UI) +
`plugins/kanban/systemd/` (`hermes-kanban-dispatcher.service` for
standalone dispatcher deployment).
Isolation model:
- **Board** is the hard boundary — workers are spawned with
`HERMES_KANBAN_BOARD` pinned in their env so they can't see other
boards.
- **Tenant** is a soft namespace *within* a board — one specialist
fleet can serve multiple businesses with workspace-path + memory-key
isolation.
- After ~5 consecutive spawn failures on the same task the dispatcher
auto-blocks it to prevent spin loops.
Full user-facing docs: `website/docs/user-guide/features/kanban.md`.
`metadata.hermes.config` (config.yaml settings the skill needs — stored
under `skills.config.<key>`, prompted during setup, injected at load time).
---

View File

@@ -14,7 +14,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
# that would otherwise accumulate when hermes runs as PID 1. See #15012.
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential curl nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
rm -rf /var/lib/apt/lists/*
# Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
@@ -28,26 +28,10 @@ WORKDIR /opt/hermes
# ---------- Layer-cached dependency install ----------
# Copy only package manifests first so npm install + Playwright are cached
# unless the lockfiles themselves change.
#
# ui-tui/packages/hermes-ink/ is copied IN FULL (not just its manifests)
# because it is referenced as a `file:` workspace dependency from
# ui-tui/package.json. Copying the tree up front lets npm resolve the
# workspace to real content instead of stopping at a bare package.json.
COPY package.json package-lock.json ./
COPY web/package.json web/package-lock.json web/
COPY ui-tui/package.json ui-tui/package-lock.json ui-tui/
COPY ui-tui/packages/hermes-ink/ ui-tui/packages/hermes-ink/
# `npm_config_install_links=false` forces npm to install `file:` deps as
# symlinks (the npm 10+ default) even on Debian's older bundled npm 9.x,
# which defaults to `install-links=true` and installs file deps as *copies*.
# The host-side package-lock.json is generated with a newer npm that uses
# symlinks, so an install-as-copy produces a hidden node_modules/.package-lock.json
# that permanently disagrees with the root lock on the @hermes/ink entry.
# That disagreement trips the TUI launcher's `_tui_need_npm_install()`
# check on every startup and triggers a runtime `npm install` that then
# fails with EACCES (node_modules/ is root-owned from build time).
ENV npm_config_install_links=false
COPY ui-tui/packages/hermes-ink/package.json ui-tui/packages/hermes-ink/package-lock.json ui-tui/packages/hermes-ink/
RUN npm install --prefer-offline --no-audit && \
npx playwright install --with-deps chromium --only-shell && \
@@ -61,7 +45,13 @@ COPY --chown=hermes:hermes . .
# Build browser dashboard and terminal UI assets.
RUN cd web && npm run build && \
cd ../ui-tui && npm run build
cd ../ui-tui && npm run build && \
rm -rf node_modules/@hermes/ink && \
rm -rf packages/hermes-ink/node_modules && \
cp -R packages/hermes-ink node_modules/@hermes/ink && \
npm install --omit=dev --prefer-offline --no-audit --prefix node_modules/@hermes/ink && \
rm -rf node_modules/@hermes/ink/node_modules/react && \
node --input-type=module -e "await import('@hermes/ink')"
# ---------- Permissions ----------
# Make install dir world-readable so any HERMES_UID can read it at runtime.

View File

@@ -9,7 +9,6 @@
<a href="https://discord.gg/NousResearch"><img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord"></a>
<a href="https://github.com/NousResearch/hermes-agent/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-green?style=for-the-badge" alt="License: MIT"></a>
<a href="https://nousresearch.com"><img src="https://img.shields.io/badge/Built%20by-Nous%20Research-blueviolet?style=for-the-badge" alt="Built by Nous Research"></a>
<a href="README.zh-CN.md"><img src="https://img.shields.io/badge/Lang-中文-red?style=for-the-badge" alt="中文"></a>
</p>
**The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
@@ -22,7 +21,7 @@ Use any model you want — [Nous Portal](https://portal.nousresearch.com), [Open
<tr><td><b>A closed learning loop</b></td><td>Agent-curated memory with periodic nudges. Autonomous skill creation after complex tasks. Skills self-improve during use. FTS5 session search with LLM summarization for cross-session recall. <a href="https://github.com/plastic-labs/honcho">Honcho</a> dialectic user modeling. Compatible with the <a href="https://agentskills.io">agentskills.io</a> open standard.</td></tr>
<tr><td><b>Scheduled automations</b></td><td>Built-in cron scheduler with delivery to any platform. Daily reports, nightly backups, weekly audits — all in natural language, running unattended.</td></tr>
<tr><td><b>Delegates and parallelizes</b></td><td>Spawn isolated subagents for parallel workstreams. Write Python scripts that call tools via RPC, collapsing multi-step pipelines into zero-context-cost turns.</td></tr>
<tr><td><b>Runs anywhere, not just your laptop</b></td><td>Seven terminal backends — local, Docker, SSH, Singularity, Modal, Daytona, and Vercel Sandbox. Daytona and Modal offer serverless persistence — your agent's environment hibernates when idle and wakes on demand, costing nearly nothing between sessions. Run it on a $5 VPS or a GPU cluster.</td></tr>
<tr><td><b>Runs anywhere, not just your laptop</b></td><td>Six terminal backends — local, Docker, SSH, Daytona, Singularity, and Modal. Daytona and Modal offer serverless persistence — your agent's environment hibernates when idle and wakes on demand, costing nearly nothing between sessions. Run it on a $5 VPS or a GPU cluster.</td></tr>
<tr><td><b>Research-ready</b></td><td>Batch trajectory generation, Atropos RL environments, trajectory compression for training the next generation of tool-calling models.</td></tr>
</table>

View File

@@ -1,186 +0,0 @@
<p align="center">
<img src="assets/banner.png" alt="Hermes Agent" width="100%">
</p>
# Hermes Agent ☤
<p align="center">
<a href="https://hermes-agent.nousresearch.com/docs/"><img src="https://img.shields.io/badge/Docs-hermes--agent.nousresearch.com-FFD700?style=for-the-badge" alt="Documentation"></a>
<a href="https://discord.gg/NousResearch"><img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord"></a>
<a href="https://github.com/NousResearch/hermes-agent/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-green?style=for-the-badge" alt="License: MIT"></a>
<a href="https://nousresearch.com"><img src="https://img.shields.io/badge/Built%20by-Nous%20Research-blueviolet?style=for-the-badge" alt="Built by Nous Research"></a>
<a href="README.md"><img src="https://img.shields.io/badge/Lang-English-lightgrey?style=for-the-badge" alt="English"></a>
</p>
**由 [Nous Research](https://nousresearch.com) 构建的自进化 AI 代理。** 它是唯一内置学习闭环的智能代理——从经验中创建技能,在使用中改进技能,主动持久化知识,搜索过往对话,并在跨会话中逐步构建对你的深度理解。可以在 $5 的 VPS 上运行,也可以在 GPU 集群上运行,或者使用几乎零成本的 Serverless 基础设施。它不绑定你的笔记本——你可以在 Telegram 上与它对话,而它在云端 VM 上工作。
支持任意模型——[Nous Portal](https://portal.nousresearch.com)、[OpenRouter](https://openrouter.ai)200+ 模型)、[NVIDIA NIM](https://build.nvidia.com)Nemotron、[小米 MiMo](https://platform.xiaomimimo.com)、[z.ai/GLM](https://z.ai)、[Kimi/Moonshot](https://platform.moonshot.ai)、[MiniMax](https://www.minimax.io)、[Hugging Face](https://huggingface.co)、OpenAI或自定义端点。使用 `hermes model` 即可切换——无需改代码,无锁定。
<table>
<tr><td><b>真正的终端界面</b></td><td>完整的 TUI支持多行编辑、斜杠命令自动补全、对话历史、中断重定向和流式工具输出。</td></tr>
<tr><td><b>随你所在</b></td><td>Telegram、Discord、Slack、WhatsApp、Signal 和 CLI——全部从单个网关进程运行。语音备忘录转写、跨平台对话连续性。</td></tr>
<tr><td><b>闭环学习</b></td><td>代理管理记忆并定期自我提醒。复杂任务后自动创建技能。技能在使用中自我改进。FTS5 会话搜索配合 LLM 摘要实现跨会话回溯。<a href="https://github.com/plastic-labs/honcho">Honcho</a> 辩证式用户建模。兼容 <a href="https://agentskills.io">agentskills.io</a> 开放标准。</td></tr>
<tr><td><b>定时自动化</b></td><td>内置 cron 调度器,支持向任何平台投递。日报、夜间备份、周审计——全部用自然语言描述,无人值守运行。</td></tr>
<tr><td><b>委派与并行</b></td><td>生成隔离子代理处理并行工作流。编写 Python 脚本通过 RPC 调用工具,将多步管道压缩为零上下文开销的轮次。</td></tr>
<tr><td><b>随处运行</b></td><td>六种终端后端——本地、Docker、SSH、Daytona、Singularity 和 Modal。Daytona 和 Modal 提供 Serverless 持久化——代理环境空闲时休眠、按需唤醒,空闲期间几乎零成本。$5 VPS 或 GPU 集群都能跑。</td></tr>
<tr><td><b>研究就绪</b></td><td>批量轨迹生成、Atropos RL 环境、轨迹压缩——用于训练下一代工具调用模型。</td></tr>
</table>
---
## 快速安装
```bash
curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
```
支持 Linux、macOS、WSL2 和 Android (Termux)。安装程序会自动处理平台特定的配置。
> **Android / Termux** 已测试的手动安装路径请参考 [Termux 指南](https://hermes-agent.nousresearch.com/docs/getting-started/termux)。在 Termux 上Hermes 会安装精选的 `.[termux]` 扩展,因为完整的 `.[all]` 扩展会拉取 Android 不兼容的语音依赖。
>
> **Windows** 原生 Windows 不受支持。请安装 [WSL2](https://learn.microsoft.com/zh-cn/windows/wsl/install) 并运行上述命令。
安装后:
```bash
source ~/.bashrc # 重新加载 shell或: source ~/.zshrc
hermes # 开始对话!
```
---
## 快速入门
```bash
hermes # 交互式 CLI — 开始对话
hermes model # 选择 LLM 提供商和模型
hermes tools # 配置启用的工具
hermes config set # 设置单个配置项
hermes gateway # 启动消息网关Telegram、Discord 等)
hermes setup # 运行完整设置向导(一次性配置所有内容)
hermes claw migrate # 从 OpenClaw 迁移(如果来自 OpenClaw
hermes update # 更新到最新版本
hermes doctor # 诊断问题
```
📖 **[完整文档 →](https://hermes-agent.nousresearch.com/docs/)**
## CLI 与消息平台 快速对照
Hermes 有两种入口:用 `hermes` 启动终端 UI或运行网关从 Telegram、Discord、Slack、WhatsApp、Signal 或 Email 与之对话。进入对话后,许多斜杠命令在两种界面中通用。
| 操作 | CLI | 消息平台 |
|------|-----|----------|
| 开始对话 | `hermes` | 运行 `hermes gateway setup` + `hermes gateway start`,然后给机器人发消息 |
| 开始新对话 | `/new` 或 `/reset` | `/new` 或 `/reset` |
| 更换模型 | `/model [provider:model]` | `/model [provider:model]` |
| 设置人格 | `/personality [name]` | `/personality [name]` |
| 重试或撤销上一轮 | `/retry` 或 `/undo` | `/retry` 或 `/undo` |
| 压缩上下文 / 查看用量 | `/compress`、`/usage`、`/insights [--days N]` | `/compress`、`/usage`、`/insights [days]` |
| 浏览技能 | `/skills` 或 `/<skill-name>` | `/skills` 或 `/<skill-name>` |
| 中断当前工作 | `Ctrl+C` 或发送新消息 | `/stop` 或发送新消息 |
| 平台特定状态 | `/platforms` | `/status``/sethome` |
完整命令列表请参阅 [CLI 指南](https://hermes-agent.nousresearch.com/docs/user-guide/cli) 和 [消息网关指南](https://hermes-agent.nousresearch.com/docs/user-guide/messaging)。
---
## 文档
所有文档位于 **[hermes-agent.nousresearch.com/docs](https://hermes-agent.nousresearch.com/docs/)**
| 章节 | 内容 |
|------|------|
| [快速开始](https://hermes-agent.nousresearch.com/docs/getting-started/quickstart) | 安装 → 设置 → 2 分钟内开始首次对话 |
| [CLI 使用](https://hermes-agent.nousresearch.com/docs/user-guide/cli) | 命令、快捷键、人格、会话 |
| [配置](https://hermes-agent.nousresearch.com/docs/user-guide/configuration) | 配置文件、提供商、模型、所有选项 |
| [消息网关](https://hermes-agent.nousresearch.com/docs/user-guide/messaging) | Telegram、Discord、Slack、WhatsApp、Signal、Home Assistant |
| [安全](https://hermes-agent.nousresearch.com/docs/user-guide/security) | 命令审批、DM 配对、容器隔离 |
| [工具与工具集](https://hermes-agent.nousresearch.com/docs/user-guide/features/tools) | 40+ 工具、工具集系统、终端后端 |
| [技能系统](https://hermes-agent.nousresearch.com/docs/user-guide/features/skills) | 过程记忆、技能中心、创建技能 |
| [记忆](https://hermes-agent.nousresearch.com/docs/user-guide/features/memory) | 持久记忆、用户画像、最佳实践 |
| [MCP 集成](https://hermes-agent.nousresearch.com/docs/user-guide/features/mcp) | 连接任意 MCP 服务器扩展能力 |
| [定时调度](https://hermes-agent.nousresearch.com/docs/user-guide/features/cron) | 定时任务与平台投递 |
| [上下文文件](https://hermes-agent.nousresearch.com/docs/user-guide/features/context-files) | 影响每次对话的项目上下文 |
| [架构](https://hermes-agent.nousresearch.com/docs/developer-guide/architecture) | 项目结构、代理循环、关键类 |
| [贡献](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) | 开发设置、PR 流程、代码风格 |
| [CLI 参考](https://hermes-agent.nousresearch.com/docs/reference/cli-commands) | 所有命令和标志 |
| [环境变量](https://hermes-agent.nousresearch.com/docs/reference/environment-variables) | 完整环境变量参考 |
---
## 从 OpenClaw 迁移
如果你来自 OpenClawHermes 可以自动导入你的设置、记忆、技能和 API 密钥。
**首次安装时:** 安装向导(`hermes setup`)会自动检测 `~/.openclaw` 并在配置开始前提供迁移选项。
**安装后任意时间:**
```bash
hermes claw migrate # 交互式迁移(完整预设)
hermes claw migrate --dry-run # 预览将要迁移的内容
hermes claw migrate --preset user-data # 仅迁移用户数据,不含密钥
hermes claw migrate --overwrite # 覆盖已有冲突
```
导入内容:
- **SOUL.md** — 人格文件
- **记忆** — MEMORY.md 和 USER.md 条目
- **技能** — 用户创建的技能 → `~/.hermes/skills/openclaw-imports/`
- **命令白名单** — 审批模式
- **消息设置** — 平台配置、允许用户、工作目录
- **API 密钥** — 白名单中的密钥Telegram、OpenRouter、OpenAI、Anthropic、ElevenLabs
- **TTS 资产** — 工作区音频文件
- **工作区指令** — AGENTS.md使用 `--workspace-target`
使用 `hermes claw migrate --help` 查看所有选项,或使用 `openclaw-migration` 技能进行交互式代理引导迁移(含干运行预览)。
---
## 贡献
欢迎贡献!请参阅 [贡献指南](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) 了解开发设置、代码风格和 PR 流程。
贡献者快速开始——克隆并使用 `setup-hermes.sh`
```bash
git clone https://github.com/NousResearch/hermes-agent.git
cd hermes-agent
./setup-hermes.sh # 安装 uv、创建 venv、安装 .[all]、创建符号链接 ~/.local/bin/hermes
./hermes # 自动检测 venv无需先 source
```
手动安装(等效于上述命令):
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
uv venv venv --python 3.11
source venv/bin/activate
uv pip install -e ".[all,dev]"
python -m pytest tests/ -q
```
> **RL 训练(可选):** 如需参与 RL/Tinker-Atropos 集成开发:
> ```bash
> git submodule update --init tinker-atropos
> uv pip install -e "./tinker-atropos"
> ```
---
## 社区
- 💬 [Discord](https://discord.gg/NousResearch)
- 📚 [技能中心](https://agentskills.io)
- 🐛 [问题反馈](https://github.com/NousResearch/hermes-agent/issues)
- 💡 [讨论区](https://github.com/NousResearch/hermes-agent/discussions)
- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — 社区微信桥接:在同一微信账号上运行 Hermes Agent 和 OpenClaw。
---
## 许可证
MIT — 详见 [LICENSE](LICENSE)。
由 [Nous Research](https://nousresearch.com) 构建。

View File

@@ -1,505 +0,0 @@
# Hermes Agent v0.12.0 (v2026.4.30)
**Release Date:** April 30, 2026
**Since v0.11.0:** 1,096 commits · 550 merged PRs · 1,270 files changed · 217,776 insertions · 213 community contributors (including co-authors)
> The Curator release — Hermes Agent now maintains itself. An autonomous background Curator grades, prunes, and consolidates your skill library on its own schedule. The self-improvement loop that reviews what to save got a substantial upgrade. Four new inference providers, an 18th messaging platform, a 19th via Teams plugin, native Spotify + Google Meet integrations, ComfyUI and TouchDesigner-MCP moved from optional to bundled-by-default, and a ~57% cut to visible TUI cold start.
---
## ✨ Highlights
- **Autonomous Curator** — `hermes curator` runs as a background agent on the gateway's cron ticker (7-day cycle default). It grades your skill library, consolidates related skills, prunes dead ones, and writes per-run reports to `logs/curator/run.json` + `REPORT.md`. Archived skills are classified consolidated-vs-pruned via model + heuristic. Defense-in-depth gates protect bundled/hub skills from mutation. Unified under `auxiliary.curator` — pick the curator's model in `hermes model`, manage it from the dashboard. `hermes curator status` ranks skills by usage (most-used / least-used). ([#17277](https://github.com/NousResearch/hermes-agent/pull/17277), [#17307](https://github.com/NousResearch/hermes-agent/pull/17307), [#17941](https://github.com/NousResearch/hermes-agent/pull/17941), [#17868](https://github.com/NousResearch/hermes-agent/pull/17868), [#18033](https://github.com/NousResearch/hermes-agent/pull/18033))
- **Self-improvement loop — substantially upgraded** — The background review fork (the core of Hermes' self-improvement: after each turn it decides what memories/skills to save or update) is now class-first (rubric-based rather than free-form), active-update biased (prefers the skill the agent just loaded), handles `references/`/`templates/` sub-files, and properly inherits the parent's live runtime (provider, model, credentials actually propagate). Restricted to memory + skills toolsets so it can't sprawl. Memory providers shut down cleanly. Prior-turn tool messages excluded from the summary so the fork sees a clean context. ([#16026](https://github.com/NousResearch/hermes-agent/pull/16026), [#17213](https://github.com/NousResearch/hermes-agent/pull/17213), [#16099](https://github.com/NousResearch/hermes-agent/pull/16099), [#16569](https://github.com/NousResearch/hermes-agent/pull/16569), [#16204](https://github.com/NousResearch/hermes-agent/pull/16204), [#15057](https://github.com/NousResearch/hermes-agent/pull/15057))
- **Skill integrations — major expansion** — **ComfyUI v5** with official CLI + REST + hardware-gated local install, moved from optional to **built-in by default** ([#17610](https://github.com/NousResearch/hermes-agent/pull/17610), [#17631](https://github.com/NousResearch/hermes-agent/pull/17631), [#17734](https://github.com/NousResearch/hermes-agent/pull/17734)). **TouchDesigner-MCP** bundled by default, expanded with GLSL, post-FX, audio, geometry, and 9 new reference docs ([#16753](https://github.com/NousResearch/hermes-agent/pull/16753), [#16624](https://github.com/NousResearch/hermes-agent/pull/16624), [#16768](https://github.com/NousResearch/hermes-agent/pull/16768) — @kshitijk4poor + @SHL0MS). **Humanizer** skill ports a text-cleaner that strips AI-isms ([#16787](https://github.com/NousResearch/hermes-agent/pull/16787)). **claude-design** HTML artifact skill + design-md (Google DESIGN.md spec) + airtable salvage + `skill_manage` edits in `external_dirs` + direct-URL skill install + `/reload-skills` slash command. ([#16358](https://github.com/NousResearch/hermes-agent/pull/16358), [#14876](https://github.com/NousResearch/hermes-agent/pull/14876), [#16291](https://github.com/NousResearch/hermes-agent/pull/16291), [#17512](https://github.com/NousResearch/hermes-agent/pull/17512), [#16323](https://github.com/NousResearch/hermes-agent/pull/16323), [#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
- **LM Studio — first-class provider** — upgraded from a custom-endpoint alias to a full-blown native provider: dedicated auth, `hermes doctor` checks, reasoning transport, live `/models` listing. (Salvage of @kshitijk4poor's #17061.) ([#17102](https://github.com/NousResearch/hermes-agent/pull/17102))
- **Four more new inference providers** — **GMI Cloud** (first-class, salvage of #11955 by @isaachuang — GMI Cloud), **Azure AI Foundry** with auto-detection, **MiniMax OAuth** with PKCE browser flow (salvage #15203), **Tencent Tokenhub** (salvage of #16860). ([#16663](https://github.com/NousResearch/hermes-agent/pull/16663), [#15845](https://github.com/NousResearch/hermes-agent/pull/15845), [#17524](https://github.com/NousResearch/hermes-agent/pull/17524), [#16960](https://github.com/NousResearch/hermes-agent/pull/16960))
- **Pluggable gateway platforms + Microsoft Teams** — the gateway is now a plugin host. Drop-in messaging adapters live outside the core, and Microsoft Teams is the first plugin-shipped platform. (Salvage of #17664.) ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751), [#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
- **Tencent 元宝 (Yuanbao) — 18th messaging platform** — native gateway adapter with text + media delivery. ([#16298](https://github.com/NousResearch/hermes-agent/pull/16298), [#17424](https://github.com/NousResearch/hermes-agent/pull/17424))
- **Spotify — native tools + bundled skill + wizard** — 7 tools (play, search, queue, playlists, devices) behind PKCE OAuth, interactive setup wizard, bundled skill, surfacing in `hermes tools`, cron usage documented. ([#15121](https://github.com/NousResearch/hermes-agent/pull/15121), [#15130](https://github.com/NousResearch/hermes-agent/pull/15130), [#15154](https://github.com/NousResearch/hermes-agent/pull/15154), [#15180](https://github.com/NousResearch/hermes-agent/pull/15180))
- **Google Meet plugin** — join calls, transcribe, speak, follow up. Realtime OpenAI transport + Node bot server, full pipeline bundled as a plugin. ([#16364](https://github.com/NousResearch/hermes-agent/pull/16364))
- **`hermes -z` one-shot mode + `hermes update --check`** — non-interactive `hermes -z <prompt>` with `--model`/`--provider`/`HERMES_INFERENCE_MODEL`. `hermes update --check` preflight. Opt-in pre-update HERMES_HOME backup. ([#15702](https://github.com/NousResearch/hermes-agent/pull/15702), [#15704](https://github.com/NousResearch/hermes-agent/pull/15704), [#15841](https://github.com/NousResearch/hermes-agent/pull/15841), [#16539](https://github.com/NousResearch/hermes-agent/pull/16539), [#16566](https://github.com/NousResearch/hermes-agent/pull/16566))
- **Models dashboard tab + in-browser model config** — rich per-model analytics, switch main + auxiliary models from the dashboard. ([#17745](https://github.com/NousResearch/hermes-agent/pull/17745), [#17802](https://github.com/NousResearch/hermes-agent/pull/17802))
- **Remote model catalog manifest** — OpenRouter + Nous Portal model catalogs are now pulled from a remote manifest so new models show up without a release. ([#16033](https://github.com/NousResearch/hermes-agent/pull/16033))
- **Native multimodal image routing** — images now route based on the model's actual vision capability rather than provider defaults. ([#16506](https://github.com/NousResearch/hermes-agent/pull/16506))
- **Gateway media parity** — native multi-image sending across Telegram, Discord, Slack, Mattermost, Email, and Signal; centralized audio routing with FLAC support + Telegram document fallback. ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909), [#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
- **TUI catches up to (and past) the classic CLI** — LaTeX rendering (@austinpickett), `/reload` .env hot-reload, pluggable busy-indicator styles (@OutThisLife, #13610), opt-in auto-resume of last session, expanded light-terminal auto-detection, session delete from `/resume` picker with `d`, modified mouse-wheel line scroll, and a `/mouse` toggle that kills ConPTY's phantom mouse injection (@kevin-ho). ([#17175](https://github.com/NousResearch/hermes-agent/pull/17175), [#17286](https://github.com/NousResearch/hermes-agent/pull/17286), [#17150](https://github.com/NousResearch/hermes-agent/pull/17150), [#17130](https://github.com/NousResearch/hermes-agent/pull/17130), [#17113](https://github.com/NousResearch/hermes-agent/pull/17113), [#17668](https://github.com/NousResearch/hermes-agent/pull/17668), [#17669](https://github.com/NousResearch/hermes-agent/pull/17669), [#15488](https://github.com/NousResearch/hermes-agent/pull/15488))
- **Observability + achievements plugins** — bundled Langfuse observability plugin (salvage #16845) + bundled hermes-achievements plugin that scans full session history. ([#16917](https://github.com/NousResearch/hermes-agent/pull/16917), [#17754](https://github.com/NousResearch/hermes-agent/pull/17754))
- **TTS provider registry + Piper local TTS** — pluggable `tts.providers.<name>` registry; Piper ships as a native local TTS provider. (Closes #8508.) ([#17843](https://github.com/NousResearch/hermes-agent/pull/17843), [#17885](https://github.com/NousResearch/hermes-agent/pull/17885))
- **Vercel Sandbox backend** — Vercel sandboxes as an execute_code/terminal backend (@kshitijk4poor). ([#17445](https://github.com/NousResearch/hermes-agent/pull/17445))
- **Secret redaction off by default** — default flipped to off. Prevents the long-standing patch-corruption incidents where fake secret-shaped substrings mangled tool outputs. Opt in via `redaction.enabled: true` when you need it. ([#16794](https://github.com/NousResearch/hermes-agent/pull/16794))
- **Cold-start performance** — visible TUI cold start cut **~57%** via lazy agent init (@OutThisLife), lazy imports of OpenAI / Anthropic / Firecrawl / account_usage, mtime-cached `load_config()`, memoized `get_tool_definitions()` with TTL-cached `check_fn` results, precompiled dangerous-command patterns. ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190), [#17046](https://github.com/NousResearch/hermes-agent/pull/17046), [#17041](https://github.com/NousResearch/hermes-agent/pull/17041), [#17098](https://github.com/NousResearch/hermes-agent/pull/17098), [#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
- **Configurable prompt cache TTL** — `prompt_caching.cache_ttl` (5m default, 1h opt-in — cost savings for bursty sessions that keep cache warm). Salvage of #12659. ([#15065](https://github.com/NousResearch/hermes-agent/pull/15065))
---
## 🧠 Autonomous Curator & Self-Improvement Loop
### Curator — autonomous skill maintenance
- **`hermes curator` as a background agent** — runs on the gateway's cron ticker, 7-day cycle by default, umbrella-first prompt, inherits parent config, unbounded iterations ([#17277](https://github.com/NousResearch/hermes-agent/pull/17277) — issue #7816)
- **Per-run reports** — `logs/curator/run.json` + `REPORT.md` per cycle ([#17307](https://github.com/NousResearch/hermes-agent/pull/17307))
- **Consolidated vs pruned classification** — archived skills split with model + heuristic ([#17941](https://github.com/NousResearch/hermes-agent/pull/17941))
- **`hermes curator status`** — ranks skills by usage, shows most-used and least-used ([#18033](https://github.com/NousResearch/hermes-agent/pull/18033))
- **Unified under `auxiliary.curator`** — pick the model in `hermes model`, configure from the dashboard ([#17868](https://github.com/NousResearch/hermes-agent/pull/17868))
- **Documentation** — dedicated curator feature page on the docs site ([#17563](https://github.com/NousResearch/hermes-agent/pull/17563))
- Fix: seed defaults on update, create `logs/curator/` directory, defer fire import ([#17927](https://github.com/NousResearch/hermes-agent/pull/17927))
- Fix: scan nested archive subdirs in `restore_skill` (@0xDevNinja) ([#17951](https://github.com/NousResearch/hermes-agent/pull/17951))
- Fix: use actual skill activity in curator status (@y0shua1ee) ([#17953](https://github.com/NousResearch/hermes-agent/pull/17953))
- Fix: `skill_manage` refuses writes on pinned skills; pinning now blocks curator writes ([#17562](https://github.com/NousResearch/hermes-agent/pull/17562), [#17578](https://github.com/NousResearch/hermes-agent/pull/17578))
- Fix: `bump_use()` wired into skill invocation + preload + skill_view (salvage #17782) ([#17932](https://github.com/NousResearch/hermes-agent/pull/17932))
### Self-improvement loop (background review fork)
- **Class-first skill-review prompt** — rubric-based grading rather than free-form "should this update" ([#16026](https://github.com/NousResearch/hermes-agent/pull/16026))
- **Active-update bias** — prefers updating skills the agent just loaded, handles `references/` + `templates/` sub-files ([#17213](https://github.com/NousResearch/hermes-agent/pull/17213))
- **Fork inherits parent's live runtime** — provider, model, credentials actually propagate now ([#16099](https://github.com/NousResearch/hermes-agent/pull/16099))
- **Scoped toolsets** — review fork restricted to memory + skills (no shell, no web) ([#16569](https://github.com/NousResearch/hermes-agent/pull/16569))
- **Clean shutdown** — background review memory providers exit properly (salvage #15289) ([#16204](https://github.com/NousResearch/hermes-agent/pull/16204))
- **Clean context** — prior-history tool messages excluded from review summary (salvage #14967) ([#15057](https://github.com/NousResearch/hermes-agent/pull/15057))
---
## 🧩 Skills Ecosystem
### Skill integrations — newly bundled or promoted
- **ComfyUI v5** — official CLI + REST + hardware-gated local install; **moved from optional to built-in** ([#17610](https://github.com/NousResearch/hermes-agent/pull/17610), [#17631](https://github.com/NousResearch/hermes-agent/pull/17631), [#17734](https://github.com/NousResearch/hermes-agent/pull/17734), [#17612](https://github.com/NousResearch/hermes-agent/pull/17612))
- **TouchDesigner-MCP** — **bundled by default** ([#16753](https://github.com/NousResearch/hermes-agent/pull/16753) — @kshitijk4poor), expanded with GLSL, post-FX, audio, geometry references ([#16624](https://github.com/NousResearch/hermes-agent/pull/16624)), 9 new reference docs ([#16768](https://github.com/NousResearch/hermes-agent/pull/16768) — @SHL0MS)
- **Humanizer** — strips AI-isms from text ([#16787](https://github.com/NousResearch/hermes-agent/pull/16787))
- **claude-design** — HTML artifact skill with disambiguation from other design skills ([#16358](https://github.com/NousResearch/hermes-agent/pull/16358))
- **design-md** — Google's DESIGN.md spec skill ([#14876](https://github.com/NousResearch/hermes-agent/pull/14876))
- **airtable** — salvaged skill + skill API keys wired into `.env` (#15838) ([#16291](https://github.com/NousResearch/hermes-agent/pull/16291))
- **pretext** — creative browser demos with @chenglou/pretext ([#17259](https://github.com/NousResearch/hermes-agent/pull/17259))
- **spike** + **sketch** — throwaway experiments + HTML mockups, adapted from gsd-build ([#17421](https://github.com/NousResearch/hermes-agent/pull/17421))
### Skills UX
- **Install skills from a direct HTTP(S) URL** — `hermes skills install <url>` ([#16323](https://github.com/NousResearch/hermes-agent/pull/16323))
- **`/reload-skills`** slash command (salvage #17670) ([#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
- **`hermes skills list`** shows enabled/disabled status ([#16129](https://github.com/NousResearch/hermes-agent/pull/16129))
- **`skill_manage` refuses writes on pinned skills** ([#17562](https://github.com/NousResearch/hermes-agent/pull/17562))
- **`skill_manage` edits external_dirs skills in place** (salvage #9966) ([#17512](https://github.com/NousResearch/hermes-agent/pull/17512), [#17289](https://github.com/NousResearch/hermes-agent/pull/17289))
- Fix: inline-shell rendering in `skill_view` ([#15376](https://github.com/NousResearch/hermes-agent/pull/15376))
- Fix: exclude `.archive/` from skill index walk (salvage #17639) ([#17931](https://github.com/NousResearch/hermes-agent/pull/17931))
- Fix: dedicated docs page per bundled + optional skill ([#14929](https://github.com/NousResearch/hermes-agent/pull/14929))
- Fix: `google-workspace` shared HERMES_HOME helper + ship deps as optional extra ([#15405](https://github.com/NousResearch/hermes-agent/pull/15405))
- Fix: auto-wrap ASCII-art code blocks in generated skill pages ([#16497](https://github.com/NousResearch/hermes-agent/pull/16497))
- Point agent at `hermes-agent` skill + docs site for Hermes questions ([#16535](https://github.com/NousResearch/hermes-agent/pull/16535))
---
## 🏗️ Core Agent & Architecture
### Provider & Model Support
#### New providers
- **GMI Cloud** — first-class API-key provider on par with Arcee/Kilocode/Xiaomi (salvage of #11955 by @isaachuang — GMI Cloud) ([#16663](https://github.com/NousResearch/hermes-agent/pull/16663))
- **Azure AI Foundry** — auto-detection, full wiring ([#15845](https://github.com/NousResearch/hermes-agent/pull/15845))
- **LM Studio** — upgraded from custom-endpoint alias to first-class provider: dedicated auth, doctor checks, reasoning transport, live `/models` (salvage of #17061 by @kshitijk4poor) ([#17102](https://github.com/NousResearch/hermes-agent/pull/17102))
- **MiniMax OAuth** — PKCE browser flow with full OAuth integration (salvage #15203) ([#17524](https://github.com/NousResearch/hermes-agent/pull/17524))
- **Tencent Tokenhub** — new provider (salvage of #16860) ([#16960](https://github.com/NousResearch/hermes-agent/pull/16960))
#### Model catalog
- **Remote model catalog manifest** — OpenRouter + Nous Portal catalogs pulled from remote manifest so new models show up without a release ([#16033](https://github.com/NousResearch/hermes-agent/pull/16033))
- `openai/gpt-5.5` and `gpt-5.5-pro` added to OpenRouter + Nous Portal ([#15343](https://github.com/NousResearch/hermes-agent/pull/15343))
- `deepseek-v4-pro` and `deepseek-v4-flash` added ([#14934](https://github.com/NousResearch/hermes-agent/pull/14934))
- `qwen3.6-plus` added to Alibaba-supported models ([#16896](https://github.com/NousResearch/hermes-agent/pull/16896))
- Gemini free-tier keys blocked at setup with 429 guidance surfacing ([#15100](https://github.com/NousResearch/hermes-agent/pull/15100))
#### Model configuration
- **Configurable `prompt_caching.cache_ttl`** — 5m default, 1h opt-in (salvage #12659) ([#15065](https://github.com/NousResearch/hermes-agent/pull/15065))
- `/fast` whitelist broadened to all OpenAI + Anthropic models ([#16883](https://github.com/NousResearch/hermes-agent/pull/16883))
- `auxiliary.extra_body.reasoning` translates into Codex Responses API ([#17004](https://github.com/NousResearch/hermes-agent/pull/17004))
- `hermes fallback` command for managing fallback providers ([#16052](https://github.com/NousResearch/hermes-agent/pull/16052))
### Agent Loop & Conversation
- **Native multimodal image routing** — based on model vision capability, not provider defaults ([#16506](https://github.com/NousResearch/hermes-agent/pull/16506))
- **Delegate `child_timeout_seconds` default bumped to 600s** ([#14809](https://github.com/NousResearch/hermes-agent/pull/14809))
- **Diagnostic dump when subagent times out with 0 API calls** ([#15105](https://github.com/NousResearch/hermes-agent/pull/15105))
- **Gateway busts cached agent on compression/context_length config edits** ([#17008](https://github.com/NousResearch/hermes-agent/pull/17008))
- **Opt-in runtime-metadata footer on final replies** ([#17026](https://github.com/NousResearch/hermes-agent/pull/17026))
- `/reload-mcp` awareness — rebuild cached agents + prompt-cache cost confirmation ([#17729](https://github.com/NousResearch/hermes-agent/pull/17729))
- Fix: repair CamelCase + `_tool` suffix tool-call emissions ([#15124](https://github.com/NousResearch/hermes-agent/pull/15124))
- Fix: retry on `json.JSONDecodeError` instead of treating as local validation error ([#15107](https://github.com/NousResearch/hermes-agent/pull/15107))
- Fix: handle unescaped control chars in `tool_call.arguments` ([#15356](https://github.com/NousResearch/hermes-agent/pull/15356))
- Fix: ordering fix in `_copy_reasoning_content_for_api` — cross-provider reasoning isolation (@Zjianru) ([#15749](https://github.com/NousResearch/hermes-agent/pull/15749))
- Fix: inject empty `reasoning_content` for DeepSeek/Kimi `tool_calls` unconditionally (@Zjianru) ([#15762](https://github.com/NousResearch/hermes-agent/pull/15762))
- Fix: persist streamed `reasoning_content` on assistant turns (#16844) ([#16892](https://github.com/NousResearch/hermes-agent/pull/16892))
- Fix: cancel coroutine on timeout so worker thread exits; full traceback on tool failure ([#17428](https://github.com/NousResearch/hermes-agent/pull/17428))
- Fix: isolate `get_tool_definitions` quiet_mode cache + dedup LCM injection (#17335) ([#17889](https://github.com/NousResearch/hermes-agent/pull/17889))
- Fix: serialize concurrent `hermes_tools` RPC calls from `execute_code` (#17770) ([#17894](https://github.com/NousResearch/hermes-agent/pull/17894), [#17902](https://github.com/NousResearch/hermes-agent/pull/17902))
- Fix: rename `[SYSTEM:` → `[IMPORTANT:` in all user-injected markers (dodges Azure content filter) ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
### Compression
- **Retry summary on main model for unknown errors before giving up** ([#16774](https://github.com/NousResearch/hermes-agent/pull/16774))
- **Notify users when configured aux model fails even if main-model fallback recovers** ([#16775](https://github.com/NousResearch/hermes-agent/pull/16775))
- `/compress` wrapped in `_busy_command` to block input during compression ([#15388](https://github.com/NousResearch/hermes-agent/pull/15388))
- Fix: reserve system + tools headroom when aux binds threshold ([#15631](https://github.com/NousResearch/hermes-agent/pull/15631))
- Fix: use text-char sum for multimodal token estimation in `_find_tail_cut_by_tokens` ([#16369](https://github.com/NousResearch/hermes-agent/pull/16369))
### Session, Memory & State
- **Trigram FTS5 index for CJK search, replace LIKE fallback** (@alt-glitch) ([#16651](https://github.com/NousResearch/hermes-agent/pull/16651))
- **Index `tool_name` + `tool_calls` in FTS5, with repair + migration** (salvages #16866) ([#16914](https://github.com/NousResearch/hermes-agent/pull/16914))
- **Checkpoints: auto-prune orphan and stale shadow repos at startup** ([#16303](https://github.com/NousResearch/hermes-agent/pull/16303))
- **Memory providers notified on mid-process session_id rotation** (#6672) ([#17409](https://github.com/NousResearch/hermes-agent/pull/17409))
- Fix: quote underscored terms in FTS5 query sanitization ([#16915](https://github.com/NousResearch/hermes-agent/pull/16915))
- Fix: resolve viking_read 500/412 on file URIs + pseudo-summary URIs (salvage #5886) ([#17869](https://github.com/NousResearch/hermes-agent/pull/17869))
- Fix: skip external-provider sync on interrupted turns ([#15395](https://github.com/NousResearch/hermes-agent/pull/15395))
- Fix: close embedded Hindsight async client cleanly (salvage #14605) ([#16209](https://github.com/NousResearch/hermes-agent/pull/16209))
- Fix: pass session transcript to `shutdown_memory_provider` on gateway + CLI (#15165) ([#16571](https://github.com/NousResearch/hermes-agent/pull/16571))
- Fix: write-origin metadata seam ([#15346](https://github.com/NousResearch/hermes-agent/pull/15346))
- Fix: preserve symlinks during atomic file writes ([#16980](https://github.com/NousResearch/hermes-agent/pull/16980))
- Refactor: remove `flush_memories` entirely ([#15696](https://github.com/NousResearch/hermes-agent/pull/15696))
### Auxiliary models
- Fix: surface auxiliary failures in UI (previously silent) ([#15324](https://github.com/NousResearch/hermes-agent/pull/15324))
- Fix: surface title-gen auxiliary failures instead of silently dropping ([#16371](https://github.com/NousResearch/hermes-agent/pull/16371))
- Fix: generalize unsupported-parameter detector and harden `max_tokens` retry ([#15633](https://github.com/NousResearch/hermes-agent/pull/15633))
---
## 📱 Messaging Platforms (Gateway)
### New Platforms
- **Microsoft Teams (19th platform)** — as a plugin, + xdist collision guard ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
- **Yuanbao (Tencent 元宝, 18th platform)** — native adapter with text + media delivery ([#16298](https://github.com/NousResearch/hermes-agent/pull/16298), [#17424](https://github.com/NousResearch/hermes-agent/pull/17424), [#16880](https://github.com/NousResearch/hermes-agent/pull/16880))
### Pluggable Gateway Platforms
- **Drop-in messaging adapters** — the gateway is now a plugin host for platforms (salvage of #17664) ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751))
### Telegram
- **Chat allowlists for groups and forums** (@web3blind) ([#15027](https://github.com/NousResearch/hermes-agent/pull/15027))
- **Send fresh finals for stale preview streams** (port openclaw#72038) ([#16261](https://github.com/NousResearch/hermes-agent/pull/16261))
- **Render markdown tables as row-group bullets + prompt hint** ([#16997](https://github.com/NousResearch/hermes-agent/pull/16997))
- Document fallback in centralized audio routing ([#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
- Native multi-image sending ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
### Discord
- **Opt-in toolsets + ID injection + tool split + Feishu wiring** (salvage #15457, #15458) ([#15610](https://github.com/NousResearch/hermes-agent/pull/15610), [#15613](https://github.com/NousResearch/hermes-agent/pull/15613))
- Fix: coerce `limit` parameter to int before `min()` call ([#16319](https://github.com/NousResearch/hermes-agent/pull/16319))
### Slack
- **Register every gateway command as a native slash (Discord/Telegram parity)** ([#16164](https://github.com/NousResearch/hermes-agent/pull/16164))
- **`strict_mention` config** — prevents thread auto-engagement ([#16193](https://github.com/NousResearch/hermes-agent/pull/16193))
- **`channel_skill_bindings`** — bind specific skills to specific Slack channels ([#16283](https://github.com/NousResearch/hermes-agent/pull/16283))
### Signal
- **Native formatting** — markdown → bodyRanges, reply quotes, reactions ([#17417](https://github.com/NousResearch/hermes-agent/pull/17417))
- Native multi-image sending ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
### Feishu / Mattermost / Email / Signal
- All participate in **native multi-image sending** ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
### Gateway Core
- **Centralized audio routing + FLAC support + Telegram doc fallback** ([#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
- **Native multi-image sending** across Telegram, Discord, Slack, Mattermost, Email, Signal ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
- **Make hygiene hard message limit configurable** ([#17000](https://github.com/NousResearch/hermes-agent/pull/17000))
- **Opt-in runtime-metadata footer on final replies** ([#17026](https://github.com/NousResearch/hermes-agent/pull/17026))
- **`pre_gateway_dispatch` hook** — plugins can intercept before dispatch ([#15050](https://github.com/NousResearch/hermes-agent/pull/15050))
- **`pre_approval_request` / `post_approval_response` hooks** ([#16776](https://github.com/NousResearch/hermes-agent/pull/16776))
- Fix: timeouts — guard `load_config()` call against runtime exceptions ([#16318](https://github.com/NousResearch/hermes-agent/pull/16318))
- Fix: support passing handler tools via registry ([#15613](https://github.com/NousResearch/hermes-agent/pull/15613))
---
## 🔧 Tool System
### Plugin-first architecture
- **Pluggable gateway platforms** — platforms can ship as plugins ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751))
- **Microsoft Teams as first plugin-shipped platform** ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
- **`pre_gateway_dispatch` hook** ([#15050](https://github.com/NousResearch/hermes-agent/pull/15050))
- **`pre_approval_request` + `post_approval_response` hooks** ([#16776](https://github.com/NousResearch/hermes-agent/pull/16776))
- **`duration_ms` on `post_tool_call`** (inspired by Claude Code 2.1.119) ([#15429](https://github.com/NousResearch/hermes-agent/pull/15429))
- **Bundled plugins**: Spotify ([#15174](https://github.com/NousResearch/hermes-agent/pull/15174)), Google Meet ([#16364](https://github.com/NousResearch/hermes-agent/pull/16364)), Langfuse observability ([#16917](https://github.com/NousResearch/hermes-agent/pull/16917)), hermes-achievements ([#17754](https://github.com/NousResearch/hermes-agent/pull/17754))
- **Page-scoped plugin slots for built-in dashboard pages** ([#15658](https://github.com/NousResearch/hermes-agent/pull/15658))
- **Declarative plugin installation for NixOS module** (@alt-glitch) ([#15953](https://github.com/NousResearch/hermes-agent/pull/15953))
### Browser
- **CDP supervisor** — dialog detection + response + cross-origin iframe eval ([#14540](https://github.com/NousResearch/hermes-agent/pull/14540))
- **Auto-spawn local Chromium for LAN/localhost URLs** when cloud provider is configured ([#16136](https://github.com/NousResearch/hermes-agent/pull/16136))
### Execute code / Terminal
- **Vercel Sandbox backend** for `execute_code` / terminal (@kshitijk4poor) ([#17445](https://github.com/NousResearch/hermes-agent/pull/17445))
- **Collapse subagent `task_id`s to shared container** ([#16177](https://github.com/NousResearch/hermes-agent/pull/16177))
- **Docker: run container as host user** to avoid root-owned bind mounts (@benbarclay) ([#17305](https://github.com/NousResearch/hermes-agent/pull/17305))
- Fix: safely quote `~/` subpaths in wrapped `cd` commands ([#15394](https://github.com/NousResearch/hermes-agent/pull/15394))
- Fix: close file descriptor in `LocalEnvironment._update_cwd` ([#17300](https://github.com/NousResearch/hermes-agent/pull/17300))
- Fix: SSH — prevent tar from overwriting remote home dir permissions ([#17898](https://github.com/NousResearch/hermes-agent/pull/17898), [#17867](https://github.com/NousResearch/hermes-agent/pull/17867))
### Image generation
- See Provider section for updates; no new image providers this window.
### TTS / Voice
- **Pluggable TTS provider registry** under `tts.providers.<name>` ([#17843](https://github.com/NousResearch/hermes-agent/pull/17843))
- **Piper** as native local TTS provider (closes #8508) ([#17885](https://github.com/NousResearch/hermes-agent/pull/17885))
- **Voice mode CLI parity in the TUI** — VAD loop + TTS + crash forensics ([#14810](https://github.com/NousResearch/hermes-agent/pull/14810))
- Fix: vision — use HERMES_HOME-based cache dir instead of cwd ([#17719](https://github.com/NousResearch/hermes-agent/pull/17719))
### Cron
- **Honor `hermes tools` config for the cron platform** ([#14798](https://github.com/NousResearch/hermes-agent/pull/14798))
- **Per-job `workdir`** — project-aware cron runs ([#15110](https://github.com/NousResearch/hermes-agent/pull/15110))
- **`context_from` field** — chain cron job outputs ([#15606](https://github.com/NousResearch/hermes-agent/pull/15606))
- Fix: promote `croniter` to a core dependency ([#17577](https://github.com/NousResearch/hermes-agent/pull/17577))
### Web search
- **Expose `limit` for `web_search`** ([#16934](https://github.com/NousResearch/hermes-agent/pull/16934))
### Maps
- Fix: include seconds in timezone UTC offset output ([#16300](https://github.com/NousResearch/hermes-agent/pull/16300))
### Approvals
- **Hardline blocklist for unrecoverable commands** ([#15878](https://github.com/NousResearch/hermes-agent/pull/15878))
- Perf: precompile DANGEROUS_PATTERNS and HARDLINE_PATTERNS ([#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
### ACP
- **Advertise and forward image prompts** ([#18030](https://github.com/NousResearch/hermes-agent/pull/18030))
### API Server
- **POST `/v1/runs/{run_id}/stop`** (salvage of #15656) ([#15842](https://github.com/NousResearch/hermes-agent/pull/15842))
- **Expose run status for external UIs** (#17085) ([#17458](https://github.com/NousResearch/hermes-agent/pull/17458))
### Nix
- **Declarative plugin installation for NixOS module** (@alt-glitch) ([#15953](https://github.com/NousResearch/hermes-agent/pull/15953))
- Fix: use `--rebuild` in fix-lockfiles to bypass cached FOD store paths ([#15444](https://github.com/NousResearch/hermes-agent/pull/15444))
- Fix: `extraPackages` now actually works via per-user profile ([#17047](https://github.com/NousResearch/hermes-agent/pull/17047))
- Fix: refresh web/ npm-deps hash to unblock main builds ([#17174](https://github.com/NousResearch/hermes-agent/pull/17174))
- Fix: replace magic-nix-cache with Cachix ([#17928](https://github.com/NousResearch/hermes-agent/pull/17928))
---
## 🖥️ TUI
### New features
- **LaTeX rendering** (@austinpickett) ([#17175](https://github.com/NousResearch/hermes-agent/pull/17175))
- **`/reload` .env hot-reload** — ported from the classic CLI ([#17286](https://github.com/NousResearch/hermes-agent/pull/17286))
- **Pluggable busy-indicator styles** (@OutThisLife, #13610) ([#17150](https://github.com/NousResearch/hermes-agent/pull/17150))
- **Opt-in auto-resume of the most recent session** (@OutThisLife) ([#17130](https://github.com/NousResearch/hermes-agent/pull/17130))
- **Expanded light-terminal auto-detection** — `HERMES_TUI_THEME` + background hex (@OutThisLife) ([#17113](https://github.com/NousResearch/hermes-agent/pull/17113))
- **Delete sessions from `/resume` picker with `d`** (@OutThisLife) ([#17668](https://github.com/NousResearch/hermes-agent/pull/17668))
- **Line-by-line scroll on modified mouse wheel** (@OutThisLife) ([#17669](https://github.com/NousResearch/hermes-agent/pull/17669))
- **Delete queued message while editing with ctrl-x / cancel with esc** (@OutThisLife) ([#16707](https://github.com/NousResearch/hermes-agent/pull/16707))
- **Per-section visibility for the details accordion** (@OutThisLife) ([#14968](https://github.com/NousResearch/hermes-agent/pull/14968))
- **Voice mode CLI parity** — VAD loop + TTS + crash forensics ([#14810](https://github.com/NousResearch/hermes-agent/pull/14810))
- **Contextual first-touch hints ported to TUI** — `/busy`, `/verbose` ([#16054](https://github.com/NousResearch/hermes-agent/pull/16054))
- **Mini help menu on `?` in the input field** (@ethernet8023) ([#18043](https://github.com/NousResearch/hermes-agent/pull/18043))
### Fixes
- Fix: proactive mouse disable on ConPTY + `/mouse` toggle command (@kevin-ho, WSL2 ghost-mouse fix) ([#15488](https://github.com/NousResearch/hermes-agent/pull/15488))
- Fix: restore skills search RPC ([#15870](https://github.com/NousResearch/hermes-agent/pull/15870))
- Perf: cache text measurements across yoga flex re-passes ([#14818](https://github.com/NousResearch/hermes-agent/pull/14818))
- Perf: stabilize long-session scrolling ([#15926](https://github.com/NousResearch/hermes-agent/pull/15926))
- Perf: lazily seed virtual history heights ([#16523](https://github.com/NousResearch/hermes-agent/pull/16523))
- Perf: cut visible cold start ~57% with lazy agent init ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190))
---
## 🖱️ CLI & User Experience
### New commands
- **`hermes -z <prompt>`** — non-interactive one-shot mode ([#15702](https://github.com/NousResearch/hermes-agent/pull/15702))
- **`hermes -z` with `--model` / `--provider` / `HERMES_INFERENCE_MODEL`** ([#15704](https://github.com/NousResearch/hermes-agent/pull/15704))
- **`hermes update --check`** preflight flag ([#15841](https://github.com/NousResearch/hermes-agent/pull/15841))
- **`hermes fallback`** command for managing fallback providers ([#16052](https://github.com/NousResearch/hermes-agent/pull/16052))
- **`/busy`** slash command for busy input mode ([#15382](https://github.com/NousResearch/hermes-agent/pull/15382))
- **`/busy` input mode 'steer'** as a third option ([#16279](https://github.com/NousResearch/hermes-agent/pull/16279))
- **`/btw` as alias for `/background`** ([#16053](https://github.com/NousResearch/hermes-agent/pull/16053))
- **`/reload-skills`** slash command (salvage #17670) ([#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
- **Surface `/queue`, `/bg`, `/steer` in agent-running placeholder** ([#16118](https://github.com/NousResearch/hermes-agent/pull/16118))
### Setup / onboarding
- **Auto-reconfigure on existing installs** ([#15879](https://github.com/NousResearch/hermes-agent/pull/15879))
- **Contextual first-touch hints for `/busy` and `/verbose`** ([#16046](https://github.com/NousResearch/hermes-agent/pull/16046))
- **Cost-saving tips from the April 30 tip-of-the-day** ([#17841](https://github.com/NousResearch/hermes-agent/pull/17841))
- **Hyperlink startup banner title to the latest GitHub Release** ([#14945](https://github.com/NousResearch/hermes-agent/pull/14945))
### Update / backup
- **Snapshot pairing data before `git pull`** ([#16383](https://github.com/NousResearch/hermes-agent/pull/16383))
- **Auto-backup HERMES_HOME before `hermes update`** (opt-in, off by default) ([#16539](https://github.com/NousResearch/hermes-agent/pull/16539), [#16566](https://github.com/NousResearch/hermes-agent/pull/16566))
- **Exclude `checkpoints/` from backups** ([#16572](https://github.com/NousResearch/hermes-agent/pull/16572))
- **Exclude SQLite WAL/SHM/journal sidecars from backups** ([#16576](https://github.com/NousResearch/hermes-agent/pull/16576))
- **Installer FHS layout for root installs on Linux** ([#15608](https://github.com/NousResearch/hermes-agent/pull/15608))
- Fix: kill stale dashboards instead of warning ([#17832](https://github.com/NousResearch/hermes-agent/pull/17832))
- Fix: show correct update status on nix-built hermes ([#17550](https://github.com/NousResearch/hermes-agent/pull/17550))
### Slash-command housekeeping
- Refactor: drop `/provider`, `/plan` handler, and clean up slash registry ([#15047](https://github.com/NousResearch/hermes-agent/pull/15047))
- Refactor: drop `persist_session` plumbing + fix broken `/btw` mid-turn bypass ([#16075](https://github.com/NousResearch/hermes-agent/pull/16075))
### OpenClaw migration (for folks coming from OpenClaw)
- **Hardened OpenClaw import** — plan-first apply, redaction, pre-migration backup ([#16911](https://github.com/NousResearch/hermes-agent/pull/16911))
- Fix: case-preserving brand rewrite + one-time `~/.openclaw` residue banner ([#16327](https://github.com/NousResearch/hermes-agent/pull/16327))
- Fix: resolve `openclaw` workspace files from `agents.defaults.workspace` ([#16879](https://github.com/NousResearch/hermes-agent/pull/16879))
- Fix: resolve model aliases against real OpenClaw catalog schema (salvage #16778) ([#16977](https://github.com/NousResearch/hermes-agent/pull/16977))
---
## 📊 Web Dashboard
- **Models tab** — rich per-model analytics ([#17745](https://github.com/NousResearch/hermes-agent/pull/17745))
- **Configure main + auxiliary models from the Models page** ([#17802](https://github.com/NousResearch/hermes-agent/pull/17802))
- **Dashboard Chat tab — xterm.js + JSON-RPC sidecar** (supersedes #12710 + #13379, @OutThisLife) ([#14890](https://github.com/NousResearch/hermes-agent/pull/14890))
- **Dashboard layout refresh** (@austinpickett) ([#14899](https://github.com/NousResearch/hermes-agent/pull/14899))
- **`--stop` and `--status` flags** on the dashboard CLI ([#17840](https://github.com/NousResearch/hermes-agent/pull/17840))
- **Page-scoped plugin slots for built-in pages** ([#15658](https://github.com/NousResearch/hermes-agent/pull/15658))
- Fix: replace all buttons for design system buttons ([#17007](https://github.com/NousResearch/hermes-agent/pull/17007))
---
## ⚡ Performance
- **TUI visible cold start cut ~57%** via lazy agent init ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190))
- **Lazy-import OpenAI, Anthropic, Firecrawl, account_usage** ([#17046](https://github.com/NousResearch/hermes-agent/pull/17046))
- **mtime-cache `load_config()` and `read_raw_config()`** ([#17041](https://github.com/NousResearch/hermes-agent/pull/17041))
- **Memoize `get_tool_definitions()` + TTL-cache `check_fn` results** ([#17098](https://github.com/NousResearch/hermes-agent/pull/17098))
- **Precompile DANGEROUS_PATTERNS and HARDLINE_PATTERNS** ([#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
- **Cache Ink text measurements across yoga flex re-passes** ([#14818](https://github.com/NousResearch/hermes-agent/pull/14818))
- **Stabilize long-session scrolling** ([#15926](https://github.com/NousResearch/hermes-agent/pull/15926))
- **Lazily seed virtual history heights** ([#16523](https://github.com/NousResearch/hermes-agent/pull/16523))
---
## 🔒 Security & Reliability
- **Secret redaction off by default** — stops corrupting patches / API payloads with fake-key substitutions. Opt in via `redaction.enabled: true` ([#16794](https://github.com/NousResearch/hermes-agent/pull/16794))
- **`[SYSTEM:` → `[IMPORTANT:`** in all user-injected markers (Azure content filter dodge) ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
- **Hardline blocklist for unrecoverable commands** ([#15878](https://github.com/NousResearch/hermes-agent/pull/15878))
- **Canonical `mask_secret` helper; fix status.py DIM drift** ([#17207](https://github.com/NousResearch/hermes-agent/pull/17207))
- **Sweep expired paste.rs uploads on a real timer** ([#16431](https://github.com/NousResearch/hermes-agent/pull/16431))
- **Preserve symlinks during atomic file writes** ([#16980](https://github.com/NousResearch/hermes-agent/pull/16980))
- **Probe `/dev/tty` by opening it, not bare existence** ([#17024](https://github.com/NousResearch/hermes-agent/pull/17024))
---
## 🐛 Notable Bug Fixes
This window includes 360 `fix:` PRs. Selected highlights from across the stack:
- **Background review fork inherits parent's live runtime** — provider/model/creds now propagate correctly ([#16099](https://github.com/NousResearch/hermes-agent/pull/16099))
- **Hindsight configurable `HINDSIGHT_TIMEOUT` env var** ([#15077](https://github.com/NousResearch/hermes-agent/pull/15077))
- **Tools: normalize numeric entries + clear stale `no_mcp` in `_save_platform_tools`** ([#15607](https://github.com/NousResearch/hermes-agent/pull/15607))
- **MCP: rewrite `definitions` refs to `$defs` in input schemas** — closes provider-side 400s
- **Azure content filter compatibility** — renamed `[SYSTEM:` markers so Azure's content filter stops flagging them ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
- **Vision cache uses HERMES_HOME instead of cwd** ([#17719](https://github.com/NousResearch/hermes-agent/pull/17719))
- **FTS5 search** — tool_name + tool_calls indexing with repair + migration ([#16914](https://github.com/NousResearch/hermes-agent/pull/16914))
- **Streaming reasoning persists on assistant turns** ([#16892](https://github.com/NousResearch/hermes-agent/pull/16892))
- **execute_code concurrent RPC serialization** (#17770) ([#17894](https://github.com/NousResearch/hermes-agent/pull/17894), [#17902](https://github.com/NousResearch/hermes-agent/pull/17902))
- **Background reviewer scoped to memory + skills toolsets** — no more accidental web/shell escapes ([#16569](https://github.com/NousResearch/hermes-agent/pull/16569))
- **Compression recovery** — retry on main before giving up; notify user when aux fails ([#16774](https://github.com/NousResearch/hermes-agent/pull/16774), [#16775](https://github.com/NousResearch/hermes-agent/pull/16775))
- **`croniter` promoted to a core dependency** ([#17577](https://github.com/NousResearch/hermes-agent/pull/17577))
- **Discord tool `limit` parameter coerced to int** before `min()` call ([#16319](https://github.com/NousResearch/hermes-agent/pull/16319))
- **Yuanbao messaging platform entrance fix** ([#16880](https://github.com/NousResearch/hermes-agent/pull/16880))
- **ACP advertise and forward image prompts** ([#18030](https://github.com/NousResearch/hermes-agent/pull/18030))
- **DeepSeek / Kimi reasoning content isolation** across cross-provider histories (@Zjianru) ([#15749](https://github.com/NousResearch/hermes-agent/pull/15749), [#15762](https://github.com/NousResearch/hermes-agent/pull/15762))
- **Preserve reasoning_content replay on DeepSeek v4 + Kimi/Moonshot thinking** ([#18045](https://github.com/NousResearch/hermes-agent/pull/18045))
The vast majority of the 360 fixes landed in the streaming/compression/tool-calling paths across all providers — DeepSeek, Kimi, Moonshot, GLM, Qwen, MiniMax, Gemini, Anthropic, OpenAI — alongside TUI polish (resize, scroll, sticky-prompt) and gateway platform-specific edge cases.
---
## 🧪 Testing & CI
- Hermetic test parity (`scripts/run_tests.sh`) held across this window
- **Microsoft Teams xdist collision guard** — prevents worker collisions when Teams platform tests run in parallel ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
- Chore: remove unused imports and dead locals (ruff F401, F841) ([#17010](https://github.com/NousResearch/hermes-agent/pull/17010))
---
## 📚 Documentation
- **Curator feature page** added to docs site ([#17563](https://github.com/NousResearch/hermes-agent/pull/17563))
- **Document pin also blocking `skill_manage` writes** ([#17578](https://github.com/NousResearch/hermes-agent/pull/17578))
- **Direct-URL skill install documented** across features, reference, guide, and `hermes-agent` skill ([#16355](https://github.com/NousResearch/hermes-agent/pull/16355))
- **Hooks tutorial — build a BOOT.md startup checklist** (replaces the removed built-in hook) ([#17202](https://github.com/NousResearch/hermes-agent/pull/17202))
- **ComfyUI docs: ask local vs cloud FIRST before hardware check** ([#17612](https://github.com/NousResearch/hermes-agent/pull/17612))
- **Obliteratus skill: link YouTube video guide in SKILL.md** ([#15808](https://github.com/NousResearch/hermes-agent/pull/15808))
- Per-skill docs pages generated for bundled + optional skills; ASCII art code blocks auto-wrapped ([#14929](https://github.com/NousResearch/hermes-agent/pull/14929), [#16497](https://github.com/NousResearch/hermes-agent/pull/16497))
---
## ⚖️ Removed / Reverted
- **Kanban multi-profile collaboration board** — landed in #16081, reverted in ([#16098](https://github.com/NousResearch/hermes-agent/pull/16098)) while the design is reworked
- **computer-use cua-driver** — 3 preparatory PRs landed then were reverted in ([#16927](https://github.com/NousResearch/hermes-agent/pull/16927))
- **BOOT.md built-in hook** removed ([#17093](https://github.com/NousResearch/hermes-agent/pull/17093)); the hooks tutorial ([#17202](https://github.com/NousResearch/hermes-agent/pull/17202)) shows how to build the same workflow yourself with a shell hook
- **`/provider` + `/plan` slash commands dropped** ([#15047](https://github.com/NousResearch/hermes-agent/pull/15047))
- **`flush_memories` removed entirely** ([#15696](https://github.com/NousResearch/hermes-agent/pull/15696))
---
## 👥 Contributors
### Core
- **@teknium1** (Teknium)
### Top Community Contributors (by merged PR count since v0.11.0)
- **@OutThisLife** (Brooklyn) — 52 PRs · TUI — light-terminal detection + pluggable busy styles + auto-resume + session-delete from /resume + mouse-wheel scrolling + xterm.js dashboard Chat tab + cold-start cut + accordion polish
- **@kshitijk4poor** — 12 PRs · LM Studio first-class provider (salvage), Vercel Sandbox backend, GMI Cloud salvage, bundled-by-default touchdesigner-mcp, many tool-call / reasoning fixes
- **@helix4u** — 10 PRs · MCP schema robustness, assorted stability fixes
- **@alt-glitch** — 8 PRs · trigram FTS5 CJK search, declarative Nix plugin install, matrix/feishu hints and fixes
- **@ethernet8023** — 4 PRs
- **@austinpickett** — 4 PRs · LaTeX rendering in TUI, dashboard layout refresh
- **@benbarclay** — 3 PRs · Docker run-as-host-user so bind mounts don't get root-owned
- **@vominh1919** — 2 PRs
- **@stephenschoettler** — 2 PRs
- **@kevin-ho** — ConPTY mouse-injection fix (#15488)
- **@Zjianru** — cross-provider reasoning_content isolation + DeepSeek/Kimi empty-reasoning injection (#15749, #15762)
- **@web3blind** — Telegram chat allowlists for groups and forums (#15027)
- **@SHL0MS** — 9 new TouchDesigner-MCP reference docs (#16768)
- **@0xDevNinja** — curator `restore_skill` nested-archive fix (#17951)
- **@y0shua1ee** — curator `use` activity fix (#17953)
### Also contributing
Salvaged or co-authored work from **@isaachuangGMICLOUD** (GMI Cloud), earlier upstream PRs from the original author of each salvage chain, and a long tail of one-shot fixes, documentation nudges, and skill contributions from the community.
### All Contributors (alphabetical, excluding @teknium1)
@0xbyt4, @0xharryriddle, @0xDevNinja, @0z1-ghb, @5park1e, @A-FdL-Prog, @aj-nt, @akhater, @alblez, @alexg0bot,
@alexzhu0, @AllardQuek, @alt-glitch, @amanning3390, @amanuel2, @AndreKurait, @andrewhosf, @Andy283, @andyylin,
@angel12, @AntAISecurityLab, @ash, @austinpickett, @badgerbees, @BadTechBandit, @Bartok9, @beenherebefore,
@beesrsj2500, @BeliefanX, @benbarclay, @benjaminsehl, @BlackishGreen33, @bloodcarter, @BlueBirdBack,
@briandevans, @brooklynnicholson, @bsgdigital, @buray, @bwjoke, @camaragon, @cdanis, @cgarwood82,
@charles-brooks, @chen1749144759, @chengoak, @ching-kaching, @Contentment003111, @crayfish-ai, @CruxExperts,
@cyclingwithelephants, @dandaka, @danklynn, @ddupont808, @dhabibi, @difujia, @dimitrovi, @dlkakbs,
@dontcallmejames, @EKKOLearnAI, @emozilla, @ericnicolaides, @Erosika, @ethernet8023, @exiao, @Feranmi10,
@flobo3, @foxion37, @georgeglessner, @georgex8001, @ghostmfr, @H-Ali13381, @HangGlidersRule, @harryplusplus,
@haru398801, @heathley, @hejuntt1014, @hekaru-agent, @helix4u, @Heltman, @HenkDz, @heyitsaamir, @hharry11,
@hhhonzik, @hhuang91, @HiddenPuppy, @htsh, @iamagenius00, @in-liberty420, @innocarpe, @irispillars, @iRonin,
@isaachuangGMICLOUD, @Ito-69, @j3ffffff, @jackjin1997, @jakubkrcmar, @Jason2031, @JayGwod, @jerome-benoit,
@johnncenae, @Kailigithub, @keiravoss94, @kevin-ho, @knockyai, @konsisumer, @kshitijk4poor, @kunlabs, @l0hde,
@Leihb, @leoneparise, @LeonSGP43, @liizfq, @liuhao1024, @loongzhao, @lsdsjy, @luyao618, @ma-pony, @Magaav,
@MagicRay1217, @math0r-be, @MattMaximo, @maxims-oss, @MaxyMoos, @maymuneth, @mcndjxlefnd, @memosr,
@MestreY0d4-Uninter, @mewwts, @Mirac1eSky, @MorAlekss, @mrhwick, @mrunmayee17, @mssteuer, @Nanako0129,
@nazirulhafiy, @Nerijusas, @Nicecsh, @nicoloboschi, @nightq, @ningfangbin, @octo-patch, @Octopus,
@OutThisLife, @Paperclip, @pein892, @perlowja, @prasadus92, @qike-ms, @qiyin-code, @Readon, @ReginaldasR,
@revaraver, @rfilgueiras, @rmoen, @romanornr, @rugvedS07, @rylena, @samrusani, @Sanjays2402, @sasha-id,
@Satoshi-agi, @scheidti, @scotttrinh, @season179, @SeeYangZhi, @sgaofen, @shamork, @shannonsands, @SHL0MS,
@simbam99, @Societus, @socrates1024, @Sonoyunchu, @sprmn24, @stephenschoettler, @tangyuanjc, @TechPrototyper,
@tekgnosis-net, @ThomassJonax, @tmimmanuel, @tochukwuada, @Tosko4, @Tranquil-Flow, @twozle, @txbxxx,
@UgwujaGeorge, @Versun, @vlwkaos, @voidborne-d, @vominh1919, @Wang-tianhao, @Wangshengyang2004, @web3blind,
@westers, @Wysie, @xandersbell, @xiahu88988, @XieNBi, @xinbenlv, @xnbi, @y0shua1ee, @yatesjalex, @yes999zc,
@yeyitech, @Yoimex, @YueLich, @Yukipukii1, @zhiyanliu, @zicochaos, @Zjianru, @zkl2333, @zons-zhaozhy,
@ztexydt-cqh.
Also: Siddharth Balyan, @YuShu.
---
**Full Changelog**: [v2026.4.23...v2026.4.30](https://github.com/NousResearch/hermes-agent/compare/v2026.4.23...v2026.4.30)

View File

@@ -4,7 +4,6 @@ from __future__ import annotations
import asyncio
import contextvars
import json
import logging
import os
from collections import defaultdict, deque
@@ -14,7 +13,6 @@ from typing import Any, Deque, Optional
import acp
from acp.schema import (
AgentCapabilities,
AgentMessageChunk,
AuthenticateResponse,
AvailableCommand,
AvailableCommandsUpdate,
@@ -32,7 +30,6 @@ from acp.schema import (
McpServerStdio,
ModelInfo,
NewSessionResponse,
PromptCapabilities,
PromptResponse,
ResumeSessionResponse,
SetSessionConfigOptionResponse,
@@ -48,8 +45,6 @@ from acp.schema import (
TextContentBlock,
UnstructuredCommandInput,
Usage,
UsageUpdate,
UserMessageChunk,
)
# AuthMethodAgent was renamed from AuthMethod in agent-client-protocol 0.9.0
@@ -67,7 +62,6 @@ from acp_adapter.events import (
)
from acp_adapter.permissions import make_approval_callback
from acp_adapter.session import SessionManager, SessionState, _expand_acp_enabled_toolsets
from acp_adapter.tools import build_tool_complete, build_tool_start
logger = logging.getLogger(__name__)
@@ -94,69 +88,17 @@ def _extract_text(
| EmbeddedResourceContentBlock
],
) -> str:
"""Extract plain text from ACP content blocks for display/commands."""
"""Extract plain text from ACP content blocks."""
parts: list[str] = []
for block in prompt:
if isinstance(block, TextContentBlock):
parts.append(block.text)
elif hasattr(block, "text"):
parts.append(str(block.text))
# Non-text blocks are ignored for now.
return "\n".join(parts)
def _image_block_to_openai_part(block: ImageContentBlock) -> dict[str, Any] | None:
"""Convert an ACP image content block to OpenAI-style multimodal content."""
data = str(getattr(block, "data", "") or "").strip()
uri = str(getattr(block, "uri", "") or "").strip()
mime_type = str(getattr(block, "mime_type", "") or "image/png").strip() or "image/png"
if data:
url = data if data.startswith("data:") else f"data:{mime_type};base64,{data}"
elif uri:
url = uri
else:
return None
return {"type": "image_url", "image_url": {"url": url}}
def _content_blocks_to_openai_user_content(
    prompt: list[
        TextContentBlock
        | ImageContentBlock
        | AudioContentBlock
        | ResourceContentBlock
        | EmbeddedResourceContentBlock
    ],
) -> str | list[dict[str, Any]]:
    """Build the Hermes/OpenAI user-message payload from ACP prompt blocks.

    Text-only prompts collapse to a plain string so slash-command handling
    and text-only providers keep the exact legacy path; a structured
    multimodal list is returned only when at least one non-text block
    survives conversion. Other block kinds (audio/resource) are dropped.
    """
    structured: list[dict[str, Any]] = []
    plain_segments: list[str] = []
    for item in prompt:
        if isinstance(item, TextContentBlock):
            if item.text:
                structured.append({"type": "text", "text": item.text})
                plain_segments.append(item.text)
        elif isinstance(item, ImageContentBlock):
            converted = _image_block_to_openai_part(item)
            if converted is not None:
                structured.append(converted)
        # Remaining block kinds are intentionally skipped for now.
    if not structured:
        # Nothing convertible — fall back to the legacy text extraction.
        return _extract_text(prompt)
    if any(entry.get("type") != "text" for entry in structured):
        return structured
    # Pure text: join the original segments, preserving the legacy string shape.
    return "\n".join(plain_segments)
class HermesACPAgent(acp.Agent):
"""ACP Agent implementation wrapping Hermes AIAgent."""
@@ -167,8 +109,6 @@ class HermesACPAgent(acp.Agent):
"context": "Show conversation context info",
"reset": "Clear conversation history",
"compact": "Compress conversation context",
"steer": "Inject guidance into the currently running agent turn",
"queue": "Queue a prompt to run after the current turn finishes",
"version": "Show Hermes version",
}
@@ -198,16 +138,6 @@ class HermesACPAgent(acp.Agent):
"name": "compact",
"description": "Compress conversation context",
},
{
"name": "steer",
"description": "Inject guidance into the currently running agent turn",
"input_hint": "guidance for the active turn",
},
{
"name": "queue",
"description": "Queue a prompt to run after the current turn finishes",
"input_hint": "prompt to run next",
},
{
"name": "version",
"description": "Show Hermes version",
@@ -318,66 +248,6 @@ class HermesACPAgent(acp.Agent):
return target_provider, new_model
@staticmethod
def _build_usage_update(state: SessionState) -> UsageUpdate | None:
    """Assemble the ACP ``usage_update`` payload for native context gauges.

    Clients like Zed drive their circular context indicator from these
    session updates: ``size`` is the model context window and ``used`` is
    the estimated request pressure (system prompt + conversation history +
    tool schemas — the same buckets Hermes sends to providers). Returns
    ``None`` when no context window is known for the session's model.
    """
    hermes_agent = state.agent
    ctx = getattr(hermes_agent, "context_compressor", None)
    window = int(getattr(ctx, "context_length", 0) or 0)
    if window <= 0:
        # Unknown context window — nothing meaningful to report.
        return None
    try:
        from agent.model_metadata import estimate_request_tokens_rough

        pressure = estimate_request_tokens_rough(
            state.history,
            system_prompt=getattr(hermes_agent, "_cached_system_prompt", "") or "",
            tools=getattr(hermes_agent, "tools", None) or None,
        )
    except Exception:
        # Estimation is best-effort; fall back to the last observed count.
        logger.debug("Could not estimate ACP native context usage", exc_info=True)
        pressure = int(getattr(ctx, "last_prompt_tokens", 0) or 0)
    return UsageUpdate(
        session_update="usage_update",
        size=max(window, 0),
        used=max(pressure, 0),
    )
async def _send_usage_update(self, state: SessionState) -> None:
    """Push the current context-usage estimate to the connected ACP client.

    No-ops when no client connection exists or no usage payload can be
    built; delivery failures are logged but never propagated.
    """
    if not self._conn:
        return
    payload = self._build_usage_update(state)
    if payload is None:
        return
    try:
        await self._conn.session_update(
            session_id=state.session_id,
            update=payload,
        )
    except Exception:
        # A dead connection shouldn't take the session down with it.
        logger.warning(
            "Failed to send ACP usage update for session %s",
            state.session_id,
            exc_info=True,
        )
def _schedule_usage_update(self, state: SessionState) -> None:
    """Queue a native context-indicator refresh on the running event loop.

    The task is created via ``call_soon`` so it only runs after the current
    ACP response has been flushed back to the client.
    """
    if self._conn:
        asyncio.get_running_loop().call_soon(
            asyncio.create_task, self._send_usage_update(state)
        )
async def _register_session_mcp_servers(
self,
state: SessionState,
@@ -482,7 +352,6 @@ class HermesACPAgent(acp.Agent):
agent_info=Implementation(name="hermes-agent", version=HERMES_VERSION),
agent_capabilities=AgentCapabilities(
load_session=True,
prompt_capabilities=PromptCapabilities(image=True),
session_capabilities=SessionCapabilities(
fork=SessionForkCapabilities(),
list=SessionListCapabilities(),
@@ -508,140 +377,6 @@ class HermesACPAgent(acp.Agent):
# ---- Session management -------------------------------------------------
@staticmethod
def _history_message_text(message: dict[str, Any]) -> str:
    """Pull displayable text out of a persisted OpenAI-style message.

    ``content`` may be a plain string or a list of content parts: dict
    parts contribute their ``text`` field (or ``content`` for explicit
    ``type == "text"`` entries) and bare strings pass through. Any other
    shape yields an empty string.
    """
    body = message.get("content")
    if isinstance(body, str):
        return body.strip()
    if not isinstance(body, list):
        return ""
    collected: list[str] = []
    for entry in body:
        if isinstance(entry, str):
            collected.append(entry)
        elif isinstance(entry, dict):
            text_field = entry.get("text")
            if isinstance(text_field, str):
                collected.append(text_field)
            elif entry.get("type") == "text" and isinstance(entry.get("content"), str):
                collected.append(entry["content"])
    return "\n".join(s.strip() for s in collected if s and s.strip()).strip()
@staticmethod
def _history_message_update(
    *,
    role: str,
    text: str,
) -> UserMessageChunk | AgentMessageChunk | None:
    """Wrap a replayed user/assistant message as an ACP session update.

    Roles other than ``user``/``assistant`` produce ``None`` and are
    skipped by the history replay loop.
    """
    content = TextContentBlock(type="text", text=text)
    if role == "assistant":
        return AgentMessageChunk(
            session_update="agent_message_chunk",
            content=content,
        )
    if role == "user":
        return UserMessageChunk(
            session_update="user_message_chunk",
            content=content,
        )
    return None
@staticmethod
def _history_tool_call_name_args(tool_call: dict[str, Any]) -> tuple[str, dict[str, Any]]:
    """Recover the function name and argument dict from a stored tool_call.

    Handles both the nested ``{"function": {...}}`` layout and flat keys;
    string arguments are parsed as JSON, with unparseable payloads kept
    visible under a ``"raw"`` key. Non-dict results collapse to ``{}``.
    """
    fn = tool_call.get("function")
    fn_dict: dict[str, Any] = fn if isinstance(fn, dict) else {}
    label = str(fn_dict.get("name") or tool_call.get("name") or "unknown_tool")
    args: Any = (
        fn_dict.get("arguments")
        or tool_call.get("arguments")
        or tool_call.get("args")
        or {}
    )
    if isinstance(args, str):
        try:
            args = json.loads(args)
        except Exception:
            # Keep the undecodable payload instead of silently dropping it.
            args = {"raw": args}
    return label, args if isinstance(args, dict) else {}
@staticmethod
def _history_tool_call_id(tool_call: dict[str, Any]) -> str:
    """Return the stable provider tool-call id for ACP history replay.

    Checks ``id``/``call_id``/``tool_call_id`` in order and returns the
    first truthy value, stringified and stripped; "" when none is present.
    """
    for key in ("id", "call_id", "tool_call_id"):
        value = tool_call.get(key)
        if value:
            return str(value).strip()
    return ""
async def _replay_session_history(self, state: SessionState) -> None:
    """Send persisted user/assistant history to clients during session/load.

    Zed's ACP history UI calls ``session/load`` after the user picks an item
    from the Agents sidebar. The agent must then replay the full conversation
    as user/assistant chunks plus reconstructed tool-call start/completion
    notifications; merely restoring server-side state makes Hermes remember
    context, but leaves the editor looking like a clean thread.
    """
    if not self._conn or not state.history:
        return
    # tool_call_id -> (tool_name, args) for assistant tool calls whose
    # matching "tool" result message has not been replayed yet.
    active_tool_calls: dict[str, tuple[str, dict[str, Any]]] = {}

    async def _send(update: Any) -> bool:
        # Returns False on delivery failure so the caller can abort the
        # whole replay — the client connection is likely unusable.
        try:
            await self._conn.session_update(session_id=state.session_id, update=update)
            return True
        except Exception:
            logger.warning(
                "Failed to replay ACP history for session %s",
                state.session_id,
                exc_info=True,
            )
            return False

    for message in state.history:
        role = str(message.get("role") or "")
        if role in {"user", "assistant"}:
            text = self._history_message_text(message)
            if text:
                update = self._history_message_update(role=role, text=text)
                if update is not None and not await _send(update):
                    return
            if role == "assistant" and isinstance(message.get("tool_calls"), list):
                # Re-emit tool_call starts so the editor can pair them with
                # completions built from later "tool"-role messages.
                for tool_call in message["tool_calls"]:
                    if not isinstance(tool_call, dict):
                        continue
                    tool_call_id = self._history_tool_call_id(tool_call)
                    if not tool_call_id:
                        # Without a stable id the completion can't be paired.
                        continue
                    tool_name, args = self._history_tool_call_name_args(tool_call)
                    active_tool_calls[tool_call_id] = (tool_name, args)
                    if not await _send(build_tool_start(tool_call_id, tool_name, args)):
                        return
            continue
        if role == "tool":
            tool_call_id = str(message.get("tool_call_id") or "").strip()
            tool_name = str(message.get("tool_name") or "").strip()
            function_args: dict[str, Any] | None = None
            if tool_call_id in active_tool_calls:
                # Prefer the name/args captured from the originating
                # assistant tool_call over what the tool message stored.
                tool_name, function_args = active_tool_calls.pop(tool_call_id)
            if not tool_call_id or not tool_name:
                continue
            result = message.get("content")
            if not await _send(
                build_tool_complete(
                    tool_call_id,
                    tool_name,
                    result=result if isinstance(result, str) else None,
                    function_args=function_args,
                )
            ):
                return
async def new_session(
self,
cwd: str,
@@ -652,24 +387,11 @@ class HermesACPAgent(acp.Agent):
await self._register_session_mcp_servers(state, mcp_servers)
logger.info("New session %s (cwd=%s)", state.session_id, cwd)
self._schedule_available_commands_update(state.session_id)
self._schedule_usage_update(state)
return NewSessionResponse(
session_id=state.session_id,
models=self._build_model_state(state),
)
def _schedule_history_replay(self, state: SessionState) -> None:
    """Defer history replay until the load/resume response has returned.

    Zed only attaches streamed transcript/tool updates once the load/resume
    response has completed. Sending replay notifications while the request
    is still in-flight can make the server look correct in logs while the
    editor drops or fails to attach the tool-call history; ``call_soon``
    starts the replay task on the next loop iteration instead.
    """
    asyncio.get_running_loop().call_soon(
        asyncio.create_task,
        self._replay_session_history(state),
    )
async def load_session(
self,
cwd: str,
@@ -683,9 +405,7 @@ class HermesACPAgent(acp.Agent):
return None
await self._register_session_mcp_servers(state, mcp_servers)
logger.info("Loaded session %s", session_id)
self._schedule_history_replay(state)
self._schedule_available_commands_update(session_id)
self._schedule_usage_update(state)
return LoadSessionResponse(models=self._build_model_state(state))
async def resume_session(
@@ -701,17 +421,12 @@ class HermesACPAgent(acp.Agent):
state = self.session_manager.create_session(cwd=cwd)
await self._register_session_mcp_servers(state, mcp_servers)
logger.info("Resumed session %s", state.session_id)
self._schedule_history_replay(state)
self._schedule_available_commands_update(state.session_id)
self._schedule_usage_update(state)
return ResumeSessionResponse(models=self._build_model_state(state))
async def cancel(self, session_id: str, **kwargs: Any) -> None:
state = self.session_manager.get_session(session_id)
if state and state.cancel_event:
with state.runtime_lock:
if state.is_running and state.current_prompt_text:
state.interrupted_prompt_text = state.current_prompt_text
state.cancel_event.set()
try:
if getattr(state, "agent", None) and hasattr(state.agent, "interrupt"):
@@ -802,77 +517,18 @@ class HermesACPAgent(acp.Agent):
return PromptResponse(stop_reason="refusal")
user_text = _extract_text(prompt).strip()
user_content = _content_blocks_to_openai_user_content(prompt)
has_content = bool(user_text) or (
isinstance(user_content, list) and bool(user_content)
)
if not has_content:
if not user_text:
return PromptResponse(stop_reason="end_turn")
# /steer on an idle session has no in-flight tool call to inject into.
# Rewrite it so the payload runs as a normal user prompt, matching the
# gateway's behavior (gateway/run.py ~L4898). Two sub-cases:
# 1. Zed-interrupt salvage — a prior prompt was cancelled by the
# client right before /steer arrived; replay it with the steer
# text attached as explicit correction/guidance so the user's
# in-flight work isn't lost.
# 2. Plain idle — no prior work to salvage; just run the steer
# payload as a regular prompt. Without this, _cmd_steer would
# silently append to state.queued_prompts and respond with
# "No active turn — queued for the next turn", which looks like
# /queue even though the user never typed /queue.
if isinstance(user_content, str) and user_text.startswith("/steer"):
steer_text = user_text.split(maxsplit=1)[1].strip() if len(user_text.split(maxsplit=1)) > 1 else ""
interrupted_prompt = ""
rewrite_idle = False
with state.runtime_lock:
if not state.is_running and steer_text:
if state.interrupted_prompt_text:
interrupted_prompt = state.interrupted_prompt_text
state.interrupted_prompt_text = ""
else:
rewrite_idle = True
if interrupted_prompt:
user_text = (
f"{interrupted_prompt}\n\n"
f"User correction/guidance after interrupt: {steer_text}"
)
user_content = user_text
elif rewrite_idle:
user_text = steer_text
user_content = steer_text
# Intercept slash commands — handle locally without calling the LLM.
# Slash commands are text-only; if the client included images/resources,
# send the whole multimodal prompt to the agent instead of treating it as
# an ACP command.
if isinstance(user_content, str) and user_text.startswith("/"):
# Intercept slash commands — handle locally without calling the LLM
if user_text.startswith("/"):
response_text = self._handle_slash_command(user_text, state)
if response_text is not None:
if self._conn:
update = acp.update_agent_message_text(response_text)
await self._conn.session_update(session_id, update)
await self._send_usage_update(state)
return PromptResponse(stop_reason="end_turn")
# If Zed sends another regular prompt while the same ACP session is
# still running, queue it instead of racing two AIAgent loops against
# the same state.history. /steer and /queue are handled above and can
# land immediately.
with state.runtime_lock:
if state.is_running:
queued_text = user_text or "[Image attachment]"
state.queued_prompts.append(queued_text)
depth = len(state.queued_prompts)
if self._conn:
update = acp.update_agent_message_text(
f"Queued for the next turn. ({depth} queued)"
)
await self._conn.session_update(session_id, update)
return PromptResponse(stop_reason="end_turn")
state.is_running = True
state.current_prompt_text = user_text or "[Image attachment]"
logger.info("Prompt on session %s: %s", session_id, user_text[:100])
conn = self._conn
@@ -885,37 +541,24 @@ class HermesACPAgent(acp.Agent):
tool_call_meta: dict[str, dict[str, Any]] = {}
previous_approval_cb = None
streamed_message = False
if conn:
tool_progress_cb = make_tool_progress_cb(conn, session_id, loop, tool_call_ids, tool_call_meta)
reasoning_cb = make_thinking_cb(conn, session_id, loop)
thinking_cb = make_thinking_cb(conn, session_id, loop)
step_cb = make_step_cb(conn, session_id, loop, tool_call_ids, tool_call_meta)
message_cb = make_message_cb(conn, session_id, loop)
def stream_delta_cb(text: str) -> None:
nonlocal streamed_message
if text:
streamed_message = True
message_cb(text)
approval_cb = make_approval_callback(conn.request_permission, loop, session_id)
else:
tool_progress_cb = None
reasoning_cb = None
thinking_cb = None
step_cb = None
stream_delta_cb = None
message_cb = None
approval_cb = None
agent = state.agent
agent.tool_progress_callback = tool_progress_cb
# ACP thought panes should not receive Hermes' local kawaii waiting/status
# updates. Route provider/model reasoning deltas instead; if the provider
# emits no reasoning, Zed should not get a fake "thinking" accordion.
agent.thinking_callback = None
agent.reasoning_callback = reasoning_cb
agent.thinking_callback = thinking_cb
agent.step_callback = step_cb
agent.stream_delta_callback = stream_delta_cb
agent.message_callback = message_cb
# Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
# Set it INSIDE _run_agent so the TLS write happens in the executor
@@ -961,10 +604,9 @@ class HermesACPAgent(acp.Agent):
os.environ["HERMES_INTERACTIVE"] = "1"
try:
result = agent.run_conversation(
user_message=user_content,
user_message=user_text,
conversation_history=state.history,
task_id=session_id,
persist_user_message=user_text or "[Image attachment]",
)
return result
except Exception as e:
@@ -997,9 +639,6 @@ class HermesACPAgent(acp.Agent):
result = await loop.run_in_executor(_executor, ctx.run, _run_agent)
except Exception:
logger.exception("Executor error for session %s", session_id)
with state.runtime_lock:
state.is_running = False
state.current_prompt_text = ""
return PromptResponse(stop_reason="end_turn")
if result.get("messages"):
@@ -1021,32 +660,10 @@ class HermesACPAgent(acp.Agent):
)
except Exception:
logger.debug("Failed to auto-title ACP session %s", session_id, exc_info=True)
if final_response and conn and not streamed_message:
if final_response and conn:
update = acp.update_agent_message_text(final_response)
await conn.session_update(session_id, update)
# Mark this turn idle before draining queued work so recursive prompt()
# calls can acquire the session. Queued turns are intentionally run as
# normal follow-up user prompts, preserving role alternation and history.
with state.runtime_lock:
state.is_running = False
state.current_prompt_text = ""
while True:
with state.runtime_lock:
if not state.queued_prompts:
break
next_prompt = state.queued_prompts.pop(0)
if conn:
await conn.session_update(
session_id,
acp.update_user_message_text(next_prompt),
)
await self.prompt(
prompt=[TextContentBlock(type="text", text=next_prompt)],
session_id=session_id,
)
usage = None
if any(result.get(key) is not None for key in ("prompt_tokens", "completion_tokens", "total_tokens")):
usage = Usage(
@@ -1057,8 +674,6 @@ class HermesACPAgent(acp.Agent):
cached_read_tokens=result.get("cache_read_tokens"),
)
await self._send_usage_update(state)
stop_reason = "cancelled" if state.cancel_event and state.cancel_event.is_set() else "end_turn"
return PromptResponse(stop_reason=stop_reason, usage=usage)
@@ -1126,8 +741,6 @@ class HermesACPAgent(acp.Agent):
"context": self._cmd_context,
"reset": self._cmd_reset,
"compact": self._cmd_compact,
"steer": self._cmd_steer,
"queue": self._cmd_queue,
"version": self._cmd_version,
}.get(cmd)
@@ -1191,84 +804,22 @@ class HermesACPAgent(acp.Agent):
return f"Could not list tools: {e}"
def _cmd_context(self, args: str, state: SessionState) -> str:
"""Show ACP session context pressure and compression guidance."""
n_messages = len(state.history)
# Count by role.
if n_messages == 0:
return "Conversation is empty (no messages yet)."
# Count by role
roles: dict[str, int] = {}
for msg in state.history:
role = msg.get("role", "unknown")
roles[role] = roles.get(role, 0) + 1
agent = state.agent
model = state.model or getattr(agent, "model", "")
provider = getattr(agent, "provider", None) or "auto"
compressor = getattr(agent, "context_compressor", None)
context_length = int(getattr(compressor, "context_length", 0) or 0)
threshold_tokens = int(getattr(compressor, "threshold_tokens", 0) or 0)
try:
from agent.model_metadata import estimate_request_tokens_rough
system_prompt = getattr(agent, "_cached_system_prompt", "") or ""
tools = getattr(agent, "tools", None) or None
approx_tokens = estimate_request_tokens_rough(
state.history,
system_prompt=system_prompt,
tools=tools,
)
except Exception:
logger.debug("Could not estimate ACP context usage", exc_info=True)
approx_tokens = 0
if threshold_tokens <= 0 and context_length > 0:
threshold_tokens = int(context_length * 0.80)
lines = [
f"Conversation: {n_messages} messages"
if n_messages
else "Conversation is empty (no messages yet).",
f"Conversation: {n_messages} messages",
f" user: {roles.get('user', 0)}, assistant: {roles.get('assistant', 0)}, "
f"tool: {roles.get('tool', 0)}, system: {roles.get('system', 0)}",
]
model = state.model or getattr(state.agent, "model", "")
if model:
lines.append(f"Model: {model}")
lines.append(f"Provider: {provider}")
if approx_tokens > 0:
if context_length > 0:
usage_pct = (approx_tokens / context_length) * 100
lines.append(
f"Context usage: ~{approx_tokens:,} / {context_length:,} tokens ({usage_pct:.1f}%)"
)
else:
lines.append(f"Context usage: ~{approx_tokens:,} tokens")
if threshold_tokens > 0:
if approx_tokens > 0:
threshold_pct = (threshold_tokens / context_length) * 100 if context_length > 0 else 0
remaining = max(threshold_tokens - approx_tokens, 0)
if approx_tokens >= threshold_tokens:
lines.append(
f"Compression: due now (threshold ~{threshold_tokens:,}"
+ (f", {threshold_pct:.0f}%" if threshold_pct else "")
+ "). Run /compact."
)
else:
lines.append(
f"Compression: ~{remaining:,} tokens until threshold "
f"(~{threshold_tokens:,}"
+ (f", {threshold_pct:.0f}%" if threshold_pct else "")
+ ")."
)
else:
lines.append(f"Compression threshold: ~{threshold_tokens:,} tokens")
if getattr(agent, "compression_enabled", True) is False:
lines.append("Compression is disabled for this agent.")
else:
lines.append("Tip: run /compact to compress manually before the threshold.")
return "\n".join(lines)
def _cmd_reset(self, args: str, state: SessionState) -> str:
@@ -1286,16 +837,10 @@ class HermesACPAgent(acp.Agent):
if not hasattr(agent, "_compress_context"):
return "Context compression not available for this agent."
from agent.model_metadata import estimate_request_tokens_rough
from agent.model_metadata import estimate_messages_tokens_rough
original_count = len(state.history)
# Include system prompt + tool schemas so the figure reflects real
# request pressure, not a transcript-only underestimate (#6217).
_sys_prompt = getattr(agent, "_cached_system_prompt", "") or ""
_tools = getattr(agent, "tools", None) or None
approx_tokens = estimate_request_tokens_rough(
state.history, system_prompt=_sys_prompt, tools=_tools
)
approx_tokens = estimate_messages_tokens_rough(state.history)
original_session_db = getattr(agent, "_session_db", None)
try:
@@ -1315,13 +860,7 @@ class HermesACPAgent(acp.Agent):
self.session_manager.save_session(state.session_id)
new_count = len(state.history)
_sys_prompt_after = getattr(agent, "_cached_system_prompt", "") or _sys_prompt
_tools_after = getattr(agent, "tools", None) or _tools
new_tokens = estimate_request_tokens_rough(
state.history,
system_prompt=_sys_prompt_after,
tools=_tools_after,
)
new_tokens = estimate_messages_tokens_rough(state.history)
return (
f"Context compressed: {original_count} -> {new_count} messages\n"
f"~{approx_tokens:,} -> ~{new_tokens:,} tokens"
@@ -1329,34 +868,6 @@ class HermesACPAgent(acp.Agent):
except Exception as e:
return f"Compression failed: {e}"
def _cmd_steer(self, args: str, state: SessionState) -> str:
    """Handle /steer: inject guidance into the active turn, or queue it.

    When a turn is running and the agent supports live steering, hand the
    text to ``agent.steer``; otherwise (idle session, or the agent declined
    the injection) fall back to queueing it for the next turn.
    """
    guidance = args.strip()
    if not guidance:
        return "Usage: /steer <guidance>"
    if state.is_running and hasattr(state.agent, "steer"):
        try:
            accepted = state.agent.steer(guidance)
        except Exception as exc:
            logger.warning("ACP steer failed for session %s: %s", state.session_id, exc)
            return f"⚠️ Steer failed: {exc}"
        if accepted:
            preview = guidance[:80]
            if len(guidance) > 80:
                preview += "..."
            return f"⏩ Steer queued for the active turn: {preview}"
    # No active turn (or steer was declined): queue for the next turn.
    with state.runtime_lock:
        state.queued_prompts.append(guidance)
        depth = len(state.queued_prompts)
    return f"No active turn — queued for the next turn. ({depth} queued)"
def _cmd_queue(self, args: str, state: SessionState) -> str:
    """Handle /queue: append a prompt to run as the session's next turn."""
    text = args.strip()
    if not text:
        return "Usage: /queue <prompt>"
    # Mutate the queue under the session's runtime lock, matching /steer.
    with state.runtime_lock:
        state.queued_prompts.append(text)
        depth = len(state.queued_prompts)
    return f"Queued for the next turn. ({depth} queued)"
def _cmd_version(self, args: str, state: SessionState) -> str:
    """Handle /version: report the running Hermes Agent release."""
    return "Hermes Agent v{}".format(HERMES_VERSION)

View File

@@ -26,33 +26,6 @@ from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
def _win_path_to_wsl(path: str) -> str | None:
"""Convert a Windows drive path to its WSL /mnt/<drive>/... equivalent."""
match = re.match(r"^([A-Za-z]):[\\/](.*)$", path)
if not match:
return None
drive = match.group(1).lower()
tail = match.group(2).replace("\\", "/")
return f"/mnt/{drive}/{tail}"
def _translate_acp_cwd(cwd: str) -> str:
    """Rewrite a Windows drive cwd to its WSL mount when Hermes runs in WSL.

    Windows ACP clients can launch ``hermes acp`` inside WSL while still
    sending editor workspaces as Windows drive paths such as ``E:\\Projects``.
    Storing and executing against the /mnt/<drive> form keeps agents, tools,
    and persisted ACP sessions agreeing on one usable workspace. On native
    Linux/macOS the cwd is returned unchanged.
    """
    from hermes_constants import is_wsl  # deferred import, only needed here

    if is_wsl():
        translated = _win_path_to_wsl(str(cwd))
        if translated is not None:
            return translated
    return cwd
def _normalize_cwd_for_compare(cwd: str | None) -> str:
raw = str(cwd or ".").strip()
if not raw:
@@ -61,9 +34,11 @@ def _normalize_cwd_for_compare(cwd: str | None) -> str:
# Normalize Windows drive paths into the equivalent WSL mount form so
# ACP history filters match the same workspace across Windows and WSL.
translated = _win_path_to_wsl(expanded)
if translated is not None:
expanded = translated
match = re.match(r"^([A-Za-z]):[\\/](.*)$", expanded)
if match:
drive = match.group(1).lower()
tail = match.group(2).replace("\\", "/")
expanded = f"/mnt/{drive}/{tail}"
elif re.match(r"^/mnt/[A-Za-z]/", expanded):
expanded = f"/mnt/{expanded[5].lower()}/{expanded[7:]}"
@@ -121,18 +96,12 @@ def _acp_stderr_print(*args, **kwargs) -> None:
def _register_task_cwd(task_id: str, cwd: str) -> None:
"""Bind a task/session id to the editor's working directory for tools.
Zed can launch Hermes from a Windows workspace while the ACP process runs
inside WSL. In that case ACP sends cwd as e.g. ``E:\\Projects\\POTI``;
local tools need the WSL mount equivalent or subprocess creation fails
before the command can run.
"""
"""Bind a task/session id to the editor's working directory for tools."""
if not task_id:
return
try:
from tools.terminal_tool import register_task_env_overrides
register_task_env_overrides(task_id, {"cwd": _translate_acp_cwd(cwd)})
register_task_env_overrides(task_id, {"cwd": cwd})
except Exception:
logger.debug("Failed to register ACP task cwd override", exc_info=True)
@@ -176,11 +145,6 @@ class SessionState:
model: str = ""
history: List[Dict[str, Any]] = field(default_factory=list)
cancel_event: Any = None # threading.Event
is_running: bool = False
queued_prompts: List[str] = field(default_factory=list)
runtime_lock: Any = field(default_factory=Lock)
current_prompt_text: str = ""
interrupted_prompt_text: str = ""
class SessionManager:
@@ -211,7 +175,6 @@ class SessionManager:
"""Create a new session with a unique ID and a fresh AIAgent."""
import threading
cwd = _translate_acp_cwd(cwd)
session_id = str(uuid.uuid4())
agent = self._make_agent(session_id=session_id, cwd=cwd)
state = SessionState(
@@ -254,7 +217,6 @@ class SessionManager:
"""Deep-copy a session's history into a new session."""
import threading
cwd = _translate_acp_cwd(cwd)
original = self.get_session(session_id) # checks DB too
if original is None:
return None
@@ -356,7 +318,6 @@ class SessionManager:
def update_cwd(self, session_id: str, cwd: str) -> Optional[SessionState]:
"""Update the working directory for a session and its tool overrides."""
cwd = _translate_acp_cwd(cwd)
state = self.get_session(session_id) # checks DB too
if state is None:
return None
@@ -466,10 +427,17 @@ class SessionManager:
except Exception:
logger.debug("Failed to update ACP session metadata", exc_info=True)
# Replace stored messages with current history atomically so a
# mid-rewrite failure rolls back and the previously persisted
# conversation is preserved (salvaged from #13675).
db.replace_messages(state.session_id, state.history)
# Replace stored messages with current history.
db.clear_messages(state.session_id)
for msg in state.history:
db.append_message(
session_id=state.session_id,
role=msg.get("role", "user"),
content=msg.get("content"),
tool_name=msg.get("tool_name") or msg.get("name"),
tool_calls=msg.get("tool_calls"),
tool_call_id=msg.get("tool_call_id"),
)
except Exception:
logger.warning("Failed to persist ACP session %s", state.session_id, exc_info=True)

View File

@@ -28,11 +28,6 @@ TOOL_KIND_MAP: Dict[str, ToolKind] = {
"terminal": "execute",
"process": "execute",
"execute_code": "execute",
# Session/meta tools
"todo": "other",
"skill_view": "read",
"skills_list": "read",
"skill_manage": "edit",
# Web / fetch
"web_search": "fetch",
"web_extract": "fetch",
@@ -56,28 +51,6 @@ TOOL_KIND_MAP: Dict[str, ToolKind] = {
}
_POLISHED_TOOLS = {
# Core operator loop
"todo", "memory", "session_search", "delegate_task",
# Files / execution
"read_file", "write_file", "patch", "search_files", "terminal", "process", "execute_code",
# Skills / web / browser / media
"skill_view", "skills_list", "skill_manage", "web_search", "web_extract",
"browser_navigate", "browser_click", "browser_type", "browser_press", "browser_scroll",
"browser_back", "browser_snapshot", "browser_console", "browser_get_images", "browser_vision",
"vision_analyze", "image_generate", "text_to_speech",
# Schedulers / platform integrations
"cronjob", "send_message", "clarify", "discord", "discord_admin",
"ha_list_entities", "ha_get_state", "ha_list_services", "ha_call_service",
"feishu_doc_read", "feishu_drive_list_comments", "feishu_drive_list_comment_replies",
"feishu_drive_reply_comment", "feishu_drive_add_comment",
"kanban_create", "kanban_show", "kanban_comment", "kanban_complete",
"kanban_block", "kanban_link", "kanban_heartbeat",
"yb_query_group_info", "yb_query_group_members", "yb_search_sticker",
"yb_send_dm", "yb_send_sticker", "mixture_of_agents",
}
def get_tool_kind(tool_name: str) -> ToolKind:
    """Map a hermes tool name onto its ACP ToolKind; unknown tools are 'other'."""
    if tool_name in TOOL_KIND_MAP:
        return TOOL_KIND_MAP[tool_name]
    return "other"
@@ -112,645 +85,18 @@ def build_tool_title(tool_name: str, args: Dict[str, Any]) -> str:
if urls:
return f"extract: {urls[0]}" + (f" (+{len(urls)-1})" if len(urls) > 1 else "")
return "web extract"
if tool_name == "process":
action = str(args.get("action") or "").strip() or "manage"
sid = str(args.get("session_id") or "").strip()
return f"process {action}: {sid}" if sid else f"process {action}"
if tool_name == "delegate_task":
tasks = args.get("tasks")
if isinstance(tasks, list) and tasks:
return f"delegate batch ({len(tasks)} tasks)"
goal = args.get("goal", "")
if goal and len(goal) > 60:
goal = goal[:57] + "..."
return f"delegate: {goal}" if goal else "delegate task"
if tool_name == "session_search":
query = str(args.get("query") or "").strip()
return f"session search: {query}" if query else "recent sessions"
if tool_name == "memory":
action = str(args.get("action") or "manage").strip() or "manage"
target = str(args.get("target") or "memory").strip() or "memory"
return f"memory {action}: {target}"
if tool_name == "execute_code":
code = str(args.get("code") or "").strip()
first_line = next((line.strip() for line in code.splitlines() if line.strip()), "")
if first_line:
if len(first_line) > 70:
first_line = first_line[:67] + "..."
return f"python: {first_line}"
return "python code"
if tool_name == "todo":
items = args.get("todos")
if isinstance(items, list):
return f"todo ({len(items)} item{'s' if len(items) != 1 else ''})"
return "todo"
if tool_name == "skill_view":
name = str(args.get("name") or "?").strip() or "?"
file_path = str(args.get("file_path") or "").strip()
suffix = f"/{file_path}" if file_path else ""
return f"skill view ({name}{suffix})"
if tool_name == "skills_list":
category = str(args.get("category") or "").strip()
return f"skills list ({category})" if category else "skills list"
if tool_name == "skill_manage":
action = str(args.get("action") or "manage").strip() or "manage"
name = str(args.get("name") or "?").strip() or "?"
file_path = str(args.get("file_path") or "").strip()
target = f"{name}/{file_path}" if file_path else name
if len(target) > 64:
target = target[:61] + "..."
return f"skill {action}: {target}"
if tool_name == "browser_navigate":
return f"navigate: {args.get('url', '?')}"
if tool_name == "browser_snapshot":
return "browser snapshot"
if tool_name == "browser_vision":
return f"browser vision: {str(args.get('question', '?'))[:50]}"
if tool_name == "browser_get_images":
return "browser images"
return "execute code"
if tool_name == "vision_analyze":
return f"analyze image: {str(args.get('question', '?'))[:50]}"
if tool_name == "image_generate":
prompt = str(args.get("prompt") or args.get("description") or "").strip()
return f"generate image: {prompt[:50]}" if prompt else "generate image"
if tool_name == "cronjob":
action = str(args.get("action") or "manage").strip() or "manage"
job_id = str(args.get("job_id") or args.get("id") or "").strip()
return f"cron {action}: {job_id}" if job_id else f"cron {action}"
return f"analyze image: {args.get('question', '?')[:50]}"
return tool_name
def _text(content: str) -> Any:
    # Wrap plain text in the ACP tool-content envelope clients expect.
    return acp.tool_content(acp.text_block(content))
def _json_loads_maybe(value: Optional[str]) -> Any:
if not isinstance(value, str):
return value
try:
return json.loads(value)
except Exception:
pass
# Some Hermes tools append a human hint after a JSON payload, e.g.
# ``{...}\n\n[Hint: Results truncated...]``. Keep the structured rendering path
# by decoding the first JSON value instead of falling back to raw text.
try:
decoded, _ = json.JSONDecoder().raw_decode(value.lstrip())
return decoded
except Exception:
return None
def _truncate_text(text: str, limit: int = 5000) -> str:
if len(text) <= limit:
return text
return text[: max(0, limit - 100)] + f"\n... ({len(text)} chars total, truncated)"
def _fenced_text(text: str, language: str = "") -> str:
"""Return a Markdown fence that cannot be broken by backticks in text."""
longest = max((len(run) for run in text.split("`")[1::2]), default=0)
fence = "`" * max(3, longest + 1)
return f"{fence}{language}\n{text}\n{fence}"
def _format_todo_result(result: Optional[str]) -> Optional[str]:
    """Render a todo-tool JSON result as a compact Markdown checklist.

    Returns None unless *result* decodes to a dict with a ``todos`` list,
    letting the caller fall back to raw-text rendering.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict) or not isinstance(data.get("todos"), list):
        return None
    summary = data.get("summary") if isinstance(data.get("summary"), dict) else {}
    # Status -> bullet icon. NOTE(review): several values render empty here;
    # the original glyphs may have been lost in transit — confirm against VCS.
    icon = {
        "completed": "",
        "in_progress": "🔄",
        "pending": "",
        "cancelled": "",
    }
    lines = ["**Todo list**", ""]
    for item in data["todos"]:
        if not isinstance(item, dict):
            continue  # skip malformed entries rather than failing the render
        status = str(item.get("status") or "pending")
        content = str(item.get("content") or item.get("id") or "").strip()
        if content:
            lines.append(f"- {icon.get(status, '')} {content}")
    if summary:
        # Cancelled count is only shown when non-zero.
        cancelled = summary.get("cancelled", 0)
        lines.extend([
            "",
            "**Progress:** "
            f"{summary.get('completed', 0)} completed, "
            f"{summary.get('in_progress', 0)} in progress, "
            f"{summary.get('pending', 0)} pending"
            + (f", {cancelled} cancelled" if cancelled else ""),
        ])
    return "\n".join(lines)
def _format_read_file_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
    """Summarize a read_file tool result (header + fenced file content).

    Returns None when the payload is not the structured read_file JSON, so
    the caller can fall back to raw rendering.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return None
    # An error with no content is a hard failure; error alongside content
    # (e.g. partial read) still renders the content below.
    if data.get("error") and not data.get("content"):
        return f"Read failed: {data.get('error')}"
    content = data.get("content")
    if not isinstance(content, str):
        return None
    # Prefer the caller-supplied path; the tool result may echo its own.
    path = str((args or {}).get("path") or data.get("path") or "file").strip()
    offset = (args or {}).get("offset")
    limit = (args or {}).get("limit")
    range_bits = []
    if offset:
        range_bits.append(f"from line {offset}")
    if limit:
        range_bits.append(f"limit {limit}")
    suffix = f" ({', '.join(range_bits)})" if range_bits else ""
    header = f"Read {path}{suffix}"
    if data.get("total_lines") is not None:
        # NOTE(review): no separator between the header and the line count —
        # a delimiter may have been lost in transit; confirm against VCS.
        header += f"{data.get('total_lines')} total lines"
    # Hermes read_file output is line-numbered with `|`. If we send it as raw
    # Markdown, Zed can interpret pipes as tables and collapse the layout.
    # Fence the payload so file lines stay readable and literal.
    return _truncate_text(f"{header}\n\n{_fenced_text(content)}")
def _format_search_files_result(result: Optional[str]) -> Optional[str]:
    """Render search_files matches as a bulleted path:line list (max 12 shown).

    Returns None when the payload lacks a ``matches`` list.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return None
    matches = data.get("matches")
    if not isinstance(matches, list):
        return None
    total = data.get("total_count", len(matches))
    shown = min(len(matches), 12)
    # Truncated either because the tool said so, or because we cap at 12.
    truncated = bool(data.get("truncated")) or len(matches) > shown
    lines = [
        "Search results",
        f"Found {total} match{'es' if total != 1 else ''}; showing {shown}.",
        "",
    ]
    for match in matches[:shown]:
        if not isinstance(match, dict):
            # Non-dict entries are rendered verbatim rather than dropped.
            lines.append(f"- {match}")
            continue
        # Tolerate several key spellings across tool versions.
        path = str(match.get("path") or match.get("file") or match.get("filename") or "?")
        line = match.get("line") or match.get("line_number")
        content = str(match.get("content") or match.get("text") or "").strip()
        loc = f"{path}:{line}" if line else path
        lines.append(f"- {loc}")
        if content:
            # Collapse internal whitespace and cap the snippet length.
            snippet = _truncate_text(" ".join(content.split()), 300)
            lines.append(f" {snippet}")
    if truncated:
        lines.extend([
            "",
            "Results truncated. Narrow the search, add file_glob, or use offset to page.",
        ])
    return _truncate_text("\n".join(lines), limit=7000)
def _format_execute_code_result(result: Optional[str]) -> Optional[str]:
    """Render an execute_code JSON result as a short status plus output.

    Non-JSON results pass through unchanged (or None when empty/blank).
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        if isinstance(result, str) and result.strip():
            return result
        return None
    exit_code = data.get("exit_code")
    if exit_code is not None:
        lines = [f"Exit code: {exit_code}"]
    else:
        lines = ["Execution complete"]
    stdout_text = str(data.get("output") or "")
    stderr_text = str(data.get("error") or "")
    if stdout_text:
        lines += ["", "Output:", stdout_text]
    if stderr_text:
        lines += ["", "Error:", stderr_text]
    return _truncate_text("\n".join(lines))
def _extract_markdown_headings(content: str, limit: int = 8) -> list[str]:
headings: list[str] = []
for line in content.splitlines():
stripped = line.strip()
if stripped.startswith("#"):
heading = stripped.lstrip("#").strip()
if heading:
headings.append(heading)
if len(headings) >= limit:
break
return headings
def _format_skill_view_result(result: Optional[str]) -> Optional[str]:
    """Summarize a skill_view result: metadata + section outline, not full text.

    Returns None for non-JSON payloads; explicit failures render an error line.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return None
    if data.get("success") is False:
        return f"Skill view failed: {data.get('error', 'unknown error')}"
    name = str(data.get("name") or "skill")
    file_path = str(data.get("file") or data.get("path") or "SKILL.md")
    description = str(data.get("description") or "").strip()
    content = str(data.get("content") or "")
    linked = data.get("linked_files") if isinstance(data.get("linked_files"), dict) else None
    lines = ["**Skill loaded**", "", f"- **Name:** `{name}`", f"- **File:** `{file_path}`"]
    if description:
        lines.append(f"- **Description:** {description}")
    if content:
        # Report size only; the full text stays in the agent's context.
        lines.append(f"- **Content:** {len(content):,} chars loaded into agent context")
    if linked:
        # linked_files maps category -> list of paths; count list entries only.
        linked_count = sum(len(v) for v in linked.values() if isinstance(v, list))
        lines.append(f"- **Linked files:** {linked_count}")
    headings = _extract_markdown_headings(content)
    if headings:
        lines.extend(["", "**Sections**"])
        lines.extend(f"- {heading}" for heading in headings)
    lines.extend([
        "",
        "_Full skill content is available to the agent but hidden here to keep ACP readable._",
    ])
    return "\n".join(lines)
def _format_skill_manage_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
    """Render a skill_manage result as a short Markdown status card.

    Returns None for non-JSON payloads so the caller falls back to raw text.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return None
    # Prefer request args for identity fields; fall back to the result echo.
    action = str((args or {}).get("action") or "manage").strip() or "manage"
    name = str((args or {}).get("name") or data.get("name") or "skill").strip() or "skill"
    file_path = str((args or {}).get("file_path") or data.get("file_path") or "SKILL.md").strip() or "SKILL.md"
    success = data.get("success")
    # A missing 'success' key is treated as success; only explicit False fails.
    status = "✅ Skill updated" if success is not False else "✗ Skill update failed"
    lines = [f"**{status}**", "", f"- **Action:** `{action}`", f"- **Skill:** `{name}`"]
    if action not in {"delete"}:
        # Deletes have no meaningful file target to show.
        lines.append(f"- **File:** `{file_path}`")
    message = str(data.get("message") or data.get("error") or "").strip()
    if message:
        lines.append(f"- **Result:** {message}")
    replacements = data.get("replacements") or data.get("replacement_count")
    if replacements is not None:
        lines.append(f"- **Replacements:** {replacements}")
    path = str(data.get("path") or "").strip()
    if path:
        lines.append(f"- **Path:** `{path}`")
    return "\n".join(lines)
def _format_web_search_result(result: Optional[str]) -> Optional[str]:
    """List web_search hits (title/url/description), capped at 10 entries.

    Returns None when the payload has no ``web`` results list.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return None
    # Hits may be nested under data.web or sit at the top level.
    web = data.get("data", {}).get("web") if isinstance(data.get("data"), dict) else data.get("web")
    if not isinstance(web, list):
        return None
    lines = [f"Web results: {len(web)}"]
    for item in web[:10]:
        if not isinstance(item, dict):
            continue
        title = str(item.get("title") or item.get("url") or "result").strip()
        url = str(item.get("url") or "").strip()
        desc = str(item.get("description") or "").strip()
        # NOTE(review): title and url concatenate with no separator here — a
        # delimiter may have been lost in transit; confirm against VCS.
        lines.append(f"{title}" + (f"{url}" if url else ""))
        if desc:
            lines.append(f" {desc}")
    return _truncate_text("\n".join(lines))
def _format_web_extract_result(result: Optional[str]) -> Optional[str]:
    """Return only web_extract errors for ACP; success stays compact via title.

    Returns None (no rendering) when the extract succeeded or the payload is
    not structured JSON — per-URL failures are the only thing worth showing.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return None
    if data.get("success") is False and data.get("error"):
        return f"Web extract failed: {data.get('error')}"
    results = data.get("results")
    if not isinstance(results, list):
        return None
    failures: list[str] = []
    for item in results[:10]:
        if not isinstance(item, dict):
            continue
        error = str(item.get("error") or "").strip()
        # Some producers stringify a null error; treat those as success too.
        if not error or error in {"None", "null"}:
            continue
        url = str(item.get("url") or "").strip()
        title = str(item.get("title") or url or "Untitled").strip()
        failures.append(
            f"- {title}" + (f"{url}" if url and url != title else "") + f"\n Error: {_truncate_text(error, limit=500)}"
        )
    if not failures:
        return None
    lines = [f"Web extract failed for {len(failures)} URL{'s' if len(failures) != 1 else ''}"]
    lines.extend(failures)
    return "\n".join(lines)
def _format_process_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
    """Render a process-tool result: either a process listing or one action's status.

    Non-JSON payloads pass through unchanged (or None when empty/blank).
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return result if isinstance(result, str) and result.strip() else None
    if data.get("success") is False and data.get("error"):
        return f"Process error: {data.get('error')}"
    action = str((args or {}).get("action") or "process").strip() or "process"
    # Listing branch: a 'processes' array renders as a capped bullet list.
    if isinstance(data.get("processes"), list):
        processes = data["processes"]
        lines = [f"Processes: {len(processes)}"]
        for proc in processes[:20]:
            if not isinstance(proc, dict):
                lines.append(f"- {proc}")
                continue
            sid = str(proc.get("session_id") or proc.get("id") or "?")
            # 'exited' flag is the fallback when no explicit status is given.
            status = str(proc.get("status") or ("exited" if proc.get("exited") else "running"))
            cmd = str(proc.get("command") or "").strip()
            pid = proc.get("pid")
            code = proc.get("exit_code")
            bits = [status]
            if pid is not None:
                bits.append(f"pid {pid}")
            if code is not None:
                bits.append(f"exit {code}")
            # NOTE(review): the command appends with no separator — a
            # delimiter may have been lost in transit; confirm against VCS.
            lines.append(f"- `{sid}` — {', '.join(bits)}" + (f"{cmd[:120]}" if cmd else ""))
        if len(processes) > 20:
            lines.append(f"... {len(processes) - 20} more process(es)")
        return "\n".join(lines)
    # Single-action branch: status headline plus selected detail fields.
    status = str(data.get("status") or data.get("state") or action).strip()
    sid = str(data.get("session_id") or (args or {}).get("session_id") or "").strip()
    lines = [f"Process {action}: {status}" + (f" (`{sid}`)" if sid else "")]
    # 'returncode' deliberately shares the "Exit code" label with 'exit_code'.
    for key, label in (("command", "Command"), ("pid", "PID"), ("exit_code", "Exit code"), ("returncode", "Exit code"), ("lines", "Lines")):
        if data.get(key) is not None:
            lines.append(f"- **{label}:** {data.get(key)}")
    output = data.get("output") or data.get("new_output") or data.get("log") or data.get("stdout")
    error = data.get("error") or data.get("stderr")
    if output:
        lines.extend(["", "Output:", _truncate_text(str(output), limit=5000)])
    if error:
        lines.extend(["", "Error:", _truncate_text(str(error), limit=2000)])
    msg = data.get("message")
    # Only show the generic message when there was no output/error to show.
    if msg and not output and not error:
        lines.append(str(msg))
    return _truncate_text("\n".join(lines), limit=7000)
def _format_delegate_result(result: Optional[str]) -> Optional[str]:
    """Render a `delegate_task` tool result as a per-task summary.

    Emits one header per task (status, model/role/duration), followed by the
    task summary, error (if any), and the names of tools the child used.
    Returns None for non-JSON payloads or payloads without a results list.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return None
    # A top-level error with no results list means the delegation itself failed.
    if data.get("error") and not isinstance(data.get("results"), list):
        return f"Delegation failed: {data.get('error')}"
    results = data.get("results")
    if not isinstance(results, list):
        return None
    total = data.get("total_duration_seconds")
    lines = [f"Delegation results: {len(results)} task{'s' if len(results) != 1 else ''}" + (f" in {total}s" if total is not None else "")]
    # NOTE(review): icon values appear empty in this view — presumably
    # per-status emoji were stripped by the rendering; verify in the repo.
    icon = {"completed": "", "failed": "", "error": "", "timeout": "", "interrupted": ""}
    for item in results:
        if not isinstance(item, dict):
            lines.append(f"- {item}")
            continue
        idx = item.get("task_index")
        status = str(item.get("status") or "unknown")
        model = item.get("model")
        dur = item.get("duration_seconds")
        role = item.get("_child_role")
        # Task numbers are 1-based for display; "?" when the index is missing.
        header = f"{icon.get(status, '')} Task {idx + 1 if isinstance(idx, int) else '?'}: {status}"
        bits = []
        if model:
            bits.append(str(model))
        if role:
            bits.append(f"role={role}")
        if dur is not None:
            bits.append(f"{dur}s")
        if bits:
            header += " (" + ", ".join(bits) + ")"
        # Blank line before each task header for readable separation.
        lines.extend(["", header])
        summary = str(item.get("summary") or "").strip()
        error = str(item.get("error") or "").strip()
        if summary:
            lines.append(_truncate_text(summary, limit=1200))
        if error:
            lines.append("Error: " + _truncate_text(error, limit=800))
        trace = item.get("tool_trace")
        if isinstance(trace, list) and trace:
            # Show at most 12 tool names, with a "+N" overflow marker.
            names = [str(t.get("tool") or "?") for t in trace if isinstance(t, dict)]
            if names:
                lines.append("Tools: " + ", ".join(names[:12]) + (f" (+{len(names)-12})" if len(names) > 12 else ""))
    return _truncate_text("\n".join(lines), limit=8000)
def _format_session_search_result(result: Optional[str]) -> Optional[str]:
    """Render a `session_search` tool result as a bullet list of sessions.

    The headline depends on the search mode ("recent" vs. query search).
    Each matching session becomes a bullet with title, id, and metadata,
    optionally followed by an indented one-line summary. Returns None for
    non-JSON payloads or payloads without a results list.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return None
    if data.get("success") is False:
        return f"Session search failed: {data.get('error', 'unknown error')}"
    results = data.get("results")
    if not isinstance(results, list):
        return None
    mode = data.get("mode") or "search"
    query = data.get("query")
    lines = ["Recent sessions" if mode == "recent" else f"Session search results" + (f" for `{query}`" if query else "")]
    if not results:
        # Prefer the backend's own message when it explains the empty result.
        lines.append(str(data.get("message") or "No matching sessions found."))
        return "\n".join(lines)
    for item in results:
        if not isinstance(item, dict):
            # Malformed entries are silently skipped (best-effort display).
            continue
        sid = str(item.get("session_id") or "?")
        title = str(item.get("title") or item.get("when") or "Untitled session").strip()
        when = str(item.get("last_active") or item.get("started_at") or item.get("when") or "").strip()
        count = item.get("message_count")
        source = str(item.get("source") or "").strip()
        # Join only the non-empty metadata pieces, comma-separated.
        meta = ", ".join(str(x) for x in [when, source, f"{count} msgs" if count is not None else ""] if x)
        lines.append(f"- **{title}** (`{sid}`)" + (f"{meta}" if meta else ""))
        summary = str(item.get("summary") or item.get("preview") or "").strip()
        if summary:
            # Collapse internal whitespace so the preview stays on one line.
            lines.append(" " + _truncate_text(" ".join(summary.split()), limit=500))
    return _truncate_text("\n".join(lines), limit=7000)
def _format_memory_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
    """Render a `memory` tool result as a success/failure summary.

    On failure, shows the error plus up to five candidate matches (useful
    when an old_text patch was ambiguous). On success, shows the backend
    message, entry count, usage, and a short preview of the new content
    taken from the call arguments. Returns None for non-JSON payloads.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return None
    action = str((args or {}).get("action") or "memory").strip() or "memory"
    # Target may come from either the result payload or the call args.
    target = str(data.get("target") or (args or {}).get("target") or "memory")
    if data.get("success") is False:
        lines = [f"✗ Memory {action} failed ({target})", str(data.get("error") or "unknown error")]
        matches = data.get("matches")
        if isinstance(matches, list) and matches:
            # Show up to 5 near-matches to help disambiguate a failed edit.
            lines.append("Matches:")
            lines.extend(f"- {_truncate_text(str(m), 160)}" for m in matches[:5])
        return "\n".join(lines)
    lines = [f"✅ Memory {action} saved ({target})"]
    if data.get("message"):
        lines.append(str(data.get("message")))
    if data.get("entry_count") is not None:
        lines.append(f"Entries: {data.get('entry_count')}")
    if data.get("usage"):
        lines.append(f"Usage: {data.get('usage')}")
    # Avoid dumping all memory entries into ACP UI; show only the explicit new value preview.
    preview = str((args or {}).get("content") or (args or {}).get("old_text") or "").strip()
    if preview:
        lines.append("Preview: " + _truncate_text(preview, limit=300))
    return "\n".join(lines)
def _format_edit_result(tool_name: str, result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
    """Render a `write_file`/`patch` tool result as a short summary.

    JSON payloads become a completion headline plus the backend message,
    replacement count, and modified-file list. Non-JSON results pass
    through (truncated); an empty result yields a generic completion line.
    """
    data = _json_loads_maybe(result)
    path = str((args or {}).get("path") or "file").strip()
    if isinstance(data, dict):
        if data.get("success") is False or data.get("error"):
            return f"{tool_name} failed for {path}: {data.get('error', 'unknown error')}"
        message = str(data.get("message") or "").strip()
        # Use explicit None checks rather than `or`: a legitimate count of 0
        # under "replacements" must not be clobbered by the fallback key.
        replacements = data.get("replacements")
        if replacements is None:
            replacements = data.get("replacement_count")
        lines = [f"{tool_name} completed" + (f" for `{path}`" if path else "")]
        if message:
            lines.append(message)
        if replacements is not None:
            lines.append(f"Replacements: {replacements}")
        files = data.get("files_modified")
        if isinstance(files, list) and files:
            # Cap the file list at 8 entries to keep the summary compact.
            lines.append("Files: " + ", ".join(f"`{f}`" for f in files[:8]))
        return "\n".join(lines)
    if isinstance(result, str) and result.strip():
        return _truncate_text(result, limit=3000)
    return f"{tool_name} completed" + (f" for `{path}`" if path else "")
def _format_browser_result(tool_name: str, result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
    """Render a browser_* tool result as a compact page/image summary.

    `browser_get_images` gets a dedicated image-list rendering; all other
    browser tools show title, URL, and the first textual field found in
    the payload. Non-JSON results pass through verbatim (None when blank).
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return result if isinstance(result, str) and result.strip() else None
    if data.get("success") is False or data.get("error"):
        return f"{tool_name} failed: {data.get('error', 'unknown error')}"
    if tool_name == "browser_get_images":
        # Image payloads may arrive under either "images" or "data".
        images = data.get("images") or data.get("data")
        if isinstance(images, list):
            lines = [f"Images found: {len(images)}"]
            for img in images[:12]:
                if isinstance(img, dict):
                    alt = str(img.get("alt") or "").strip()
                    url = str(img.get("url") or img.get("src") or "").strip()
                    lines.append(f"- {alt or 'image'}" + (f"{url}" if url else ""))
            return _truncate_text("\n".join(lines), limit=5000)
    # Headline preference: page title, then URL, then status, then tool name.
    title = str(data.get("title") or data.get("url") or data.get("status") or tool_name)
    # First non-empty of the payload's textual fields becomes the body.
    text = str(data.get("text") or data.get("content") or data.get("snapshot") or data.get("analysis") or data.get("message") or "").strip()
    lines = [title]
    # Avoid repeating the URL when it already served as the headline.
    if data.get("url") and data.get("url") != title:
        lines.append(str(data.get("url")))
    if text:
        lines.extend(["", _truncate_text(text, limit=5000)])
    return _truncate_text("\n".join(lines), limit=7000)
def _format_media_or_cron_result(tool_name: str, result: Optional[str]) -> Optional[str]:
    """Summarise a media-generation or cron tool result for display.

    Non-JSON results pass through unchanged (None when blank); failures
    collapse to a single error line; otherwise each recognised key found
    in the payload is listed as a markdown bullet under a completion line.
    """
    payload = _json_loads_maybe(result)
    if not isinstance(payload, dict):
        if isinstance(result, str) and result.strip():
            return result
        return None
    if payload.get("success") is False or payload.get("error"):
        return f"{tool_name} failed: {payload.get('error', 'unknown error')}"
    interesting = ("file_path", "path", "url", "image_url", "job_id", "id", "status", "message", "next_run")
    summary = [f"{tool_name} completed"]
    summary.extend(
        f"- **{key}:** {payload.get(key)}" for key in interesting if payload.get(key)
    )
    return "\n".join(summary)
def _format_generic_structured_result(tool_name: str, result: Optional[str]) -> Optional[str]:
    """Fallback renderer for polished tools without a dedicated formatter.

    Lists payloads become a capped bullet list. Dict payloads become a
    headline plus bullets: priority keys first (in a fixed order), then
    remaining keys, skipping empties and a few noisy keys, capped at
    roughly 14 lines. A string "content" field is appended as a body.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, (dict, list)):
        # Non-JSON results pass through verbatim; blank ones render nothing.
        return result if isinstance(result, str) and result.strip() else None
    if isinstance(data, list):
        lines = [f"{tool_name}: {len(data)} item{'s' if len(data) != 1 else ''}"]
        for item in data[:12]:
            lines.append(f"- {_truncate_text(str(item), limit=240)}")
        return _truncate_text("\n".join(lines), limit=5000)
    if data.get("success") is False or data.get("error"):
        return f"{tool_name} failed: {data.get('error', 'unknown error')}"
    # "completed" only when success is literally True; otherwise neutral.
    lines = [f"{tool_name} completed" if data.get("success") is True else f"{tool_name} result"]
    # Keys rendered first, in this order, when present and non-empty.
    priority_keys = (
        "message", "status", "id", "task_id", "issue_id", "title", "name", "entity_id",
        "state", "service", "url", "path", "file_path", "count", "total", "next_run",
    )
    seen = set()
    for key in priority_keys:
        value = data.get(key)
        if value in (None, "", [], {}):
            continue
        seen.add(key)
        lines.append(f"- **{key}:** {_truncate_text(str(value), limit=500)}")
    for key, value in data.items():
        # Skip keys already rendered and bookkeeping/bulky fields.
        if key in seen or key in {"success", "raw", "content", "entries"}:
            continue
        if value in (None, "", [], {}):
            continue
        if isinstance(value, (dict, list)):
            # Compact JSON preview for nested structures.
            preview = json.dumps(value, ensure_ascii=False, default=str)
        else:
            preview = str(value)
        lines.append(f"- **{key}:** {_truncate_text(preview, limit=500)}")
        if len(lines) >= 14:
            # Hard cap so arbitrary payloads can't flood the UI.
            break
    content = data.get("content")
    if isinstance(content, str) and content.strip():
        lines.extend(["", _truncate_text(content.strip(), limit=1500)])
    return _truncate_text("\n".join(lines), limit=7000)
def _build_polished_completion_content(
    tool_name: str,
    result: Optional[str],
    function_args: Optional[Dict[str, Any]],
) -> Optional[List[Any]]:
    """Produce polished ACP completion content for a known tool, if any.

    Dispatches to the tool-specific formatter (falling back to the generic
    structured formatter for other tools in _POLISHED_TOOLS) and wraps the
    formatted text in a single-element content list. Returns None when the
    tool has no polished rendering or the formatter yielded no text, so the
    caller can fall back to the raw result.
    """
    if tool_name == "todo":
        text = _format_todo_result(result)
    elif tool_name == "read_file":
        text = _format_read_file_result(result, function_args)
    elif tool_name in ("write_file", "patch"):
        text = _format_edit_result(tool_name, result, function_args)
    elif tool_name == "search_files":
        text = _format_search_files_result(result)
    elif tool_name == "execute_code":
        text = _format_execute_code_result(result)
    elif tool_name == "process":
        text = _format_process_result(result, function_args)
    elif tool_name == "delegate_task":
        text = _format_delegate_result(result)
    elif tool_name == "session_search":
        text = _format_session_search_result(result)
    elif tool_name == "memory":
        text = _format_memory_result(result, function_args)
    elif tool_name == "skill_view":
        text = _format_skill_view_result(result)
    elif tool_name == "skill_manage":
        text = _format_skill_manage_result(result, function_args)
    elif tool_name == "web_search":
        text = _format_web_search_result(result)
    elif tool_name == "web_extract":
        text = _format_web_extract_result(result)
    elif tool_name in ("browser_navigate", "browser_snapshot", "browser_vision", "browser_get_images"):
        text = _format_browser_result(tool_name, result, function_args)
    elif tool_name in ("vision_analyze", "image_generate", "cronjob"):
        text = _format_media_or_cron_result(tool_name, result)
    elif tool_name in _POLISHED_TOOLS:
        text = _format_generic_structured_result(tool_name, result)
    else:
        return None
    if not text:
        return None
    return [_text(text)]
def _build_patch_mode_content(patch_text: str) -> List[Any]:
"""Parse V4A patch mode input into ACP diff blocks when possible."""
if not patch_text:
@@ -912,11 +258,7 @@ def _build_tool_complete_content(
except Exception:
pass
polished_content = _build_polished_completion_content(tool_name, result, function_args)
if polished_content:
return polished_content
return [_text(display_result)]
return [acp.tool_content(acp.text_block(display_result))]
# ---------------------------------------------------------------------------
@@ -946,6 +288,7 @@ def build_tool_start(
content = _build_patch_mode_content(patch_text)
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
raw_input=arguments,
)
if tool_name == "write_file":
@@ -954,172 +297,32 @@ def build_tool_start(
content = [acp.tool_diff_content(path=path, new_text=file_content)]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
raw_input=arguments,
)
if tool_name == "terminal":
command = arguments.get("command", "")
content = [_text(f"$ {command}")]
content = [acp.tool_content(acp.text_block(f"$ {command}"))]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
raw_input=arguments,
)
if tool_name == "read_file":
# The title and location already identify the file. Sending a synthetic
# "Reading ..." content block makes Zed render an unhelpful Output
# section before the real file contents arrive on completion.
path = arguments.get("path", "")
content = [acp.tool_content(acp.text_block(f"Reading {path}"))]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=None, locations=locations,
tool_call_id, title, kind=kind, content=content, locations=locations,
raw_input=arguments,
)
if tool_name == "search_files":
pattern = arguments.get("pattern", "")
target = arguments.get("target", "content")
search_path = arguments.get("path")
where = f" in {search_path}" if search_path else ""
content = [_text(f"Searching for '{pattern}' ({target}){where}")]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name == "todo":
items = arguments.get("todos")
if isinstance(items, list):
preview_lines = ["Updating todo list", ""]
for item in items[:8]:
if isinstance(item, dict):
preview_lines.append(f"- {item.get('status', 'pending')}: {item.get('content', item.get('id', ''))}")
if len(items) > 8:
preview_lines.append(f"... {len(items) - 8} more")
content = [_text("\n".join(preview_lines))]
else:
content = [_text("Reading todo list")]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name == "skill_view":
name = str(arguments.get("name") or "?").strip() or "?"
file_path = str(arguments.get("file_path") or "SKILL.md").strip() or "SKILL.md"
content = [_text(f"Loading skill '{name}' ({file_path})")]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name == "skill_manage":
action = str(arguments.get("action") or "manage").strip() or "manage"
name = str(arguments.get("name") or "?").strip() or "?"
file_path = str(arguments.get("file_path") or "SKILL.md").strip() or "SKILL.md"
path = f"skills/{name}/{file_path}" if file_path else f"skills/{name}"
if action == "patch":
old = str(arguments.get("old_string") or "")
new = str(arguments.get("new_string") or "")
content = [acp.tool_diff_content(path=path, old_text=old or None, new_text=new)]
elif action in {"edit", "create"}:
content = [
acp.tool_diff_content(
path=path,
new_text=str(arguments.get("content") or ""),
)
]
elif action == "write_file":
target = str(arguments.get("file_path") or "file")
content = [
acp.tool_diff_content(
path=f"skills/{name}/{target}",
new_text=str(arguments.get("file_content") or ""),
)
]
elif action in {"delete", "remove_file"}:
target = str(arguments.get("file_path") or file_path or name)
content = [_text(f"Removing {target} from skill '{name}'")]
else:
content = [_text(f"Running skill_manage action '{action}' on skill '{name}' ({file_path})")]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name == "execute_code":
code = str(arguments.get("code") or "").strip()
preview = code[:1200] + (f"\n... ({len(code)} chars total, truncated)" if len(code) > 1200 else "")
content = [_text(f"Running Python helper script:\n\n```python\n{preview}\n```" if preview else "Running Python helper script")]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name == "web_search":
query = str(arguments.get("query") or "").strip()
content = [_text(f"Searching the web for: {query}" if query else "Searching the web")]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name == "web_extract":
# The title identifies the URL(s). Avoid a duplicate content block so
# Zed renders this like read_file: compact start, concise completion.
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=None, locations=locations,
)
if tool_name == "process":
action = str(arguments.get("action") or "").strip() or "manage"
sid = str(arguments.get("session_id") or "").strip()
data_preview = str(arguments.get("data") or "").strip()
text = f"Process action: {action}" + (f"\nSession: {sid}" if sid else "")
if data_preview:
text += "\nInput: " + _truncate_text(data_preview, limit=500)
content = [_text(text)]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name == "delegate_task":
tasks = arguments.get("tasks")
if isinstance(tasks, list) and tasks:
lines = [f"Delegating {len(tasks)} tasks", ""]
for i, task in enumerate(tasks[:8], 1):
if isinstance(task, dict):
goal = str(task.get("goal") or "").strip()
role = str(task.get("role") or "").strip()
lines.append(f"{i}. " + _truncate_text(goal, limit=160) + (f" ({role})" if role else ""))
if len(tasks) > 8:
lines.append(f"... {len(tasks) - 8} more")
content = [_text("\n".join(lines))]
else:
goal = str(arguments.get("goal") or "").strip()
content = [_text("Delegating task" + (f":\n{_truncate_text(goal, limit=800)}" if goal else ""))]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name == "session_search":
query = str(arguments.get("query") or "").strip()
content = [_text(f"Searching past sessions for: {query}" if query else "Loading recent sessions")]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name == "memory":
action = str(arguments.get("action") or "manage").strip() or "manage"
target = str(arguments.get("target") or "memory").strip() or "memory"
preview = str(arguments.get("content") or arguments.get("old_text") or "").strip()
text = f"Memory {action} ({target})"
if preview:
text += "\nPreview: " + _truncate_text(preview, limit=500)
content = [_text(text)]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name in _POLISHED_TOOLS:
try:
args_text = json.dumps(arguments, indent=2, default=str)
except (TypeError, ValueError):
args_text = str(arguments)
content = [_text(_truncate_text(args_text, limit=1200))]
content = [acp.tool_content(acp.text_block(f"Searching for '{pattern}' ({target})"))]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
raw_input=arguments,
)
# Generic fallback
@@ -1131,7 +334,7 @@ def build_tool_start(
content = [acp.tool_content(acp.text_block(args_text))]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
raw_input=None if tool_name in _POLISHED_TOOLS else arguments,
raw_input=arguments,
)
@@ -1144,22 +347,18 @@ def build_tool_complete(
) -> ToolCallProgress:
"""Create a ToolCallUpdate (progress) event for a completed tool call."""
kind = get_tool_kind(tool_name)
if tool_name == "web_extract":
error_text = _format_web_extract_result(result)
content = [_text(error_text)] if error_text else None
else:
content = _build_tool_complete_content(
tool_name,
result,
function_args=function_args,
snapshot=snapshot,
)
content = _build_tool_complete_content(
tool_name,
result,
function_args=function_args,
snapshot=snapshot,
)
return acp.update_tool_call(
tool_call_id,
kind=kind,
status="completed",
content=content,
raw_output=None if tool_name in _POLISHED_TOOLS else result,
raw_output=result,
)

View File

@@ -76,7 +76,6 @@ _ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7")
# Models where temperature/top_p/top_k return 400 if set to non-default values.
# This is the Opus 4.7 contract; future 4.x+ models are expected to follow it.
_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7")
_FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6")
# ── Max output token limits per Anthropic model ───────────────────────
# Source: Anthropic docs + Cline model catalog. Anthropic's API requires
@@ -106,9 +105,6 @@ _ANTHROPIC_OUTPUT_LIMITS = {
"claude-3-haiku": 4_096,
# Third-party Anthropic-compatible providers
"minimax": 131_072,
# Qwen models via DashScope Anthropic-compatible endpoint
# DashScope enforces max_tokens ∈ [1, 65536]
"qwen3": 65_536,
}
# For any model not in the table, assume the highest current limit.
@@ -220,17 +216,6 @@ def _forbids_sampling_params(model: str) -> bool:
return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS)
def _supports_fast_mode(model: str) -> bool:
"""Return True for models that support Anthropic Fast Mode (speed=fast).
Per Anthropic docs, fast mode is currently supported on Opus 4.6 only.
Sending ``speed: "fast"`` to any other Claude model (including Opus 4.7)
returns HTTP 400. This guard prevents silently 400'ing when stale config
or older callers leave fast mode enabled across a model upgrade.
"""
return any(v in model for v in _FAST_MODE_SUPPORTED_SUBSTRINGS)
# Beta headers for enhanced features (sent with ALL auth types).
# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the
# beta headers are still accepted (harmless no-op) but not required. Kept
@@ -435,33 +420,6 @@ def _is_kimi_family_endpoint(base_url: str | None, model: str | None = None) ->
return False
def _is_deepseek_anthropic_endpoint(base_url: str | None) -> bool:
"""Return True for DeepSeek's Anthropic-compatible endpoint.
DeepSeek's ``/anthropic`` route speaks the Anthropic Messages protocol
but, when thinking mode is enabled, requires the ``thinking`` blocks
from prior assistant turns to round-trip on subsequent requests — the
generic third-party path strips them and triggers HTTP 400::
The content[].thinking in the thinking mode must be passed back
to the API.
Per DeepSeek's published compatibility matrix the blocks are unsigned
(no Anthropic-proprietary signature, no ``redacted_thinking`` support),
so this endpoint is handled with the same strip-signed / keep-unsigned
policy used for Kimi's ``/coding`` endpoint. The match is pinned to
the ``/anthropic`` path so the OpenAI-compatible ``api.deepseek.com``
base URL (which never reaches this adapter) is not misclassified.
See hermes-agent#16748.
"""
if not base_url_host_matches(base_url or "", "api.deepseek.com"):
return False
normalized = _normalize_base_url_text(base_url)
if not normalized:
return False
return "/anthropic" in normalized.rstrip("/").lower()
def _requires_bearer_auth(base_url: str | None) -> bool:
"""Return True for Anthropic-compatible providers that require Bearer auth.
@@ -476,11 +434,7 @@ def _requires_bearer_auth(base_url: str | None) -> bool:
return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))
def _common_betas_for_base_url(
base_url: str | None,
*,
drop_context_1m_beta: bool = False,
) -> list[str]:
def _common_betas_for_base_url(base_url: str | None) -> list[str]:
"""Return the beta headers that are safe for the configured endpoint.
MiniMax's Anthropic-compatible endpoints (Bearer-auth) reject requests
@@ -491,30 +445,14 @@ def _common_betas_for_base_url(
The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth
endpoints — MiniMax hosts its own models, not Claude, so the header is
irrelevant at best and risks request rejection at worst.
``drop_context_1m_beta=True`` additionally strips the 1M-context beta on
otherwise-unrelated endpoints. The OAuth retry path flips this flag after
a subscription rejects the beta with
"The long context beta is not yet available for this subscription" so
subsequent requests in the same session don't repeat the probe. See the
reactive recovery loop in ``run_agent.py`` and issue-comment history on
PR #17680 for the full rationale.
"""
if _requires_bearer_auth(base_url):
_stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA}
return [b for b in _COMMON_BETAS if b not in _stripped]
if drop_context_1m_beta:
return [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA]
return _COMMON_BETAS
def build_anthropic_client(
api_key: str,
base_url: str = None,
timeout: float = None,
*,
drop_context_1m_beta: bool = False,
):
def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
"""Create an Anthropic client, auto-detecting setup-tokens vs API keys.
If *timeout* is provided it overrides the default 900s read timeout. The
@@ -523,12 +461,6 @@ def build_anthropic_client(
Anthropic-compatible providers respect the same knob as OpenAI-wire
providers.
``drop_context_1m_beta=True`` strips ``context-1m-2025-08-07`` from the
client-level ``anthropic-beta`` header. Used by the reactive OAuth retry
path in ``run_agent.py`` when a subscription rejects the beta; leave at
its default on fresh clients so 1M-capable subscriptions keep the
capability.
Returns an anthropic.Anthropic instance.
"""
_anthropic_sdk = _get_anthropic_sdk()
@@ -558,10 +490,7 @@ def build_anthropic_client(
kwargs["default_query"] = {"api-version": "2025-04-15"}
else:
kwargs["base_url"] = normalized_base_url
common_betas = _common_betas_for_base_url(
normalized_base_url,
drop_context_1m_beta=drop_context_1m_beta,
)
common_betas = _common_betas_for_base_url(normalized_base_url)
if _is_kimi_coding_endpoint(base_url):
# Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
@@ -1237,14 +1166,6 @@ def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
``keep_nullable_hint=False`` because the Anthropic validator does not
recognize the OpenAPI-style ``nullable: true`` extension and strict
schema-to-grammar converters may reject unknown keywords.
Top-level ``oneOf``/``allOf``/``anyOf`` are also stripped here: the
Anthropic API rejects union keywords at the schema root with a generic
HTTP 400. Several upstream and plugin tools ship schemas with one of
these keywords at the top level (commonly for Pydantic discriminated
unions). If we land here with those keywords still present after
nullable-union stripping, drop them and fall back to a plain object
schema so the tool still validates at the Anthropic boundary.
"""
if not schema:
return {"type": "object", "properties": {}}
@@ -1254,12 +1175,6 @@ def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
normalized = strip_nullable_unions(schema, keep_nullable_hint=False)
if not isinstance(normalized, dict):
return {"type": "object", "properties": {}}
# Strip top-level union keywords that Anthropic's validator rejects.
banned = {"oneOf", "allOf", "anyOf"}
if banned & normalized.keys():
normalized = {k: v for k, v in normalized.items() if k not in banned}
if "type" not in normalized:
normalized["type"] = "object"
if normalized.get("type") == "object" and not isinstance(normalized.get("properties"), dict):
normalized = {**normalized, "properties": {}}
return normalized
@@ -1270,24 +1185,10 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
if not tools:
return []
result = []
seen_names: set = set()
for t in tools:
fn = t.get("function", {})
name = fn.get("name", "")
# Defensive dedup: Anthropic rejects requests with duplicate tool
# names. Upstream injection paths already dedup, but this guard
# converts a hard API failure into a warning. See: #18478
if name and name in seen_names:
logger.warning(
"convert_tools_to_anthropic: duplicate tool name '%s' "
"— dropping second occurrence",
name,
)
continue
if name:
seen_names.add(name)
result.append({
"name": name,
"name": fn.get("name", ""),
"description": fn.get("description", ""),
"input_schema": _normalize_tool_input_schema(
fn.get("parameters", {"type": "object", "properties": {}})
@@ -1668,16 +1569,7 @@ def convert_messages_to_anthropic(
# cache markers can interfere with signature validation.
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
_is_third_party = _is_third_party_anthropic_endpoint(base_url)
# Kimi /coding and DeepSeek /anthropic share a contract: both speak the
# Anthropic Messages protocol upstream but require that thinking blocks
# synthesised from reasoning_content round-trip on subsequent turns when
# thinking is enabled. Signed Anthropic blocks still have to be stripped
# (neither endpoint can validate Anthropic's signatures); unsigned blocks
# are preserved. See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
_preserve_unsigned_thinking = (
_is_kimi_family_endpoint(base_url, model)
or _is_deepseek_anthropic_endpoint(base_url)
)
_is_kimi = _is_kimi_family_endpoint(base_url, model)
last_assistant_idx = None
for i in range(len(result) - 1, -1, -1):
@@ -1689,22 +1581,22 @@ def convert_messages_to_anthropic(
if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
continue
if _preserve_unsigned_thinking:
# Kimi's /coding and DeepSeek's /anthropic endpoints both enable
# thinking server-side and require unsigned thinking blocks on
# replayed assistant tool-call messages. Strip signed Anthropic
# blocks (neither upstream can validate Anthropic signatures) but
# preserve the unsigned ones we synthesised from reasoning_content.
if _is_kimi:
# Kimi's /coding endpoint enables thinking server-side and
# requires unsigned thinking blocks on replayed assistant
# tool-call messages. Strip signed Anthropic blocks (Kimi
# can't validate signatures) but preserve the unsigned ones
# we synthesised from reasoning_content above.
new_content = []
for b in m["content"]:
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
new_content.append(b)
continue
if b.get("signature") or b.get("data"):
# Anthropic-signed block — upstream can't validate, strip
# Anthropic-signed block — Kimi can't validate, strip
continue
# Unsigned thinking (synthesised from reasoning_content) —
# keep it: the upstream needs it for message-history validation.
# keep it: Kimi needs it for message-history validation.
new_content.append(b)
m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
elif _is_third_party or idx != last_assistant_idx:
@@ -1761,7 +1653,6 @@ def build_anthropic_kwargs(
context_length: Optional[int] = None,
base_url: str | None = None,
fast_mode: bool = False,
drop_context_1m_beta: bool = False,
) -> Dict[str, Any]:
"""Build kwargs for anthropic.messages.create().
@@ -1944,22 +1835,13 @@ def build_anthropic_kwargs(
# ── Fast mode (Opus 4.6 only) ────────────────────────────────────
# Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x
# output speed. Per Anthropic docs, fast mode is only supported on
# Opus 4.6 — Opus 4.7 and other models 400 on the speed parameter.
# Only for native Anthropic endpoints — third-party providers would
# reject the unknown beta header and speed parameter.
if (
fast_mode
and not _is_third_party_anthropic_endpoint(base_url)
and _supports_fast_mode(model)
):
# output speed. Only for native Anthropic endpoints — third-party
# providers would reject the unknown beta header and speed parameter.
if fast_mode and not _is_third_party_anthropic_endpoint(base_url):
kwargs.setdefault("extra_body", {})["speed"] = "fast"
# Build extra_headers with ALL applicable betas (the per-request
# extra_headers override the client-level anthropic-beta header).
betas = list(_common_betas_for_base_url(
base_url,
drop_context_1m_beta=drop_context_1m_beta,
))
betas = list(_common_betas_for_base_url(base_url))
if is_oauth:
betas.extend(_OAUTH_ONLY_BETAS)
betas.append(_FAST_MODE_BETA)

View File

@@ -5,11 +5,11 @@ session search, web extraction, vision analysis, browser vision) picks up
the best available backend without duplicating fallback logic.
Resolution order for text tasks (auto mode):
1. User's main provider + main model (used regardless of provider type —
aggregators, direct API-key providers, native Anthropic, Codex, etc.)
2. OpenRouter (OPENROUTER_API_KEY)
3. Nous Portal (~/.hermes/auth.json active provider)
4. Custom endpoint (config.yaml model.base_url + OPENAI_API_KEY)
1. OpenRouter (OPENROUTER_API_KEY)
2. Nous Portal (~/.hermes/auth.json active provider)
3. Custom endpoint (config.yaml model.base_url + OPENAI_API_KEY)
4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
wrapped to look like a chat.completions client)
5. Native Anthropic
6. Direct API-key providers (z.ai/GLM, Kimi/Moonshot, MiniMax, MiniMax-CN)
7. None
@@ -18,16 +18,10 @@ Resolution order for vision/multimodal tasks (auto mode):
1. Selected main provider, if it is one of the supported vision backends below
2. OpenRouter
3. Nous Portal
4. Native Anthropic
5. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
6. None
Codex OAuth (ChatGPT-account auth) is intentionally NOT in either
fallback chain: OpenAI gates this endpoint behind an undocumented,
shifting model allow-list, so "just try Codex with a hardcoded model"
rots on its own. Codex is used only when the user's main provider *is*
openai-codex (Step 1 above) or when a caller explicitly requests it with
a model (auxiliary.<task>.provider + auxiliary.<task>.model).
4. Codex OAuth (gpt-5.3-codex supports vision via Responses API)
5. Native Anthropic
6. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
7. None
Per-task overrides are configured in config.yaml under the ``auxiliary:`` section
(e.g. ``auxiliary.vision.provider``, ``auxiliary.compression.model``).
@@ -107,14 +101,6 @@ from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_
logger = logging.getLogger(__name__)
def _safe_isinstance(obj: Any, maybe_type: Any) -> bool:
"""Return False instead of raising when a patched symbol is not a type."""
try:
return isinstance(obj, maybe_type)
except TypeError:
return False
def _extract_url_query_params(url: str):
"""Extract query params from URL, return (clean_url, default_query dict or None)."""
parsed = urlparse(url)
@@ -196,12 +182,6 @@ def _is_kimi_model(model: Optional[str]) -> bool:
return bare.startswith("kimi-") or bare == "kimi"
def _is_arcee_trinity_thinking(model: Optional[str]) -> bool:
"""True for Arcee Trinity Large Thinking (direct or via OpenRouter)."""
bare = (model or "").strip().lower().rsplit("/", 1)[-1]
return bare == "trinity-large-thinking"
def _fixed_temperature_for_model(
model: Optional[str],
base_url: Optional[str] = None,
@@ -219,46 +199,10 @@ def _fixed_temperature_for_model(
if _is_kimi_model(model):
logger.debug("Omitting temperature for Kimi model %r (server-managed)", model)
return OMIT_TEMPERATURE
if _is_arcee_trinity_thinking(model):
return 0.5
return None
def _compression_threshold_for_model(model: Optional[str]) -> Optional[float]:
"""Return a context-compression threshold override for specific models.
The threshold is the fraction of the model's context window that must be
consumed before Hermes triggers summarization. Higher values delay
compression and preserve more raw context.
Returns a float in (0, 1] to override the global ``compression.threshold``
config value, or ``None`` to leave the user's config value unchanged.
"""
if _is_arcee_trinity_thinking(model):
return 0.75
return None
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
def _get_aux_model_for_provider(provider_id: str) -> str:
"""Return the cheap auxiliary model for a provider.
Reads from ProviderProfile.default_aux_model first, falling back to the
legacy hardcoded dict for providers that predate the profiles system.
"""
try:
from providers import get_provider_profile
_p = get_provider_profile(provider_id)
if _p and _p.default_aux_model:
return _p.default_aux_model
except Exception:
pass
return _API_KEY_PROVIDER_AUX_MODELS_FALLBACK.get(provider_id, "")
# Fallback for providers not yet migrated to ProviderProfile.default_aux_model,
# plus providers we intentionally keep pinned here (e.g. Anthropic predates
# profiles). New providers should set default_aux_model on their profile instead.
_API_KEY_PROVIDER_AUX_MODELS_FALLBACK: Dict[str, str] = {
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
"gemini": "gemini-3-flash-preview",
"zai": "glm-4.5-flash",
"kimi-coding": "kimi-k2-turbo-preview",
@@ -266,7 +210,6 @@ _API_KEY_PROVIDER_AUX_MODELS_FALLBACK: Dict[str, str] = {
"kimi-coding-cn": "kimi-k2-turbo-preview",
"gmi": "google/gemini-3.1-flash-lite-preview",
"minimax": "MiniMax-M2.7",
"minimax-oauth": "MiniMax-M2.7-highspeed",
"minimax-cn": "MiniMax-M2.7",
"anthropic": "claude-haiku-4-5-20251001",
"ai-gateway": "google/gemini-3-flash",
@@ -277,10 +220,6 @@ _API_KEY_PROVIDER_AUX_MODELS_FALLBACK: Dict[str, str] = {
"tencent-tokenhub": "hy3-preview",
}
# Legacy alias — callers that haven't been updated to _get_aux_model_for_provider()
# can still use this dict directly. Kept in sync with _FALLBACK above.
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = _API_KEY_PROVIDER_AUX_MODELS_FALLBACK
# Vision-specific model overrides for direct providers.
# When the user's main provider has a dedicated vision/multimodal model that
# differs from their main chat model, map it here. The vision auto-detect
@@ -305,70 +244,13 @@ _PROVIDERS_WITHOUT_VISION: frozenset = frozenset({
"kimi-coding-cn",
})
# OpenRouter app attribution headers (base — always sent).
# `X-Title` is the canonical attribution header OpenRouter's dashboard
# reads; the previous `X-OpenRouter-Title` label was not recognized there.
_OR_HEADERS_BASE = {
# OpenRouter app attribution headers
_OR_HEADERS = {
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
"X-Title": "Hermes Agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
}
# Truthy values for boolean env-var parsing.
_TRUTHY_ENV_VALUES = frozenset({"1", "true", "yes", "on"})
def build_or_headers(or_config: dict | None = None) -> dict:
"""Build OpenRouter headers, optionally including response-cache headers.
Precedence for response cache: env var > config.yaml > default (enabled).
Environment variables:
``HERMES_OPENROUTER_CACHE`` — truthy (``1``/``true``/``yes``/``on``)
enables caching; ``0``/``false``/``no``/``off`` disables.
Overrides ``openrouter.response_cache`` in config.yaml.
``HERMES_OPENROUTER_CACHE_TTL`` — integer seconds (1-86400).
Overrides ``openrouter.response_cache_ttl`` in config.yaml.
*or_config* is the ``openrouter`` section from config.yaml. When *None*,
falls back to reading config from disk via ``load_config()``.
"""
headers = dict(_OR_HEADERS_BASE)
# Resolve config from disk if not provided.
if or_config is None:
try:
from hermes_cli.config import load_config
or_config = load_config().get("openrouter", {})
except Exception:
or_config = {}
# Determine cache enabled: env var overrides config.
env_cache = os.environ.get("HERMES_OPENROUTER_CACHE", "").strip().lower()
if env_cache:
cache_enabled = env_cache in _TRUTHY_ENV_VALUES
else:
cache_enabled = or_config.get("response_cache", False)
if not cache_enabled:
return headers
headers["X-OpenRouter-Cache"] = "true"
# Determine TTL: env var overrides config.
env_ttl = os.environ.get("HERMES_OPENROUTER_CACHE_TTL", "").strip()
if env_ttl:
if env_ttl.isdigit():
ttl = int(env_ttl)
if 1 <= ttl <= 86400:
headers["X-OpenRouter-Cache-TTL"] = str(ttl)
else:
ttl = or_config.get("response_cache_ttl", 300)
if isinstance(ttl, (int, float)) and 1 <= ttl <= 86400:
headers["X-OpenRouter-Cache-TTL"] = str(int(ttl))
return headers
# Vercel AI Gateway app attribution headers. HTTP-Referer maps to
# referrerUrl and X-Title maps to appName in the gateway's analytics.
from hermes_cli import __version__ as _HERMES_VERSION
@@ -394,14 +276,12 @@ _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
_ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
_AUTH_JSON_PATH = get_hermes_home() / "auth.json"
# Codex OAuth endpoint used when a caller explicitly requests
# provider="openai-codex". There is deliberately no hardcoded default
# model: the set of models OpenAI accepts on this endpoint for
# ChatGPT-account auth is an undocumented, shifting allow-list, and
# pinning one here has drifted silently twice (gpt-5.3-codex → gpt-5.2-codex
# → gpt-5.4 over 6 weeks in early 2026). Callers must pass the model
# they want explicitly (from config.yaml model.model, auxiliary.<task>.model,
# or the user's active Codex model selection).
# Codex fallback: uses the Responses API (the only endpoint the Codex
# OAuth token can access) with a fast model for auxiliary tasks.
# ChatGPT-backed Codex accounts currently reject gpt-5.3-codex for these
# auxiliary flows, while gpt-5.2-codex remains broadly available and supports
# vision via Responses.
_CODEX_AUX_MODEL = "gpt-5.2-codex"
_CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"
@@ -458,13 +338,6 @@ def _to_openai_base_url(base_url: str) -> str:
rewritten = url[: -len("/anthropic")] + "/v1"
logger.debug("Auxiliary client: rewrote base URL %s%s", url, rewritten)
return rewritten
if "api.kimi.com" in url and url.endswith("/coding"):
# Kimi Code uses /coding/v1/messages for Anthropic SDK (appends /v1/messages)
# but /coding/v1/chat/completions for OpenAI SDK (appends /chat/completions)
# Without /v1 here, OpenAI SDK hits /coding/chat/completions — a 404.
rewritten = url + "/v1"
logger.debug("Auxiliary client: rewrote Kimi base URL %s%s", url, rewritten)
return rewritten
return url
@@ -615,12 +488,7 @@ class _CodexCompletionsAdapter:
# API allows it.
pass
else:
# Truthy-only check mirrors agent/transports/codex.py
# build_kwargs(): falsy values (None, "", 0) fall back
# to the default rather than being forwarded to the
# Codex backend, which rejects e.g. {"effort": null}
# with a 400.
effort = reasoning_cfg.get("effort") or "medium"
effort = reasoning_cfg.get("effort", "medium")
# Codex backend rejects "minimal"; clamp to "low" to
# match the main-agent Codex transport behavior.
if effort == "minimal":
@@ -992,20 +860,20 @@ def _maybe_wrap_anthropic(
- The ``anthropic`` SDK is not installed (falls back to OpenAI wire).
"""
# Already wrapped — don't double-wrap.
if _safe_isinstance(client_obj, AnthropicAuxiliaryClient):
if isinstance(client_obj, AnthropicAuxiliaryClient):
return client_obj
# Other specialized adapters we should never re-dispatch.
if _safe_isinstance(client_obj, CodexAuxiliaryClient):
if isinstance(client_obj, CodexAuxiliaryClient):
return client_obj
try:
from agent.gemini_native_adapter import GeminiNativeClient
if _safe_isinstance(client_obj, GeminiNativeClient):
if isinstance(client_obj, GeminiNativeClient):
return client_obj
except ImportError:
pass
try:
from agent.copilot_acp_client import CopilotACPClient
if _safe_isinstance(client_obj, CopilotACPClient):
if isinstance(client_obj, CopilotACPClient):
return client_obj
except ImportError:
pass
@@ -1201,9 +1069,10 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
if not api_key:
continue
raw_base_url = _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
base_url = _to_openai_base_url(raw_base_url)
model = _get_aux_model_for_provider(provider_id) or None
base_url = _to_openai_base_url(
_pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
)
model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
if model is None:
continue # skip provider if we don't know a valid aux model
logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
@@ -1219,16 +1088,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
else:
try:
from providers import get_provider_profile as _gpf_aux
_ph_aux = _gpf_aux(provider_id)
if _ph_aux and _ph_aux.default_headers:
extra["default_headers"] = dict(_ph_aux.default_headers)
except Exception:
pass
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
_client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
_client = _maybe_wrap_anthropic(_client, model, api_key, base_url)
return _client, model
creds = resolve_api_key_provider_credentials(provider_id)
@@ -1236,9 +1097,10 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
if not api_key:
continue
raw_base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
base_url = _to_openai_base_url(raw_base_url)
model = _get_aux_model_for_provider(provider_id) or None
base_url = _to_openai_base_url(
str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
)
model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
if model is None:
continue # skip provider if we don't know a valid aux model
logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
@@ -1254,16 +1116,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
else:
try:
from providers import get_provider_profile as _gpf_aux2
_ph_aux2 = _gpf_aux2(provider_id)
if _ph_aux2 and _ph_aux2.default_headers:
extra["default_headers"] = dict(_ph_aux2.default_headers)
except Exception:
pass
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
_client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
_client = _maybe_wrap_anthropic(_client, model, api_key, base_url)
return _client, model
return None, None
@@ -1273,23 +1127,23 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
pool_present, entry = _select_pool_entry("openrouter")
if pool_present:
or_key = explicit_api_key or _pool_runtime_api_key(entry)
or_key = _pool_runtime_api_key(entry)
if not or_key:
return None, None
base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
logger.debug("Auxiliary client: OpenRouter via pool")
return OpenAI(api_key=or_key, base_url=base_url,
default_headers=build_or_headers()), _OPENROUTER_MODEL
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY")
or_key = os.getenv("OPENROUTER_API_KEY")
if not or_key:
return None, None
logger.debug("Auxiliary client: OpenRouter")
return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
default_headers=build_or_headers()), _OPENROUTER_MODEL
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
def _describe_openrouter_unavailable() -> str:
@@ -1557,23 +1411,7 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
return _fallback_client, model
def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
"""Build a CodexAuxiliaryClient for an explicitly-requested model.
There is no auto-selection of the Codex model: the ChatGPT-account
Codex endpoint's accepted model list is an undocumented, drifting
allow-list, so any hardcoded default we pick goes stale. The caller
is responsible for passing the model (e.g. from the user's own
``model.model`` or ``auxiliary.<task>.model`` config).
Returns (None, None) when no Codex OAuth token is available.
"""
if not model:
logger.warning(
"Auxiliary client: openai-codex requested without a model; "
"pass model explicitly (auxiliary.<task>.model in config.yaml)."
)
return None, None
def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
pool_present, entry = _select_pool_entry("openai-codex")
if pool_present:
codex_token = _pool_runtime_api_key(entry)
@@ -1589,16 +1427,16 @@ def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
if not codex_token:
return None, None
base_url = _CODEX_AUX_BASE_URL
logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", model)
logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
real_client = OpenAI(
api_key=codex_token,
base_url=base_url,
default_headers=_codex_cloudflare_headers(codex_token),
)
return CodexAuxiliaryClient(real_client, model), model
return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL
def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optional[str]]:
def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
try:
from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
except ImportError:
@@ -1608,10 +1446,10 @@ def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optiona
if pool_present:
if entry is None:
return None, None
token = explicit_api_key or _pool_runtime_api_key(entry)
token = _pool_runtime_api_key(entry)
else:
entry = None
token = explicit_api_key or resolve_anthropic_token()
token = resolve_anthropic_token()
if not token:
return None, None
@@ -1634,7 +1472,7 @@ def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optiona
from agent.anthropic_adapter import _is_oauth_token
is_oauth = _is_oauth_token(token)
model = _get_aux_model_for_provider("anthropic") or "claude-haiku-4-5-20251001"
model = _API_KEY_PROVIDER_AUX_MODELS.get("anthropic", "claude-haiku-4-5-20251001")
logger.debug("Auxiliary client: Anthropic native (%s) at %s (oauth=%s)", model, base_url, is_oauth)
try:
real_client = build_anthropic_client(token, base_url)
@@ -1650,6 +1488,7 @@ _AUTO_PROVIDER_LABELS = {
"_try_openrouter": "openrouter",
"_try_nous": "nous",
"_try_custom_endpoint": "local/custom",
"_try_codex": "openai-codex",
"_resolve_api_key_provider": "api-key",
}
@@ -1676,18 +1515,12 @@ def _get_provider_chain() -> List[tuple]:
Built at call time (not module level) so that test patches
on the ``_try_*`` functions are picked up correctly.
NOTE: ``openai-codex`` is deliberately NOT in this chain. The
ChatGPT-account Codex endpoint only accepts a shifting, undocumented
allow-list of model IDs, so falling back to it with a guessed model
fails more often than not. Codex is used only when the user's main
provider *is* openai-codex (see Step 1 of ``_resolve_auto``) or when
a caller explicitly requests it with a model.
"""
return [
("openrouter", _try_openrouter),
("nous", _try_nous),
("local/custom", _try_custom_endpoint),
("openai-codex", _try_codex),
("api-key", _resolve_api_key_provider),
]
@@ -1712,39 +1545,6 @@ def _is_payment_error(exc: Exception) -> bool:
return False
def _is_rate_limit_error(exc: Exception) -> bool:
"""Detect rate-limit errors that warrant provider fallback.
Returns True for HTTP 429 errors whose message indicates rate limiting
(as opposed to billing/quota exhaustion, which _is_payment_error handles).
Also catches OpenAI SDK RateLimitError instances that may not set
.status_code on the exception object.
"""
status = getattr(exc, "status_code", None)
err_lower = str(exc).lower()
# OpenAI SDK's RateLimitError sometimes omits .status_code —
# detect by class name so we don't miss these. (PR #8023 pattern)
if type(exc).__name__ == "RateLimitError":
return True
if status == 429:
# Distinguish rate-limit from billing: billing keywords are handled
# by _is_payment_error, everything else on 429 is a rate limit.
if any(kw in err_lower for kw in (
"rate limit", "rate_limit", "too many requests",
"try again", "retry after", "resets in",
)):
return True
# Generic 429 without billing keywords = likely a rate limit
if not any(kw in err_lower for kw in (
"credits", "insufficient funds", "billing",
"payment required", "can only afford",
)):
return True
return False
def _is_connection_error(exc: Exception) -> bool:
"""Detect connection/network errors that warrant provider fallback.
@@ -2068,7 +1868,7 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
}
sync_base_url = str(sync_client.base_url)
if base_url_host_matches(sync_base_url, "openrouter.ai"):
async_kwargs["default_headers"] = build_or_headers()
async_kwargs["default_headers"] = dict(_OR_HEADERS)
elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
from hermes_cli.copilot_auth import copilot_request_headers
@@ -2134,12 +1934,6 @@ def resolve_provider_client(
(client, resolved_model) or (None, None) if auth is unavailable.
"""
_validate_proxy_env_urls()
# Preserve the original provider name before alias normalization so a
# user-declared ``custom_providers`` entry whose name coincidentally
# matches a built-in alias (e.g. user names their custom provider "kimi"
# which aliases to "kimi-coding") is still reachable via the named-custom
# branch below.
original_provider = (provider or "").strip().lower()
# Normalise aliases
provider = _normalize_aux_provider(provider)
@@ -2210,9 +2004,9 @@ def resolve_provider_client(
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
else (client, final_model))
# ── OpenRouter ───────────────────────────────────────────
# ── OpenRouter ───────────────────────────────────────────────────
if provider == "openrouter":
client, default = _try_openrouter(explicit_api_key=explicit_api_key)
client, default = _try_openrouter()
if client is None:
logger.warning(
"resolve_provider_client: openrouter requested but %s",
@@ -2242,13 +2036,6 @@ def resolve_provider_client(
# ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
if provider == "openai-codex":
if not model:
logger.warning(
"resolve_provider_client: openai-codex requested without a "
"model; pass model explicitly (e.g. model.model in config.yaml "
"or auxiliary.<task>.model for per-task aux routing)."
)
return None, None
if raw_codex:
# Return the raw OpenAI client for callers that need direct
# access to responses.stream() (e.g., the main agent loop).
@@ -2257,7 +2044,7 @@ def resolve_provider_client(
logger.warning("resolve_provider_client: openai-codex requested "
"but no Codex OAuth token found (run: hermes model)")
return None, None
final_model = _normalize_resolved_model(model, provider)
final_model = _normalize_resolved_model(model or _CODEX_AUX_MODEL, provider)
raw_client = OpenAI(
api_key=codex_token,
base_url=_CODEX_AUX_BASE_URL,
@@ -2265,7 +2052,7 @@ def resolve_provider_client(
)
return (raw_client, final_model)
# Standard path: wrap in CodexAuxiliaryClient adapter
client, default = _build_codex_client(model)
client, default = _try_codex()
if client is None:
logger.warning("resolve_provider_client: openai-codex requested "
"but no Codex OAuth token found (run: hermes model)")
@@ -2308,9 +2095,9 @@ def resolve_provider_client(
client = _wrap_if_needed(client, final_model, custom_base, custom_key)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
else (client, final_model))
# Try custom first, then API-key providers (Codex excluded here:
# falling through to Codex with no model is a stale-constant trap).
for try_fn in (_try_custom_endpoint, _resolve_api_key_provider):
# Try custom first, then codex, then API-key providers
for try_fn in (_try_custom_endpoint, _try_codex,
_resolve_api_key_provider):
client, default = try_fn()
if client is not None:
final_model = _normalize_resolved_model(model or default, provider)
@@ -2326,18 +2113,7 @@ def resolve_provider_client(
# ── Named custom providers (config.yaml providers dict / custom_providers list) ───
try:
from hermes_cli.runtime_provider import _get_named_custom_provider
# When the raw requested name is an alias (``kimi`` → ``kimi-coding``)
# and the user defined a ``custom_providers`` entry under that alias
# name, the custom entry is the intended target — the built-in alias
# rewriting would otherwise hijack the request. Only preferred when
# the raw name is an alias (not a canonical provider name) so custom
# entries that coincidentally match a canonical provider (e.g. ``nous``)
# still defer to the built-in per `_get_named_custom_provider`'s guard.
custom_entry = None
if original_provider and original_provider != provider:
custom_entry = _get_named_custom_provider(original_provider)
if custom_entry is None:
custom_entry = _get_named_custom_provider(provider)
custom_entry = _get_named_custom_provider(provider)
if custom_entry:
custom_base = custom_entry.get("base_url", "").strip()
custom_key = custom_entry.get("api_key", "").strip()
@@ -2363,10 +2139,8 @@ def resolve_provider_client(
# Anthropic fallback SDK still sees the original URL.
if entry_api_mode == "anthropic_messages":
openai_base = custom_base
raw_base_for_wrap = custom_base
else:
openai_base = _to_openai_base_url(custom_base)
raw_base_for_wrap = custom_base
_clean_base2, _dq2 = _extract_url_query_params(openai_base)
_extra2 = {"default_query": _dq2} if _dq2 else {}
logger.debug(
@@ -2410,7 +2184,7 @@ def resolve_provider_client(
):
client = CodexAuxiliaryClient(client, final_model)
else:
client = _wrap_if_needed(client, final_model, raw_base_for_wrap, custom_key)
client = _wrap_if_needed(client, final_model, openai_base, custom_key)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
else (client, final_model))
logger.warning(
@@ -2438,7 +2212,7 @@ def resolve_provider_client(
if pconfig.auth_type == "api_key":
if provider == "anthropic":
client, default_model = _try_anthropic(explicit_api_key=explicit_api_key)
client, default_model = _try_anthropic()
if client is None:
logger.warning("resolve_provider_client: anthropic requested but no Anthropic credentials found")
return None, None
@@ -2447,12 +2221,6 @@ def resolve_provider_client(
creds = resolve_api_key_provider_credentials(provider)
api_key = str(creds.get("api_key", "")).strip()
# Honour an explicit api_key override (e.g. from a fallback_model entry
# or a custom_providers entry) so callers that pass an explicit
# credential can authenticate against endpoints where no built-in
# credential is registered for this provider alias.
if explicit_api_key:
api_key = explicit_api_key.strip() or api_key
if not api_key:
tried_sources = list(pconfig.api_key_env_vars)
if provider == "copilot":
@@ -2462,15 +2230,11 @@ def resolve_provider_client(
provider, ", ".join(tried_sources))
return None, None
raw_base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
base_url = _to_openai_base_url(raw_base_url)
# Honour an explicit base_url override from the caller — used when a
# fallback_model entry (or custom_providers lookup) routes through a
# built-in provider name but targets a user-specified endpoint.
if explicit_base_url:
base_url = _to_openai_base_url(explicit_base_url.strip().rstrip("/"))
base_url = _to_openai_base_url(
str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
)
default_model = _get_aux_model_for_provider(provider)
default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
final_model = _normalize_resolved_model(model or default_model, provider)
if provider == "gemini":
@@ -2517,7 +2281,7 @@ def resolve_provider_client(
# Anthropic-wire endpoints (Kimi Coding Plan api.kimi.com/coding,
# /anthropic-suffixed gateways) so named providers like kimi-coding
# land on the right transport without needing per-provider branches.
client = _wrap_if_needed(client, final_model, raw_base_url, api_key)
client = _wrap_if_needed(client, final_model, base_url, api_key)
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
@@ -2680,10 +2444,7 @@ def _resolve_strict_vision_backend(
if provider == "nous":
return _try_nous(vision=True)
if provider == "openai-codex":
# Route through resolve_provider_client so the caller's explicit
# model is used. There is no safe default Codex model (shifting
# allow-list); callers must specify via auxiliary.<task>.model.
return resolve_provider_client("openai-codex", model, is_vision=True)
return _try_codex()
if provider == "anthropic":
return _try_anthropic()
if provider == "custom":
@@ -2750,11 +2511,8 @@ def resolve_vision_provider_client(
return resolved_provider, sync_client, final_model
if resolved_base_url:
provider_for_base_override = (
requested if requested and requested not in ("", "auto") else "custom"
)
client, final_model = resolve_provider_client(
provider_for_base_override,
"custom",
model=resolved_model,
async_mode=async_mode,
explicit_base_url=resolved_base_url,
@@ -2762,8 +2520,8 @@ def resolve_vision_provider_client(
api_mode=resolved_api_mode,
)
if client is None:
return provider_for_base_override, None, None
return provider_for_base_override, client, final_model
return "custom", None, None
return "custom", client, final_model
if requested == "auto":
# Vision auto-detection order:
@@ -3229,14 +2987,8 @@ def _resolve_task_provider_model(
if task:
# Config.yaml is the primary source for per-task overrides.
if cfg_base_url and cfg_api_key:
# Both base_url and api_key explicitly set → custom endpoint.
if cfg_base_url:
return "custom", resolved_model, cfg_base_url, cfg_api_key, resolved_api_mode
if cfg_base_url and cfg_provider and cfg_provider != "auto":
# base_url set without api_key but with a known provider — use
# the provider so it can resolve credentials from env vars
# (e.g. OPENROUTER_API_KEY) instead of locking into "custom".
return cfg_provider, resolved_model, cfg_base_url, None, resolved_api_mode
if cfg_provider and cfg_provider != "auto":
return cfg_provider, resolved_model, None, None, resolved_api_mode
@@ -3291,7 +3043,7 @@ def _get_task_extra_body(task: str) -> Dict[str, Any]:
# Providers that use Anthropic-compatible endpoints (via OpenAI SDK wrapper).
# Their image content blocks must use Anthropic format, not OpenAI format.
_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-oauth", "minimax-cn"})
_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-cn"})
def _is_anthropic_compat_endpoint(provider: str, base_url: str) -> bool:
@@ -3403,26 +3155,7 @@ def _build_call_kwargs(
kwargs["max_tokens"] = max_tokens
if tools:
# Defensive dedup: providers like Google Vertex, Azure, and Bedrock
# reject requests with duplicate tool names (HTTP 400). The upstream
# injection paths (run_agent.py) already dedup, but this guard
# converts a hard API failure into a warning if an upstream regression
# reintroduces duplicates. See: #18478
_seen: set = set()
_deduped: list = []
for _t in tools:
_tname = (_t.get("function") or {}).get("name", "")
if _tname and _tname in _seen:
logger.warning(
"_build_call_kwargs: duplicate tool name '%s' removed "
"(provider=%s model=%s)",
_tname, provider, model,
)
continue
if _tname:
_seen.add(_tname)
_deduped.append(_t)
kwargs["tools"] = _deduped
kwargs["tools"] = tools
# Provider-specific extra_body
merged_extra = dict(extra_body or {})
@@ -3637,7 +3370,7 @@ def call_llm(
except Exception as retry_err:
# If the max_tokens retry also hits a payment or connection
# error, fall through to the fallback chain below.
if not (_is_payment_error(retry_err) or _is_connection_error(retry_err) or _is_rate_limit_error(retry_err)):
if not (_is_payment_error(retry_err) or _is_connection_error(retry_err)):
raise
first_err = retry_err
@@ -3720,27 +3453,13 @@ def call_llm(
# Codex/OAuth tokens that authenticate but whose endpoint is down,
# and providers the user never configured that got picked up by
# the auto-detection chain.
#
# ── Rate-limit fallback (#13579) ─────────────────────────────
# When the provider returns a 429 rate-limit (not billing), fall
# back to an alternative provider instead of exhausting retries
# against the same rate-limited endpoint.
should_fallback = (
_is_payment_error(first_err)
or _is_connection_error(first_err)
or _is_rate_limit_error(first_err)
)
should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
# Only try alternative providers when the user didn't explicitly
# configure this task's provider. Explicit provider = hard constraint;
# auto (the default) = best-effort fallback chain. (#7559)
is_auto = resolved_provider in ("auto", "", None)
if should_fallback and is_auto:
if _is_payment_error(first_err):
reason = "payment error"
elif _is_rate_limit_error(first_err):
reason = "rate limit"
else:
reason = "connection error"
reason = "payment error" if _is_payment_error(first_err) else "connection error"
logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
task or "call", reason, resolved_provider, first_err)
fb_client, fb_model, fb_label = _try_payment_fallback(
@@ -3943,7 +3662,7 @@ async def async_call_llm(
except Exception as retry_err:
# If the max_tokens retry also hits a payment or connection
# error, fall through to the fallback chain below.
if not (_is_payment_error(retry_err) or _is_connection_error(retry_err) or _is_rate_limit_error(retry_err)):
if not (_is_payment_error(retry_err) or _is_connection_error(retry_err)):
raise
first_err = retry_err
@@ -4012,20 +3731,11 @@ async def async_call_llm(
return _validate_llm_response(
await retry_client.chat.completions.create(**retry_kwargs), task)
# ── Payment / connection / rate-limit fallback (mirrors sync call_llm) ──
should_fallback = (
_is_payment_error(first_err)
or _is_connection_error(first_err)
or _is_rate_limit_error(first_err)
)
# ── Payment / connection fallback (mirrors sync call_llm) ─────
should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
is_auto = resolved_provider in ("auto", "", None)
if should_fallback and is_auto:
if _is_payment_error(first_err):
reason = "payment error"
elif _is_rate_limit_error(first_err):
reason = "rate limit"
else:
reason = "connection error"
reason = "payment error" if _is_payment_error(first_err) else "connection error"
logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",
task or "call", reason, resolved_provider, first_err)
fb_client, fb_model, fb_label = _try_payment_fallback(

View File

@@ -43,9 +43,6 @@ SUMMARY_PREFIX = (
"they were already addressed. "
"Your current task is identified in the '## Active Task' section of the "
"summary — resume exactly from there. "
"IMPORTANT: Your persistent memory (MEMORY.md, USER.md) in the system "
"prompt is ALWAYS authoritative and active — never ignore or deprioritize "
"memory content due to this compaction note. "
"Respond ONLY to the latest user message "
"that appears AFTER this summary. The current session state (files, "
"config, etc.) may reflect work described here — avoid repeating it:"
@@ -347,7 +344,6 @@ class ContextCompressor(ContextEngine):
self._last_aux_model_failure_model = None
self._last_compression_savings_pct = 100.0
self._ineffective_compression_count = 0
self._summary_failure_cooldown_until = 0.0 # transient errors must not block a fresh session
def update_model(
self,
@@ -542,7 +538,7 @@ class ContextCompressor(ContextEngine):
# Token-budget approach: walk backward accumulating tokens
accumulated = 0
boundary = len(result)
min_protect = min(protect_tail_count, len(result))
min_protect = min(protect_tail_count, len(result) - 1)
for i in range(len(result) - 1, -1, -1):
msg = result[i]
raw_content = msg.get("content") or ""
@@ -557,16 +553,7 @@ class ContextCompressor(ContextEngine):
break
accumulated += msg_tokens
boundary = i
# Translate the budget walk into a "protected count", apply the
# floor in count-space (where `max` reads naturally: protect at
# least `min_protect` messages or whatever the budget reserved,
# whichever is more), then convert back to a prune boundary.
# Doing this in index-space with `max` would invert the direction
# (smaller index = MORE protected), so a generous budget would
# silently get truncated back down to `min_protect`.
budget_protect_count = len(result) - boundary
protected_count = max(budget_protect_count, min_protect)
prune_boundary = len(result) - protected_count
prune_boundary = max(boundary, len(result) - min_protect)
else:
prune_boundary = len(result) - protect_tail_count
@@ -582,8 +569,6 @@ class ContextCompressor(ContextEngine):
# Skip multimodal content (list of content blocks)
if isinstance(content, list):
continue
if not isinstance(content, str):
continue
if len(content) < 200:
continue
h = hashlib.md5(content.encode("utf-8", errors="replace")).hexdigest()[:12]
@@ -603,8 +588,6 @@ class ContextCompressor(ContextEngine):
# Skip multimodal content (list of content blocks)
if isinstance(content, list):
continue
if not isinstance(content, str):
continue
if not content or content == _PRUNED_TOOL_PLACEHOLDER:
continue
# Skip already-deduplicated or previously-summarized results
@@ -920,19 +903,15 @@ The user has requested that this compaction PRIORITISE preserving all informatio
or "does not exist" in _err_str
or "no available channel" in _err_str
)
_is_timeout = (
_status in (408, 429, 502, 504)
or "timeout" in _err_str
)
if (
(_is_model_not_found or _is_timeout)
_is_model_not_found
and self.summary_model
and self.summary_model != self.model
and not getattr(self, "_summary_model_fallen_back", False)
):
self._summary_model_fallen_back = True
logging.warning(
"Summary model '%s' unavailable (%s). "
"Summary model '%s' not available (%s). "
"Falling back to main model '%s' for compression.",
self.summary_model, e, self.model,
)
@@ -996,39 +975,15 @@ The user has requested that this compaction PRIORITISE preserving all informatio
return None
@staticmethod
def _strip_summary_prefix(summary: str) -> str:
"""Return summary body without the current or legacy handoff prefix."""
text = (summary or "").strip()
for prefix in (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX):
if text.startswith(prefix):
return text[len(prefix):].lstrip()
return text
@classmethod
def _with_summary_prefix(cls, summary: str) -> str:
def _with_summary_prefix(summary: str) -> str:
"""Normalize summary text to the current compaction handoff format."""
text = cls._strip_summary_prefix(summary)
text = (summary or "").strip()
for prefix in (LEGACY_SUMMARY_PREFIX, SUMMARY_PREFIX):
if text.startswith(prefix):
text = text[len(prefix):].lstrip()
break
return f"{SUMMARY_PREFIX}\n{text}" if text else SUMMARY_PREFIX
@staticmethod
def _is_context_summary_content(content: Any) -> bool:
text = _content_text_for_contains(content).lstrip()
return text.startswith(SUMMARY_PREFIX) or text.startswith(LEGACY_SUMMARY_PREFIX)
@classmethod
def _find_latest_context_summary(
cls,
messages: List[Dict[str, Any]],
start: int,
end: int,
) -> tuple[Optional[int], str]:
"""Find the newest handoff summary inside a compression window."""
for idx in range(end - 1, start - 1, -1):
content = messages[idx].get("content")
if cls._is_context_summary_content(content):
return idx, cls._strip_summary_prefix(_content_text_for_contains(content))
return None, ""
# ------------------------------------------------------------------
# Tool-call / tool-result pair integrity helpers
# ------------------------------------------------------------------
@@ -1037,8 +992,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
def _get_tool_call_id(tc) -> str:
"""Extract the call ID from a tool_call entry (dict or SimpleNamespace)."""
if isinstance(tc, dict):
return tc.get("call_id", "") or tc.get("id", "") or ""
return getattr(tc, "call_id", "") or getattr(tc, "id", "") or ""
return tc.get("id", "")
return getattr(tc, "id", "") or ""
def _sanitize_tool_pairs(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Fix orphaned tool_call / tool_result pairs after compression.
@@ -1335,15 +1290,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
return messages
turns_to_summarize = messages[compress_start:compress_end]
summary_idx, summary_body = self._find_latest_context_summary(
messages,
compress_start,
compress_end,
)
if summary_idx is not None:
if summary_body and not self._previous_summary:
self._previous_summary = summary_body
turns_to_summarize = messages[summary_idx + 1:compress_end]
if not self.quiet_mode:
logger.info(
@@ -1376,7 +1322,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
msg = messages[i].copy()
if i == 0 and msg.get("role") == "system":
existing = msg.get("content")
_compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work. Your persistent memory (MEMORY.md, USER.md) remains fully authoritative regardless of compaction.]"
_compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
if _compression_note not in _content_text_for_contains(existing):
msg["content"] = _append_text_to_content(
existing,
@@ -1421,19 +1367,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Merge the summary into the first tail message instead
# of inserting a standalone message that breaks alternation.
_merge_summary_into_tail = True
# When the summary lands as a standalone role="user" message,
# weak models read the verbatim "## Active Task" quote of a past
# user request as fresh input (#11475, #14521). Append the explicit
# end marker — the same one used in the merge-into-tail path — so
# the model has a clear "summary above, not new input" signal.
if not _merge_summary_into_tail and summary_role == "user":
summary = (
summary
+ "\n\n--- END OF CONTEXT SUMMARY — "
"respond to the message below, not the summary above ---"
)
if not _merge_summary_into_tail:
compressed.append({"role": summary_role, "content": summary})

View File

@@ -608,7 +608,7 @@ class CopilotACPClient:
end = start + limit if isinstance(limit, int) and limit > 0 else None
content = "".join(lines[start:end])
if content:
content = redact_sensitive_text(content, force=True)
content = redact_sensitive_text(content)
response = {
"jsonrpc": "2.0",
"id": message_id,

View File

@@ -3,7 +3,6 @@
from __future__ import annotations
import logging
import os
import random
import threading
import time
@@ -14,7 +13,7 @@ from datetime import datetime
from typing import Any, Dict, List, Optional, Set, Tuple
from hermes_constants import OPENROUTER_BASE_URL
from hermes_cli.config import get_env_value, load_env
from hermes_cli.config import get_env_value
import hermes_cli.auth as auth_mod
from hermes_cli.auth import (
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
@@ -1300,48 +1299,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
except Exception as exc:
logger.debug("Qwen OAuth token seed failed: %s", exc)
elif provider == "minimax-oauth":
# MiniMax OAuth tokens live in ~/.hermes/auth.json providers.minimax-oauth.
# Seed the pool so `/auth list` reflects the logged-in state and the
# standard `hermes auth remove minimax-oauth <N>` flow works.
# Use refresh_if_expiring=False equivalent: resolve_minimax_oauth_runtime_credentials
# always refreshes on expiry, so instead read raw state here to avoid
# surprise network calls during provider discovery.
try:
from hermes_cli.auth import get_provider_auth_state
state = get_provider_auth_state("minimax-oauth")
if state and state.get("access_token"):
source_name = "oauth"
if not _is_suppressed(provider, source_name):
active_sources.add(source_name)
expires_at_ms = None
try:
from datetime import datetime as _dt
raw = state.get("expires_at", "")
if raw:
expires_at_ms = int(_dt.fromisoformat(raw).timestamp() * 1000)
except Exception:
expires_at_ms = None
base_url = str(state.get("inference_base_url", "") or "").rstrip("/")
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_OAUTH,
"access_token": state["access_token"],
"refresh_token": state.get("refresh_token"),
"expires_at_ms": expires_at_ms,
"base_url": base_url,
"label": state.get("label", "") or label_from_token(
state.get("access_token", ""), source_name
),
},
)
except Exception as exc:
logger.debug("MiniMax OAuth token seed failed: %s", exc)
elif provider == "openai-codex":
# Respect user suppression — `hermes auth remove openai-codex` marks
# the device_code source as suppressed so it won't be re-seeded from
@@ -1381,16 +1338,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
changed = False
active_sources: Set[str] = set()
# Prefer ~/.hermes/.env over os.environ — the user's config file is the
# authoritative source for Hermes credentials. Stale env vars from parent
# processes (Codex CLI, test scripts, etc.) should not override deliberate
# changes to the .env file.
def _get_env_prefer_dotenv(key: str) -> str:
env_file = load_env()
val = env_file.get(key) or os.environ.get(key) or ""
return val.strip()
# Honour user suppression — `hermes auth remove <provider> <N>` for an
# env-seeded credential marks the env:<VAR> source as suppressed so it
# won't be re-seeded from the user's shell environment or ~/.hermes/.env.
@@ -1402,8 +1349,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
def _is_source_suppressed(_p, _s): # type: ignore[misc]
return False
if provider == "openrouter":
# Prefer ~/.hermes/.env over os.environ
token = _get_env_prefer_dotenv("OPENROUTER_API_KEY")
# Check both os.environ and ~/.hermes/.env file
token = (get_env_value("OPENROUTER_API_KEY") or "").strip()
if token:
source = "env:OPENROUTER_API_KEY"
if _is_source_suppressed(provider, source):
@@ -1429,7 +1376,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
env_url = ""
if pconfig.base_url_env_var:
env_url = _get_env_prefer_dotenv(pconfig.base_url_env_var).rstrip("/")
env_url = (get_env_value(pconfig.base_url_env_var) or "").strip().rstrip("/")
env_vars = list(pconfig.api_key_env_vars)
if provider == "anthropic":
@@ -1440,8 +1387,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
]
for env_var in env_vars:
# Prefer ~/.hermes/.env over os.environ
token = _get_env_prefer_dotenv(env_var)
# Check both os.environ and ~/.hermes/.env file
token = (get_env_value(env_var) or "").strip()
if not token:
continue
source = f"env:{env_var}"

View File

@@ -252,19 +252,6 @@ def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
return result
def _remove_minimax_oauth(provider: str, removed) -> RemovalResult:
"""MiniMax OAuth lives in auth.json providers.minimax-oauth — clear it.
Same pattern as Nous: single-source OAuth state with refresh tokens.
Suppression of the `oauth` source ensures the pool reseed path
(_seed_from_singletons) doesn't instantly undo the removal.
"""
result = RemovalResult()
if _clear_auth_store_provider(provider):
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
return result
def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
"""Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
@@ -402,11 +389,6 @@ def _register_all_sources() -> None:
remove_fn=_remove_qwen_cli,
description="~/.qwen/oauth_creds.json",
))
register(RemovalStep(
provider="minimax-oauth", source_id="oauth",
remove_fn=_remove_minimax_oauth,
description="auth.json providers.minimax-oauth",
))
register(RemovalStep(
provider="*", source_id="config:",
match_fn=lambda src: src.startswith("config:") or src == "model_config",

File diff suppressed because it is too large Load Diff

View File

@@ -1,693 +0,0 @@
"""Curator snapshot + rollback.
A pre-run snapshot of ``~/.hermes/skills/`` (excluding ``.curator_backups/``
itself) is taken before any mutating curator pass. Snapshots are tar.gz
files under ``~/.hermes/skills/.curator_backups/<utc-iso>/`` with a
companion ``manifest.json`` describing the snapshot (reason, time, size,
counted skill files). Rollback picks a snapshot, moves the current
``skills/`` tree aside into another snapshot so even the rollback itself
is undoable, then extracts the chosen snapshot into place.
The snapshot does NOT include:
- ``.curator_backups/`` (would recurse)
- ``.hub/`` (hub-installed skills — managed by the hub, not us)
It DOES include:
- all SKILL.md files + their directories (``scripts/``, ``references/``,
``templates/``, ``assets/``)
- ``.usage.json`` (usage telemetry — needed to rehydrate state cleanly)
- ``.archive/`` (so rollback restores previously-archived skills too)
- ``.curator_state`` (so rolling back also restores the last-run-at
pointer — otherwise the curator would immediately re-fire on the next
tick)
- ``.bundled_manifest`` (so protection markers stay consistent)
Alongside the skills tarball, each snapshot also captures a copy of
``~/.hermes/cron/jobs.json`` as ``cron-jobs.json`` when it exists. Cron
jobs reference skills by name in their ``skills``/``skill`` fields; the
curator's consolidation pass rewrites those in place via
``cron.jobs.rewrite_skill_refs()``. Without capturing the pre-run state,
rolling back the skills tree would leave cron jobs pointing at the
umbrella skills even though the narrow skills they were originally
configured with have been restored. We store the whole jobs.json for
fidelity but rollback only touches the ``skills``/``skill`` fields — the
rest (schedule, next_run_at, enabled, prompt, etc.) is live state and
we leave it alone.
"""
from __future__ import annotations
import json
import logging
import os
import re
import shutil
import tarfile
import tempfile
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from hermes_constants import get_hermes_home
logger = logging.getLogger(__name__)

# Default number of snapshots retained when config provides no `keep` value.
DEFAULT_KEEP = 5

# Entries under skills/ that should NEVER be rolled up into a snapshot.
# .hub/ is managed by the skills hub; rolling it back would break lockfile
# invariants. .curator_backups is the backup dir itself — recursion bomb.
_EXCLUDE_TOP_LEVEL = {".curator_backups", ".hub"}

# Snapshot id regex: UTC ISO with colons replaced by dashes so the filename
# is portable (Windows-safe). An optional ``-NN`` suffix handles two
# snapshots landing in the same wallclock second.
_ID_RE = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}Z(-\d{2})?$")
def _backups_dir() -> Path:
    """Directory holding curator snapshots (``~/.hermes/skills/.curator_backups``)."""
    home = get_hermes_home()
    return home / "skills" / ".curator_backups"
def _skills_dir() -> Path:
    """Root of the live skills tree (``~/.hermes/skills``)."""
    return get_hermes_home().joinpath("skills")
def _cron_jobs_file() -> Path:
    """Source path for the live cron jobs store (``~/.hermes/cron/jobs.json``)."""
    home = get_hermes_home()
    return home / "cron" / "jobs.json"


# Filename of the cron snapshot stored alongside skills.tar.gz in each backup.
CRON_JOBS_FILENAME = "cron-jobs.json"
def _backup_cron_jobs_into(dest: Path) -> Dict[str, Any]:
    """Copy the live cron jobs.json into ``dest`` as ``cron-jobs.json``.

    Best-effort and never raises: the returned dict records whether the
    copy happened (``backed_up``), a diagnostic job count (``jobs_count``),
    and on failure a human-readable ``reason`` so the caller can fold the
    outcome into the snapshot manifest. A missing or unreadable cron file
    does not abort the snapshot — the skills side is still useful alone.
    """
    result: Dict[str, Any] = {"backed_up": False, "jobs_count": 0}
    source = _cron_jobs_file()
    if not source.exists():
        result["reason"] = "no cron/jobs.json present"
        return result

    try:
        text = source.read_text(encoding="utf-8")
    except OSError as e:
        logger.debug("Failed to read cron/jobs.json for backup: %s", e)
        result["reason"] = f"read error: {e}"
        return result

    # Best-effort job count for diagnostics. An unparseable file does not
    # fail the snapshot — the raw text is still copied verbatim and the
    # manifest records a parse warning instead. jobs.json normally wraps
    # the list as `{"jobs": [...], "updated_at": ...}`; a bare list is
    # accepted for forward compatibility.
    try:
        data = json.loads(text)
    except (json.JSONDecodeError, TypeError):
        result["jobs_count"] = 0
        result["parse_warning"] = "jobs.json was not valid JSON at snapshot time"
    else:
        if isinstance(data, dict) and isinstance(data.get("jobs"), list):
            result["jobs_count"] = len(data["jobs"])
        elif isinstance(data, list):
            result["jobs_count"] = len(data)

    try:
        dest.joinpath(CRON_JOBS_FILENAME).write_text(text, encoding="utf-8")
    except OSError as e:
        logger.debug("Failed to write cron backup file: %s", e)
        result["reason"] = f"write error: {e}"
        return result

    result["backed_up"] = True
    return result
def _utc_id(now: Optional[datetime] = None) -> str:
"""UTC ISO-ish filesystem-safe timestamp: ``2026-05-01T13-05-42Z``."""
if now is None:
now = datetime.now(timezone.utc)
# isoformat → "2026-05-01T13:05:42.123456+00:00"; strip subseconds and tz.
s = now.replace(microsecond=0).isoformat()
if s.endswith("+00:00"):
s = s[:-6]
return s.replace(":", "-") + "Z"
def _load_config() -> Dict[str, Any]:
    """Return the ``curator.backup`` section of the user config, or ``{}``.

    Any failure — missing config module, unreadable file, or a value of an
    unexpected shape at any level — maps to an empty dict so callers always
    fall back to plain defaults.
    """
    try:
        from hermes_cli.config import load_config
        cfg = load_config()
    except Exception as e:
        logger.debug("Failed to load config for curator backup: %s", e)
        return {}
    if not isinstance(cfg, dict):
        return {}
    curator_section = cfg.get("curator") or {}
    if not isinstance(curator_section, dict):
        return {}
    backup_section = curator_section.get("backup") or {}
    return backup_section if isinstance(backup_section, dict) else {}
def is_enabled() -> bool:
    """Default ON — the whole point of the backup is safety by default."""
    enabled = _load_config().get("enabled", True)
    return bool(enabled)
def get_keep() -> int:
    """Number of snapshots to retain (config ``curator.backup.keep``, min 1)."""
    raw = _load_config().get("keep", DEFAULT_KEEP)
    try:
        keep = int(raw)
    except (TypeError, ValueError):
        # Non-numeric config value — fall back to the shipped default.
        keep = DEFAULT_KEEP
    return max(1, keep)
# ---------------------------------------------------------------------------
# Snapshot
# ---------------------------------------------------------------------------
def _count_skill_files(base: Path) -> int:
try:
return sum(1 for _ in base.rglob("SKILL.md"))
except OSError:
return 0
def _write_manifest(dest: Path, reason: str, archive_path: Path,
skills_counted: int,
cron_info: Optional[Dict[str, Any]] = None) -> None:
manifest = {
"id": dest.name,
"reason": reason,
"created_at": datetime.now(timezone.utc).isoformat(),
"archive": archive_path.name,
"archive_bytes": archive_path.stat().st_size,
"skill_files": skills_counted,
}
if cron_info is not None:
manifest["cron_jobs"] = {
"backed_up": bool(cron_info.get("backed_up", False)),
"jobs_count": int(cron_info.get("jobs_count", 0)),
}
if not cron_info.get("backed_up"):
manifest["cron_jobs"]["reason"] = cron_info.get("reason", "not captured")
if cron_info.get("parse_warning"):
manifest["cron_jobs"]["parse_warning"] = cron_info["parse_warning"]
(dest / "manifest.json").write_text(
json.dumps(manifest, indent=2, sort_keys=True), encoding="utf-8"
)
def snapshot_skills(reason: str = "manual") -> Optional[Path]:
    """Create a tar.gz snapshot of ``~/.hermes/skills/`` and prune old ones.

    Returns the snapshot directory path, or ``None`` if the snapshot was
    skipped (backup disabled, skills dir missing, or an IO error occurred —
    in which case we log at debug and return None so the curator never
    aborts a pass because of a backup failure).
    """
    if not is_enabled():
        logger.debug("Curator backup disabled by config; skipping snapshot")
        return None
    skills = _skills_dir()
    if not skills.exists():
        logger.debug("No ~/.hermes/skills/ directory — nothing to back up")
        return None
    backups = _backups_dir()
    try:
        backups.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        logger.debug("Failed to create backups dir %s: %s", backups, e)
        return None
    # Uniquify: if a snapshot with the same second already exists (can
    # happen if two curator runs fire in the same second), append a short
    # counter. Avoids clobbering and avoids timestamp collisions.
    base_id = _utc_id()
    snap_id = base_id
    counter = 1
    while (backups / snap_id).exists():
        snap_id = f"{base_id}-{counter:02d}"
        counter += 1
    dest = backups / snap_id
    try:
        # exist_ok=False: the uniquify loop above guarantees the dir is new;
        # a collision here would mean a race and should fail loudly (caught).
        dest.mkdir(parents=True, exist_ok=False)
    except OSError as e:
        logger.debug("Failed to create snapshot dir %s: %s", dest, e)
        return None
    archive = dest / "skills.tar.gz"
    try:
        # Stream into the tarball — no tempdir copy needed.
        with tarfile.open(archive, "w:gz", compresslevel=6) as tf:
            for entry in sorted(skills.iterdir()):
                if entry.name in _EXCLUDE_TOP_LEVEL:
                    continue
                # arcname: store paths relative to skills/ so extraction
                # drops cleanly back into the skills dir.
                tf.add(str(entry), arcname=entry.name, recursive=True)
        # Capture cron/jobs.json alongside the tarball. Never fails the
        # snapshot — the skills side is the core guarantee; cron is
        # additive. We still record in the manifest whether it was
        # captured so rollback can surface "no cron data in this snapshot".
        cron_info = _backup_cron_jobs_into(dest)
        # Manifest write is inside the same try: a failed manifest also
        # discards the partial snapshot below, so every surviving snapshot
        # dir is complete (tarball + manifest).
        _write_manifest(dest, reason, archive,
                        _count_skill_files(skills),
                        cron_info=cron_info)
    except (OSError, tarfile.TarError) as e:
        logger.debug("Curator snapshot failed: %s", e, exc_info=True)
        # Clean up partial snapshot
        try:
            shutil.rmtree(dest, ignore_errors=True)
        except OSError:
            pass
        return None
    _prune_old(keep=get_keep())
    logger.info("Curator snapshot created: %s (%s)", snap_id, reason)
    return dest
def _prune_old(keep: int) -> List[str]:
    """Delete regular snapshots beyond the newest *keep*. Returns deleted
    ids. Staging dirs (``.rollback-staging-*``) are implementation detail
    and pruned independently on every call."""
    root = _backups_dir()
    if not root.exists():
        return []
    snapshots: List[Tuple[str, Path]] = []
    stale: List[Path] = []
    for child in root.iterdir():
        if not child.is_dir():
            continue
        if child.name.startswith(".rollback-staging-"):
            # Staging dirs only exist briefly during a rollback; finding one
            # here means a rollback crashed — clean it up opportunistically.
            stale.append(child)
        elif _ID_RE.match(child.name):
            snapshots.append((child.name, child))
    # Newest first (lexicographic works because the id is UTC ISO).
    snapshots.sort(key=lambda pair: pair[0], reverse=True)
    removed: List[str] = []
    for _, victim in snapshots[keep:]:
        try:
            shutil.rmtree(victim)
        except OSError as e:
            logger.debug("Failed to prune %s: %s", victim, e)
        else:
            removed.append(victim.name)
    for victim in stale:
        try:
            shutil.rmtree(victim)
        except OSError as e:
            logger.debug("Failed to clean stale staging dir %s: %s", victim, e)
    return removed
# ---------------------------------------------------------------------------
# List + rollback
# ---------------------------------------------------------------------------
def _read_manifest(snap_dir: Path) -> Dict[str, Any]:
mf = snap_dir / "manifest.json"
if not mf.exists():
return {}
try:
return json.loads(mf.read_text(encoding="utf-8"))
except (OSError, json.JSONDecodeError):
return {}
def list_backups() -> List[Dict[str, Any]]:
    """Return all restorable snapshots, newest first. Only entries with a
    real ``skills.tar.gz`` tarball are listed — transient
    ``.rollback-staging-*`` directories created mid-rollback are
    implementation detail and not shown."""
    root = _backups_dir()
    if not root.exists():
        return []
    results: List[Dict[str, Any]] = []
    # reverse lexicographic sort = newest first (ids are UTC ISO stamps).
    for child in sorted(root.iterdir(), reverse=True):
        if not (child.is_dir() and _ID_RE.match(child.name)):
            continue
        tarball = child / "skills.tar.gz"
        if not tarball.exists():
            continue
        entry = _read_manifest(child)
        entry.setdefault("id", child.name)
        entry.setdefault("path", str(child))
        if "archive_bytes" not in entry:
            # Manifest predates the size field (or is missing) — stat the
            # tarball directly, defaulting to 0 on filesystem errors.
            try:
                entry["archive_bytes"] = tarball.stat().st_size
            except OSError:
                entry["archive_bytes"] = 0
        results.append(entry)
    return results
def _resolve_backup(backup_id: Optional[str]) -> Optional[Path]:
    """Return the path of the requested backup, or the newest one if
    *backup_id* is None. Returns None if no match."""
    root = _backups_dir()
    if not root.exists():
        return None

    def _restorable(p: Path) -> bool:
        # A snapshot is restorable only if it has a well-formed id AND its
        # tarball actually exists (guards against half-written snapshots).
        return (
            p.is_dir()
            and bool(_ID_RE.match(p.name))
            and (p / "skills.tar.gz").exists()
        )

    if backup_id:
        candidate = root / backup_id
        return candidate if _restorable(candidate) else None
    # No explicit id: newest restorable snapshot wins (ids sort by time).
    for child in sorted(root.iterdir(), reverse=True):
        if _restorable(child):
            return child
    return None
def _restore_cron_skill_links(snapshot_dir: Path) -> Dict[str, Any]:
    """Reconcile backed-up cron skill links into the live ``cron/jobs.json``.

    We do NOT overwrite the whole cron file. Only the ``skills`` and
    ``skill`` fields are restored, and only on jobs that still exist in the
    current file (matched by ``id``). Everything else about the job —
    schedule, next_run_at, last_run_at, enabled, prompt, workdir, hooks —
    is live state that the user/scheduler has modified since the snapshot;
    overwriting it would regress unrelated cron activity.

    Rules:
      - Jobs present in backup AND live, with differing skills → skills restored.
      - Jobs present in backup AND live, with matching skills → no-op.
      - Jobs present in backup but gone from live (user deleted the job
        after the snapshot) → skipped, noted in the return report.
      - Jobs present in live but not in backup (user created a new cron
        job after the snapshot) → left untouched.

    Never raises; failures are captured in the return dict. Writes through
    ``cron.jobs`` to pick up the same lock + atomic-write path that tick()
    uses, so we don't race the scheduler.
    """
    # Report shape: "attempted" flips True once we got far enough to look at
    # job data; "error" stays None on full success.
    report: Dict[str, Any] = {
        "attempted": False,
        "restored": [],
        "skipped_missing": [],
        "unchanged": 0,
        "error": None,
    }
    backup_file = snapshot_dir / CRON_JOBS_FILENAME
    if not backup_file.exists():
        report["error"] = f"snapshot has no {CRON_JOBS_FILENAME}"
        return report
    try:
        backup_text = backup_file.read_text(encoding="utf-8")
        backup_parsed = json.loads(backup_text)
    except (OSError, json.JSONDecodeError) as e:
        report["error"] = f"failed to load backed-up jobs: {e}"
        return report
    # jobs.json on disk is `{"jobs": [...], "updated_at": ...}`; accept both
    # that shape and a bare list for forward compat.
    if isinstance(backup_parsed, dict):
        backup_jobs = backup_parsed.get("jobs")
    elif isinstance(backup_parsed, list):
        backup_jobs = backup_parsed
    else:
        backup_jobs = None
    if not isinstance(backup_jobs, list):
        report["error"] = "backed-up cron-jobs.json has no jobs list"
        return report
    # Build a lookup of the backed-up skill state keyed by job id.
    # We only need the two skill-ish fields (legacy single and modern list).
    backup_by_id: Dict[str, Dict[str, Any]] = {}
    for job in backup_jobs:
        if not isinstance(job, dict):
            continue
        jid = job.get("id")
        if not isinstance(jid, str) or not jid:
            continue
        backup_by_id[jid] = {
            "skills": job.get("skills"),
            "skill": job.get("skill"),
            "name": job.get("name") or jid,
        }
    if not backup_by_id:
        report["attempted"] = True  # we tried but there was nothing to do
        return report
    # Load and rewrite the live jobs under the scheduler's lock.
    # Import lazily so this module stays usable when cron isn't installed.
    try:
        from cron.jobs import load_jobs, save_jobs, _jobs_file_lock
    except ImportError as e:
        report["error"] = f"cron module unavailable: {e}"
        return report
    report["attempted"] = True
    try:
        with _jobs_file_lock:
            live_jobs = load_jobs()
            changed = False
            live_ids = set()
            for live in live_jobs:
                if not isinstance(live, dict):
                    continue
                jid = live.get("id")
                if not isinstance(jid, str) or not jid:
                    continue
                live_ids.add(jid)
                backup = backup_by_id.get(jid)
                if backup is None:
                    continue  # live job didn't exist at snapshot time
                cur_skills = live.get("skills")
                cur_skill = live.get("skill")
                bkp_skills = backup.get("skills")
                bkp_skill = backup.get("skill")
                if cur_skills == bkp_skills and cur_skill == bkp_skill:
                    report["unchanged"] += 1
                    continue
                # Restore. Preserve absence (don't force the key to appear
                # if the backup didn't have it either).
                if bkp_skills is None:
                    live.pop("skills", None)
                else:
                    live["skills"] = bkp_skills
                if bkp_skill is None:
                    live.pop("skill", None)
                else:
                    live["skill"] = bkp_skill
                # Record before/after values so the CLI can show a diff.
                report["restored"].append({
                    "job_id": jid,
                    "job_name": backup.get("name") or jid,
                    "from": {"skills": cur_skills, "skill": cur_skill},
                    "to": {"skills": bkp_skills, "skill": bkp_skill},
                })
                changed = True
            # Jobs in backup but not in live = user deleted them after snapshot
            for jid, backup in backup_by_id.items():
                if jid not in live_ids:
                    report["skipped_missing"].append({
                        "job_id": jid,
                        "job_name": backup.get("name") or jid,
                    })
            if changed:
                # Only write when something actually changed — avoids
                # bumping jobs.json's updated_at on a no-op restore.
                save_jobs(live_jobs)
    except Exception as e:  # noqa: BLE001 — rollback must not die mid-restore
        logger.debug("Cron skill-link restore failed: %s", e, exc_info=True)
        report["error"] = f"restore failed mid-flight: {e}"
    return report
def rollback(backup_id: Optional[str] = None) -> Tuple[bool, str, Optional[Path]]:
    """Restore ``~/.hermes/skills/`` from a snapshot.

    Strategy:
      1. Resolve the target snapshot (explicit id or newest regular).
      2. Take a safety snapshot of the CURRENT skills tree under
         ``.curator_backups/pre-rollback-<ts>/`` so the rollback itself is
         undoable.
      3. Move all current top-level entries (except ``.curator_backups``
         and ``.hub``) into a tempdir.
      4. Extract the chosen snapshot into ``~/.hermes/skills/``.
      5. On failure during 4, move the tempdir contents back (best-effort)
         and return failure.

    Parameters
    ----------
    backup_id
        Explicit snapshot id; ``None`` selects the newest regular snapshot.

    Returns ``(ok, message, snapshot_path)``.
    """
    target = _resolve_backup(backup_id)
    if target is None:
        return (
            False,
            "no matching backup found"
            + (f" for id '{backup_id}'" if backup_id else "")
            + " (use `hermes curator rollback --list` to see available snapshots)",
            None,
        )
    archive = target / "skills.tar.gz"
    if not archive.exists():
        return (False, f"snapshot {target.name} has no skills.tar.gz — corrupted?", None)
    skills = _skills_dir()
    skills.mkdir(parents=True, exist_ok=True)
    backups = _backups_dir()
    backups.mkdir(parents=True, exist_ok=True)
    # Step 2: safety snapshot of current state FIRST. If this fails we bail
    # out before touching anything — otherwise a failed extract could leave
    # the user with no skills.
    try:
        snapshot_skills(reason=f"pre-rollback to {target.name}")
    except Exception as e:
        return (False, f"pre-rollback safety snapshot failed: {e}", None)
    # Additionally move current entries into an internal staging dir so
    # the extract happens into an empty skills tree (predictable result).
    # This dir is implementation detail — not listed as a restorable
    # backup. The safety snapshot above is the user-facing undo handle.
    staged = backups / f".rollback-staging-{_utc_id()}"
    try:
        staged.mkdir(parents=True, exist_ok=False)
    except OSError as e:
        return (False, f"failed to create staging dir: {e}", None)
    # Track (original_path, staged_path) pairs so a failure mid-stage or
    # mid-extract can move everything back where it came from.
    moved: List[Tuple[Path, Path]] = []
    try:
        for entry in list(skills.iterdir()):
            if entry.name in _EXCLUDE_TOP_LEVEL:
                continue
            dest = staged / entry.name
            shutil.move(str(entry), str(dest))
            moved.append((entry, dest))
    except OSError as e:
        # Best-effort rollback of the move
        for orig, dest in moved:
            try:
                shutil.move(str(dest), str(orig))
            except OSError:
                pass
        try:
            shutil.rmtree(staged, ignore_errors=True)
        except OSError:
            pass
        return (False, f"failed to stage current skills: {e}", None)
    # Step 4: extract the snapshot into skills/
    try:
        with tarfile.open(archive, "r:gz") as tf:
            # Python 3.12+ supports filter='data' for safer extraction.
            # Fall back to the unfiltered call for older interpreters but
            # still reject absolute paths and .. components defensively.
            for member in tf.getmembers():
                name = member.name
                if name.startswith("/") or ".." in Path(name).parts:
                    raise tarfile.TarError(
                        f"refusing to extract unsafe path: {name!r}"
                    )
            try:
                tf.extractall(str(skills), filter="data")  # type: ignore[call-arg]
            except TypeError:
                # Python < 3.12 — no filter kwarg
                tf.extractall(str(skills))
    except (OSError, tarfile.TarError) as e:
        # Best-effort recover: move staged contents back
        for orig, dest in moved:
            try:
                shutil.move(str(dest), str(orig))
            except OSError:
                pass
        try:
            shutil.rmtree(staged, ignore_errors=True)
        except OSError:
            pass
        return (False, f"snapshot extract failed (state restored): {e}", None)
    # Extract succeeded — the staging dir has served its purpose. The
    # user's undo handle is the safety snapshot tarball we took earlier.
    try:
        shutil.rmtree(staged, ignore_errors=True)
    except OSError:
        pass
    # Reconcile cron skill-links. Surgical: only the skills/skill fields
    # on jobs matched by id. Everything else in jobs.json is live state
    # (schedule, next_run_at, enabled, prompt, etc.) and we leave it
    # alone. Failures here don't fail the overall rollback — the skills
    # tree is already restored, which is the main guarantee.
    cron_report = _restore_cron_skill_links(target)
    summary_bits = [f"restored from snapshot {target.name}"]
    if cron_report.get("attempted"):
        restored_n = len(cron_report.get("restored") or [])
        skipped_n = len(cron_report.get("skipped_missing") or [])
        if cron_report.get("error"):
            summary_bits.append(f"cron links: error — {cron_report['error']}")
        elif restored_n == 0 and skipped_n == 0 and cron_report.get("unchanged", 0) == 0:
            # Attempted but nothing matched — empty snapshot or no overlapping ids.
            pass
        else:
            parts = []
            if restored_n:
                parts.append(f"{restored_n} job(s) had skill links restored")
            if skipped_n:
                parts.append(f"{skipped_n} backed-up job(s) no longer exist (skipped)")
            if cron_report.get("unchanged"):
                parts.append(f"{cron_report['unchanged']} already matched")
            summary_bits.append("cron links: " + ", ".join(parts))
    logger.info("Curator rollback: restored from %s (cron_report=%s)",
                target.name, cron_report)
    return (True, "; ".join(summary_bits), target)
# ---------------------------------------------------------------------------
# Human-readable summary for CLI
# ---------------------------------------------------------------------------
def format_size(n: int) -> str:
    """Render a byte count as a short human-readable string.

    Bytes are shown as an exact integer (``"512 B"``); larger sizes are
    shown with one decimal place in the first unit the value fits under
    1024 of (``"1.5 KB"``, ``"2.0 MB"``). GB is the cap — anything past
    it stays in GB (``"1024.0 GB"`` rather than introducing TB).

    The previous version ended with an unreachable ``return`` (the loop
    always returns on the "GB" iteration) and reassigned the int
    parameter to a float mid-loop; this keeps the exact output format
    while removing both.
    """
    if n < 1024:
        return f"{n} B"
    size = float(n)
    for unit in ("KB", "MB"):
        size /= 1024
        if size < 1024:
            return f"{size:.1f} {unit}"
    # Past MB: express everything in GB, even values >= 1024 GB.
    return f"{size / 1024:.1f} GB"
def summarize_backups() -> str:
    """Render the snapshot index as an aligned plain-text table for the CLI.

    Returns a one-line placeholder when no snapshots exist yet. Column
    layout: id (24), reason (40, truncated), skill-file count (6, right),
    archive size (8, right, via :func:`format_size`).
    """
    rows = list_backups()
    if not rows:
        return "No curator snapshots yet."
    lines = [f"{'id':<24} {'reason':<40} {'skills':>6} {'size':>8}"]
    # Separator rule under the header (previously a zero-width string,
    # which rendered as a blank line).
    lines.append("-" * len(lines[0]))
    for r in rows:
        # Index entries may carry explicit None values (e.g. a snapshot
        # written before a field existed); `or 0` coalesces so int() and
        # format_size() never raise on a sparse row.
        lines.append(
            f"{r.get('id', '?'):<24} "
            f"{(r.get('reason', '?') or '?')[:40]:<40} "
            f"{int(r.get('skill_files') or 0):>6} "
            f"{format_size(int(r.get('archive_bytes') or 0)):>8}"
        )
    return "\n".join(lines)

View File

@@ -54,8 +54,6 @@ class FailoverReason(enum.Enum):
# Provider-specific
thinking_signature = "thinking_signature" # Anthropic thinking block sig invalid
long_context_tier = "long_context_tier" # Anthropic "extra usage" tier gate
oauth_long_context_beta_forbidden = "oauth_long_context_beta_forbidden" # Anthropic OAuth subscription rejects 1M context beta — disable beta and retry
llama_cpp_grammar_pattern = "llama_cpp_grammar_pattern" # llama.cpp json-schema-to-grammar rejects regex escapes in `pattern` / `format` — strip from tools and retry
# Catch-all
unknown = "unknown" # Unclassifiable — retry with backoff
@@ -452,50 +450,6 @@ def classify_api_error(
should_compress=True,
)
# Anthropic OAuth subscription rejects the 1M-context beta header.
# Observed error body: "The long context beta is not yet available for
# this subscription." Returned as HTTP 400 from native Anthropic when
# the subscription doesn't include 1M context, even though the request
# carries ``anthropic-beta: context-1m-2025-08-07``. The recovery path
# in run_agent.py rebuilds the Anthropic client with the beta stripped
# and retries once. Pattern is narrow enough that it won't collide with
# the 429 tier-gate pattern above (different status, different phrase).
if (
status_code == 400
and "long context beta" in error_msg
and "not yet available" in error_msg
):
return _result(
FailoverReason.oauth_long_context_beta_forbidden,
retryable=True,
should_compress=False,
)
# llama.cpp's ``json-schema-to-grammar`` converter (used by its OAI
# server to build GBNF tool-call parsers) rejects regex escape classes
# like ``\d``/``\w``/``\s`` and most ``format`` values. MCP servers
# routinely emit ``"pattern": "\\d{4}-\\d{2}-\\d{2}"`` for date/phone/
# email params. llama.cpp surfaces this as HTTP 400 with one of a few
# recognizable phrases; on match we strip ``pattern``/``format`` from
# ``self.tools`` in the retry loop and retry once. Cloud providers are
# unaffected — they accept these keywords and we never hit this branch.
if (
status_code == 400
and (
"error parsing grammar" in error_msg
or "json-schema-to-grammar" in error_msg
or (
"unable to generate parser" in error_msg
and "template" in error_msg
)
)
):
return _result(
FailoverReason.llama_cpp_grammar_pattern,
retryable=True,
should_compress=False,
)
# ── 2. HTTP status code classification ──────────────────────────
if status_code is not None:
@@ -546,12 +500,7 @@ def classify_api_error(
is_disconnect = any(p in error_msg for p in _SERVER_DISCONNECT_PATTERNS)
if is_disconnect and not status_code:
# Absolute token/message-count thresholds are only a proxy for smaller
# context windows. Large-context sessions can have hundreds of
# messages while still being far below their actual token budget.
is_large = approx_tokens > context_length * 0.6 or (
context_length <= 256000 and (approx_tokens > 120000 or num_messages > 200)
)
is_large = approx_tokens > context_length * 0.6 or approx_tokens > 120000 or num_messages > 200
if is_large:
return _result(
FailoverReason.context_overflow,
@@ -797,12 +746,7 @@ def _classify_400(
if not err_body_msg:
err_body_msg = str(body.get("message") or "").strip().lower()
is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "")
# Absolute token/message-count thresholds are only a proxy for smaller
# context windows. Large-context sessions can have many messages while
# still being far below their actual token budget.
is_large = approx_tokens > context_length * 0.4 or (
context_length <= 256000 and (approx_tokens > 80000 or num_messages > 80)
)
is_large = approx_tokens > context_length * 0.4 or approx_tokens > 80000 or num_messages > 80
if is_generic and is_large:
return result_fn(

View File

@@ -679,21 +679,7 @@ def translate_stream_event(event: Dict[str, Any], model: str, tool_call_indices:
finish_reason_raw = str(cand.get("finishReason") or "")
if finish_reason_raw:
mapped = "tool_calls" if tool_call_indices else _map_gemini_finish_reason(finish_reason_raw)
finish_chunk = _make_stream_chunk(model=model, finish_reason=mapped)
# Attach usage from this event's usageMetadata so the streaming
# loop in run_agent.py can record token counts (mirrors the
# non-streaming path in translate_gemini_response).
usage_meta = event.get("usageMetadata") or {}
if usage_meta:
finish_chunk.usage = SimpleNamespace(
prompt_tokens=int(usage_meta.get("promptTokenCount") or 0),
completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0),
total_tokens=int(usage_meta.get("totalTokenCount") or 0),
prompt_tokens_details=SimpleNamespace(
cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0),
),
)
chunks.append(finish_chunk)
chunks.append(_make_stream_chunk(model=model, finish_reason=mapped))
return chunks

View File

@@ -489,29 +489,16 @@ def save_credentials(creds: GoogleCredentials) -> Path:
"""Atomically write creds to disk with 0o600 permissions."""
path = _credentials_path()
path.parent.mkdir(parents=True, exist_ok=True)
# Tighten parent dir to 0o700 so siblings can't traverse to the creds file.
# On Windows this is a no-op (POSIX mode bits aren't enforced); ignore failures.
try:
os.chmod(path.parent, 0o700)
except OSError:
pass
payload = json.dumps(creds.to_dict(), indent=2, sort_keys=True) + "\n"
with _credentials_lock():
tmp_path = path.with_suffix(f".tmp.{os.getpid()}.{secrets.token_hex(4)}")
try:
# Create with 0o600 atomically to close the TOCTOU window where the
# default umask (often 0o644) would briefly expose tokens to other
# local users between open() and chmod().
fd = os.open(
str(tmp_path),
os.O_WRONLY | os.O_CREAT | os.O_EXCL,
stat.S_IRUSR | stat.S_IWUSR,
)
with os.fdopen(fd, "w", encoding="utf-8") as fh:
with open(tmp_path, "w", encoding="utf-8") as fh:
fh.write(payload)
fh.flush()
os.fsync(fh.fileno())
os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR)
atomic_replace(tmp_path, path)
finally:
try:

View File

@@ -1,233 +0,0 @@
"""Lightweight internationalization (i18n) for Hermes static user-facing messages.
Scope (thin slice, by design): only the highest-impact static strings shown
to the user by Hermes itself -- approval prompts, a handful of gateway slash
command replies, restart-drain notices. Agent-generated output, log lines,
error tracebacks, tool outputs, and slash-command descriptions all stay in
English.
Catalog files live under ``locales/<lang>.yaml`` at the repo root. Each
catalog is a flat dict keyed by dotted paths (e.g. ``approval.choose`` or
``gateway.approval_expired``). Missing keys fall back to English; if English
is missing too, the key path itself is returned so a broken catalog never
crashes the agent.
Usage::
from agent.i18n import t
print(t("approval.choose_long")) # current lang
print(t("gateway.draining", count=3)) # {count} formatted
print(t("approval.choose_long", lang="zh")) # explicit override
Language resolution order:
1. Explicit ``lang=`` argument passed to :func:`t`
2. ``HERMES_LANGUAGE`` environment variable (for tests / quick override)
3. ``display.language`` from config.yaml
4. ``"en"`` (baseline)
Supported languages: en, zh, ja, de, es, fr, tr, uk. Unknown values fall back to en.
"""
from __future__ import annotations
import logging
import os
import threading
from functools import lru_cache
from pathlib import Path
from typing import Any
logger = logging.getLogger(__name__)
SUPPORTED_LANGUAGES: tuple[str, ...] = ("en", "zh", "ja", "de", "es", "fr", "tr", "uk")
DEFAULT_LANGUAGE = "en"
# Accept a few natural aliases so users who type "chinese" / "zh-CN" / "jp"
# get the right catalog instead of silently falling back to English.
_LANGUAGE_ALIASES: dict[str, str] = {
"english": "en", "en-us": "en", "en-gb": "en",
"chinese": "zh", "mandarin": "zh", "zh-cn": "zh", "zh-tw": "zh", "zh-hans": "zh", "zh-hant": "zh",
"japanese": "ja", "jp": "ja", "ja-jp": "ja",
"german": "de", "deutsch": "de", "de-de": "de",
"spanish": "es", "español": "es", "espanol": "es", "es-es": "es", "es-mx": "es",
"french": "fr", "français": "fr", "france": "fr", "fr-fr": "fr", "fr-be": "fr", "fr-ca": "fr", "fr-ch": "fr",
"ukrainian": "uk", "ukrainisch": "uk", "українська": "uk", "uk-ua": "uk", "ua": "uk",
"turkish": "tr", "türkçe": "tr", "tr-tr": "tr",
}
_catalog_cache: dict[str, dict[str, str]] = {}
_catalog_lock = threading.Lock()
def _locales_dir() -> Path:
"""Return the directory containing locale YAML files.
Lives next to the repo root so both the bundled install and editable
checkouts find it without PYTHONPATH gymnastics.
"""
# agent/i18n.py -> agent/ -> repo root
return Path(__file__).resolve().parent.parent / "locales"
def _normalize_lang(value: Any) -> str:
    """Map a user-supplied language value onto a supported code.

    Handles exact supported codes, common aliases (``"chinese"`` →
    ``"zh"``), and case-insensitive regional tags (``"zh-CN"`` →
    ``"zh"``). Anything unrecognized — including non-strings and blank
    strings — resolves to the default language rather than raising.
    """
    if not isinstance(value, str):
        return DEFAULT_LANGUAGE
    code = value.strip().lower()
    if not code:
        return DEFAULT_LANGUAGE
    if code in SUPPORTED_LANGUAGES:
        return code
    alias = _LANGUAGE_ALIASES.get(code)
    if alias is not None:
        return alias
    # Drop a region suffix and retry: "zh-CN" → "zh" is supported even
    # though e.g. "pt-br" → "pt" is not.
    base = code.split("-", 1)[0]
    return base if base in SUPPORTED_LANGUAGES else DEFAULT_LANGUAGE
def _load_catalog(lang: str) -> dict[str, str]:
    """Load and flatten one locale YAML file into a dotted-key dict.

    YAML files can be nested for human readability; this produces the flat
    key space :func:`t` expects. Cached per-language for the process.

    Failure modes are deliberately soft: a missing or unparseable catalog
    caches an empty dict (negative cache) so every later lookup falls back
    to English instead of re-hitting the filesystem or crashing.

    Note: the lock is released between the cache check and the later
    cache write, so two threads can race and both load the same file.
    That race is benign — both compute the same flat dict and the last
    writer wins with identical content.
    """
    with _catalog_lock:
        cached = _catalog_cache.get(lang)
        if cached is not None:
            return cached
    path = _locales_dir() / f"{lang}.yaml"
    if not path.is_file():
        logger.debug("i18n catalog missing for %s at %s", lang, path)
        # Negative-cache the miss so we don't stat() the path on every t().
        with _catalog_lock:
            _catalog_cache[lang] = {}
        return {}
    try:
        import yaml  # PyYAML is already a hermes dependency
        with path.open("r", encoding="utf-8") as f:
            raw = yaml.safe_load(f) or {}
    except Exception as exc:
        # Broad catch on purpose: a broken catalog must never crash the
        # agent — it just degrades to English.
        logger.warning("Failed to load i18n catalog %s: %s", path, exc)
        with _catalog_lock:
            _catalog_cache[lang] = {}
        return {}
    flat: dict[str, str] = {}
    _flatten_into(raw, "", flat)
    with _catalog_lock:
        _catalog_cache[lang] = flat
    return flat
def _flatten_into(node: Any, prefix: str, out: dict[str, str]) -> None:
if isinstance(node, dict):
for key, value in node.items():
child_key = f"{prefix}.{key}" if prefix else str(key)
_flatten_into(value, child_key, out)
elif isinstance(node, str):
out[prefix] = node
# Non-string, non-dict leaves are ignored -- catalogs are text-only.
@lru_cache(maxsize=1)
def _config_language_cached() -> str | None:
    """Read ``display.language`` from config.yaml once per process.

    Memoized because :func:`t` sits on hot paths (every approval prompt,
    every gateway reply) and re-parsing YAML per call would be wasteful.
    ``reset_language_cache()`` clears the memo when config changes at
    runtime (e.g. after the setup wizard). Returns ``None`` when the key
    is unset or config cannot be read.
    """
    try:
        from hermes_cli.config import load_config
        display_cfg = load_config().get("display") or {}
        configured = display_cfg.get("language")
    except Exception as exc:
        logger.debug("Could not read display.language from config: %s", exc)
        return None
    return _normalize_lang(configured) if configured else None
def reset_language_cache() -> None:
    """Invalidate cached language resolution and catalogs.

    Call after :func:`hermes_cli.config.save_config` if a running process
    needs to pick up a changed ``display.language`` without restart.
    """
    # Clear the memoized config read first so the next get_language()
    # re-resolves, then drop every loaded catalog under the lock so they
    # are re-read from disk on demand.
    _config_language_cached.cache_clear()
    with _catalog_lock:
        _catalog_cache.clear()
def get_language() -> str:
    """Resolve the active language: env override > config.yaml > default.

    ``HERMES_LANGUAGE`` (normalized) wins when set and non-empty; next
    comes the cached ``display.language`` config value; finally the
    English baseline.
    """
    override = os.environ.get("HERMES_LANGUAGE")
    if override:
        return _normalize_lang(override)
    return _config_language_cached() or DEFAULT_LANGUAGE
def t(key: str, lang: str | None = None, **format_kwargs: Any) -> str:
    """Translate a dotted catalog key into the active language.

    Parameters
    ----------
    key
        Dotted path into the catalog, e.g. ``"approval.choose_long"``.
    lang
        Explicit language override; takes precedence over env + config.
    **format_kwargs
        ``str.format`` substitutions — ``t("gateway.drain", count=3)``
        expects a catalog entry containing ``{count}``.

    Returns
    -------
    The translated string; the English entry when the target language is
    missing the key; or the bare key path when English is missing it too.
    A broken catalog therefore never raises — it just looks ugly.
    """
    target = _normalize_lang(lang) if lang else get_language()
    value = _load_catalog(target).get(key)
    if value is None and target != DEFAULT_LANGUAGE:
        # Prefer showing English over showing a raw key path.
        value = _load_catalog(DEFAULT_LANGUAGE).get(key)
    if value is None:
        logger.debug("i18n miss: key=%r lang=%r", key, target)
        value = key
    if not format_kwargs:
        return value
    try:
        return value.format(**format_kwargs)
    except (KeyError, IndexError, ValueError) as exc:
        # A catalog entry with mismatched placeholders shouldn't crash the
        # caller — return it unformatted and log for the maintainer.
        logger.warning(
            "i18n format failed for key=%r lang=%r kwargs=%r: %s",
            key, target, format_kwargs, exc,
        )
        return value
__all__ = [
"SUPPORTED_LANGUAGES",
"DEFAULT_LANGUAGE",
"t",
"get_language",
"reset_language_cache",
]

View File

@@ -20,25 +20,25 @@ def summarize_manual_compression(
headline = f"No changes from compression: {before_count} messages"
if after_tokens == before_tokens:
token_line = (
f"Approx request size: ~{before_tokens:,} tokens (unchanged)"
f"Rough transcript estimate: ~{before_tokens:,} tokens (unchanged)"
)
else:
token_line = (
f"Approx request size: ~{before_tokens:,}"
f"Rough transcript estimate: ~{before_tokens:,}"
f"~{after_tokens:,} tokens"
)
else:
headline = f"Compressed: {before_count}{after_count} messages"
token_line = (
f"Approx request size: ~{before_tokens:,}"
f"Rough transcript estimate: ~{before_tokens:,}"
f"~{after_tokens:,} tokens"
)
note = None
if not noop and after_count < before_count and after_tokens > before_tokens:
note = (
"Note: fewer messages can still raise this estimate when "
"compression rewrites the transcript into denser summaries."
"Note: fewer messages can still raise this rough transcript estimate "
"when compression rewrites the transcript into denser summaries."
)
return {

View File

@@ -1,14 +1,17 @@
"""MemoryManager — orchestrates memory providers for the agent.
"""MemoryManager — orchestrates the built-in memory provider plus at most
ONE external plugin memory provider.
Single integration point in run_agent.py. Replaces scattered per-backend
code with one manager that delegates to registered providers.
Only ONE external plugin provider is allowed at a time — attempting to
register a second external provider is rejected with a warning. This
The BuiltinMemoryProvider is always registered first and cannot be removed.
Only ONE external (non-builtin) provider is allowed at a time — attempting
to register a second external provider is rejected with a warning. This
prevents tool schema bloat and conflicting memory backends.
Usage in run_agent.py:
self._memory_manager = MemoryManager()
self._memory_manager.add_provider(BuiltinMemoryProvider(...))
# Only ONE of these:
self._memory_manager.add_provider(plugin_provider)
@@ -46,7 +49,7 @@ _INTERNAL_CONTEXT_RE = re.compile(
re.IGNORECASE,
)
_INTERNAL_NOTE_RE = re.compile(
r'\[System note:\s*The following is recalled memory context,\s*NOT new user input\.\s*Treat as (?:informational background data|authoritative reference data[^\]]*)\.\]\s*',
r'\[System note:\s*The following is recalled memory context,\s*NOT new user input\.\s*Treat as informational background data\.\]\s*',
re.IGNORECASE,
)
@@ -180,8 +183,7 @@ def build_memory_context_block(raw_context: str) -> str:
return (
"<memory-context>\n"
"[System note: The following is recalled memory context, "
"NOT new user input. Treat as authoritative reference data — "
"this is the agent's persistent memory and should inform all responses.]\n\n"
"NOT new user input. Treat as informational background data.]\n\n"
f"{clean}\n"
"</memory-context>"
)

View File

@@ -1,16 +1,17 @@
"""Abstract base class for pluggable memory providers.
Memory providers give the agent persistent recall across sessions.
The MemoryManager enforces a one-external-provider limit to prevent
tool schema bloat and conflicting memory backends.
Memory providers give the agent persistent recall across sessions. One
external provider is active at a time alongside the always-on built-in
memory (MEMORY.md / USER.md). The MemoryManager enforces this limit.
External providers (Honcho, Hindsight, Mem0, etc.) are registered
and managed via MemoryManager. Only one external provider runs at a
time.
Built-in memory is always active as the first provider and cannot be removed.
External providers (Honcho, Hindsight, Mem0, etc.) are additive — they never
disable the built-in store. Only one external provider runs at a time to
prevent tool schema bloat and conflicting memory backends.
Registration:
Plugins ship in plugins/memory/<name>/ and are activated via
the memory.provider config key.
1. Built-in: BuiltinMemoryProvider — always present, not removable.
2. Plugins: Ship in plugins/memory/<name>/, activated by memory.provider config.
Lifecycle (called by MemoryManager, wired in run_agent.py):
initialize() — connect, create resources, warm up

View File

@@ -46,7 +46,7 @@ def _resolve_requests_verify() -> bool | str:
# are preserved so the full model name reaches cache lookups and server queries.
_PROVIDER_PREFIXES: frozenset[str] = frozenset({
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-oauth", "minimax-cn", "anthropic", "deepseek",
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek",
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
"qwen-oauth",
"xiaomi",
@@ -318,17 +318,6 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"ollama.com": "ollama-cloud",
}
# Auto-extend with hostnames derived from provider profiles.
# Any provider with a base_url not already in the map gets added automatically.
try:
from providers import list_providers as _list_providers
for _pp in _list_providers():
_host = _pp.get_hostname()
if _host and _host not in _URL_TO_PROVIDER:
_URL_TO_PROVIDER[_host] = _pp.name
except Exception:
pass
def _infer_provider_from_url(base_url: str) -> Optional[str]:
"""Infer the models.dev provider name from a base URL.
@@ -1258,7 +1247,7 @@ def get_model_context_length(
6. Nous suffix-match via OpenRouter cache
7. models.dev registry lookup (provider-aware)
8. Thin hardcoded defaults (broad family patterns)
9. Default fallback (256K)
9. Default fallback (128K)
"""
# 0. Explicit config override — user knows best
if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
@@ -1438,7 +1427,7 @@ def get_model_context_length(
save_context_length(model, base_url, local_ctx)
return local_ctx
# 10. Default fallback — 256K
# 10. Default fallback — 128K
return DEFAULT_FALLBACK_CONTEXT

View File

@@ -149,7 +149,6 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
"stepfun": "stepfun",
"kimi-coding-cn": "kimi-for-coding",
"minimax": "minimax",
"minimax-oauth": "minimax",
"minimax-cn": "minimax-cn",
"deepseek": "deepseek",
"alibaba": "alibaba",

View File

@@ -81,56 +81,15 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:
return repaired
# Rule 2: when anyOf is present, type belongs only on the children.
# Additionally, Moonshot rejects null-type branches inside anyOf
# (enum value (<nil>) does not match any type in [string]).
# Collapse the anyOf to the first non-null branch and infer its type.
if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
repaired.pop("type", None)
non_null = [b for b in repaired["anyOf"]
if isinstance(b, dict) and b.get("type") != "null"]
if non_null and len(non_null) < len(repaired["anyOf"]):
# Drop the anyOf wrapper — keep only the non-null branch.
# If there's a single non-null branch, promote it and fall
# through to Rules 1/3 so nullable/enum cleanup still applies
# to the merged node.
if len(non_null) == 1:
merge = {k: v for k, v in repaired.items() if k != "anyOf"}
merge.update(non_null[0])
repaired = merge
else:
repaired["anyOf"] = non_null
return repaired
else:
# Nothing to collapse — parent type stripped, children already
# repaired by the recursive walk above.
return repaired
# Moonshot also rejects non-standard keywords like ``nullable`` on
# parameter schemas — strip it.
repaired.pop("nullable", None)
return repaired
# Rule 1: property schemas without type need one. $ref nodes are exempt
# — their type comes from the referenced definition.
# Fill missing type BEFORE Rule 3 so enum cleanup can check the type.
if "$ref" not in repaired:
repaired = _fill_missing_type(repaired)
# Rule 3: Moonshot rejects null/empty-string values inside enum arrays
# when the parent type is a scalar (string, integer, etc.). The error:
# "enum value (<nil>) does not match any type in [string]"
# Strip null and empty-string from enum values, and if the enum becomes
# empty, drop it entirely.
if "enum" in repaired and isinstance(repaired["enum"], list):
node_type = repaired.get("type")
if node_type in ("string", "integer", "number", "boolean"):
cleaned = [v for v in repaired["enum"]
if v is not None and v != ""]
if cleaned:
repaired["enum"] = cleaned
else:
repaired.pop("enum")
return repaired
if "$ref" in repaired:
return repaired
return _fill_missing_type(repaired)
def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:

View File

@@ -98,19 +98,17 @@ def tool_progress_hint_cli() -> str:
def openclaw_residue_hint_cli() -> str:
"""Banner shown the first time Hermes starts and finds ``~/.openclaw/``.
Points users at ``hermes claw migrate`` (non-destructive port of config,
memory, and skills) first. ``hermes claw cleanup`` is mentioned as the
follow-up step for users who have already migrated and want to archive
the old directory — with a warning that archiving breaks OpenClaw.
OpenClaw-era config, memory, and skill paths in ``~/.openclaw/`` will
otherwise attract the agent (memory entries like ``~/.openclaw/config.yaml``
get carried forward and the agent dutifully reads them). ``hermes claw
cleanup`` renames the directory so the agent stops finding it.
"""
return (
"A legacy OpenClaw directory was detected at ~/.openclaw/.\n"
"To port your config, memory, and skills over to Hermes, run "
"`hermes claw migrate`.\n"
"If you've already migrated and want to archive the old directory, "
"run `hermes claw cleanup` (renames it to ~/.openclaw.pre-migration — "
"OpenClaw will stop working after this).\n"
"This tip only shows once."
"Heads up — an OpenClaw workspace was detected at ~/.openclaw/.\n"
"After migrating, the agent can still get confused and read that "
"directory's config/memory instead of Hermes's.\n"
"Run `hermes claw cleanup` to archive it (rename → .openclaw.pre-migration). "
"This tip only shows once; rerun it any time with `hermes claw cleanup`."
)

View File

@@ -182,64 +182,6 @@ SKILLS_GUIDANCE = (
"Skills that aren't maintained become liabilities."
)
KANBAN_GUIDANCE = (
"# Kanban task execution protocol\n"
"You have been assigned ONE task from "
"the shared board at `~/.hermes/kanban.db`. Your task id is in "
"`$HERMES_KANBAN_TASK`; your workspace is `$HERMES_KANBAN_WORKSPACE`. "
"The `kanban_*` tools in your schema are your primary coordination surface — "
"they write directly to the shared SQLite DB and work regardless of terminal "
"backend (local/docker/modal/ssh).\n"
"\n"
"## Lifecycle\n"
"\n"
"1. **Orient.** Call `kanban_show()` first (no args — it defaults to your "
"task). The response includes title, body, parent-task handoffs (summary + "
"metadata), any prior attempts on this task if you're a retry, the full "
"comment thread, and a pre-formatted `worker_context` you can treat as "
"ground truth.\n"
"2. **Work inside the workspace.** `cd $HERMES_KANBAN_WORKSPACE` before "
"any file operations. The workspace is yours for this run. Don't modify "
"files outside it unless the task explicitly asks.\n"
"3. **Heartbeat on long operations.** Call `kanban_heartbeat(note=...)` "
"every few minutes during long subprocesses (training, encoding, crawling). "
"Skip heartbeats for short tasks.\n"
"4. **Block on genuine ambiguity.** If you need a human decision you cannot "
"infer (missing credentials, UX choice, paywalled source, peer output you "
"need first), call `kanban_block(reason=\"...\")` and stop. Don't guess. "
"The user will unblock with context and the dispatcher will respawn you.\n"
"5. **Complete with structured handoff.** Call `kanban_complete(summary=..., "
"metadata=...)`. `summary` is 13 human-readable sentences naming concrete "
"artifacts. `metadata` is machine-readable facts "
"(`{changed_files: [...], tests_run: N, decisions: [...]}`). Downstream "
"workers read both via their own `kanban_show`. Never put secrets / "
"tokens / raw PII in either field — run rows are durable forever.\n"
"6. **If follow-up work appears, create it; don't do it.** Use "
"`kanban_create(title=..., assignee=<right-profile>, parents=[your-task-id])` "
"to spawn a child task for the appropriate specialist profile instead of "
"scope-creeping into the next thing.\n"
"\n"
"## Orchestrator mode\n"
"\n"
"If your task is itself a decomposition task (e.g. a planner profile given "
"a high-level goal), use `kanban_create` to fan out into child tasks — one "
"per specialist, each with an explicit `assignee` and `parents=[...]` to "
"express dependencies. Then `kanban_complete` your own task with a summary "
"of the decomposition. Do NOT execute the work yourself; your job is "
"routing, not implementation.\n"
"\n"
"## Do NOT\n"
"\n"
"- Do not shell out to `hermes kanban <verb>` for board operations. Use "
"the `kanban_*` tools — they work across all terminal backends.\n"
"- Do not complete a task you didn't actually finish. Block it.\n"
"- Do not assign follow-up work to yourself. Assign it to the right "
"specialist profile.\n"
"- Do not call `delegate_task` as a board substitute. `delegate_task` is "
"for short reasoning subtasks inside your own run; board tasks are for "
"cross-agent handoffs that outlive one API loop."
)
TOOL_USE_ENFORCEMENT_GUIDANCE = (
"# Tool-use enforcement\n"
"You MUST use your tools to take action — do not describe what you would do "
@@ -513,12 +455,6 @@ PLATFORM_HINTS = {
"image and is the WRONG path. Bare Unicode emoji in text is also not a substitute "
"— when a sticker is the right response, use yb_send_sticker."
),
"api_server": (
"You're responding through an API server. The rendering layer is unknown — "
"assume plain text. No markdown formatting (no asterisks, bullets, headers, "
"code fences). Treat this like a conversation, not a document. Keep responses "
"brief and natural."
),
}
# ---------------------------------------------------------------------------

View File

@@ -305,18 +305,11 @@ def _redact_form_body(text: str) -> str:
return _redact_query_string(text.strip())
def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = False) -> str:
def redact_sensitive_text(text: str) -> str:
"""Apply all redaction patterns to a block of text.
Safe to call on any string -- non-matching text passes through unchanged.
Disabled by default — enable via security.redact_secrets: true in config.yaml.
Set force=True for safety boundaries that must never return raw secrets
regardless of the user's global logging redaction preference.
Set code_file=True to skip the ENV-assignment and JSON-field regex
patterns when the text is known to be source code (e.g. MAX_TOKENS=***
constants, "apiKey": "test" fixtures). Prefix patterns, auth headers,
private keys, DB connstrings, JWTs, and URL secrets are still redacted.
"""
if text is None:
return None
@@ -324,24 +317,23 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
text = str(text)
if not text:
return text
if not (force or _REDACT_ENABLED):
if not _REDACT_ENABLED:
return text
# Known prefixes (sk-, ghp_, etc.)
text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text)
# ENV assignments: OPENAI_API_KEY=*** (skip for code files — false positives)
if not code_file:
def _redact_env(m):
name, quote, value = m.group(1), m.group(2), m.group(3)
return f"{name}={quote}{_mask_token(value)}{quote}"
text = _ENV_ASSIGN_RE.sub(_redact_env, text)
# ENV assignments: OPENAI_API_KEY=sk-abc...
def _redact_env(m):
name, quote, value = m.group(1), m.group(2), m.group(3)
return f"{name}={quote}{_mask_token(value)}{quote}"
text = _ENV_ASSIGN_RE.sub(_redact_env, text)
# JSON fields: "apiKey": "***" (skip for code files — false positives)
def _redact_json(m):
key, value = m.group(1), m.group(2)
return f'{key}: "{_mask_token(value)}"'
text = _JSON_FIELD_RE.sub(_redact_json, text)
# JSON fields: "apiKey": "value"
def _redact_json(m):
key, value = m.group(1), m.group(2)
return f'{key}: "{_mask_token(value)}"'
text = _JSON_FIELD_RE.sub(_redact_json, text)
# Authorization headers
text = _AUTH_HEADER_RE.sub(

View File

@@ -6,7 +6,6 @@ can invoke skills via /skill-name commands.
import json
import logging
import os
import re
from pathlib import Path
from typing import Any, Dict, Optional
@@ -21,35 +20,10 @@ from agent.skill_preprocessing import (
logger = logging.getLogger(__name__)
_skill_commands: Dict[str, Dict[str, Any]] = {}
_skill_commands_platform: Optional[str] = None
# Patterns for sanitizing skill names into clean hyphen-separated slugs.
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
def _resolve_skill_commands_platform() -> Optional[str]:
"""Return the current platform scope used for disabled-skill filtering.
Used to detect when the active platform has shifted so
:func:`get_skill_commands` can drop a stale cache that was populated
for a different platform's ``skills.platform_disabled`` view (#14536).
Resolves from (in order) ``HERMES_PLATFORM`` env var and
``HERMES_SESSION_PLATFORM`` from the gateway session context. Returns
``None`` when no platform scope is active (e.g. classic CLI, RL
rollouts, standalone scripts).
"""
try:
from gateway.session_context import get_session_env
resolved_platform = (
os.getenv("HERMES_PLATFORM")
or get_session_env("HERMES_SESSION_PLATFORM")
)
except Exception:
resolved_platform = os.getenv("HERMES_PLATFORM")
return resolved_platform or None
def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
"""Load a skill by name/path and return (loaded_payload, skill_dir, display_name)."""
raw_identifier = (skill_identifier or "").strip()
@@ -244,8 +218,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
Returns:
Dict mapping "/skill-name" to {name, description, skill_md_path, skill_dir}.
"""
global _skill_commands, _skill_commands_platform
_skill_commands_platform = _resolve_skill_commands_platform()
global _skill_commands
_skill_commands = {}
try:
from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
@@ -261,7 +234,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
for scan_dir in dirs_to_scan:
for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
if any(part in ('.git', '.github', '.hub', '.archive') for part in skill_md.parts):
if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
continue
try:
content = skill_md.read_text(encoding='utf-8')
@@ -305,85 +278,12 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
def get_skill_commands() -> Dict[str, Dict[str, Any]]:
"""Return the current skill commands mapping (scan first if empty).
Rescans when the active platform scope changes (e.g. a gateway
process serving Telegram and Discord concurrently) so each platform
sees its own ``skills.platform_disabled`` view (#14536).
"""
if (
not _skill_commands
or _skill_commands_platform != _resolve_skill_commands_platform()
):
"""Return the current skill commands mapping (scan first if empty)."""
if not _skill_commands:
scan_skill_commands()
return _skill_commands
def reload_skills() -> Dict[str, Any]:
"""Re-scan the skills directory and return a diff of what changed.
Rescans ``~/.hermes/skills/`` and any ``skills.external_dirs`` so the
slash-command map (``agent.skill_commands._skill_commands``) reflects
skills added or removed on disk.
This does NOT invalidate the skills system-prompt cache. Skills are
called by name via ``/skill-name``, ``skills_list``, or ``skill_view``
— they don't need to be in the system prompt for the model to use them.
Keeping the prompt cache intact preserves prefix caching across the
reload, so a user invoking ``/reload-skills`` pays no cache-reset cost.
Returns:
Dict with keys::
{
"added": [{"name": str, "description": str}, ...],
"removed": [{"name": str, "description": str}, ...],
"unchanged": [skill names present before and after],
"total": total skill count after rescan,
"commands": total /slash-skill count after rescan,
}
``description`` is the skill's full SKILL.md frontmatter
``description:`` field — the same string the system prompt renders
as `` - name: description`` for pre-existing skills.
"""
# Snapshot pre-reload state (name -> description) from the current
# slash-command cache. Using dicts lets the post-rescan diff carry
# descriptions for newly-visible or just-removed skills without a
# second disk walk.
def _snapshot(cmds: Dict[str, Dict[str, Any]]) -> Dict[str, str]:
out: Dict[str, str] = {}
for slash_key, info in cmds.items():
bare = slash_key.lstrip("/")
out[bare] = (info or {}).get("description") or ""
return out
before = _snapshot(_skill_commands)
# Rescan the skills dir. ``scan_skill_commands`` resets
# ``_skill_commands = {}`` internally and repopulates it.
new_commands = scan_skill_commands()
after = _snapshot(new_commands)
added_names = sorted(set(after) - set(before))
removed_names = sorted(set(before) - set(after))
unchanged = sorted(set(after) & set(before))
added = [{"name": n, "description": after[n]} for n in added_names]
# For removed skills, use the description we had cached pre-rescan
# (the skill file is gone so we can't re-read it).
removed = [{"name": n, "description": before[n]} for n in removed_names]
return {
"added": added,
"removed": removed,
"unchanged": unchanged,
"total": len(after),
"commands": len(new_commands),
}
def resolve_skill_command_key(command: str) -> Optional[str]:
"""Resolve a user-typed /command to its canonical skill_cmds key.
@@ -428,14 +328,6 @@ def build_skill_invocation_message(
return f"[Failed to load skill: {skill_info['name']}]"
loaded_skill, skill_dir, skill_name = loaded
# Track active usage for Curator lifecycle management (#17782)
try:
from tools.skill_usage import bump_use
bump_use(skill_name)
except Exception:
pass # Non-critical — skill invocation proceeds regardless
activation_note = (
f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want '
"you to follow its instructions. The full skill content is loaded below.]"
@@ -475,14 +367,6 @@ def build_preloaded_skills_prompt(
continue
loaded_skill, skill_dir, skill_name = loaded
# Track active usage for Curator lifecycle management (#17782)
try:
from tools.skill_usage import bump_use
bump_use(skill_name)
except Exception:
pass # Non-critical
activation_note = (
f'[IMPORTANT: The user launched this CLI session with the "{skill_name}" skill '
"preloaded. Treat its instructions as active guidance for the duration of this "

View File

@@ -24,7 +24,7 @@ PLATFORM_MAP = {
"windows": "win32",
}
EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive"))
EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub"))
# ── Lazy YAML loader ─────────────────────────────────────────────────────
@@ -440,7 +440,7 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
def iter_skill_index_files(skills_dir: Path, filename: str):
"""Walk skills_dir yielding sorted paths matching *filename*.
Excludes ``.git``, ``.github``, ``.hub``, ``.archive`` directories.
Excludes ``.git``, ``.github``, ``.hub`` directories.
"""
matches = []
for root, dirs, files in os.walk(skills_dir, followlinks=True):

View File

@@ -1,386 +0,0 @@
"""Stateful scrubber for reasoning/thinking blocks in streamed assistant text.
``run_agent._strip_think_blocks`` is regex-based and correct for a complete
string, but when it runs *per-delta* in ``_fire_stream_delta`` it destroys
the state that downstream consumers (CLI ``_stream_delta``, gateway
``GatewayStreamConsumer._filter_and_accumulate``) rely on.
Concretely, when MiniMax-M2.7 streams
delta1 = "<think>"
delta2 = "Let me check their config"
delta3 = "</think>"
the per-delta regex erases delta1 entirely (case 2: unterminated-open at
boundary matches ``^<think>...``), so the downstream state machine never
sees the open tag, treats delta2 as regular content, and leaks reasoning
to the user. Consumers that don't run their own state machine (ACP,
api_server, TTS) never had any defence at all — they just emitted
whatever survived the upstream regex.
This module centralises the tag-suppression state machine at the
upstream layer so every stream_delta_callback sees text that has
already had reasoning blocks removed. Partial tags at delta
boundaries are held back until the next delta resolves them, and
end-of-stream flushing surfaces any held-back prose that turned out
not to be a real tag.
Usage::
scrubber = StreamingThinkScrubber()
for delta in stream:
visible = scrubber.feed(delta)
if visible:
emit(visible)
tail = scrubber.flush() # at end of stream
if tail:
emit(tail)
The scrubber is re-entrant per agent instance. Call ``reset()`` at
the top of each new turn so a hung block from an interrupted prior
stream cannot taint the next turn's output.
Tag variants handled (case-insensitive):
``<think>``, ``<thinking>``, ``<reasoning>``, ``<thought>``,
``<REASONING_SCRATCHPAD>``.
Block-boundary rule for opens: an opening tag is only treated as a
reasoning-block opener when it appears at the start of the stream,
after a newline (optionally followed by whitespace), or when only
whitespace has been emitted on the current line. This prevents prose
that *mentions* the tag name (e.g. ``"use <think> tags here"``) from
being incorrectly suppressed. Closed pairs (``<think>X</think>``) are
always suppressed regardless of boundary; a closed pair is an
intentional, bounded construct.
"""
from __future__ import annotations
from typing import Tuple
__all__ = ["StreamingThinkScrubber"]
class StreamingThinkScrubber:
    """Stateful scrubber for streaming reasoning/thinking blocks.

    State machine:
    - ``_in_block``: True while inside an opened block, waiting for
      a close tag. All text inside is discarded.
    - ``_buf``: held-back partial-tag tail. Emitted / discarded on
      the next ``feed()`` call or by ``flush()``.
    - ``_last_emitted_ended_newline``: True iff the most recent
      emission to the consumer ended with ``\\n``, or nothing has
      been emitted yet (start-of-stream counts as a boundary). Used
      to decide whether an open tag at buffer position 0 is at a
      block boundary.
    """

    _OPEN_TAG_NAMES: Tuple[str, ...] = (
        "think",
        "thinking",
        "reasoning",
        "thought",
        "REASONING_SCRATCHPAD",
    )
    # Materialise literal tag strings so the hot path does string
    # operations, not regex compilation per feed().
    _OPEN_TAGS: Tuple[str, ...] = tuple(f"<{name}>" for name in _OPEN_TAG_NAMES)
    _CLOSE_TAGS: Tuple[str, ...] = tuple(f"</{name}>" for name in _OPEN_TAG_NAMES)
    # Pre-compute the longest tag (for partial-tag hold-back bound).
    _MAX_TAG_LEN: int = max(len(tag) for tag in _OPEN_TAGS + _CLOSE_TAGS)

    def __init__(self) -> None:
        self._in_block: bool = False
        self._buf: str = ""
        self._last_emitted_ended_newline: bool = True

    def reset(self) -> None:
        """Reset all state. Call at the top of every new turn."""
        self._in_block = False
        self._buf = ""
        self._last_emitted_ended_newline = True

    def feed(self, text: str) -> str:
        """Feed one delta; return the scrubbed visible portion.

        May return an empty string when the entire delta is reasoning
        content or is being held back pending resolution of a partial
        tag at the boundary.
        """
        if not text:
            return ""
        # Re-assemble any partial tag held back from the previous delta
        # before scanning, so a tag split across deltas is seen whole.
        buf = self._buf + text
        self._buf = ""
        out: list[str] = []
        while buf:
            if self._in_block:
                # Hunt for the earliest close tag.
                close_idx, close_len = self._find_first_tag(
                    buf, self._CLOSE_TAGS,
                )
                if close_idx == -1:
                    # No close yet — hold back a potential partial
                    # close-tag prefix; discard everything else.
                    held = self._max_partial_suffix(buf, self._CLOSE_TAGS)
                    self._buf = buf[-held:] if held else ""
                    return "".join(out)
                # Found close: discard block content + tag, continue.
                buf = buf[close_idx + close_len:]
                self._in_block = False
            else:
                # Priority 1 — closed <tag>X</tag> pair anywhere in
                # buf. Closed pairs are always an intentional,
                # bounded construct (even mid-line prose containing
                # an open/close pair is almost certainly a model
                # leaking reasoning inline), so no boundary gating.
                pair = self._find_earliest_closed_pair(buf)
                # Priority 2 — unterminated open tag at a block
                # boundary. Boundary-gated so prose that mentions
                # '<think>' isn't over-stripped.
                open_idx, open_len = self._find_open_at_boundary(
                    buf, out,
                )
                # Pick whichever match comes earliest in the buffer.
                if pair is not None and (
                    open_idx == -1 or pair[0] <= open_idx
                ):
                    start_idx, end_idx = pair
                    preceding = buf[:start_idx]
                    if preceding:
                        preceding = self._strip_orphan_close_tags(preceding)
                    if preceding:
                        out.append(preceding)
                        self._last_emitted_ended_newline = (
                            preceding.endswith("\n")
                        )
                    buf = buf[end_idx:]
                    continue
                if open_idx != -1:
                    # Unterminated open at boundary — emit preceding,
                    # enter block, continue loop with remainder.
                    preceding = buf[:open_idx]
                    if preceding:
                        preceding = self._strip_orphan_close_tags(preceding)
                    if preceding:
                        out.append(preceding)
                        self._last_emitted_ended_newline = (
                            preceding.endswith("\n")
                        )
                    self._in_block = True
                    buf = buf[open_idx + open_len:]
                    continue
                # No resolvable tag structure in buf. Hold back any
                # partial-tag prefix at the tail so a split tag
                # across deltas isn't missed, then emit the rest.
                held = self._max_partial_suffix(buf, self._OPEN_TAGS)
                held_close = self._max_partial_suffix(
                    buf, self._CLOSE_TAGS,
                )
                held = max(held, held_close)
                if held:
                    emit_text = buf[:-held]
                    self._buf = buf[-held:]
                else:
                    emit_text = buf
                    self._buf = ""
                if emit_text:
                    emit_text = self._strip_orphan_close_tags(emit_text)
                if emit_text:
                    out.append(emit_text)
                    self._last_emitted_ended_newline = (
                        emit_text.endswith("\n")
                    )
                return "".join(out)
        return "".join(out)

    def flush(self) -> str:
        """End-of-stream flush.

        If still inside an unterminated block, held-back content is
        discarded — leaking partial reasoning is worse than a
        truncated answer. Otherwise the held-back partial-tag tail is
        emitted verbatim (it turned out not to be a real tag prefix).
        """
        if self._in_block:
            self._buf = ""
            self._in_block = False
            return ""
        tail = self._buf
        self._buf = ""
        if not tail:
            return ""
        tail = self._strip_orphan_close_tags(tail)
        if tail:
            self._last_emitted_ended_newline = tail.endswith("\n")
        return tail

    # ── internal helpers ───────────────────────────────────────────────

    @staticmethod
    def _find_first_tag(
        buf: str, tags: Tuple[str, ...],
    ) -> Tuple[int, int]:
        """Return (earliest_index, tag_length) over *tags*, or (-1, 0).

        Case-insensitive match.
        """
        buf_lower = buf.lower()
        best_idx = -1
        best_len = 0
        for tag in tags:
            idx = buf_lower.find(tag.lower())
            if idx != -1 and (best_idx == -1 or idx < best_idx):
                best_idx = idx
                best_len = len(tag)
        return best_idx, best_len

    def _find_earliest_closed_pair(self, buf: str):
        """Return (start_idx, end_idx) of the earliest closed pair, else None.

        A closed pair is ``<tag>...</tag>`` of any variant. Matches are
        case-insensitive and non-greedy (the closest close tag after
        an open tag wins), matching the regex ``<tag>.*?</tag>``
        semantics of ``_strip_think_blocks`` case 1. When two tag
        variants could both match, the one whose open tag appears
        earlier wins.
        """
        buf_lower = buf.lower()
        best: "tuple[int, int] | None" = None
        for open_tag, close_tag in zip(self._OPEN_TAGS, self._CLOSE_TAGS):
            open_lower = open_tag.lower()
            close_lower = close_tag.lower()
            open_idx = buf_lower.find(open_lower)
            if open_idx == -1:
                continue
            # Close must appear strictly after the open tag's end.
            close_idx = buf_lower.find(
                close_lower, open_idx + len(open_lower),
            )
            if close_idx == -1:
                continue
            end_idx = close_idx + len(close_lower)
            if best is None or open_idx < best[0]:
                best = (open_idx, end_idx)
        return best

    def _find_open_at_boundary(
        self, buf: str, already_emitted: list[str],
    ) -> Tuple[int, int]:
        """Return the earliest block-boundary open-tag (idx, len).

        Returns (-1, 0) if no boundary-legal opener is present.
        """
        buf_lower = buf.lower()
        best_idx = -1
        best_len = 0
        for tag in self._OPEN_TAGS:
            tag_lower = tag.lower()
            search_start = 0
            while True:
                idx = buf_lower.find(tag_lower, search_start)
                if idx == -1:
                    break
                if self._is_block_boundary(buf, idx, already_emitted):
                    if best_idx == -1 or idx < best_idx:
                        best_idx = idx
                        best_len = len(tag)
                    break  # first boundary hit for this tag is enough
                search_start = idx + 1
        return best_idx, best_len

    def _is_block_boundary(
        self, buf: str, idx: int, already_emitted: list[str],
    ) -> bool:
        """True iff position *idx* in *buf* is a block boundary.

        A block boundary is:
        - buf position 0 AND the most recent emission ended with
          a newline (or nothing has been emitted yet)
        - any position whose preceding text on the current line
          (since the last newline in buf) is whitespace-only, AND
          if there is no newline in the preceding buf portion, the
          most recent prior emission ended with a newline
        """
        if idx == 0:
            # Check whether the last already-emitted chunk in THIS
            # feed() call ended with a newline, otherwise fall back
            # to the cross-feed flag.
            if already_emitted:
                return already_emitted[-1].endswith("\n")
            return self._last_emitted_ended_newline
        preceding = buf[:idx]
        last_nl = preceding.rfind("\n")
        if last_nl == -1:
            # No newline in buf before the tag — boundary only if the
            # prior emission ended with a newline AND everything since
            # is whitespace.
            if already_emitted:
                prior_newline = already_emitted[-1].endswith("\n")
            else:
                prior_newline = self._last_emitted_ended_newline
            return prior_newline and preceding.strip() == ""
        # Newline present — text between it and the tag must be
        # whitespace-only.
        return preceding[last_nl + 1:].strip() == ""

    @classmethod
    def _max_partial_suffix(
        cls, buf: str, tags: Tuple[str, ...],
    ) -> int:
        """Return the longest buf-suffix that is a prefix of any tag.

        Only prefixes strictly shorter than the tag itself count
        (full-length suffixes are the tag and are handled as matches,
        not held-back partials). Case-insensitive.
        """
        if not buf:
            return 0
        buf_lower = buf.lower()
        # Longest candidate is one char short of the longest tag.
        max_check = min(len(buf_lower), cls._MAX_TAG_LEN - 1)
        for i in range(max_check, 0, -1):
            suffix = buf_lower[-i:]
            for tag in tags:
                tag_lower = tag.lower()
                if len(tag_lower) > i and tag_lower.startswith(suffix):
                    return i
        return 0

    @classmethod
    def _strip_orphan_close_tags(cls, text: str) -> str:
        """Remove any close tags from *text* (orphan-close handling).

        An orphan close tag has no matching open in the current
        scrubber state; it's always noise, stripped with any trailing
        whitespace so the surrounding prose flows naturally.
        """
        if "</" not in text:
            return text
        text_lower = text.lower()
        out: list[str] = []
        i = 0
        while i < len(text):
            matched = False
            if text_lower[i:i + 2] == "</":
                for tag in cls._CLOSE_TAGS:
                    tag_lower = tag.lower()
                    tag_len = len(tag_lower)
                    if text_lower[i:i + tag_len] == tag_lower:
                        # Skip the tag and any trailing whitespace,
                        # matching _strip_think_blocks case 3.
                        j = i + tag_len
                        while j < len(text) and text[j] in " \t\n\r":
                            j += 1
                        i = j
                        matched = True
                        break
            if not matched:
                out.append(text[i])
                i += 1
        return "".join(out)

View File

@@ -17,7 +17,6 @@ logger = logging.getLogger(__name__)
# so silent-drops (e.g. OpenRouter 402 exhausting the fallback chain)
# become visible instead of piling up as NULL session titles.
FailureCallback = Callable[[str, BaseException], None]
TitleCallback = Callable[[str], None]
_TITLE_PROMPT = (
"Generate a short, descriptive title (3-7 words) for a conversation that starts with the "
@@ -91,7 +90,6 @@ def auto_title_session(
assistant_response: str,
failure_callback: Optional[FailureCallback] = None,
main_runtime: dict = None,
title_callback: Optional[TitleCallback] = None,
) -> None:
"""Generate and set a session title if one doesn't already exist.
@@ -121,11 +119,6 @@ def auto_title_session(
try:
session_db.set_session_title(session_id, title)
logger.debug("Auto-generated session title: %s", title)
if title_callback is not None:
try:
title_callback(title)
except Exception:
logger.debug("Auto-title callback failed", exc_info=True)
except Exception as e:
logger.debug("Failed to set auto-generated title: %s", e)
@@ -138,7 +131,6 @@ def maybe_auto_title(
conversation_history: list,
failure_callback: Optional[FailureCallback] = None,
main_runtime: dict = None,
title_callback: Optional[TitleCallback] = None,
) -> None:
"""Fire-and-forget title generation after the first exchange.
@@ -160,11 +152,7 @@ def maybe_auto_title(
thread = threading.Thread(
target=auto_title_session,
args=(session_db, session_id, user_message, assistant_response),
kwargs={
"failure_callback": failure_callback,
"main_runtime": main_runtime,
"title_callback": title_callback,
},
kwargs={"failure_callback": failure_callback, "main_runtime": main_runtime},
daemon=True,
name="auto-title",
)

View File

@@ -1,455 +0,0 @@
"""Pure tool-call loop guardrail primitives.
The controller in this module is intentionally side-effect free: it tracks
per-turn tool-call observations and returns decisions. Runtime code owns whether
those decisions become warning guidance, synthetic tool results, or controlled
turn halts.
"""
from __future__ import annotations
import hashlib
import json
from dataclasses import dataclass, field
from typing import Any, Mapping
from utils import safe_json_loads
# Read-only tools: re-invoking one with identical arguments cannot change
# state, so an unchanged repeated result signals a non-progressing loop.
IDEMPOTENT_TOOL_NAMES = frozenset((
    "read_file",
    "search_files",
    "web_search",
    "web_extract",
    "session_search",
    "browser_snapshot",
    "browser_console",
    "browser_get_images",
    "mcp_filesystem_read_file",
    "mcp_filesystem_read_text_file",
    "mcp_filesystem_read_multiple_files",
    "mcp_filesystem_list_directory",
    "mcp_filesystem_list_directory_with_sizes",
    "mcp_filesystem_directory_tree",
    "mcp_filesystem_get_file_info",
    "mcp_filesystem_search_files",
))

# State-changing tools, as distinct from the read-only set above.
MUTATING_TOOL_NAMES = frozenset((
    "terminal",
    "execute_code",
    "write_file",
    "patch",
    "todo",
    "memory",
    "skill_manage",
    "browser_click",
    "browser_type",
    "browser_press",
    "browser_scroll",
    "browser_navigate",
    "send_message",
    "cronjob",
    "delegate_task",
    "process",
))
@dataclass(frozen=True)
class ToolCallGuardrailConfig:
    """Thresholds for per-turn tool-call loop detection.

    Warnings are enabled by default and never prevent tool execution. Hard stops
    are explicit opt-in so interactive CLI/TUI sessions get a gentle nudge unless
    the user enables circuit-breaker behavior in config.yaml.
    """

    # Master switches: warnings are advisory; hard stops can block calls.
    warnings_enabled: bool = True
    hard_stop_enabled: bool = False
    # Same tool AND identical canonical arguments failing repeatedly.
    exact_failure_warn_after: int = 2
    exact_failure_block_after: int = 5
    # Same tool failing this turn, regardless of argument changes.
    same_tool_failure_warn_after: int = 3
    same_tool_failure_halt_after: int = 8
    # Read-only call repeated with identical args (no-progress loop).
    no_progress_warn_after: int = 2
    no_progress_block_after: int = 5
    # default_factory keeps dataclass default semantics uniform; the
    # module-level frozensets themselves are immutable.
    idempotent_tools: frozenset[str] = field(default_factory=lambda: IDEMPOTENT_TOOL_NAMES)
    mutating_tools: frozenset[str] = field(default_factory=lambda: MUTATING_TOOL_NAMES)

    @classmethod
    def from_mapping(cls, data: Mapping[str, Any] | None) -> "ToolCallGuardrailConfig":
        """Build config from the `tool_loop_guardrails` config.yaml section.

        Accepts both the nested form (``warn_after: {exact_failure: N}``)
        and flat keys (``exact_failure_warn_after: N``); a nested key wins
        when both are present. Any non-mapping input yields the defaults.
        """
        if not isinstance(data, Mapping):
            return cls()
        warn_after = data.get("warn_after")
        if not isinstance(warn_after, Mapping):
            warn_after = {}
        hard_stop_after = data.get("hard_stop_after")
        if not isinstance(hard_stop_after, Mapping):
            hard_stop_after = {}
        # Defaults instance supplies per-field fallbacks for the coercers.
        defaults = cls()
        return cls(
            warnings_enabled=_as_bool(data.get("warnings_enabled"), defaults.warnings_enabled),
            hard_stop_enabled=_as_bool(data.get("hard_stop_enabled"), defaults.hard_stop_enabled),
            exact_failure_warn_after=_positive_int(
                warn_after.get("exact_failure", data.get("exact_failure_warn_after")),
                defaults.exact_failure_warn_after,
            ),
            same_tool_failure_warn_after=_positive_int(
                warn_after.get("same_tool_failure", data.get("same_tool_failure_warn_after")),
                defaults.same_tool_failure_warn_after,
            ),
            no_progress_warn_after=_positive_int(
                warn_after.get("idempotent_no_progress", data.get("no_progress_warn_after")),
                defaults.no_progress_warn_after,
            ),
            exact_failure_block_after=_positive_int(
                hard_stop_after.get("exact_failure", data.get("exact_failure_block_after")),
                defaults.exact_failure_block_after,
            ),
            same_tool_failure_halt_after=_positive_int(
                hard_stop_after.get("same_tool_failure", data.get("same_tool_failure_halt_after")),
                defaults.same_tool_failure_halt_after,
            ),
            no_progress_block_after=_positive_int(
                hard_stop_after.get("idempotent_no_progress", data.get("no_progress_block_after")),
                defaults.no_progress_block_after,
            ),
        )
@dataclass(frozen=True)
class ToolCallSignature:
    """Stable, non-reversible identity for a tool name plus canonical args."""

    tool_name: str
    args_hash: str

    @classmethod
    def from_call(cls, tool_name: str, args: Mapping[str, Any] | None) -> "ToolCallSignature":
        """Canonicalize then hash the args so raw values are never stored."""
        digest_input = canonical_tool_args(args or {})
        return cls(tool_name=tool_name, args_hash=_sha256(digest_input))

    def to_metadata(self) -> dict[str, str]:
        """Return public metadata without raw argument values."""
        return {
            "tool_name": self.tool_name,
            "args_hash": self.args_hash,
        }
@dataclass(frozen=True)
class ToolGuardrailDecision:
    """Decision returned by the tool-call guardrail controller."""

    action: str = "allow"  # allow | warn | block | halt
    code: str = "allow"
    message: str = ""
    tool_name: str = ""
    count: int = 0
    signature: ToolCallSignature | None = None

    @property
    def allows_execution(self) -> bool:
        # Advisory outcomes — the tool call still runs.
        return self.action == "allow" or self.action == "warn"

    @property
    def should_halt(self) -> bool:
        # Circuit-breaker outcomes — runtime stops the tool call / turn.
        return self.action == "block" or self.action == "halt"

    def to_metadata(self) -> dict[str, Any]:
        """Serialize the decision; the signature stays hash-only."""
        meta: dict[str, Any] = dict(
            action=self.action,
            code=self.code,
            message=self.message,
            tool_name=self.tool_name,
            count=self.count,
        )
        if self.signature is not None:
            meta["signature"] = self.signature.to_metadata()
        return meta
def canonical_tool_args(args: Mapping[str, Any]) -> str:
    """Return sorted compact JSON for parsed tool arguments.

    Keys are sorted and separators compact so the same logical arguments
    always serialize to the same string; non-JSON-serializable values fall
    back to ``str`` via ``default``.

    Raises:
        TypeError: if *args* is not a Mapping.
    """
    if not isinstance(args, Mapping):
        raise TypeError(f"tool args must be a mapping, got {type(args).__name__}")
    return json.dumps(args, sort_keys=True, ensure_ascii=False, separators=(",", ":"), default=str)
def classify_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]:
    """Safety-fallback classifier used only when callers don't pass ``failed``.

    Mirrors ``agent.display._detect_tool_failure`` exactly so the guardrail
    never disagrees with the CLI's user-visible ``[error]`` tag. Production
    callers in ``run_agent.py`` always pass an explicit ``failed=`` derived
    from ``_detect_tool_failure``; this function exists so standalone callers
    (tests, tooling) still get consistent behavior.

    Returns:
        (failed, suffix) where *suffix* is a short display tag such as
        " [exit 1]", " [full]", " [error]", or "" when not a failure.
    """
    if result is None:
        return False, ""
    if tool_name == "terminal":
        # Terminal results are JSON; a non-zero exit_code is the failure signal.
        payload = safe_json_loads(result)
        if isinstance(payload, dict):
            exit_code = payload.get("exit_code")
            if exit_code is not None and exit_code != 0:
                return True, f" [exit {exit_code}]"
        return False, ""
    if tool_name == "memory":
        # Memory-full is the one structured memory failure worth tagging.
        payload = safe_json_loads(result)
        if isinstance(payload, dict):
            if payload.get("success") is False and "exceed the limit" in payload.get("error", ""):
                return True, " [full]"
    # Generic heuristic: inspect only the first 500 chars for error markers.
    head = result[:500].lower()
    if result.startswith("Error") or '"error"' in head or '"failed"' in head:
        return True, " [error]"
    return False, ""
class ToolCallGuardrailController:
"""Per-turn controller for repeated failed/non-progressing tool calls."""
def __init__(self, config: ToolCallGuardrailConfig | None = None):
self.config = config or ToolCallGuardrailConfig()
self.reset_for_turn()
def reset_for_turn(self) -> None:
self._exact_failure_counts: dict[ToolCallSignature, int] = {}
self._same_tool_failure_counts: dict[str, int] = {}
self._no_progress: dict[ToolCallSignature, tuple[str, int]] = {}
self._halt_decision: ToolGuardrailDecision | None = None
@property
def halt_decision(self) -> ToolGuardrailDecision | None:
return self._halt_decision
def before_call(self, tool_name: str, args: Mapping[str, Any] | None) -> ToolGuardrailDecision:
signature = ToolCallSignature.from_call(tool_name, _coerce_args(args))
if not self.config.hard_stop_enabled:
return ToolGuardrailDecision(tool_name=tool_name, signature=signature)
exact_count = self._exact_failure_counts.get(signature, 0)
if exact_count >= self.config.exact_failure_block_after:
decision = ToolGuardrailDecision(
action="block",
code="repeated_exact_failure_block",
message=(
f"Blocked {tool_name}: the same tool call failed {exact_count} "
"times with identical arguments. Stop retrying it unchanged; "
"change strategy or explain the blocker."
),
tool_name=tool_name,
count=exact_count,
signature=signature,
)
self._halt_decision = decision
return decision
if self._is_idempotent(tool_name):
record = self._no_progress.get(signature)
if record is not None:
_result_hash, repeat_count = record
if repeat_count >= self.config.no_progress_block_after:
decision = ToolGuardrailDecision(
action="block",
code="idempotent_no_progress_block",
message=(
f"Blocked {tool_name}: this read-only call returned the same "
f"result {repeat_count} times. Stop repeating it unchanged; "
"use the result already provided or try a different query."
),
tool_name=tool_name,
count=repeat_count,
signature=signature,
)
self._halt_decision = decision
return decision
return ToolGuardrailDecision(tool_name=tool_name, signature=signature)
def after_call(
    self,
    tool_name: str,
    args: Mapping[str, Any] | None,
    result: str | None,
    *,
    failed: bool | None = None,
) -> ToolGuardrailDecision:
    """Record the outcome of a completed tool call and maybe warn or halt.

    Args:
        tool_name: Name of the tool that just ran.
        args: Arguments the tool was called with (non-mapping coerced to {}).
        result: Raw tool result content; used for failure classification and,
            for idempotent tools, for no-progress hashing.
        failed: Explicit failure flag; when None the result is classified
            via classify_tool_failure().

    Returns:
        A ToolGuardrailDecision whose action is "halt" (stop the turn),
        "warn" (append guidance), or the default allow.
    """
    args = _coerce_args(args)
    signature = ToolCallSignature.from_call(tool_name, args)
    if failed is None:
        failed, _ = classify_tool_failure(tool_name, result)
    if failed:
        # Bump both the exact-signature counter and the per-tool counter.
        exact_count = self._exact_failure_counts.get(signature, 0) + 1
        self._exact_failure_counts[signature] = exact_count
        # A failure invalidates any identical-result streak for this signature.
        self._no_progress.pop(signature, None)
        same_count = self._same_tool_failure_counts.get(tool_name, 0) + 1
        self._same_tool_failure_counts[tool_name] = same_count
        # Halting takes precedence over warnings when hard stops are enabled.
        if self.config.hard_stop_enabled and same_count >= self.config.same_tool_failure_halt_after:
            decision = ToolGuardrailDecision(
                action="halt",
                code="same_tool_failure_halt",
                message=(
                    f"Stopped {tool_name}: it failed {same_count} times this turn. "
                    "Stop retrying the same failing tool path and choose a different approach."
                ),
                tool_name=tool_name,
                count=same_count,
                signature=signature,
            )
            self._halt_decision = decision
            return decision
        # Warnings: exact-signature repeats are checked before per-tool repeats.
        if self.config.warnings_enabled and exact_count >= self.config.exact_failure_warn_after:
            return ToolGuardrailDecision(
                action="warn",
                code="repeated_exact_failure_warning",
                message=(
                    f"{tool_name} has failed {exact_count} times with identical arguments. "
                    "This looks like a loop; inspect the error and change strategy "
                    "instead of retrying it unchanged."
                ),
                tool_name=tool_name,
                count=exact_count,
                signature=signature,
            )
        if self.config.warnings_enabled and same_count >= self.config.same_tool_failure_warn_after:
            return ToolGuardrailDecision(
                action="warn",
                code="same_tool_failure_warning",
                message=(
                    f"{tool_name} has failed {same_count} times this turn. "
                    "This looks like a loop; change approach before retrying."
                ),
                tool_name=tool_name,
                count=same_count,
                signature=signature,
            )
        return ToolGuardrailDecision(tool_name=tool_name, count=exact_count, signature=signature)
    # Success: clear failure streaks for this signature and this tool.
    self._exact_failure_counts.pop(signature, None)
    self._same_tool_failure_counts.pop(tool_name, None)
    if not self._is_idempotent(tool_name):
        # Mutating/unlisted tools may legitimately repeat; no no-progress tracking.
        self._no_progress.pop(signature, None)
        return ToolGuardrailDecision(tool_name=tool_name, signature=signature)
    # Idempotent success: track identical results to detect no-progress loops.
    result_hash = _result_hash(result)
    previous = self._no_progress.get(signature)
    repeat_count = 1
    if previous is not None and previous[0] == result_hash:
        repeat_count = previous[1] + 1
    self._no_progress[signature] = (result_hash, repeat_count)
    if self.config.warnings_enabled and repeat_count >= self.config.no_progress_warn_after:
        return ToolGuardrailDecision(
            action="warn",
            code="idempotent_no_progress_warning",
            message=(
                f"{tool_name} returned the same result {repeat_count} times. "
                "Use the result already provided or change the query instead of "
                "repeating it unchanged."
            ),
            tool_name=tool_name,
            count=repeat_count,
            signature=signature,
        )
    return ToolGuardrailDecision(tool_name=tool_name, count=repeat_count, signature=signature)
def _is_idempotent(self, tool_name: str) -> bool:
if tool_name in self.config.mutating_tools:
return False
return tool_name in self.config.idempotent_tools
def toolguard_synthetic_result(decision: ToolGuardrailDecision) -> str:
    """Build a synthetic role=tool content string for a blocked tool call.

    The payload carries the human-readable message under "error" and the
    decision's metadata under "guardrail".
    """
    payload = {
        "error": decision.message,
        "guardrail": decision.to_metadata(),
    }
    return json.dumps(payload, ensure_ascii=False)
def append_toolguard_guidance(result: str, decision: ToolGuardrailDecision) -> str:
    """Append runtime guidance to the current tool result content.

    Only "warn" and "halt" decisions with a non-empty message get a suffix;
    every other decision leaves the result untouched.
    """
    if not decision.message:
        return result
    if decision.action == "halt":
        label = "Tool loop hard stop"
    elif decision.action == "warn":
        label = "Tool loop warning"
    else:
        return result
    note = f"\n\n[{label}: {decision.code}; count={decision.count}; {decision.message}]"
    return (result or "") + note
def _coerce_args(args: Mapping[str, Any] | None) -> Mapping[str, Any]:
return args if isinstance(args, Mapping) else {}
def _result_hash(result: str | None) -> str:
    """Hash a tool result, canonicalising JSON payloads first.

    JSON results are re-serialised with sorted keys and compact separators so
    semantically identical payloads hash the same regardless of key order or
    whitespace; anything that does not parse as JSON is hashed verbatim.
    """
    text = result or ""
    parsed = safe_json_loads(text)
    if parsed is None:
        return _sha256(text)
    try:
        canonical = json.dumps(
            parsed,
            ensure_ascii=False,
            sort_keys=True,
            separators=(",", ":"),
            default=str,
        )
    except TypeError:
        # default=str covers most cases; fall back to repr-ish text otherwise.
        canonical = str(parsed)
    return _sha256(canonical)
def _as_bool(value: Any, default: bool) -> bool:
if value is None:
return default
if isinstance(value, bool):
return value
if isinstance(value, (int, float)):
return bool(value)
if isinstance(value, str):
lowered = value.strip().lower()
if lowered in {"1", "true", "yes", "on", "enabled"}:
return True
if lowered in {"0", "false", "no", "off", "disabled"}:
return False
return default
def _positive_int(value: Any, default: int) -> int:
if value is None:
return default
try:
parsed = int(value)
except (TypeError, ValueError):
return default
return parsed if parsed >= 1 else default
def _sha256(value: str) -> str:
return hashlib.sha256(value.encode("utf-8")).hexdigest()

View File

@@ -6,16 +6,9 @@ Usage:
result = transport.normalize_response(raw_response)
"""
from agent.transports.types import (
NormalizedResponse,
ToolCall,
Usage,
build_tool_call,
map_finish_reason,
) # noqa: F401
from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason # noqa: F401
_REGISTRY: dict = {}
_discovered: bool = False
def register_transport(api_mode: str, transport_cls: type) -> None:
@@ -30,9 +23,6 @@ def get_transport(api_mode: str):
This allows gradual migration — call sites can check for None
and fall back to the legacy code path.
"""
global _discovered
if not _discovered:
_discover_transports()
cls = _REGISTRY.get(api_mode)
if cls is None:
# The registry can be partially populated when a specific transport
@@ -48,8 +38,6 @@ def get_transport(api_mode: str):
def _discover_transports() -> None:
"""Import all transport modules to trigger auto-registration."""
global _discovered
_discovered = True
try:
import agent.transports.anthropic # noqa: F401
except ImportError:

View File

@@ -58,7 +58,6 @@ class AnthropicTransport(ProviderTransport):
context_length: int | None
base_url: str | None
fast_mode: bool
drop_context_1m_beta: bool
"""
from agent.anthropic_adapter import build_anthropic_kwargs
@@ -74,7 +73,6 @@ class AnthropicTransport(ProviderTransport):
context_length=params.get("context_length"),
base_url=params.get("base_url"),
fast_mode=params.get("fast_mode", False),
drop_context_1m_beta=params.get("drop_context_1m_beta", False),
)
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:

View File

@@ -20,22 +20,15 @@ from agent.transports.types import NormalizedResponse, ToolCall, Usage
def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> dict | None:
"""Translate Hermes/OpenRouter-style reasoning config to Gemini thinkingConfig."""
"""Translate Hermes/OpenRouter-style reasoning config to Gemini thinkingConfig.
Gemini native/cloud-code adapters do not read ``extra_body.reasoning``.
They only inspect ``extra_body.thinking_config`` / ``thinkingConfig`` and
then request thought parts with ``includeThoughts`` enabled.
"""
if reasoning_config is None or not isinstance(reasoning_config, dict):
return None
normalized_model = (model or "").strip().lower()
if normalized_model.startswith("google/"):
normalized_model = normalized_model.split("/", 1)[1]
# ``thinking_config`` is a Gemini-only request parameter. The same
# ``gemini`` provider also serves Gemma (and historically PaLM/Bard);
# those reject the field with HTTP 400 "Unknown name 'thinking_config':
# Cannot find field" — including the polite ``{"includeThoughts": False}``
# form. Omit the field entirely on non-Gemini models. (#17426)
if not normalized_model.startswith("gemini"):
return None
if reasoning_config.get("enabled") is False:
# Gemini can hide thought parts even when internal thinking still
# happens; omit thinkingLevel to avoid model-specific validation quirks.
@@ -46,6 +39,9 @@ def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) ->
return {"includeThoughts": False}
thinking_config: Dict[str, Any] = {"includeThoughts": True}
normalized_model = (model or "").strip().lower()
if normalized_model.startswith("google/"):
normalized_model = normalized_model.split("/", 1)[1]
# Gemini 2.5 accepts thinkingBudget; don't guess a budget from Hermes'
# coarse effort levels. ``includeThoughts`` alone is enough to surface
@@ -75,30 +71,6 @@ def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) ->
return thinking_config
def _snake_case_gemini_thinking_config(config: dict | None) -> dict | None:
"""Convert Gemini thinking config keys to the OpenAI-compat field names."""
if not isinstance(config, dict) or not config:
return None
translated: Dict[str, Any] = {}
if isinstance(config.get("includeThoughts"), bool):
translated["include_thoughts"] = config["includeThoughts"]
if isinstance(config.get("thinkingLevel"), str) and config["thinkingLevel"].strip():
translated["thinking_level"] = config["thinkingLevel"].strip().lower()
if isinstance(config.get("thinkingBudget"), (int, float)):
translated["thinking_budget"] = int(config["thinkingBudget"])
return translated or None
def _is_gemini_openai_compat_base_url(base_url: Any) -> bool:
normalized = str(base_url or "").strip().rstrip("/").lower()
if not normalized:
return False
if "generativelanguage.googleapis.com" not in normalized:
return False
return normalized.endswith("/openai")
class ChatCompletionsTransport(ProviderTransport):
"""Transport for api_mode='chat_completions'.
@@ -109,9 +81,7 @@ class ChatCompletionsTransport(ProviderTransport):
def api_mode(self) -> str:
return "chat_completions"
def convert_messages(
self, messages: list[dict[str, Any]], **kwargs
) -> list[dict[str, Any]]:
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
"""Messages are already in OpenAI format — sanitize Codex leaks only.
Strips Codex Responses API fields (``codex_reasoning_items`` /
@@ -128,9 +98,7 @@ class ChatCompletionsTransport(ProviderTransport):
tool_calls = msg.get("tool_calls")
if isinstance(tool_calls, list):
for tc in tool_calls:
if isinstance(tc, dict) and (
"call_id" in tc or "response_item_id" in tc
):
if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc):
needs_sanitize = True
break
if needs_sanitize:
@@ -153,41 +121,39 @@ class ChatCompletionsTransport(ProviderTransport):
tc.pop("response_item_id", None)
return sanitized
def convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Tools are already in OpenAI format — identity."""
return tools
def build_kwargs(
self,
model: str,
messages: list[dict[str, Any]],
tools: list[dict[str, Any]] | None = None,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
**params,
) -> dict[str, Any]:
) -> Dict[str, Any]:
"""Build chat.completions.create() kwargs.
params (all optional):
This is the most complex transport method — it handles ~16 providers
via params rather than subclasses.
params:
timeout: float — API call timeout
max_tokens: int | None — user-configured max tokens
ephemeral_max_output_tokens: int | None — one-shot override
ephemeral_max_output_tokens: int | None — one-shot override (error recovery)
max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N}
reasoning_config: dict | None
request_overrides: dict | None
session_id: str | None
qwen_session_metadata: dict | None — {sessionId, promptId} precomputed
model_lower: str — lowercase model name for pattern matching
# Provider profile path (all per-provider quirks live in providers/)
provider_profile: ProviderProfile | None — when present, delegates to
_build_kwargs_from_profile(); all flag params below are bypassed.
# Legacy-path flags — only used when provider_profile is None
# (i.e. custom / unregistered providers). Known providers all go
# through provider_profile.
# Provider detection flags (all optional, default False)
is_openrouter: bool
is_nous: bool
is_qwen_portal: bool
is_github_models: bool
is_nvidia_nim: bool
is_kimi: bool
is_tokenhub: bool
is_lmstudio: bool
is_custom_provider: bool
ollama_num_ctx: int | None
@@ -196,7 +162,6 @@ class ChatCompletionsTransport(ProviderTransport):
# Qwen-specific
qwen_prepare_fn: callable | None — runs AFTER codex sanitization
qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists
qwen_session_metadata: dict | None
# Temperature
fixed_temperature: Any — from _fixed_temperature_for_model()
omit_temperature: bool
@@ -206,21 +171,28 @@ class ChatCompletionsTransport(ProviderTransport):
lmstudio_reasoning_options: list[str] | None # raw allowed_options from /api/v1/models
# Claude on OpenRouter/Nous max output
anthropic_max_output: int | None
extra_body_additions: dict | None
# Extra
extra_body_additions: dict | None — pre-built extra_body entries
"""
# Codex sanitization: drop reasoning_items / call_id / response_item_id
sanitized = self.convert_messages(messages)
# ── Provider profile: single-path when present ──────────────────
_profile = params.get("provider_profile")
if _profile:
return self._build_kwargs_from_profile(
_profile, model, sanitized, tools, params
)
# ── Legacy fallback (unregistered / unknown provider) ───────────
# Reached only when get_provider_profile() returned None.
# Known providers always go through the profile path above.
# Qwen portal prep AFTER codex sanitization. If sanitize already
# deepcopied, reuse that copy via the in-place variant to avoid a
# second deepcopy.
is_qwen = params.get("is_qwen_portal", False)
if is_qwen:
qwen_prep = params.get("qwen_prepare_fn")
qwen_prep_inplace = params.get("qwen_prepare_inplace_fn")
if sanitized is messages:
if qwen_prep is not None:
sanitized = qwen_prep(sanitized)
else:
# Already deepcopied — transform in place
if qwen_prep_inplace is not None:
qwen_prep_inplace(sanitized)
elif qwen_prep is not None:
sanitized = qwen_prep(sanitized)
# Developer role swap for GPT-5/Codex models
model_lower = params.get("model_lower", (model or "").lower())
@@ -233,7 +205,7 @@ class ChatCompletionsTransport(ProviderTransport):
sanitized = list(sanitized)
sanitized[0] = {**sanitized[0], "role": "developer"}
api_kwargs: dict[str, Any] = {
api_kwargs: Dict[str, Any] = {
"model": model,
"messages": sanitized,
}
@@ -242,6 +214,19 @@ class ChatCompletionsTransport(ProviderTransport):
if timeout is not None:
api_kwargs["timeout"] = timeout
# Temperature
fixed_temp = params.get("fixed_temperature")
omit_temp = params.get("omit_temperature", False)
if omit_temp:
api_kwargs.pop("temperature", None)
elif fixed_temp is not None:
api_kwargs["temperature"] = fixed_temp
# Qwen metadata (caller precomputes {sessionId, promptId})
qwen_meta = params.get("qwen_session_metadata")
if qwen_meta and is_qwen:
api_kwargs["metadata"] = qwen_meta
# Tools
if tools:
# Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting
@@ -265,6 +250,13 @@ class ChatCompletionsTransport(ProviderTransport):
api_kwargs.update(max_tokens_fn(ephemeral))
elif max_tokens is not None and max_tokens_fn:
api_kwargs.update(max_tokens_fn(max_tokens))
elif is_nvidia_nim and max_tokens_fn:
api_kwargs.update(max_tokens_fn(16384))
elif is_qwen and max_tokens_fn:
api_kwargs.update(max_tokens_fn(65536))
elif is_kimi and max_tokens_fn:
# Kimi/Moonshot: 32000 matches Kimi CLI's default
api_kwargs.update(max_tokens_fn(32000))
elif anthropic_max_out is not None:
api_kwargs["max_tokens"] = anthropic_max_out
@@ -311,13 +303,12 @@ class ChatCompletionsTransport(ProviderTransport):
api_kwargs["reasoning_effort"] = _lm_effort
# extra_body assembly
extra_body: dict[str, Any] = {}
extra_body: Dict[str, Any] = {}
is_openrouter = params.get("is_openrouter", False)
is_nous = params.get("is_nous", False)
is_github_models = params.get("is_github_models", False)
provider_name = str(params.get("provider_name") or "").strip().lower()
base_url = params.get("base_url")
provider_prefs = params.get("provider_preferences")
if provider_prefs and is_openrouter:
@@ -341,21 +332,37 @@ class ChatCompletionsTransport(ProviderTransport):
if gh_reasoning is not None:
extra_body["reasoning"] = gh_reasoning
else:
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
if reasoning_config is not None:
rc = dict(reasoning_config)
if is_nous and rc.get("enabled") is False:
pass # omit for Nous when disabled
else:
extra_body["reasoning"] = rc
else:
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
if provider_name == "gemini":
raw_thinking_config = _build_gemini_thinking_config(model, reasoning_config)
if _is_gemini_openai_compat_base_url(base_url):
thinking_config = _snake_case_gemini_thinking_config(raw_thinking_config)
if thinking_config:
openai_compat_extra = extra_body.get("extra_body", {})
google_extra = openai_compat_extra.get("google", {})
google_extra["thinking_config"] = thinking_config
openai_compat_extra["google"] = google_extra
extra_body["extra_body"] = openai_compat_extra
elif raw_thinking_config:
extra_body["thinking_config"] = raw_thinking_config
elif provider_name == "google-gemini-cli":
if is_nous:
extra_body["tags"] = ["product=hermes-agent"]
# Ollama num_ctx
ollama_ctx = params.get("ollama_num_ctx")
if ollama_ctx:
options = extra_body.get("options", {})
options["num_ctx"] = ollama_ctx
extra_body["options"] = options
# Ollama/custom think=false
if params.get("is_custom_provider", False):
if reasoning_config and isinstance(reasoning_config, dict):
_effort = (reasoning_config.get("effort") or "").strip().lower()
_enabled = reasoning_config.get("enabled", True)
if _effort == "none" or _enabled is False:
extra_body["think"] = False
if is_qwen:
extra_body["vl_high_resolution_images"] = True
if provider_name in {"gemini", "google-gemini-cli"}:
thinking_config = _build_gemini_thinking_config(model, reasoning_config)
if thinking_config:
extra_body["thinking_config"] = thinking_config
@@ -375,120 +382,6 @@ class ChatCompletionsTransport(ProviderTransport):
return api_kwargs
def _build_kwargs_from_profile(self, profile, model, sanitized, tools, params):
"""Build API kwargs using a ProviderProfile — single path, no legacy flags.
This method replaces the entire flag-based kwargs assembly when a
provider_profile is passed. Every quirk comes from the profile object.
"""
from providers.base import OMIT_TEMPERATURE
# Message preprocessing
sanitized = profile.prepare_messages(sanitized)
# Developer role swap — model-name-based, applies to all providers
_model_lower = (model or "").lower()
if (
sanitized
and isinstance(sanitized[0], dict)
and sanitized[0].get("role") == "system"
and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS)
):
sanitized = list(sanitized)
sanitized[0] = {**sanitized[0], "role": "developer"}
api_kwargs: dict[str, Any] = {
"model": model,
"messages": sanitized,
}
# Temperature
if profile.fixed_temperature is OMIT_TEMPERATURE:
pass # Don't include temperature at all
elif profile.fixed_temperature is not None:
api_kwargs["temperature"] = profile.fixed_temperature
else:
# Use caller's temperature if provided
temp = params.get("temperature")
if temp is not None:
api_kwargs["temperature"] = temp
# Timeout
timeout = params.get("timeout")
if timeout is not None:
api_kwargs["timeout"] = timeout
# Tools — apply Moonshot/Kimi schema sanitization regardless of path
if tools:
if is_moonshot_model(model):
tools = sanitize_moonshot_tools(tools)
api_kwargs["tools"] = tools
# max_tokens resolution — priority: ephemeral > user > profile default
max_tokens_fn = params.get("max_tokens_param_fn")
ephemeral = params.get("ephemeral_max_output_tokens")
user_max = params.get("max_tokens")
anthropic_max = params.get("anthropic_max_output")
if ephemeral is not None and max_tokens_fn:
api_kwargs.update(max_tokens_fn(ephemeral))
elif user_max is not None and max_tokens_fn:
api_kwargs.update(max_tokens_fn(user_max))
elif profile.default_max_tokens and max_tokens_fn:
api_kwargs.update(max_tokens_fn(profile.default_max_tokens))
elif anthropic_max is not None:
api_kwargs["max_tokens"] = anthropic_max
# Provider-specific api_kwargs extras (reasoning_effort, metadata, etc.)
reasoning_config = params.get("reasoning_config")
extra_body_from_profile, top_level_from_profile = (
profile.build_api_kwargs_extras(
reasoning_config=reasoning_config,
supports_reasoning=params.get("supports_reasoning", False),
qwen_session_metadata=params.get("qwen_session_metadata"),
model=model,
ollama_num_ctx=params.get("ollama_num_ctx"),
)
)
api_kwargs.update(top_level_from_profile)
# extra_body assembly
extra_body: dict[str, Any] = {}
# Profile's extra_body (tags, provider prefs, vl_high_resolution, etc.)
profile_body = profile.build_extra_body(
session_id=params.get("session_id"),
provider_preferences=params.get("provider_preferences"),
model=model,
base_url=params.get("base_url"),
reasoning_config=reasoning_config,
)
if profile_body:
extra_body.update(profile_body)
# Profile's reasoning/thinking extra_body entries
if extra_body_from_profile:
extra_body.update(extra_body_from_profile)
# Merge any pre-built extra_body additions from the caller
additions = params.get("extra_body_additions")
if additions:
extra_body.update(additions)
# Request overrides (user config)
overrides = params.get("request_overrides")
if overrides:
for k, v in overrides.items():
if k == "extra_body" and isinstance(v, dict):
extra_body.update(v)
else:
api_kwargs[k] = v
if extra_body:
api_kwargs["extra_body"] = extra_body
return api_kwargs
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
"""Normalize OpenAI ChatCompletion to NormalizedResponse.
@@ -510,7 +403,7 @@ class ChatCompletionsTransport(ProviderTransport):
# Gemini 3 thinking models attach extra_content with
# thought_signature — without replay on the next turn the API
# rejects the request with 400.
tc_provider_data: dict[str, Any] = {}
tc_provider_data: Dict[str, Any] = {}
extra = getattr(tc, "extra_content", None)
if extra is None and hasattr(tc, "model_extra"):
extra = (tc.model_extra or {}).get("extra_content")
@@ -521,14 +414,12 @@ class ChatCompletionsTransport(ProviderTransport):
except Exception:
pass
tc_provider_data["extra_content"] = extra
tool_calls.append(
ToolCall(
id=tc.id,
name=tc.function.name,
arguments=tc.function.arguments,
provider_data=tc_provider_data or None,
)
)
tool_calls.append(ToolCall(
id=tc.id,
name=tc.function.name,
arguments=tc.function.arguments,
provider_data=tc_provider_data or None,
))
usage = None
if hasattr(response, "usage") and response.usage:
@@ -545,13 +436,9 @@ class ChatCompletionsTransport(ProviderTransport):
# so keep them apart in provider_data rather than merging.
reasoning = getattr(msg, "reasoning", None)
reasoning_content = getattr(msg, "reasoning_content", None)
if reasoning_content is None and hasattr(msg, "model_extra"):
model_extra = getattr(msg, "model_extra", None) or {}
if isinstance(model_extra, dict) and "reasoning_content" in model_extra:
reasoning_content = model_extra["reasoning_content"]
provider_data: Dict[str, Any] = {}
if reasoning_content is not None:
if reasoning_content:
provider_data["reasoning_content"] = reasoning_content
rd = getattr(msg, "reasoning_details", None)
if rd:
@@ -576,7 +463,7 @@ class ChatCompletionsTransport(ProviderTransport):
return False
return True
def extract_cache_stats(self, response: Any) -> dict[str, int] | None:
def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
"""Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
usage = getattr(response, "usage", None)
if usage is None:

View File

@@ -143,18 +143,7 @@ class ResponsesApiTransport(ProviderTransport):
kwargs["max_output_tokens"] = max_tokens
if is_xai_responses and session_id:
existing_extra_headers = kwargs.get("extra_headers")
merged_extra_headers: Dict[str, str] = {}
if isinstance(existing_extra_headers, dict):
merged_extra_headers.update(
{
str(key): str(value)
for key, value in existing_extra_headers.items()
if key and value is not None
}
)
merged_extra_headers["x-grok-conv-id"] = session_id
kwargs["extra_headers"] = merged_extra_headers
kwargs["extra_headers"] = {"x-grok-conv-id": session_id}
return kwargs

View File

@@ -12,7 +12,7 @@ from __future__ import annotations
import json
from dataclasses import dataclass, field
from typing import Any
from typing import Any, Dict, List, Optional
@dataclass
@@ -32,10 +32,10 @@ class ToolCall:
* Others: ``None``
"""
id: str | None
id: Optional[str]
name: str
arguments: str # JSON string
provider_data: dict[str, Any] | None = field(default=None, repr=False)
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
# ── Backward compatibility ──────────────────────────────────
# The agent loop reads tc.function.name / tc.function.arguments
@@ -47,17 +47,17 @@ class ToolCall:
return "function"
@property
def function(self) -> ToolCall:
def function(self) -> "ToolCall":
"""Return self so tc.function.name / tc.function.arguments work."""
return self
@property
def call_id(self) -> str | None:
def call_id(self) -> Optional[str]:
"""Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
return (self.provider_data or {}).get("call_id")
@property
def response_item_id(self) -> str | None:
def response_item_id(self) -> Optional[str]:
"""Codex response_item_id from provider_data."""
return (self.provider_data or {}).get("response_item_id")
@@ -101,18 +101,18 @@ class NormalizedResponse:
* Others: ``None``
"""
content: str | None
tool_calls: list[ToolCall] | None
content: Optional[str]
tool_calls: Optional[List[ToolCall]]
finish_reason: str # "stop", "tool_calls", "length", "content_filter"
reasoning: str | None = None
usage: Usage | None = None
provider_data: dict[str, Any] | None = field(default=None, repr=False)
reasoning: Optional[str] = None
usage: Optional[Usage] = None
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
# ── Backward compatibility ──────────────────────────────────
# The shim _nr_to_assistant_message() mapped these from provider_data.
# These properties let NormalizedResponse pass through directly.
@property
def reasoning_content(self) -> str | None:
def reasoning_content(self) -> Optional[str]:
pd = self.provider_data or {}
return pd.get("reasoning_content")
@@ -136,9 +136,8 @@ class NormalizedResponse:
# Factory helpers
# ---------------------------------------------------------------------------
def build_tool_call(
id: str | None,
id: Optional[str],
name: str,
arguments: Any,
**provider_fields: Any,
@@ -152,7 +151,7 @@ def build_tool_call(
return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
def map_finish_reason(reason: str | None, mapping: dict[str, str]) -> str:
def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
"""Translate a provider-specific stop reason to the normalised set.
Falls back to ``"stop"`` for unknown or ``None`` reasons.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.7 MiB

View File

@@ -1,46 +0,0 @@
#!/usr/bin/env node
/**
* Copy font and asset folders from @nous-research/ui into public/ for Vite.
*
* Locates @nous-research/ui by walking up from this script looking for
* node_modules/@nous-research/ui — works whether the dep is co-located
* (non-workspace layout) or hoisted to the repo root (npm workspaces).
*/
const fs = require('node:fs')
const path = require('node:path')
const DASHBOARD_ROOT = path.resolve(__dirname, '..')
function locateUiPackage() {
  // Walk upward from the dashboard root until a node_modules/@nous-research/ui
  // containing a package.json is found — handles both a co-located dependency
  // and one hoisted to the repo root by npm workspaces.
  const { root } = path.parse(DASHBOARD_ROOT)
  let current = DASHBOARD_ROOT
  for (;;) {
    const candidate = path.join(current, 'node_modules', '@nous-research', 'ui')
    if (fs.existsSync(path.join(candidate, 'package.json'))) {
      return candidate
    }
    if (current === root) break
    current = path.dirname(current)
  }
  throw new Error(
    '@nous-research/ui not found. Run `npm install` from the repo root.'
  )
}
const uiRoot = locateUiPackage()
const distRoot = path.join(uiRoot, 'dist')
// [source folder under the package's dist/, destination inside public/]
const mappings = [
  ['fonts', path.join(DASHBOARD_ROOT, 'public', 'fonts')],
  ['assets', path.join(DASHBOARD_ROOT, 'public', 'ds-assets')],
]
for (const [srcName, destPath] of mappings) {
  const srcPath = path.join(distRoot, srcName)
  if (!fs.existsSync(srcPath)) {
    throw new Error(`Missing ${srcPath} in @nous-research/ui — rebuild that package.`)
  }
  // Remove the destination wholesale before copying so stale files never linger.
  fs.rmSync(destPath, { recursive: true, force: true })
  fs.cpSync(srcPath, destPath, { recursive: true })
  console.log(`synced ${path.relative(DASHBOARD_ROOT, destPath)}`)
}

View File

@@ -1,63 +0,0 @@
import { forwardRef, type ElementType, type HTMLAttributes, type ReactNode } from "react";
import { cn } from "@/lib/utils";
type TypographyProps = HTMLAttributes<HTMLElement> & {
  /** Element or component to render as; defaults to "span". */
  as?: ElementType;
  children?: ReactNode;
  // Font-family switches; when none is set, the sans fallback applies.
  compressed?: boolean;
  courier?: boolean;
  expanded?: boolean;
  mondwest?: boolean;
  mono?: boolean;
  sans?: boolean;
  /** Size preset mapped through variantClasses below. */
  variant?: "sm" | "md" | "lg" | "xl";
};

const variantClasses: Record<NonNullable<TypographyProps["variant"]>, string> = {
  sm: "leading-[1.4] text-[.9375rem] tracking-[0.1875rem]",
  // NOTE(review): "md" and "lg" are currently identical — presumably
  // intentional (reserved for later divergence); confirm.
  md: "text-[2.625rem] leading-[1] tracking-[0.0525rem]",
  lg: "text-[2.625rem] leading-[1] tracking-[0.0525rem]",
  xl: "text-[4.5rem] leading-[1] tracking-[0.135rem]",
};
/**
 * Design-system text primitive: renders the `as` element (default <span>)
 * with font classes driven by the boolean font flags and size classes from
 * the `variant` preset. Explicit `className` is passed last to cn().
 */
export const Typography = forwardRef<HTMLElement, TypographyProps>(function Typography(
  {
    as: Component = "span",
    className,
    compressed,
    courier,
    expanded,
    mondwest,
    mono,
    sans,
    variant,
    ...props
  },
  ref,
) {
  // The sans fallback only applies when no explicit font flag was given.
  const hasFontVariant = compressed || courier || expanded || mondwest || mono || sans;
  return (
    <Component
      className={cn(
        compressed && "font-compressed",
        courier && "font-courier",
        expanded && "font-expanded",
        mondwest && "font-mondwest tracking-[0.1875rem]",
        mono && "font-mono",
        (!hasFontVariant || sans) && "font-sans",
        variant && variantClasses[variant],
        className,
      )}
      ref={ref}
      {...props}
    />
  );
});
/** Bold heading preset: Typography rendered as <h2>, defaulting to the "lg" variant. */
export const H2 = forwardRef<HTMLHeadingElement, Omit<TypographyProps, "as">>(function H2(
  { className, variant = "lg", ...props },
  ref,
) {
  return <Typography as="h2" className={cn("font-bold", className)} variant={variant} ref={ref} {...props} />;
});

View File

@@ -1,36 +0,0 @@
import {
JsonRpcGatewayClient,
type ConnectionState,
type GatewayEvent,
type GatewayEventName,
} from "@hermes/shared";
export type { ConnectionState, GatewayEvent, GatewayEventName };
/**
* Browser wrapper for the shared tui_gateway JSON-RPC client.
*
* Dashboard resolves its token and host from the served page. Desktop uses the
* same shared protocol client, but supplies an absolute wsUrl from Electron.
*/
export class GatewayClient extends JsonRpcGatewayClient {
  /**
   * Connect using the explicit token, or fall back to the token injected
   * into the page by the dashboard server. Throws when neither is present.
   */
  async connect(token?: string): Promise<void> {
    const sessionToken = token ?? window.__HERMES_SESSION_TOKEN__ ?? "";
    if (!sessionToken) {
      throw new Error(
        "Session token not available — page must be served by the Hermes dashboard",
      );
    }
    // Mirror the page's own scheme: wss: when served over https:.
    const scheme = location.protocol === "https:" ? "wss:" : "ws:";
    const wsUrl = `${scheme}//${location.host}/api/ws?token=${encodeURIComponent(sessionToken)}`;
    await super.connect(wsUrl);
  }
}
declare global {
  interface Window {
    /** Session token injected into the served page by the dashboard server. */
    __HERMES_SESSION_TOKEN__?: string;
  }
}

View File

@@ -1,817 +0,0 @@
import { useCallback, useEffect, useLayoutEffect, useState } from "react";
import {
Brain,
ChevronDown,
Cpu,
DollarSign,
Eye,
RefreshCw,
Settings2,
Star,
Wrench,
Zap,
} from "lucide-react";
import { api } from "@/lib/api";
import type {
AuxiliaryModelsResponse,
AuxiliaryTaskAssignment,
ModelsAnalyticsModelEntry,
ModelsAnalyticsResponse,
} from "@/lib/api";
import { timeAgo } from "@/lib/utils";
import { formatTokenCount } from "@/lib/format";
import { Button } from "@nous-research/ui/ui/components/button";
import { Spinner } from "@nous-research/ui/ui/components/spinner";
import { Stats } from "@nous-research/ui/ui/components/stats";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { Badge } from "@nous-research/ui/ui/components/badge";
import { usePageHeader } from "@/contexts/usePageHeader";
import { useI18n } from "@/i18n";
import { PluginSlot } from "@/plugins";
import { ModelPickerDialog } from "@/components/ModelPickerDialog";
// Selectable analytics lookback windows (in days) rendered as buttons in the
// page header.
const PERIODS = [
  { label: "7d", days: 7 },
  { label: "30d", days: 30 },
  { label: "90d", days: 90 },
] as const;
// Must match _AUX_TASK_SLOTS in hermes_cli/web_server.py.
// `key` is the wire identifier sent to the assignment API; `label`/`hint`
// are display-only strings for the settings panel and per-card menus.
const AUX_TASKS: readonly { key: string; label: string; hint: string }[] = [
  { key: "vision", label: "Vision", hint: "Image analysis" },
  { key: "web_extract", label: "Web Extract", hint: "Page summarization" },
  { key: "compression", label: "Compression", hint: "Context compaction" },
  { key: "session_search", label: "Session Search", hint: "Recall queries" },
  { key: "skills_hub", label: "Skills Hub", hint: "Skill search" },
  { key: "approval", label: "Approval", hint: "Smart auto-approve" },
  { key: "mcp", label: "MCP", hint: "MCP tool routing" },
  { key: "title_generation", label: "Title Gen", hint: "Session titles" },
  { key: "curator", label: "Curator", hint: "Skill-usage review" },
] as const;
/** Compact token count: 2_500_000 → "2.5M", 1_500 → "1.5K", 999 → "999". */
function formatTokens(n: number): string {
  const MILLION = 1_000_000;
  const THOUSAND = 1_000;
  if (n >= MILLION) return `${(n / MILLION).toFixed(1)}M`;
  return n >= THOUSAND ? `${(n / THOUSAND).toFixed(1)}K` : `${n}`;
}
/**
 * Dollar figure with precision scaled to magnitude: ≥$1 uses 2 decimals,
 * ≥1¢ uses 3, smaller positive amounts use 4; zero (or negative) → "$0".
 */
function formatCost(n: number): string {
  if (n <= 0) return "$0";
  const digits = n >= 1 ? 2 : n >= 0.01 ? 3 : 4;
  return `$${n.toFixed(digits)}`;
}
/**
 * Short model name: strip the vendor prefix up to the first "/" — e.g.
 * "openrouter/meta/llama" → "meta/llama". A leading slash or no slash at all
 * leaves the string unchanged.
 */
function shortModelName(model: string): string {
  const sep = model.indexOf("/");
  return sep > 0 ? model.slice(sep + 1) : model;
}
/**
 * Vendor prefix of a model string, e.g. "anthropic/claude-opus-4.7" →
 * "anthropic". When there is no prefix (no "/" past position 0), returns the
 * fallback, or "" if the fallback is absent/empty.
 */
function modelVendor(model: string, fallback?: string): string {
  const sep = model.indexOf("/");
  return sep > 0 ? model.slice(0, sep) : fallback || "";
}
/**
 * Stacked horizontal bar visualizing a model's token usage split by category
 * (cache read / reasoning / input / output), with a compact legend beneath.
 * Renders nothing when every count is zero.
 */
function TokenBar({
  input,
  output,
  cacheRead,
  reasoning,
}: {
  input: number;
  output: number;
  cacheRead: number;
  reasoning: number;
}) {
  const total = input + output + cacheRead + reasoning;
  if (total === 0) return null;
  // Drop zero-valued segments so they neither render 0-width divs nor
  // clutter the legend.
  const segments = [
    { value: cacheRead, color: "bg-blue-400/60", label: "Cache Read" },
    { value: reasoning, color: "bg-purple-400/60", label: "Reasoning" },
    { value: input, color: "bg-[#ffe6cb]/70", label: "Input" },
    { value: output, color: "bg-emerald-500/70", label: "Output" },
  ].filter((s) => s.value > 0);
  return (
    <div className="space-y-1">
      {/* Segment widths are percentages of the combined total. */}
      <div className="flex h-2 w-full overflow-hidden rounded-sm bg-muted/30">
        {segments.map((s, i) => (
          <div
            key={i}
            className={`${s.color} transition-all duration-300`}
            style={{ width: `${(s.value / total) * 100}%` }}
          />
        ))}
      </div>
      <div className="flex flex-wrap gap-x-3 gap-y-0.5 text-[10px] text-muted-foreground">
        {segments.map((s, i) => (
          <span key={i} className="flex items-center gap-1">
            <span className={`inline-block h-1.5 w-1.5 rounded-full ${s.color}`} />
            {s.label} {formatTokens(s.value)}
          </span>
        ))}
      </div>
    </div>
  );
}
/**
 * Row of small badges for a model's advertised capabilities (tools, vision,
 * reasoning) plus its model family. Renders nothing when no capability flag
 * or family is set.
 */
function CapabilityBadges({
  capabilities,
}: {
  capabilities: ModelsAnalyticsModelEntry["capabilities"];
}) {
  const hasAny =
    capabilities.supports_tools ||
    capabilities.supports_vision ||
    capabilities.supports_reasoning ||
    capabilities.model_family;
  if (!hasAny) return null;
  return (
    <div className="flex flex-wrap items-center gap-1.5">
      {capabilities.supports_tools && (
        <span className="inline-flex items-center gap-1 bg-emerald-500/10 px-1.5 py-0.5 text-[10px] font-medium text-emerald-600 dark:text-emerald-400">
          <Wrench className="h-2.5 w-2.5" /> Tools
        </span>
      )}
      {capabilities.supports_vision && (
        <span className="inline-flex items-center gap-1 bg-blue-500/10 px-1.5 py-0.5 text-[10px] font-medium text-blue-600 dark:text-blue-400">
          <Eye className="h-2.5 w-2.5" /> Vision
        </span>
      )}
      {capabilities.supports_reasoning && (
        <span className="inline-flex items-center gap-1 bg-purple-500/10 px-1.5 py-0.5 text-[10px] font-medium text-purple-600 dark:text-purple-400">
          <Brain className="h-2.5 w-2.5" /> Reasoning
        </span>
      )}
      {capabilities.model_family && (
        <span className="inline-flex items-center bg-muted px-1.5 py-0.5 text-[10px] font-medium text-muted-foreground">
          {capabilities.model_family}
        </span>
      )}
    </div>
  );
}
/* ──────────────────────────────────────────────────────────────────── */
/* Per-card "Use as" menu */
/* ──────────────────────────────────────────────────────────────────── */
/**
 * Per-card "Use as" dropdown: assigns the card's model as the main model, to
 * all auxiliary tasks, or to one specific auxiliary task via the assignment
 * API. Shows a "current" marker next to the slot this model already fills.
 */
function UseAsMenu({
  provider,
  model,
  isMain,
  mainAuxTask,
  onAssigned,
}: {
  provider: string;
  model: string;
  /** True when this card's model+provider match config.yaml's main slot. */
  isMain: boolean;
  /** If this model is assigned to a specific aux task, that task's key. */
  mainAuxTask: string | null;
  onAssigned(): void;
}) {
  const [open, setOpen] = useState(false);
  const [busy, setBusy] = useState(false);
  const [error, setError] = useState<string | null>(null);
  // Persist the assignment, notify the parent, and close the menu.
  // Errors stay inside the menu (rendered below the task list).
  const assign = async (
    scope: "main" | "auxiliary",
    task: string,
  ) => {
    if (!provider || !model) {
      setError("Missing provider/model");
      return;
    }
    setBusy(true);
    setError(null);
    try {
      await api.setModelAssignment({ scope, provider, model, task });
      onAssigned();
      setOpen(false);
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e));
    } finally {
      setBusy(false);
    }
  };
  // Close on outside click.
  // The [data-use-as-menu] marker on the root div scopes the check so clicks
  // inside this menu (including the trigger button) don't dismiss it.
  useEffect(() => {
    if (!open) return;
    const onDown = (e: MouseEvent) => {
      const target = e.target as HTMLElement | null;
      if (target && !target.closest?.("[data-use-as-menu]")) setOpen(false);
    };
    window.addEventListener("mousedown", onDown);
    return () => window.removeEventListener("mousedown", onDown);
  }, [open]);
  return (
    <div className="relative" data-use-as-menu>
      <Button
        size="sm"
        outlined
        onClick={() => setOpen((v) => !v)}
        disabled={busy}
        className="text-[10px] h-6 px-2"
        prefix={busy ? <Spinner /> : null}
      >
        Use as <ChevronDown className="h-3 w-3" />
      </Button>
      {open && (
        <div className="absolute right-0 top-full mt-1 z-50 min-w-[220px] border border-border bg-card shadow-lg">
          {/* Main-model slot; empty task string means "not an aux task". */}
          <button
            type="button"
            onClick={() => assign("main", "")}
            disabled={busy}
            className="flex w-full items-center justify-between px-3 py-2 text-xs hover:bg-muted/50 disabled:opacity-40"
          >
            <span className="flex items-center gap-2">
              <Star className="h-3 w-3" />
              Main model
            </span>
            {isMain && (
              <span className="text-[9px] uppercase tracking-wider text-primary/80">
                current
              </span>
            )}
          </button>
          <div className="border-t border-border/50 px-3 py-1.5 text-[9px] uppercase tracking-wider text-muted-foreground">
            Auxiliary task
          </div>
          {/* Empty task under scope "auxiliary" assigns every aux task. */}
          <button
            type="button"
            onClick={() => assign("auxiliary", "")}
            disabled={busy}
            className="flex w-full items-center justify-between px-3 py-1.5 text-xs hover:bg-muted/50 disabled:opacity-40"
          >
            <span>All auxiliary tasks</span>
          </button>
          {AUX_TASKS.map((t) => (
            <button
              key={t.key}
              type="button"
              onClick={() => assign("auxiliary", t.key)}
              disabled={busy}
              className="flex w-full items-center justify-between px-3 py-1.5 text-xs hover:bg-muted/50 disabled:opacity-40"
            >
              <span>{t.label}</span>
              {mainAuxTask === t.key && (
                <span className="text-[9px] uppercase tracking-wider text-primary/80">
                  current
                </span>
              )}
            </button>
          ))}
          {error && (
            <div className="px-3 py-2 text-[10px] text-destructive border-t border-border/50">
              {error}
            </div>
          )}
        </div>
      )}
    </div>
  );
}
/* ──────────────────────────────────────────────────────────────────── */
/* ModelCard */
/* ──────────────────────────────────────────────────────────────────── */
/**
 * Analytics card for one model: usage rank, token breakdown bar, per-session
 * stats, cost/tool-call footer, and capability badges. Highlights the card
 * when the model is the configured main model and exposes a "Use as" menu
 * for re-assigning it.
 */
function ModelCard({
  entry,
  rank,
  main,
  aux,
  onAssigned,
}: {
  entry: ModelsAnalyticsModelEntry;
  rank: number;
  main: { provider: string; model: string } | null;
  aux: AuxiliaryTaskAssignment[];
  onAssigned(): void;
}) {
  const { t } = useI18n();
  // Analytics rows may omit the provider; fall back to the model's vendor
  // prefix so assignment calls still carry a provider.
  const provider = entry.provider || modelVendor(entry.model);
  const totalTokens = entry.input_tokens + entry.output_tokens;
  const caps = entry.capabilities;
  const isMain =
    !!main &&
    main.provider === provider &&
    main.model === entry.model;
  // First aux task currently using this model (if any).
  const mainAuxTask =
    aux.find(
      (a) => a.provider === provider && a.model === entry.model,
    )?.task ?? null;
  return (
    <Card className={isMain ? "ring-1 ring-primary/40" : undefined}>
      <CardHeader className="pb-3">
        <div className="flex items-start justify-between gap-2">
          <div className="min-w-0 flex-1">
            <div className="flex items-center gap-2">
              <span className="text-muted-foreground/50 text-xs font-mono">
                #{rank}
              </span>
              <CardTitle className="text-sm font-mono-ui truncate">
                {shortModelName(entry.model)}
              </CardTitle>
              {isMain && (
                <span className="inline-flex items-center gap-0.5 bg-primary/15 px-1.5 py-0.5 text-[9px] font-medium uppercase tracking-wider text-primary">
                  <Star className="h-2.5 w-2.5" /> main
                </span>
              )}
              {mainAuxTask && (
                <span className="inline-flex items-center bg-purple-500/10 px-1.5 py-0.5 text-[9px] font-medium uppercase tracking-wider text-purple-600 dark:text-purple-400">
                  aux · {mainAuxTask}
                </span>
              )}
            </div>
            <div className="flex items-center gap-2 mt-1">
              {provider && (
                <Badge tone="secondary" className="text-[9px]">
                  {provider}
                </Badge>
              )}
              {caps.context_window && caps.context_window > 0 && (
                <span className="text-[10px] text-muted-foreground">
                  {formatTokenCount(caps.context_window)} ctx
                </span>
              )}
              {caps.max_output_tokens && caps.max_output_tokens > 0 && (
                <span className="text-[10px] text-muted-foreground">
                  {formatTokenCount(caps.max_output_tokens)} out
                </span>
              )}
            </div>
          </div>
          <div className="flex flex-col items-end gap-1 shrink-0">
            <div className="text-right">
              <div className="text-xs font-mono font-semibold">
                {formatTokens(totalTokens)}
              </div>
              <div className="text-[10px] text-muted-foreground">
                {t.models.tokens}
              </div>
            </div>
            <UseAsMenu
              provider={provider}
              model={entry.model}
              isMain={isMain}
              mainAuxTask={mainAuxTask}
              onAssigned={onAssigned}
            />
          </div>
        </div>
      </CardHeader>
      <CardContent className="space-y-3 pt-0">
        <TokenBar
          input={entry.input_tokens}
          output={entry.output_tokens}
          cacheRead={entry.cache_read_tokens}
          reasoning={entry.reasoning_tokens}
        />
        <div className="grid grid-cols-3 gap-2 text-xs">
          <div className="text-center">
            <div className="font-mono font-semibold">{entry.sessions}</div>
            <div className="text-[10px] text-muted-foreground">
              {t.models.sessions}
            </div>
          </div>
          <div className="text-center">
            <div className="font-mono font-semibold">
              {formatTokens(entry.avg_tokens_per_session)}
            </div>
            <div className="text-[10px] text-muted-foreground">
              {t.models.avgPerSession}
            </div>
          </div>
          <div className="text-center">
            <div className="font-mono font-semibold">
              {entry.api_calls > 0 ? formatTokens(entry.api_calls) : "—"}
            </div>
            <div className="text-[10px] text-muted-foreground">
              {t.models.apiCalls}
            </div>
          </div>
        </div>
        <div className="flex items-center justify-between text-[10px] text-muted-foreground border-t border-border/30 pt-2">
          <div className="flex items-center gap-3">
            {entry.estimated_cost > 0 && (
              <span className="flex items-center gap-0.5">
                <DollarSign className="h-2.5 w-2.5" />
                {formatCost(entry.estimated_cost)}
              </span>
            )}
            {entry.tool_calls > 0 && (
              <span className="flex items-center gap-0.5">
                <Zap className="h-2.5 w-2.5" />
                {entry.tool_calls} {t.models.toolCalls}
              </span>
            )}
          </div>
          {entry.last_used_at > 0 && (
            <span>{timeAgo(entry.last_used_at)}</span>
          )}
        </div>
        <CapabilityBadges capabilities={entry.capabilities} />
      </CardContent>
    </Card>
  );
}
/* ──────────────────────────────────────────────────────────────────── */
/* Model Settings panel (top of page) */
/* ──────────────────────────────────────────────────────────────────── */
// Which slot the ModelPickerDialog is currently editing: the main model or a
// single auxiliary task (identified by its AUX_TASKS key).
type PickerTarget =
  | { kind: "main" }
  | { kind: "aux"; task: string };
/**
 * Top-of-page settings panel: shows the configured main model, a collapsible
 * list of per-auxiliary-task overrides, a bulk "reset all to auto" action,
 * and opens ModelPickerDialog to change any slot.
 */
function ModelSettingsPanel({
  aux,
  refreshKey,
  onSaved,
}: {
  aux: AuxiliaryModelsResponse | null;
  refreshKey: number;
  onSaved(): void;
}) {
  const [expanded, setExpanded] = useState(false);
  const [picker, setPicker] = useState<PickerTarget | null>(null);
  const [resetBusy, setResetBusy] = useState(false);
  const mainProv = aux?.main.provider ?? "";
  const mainModel = aux?.main.model ?? "";
  // Persist one assignment and let the parent refresh its aux snapshot.
  const applyAssignment = async ({
    scope,
    task,
    provider,
    model,
  }: {
    scope: "main" | "auxiliary";
    task: string;
    provider: string;
    model: string;
  }) => {
    await api.setModelAssignment({ scope, task, provider, model });
    onSaved();
  };
  // Clears every per-task override. The task value "__reset__" is a sentinel
  // sent to the assignment API — confirm server-side handling matches.
  // NOTE(review): there is no catch here; an API failure surfaces as an
  // unhandled rejection from the onClick handler rather than a toast.
  const resetAllAux = async () => {
    if (!window.confirm("Reset every auxiliary task to 'auto'? This overrides any per-task overrides you've set.")) {
      return;
    }
    setResetBusy(true);
    try {
      await api.setModelAssignment({
        scope: "auxiliary",
        task: "__reset__",
        provider: "",
        model: "",
      });
      onSaved();
    } finally {
      setResetBusy(false);
    }
  };
  return (
    <Card>
      <CardHeader className="pb-3">
        <div className="flex items-center justify-between gap-3 flex-wrap">
          <div className="flex items-center gap-2">
            <Settings2 className="h-4 w-4 text-muted-foreground" />
            <CardTitle className="text-sm">Model Settings</CardTitle>
            <span className="text-[10px] text-muted-foreground">
              applies to new sessions
            </span>
          </div>
          <Button
            size="sm"
            outlined
            onClick={() => setExpanded((v) => !v)}
            className="text-xs"
          >
            {expanded ? "Hide auxiliary" : "Show auxiliary"}
            <ChevronDown
              className={`h-3 w-3 transition-transform ${expanded ? "rotate-180" : ""}`}
            />
          </Button>
        </div>
      </CardHeader>
      <CardContent className="space-y-3 pt-0">
        {/* Main row */}
        <div className="flex items-center justify-between gap-3 bg-muted/20 border border-border/50 px-3 py-2">
          <div className="min-w-0 flex-1">
            <div className="flex items-center gap-2 mb-0.5">
              <Star className="h-3 w-3 text-primary" />
              <span className="text-xs font-medium uppercase tracking-wider">
                Main model
              </span>
            </div>
            <div className="text-xs font-mono text-muted-foreground truncate">
              {mainProv || "(unset)"}
              {mainProv && mainModel && " · "}
              {mainModel || "(unset)"}
            </div>
          </div>
          <Button
            size="sm"
            onClick={() => setPicker({ kind: "main" })}
            className="text-xs"
          >
            Change
          </Button>
        </div>
        {/* Auxiliary rows */}
        {expanded && (
          <div className="space-y-1 border-t border-border/50 pt-3">
            <div className="flex items-center justify-between pb-1">
              <div className="text-[10px] uppercase tracking-wider text-muted-foreground">
                Auxiliary tasks
              </div>
              <Button
                size="sm"
                outlined
                onClick={resetAllAux}
                disabled={resetBusy}
                className="text-[10px] h-6"
                prefix={resetBusy ? <Spinner /> : null}
              >
                Reset all to auto
              </Button>
            </div>
            <p className="text-[10px] text-muted-foreground/80 pb-2">
              Auxiliary tasks handle side-jobs like vision, session search, and
              compression. <span className="font-mono">auto</span> means
              &quot;use the main model&quot;. Override per-task when you want a
              cheap/fast model for a specific job.
            </p>
            {AUX_TASKS.map((t) => {
              const cur = aux?.tasks.find((a) => a.task === t.key);
              // Missing entry, "auto" provider, or empty provider all mean
              // "use the main model".
              const isAuto =
                !cur || cur.provider === "auto" || !cur.provider;
              return (
                <div
                  key={t.key}
                  className="flex items-center justify-between gap-3 px-3 py-1.5 border border-border/30 bg-card/50 hover:bg-muted/20 transition-colors"
                >
                  <div className="min-w-0 flex-1">
                    <div className="flex items-baseline gap-2">
                      <span className="text-xs font-medium">{t.label}</span>
                      <span className="text-[10px] text-muted-foreground/60">
                        {t.hint}
                      </span>
                    </div>
                    <div className="text-[10px] font-mono text-muted-foreground truncate">
                      {isAuto
                        ? "auto (use main model)"
                        : `${cur?.provider} · ${cur?.model || "(provider default)"}`}
                    </div>
                  </div>
                  <Button
                    size="sm"
                    outlined
                    onClick={() => setPicker({ kind: "aux", task: t.key })}
                    className="text-[10px] h-6"
                  >
                    Change
                  </Button>
                </div>
              );
            })}
          </div>
        )}
        {picker && (
          <ModelPickerDialog
            key={`picker-${refreshKey}`}
            loader={api.getModelOptions}
            alwaysGlobal
            title={
              picker.kind === "main"
                ? "Set Main Model"
                : `Set Auxiliary: ${
                    AUX_TASKS.find((t) => t.key === picker.task)?.label ??
                    picker.task
                  }`
            }
            onApply={async ({ provider, model }) => {
              await applyAssignment({
                scope: picker.kind === "main" ? "main" : "auxiliary",
                task: picker.kind === "main" ? "" : picker.task,
                provider,
                model,
              });
            }}
            onClose={() => setPicker(null)}
          />
        )}
      </CardContent>
    </Card>
  );
}
/* ──────────────────────────────────────────────────────────────────── */
/* Page */
/* ──────────────────────────────────────────────────────────────────── */
/**
 * Models analytics page: totals for the selected period, one ModelCard per
 * model, and the ModelSettingsPanel for main/auxiliary assignments. Period
 * buttons and a refresh action are mounted into the shared page header.
 */
export default function ModelsPage() {
  const [days, setDays] = useState(30);
  const [data, setData] = useState<ModelsAnalyticsResponse | null>(null);
  const [aux, setAux] = useState<AuxiliaryModelsResponse | null>(null);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  // Bumped after each assignment so ModelPickerDialog remounts with fresh state.
  const [saveKey, setSaveKey] = useState(0);
  const { t } = useI18n();
  const { setAfterTitle, setEnd } = usePageHeader();
  // Fetch analytics and aux assignments in parallel; an aux failure is
  // tolerated (nulled) so analytics still render.
  const load = useCallback(() => {
    setLoading(true);
    setError(null);
    Promise.all([
      api.getModelsAnalytics(days),
      api.getAuxiliaryModels().catch(() => null),
    ])
      .then(([models, auxData]) => {
        setData(models);
        setAux(auxData);
      })
      .catch((err) => setError(String(err)))
      .finally(() => setLoading(false));
  }, [days]);
  const onAssigned = useCallback(() => {
    // Reload aux state after any assignment change.
    api
      .getAuxiliaryModels()
      .then(setAux)
      .catch(() => {});
    setSaveKey((k) => k + 1);
  }, []);
  // Mount period badge + controls into the page header; layout effect avoids
  // a flash of empty header. Cleanup clears both slots on unmount.
  useLayoutEffect(() => {
    const periodLabel =
      PERIODS.find((p) => p.days === days)?.label ?? `${days}d`;
    setAfterTitle(
      <span className="flex items-center gap-2">
        {loading && <Spinner className="shrink-0 text-base text-primary" />}
        <Badge tone="secondary" className="text-[10px]">
          {periodLabel}
        </Badge>
      </span>,
    );
    setEnd(
      <div className="flex w-full min-w-0 flex-wrap items-center justify-end gap-2 sm:gap-2">
        <div className="flex flex-wrap items-center gap-1.5">
          {PERIODS.map((p) => (
            <Button
              key={p.label}
              type="button"
              size="sm"
              outlined={days !== p.days}
              onClick={() => setDays(p.days)}
            >
              {p.label}
            </Button>
          ))}
        </div>
        <Button
          type="button"
          size="sm"
          outlined
          onClick={load}
          disabled={loading}
          prefix={loading ? <Spinner /> : <RefreshCw />}
        >
          {t.common.refresh}
        </Button>
      </div>,
    );
    return () => {
      setAfterTitle(null);
      setEnd(null);
    };
  }, [days, loading, load, setAfterTitle, setEnd, t.common.refresh]);
  // Initial load and reload whenever the period (via load's deps) changes.
  useEffect(() => {
    load();
  }, [load]);
  return (
    <div className="flex flex-col gap-6">
      <PluginSlot name="models:top" />
      <ModelSettingsPanel
        aux={aux}
        refreshKey={saveKey}
        onSaved={onAssigned}
      />
      {loading && !data && (
        <div className="flex items-center justify-center py-24">
          <Spinner className="text-2xl text-primary" />
        </div>
      )}
      {error && (
        <Card>
          <CardContent className="py-6">
            <p className="text-sm text-destructive text-center">{error}</p>
          </CardContent>
        </Card>
      )}
      {data && (
        <>
          <Card>
            <CardContent className="py-6">
              <Stats
                items={[
                  {
                    label: t.models.modelsUsed,
                    value: String(data.totals.distinct_models),
                  },
                  {
                    label: t.analytics.totalTokens,
                    value: formatTokens(
                      data.totals.total_input + data.totals.total_output,
                    ),
                  },
                  {
                    label: t.analytics.input,
                    value: formatTokens(data.totals.total_input),
                  },
                  {
                    label: t.analytics.output,
                    value: formatTokens(data.totals.total_output),
                  },
                  {
                    label: t.models.estimatedCost,
                    value: formatCost(data.totals.total_estimated_cost),
                  },
                  {
                    label: t.analytics.totalSessions,
                    value: String(data.totals.total_sessions),
                  },
                ]}
              />
            </CardContent>
          </Card>
          {data.models.length > 0 ? (
            <div className="grid gap-4 md:grid-cols-2 xl:grid-cols-3">
              {data.models.map((m, i) => (
                <ModelCard
                  key={`${m.model}:${m.provider}`}
                  entry={m}
                  rank={i + 1}
                  main={aux?.main ?? null}
                  aux={aux?.tasks ?? []}
                  onAssigned={onAssigned}
                />
              ))}
            </div>
          ) : (
            <Card>
              <CardContent className="py-12">
                <div className="flex flex-col items-center text-muted-foreground">
                  <Cpu className="h-8 w-8 mb-3 opacity-40" />
                  <p className="text-sm font-medium">{t.models.noModelsData}</p>
                  <p className="text-xs mt-1 text-muted-foreground/60">
                    {t.models.startSession}
                  </p>
                </div>
              </CardContent>
            </Card>
          )}
        </>
      )}
      <PluginSlot name="models:bottom" />
    </div>
  );
}

View File

@@ -1,581 +0,0 @@
import { useCallback, useEffect, useState } from "react";
import { ExternalLink, RefreshCw, Puzzle, Trash2, Eye, EyeOff } from "lucide-react";
import type { Translations } from "@/i18n/types";
import { Link } from "react-router-dom";
import { api } from "@/lib/api";
import type { HubAgentPluginRow, PluginsHubResponse } from "@/lib/api";
import { Button } from "@nous-research/ui/ui/components/button";
import { Badge } from "@nous-research/ui/ui/components/badge";
import { Select, SelectOption } from "@nous-research/ui/ui/components/select";
import { Switch } from "@nous-research/ui/ui/components/switch";
import { Spinner } from "@nous-research/ui/ui/components/spinner";
import { CommandBlock } from "@nous-research/ui/ui/components/command-block";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { useToast } from "@/hooks/useToast";
import { Toast } from "@/components/Toast";
import { useI18n } from "@/i18n";
import { PluginSlot } from "@/plugins";
import { cn } from "@/lib/utils";
import { usePageHeader } from "@/contexts/usePageHeader";
/** Select value for built-in memory (`config` uses empty string). Never use `""` — UI Select maps empty value to an empty label. */
const MEMORY_PROVIDER_BUILTIN = "__hermes_memory_builtin__";
/**
 * Plugins hub page: provider selection (memory provider / context engine),
 * plugin installation form, the installed-plugin list, and any orphan
 * dashboard-only plugins. A rescan action is mounted into the page header.
 */
export default function PluginsPage() {
  const [hub, setHub] = useState<PluginsHubResponse | null>(null);
  const [loading, setLoading] = useState(true);
  const [installId, setInstallId] = useState("");
  const [installForce, setInstallForce] = useState(false);
  const [installEnable, setInstallEnable] = useState(true);
  const [installBusy, setInstallBusy] = useState(false);
  const [rescanBusy, setRescanBusy] = useState(false);
  const [memorySel, setMemorySel] = useState(MEMORY_PROVIDER_BUILTIN);
  const [contextSel, setContextSel] = useState("compressor");
  const [providerBusy, setProviderBusy] = useState(false);
  // Name of the plugin currently running a row action, or null.
  const [rowBusy, setRowBusy] = useState<string | null>(null);
  const { toast, showToast } = useToast();
  const { t } = useI18n();
  const { setEnd } = usePageHeader();
  // Fetch hub state and seed the provider selects from it. An empty
  // memory_provider means "built-in", mapped to the sentinel value.
  // NOTE(review): the failure toast reuses t.common.loading as its message —
  // looks unintentional; a dedicated error string would be clearer.
  const loadHub = useCallback(() => {
    return api
      .getPluginsHub()
      .then((h) => {
        setHub(h);
        const p = h.providers;
        setMemorySel(p.memory_provider ? p.memory_provider : MEMORY_PROVIDER_BUILTIN);
        setContextSel(p.context_engine || "compressor");
      })
      .catch(() => showToast(t.common.loading, "error"));
  }, [showToast, t.common.loading]);
  useEffect(() => {
    setLoading(true);
    void loadHub().finally(() => setLoading(false));
  }, [loadHub]);
  // Mount the rescan button into the shared page header; cleared on unmount.
  // NOTE(review): onRescan is captured by this effect but not listed in its
  // deps — it re-renders via loading/rescanBusy changes; confirm intentional.
  useEffect(() => {
    setEnd(
      <Button
        ghost
        size="sm"
        className="shrink-0 gap-2"
        disabled={loading || rescanBusy}
        onClick={() => void onRescan()}
      >
        {rescanBusy ? <Spinner /> : <RefreshCw className="h-3.5 w-3.5" />}
        {t.pluginsPage.refreshDashboard}
      </Button>,
    );
    return () => setEnd(null);
  }, [loading, rescanBusy, setEnd, t.pluginsPage.refreshDashboard]);
  // Install the plugin named in the form, surfacing warnings/missing env
  // vars as error toasts, then reload the hub list.
  const onInstall = async () => {
    const id = installId.trim();
    if (!id) {
      showToast(t.pluginsPage.installHint, "error");
      return;
    }
    setInstallBusy(true);
    try {
      const r = await api.installAgentPlugin({
        identifier: id,
        force: installForce,
        enable: installEnable,
      });
      showToast(`${r.plugin_name ?? id} installed`, "success");
      if ((r.warnings?.length ?? 0) > 0) showToast(r.warnings!.join(" "), "error");
      if ((r.missing_env?.length ?? 0) > 0)
        showToast(`${t.pluginsPage.missingEnvWarn} ${r.missing_env!.join(", ")}`, "error");
      setInstallId("");
      await loadHub();
    } catch (e) {
      showToast(e instanceof Error ? e.message : "Install failed", "error");
    } finally {
      setInstallBusy(false);
    }
  };
  // Rescan plugins on the server, toast the discovered count, and reload.
  const onRescan = async () => {
    setRescanBusy(true);
    try {
      const rc = await api.rescanPlugins();
      showToast(
        `${t.pluginsPage.refreshDashboard} (${rc.count})`,
        "success",
      );
      await loadHub();
    } catch (e) {
      showToast(e instanceof Error ? e.message : "Rescan failed", "error");
    } finally {
      setRescanBusy(false);
    }
  };
  // Persist the provider selects; the sentinel maps back to "" (built-in).
  const onSaveProviders = async () => {
    setProviderBusy(true);
    try {
      await api.savePluginProviders({
        memory_provider:
          memorySel === MEMORY_PROVIDER_BUILTIN ? "" : memorySel,
        context_engine: contextSel,
      });
      showToast(t.pluginsPage.savedProviders, "success");
      await loadHub();
    } catch (e) {
      showToast(e instanceof Error ? e.message : "Save failed", "error");
    } finally {
      setProviderBusy(false);
    }
  };
  // Run one per-row action with busy tracking, then reload the hub list.
  const setRuntimeLoading = async (name: string, fn: () => Promise<unknown>) => {
    setRowBusy(name);
    try {
      await fn();
      await loadHub();
    } catch (e) {
      showToast(e instanceof Error ? e.message : "Failed", "error");
    } finally {
      setRowBusy(null);
    }
  };
  const rows = hub?.plugins ?? [];
  const providers = hub?.providers;
  return (
    <div className="flex flex-col gap-4">
      <PluginSlot name="plugins:top" />
      <div className={cn("flex w-full flex-col gap-8")}>
        {providers && (
          <Card>
            <CardHeader>
              <CardTitle>{t.pluginsPage.providersHeading}</CardTitle>
              <p className="text-[0.7rem] tracking-[0.08em] text-midground/55 normal-case">
                {t.pluginsPage.providersHint}
              </p>
            </CardHeader>
            <CardContent className="flex flex-col gap-6">
              <div className="grid gap-6 sm:grid-cols-2 max-w-full">
                <div className="grid gap-2 min-w-0">
                  <Label htmlFor="mem-provider">{t.pluginsPage.memoryProviderLabel}</Label>
                  <Select
                    id="mem-provider"
                    className="w-full"
                    value={memorySel}
                    onValueChange={setMemorySel}
                  >
                    <SelectOption value={MEMORY_PROVIDER_BUILTIN}>
                      {`(${t.pluginsPage.providerDefaults})`}
                    </SelectOption>
                    {providers.memory_options.map((o) => (
                      <SelectOption key={o.name} value={o.name}>
                        {o.name}
                      </SelectOption>
                    ))}
                  </Select>
                </div>
                <div className="grid gap-2 min-w-0">
                  <Label htmlFor="ctx-engine">{t.pluginsPage.contextEngineLabel}</Label>
                  <Select
                    id="ctx-engine"
                    className="w-full"
                    value={contextSel}
                    onValueChange={setContextSel}
                  >
                    <SelectOption value="compressor">compressor</SelectOption>
                    {providers.context_options
                      .filter((o) => o.name !== "compressor")
                      .map((o) => (
                        <SelectOption key={o.name} value={o.name}>
                          {o.name}
                        </SelectOption>
                      ))}
                  </Select>
                </div>
              </div>
              <Button
                className="w-fit gap-2"
                size="sm"
                disabled={providerBusy}
                onClick={() => void onSaveProviders()}
              >
                {providerBusy ? <Spinner /> : null}
                {t.pluginsPage.saveProviders}
              </Button>
            </CardContent>
          </Card>
        )}
        <Card>
          <CardHeader>
            <CardTitle>{t.pluginsPage.installHeading}</CardTitle>
            <p className="text-[0.7rem] tracking-[0.08em] text-midground/55 normal-case">
              {t.pluginsPage.installHint}
            </p>
          </CardHeader>
          <CardContent className="flex flex-col gap-4">
            <div className="flex flex-col gap-2">
              <Label htmlFor="install-url">{t.pluginsPage.identifierLabel}</Label>
              <Input
                className="normal-case font-sans lowercase"
                id="install-url"
                placeholder="owner/repo or https://..."
                spellCheck={false}
                value={installId}
                onChange={(e) => setInstallId(e.target.value)}
              />
            </div>
            <div className="flex flex-wrap items-center gap-8">
              <div className="flex items-center gap-3">
                <Switch checked={installForce} onCheckedChange={setInstallForce} />
                <span className="text-[0.7rem] tracking-[0.06em] text-midforeground/85 normal-case">
                  {t.pluginsPage.forceReinstall}
                </span>
              </div>
              <div className="flex items-center gap-3">
                <Switch checked={installEnable} onCheckedChange={setInstallEnable} />
                <span className="text-[0.7rem] tracking-[0.06em] text-midforeground/85 normal-case">
                  {t.pluginsPage.enableAfterInstall}
                </span>
              </div>
            </div>
            <Button
              className="w-fit gap-2"
              size="sm"
              disabled={installBusy}
              onClick={() => void onInstall()}
            >
              {installBusy ? <Spinner /> : <Puzzle className="h-3.5 w-3.5" />}
              {t.pluginsPage.installBtn}
            </Button>
            <p className="text-[0.65rem] tracking-[0.06em] text-midforeground/55 normal-case">
              {t.pluginsPage.rescanHint}
            </p>
            <p className="text-[0.65rem] tracking-[0.06em] text-midforeground/55 normal-case">
              {t.pluginsPage.removeHint}
            </p>
          </CardContent>
        </Card>
        <div className="flex flex-col gap-3">
          <h3 className="font-mondwest text-[0.75rem] tracking-[0.12em] text-midground/85">
            {t.pluginsPage.pluginListHeading}
          </h3>
          {loading ? (
            <div className="flex items-center gap-2 py-8 text-[0.8rem] text-midforeground/65">
              <Spinner />
              <span>{t.common.loading}</span>
            </div>
          ) : rows.length === 0 ? (
            <p className="text-[0.75rem] text-midforeground/55 normal-case">{t.common.noResults}</p>
          ) : (
            <ul className="flex flex-col gap-3">
              {rows.map((row: HubAgentPluginRow) => (
                <li key={row.name}>
                  <PluginRowCard
                    {...{ row, rowBusy, setRuntimeLoading, showToast, t }}
                  />
                </li>
              ))}
            </ul>
          )}
        </div>
        {/* Dashboard plugins with no matching agent plugin entry. */}
        {(hub?.orphan_dashboard_plugins?.length ?? 0) > 0 ? (
          <div className="flex flex-col gap-3 opacity-95">
            <h3 className="font-mondwest text-[0.75rem] tracking-[0.12em] text-midforeground/85">
              {t.pluginsPage.orphanHeading}
            </h3>
            <ul className="flex flex-col gap-2 rounded border border-current/15 p-4">
              {hub!.orphan_dashboard_plugins.map((m) => (
                <li className="text-[0.7rem] normal-case opacity-85" key={m.name}>
                  {m.label ?? m.name} {m.description || m.tab?.path}
                  {!m.tab?.hidden ? (
                    <Link className="ml-3 inline-flex items-center gap-1 underline" to={m.tab.path}>
                      <ExternalLink className="h-3 w-3 opacity-65" />
                      {t.pluginsPage.openTab}
                    </Link>
                  ) : null}
                </li>
              ))}
            </ul>
          </div>
        ) : null}
      </div>
      <Toast toast={toast} />
      <PluginSlot name="plugins:bottom" />
    </div>
  );
}
// Props for one installed-plugin row; handlers are passed down from
// PluginsPage so all rows share busy tracking and hub reloading.
interface PluginRowCardProps {
  row: HubAgentPluginRow;
  // Name of the plugin currently running an action (disables this row's
  // buttons when it matches row.name).
  rowBusy: string | null;
  setRuntimeLoading: (
    name: string,
    fn: () => Promise<unknown>,
  ) => Promise<void>;
  showToast: (msg: string, variant: "success" | "error") => void;
  t: Translations;
}
/**
 * Card for one installed plugin: name/version/status badges, enable/disable,
 * open-tab link, git update, sidebar visibility toggle, and removal (with
 * confirm). Auth-required plugins additionally show the auth command.
 */
function PluginRowCard(props: PluginRowCardProps) {
  const {
    row,
    rowBusy,
    setRuntimeLoading,
    showToast,
    t,
  } = props;
  const dm = row.dashboard_manifest;
  // A tab is linkable only when declared and not hidden; `override` wins
  // over the default path when present.
  const tabPath = dm?.tab && !dm.tab.hidden ? dm.tab.override ?? dm.tab.path : null;
  const busy = rowBusy === row.name;
  const badgeTone =
    row.runtime_status === "enabled"
      ? "success"
      : row.runtime_status === "disabled"
        ? "destructive"
        : "outline";
  return (
    <Card className={cn(busy ? "opacity-70" : undefined)}>
      <CardContent className="flex flex-col gap-4 px-6 py-4">
        <div className="flex flex-wrap items-start justify-between gap-4">
          <div className="min-w-0 flex-1">
            <div className="flex flex-wrap items-center gap-3">
              <span className="truncate font-semibold">{row.name}</span>
              <Badge tone="outline">
                {t.pluginsPage.sourceBadge}: {row.source}
              </Badge>
              <Badge tone="outline">v{row.version || "—"}</Badge>
              <Badge tone={badgeTone}>{row.runtime_status}</Badge>
              {row.auth_required ? (
                <Badge tone="destructive">{t.pluginsPage.authRequired}</Badge>
              ) : null}
            </div>
            {row.description ? (
              <p className="mt-2 max-w-2xl text-[0.7rem] tracking-[0.06em] text-midforeground/75 normal-case">
                {row.description}
              </p>
            ) : null}
          </div>
          <div className="flex flex-wrap items-center gap-2 shrink-0">
            <Button
              disabled={busy || row.runtime_status === "enabled"}
              ghost
              size="sm"
              onClick={() => {
                void setRuntimeLoading(row.name, async () => {
                  await api.enableAgentPlugin(row.name);
                  showToast(t.pluginsPage.enableRuntime, "success");
                });
              }}
            >
              {t.pluginsPage.enableRuntime}
            </Button>
            <Button
              disabled={busy || row.runtime_status === "disabled"}
              ghost
              size="sm"
              onClick={() => {
                void setRuntimeLoading(row.name, async () => {
                  await api.disableAgentPlugin(row.name);
                  showToast(t.pluginsPage.disableRuntime, "success");
                });
              }}
            >
              {t.pluginsPage.disableRuntime}
            </Button>
            {tabPath ? (
              <Link
                className={cn(
                  "inline-flex items-center rounded-none px-3 py-1.5",
                  "border border-current/25 hover:bg-current/10",
                  "font-mondwest text-[0.65rem] tracking-[0.1em] uppercase",
                )}
                to={tabPath}
              >
                {t.pluginsPage.openTab}
              </Link>
            ) : null}
            {row.can_update_git ? (
              <Button
                disabled={busy}
                ghost
                size="sm"
                onClick={() => {
                  void setRuntimeLoading(row.name, async () => {
                    await api.updateAgentPlugin(row.name);
                    showToast(t.pluginsPage.updateGit, "success");
                  });
                }}
              >
                {busy ? <Spinner /> : null}
                {t.pluginsPage.updateGit}
              </Button>
            ) : null}
            {row.has_dashboard_manifest ? (
              <Button
                disabled={busy}
                ghost
                size="sm"
                title={row.user_hidden ? t.pluginsPage.showInSidebar : t.pluginsPage.hideFromSidebar}
                onClick={() => {
                  void setRuntimeLoading(row.name, async () => {
                    await api.setPluginVisibility(row.name, !row.user_hidden);
                  });
                }}
              >
                {row.user_hidden ? (
                  <EyeOff className="h-3.5 w-3.5" />
                ) : (
                  <Eye className="h-3.5 w-3.5" />
                )}
                {row.user_hidden ? t.pluginsPage.showInSidebar : t.pluginsPage.hideFromSidebar}
              </Button>
            ) : null}
            {row.can_remove ? (
              <Button
                destructive
                disabled={busy}
                ghost
                size="sm"
                onClick={() => {
                  // Guard window access for non-browser renders; no confirm
                  // available means no removal.
                  const ok =
                    typeof window !== "undefined"
                      ? window.confirm(t.pluginsPage.removeConfirm)
                      : false;
                  if (!ok) return;
                  void setRuntimeLoading(row.name, async () => {
                    await api.removeAgentPlugin(row.name);
                    showToast(`${row.name} removed`, "success");
                  });
                }}
              >
                {busy ? <Spinner /> : <Trash2 className="h-3.5 w-3.5" />}
              </Button>
            ) : null}
          </div>
        </div>
        {dm?.slots?.length ? (
          <p className="text-[0.65rem] tracking-[0.05em] text-midforeground/55 normal-case">
            {t.pluginsPage.dashboardSlots}: {dm.slots.join(", ")}
          </p>
        ) : null}
        {row.auth_required ? (
          <CommandBlock
            label={t.pluginsPage.authRequiredHint}
            code={row.auth_command}
          />
        ) : null}
        {/* NOTE(review): `!row.has_dashboard_manifest && !dm` looks redundant —
            dm is row.dashboard_manifest; confirm the flag can diverge. */}
        {!row.has_dashboard_manifest && !dm ? (
          <p className="text-[0.65rem] italic text-midforeground/45 normal-case">
            {t.pluginsPage.noDashboardTab}
          </p>
        ) : null}
      </CardContent>
    </Card>
  );
}

View File

@@ -1,444 +0,0 @@
import { useCallback, useEffect, useRef, useState } from "react";
import { ChevronDown, Pencil, Plus, Terminal, Trash2, Users } from "lucide-react";
import { H2 } from "@/components/NouiTypography";
import { api } from "@/lib/api";
import type { ProfileInfo } from "@/lib/api";
import { DeleteConfirmDialog } from "@/components/DeleteConfirmDialog";
import { useToast } from "@/hooks/useToast";
import { useConfirmDelete } from "@/hooks/useConfirmDelete";
import { Toast } from "@/components/Toast";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { Badge } from "@nous-research/ui/ui/components/badge";
import { Button } from "@nous-research/ui/ui/components/button";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { useI18n } from "@/i18n";
// Mirrors hermes_cli/profiles.py::_PROFILE_ID_RE so we can reject obviously
// invalid names (uppercase, spaces, …) before round-tripping a doomed POST.
const PROFILE_NAME_RE = /^[a-z0-9][a-z0-9_-]{0,63}$/;
export default function ProfilesPage() {
  const [profiles, setProfiles] = useState<ProfileInfo[]>([]);
  const [loading, setLoading] = useState(true);
  const { toast, showToast } = useToast();
  const { t } = useI18n();
  // Create form
  const [newName, setNewName] = useState("");
  const [cloneFromDefault, setCloneFromDefault] = useState(true);
  const [creating, setCreating] = useState(false);
  // Inline rename state
  const [renamingFrom, setRenamingFrom] = useState<string | null>(null);
  const [renameTo, setRenameTo] = useState("");
  // Inline SOUL editor state
  const [editingSoulFor, setEditingSoulFor] = useState<string | null>(null);
  const [soulText, setSoulText] = useState("");
  const [soulSaving, setSoulSaving] = useState(false);
  // Tracks the latest SOUL request so out-of-order responses don't overwrite
  // newer state when the user switches profiles or closes the editor.
  const activeSoulRequest = useRef<string | null>(null);
  // Fetch the profile list. Errors surface as a toast; the loading flag is
  // cleared in finally so the spinner never sticks.
  const load = useCallback(() => {
    api
      .getProfiles()
      .then((res) => setProfiles(res.profiles))
      .catch((e) => showToast(`${t.status.error}: ${e}`, "error"))
      .finally(() => setLoading(false));
  }, [showToast, t.status.error]);
  useEffect(() => {
    load();
  }, [load]);
  // Create a profile after client-side name validation, then refresh the list.
  const handleCreate = async () => {
    const name = newName.trim();
    if (!name) {
      showToast(t.profiles.nameRequired, "error");
      return;
    }
    if (!PROFILE_NAME_RE.test(name)) {
      showToast(`${t.profiles.invalidName}: ${t.profiles.nameRule}`, "error");
      return;
    }
    setCreating(true);
    try {
      await api.createProfile({ name, clone_from_default: cloneFromDefault });
      showToast(`${t.profiles.created}: ${name}`, "success");
      setNewName("");
      load();
    } catch (e) {
      showToast(`${t.status.error}: ${e}`, "error");
    } finally {
      setCreating(false);
    }
  };
  // Commit an inline rename. An empty or unchanged target just closes the
  // editor; an invalid one keeps it open and shows an error toast.
  const handleRenameSubmit = async () => {
    if (!renamingFrom) return;
    const target = renameTo.trim();
    if (!target || target === renamingFrom) {
      setRenamingFrom(null);
      setRenameTo("");
      return;
    }
    if (!PROFILE_NAME_RE.test(target)) {
      showToast(`${t.profiles.invalidName}: ${t.profiles.nameRule}`, "error");
      return;
    }
    try {
      await api.renameProfile(renamingFrom, target);
      showToast(`${t.profiles.renamed}: ${renamingFrom}${target}`, "success");
      setRenamingFrom(null);
      setRenameTo("");
      load();
    } catch (e) {
      showToast(`${t.status.error}: ${e}`, "error");
    }
  };
  // Toggle the inline SOUL editor for a profile. A second click on the same
  // profile closes it; otherwise the SOUL content is fetched and applied only
  // if this is still the most recent request (guarded by activeSoulRequest).
  const openSoulEditor = useCallback(
    async (name: string) => {
      if (editingSoulFor === name) {
        activeSoulRequest.current = null;
        setEditingSoulFor(null);
        return;
      }
      setEditingSoulFor(name);
      setSoulText("");
      activeSoulRequest.current = name;
      try {
        const soul = await api.getProfileSoul(name);
        if (activeSoulRequest.current === name) {
          setSoulText(soul.content);
        }
      } catch (e) {
        if (activeSoulRequest.current === name) {
          showToast(`${t.status.error}: ${e}`, "error");
        }
      }
    },
    [editingSoulFor, showToast, t.status.error],
  );
  // Persist the current SOUL editor text for the named profile.
  const handleSaveSoul = async (name: string) => {
    setSoulSaving(true);
    try {
      await api.updateProfileSoul(name, soulText);
      showToast(`${t.profiles.soulSaved}: ${name}`, "success");
    } catch (e) {
      showToast(`${t.status.error}: ${e}`, "error");
    } finally {
      setSoulSaving(false);
    }
  };
  // Fetch the profile's terminal setup command and copy it to the clipboard.
  // If the clipboard write fails, the command is still surfaced in the toast
  // so the user can copy it manually.
  const handleCopyTerminalCommand = async (name: string) => {
    let cmd: string;
    try {
      const res = await api.getProfileSetupCommand(name);
      cmd = res.command;
    } catch (e) {
      showToast(`${t.status.error}: ${e}`, "error");
      return;
    }
    try {
      await navigator.clipboard.writeText(cmd);
      showToast(`${t.profiles.commandCopied}: ${cmd}`, "success");
    } catch {
      showToast(`${t.profiles.copyFailed}: ${cmd}`, "error");
    }
  };
  // Confirm-then-delete flow. The error is rethrown after the toast so the
  // useConfirmDelete hook can observe the failure.
  const profileDelete = useConfirmDelete<string>({
    onDelete: useCallback(
      async (name: string) => {
        try {
          await api.deleteProfile(name);
          showToast(`${t.profiles.deleted}: ${name}`, "success");
          load();
        } catch (e) {
          showToast(`${t.status.error}: ${e}`, "error");
          throw e;
        }
      },
      [load, showToast, t.profiles.deleted, t.status.error],
    ),
  });
  const pendingName = profileDelete.pendingId;
  if (loading) {
    return (
      <div className="flex items-center justify-center py-24">
        <div className="h-6 w-6 animate-spin rounded-full border-2 border-primary border-t-transparent" />
      </div>
    );
  }
  return (
    // Profile names, model slugs, and paths are case-sensitive; opt out of
    // the app shell's global ``uppercase`` so they render as the user typed.
    // Children that explicitly opt back in (Badges, etc.) keep their casing.
    <div className="flex flex-col gap-6 normal-case">
      <Toast toast={toast} />
      <DeleteConfirmDialog
        open={profileDelete.isOpen}
        onCancel={profileDelete.cancel}
        onConfirm={profileDelete.confirm}
        title={t.profiles.confirmDeleteTitle}
        description={
          pendingName
            ? t.profiles.confirmDeleteMessage.replace("{name}", pendingName)
            : t.profiles.confirmDeleteMessage
        }
        loading={profileDelete.isDeleting}
      />
      {/* Create new profile */}
      <Card>
        <CardHeader>
          <CardTitle className="flex items-center gap-2 text-base">
            <Plus className="h-4 w-4" />
            {t.profiles.newProfile}
          </CardTitle>
        </CardHeader>
        <CardContent>
          <div className="grid gap-4">
            <div className="grid gap-2">
              <Label htmlFor="profile-name">{t.profiles.name}</Label>
              <Input
                id="profile-name"
                placeholder={t.profiles.namePlaceholder}
                value={newName}
                onChange={(e) => setNewName(e.target.value)}
                aria-invalid={
                  newName.trim() !== "" &&
                  !PROFILE_NAME_RE.test(newName.trim())
                }
              />
              <p className="text-xs text-muted-foreground">
                {t.profiles.nameRule}
              </p>
            </div>
            <label className="flex items-center gap-2 text-sm cursor-pointer">
              <input
                type="checkbox"
                checked={cloneFromDefault}
                onChange={(e) => setCloneFromDefault(e.target.checked)}
              />
              {t.profiles.cloneFromDefault}
            </label>
            <div>
              <Button onClick={handleCreate} disabled={creating}>
                <Plus className="h-3 w-3" />
                {creating ? t.common.creating : t.common.create}
              </Button>
            </div>
          </div>
        </CardContent>
      </Card>
      {/* List */}
      <div className="flex flex-col gap-3">
        <H2
          variant="sm"
          className="flex items-center gap-2 text-muted-foreground"
        >
          <Users className="h-4 w-4" />
          {t.profiles.allProfiles} ({profiles.length})
        </H2>
        {profiles.length === 0 && (
          <Card>
            <CardContent className="py-8 text-center text-sm text-muted-foreground">
              {t.profiles.noProfiles}
            </CardContent>
          </Card>
        )}
        {profiles.map((p) => {
          const isRenaming = renamingFrom === p.name;
          const isEditingSoul = editingSoulFor === p.name;
          return (
            <Card key={p.name}>
              <CardContent className="flex items-center gap-4 py-4">
                <div className="flex-1 min-w-0">
                  <div className="flex items-center gap-2 mb-1 flex-wrap">
                    {isRenaming ? (
                      <Input
                        autoFocus
                        value={renameTo}
                        onChange={(e) => setRenameTo(e.target.value)}
                        onKeyDown={(e) => {
                          if (e.key === "Enter") handleRenameSubmit();
                          if (e.key === "Escape") setRenamingFrom(null);
                        }}
                        aria-invalid={
                          renameTo.trim() !== "" &&
                          renameTo.trim() !== p.name &&
                          !PROFILE_NAME_RE.test(renameTo.trim())
                        }
                        className="max-w-xs"
                      />
                    ) : (
                      <span className="font-medium text-sm truncate">
                        {p.name}
                      </span>
                    )}
                    {p.is_default && (
                      <Badge tone="secondary">{t.profiles.defaultBadge}</Badge>
                    )}
                    {p.has_env && (
                      <Badge tone="outline">{t.profiles.hasEnv}</Badge>
                    )}
                  </div>
                  {/* Live validation hint while renaming; the IIFE keeps the
                      derived flags local to this branch. */}
                  {isRenaming &&
                    (() => {
                      const trimmed = renameTo.trim();
                      const invalid =
                        trimmed !== "" &&
                        trimmed !== p.name &&
                        !PROFILE_NAME_RE.test(trimmed);
                      return (
                        <p
                          className={
                            "text-xs mb-1 " +
                            (invalid
                              ? "text-destructive"
                              : "text-muted-foreground")
                          }
                        >
                          {invalid
                            ? `${t.profiles.invalidName}: ${t.profiles.nameRule}`
                            : t.profiles.nameRule}
                        </p>
                      );
                    })()}
                  <div className="flex items-center gap-4 text-xs text-muted-foreground flex-wrap">
                    {p.model && (
                      <span>
                        {t.profiles.model}: {p.model}
                        {p.provider ? ` (${p.provider})` : ""}
                      </span>
                    )}
                    <span>
                      {t.profiles.skills}: {p.skill_count}
                    </span>
                    <span className="font-mono truncate max-w-[28rem]">
                      {p.path}
                    </span>
                  </div>
                </div>
                <div className="flex items-center gap-1 shrink-0">
                  {isRenaming ? (
                    <>
                      <Button
                        size="sm"
                        onClick={handleRenameSubmit}
                      >
                        {t.common.save}
                      </Button>
                      <Button
                        size="sm"
                        ghost
                        onClick={() => setRenamingFrom(null)}
                      >
                        {t.common.cancel}
                      </Button>
                    </>
                  ) : (
                    <>
                      <Button
                        ghost
                        size="icon"
                        title={t.profiles.editSoul}
                        aria-label={t.profiles.editSoul}
                        onClick={() => openSoulEditor(p.name)}
                      >
                        {isEditingSoul ? (
                          <ChevronDown className="h-4 w-4" />
                        ) : (
                          <span aria-hidden className="text-xs font-bold">
                            S
                          </span>
                        )}
                      </Button>
                      <Button
                        ghost
                        size="icon"
                        title={t.profiles.openInTerminal}
                        aria-label={t.profiles.openInTerminal}
                        onClick={() => handleCopyTerminalCommand(p.name)}
                      >
                        <Terminal className="h-4 w-4" />
                      </Button>
                      {!p.is_default && (
                        <Button
                          ghost
                          size="icon"
                          title={t.profiles.rename}
                          aria-label={t.profiles.rename}
                          onClick={() => {
                            setRenamingFrom(p.name);
                            setRenameTo(p.name);
                          }}
                        >
                          <Pencil className="h-4 w-4" />
                        </Button>
                      )}
                      {!p.is_default && (
                        <Button
                          ghost
                          size="icon"
                          title={t.common.delete}
                          aria-label={t.common.delete}
                          onClick={() => profileDelete.requestDelete(p.name)}
                        >
                          <Trash2 className="h-4 w-4 text-destructive" />
                        </Button>
                      )}
                    </>
                  )}
                </div>
              </CardContent>
              {isEditingSoul && (
                <div className="border-t border-border px-4 pb-4 pt-3 flex flex-col gap-2">
                  <Label
                    htmlFor={`soul-editor-${p.name}`}
                    className="flex items-center gap-2 text-xs uppercase tracking-wider text-muted-foreground"
                  >
                    {t.profiles.soulSection}
                  </Label>
                  <textarea
                    id={`soul-editor-${p.name}`}
                    className="flex min-h-[180px] w-full border border-input bg-transparent px-3 py-2 text-sm font-mono shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring"
                    placeholder={t.profiles.soulPlaceholder}
                    value={soulText}
                    onChange={(e) => setSoulText(e.target.value)}
                  />
                  <div>
                    <Button
                      size="sm"
                      onClick={() => handleSaveSoul(p.name)}
                      disabled={soulSaving}
                    >
                      {soulSaving ? t.common.saving : t.profiles.saveSoul}
                    </Button>
                  </div>
                </div>
              )}
            </Card>
          );
        })}
      </div>
    </div>
  );
}

View File

@@ -1,11 +0,0 @@
{
"arrowParens": "avoid",
"bracketSpacing": true,
"endOfLine": "auto",
"printWidth": 120,
"semi": false,
"singleQuote": true,
"tabWidth": 2,
"trailingComma": "none",
"useTabs": false
}

View File

@@ -1,195 +0,0 @@
# Hermes Desktop
Native Electron shell for Hermes. It packages the desktop renderer, a bundled Hermes source payload, and installer targets for macOS and Windows.
## Setup
Install workspace dependencies from the repo root so `apps/desktop`, `apps/dashboard`, and `apps/shared` stay linked:
```bash
npm install
```
Use the normal Hermes Python environment for local runs:
```bash
source .venv/bin/activate # or: source venv/bin/activate
python -m pip install -e .
```
## Development
```bash
cd apps/desktop
npm run dev
```
`npm run dev` starts Vite on `127.0.0.1:5174`, launches Electron, and lets Electron boot the Hermes dashboard backend on an open port in `9120-9199`. This path is for UI iteration and may still show Electron/dev identities in OS prompts.
Useful overrides:
```bash
HERMES_DESKTOP_HERMES_ROOT=/path/to/hermes-agent npm run dev
HERMES_DESKTOP_PYTHON=/path/to/python npm run dev
HERMES_DESKTOP_CWD=/path/to/project npm run dev
HERMES_DESKTOP_IGNORE_EXISTING=1 npm run dev
```
`HERMES_DESKTOP_IGNORE_EXISTING=1` skips any `hermes` CLI already on `PATH`, which is useful when testing the bundled/runtime bootstrap path.
## Dashboard Dev
Run the Python dashboard backend with embedded chat enabled:
```bash
hermes dashboard --tui --no-open
```
For dashboard HMR, start Vite in another terminal:
```bash
cd apps/dashboard
npm run dev
```
Open the Vite URL. The dev server proxies `/api`, `/api/pty`, and plugin assets to `http://127.0.0.1:9119` and fetches the live dashboard HTML so the ephemeral session token matches the running backend.
## Build
```bash
npm run build
npm run pack # unpacked app at release/mac-<arch>/Hermes.app
npm run dist:mac # macOS DMG + zip
npm run dist:mac:dmg # DMG only
npm run dist:mac:zip # zip only
npm run dist:win # NSIS + MSI
```
Before packaging, `stage:hermes` copies the Python Hermes payload into `build/hermes-agent`. Electron Builder then ships it as `Contents/Resources/hermes-agent`.
## Automated Releases
Desktop installers are published by [`.github/workflows/desktop-release.yml`](../../.github/workflows/desktop-release.yml) with two channels:
- **Stable:** runs on published GitHub releases and uploads signed artifacts to that release tag.
- **Nightly:** runs on `main` pushes and updates the rolling `desktop-nightly` prerelease.
The workflow injects a channel-aware desktop version at build time:
- stable: derived from the release tag (for example `v2026.5.5` -> `2026.5.5`)
- nightly: `0.0.0-nightly.YYYYMMDD.<sha>`
Artifact names include channel, platform, and architecture:
```text
Hermes-<version>-<channel>-<platform>-<arch>.<ext>
```
Each run also publishes `SHA256SUMS-<platform>.txt` so installers can be verified.
### Stable release gates
Stable builds fail fast if signing credentials are missing:
- macOS signing + notarization: `CSC_LINK`, `CSC_KEY_PASSWORD`, `APPLE_API_KEY`, `APPLE_API_KEY_ID`, `APPLE_API_ISSUER`
- Windows signing: `WIN_CSC_LINK`, `WIN_CSC_KEY_PASSWORD`
Stable macOS builds also validate stapling and Gatekeeper assessment in CI before upload.
## Icons
Desktop icons live in `assets/`:
- `assets/icon.icns`
- `assets/icon.ico`
- `assets/icon.png`
The builder config points at `assets/icon`. Replace these files directly if the app icon changes.
## Testing Install Paths
Use the package-local test scripts from this directory:
```bash
npm run test:desktop:all
npm run test:desktop:existing
npm run test:desktop:fresh
npm run test:desktop:dmg
```
`test:desktop:existing` builds the packaged app and opens it normally. It should use an existing `hermes` CLI if one is on `PATH`, preserving the user's real `~/.hermes` config.
`test:desktop:fresh` builds the packaged app, deletes the bundled desktop runtime, sets `HERMES_DESKTOP_IGNORE_EXISTING=1`, and launches the app through the bundled payload path. Use this repeatedly to test first-run bootstrap.
`test:desktop:dmg` builds and opens the DMG.
For fast reruns without rebuilding:
```bash
HERMES_DESKTOP_SKIP_BUILD=1 npm run test:desktop:fresh
HERMES_DESKTOP_SKIP_BUILD=1 npm run test:desktop:existing
HERMES_DESKTOP_SKIP_BUILD=1 npm run test:desktop:dmg
```
## Installing Locally
```bash
npm run dist:mac:dmg
open release/Hermes-0.0.0-arm64.dmg
```
Drag `Hermes` to Applications. If testing repeated installs, replace the existing app.
## Runtime Bootstrap
Packaged desktop startup resolves Hermes in this order:
1. `HERMES_DESKTOP_HERMES_ROOT`
2. existing `hermes` CLI, unless `HERMES_DESKTOP_IGNORE_EXISTING=1`
3. bundled `Contents/Resources/hermes-agent`
4. dev repo source
5. installed `python -m hermes_cli.main`
When the bundled path is used, Electron creates or reuses:
```text
~/Library/Application Support/Hermes/hermes-runtime
```
The runtime is validated before use. If required dashboard imports are missing, it reinstalls the desktop runtime dependencies and retries.
## Debugging
Desktop boot logs are written to:
```text
~/Library/Application Support/Hermes/desktop.log
```
If the UI reports `Desktop boot failed`, check that log first. It includes the backend command output and recent Python traceback context.
To reset bundled runtime state:
```bash
rm -rf "$HOME/Library/Application Support/Hermes/hermes-runtime"
```
To reset stale macOS microphone permission prompts:
```bash
tccutil reset Microphone com.github.Electron
tccutil reset Microphone com.nousresearch.hermes
```
## Verification
Run before handing off installer changes:
```bash
npm run fix
npm run type-check
npm run lint
npm run test:desktop:all
```
Current lint may report existing warnings, but it should exit with no errors.

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 78 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 674 KiB

View File

@@ -1,21 +0,0 @@
{
"$schema": "https://ui.shadcn.com/schema.json",
"style": "new-york",
"rsc": false,
"tsx": true,
"tailwind": {
"config": "",
"css": "src/styles.css",
"baseColor": "neutral",
"cssVariables": true,
"prefix": ""
},
"aliases": {
"components": "@/components",
"utils": "@/lib/utils",
"ui": "@/components/ui",
"lib": "@/lib",
"hooks": "@/hooks"
},
"iconLibrary": "lucide"
}

View File

@@ -1,12 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>com.apple.security.cs.allow-jit</key>
<true/>
<key>com.apple.security.cs.allow-unsigned-executable-memory</key>
<true/>
<key>com.apple.security.cs.disable-library-validation</key>
<true/>
</dict>
</plist>

View File

@@ -1,14 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>com.apple.security.cs.allow-jit</key>
<true/>
<key>com.apple.security.cs.allow-unsigned-executable-memory</key>
<true/>
<key>com.apple.security.cs.disable-library-validation</key>
<true/>
<key>com.apple.security.device.audio-input</key>
<true/>
</dict>
</plist>

File diff suppressed because it is too large Load Diff

View File

@@ -1,44 +0,0 @@
const { contextBridge, ipcRenderer, webUtils } = require('electron')

// Attach `relay` to an IPC channel and hand back the matching unsubscribe.
const subscribe = (channel, relay) => {
  ipcRenderer.on(channel, relay)
  return () => ipcRenderer.removeListener(channel, relay)
}

// Renderer-facing API surface. Every entry forwards over IPC to the main
// process, except getPathForFile which uses webUtils directly.
const bridge = {
  getConnection: () => ipcRenderer.invoke('hermes:connection'),
  api: request => ipcRenderer.invoke('hermes:api', request),
  notify: payload => ipcRenderer.invoke('hermes:notify', payload),
  requestMicrophoneAccess: () => ipcRenderer.invoke('hermes:requestMicrophoneAccess'),
  readFileDataUrl: targetPath => ipcRenderer.invoke('hermes:readFileDataUrl', targetPath),
  readFileText: targetPath => ipcRenderer.invoke('hermes:readFileText', targetPath),
  selectPaths: options => ipcRenderer.invoke('hermes:selectPaths', options),
  writeClipboard: text => ipcRenderer.invoke('hermes:writeClipboard', text),
  saveImageFromUrl: url => ipcRenderer.invoke('hermes:saveImageFromUrl', url),
  saveImageBuffer: (data, ext) => ipcRenderer.invoke('hermes:saveImageBuffer', { data, ext }),
  saveClipboardImage: () => ipcRenderer.invoke('hermes:saveClipboardImage'),
  getPathForFile(file) {
    // webUtils can throw; treat any failure as "no path".
    try {
      const resolved = webUtils.getPathForFile(file)
      return resolved || ''
    } catch {
      return ''
    }
  },
  normalizePreviewTarget: (target, baseDir) => ipcRenderer.invoke('hermes:normalizePreviewTarget', target, baseDir),
  watchPreviewFile: url => ipcRenderer.invoke('hermes:watchPreviewFile', url),
  stopPreviewFileWatch: id => ipcRenderer.invoke('hermes:stopPreviewFileWatch', id),
  setPreviewShortcutActive: active => ipcRenderer.send('hermes:previewShortcutActive', Boolean(active)),
  openExternal: url => ipcRenderer.invoke('hermes:openExternal', url),
  readDir: dirPath => ipcRenderer.invoke('hermes:fs:readDir', dirPath),
  gitRoot: startPath => ipcRenderer.invoke('hermes:fs:gitRoot', startPath),
  onClosePreviewRequested: handler => subscribe('hermes:close-preview-requested', () => handler()),
  onPreviewFileChanged: handler => subscribe('hermes:preview-file-changed', (_event, payload) => handler(payload)),
  onBackendExit: handler => subscribe('hermes:backend-exit', (_event, payload) => handler(payload))
}

contextBridge.exposeInMainWorld('hermesDesktop', bridge)

View File

@@ -1,122 +0,0 @@
import js from '@eslint/js'
import typescriptEslint from '@typescript-eslint/eslint-plugin'
import typescriptParser from '@typescript-eslint/parser'
import perfectionist from 'eslint-plugin-perfectionist'
import reactPlugin from 'eslint-plugin-react'
import reactCompiler from 'eslint-plugin-react-compiler'
import hooksPlugin from 'eslint-plugin-react-hooks'
import unusedImports from 'eslint-plugin-unused-imports'
import globals from 'globals'
// A rule implementation that accepts no options and never reports.
const noopRule = {
  meta: { schema: [], type: 'problem' },
  create: () => ({})
}
// Local "custom-rules" plugin whose rules are all no-ops in this package.
// NOTE(review): presumably these IDs exist so references to
// 'custom-rules/...' elsewhere resolve without error — confirm against the
// shared workspace ESLint setup.
const customRules = {
  rules: {
    'no-process-cwd': noopRule,
    'no-process-env-top-level': noopRule,
    'no-sync-fs': noopRule,
    'no-top-level-dynamic-import': noopRule,
    'no-top-level-side-effects': noopRule
  }
}
// Flat ESLint config: global ignores, base JS recommendations, a TS/React
// section, a plain-JS/CommonJS section, and a final ignore for config files.
export default [
  {
    ignores: ['**/node_modules/**', '**/dist/**', 'src/**/*.js']
  },
  js.configs.recommended,
  // TypeScript + React sources.
  {
    files: ['**/*.{ts,tsx}'],
    languageOptions: {
      globals: {
        ...globals.browser,
        ...globals.node
      },
      parser: typescriptParser,
      parserOptions: {
        ecmaFeatures: { jsx: true },
        ecmaVersion: 'latest',
        sourceType: 'module'
      }
    },
    plugins: {
      '@typescript-eslint': typescriptEslint,
      'custom-rules': customRules,
      perfectionist,
      react: reactPlugin,
      'react-compiler': reactCompiler,
      'react-hooks': hooksPlugin,
      'unused-imports': unusedImports
    },
    rules: {
      '@typescript-eslint/consistent-type-imports': ['error', { prefer: 'type-imports' }],
      '@typescript-eslint/no-unused-vars': 'off',
      curly: ['error', 'all'],
      'no-fallthrough': ['error', { allowEmptyCase: true }],
      // Undefined/unused symbols are left to the TypeScript compiler and the
      // unused-imports plugin rather than core ESLint.
      'no-undef': 'off',
      'no-unused-vars': 'off',
      'padding-line-between-statements': [
        1,
        {
          blankLine: 'always',
          next: [
            'block-like',
            'block',
            'return',
            'if',
            'class',
            'continue',
            'debugger',
            'break',
            'multiline-const',
            'multiline-let'
          ],
          prev: '*'
        },
        {
          blankLine: 'always',
          next: '*',
          prev: ['case', 'default', 'multiline-const', 'multiline-let', 'multiline-block-like']
        },
        { blankLine: 'never', next: ['block', 'block-like'], prev: ['case', 'default'] },
        { blankLine: 'always', next: ['block', 'block-like'], prev: ['block', 'block-like'] },
        { blankLine: 'always', next: ['empty'], prev: 'export' },
        { blankLine: 'never', next: 'iife', prev: ['block', 'block-like', 'empty'] }
      ],
      'perfectionist/sort-exports': ['error', { order: 'asc', type: 'natural' }],
      'perfectionist/sort-imports': [
        'error',
        {
          groups: ['side-effect', 'builtin', 'external', 'internal', 'parent', 'sibling', 'index'],
          order: 'asc',
          type: 'natural'
        }
      ],
      'perfectionist/sort-jsx-props': ['error', { order: 'asc', type: 'natural' }],
      'perfectionist/sort-named-exports': ['error', { order: 'asc', type: 'natural' }],
      'perfectionist/sort-named-imports': ['error', { order: 'asc', type: 'natural' }],
      'react-compiler/react-compiler': 'warn',
      'react-hooks/exhaustive-deps': 'warn',
      'react-hooks/rules-of-hooks': 'error',
      'unused-imports/no-unused-imports': 'error'
    },
    settings: {
      react: { version: 'detect' }
    }
  },
  // Plain JS / CommonJS files.
  {
    files: ['**/*.js', '**/*.cjs'],
    ignores: ['**/node_modules/**', '**/dist/**'],
    languageOptions: {
      ecmaVersion: 'latest',
      globals: { ...globals.node },
      sourceType: 'commonjs'
    }
  },
  // Config files are excluded from linting.
  {
    ignores: ['*.config.*']
  }
]

View File

@@ -1,14 +0,0 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<link rel="icon" href="/apple-touch-icon.png" />
<link rel="apple-touch-icon" href="/apple-touch-icon.png" />
<title>Hermes</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>

File diff suppressed because it is too large Load Diff

View File

@@ -1,183 +0,0 @@
{
"name": "hermes",
"productName": "Hermes",
"private": true,
"version": "0.0.0",
"description": "Native desktop shell for Hermes Agent.",
"author": "Nous Research",
"type": "module",
"main": "electron/main.cjs",
"scripts": {
"dev": "concurrently -k \"npm:dev:renderer\" \"npm:dev:electron\"",
"dev:renderer": "vite --host 127.0.0.1 --port 5174",
"dev:electron": "wait-on http://127.0.0.1:5174 && cross-env HERMES_DESKTOP_DEV_SERVER=http://127.0.0.1:5174 electron .",
"profile:main": "wait-on http://127.0.0.1:5174 && cross-env HERMES_DESKTOP_DEV_SERVER=http://127.0.0.1:5174 electron --inspect=9229 .",
"profile:main:cpu": "wait-on http://127.0.0.1:5174 && cross-env NODE_OPTIONS=--cpu-prof HERMES_DESKTOP_DEV_SERVER=http://127.0.0.1:5174 electron .",
"start": "npm run build && electron .",
"build": "tsc -b && vite build",
"stage:hermes": "node scripts/stage-hermes-payload.mjs",
"pack": "npm run build && npm run stage:hermes && electron-builder --dir",
"dist": "npm run build && npm run stage:hermes && electron-builder",
"dist:mac": "npm run build && npm run stage:hermes && electron-builder --mac",
"dist:mac:dmg": "npm run build && npm run stage:hermes && electron-builder --mac dmg",
"dist:mac:zip": "npm run build && npm run stage:hermes && electron-builder --mac zip",
"dist:win": "npm run build && npm run stage:hermes && electron-builder --win",
"dist:win:msi": "npm run build && npm run stage:hermes && electron-builder --win msi",
"dist:win:nsis": "npm run build && npm run stage:hermes && electron-builder --win nsis",
"test:desktop": "node scripts/test-desktop.mjs",
"test:desktop:all": "node scripts/test-desktop.mjs all",
"test:desktop:dmg": "node scripts/test-desktop.mjs dmg",
"test:desktop:existing": "node scripts/test-desktop.mjs existing",
"test:desktop:fresh": "node scripts/test-desktop.mjs fresh",
"type-check": "tsc -b",
"lint": "eslint src/ electron/",
"lint:fix": "eslint src/ electron/ --fix",
"fmt": "prettier --write 'src/**/*.{ts,tsx}' 'electron/**/*.{js,cjs}' 'vite.config.ts'",
"fix": "npm run lint:fix && npm run fmt",
"test:ui": "vitest run --environment jsdom",
"preview": "vite preview --host 127.0.0.1 --port 4174"
},
"dependencies": {
"@assistant-ui/react": "^0.12.28",
"@assistant-ui/react-streamdown": "^0.1.11",
"@audiowave/react": "^0.6.2",
"@chenglou/pretext": "^0.0.6",
"@hermes/shared": "file:../shared",
"@nanostores/react": "^1.1.0",
"@radix-ui/react-slot": "^1.2.4",
"@streamdown/code": "^1.1.1",
"@tabler/icons-react": "^3.41.1",
"@tailwindcss/vite": "^4.2.4",
"@tanstack/react-query": "^5.100.6",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"cmdk": "^1.1.1",
"ignore": "^7.0.5",
"liquid-glass-react": "^1.1.1",
"lucide-react": "^0.577.0",
"nanostores": "^1.3.0",
"radix-ui": "^1.4.3",
"react": "^19.2.5",
"react-arborist": "^3.5.0",
"react-dom": "^19.2.5",
"react-router-dom": "^7.14.2",
"react-shiki": "^0.9.3",
"shiki": "^4.0.2",
"streamdown": "^2.5.0",
"tailwind-merge": "^3.5.0",
"tailwindcss": "^4.2.4",
"tw-shimmer": "^0.4.11",
"unicode-animations": "^1.0.3",
"use-stick-to-bottom": "^1.1.4",
"web-haptics": "^0.0.6"
},
"devDependencies": {
"@eslint/js": "^9.39.4",
"@testing-library/react": "^16.3.2",
"@types/node": "^24.12.2",
"@types/react": "^19.2.14",
"@types/react-dom": "^19.2.3",
"@typescript-eslint/eslint-plugin": "^8.59.1",
"@typescript-eslint/parser": "^8.59.1",
"@vitejs/plugin-react": "^6.0.1",
"concurrently": "^9.2.1",
"cross-env": "^10.1.0",
"electron": "^40.9.3",
"electron-builder": "^26.8.1",
"eslint": "^9.39.4",
"eslint-plugin-perfectionist": "^5.9.0",
"eslint-plugin-react": "^7.37.5",
"eslint-plugin-react-compiler": "^19.1.0-rc.2",
"eslint-plugin-react-hooks": "^7.1.1",
"eslint-plugin-unused-imports": "^4.4.1",
"globals": "^16.5.0",
"jsdom": "^29.1.1",
"prettier": "^3.8.3",
"typescript": "^6.0.3",
"vite": "^8.0.10",
"vitest": "^4.1.5",
"wait-on": "^9.0.5"
},
"build": {
"appId": "com.nousresearch.hermes",
"productName": "Hermes",
"executableName": "Hermes",
"artifactName": "Hermes-${version}-${os}-${arch}.${ext}",
"icon": "assets/icon",
"directories": {
"output": "release"
},
"files": [
"dist/**",
"assets/**",
"electron/**",
"public/**",
"package.json"
],
"extraResources": [
{
"from": "build/hermes-agent",
"to": "hermes-agent"
}
],
"asar": true,
"afterSign": "scripts/notarize.cjs",
"asarUnpack": [
"**/*.node"
],
"mac": {
"category": "public.app-category.developer-tools",
"entitlements": "electron/entitlements.mac.plist",
"entitlementsInherit": "electron/entitlements.mac.inherit.plist",
"extendInfo": {
"CFBundleDisplayName": "Hermes",
"CFBundleExecutable": "Hermes",
"CFBundleName": "Hermes",
"NSAudioCaptureUsageDescription": "Hermes uses audio capture for voice conversations.",
"NSMicrophoneUsageDescription": "Hermes uses the microphone for voice input and voice conversations."
},
"gatekeeperAssess": false,
"hardenedRuntime": true,
"target": [
"dmg",
"zip"
]
},
"dmg": {
"title": "Install Hermes",
"backgroundColor": "#f5f5f7",
"iconSize": 96,
"window": {
"width": 560,
"height": 360
},
"contents": [
{
"x": 160,
"y": 170,
"type": "file"
},
{
"x": 400,
"y": 170,
"type": "link",
"path": "/Applications"
}
]
},
"win": {
"legalTrademarks": "Hermes",
"target": [
"nsis",
"msi"
]
},
"nsis": {
"oneClick": false,
"allowToChangeInstallationDirectory": true,
"perMachine": false,
"shortcutName": "Hermes",
"uninstallDisplayName": "Hermes"
}
}
}

View File

@@ -1,65 +0,0 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<title>Preview Demo</title>
<!-- Standalone demo artifact: all CSS/JS is inline, so the page renders
     directly from file:// with no server or build step. -->
<style>
:root { color-scheme: dark; }
html, body { height: 100%; margin: 0; }
body {
font-family: ui-sans-serif, system-ui, -apple-system, "SF Pro Text", sans-serif;
background: radial-gradient(1200px 600px at 20% 10%, #4a1a33 0%, #2a1020 40%, #120810 100%);
color: #ffe4f1;
display: grid;
place-items: center;
padding: 2rem;
}
.card {
max-width: 520px;
padding: 2rem 2.25rem;
border: 1px solid rgba(255,182,214,0.18);
border-radius: 14px;
background: rgba(28,14,22,0.6);
backdrop-filter: blur(6px);
box-shadow: 0 10px 40px rgba(0,0,0,0.4);
}
h1 {
margin: 0 0 0.5rem;
font-size: 1.5rem;
letter-spacing: 0.01em;
}
p { margin: 0.35rem 0; opacity: 0.85; line-height: 1.5; }
/* Pulsing status dot next to the heading. */
.dot {
display: inline-block; width: 10px; height: 10px; border-radius: 50%;
background: #ff6fb5; margin-right: 0.5rem;
box-shadow: 0 0 12px #ff6fb5;
animation: pulse 1.6s ease-in-out infinite;
}
@keyframes pulse {
0%,100% { transform: scale(1); opacity: 1; }
50% { transform: scale(1.4); opacity: 0.6; }
}
code {
background: rgba(255,182,214,0.10);
padding: 0.1rem 0.35rem;
border-radius: 4px;
font-size: 0.9em;
}
.time { font-variant-numeric: tabular-nums; opacity: 0.7; font-size: 0.85rem; margin-top: 1rem; }
</style>
</head>
<body>
<div class="card">
<h1><span class="dot"></span>preview-demo.html</h1>
<p>Tiny standalone HTML artifact — no server, no build step.</p>
<p>Open directly in a browser via <code>file://</code>.</p>
<p class="time" id="t"></p>
</div>
<!-- Live clock: rewrites #t with the locale-formatted time every second. -->
<script>
const el = document.getElementById('t');
const tick = () => { el.textContent = new Date().toLocaleString(); };
tick(); setInterval(tick, 1000);
</script>
</body>
</html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 132 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 115 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 109 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 76 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 117 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 97 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 97 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 92 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 883 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

View File

@@ -1,74 +0,0 @@
const fs = require('node:fs')
const os = require('node:os')
const path = require('node:path')
const { execFile } = require('node:child_process')
// Promisified execFile: resolves (with no value) on exit code 0, otherwise
// rejects with a single descriptive Error. stderr is preferred for the
// failure detail, falling back to stdout, then to the raw error message.
function run(command, args) {
  return new Promise((resolve, reject) => {
    const onDone = (error, stdout, stderr) => {
      if (!error) {
        resolve()
        return
      }
      const detail = stderr?.trim() || stdout?.trim() || error.message
      reject(new Error(`${command} ${args.join(' ')} failed: ${detail}`))
    }
    execFile(command, args, onDone)
  })
}
// Returns true when `value` looks like inline PEM content for an App Store
// Connect .p8 private key (rather than a path to one).
function inlineKeyLooksValid(value) {
  return value.includes('BEGIN PRIVATE KEY') && value.includes('END PRIVATE KEY')
}

// Resolves APPLE_API_KEY into a usable key-file path.
//
// Accepts either:
//   - a path to an existing .p8 file (returned as-is, no cleanup needed), or
//   - inline .p8 PEM content, which is persisted to a private temp file.
//
// Returns { keyPath, cleanup }; callers must invoke cleanup() when done so
// temporary key material does not linger on disk.
// Throws when the value is neither an existing file nor valid inline PEM.
function resolveApiKeyPath(rawValue) {
  const value = String(rawValue || '').trim()
  if (!value) return { keyPath: '', cleanup: () => {} }
  if (fs.existsSync(value)) {
    return { keyPath: value, cleanup: () => {} }
  }
  if (!inlineKeyLooksValid(value)) {
    throw new Error('APPLE_API_KEY must be a file path or inline .p8 key content')
  }
  const tempPath = path.join(os.tmpdir(), `hermes-notary-${Date.now()}-${process.pid}.p8`)
  // mode 0o600: the temp file holds a private key — keep it owner-only
  // instead of the umask-dependent default.
  fs.writeFileSync(tempPath, value, { encoding: 'utf8', mode: 0o600 })
  return {
    keyPath: tempPath,
    cleanup: () => {
      try {
        fs.rmSync(tempPath, { force: true })
      } catch {
        // Best-effort cleanup; never let teardown mask the primary error.
      }
    }
  }
}
// Entry point: notarize the artifact passed as argv[2] and staple the ticket
// onto it. Prefers a keychain notary profile (APPLE_NOTARY_PROFILE); otherwise
// requires the App Store Connect API credential triplet.
async function main() {
  const artifactPath = process.argv[2]
  if (!artifactPath || !fs.existsSync(artifactPath)) {
    throw new Error(`Missing artifact to notarize: ${artifactPath || '(none)'}`)
  }
  // Shared tail: submit with the given credential args, then staple.
  const submitAndStaple = async credentialArgs => {
    await run('xcrun', ['notarytool', 'submit', artifactPath, ...credentialArgs, '--wait'])
    await run('xcrun', ['stapler', 'staple', '-v', artifactPath])
  }
  const profile = String(process.env.APPLE_NOTARY_PROFILE || '').trim()
  if (profile) {
    await submitAndStaple(['--keychain-profile', profile])
    return
  }
  const keyId = String(process.env.APPLE_API_KEY_ID || '').trim()
  const issuer = String(process.env.APPLE_API_ISSUER || '').trim()
  const rawApiKey = process.env.APPLE_API_KEY
  if (!rawApiKey || !keyId || !issuer) {
    throw new Error('APPLE_API_KEY, APPLE_API_KEY_ID, and APPLE_API_ISSUER are required')
  }
  const { keyPath, cleanup } = resolveApiKeyPath(rawApiKey)
  try {
    await submitAndStaple(['--key', keyPath, '--key-id', keyId, '--issuer', issuer])
  } finally {
    cleanup()
  }
}
main().catch(error => {
  console.error(error.message)
  process.exit(1)
})

View File

@@ -1,100 +0,0 @@
const fs = require('node:fs')
const os = require('node:os')
const path = require('node:path')
const { execFile } = require('node:child_process')
// Promisified execFile. Resolves with { stdout, stderr } on success; rejects
// with a descriptive Error otherwise (stderr preferred for the detail, then
// stdout, then the raw error message).
function run(command, args) {
  return new Promise((resolve, reject) => {
    execFile(command, args, (error, stdout, stderr) => {
      if (!error) {
        resolve({ stdout, stderr })
        return
      }
      const detail = stderr?.trim() || stdout?.trim() || error.message
      reject(new Error(`${command} ${args.join(' ')} failed: ${detail}`))
    })
  })
}
// Returns true when `value` looks like inline PEM content for an App Store
// Connect .p8 private key (rather than a path to one).
function inlineKeyLooksValid(value) {
  return value.includes('BEGIN PRIVATE KEY') && value.includes('END PRIVATE KEY')
}

// Resolves APPLE_API_KEY into a usable key-file path.
//
// Accepts either:
//   - a path to an existing .p8 file (returned as-is, no cleanup needed), or
//   - inline .p8 PEM content, which is persisted to a private temp file.
//
// Returns { keyPath, cleanup }; callers must invoke cleanup() when done so
// temporary key material does not linger on disk.
// Throws when the value is neither an existing file nor valid inline PEM.
function resolveApiKeyPath(rawValue) {
  const value = String(rawValue || '').trim()
  if (!value) return { keyPath: '', cleanup: () => {} }
  if (fs.existsSync(value)) {
    return { keyPath: value, cleanup: () => {} }
  }
  if (!inlineKeyLooksValid(value)) {
    throw new Error('APPLE_API_KEY must be a file path or inline .p8 key content')
  }
  const tempPath = path.join(os.tmpdir(), `hermes-notary-${Date.now()}-${process.pid}.p8`)
  // mode 0o600: the temp file holds a private key — keep it owner-only
  // instead of the umask-dependent default.
  fs.writeFileSync(tempPath, value, { encoding: 'utf8', mode: 0o600 })
  return {
    keyPath: tempPath,
    cleanup: () => {
      try {
        fs.rmSync(tempPath, { force: true })
      } catch {
        // Best-effort cleanup.
      }
    }
  }
}
// electron-builder afterSign hook: notarize and staple the signed macOS .app.
//
// Credential paths, in priority order:
//   1. APPLE_NOTARY_PROFILE — a keychain profile for `xcrun notarytool`.
//   2. APPLE_API_KEY(+_ID, +_ISSUER) — App Store Connect API key, either a
//      file path or inline .p8 content (see resolveApiKeyPath).
// When neither is fully configured the hook logs and returns without error,
// so unsigned/local builds still complete.
exports.default = async function notarize(context) {
const { electronPlatformName, appOutDir, packager } = context
// Only macOS builds are notarized.
if (electronPlatformName !== 'darwin') return
const appName = packager.appInfo.productFilename
const appPath = path.join(appOutDir, `${appName}.app`)
if (!fs.existsSync(appPath)) {
throw new Error(`Cannot notarize missing app bundle: ${appPath}`)
}
const profile = String(process.env.APPLE_NOTARY_PROFILE || '').trim()
if (profile) {
// notarytool takes an archive; ditto preserves the bundle layout and
// resource forks. The ticket is stapled onto the .app, not the zip.
const zipPath = path.join(appOutDir, `${appName}.zip`)
await run('ditto', ['-c', '-k', '--sequesterRsrc', '--keepParent', appPath, zipPath])
await run('xcrun', ['notarytool', 'submit', zipPath, '--keychain-profile', profile, '--wait'])
await run('xcrun', ['stapler', 'staple', '-v', appPath])
try {
fs.rmSync(zipPath, { force: true })
} catch {
// Best-effort cleanup.
}
return
}
const keyId = String(process.env.APPLE_API_KEY_ID || '').trim()
const issuer = String(process.env.APPLE_API_ISSUER || '').trim()
const rawApiKey = process.env.APPLE_API_KEY
if (!rawApiKey || !keyId || !issuer) {
console.log(
'Skipping notarization: APPLE_API_KEY, APPLE_API_KEY_ID, and APPLE_API_ISSUER are not fully configured.'
)
return
}
const { keyPath, cleanup } = resolveApiKeyPath(rawApiKey)
const zipPath = path.join(appOutDir, `${appName}.zip`)
try {
// Same archive/submit/staple pipeline, authenticated with the API key.
await run('ditto', ['-c', '-k', '--sequesterRsrc', '--keepParent', appPath, zipPath])
await run('xcrun', ['notarytool', 'submit', zipPath, '--key', keyPath, '--key-id', keyId, '--issuer', issuer, '--wait'])
await run('xcrun', ['stapler', 'staple', '-v', appPath])
} finally {
// Remove the zip first, then the temp key file (cleanup from resolveApiKeyPath).
try {
fs.rmSync(zipPath, { force: true })
} catch {
// Best-effort cleanup.
}
cleanup()
}
}

View File

@@ -1,109 +0,0 @@
import fs from 'node:fs/promises'
import path from 'node:path'
import { fileURLToPath } from 'node:url'
// Path anchors: this script lives inside the desktop package, so the desktop
// root is one directory up and the repository root two more.
const DESKTOP_ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..')
const REPO_ROOT = path.resolve(DESKTOP_ROOT, '../..')
// Everything is staged under build/hermes-agent for packaging.
const OUT_ROOT = path.join(DESKTOP_ROOT, 'build', 'hermes-agent')
// Top-level repo files copied verbatim (missing ones are silently skipped).
const ROOT_FILES = [
'README.md',
'LICENSE',
'pyproject.toml',
'run_agent.py',
'model_tools.py',
'toolsets.py',
'batch_runner.py',
'trajectory_compressor.py',
'toolset_distributions.py',
'cli.py',
'hermes_constants.py',
'hermes_logging.py',
'hermes_state.py',
'hermes_time.py',
'rl_cli.py',
'utils.py'
]
// Top-level repo directories copied recursively (filtered through keep()).
const ROOT_DIRS = [
'acp_adapter',
'agent',
'cron',
'gateway',
'hermes_cli',
'plugins',
'scripts',
'skills',
'tools',
'tui_gateway'
]
// Prebuilt TUI outputs pulled from ui-tui/.
const TUI_FILES = ['package.json', 'package-lock.json']
const TUI_DIRS = ['dist', 'packages/hermes-ink/dist']
// Directory/file names that must never land in the staged payload.
const EXCLUDED_NAMES = new Set([
  '.DS_Store',
  '.git',
  '.mypy_cache',
  '.pytest_cache',
  '.ruff_cache',
  '.venv',
  '__pycache__',
  'node_modules',
  'release',
  'venv'
])
// Copy filter: rejects excluded names and compiled Python bytecode.
function keep(entry) {
  const { name } = entry
  if (EXCLUDED_NAMES.has(name)) {
    return false
  }
  return !name.endsWith('.pyc') && !name.endsWith('.pyo')
}
// True when `target` exists (fs.access resolves), false otherwise.
async function exists(target) {
  return fs.access(target).then(
    () => true,
    () => false
  )
}
// Copy a single file from the repo root into the staging tree, creating
// parent directories as needed; silently skips files absent in this checkout.
async function copyFileIfPresent(relativePath) {
const from = path.join(REPO_ROOT, relativePath)
if (!(await exists(from))) return
const to = path.join(OUT_ROOT, relativePath)
await fs.mkdir(path.dirname(to), { recursive: true })
await fs.copyFile(from, to)
}
// Recursively copy a directory from the repo root into the staging tree,
// skipping it entirely when absent. The filter tests only each entry's
// basename via keep(), so exclusions apply at every depth.
async function copyDirIfPresent(relativePath) {
const from = path.join(REPO_ROOT, relativePath)
if (!(await exists(from))) return
const to = path.join(OUT_ROOT, relativePath)
await fs.cp(from, to, {
recursive: true,
filter: source => keep({ name: path.basename(source) })
})
}
// Rebuild the staging tree from scratch: wipe OUT_ROOT, copy root files in
// parallel, then directories sequentially, then the prebuilt TUI outputs.
async function main() {
await fs.rm(OUT_ROOT, { force: true, recursive: true })
await fs.mkdir(OUT_ROOT, { recursive: true })
// Files are independent — copy them concurrently.
await Promise.all(ROOT_FILES.map(copyFileIfPresent))
for (const dir of ROOT_DIRS) {
await copyDirIfPresent(dir)
}
for (const file of TUI_FILES) {
await copyFileIfPresent(path.join('ui-tui', file))
}
for (const dir of TUI_DIRS) {
await copyDirIfPresent(path.join('ui-tui', dir))
}
}
await main()

View File

@@ -1,171 +0,0 @@
import fs from 'node:fs'
import os from 'node:os'
import path from 'node:path'
import { spawn, spawnSync } from 'node:child_process'
import { fileURLToPath } from 'node:url'
// Path anchors and derived artifact paths. APP/DMG names must match the
// electron-builder output layout under release/.
const DESKTOP_ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..')
const PACKAGE_JSON = JSON.parse(fs.readFileSync(path.join(DESKTOP_ROOT, 'package.json'), 'utf8'))
// CLI mode: existing | fresh | dmg | all — anything else prints help().
const MODE = process.argv[2] || 'help'
const ARCH = process.arch === 'arm64' ? 'arm64' : 'x64'
const RELEASE_ROOT = path.join(DESKTOP_ROOT, 'release')
const APP_PATH = path.join(RELEASE_ROOT, `mac-${ARCH}`, 'Hermes.app')
const APP_BIN = path.join(APP_PATH, 'Contents', 'MacOS', 'Hermes')
const DMG_PATH = path.join(RELEASE_ROOT, `Hermes-${PACKAGE_JSON.version}-${ARCH}.dmg`)
// Per-user app-support dir; RUNTIME_ROOT is wiped by launchFresh() to force
// the app to re-bootstrap its bundled runtime.
const USER_DATA = path.join(os.homedir(), 'Library', 'Application Support', 'Hermes')
const RUNTIME_ROOT = path.join(USER_DATA, 'hermes-runtime')
// Print `message` (preceded by a blank line) to stderr and abort with exit 1.
function die(message) {
  const text = `\n${message}`
  console.error(text)
  process.exit(1)
}
// Run a command with inherited stdio, dying on any non-zero exit status.
// options: { cwd?, env?, shell? } — defaults to DESKTOP_ROOT / process.env.
function run(command, args, options = {}) {
  const spawnOptions = {
    cwd: options.cwd || DESKTOP_ROOT,
    env: options.env || process.env,
    shell: Boolean(options.shell),
    stdio: 'inherit'
  }
  const result = spawnSync(command, args, spawnOptions)
  if (result.status !== 0) {
    die(`${command} ${args.join(' ')} failed`)
  }
}
// Capture a command's trimmed stdout; returns '' on any failure.
function output(command, args) {
  const result = spawnSync(command, args, {
    encoding: 'utf8',
    stdio: ['ignore', 'pipe', 'ignore']
  })
  if (result.status !== 0) {
    return ''
  }
  return result.stdout.trim()
}
// Synchronous existence check, named for readability at call sites.
function exists(target) {
  return Boolean(fs.existsSync(target))
}
// Guard: these launch tests drive `open` and .app bundles — macOS only.
function ensureMac() {
  if (process.platform === 'darwin') {
    return
  }
  die('Desktop launch tests are macOS-only from this script.')
}
// Build the packaged app, unless HERMES_DESKTOP_SKIP_BUILD=1 and a previously
// built binary is already present.
function ensurePackagedApp() {
  const reuse = process.env.HERMES_DESKTOP_SKIP_BUILD === '1' && exists(APP_BIN)
  if (!reuse) {
    run('npm', ['run', 'pack'])
  }
}
// Build the DMG, unless HERMES_DESKTOP_SKIP_BUILD=1 and one already exists.
function ensureDmg() {
  const reuse = process.env.HERMES_DESKTOP_SKIP_BUILD === '1' && exists(DMG_PATH)
  if (!reuse) {
    run('npm', ['run', 'dist:mac:dmg'])
  }
}
// Launch a new instance of the packaged app via Finder's `open -n`.
function openApp() {
  if (exists(APP_PATH)) {
    run('open', ['-n', APP_PATH])
    return
  }
  die(`Missing packaged app: ${APP_PATH}`)
}
// Mount/open the built DMG in Finder.
function openDmg() {
  if (exists(DMG_PATH)) {
    run('open', [DMG_PATH])
    return
  }
  die(`Missing DMG: ${DMG_PATH}`)
}
// Simulate a first-run launch: wipe the bundled runtime, hide any existing
// Hermes install via env flags, and start the packaged binary detached so
// this script can exit while the app keeps running.
function launchFresh() {
if (!exists(APP_BIN)) {
die(`Missing app executable: ${APP_BIN}`)
}
// Force the app to re-bootstrap its bundled runtime from scratch.
fs.rmSync(RUNTIME_ROOT, { force: true, recursive: true })
const python = output('which', ['python3'])
if (!python) {
die('python3 is required for fresh bundled-runtime bootstrap.')
}
const env = {
...process.env,
HERMES_DESKTOP_IGNORE_EXISTING: '1',
HERMES_DESKTOP_TEST_MODE: 'fresh-bundled-runtime'
}
// Drop developer overrides that would point the app at a local checkout.
delete env.HERMES_DESKTOP_HERMES
delete env.HERMES_DESKTOP_HERMES_ROOT
const child = spawn(APP_BIN, [], {
cwd: os.homedir(),
detached: true,
env,
stdio: 'ignore'
})
// Fully detach so exiting this script doesn't kill the app.
child.unref()
}
// Spot-check that the packaged .app contains the critical payload files:
// the launcher binary, the bundled agent CLI, and the unpacked renderer.
function validateBundle() {
  const required = [
    APP_BIN,
    path.join(APP_PATH, 'Contents', 'Resources', 'hermes-agent', 'hermes_cli', 'main.py'),
    path.join(APP_PATH, 'Contents', 'Resources', 'app.asar.unpacked', 'dist', 'index.html')
  ]
  for (const target of required) {
    if (exists(target)) {
      continue
    }
    die(`Missing packaged payload file: ${target}`)
  }
}
// Print where the build outputs and runtime live, for manual inspection.
function printArtifacts() {
console.log('\nDesktop artifacts:')
console.log(` app: ${APP_PATH}`)
console.log(` dmg: ${DMG_PATH}`)
console.log(` runtime: ${RUNTIME_ROOT}`)
}
// Usage text for unknown/omitted modes. Kept as one template literal so the
// layout prints exactly as written.
function help() {
console.log(`Usage:
npm run test:desktop:existing # build packaged app, launch with normal PATH/existing Hermes
npm run test:desktop:fresh # build packaged app, delete bundled runtime, hide existing Hermes, launch
npm run test:desktop:dmg # build DMG and open it
npm run test:desktop:all # build DMG, validate app payload, print paths
Fast rerun:
HERMES_DESKTOP_SKIP_BUILD=1 npm run test:desktop:fresh
`)
}
// Entry point: enforce the platform guard, then dispatch on the CLI mode.
ensureMac()
switch (MODE) {
  case 'existing':
    ensurePackagedApp()
    validateBundle()
    openApp()
    printArtifacts()
    break
  case 'fresh':
    ensurePackagedApp()
    validateBundle()
    launchFresh()
    printArtifacts()
    break
  case 'dmg':
    ensureDmg()
    openDmg()
    printArtifacts()
    break
  case 'all':
    ensureDmg()
    validateBundle()
    printArtifacts()
    break
  default:
    help()
}

View File

@@ -1,140 +0,0 @@
import { useStore } from '@nanostores/react'
import { useMemo, useState } from 'react'
import { Activity, AlertCircle, Layers3, Loader2, type LucideIcon, RefreshCw, Sparkles } from '@/lib/icons'
import { cn } from '@/lib/utils'
import { $desktopActionTasks, buildRailTasks, type RailTask, type RailTaskStatus } from '@/store/activity'
import { $previewServerRestart } from '@/store/preview'
import { $sessions, $workingSessionIds } from '@/store/session'
import { OverlayCard } from '../overlays/overlay-chrome'
import { OverlayMain, OverlayNavItem, OverlaySidebar, OverlaySplitLayout } from '../overlays/overlay-split-layout'
import { OverlayView } from '../overlays/overlay-view'
// Sections available in the agents overlay sidebar.
type AgentsSection = 'tree' | 'activity' | 'history'
interface SectionDef {
description: string
icon: LucideIcon
id: AgentsSection
label: string
}
// Static sidebar entries. Only 'activity' renders real data today; the other
// sections fall through to SectionStub in AgentsView.
const SECTIONS: readonly SectionDef[] = [
{ description: 'Live subagent spawn tree for the current turn', icon: Layers3, id: 'tree', label: 'Spawn tree' },
{ description: 'Background work across sessions and the desktop', icon: Activity, id: 'activity', label: 'Activity' },
{ description: 'Past spawn snapshots, replay, and diff', icon: RefreshCw, id: 'history', label: 'History' }
]
// Icon tint per task status.
const STATUS_TONE: Record<RailTaskStatus, string> = {
error: 'text-destructive',
running: 'text-foreground',
success: 'text-emerald-500'
}
// Status glyphs; the 'running' icon is additionally spun by ActivityList.
const STATUS_ICON: Record<RailTaskStatus, LucideIcon> = {
error: AlertCircle,
running: Loader2,
success: Sparkles
}
interface AgentsViewProps {
// Section shown when the overlay first opens; defaults to the spawn tree.
initialSection?: AgentsSection
onClose: () => void
}
// Agents overlay: a sidebar of sections plus the active section's pane.
// Activity tasks are derived live from the session/preview/desktop stores.
export function AgentsView({ initialSection = 'tree', onClose }: AgentsViewProps) {
const [section, setSection] = useState<AgentsSection>(initialSection)
const sessions = useStore($sessions)
const workingSessionIds = useStore($workingSessionIds)
const previewRestart = useStore($previewServerRestart)
const desktopActionTasks = useStore($desktopActionTasks)
// Rebuild the background-task rail only when one of its inputs changes.
const activityTasks = useMemo(
() => buildRailTasks(workingSessionIds, sessions, previewRestart, desktopActionTasks),
[desktopActionTasks, previewRestart, sessions, workingSessionIds]
)
// Fall back to the first section if state ever holds an unknown id.
const active = SECTIONS.find(s => s.id === section) ?? SECTIONS[0]!
return (
<OverlayView closeLabel="Close agents" onClose={onClose}>
<OverlaySplitLayout>
<OverlaySidebar>
{SECTIONS.map(s => (
<OverlayNavItem
active={s.id === section}
icon={s.icon}
key={s.id}
label={s.label}
onClick={() => setSection(s.id)}
/>
))}
</OverlaySidebar>
<OverlayMain>
<header className="mb-4">
<h2 className="text-sm font-semibold text-foreground">{active.label}</h2>
<p className="text-xs text-muted-foreground">{active.description}</p>
</header>
{section === 'activity' ? <ActivityList tasks={activityTasks} /> : <SectionStub label={active.label} />}
</OverlayMain>
</OverlaySplitLayout>
</OverlayView>
)
}
// Flat scrollable list of background tasks, with a friendly empty state.
function ActivityList({ tasks }: { tasks: readonly RailTask[] }) {
if (tasks.length === 0) {
return (
<OverlayCard className="px-3 py-4 text-sm text-muted-foreground">
No background activity. Long-running tools, preview restarts, and parallel sessions surface here.
</OverlayCard>
)
}
return (
<div className="grid min-h-0 gap-1.5 overflow-y-auto pr-1">
{tasks.map(task => {
const Icon = STATUS_ICON[task.status]
return (
<OverlayCard className="flex items-start gap-2.5 px-3 py-2" key={task.id}>
{/* Spin the status icon only while the task is running. */}
<Icon
className={cn(
'mt-0.5 size-3.5 shrink-0',
STATUS_TONE[task.status],
task.status === 'running' && 'animate-spin'
)}
/>
<div className="min-w-0 flex-1">
<div className="truncate text-sm font-medium text-foreground">{task.label}</div>
{task.detail && <div className="truncate text-xs text-muted-foreground">{task.detail}</div>}
</div>
</OverlayCard>
)
})}
</div>
)
}
// Placeholder pane for sections whose data sources aren't wired up yet.
function SectionStub({ label }: { label: string }) {
return (
<OverlayCard className="grid place-items-center gap-3 px-6 py-12 text-center">
<Sparkles className="size-6 text-muted-foreground/70" />
<div className="grid gap-1">
<p className="text-sm font-medium text-foreground">{label} coming soon</p>
<p className="max-w-md text-xs leading-relaxed text-muted-foreground">
Subagent stores aren&apos;t wired into the desktop yet. Once gateway events for{' '}
<code className="rounded bg-muted/60 px-1 py-0.5 font-mono text-[0.65rem]">
subagent.spawn / progress / complete
</code>{' '}
land here, this view shows the live spawn tree, replay history, and pause/kill controls modelled on the
TUI&apos;s <code className="rounded bg-muted/60 px-1 py-0.5 font-mono text-[0.65rem]">/agents</code> overlay.
</p>
</div>
</OverlayCard>
)
}

View File

@@ -1,859 +0,0 @@
import type * as React from 'react'
import { useCallback, useEffect, useMemo, useState } from 'react'
import { useNavigate } from 'react-router-dom'
import { ZoomableImage } from '@/components/assistant-ui/zoomable-image'
import { PageLoader } from '@/components/page-loader'
import { Button } from '@/components/ui/button'
import { CopyButton } from '@/components/ui/copy-button'
import { Input } from '@/components/ui/input'
import {
Pagination,
PaginationButton,
PaginationContent,
PaginationEllipsis,
PaginationItem,
PaginationNext,
PaginationPrevious
} from '@/components/ui/pagination'
import { getSessionMessages, listSessions } from '@/hermes'
import { sessionTitle } from '@/lib/chat-runtime'
import { ExternalLink, FileImage, FileText, FolderOpen, Layers3, Link2, RefreshCw, Search, X } from '@/lib/icons'
import { cn } from '@/lib/utils'
import { notifyError } from '@/store/notifications'
import type { SessionInfo, SessionMessage } from '@/types/hermes'
import { sessionRoute } from '../routes'
import type { SetStatusbarItemGroup } from '../shell/statusbar-controls'
import { titlebarHeaderBaseClass } from '../shell/titlebar'
import type { SetTitlebarToolGroup } from '../shell/titlebar-controls'
// Artifact classification used for filtering and rendering.
type ArtifactKind = 'image' | 'file' | 'link'
// One deduplicated artifact extracted from a session transcript.
interface ArtifactRecord {
id: string
kind: ArtifactKind
value: string
href: string
label: string
sessionId: string
sessionTitle: string
timestamp: number
}
// ![alt](target) — markdown image syntax; group 2 is the target.
const MARKDOWN_IMAGE_RE = /!\[([^\]]*)\]\(([^)\s]+)\)/g
// [text](target) — markdown links (image matches are skipped by the caller
// via the preceding '!').
const MARKDOWN_LINK_RE = /\[([^\]]+)\]\(([^)\s]+)\)/g
// Bare http(s) URLs in prose.
const URL_RE = /https?:\/\/[^\s<>"')]+/g
// Filesystem-style paths (absolute, ~/, ./, ../); group 2 is the path itself.
const PATH_RE = /(^|[\s("'`])((?:\/|~\/|\.\.?\/)[^\s"'`<>]+(?:\.[a-z0-9]{1,8})?)/gi
// Extensions classed as images, and the wider set recognized as artifacts.
const IMAGE_EXT_RE = /\.(?:png|jpe?g|gif|webp|svg|bmp)(?:\?.*)?$/i
const FILE_EXT_RE = /\.(?:png|jpe?g|gif|webp|svg|bmp|pdf|txt|json|md|csv|zip|tar|gz|mp3|wav|mp4|mov)(?:\?.*)?$/i
// Object keys that hint a string value points at a path/URL artifact.
const KEY_HINT_RE = /(path|file|url|image|artifact|output|download|result|target)/i
// Short "Apr 29, 8:18 AM"-style formatter for artifact timestamps.
const ARTIFACT_TIME_FMT = new Intl.DateTimeFormat(undefined, {
day: 'numeric',
hour: 'numeric',
minute: '2-digit',
month: 'short'
})
// Trim whitespace and strip trailing punctuation that often clings to paths
// and URLs extracted from prose (closing parens, commas, periods, semicolons).
function normalizeValue(value: string): string {
  const trimmed = value.trim()
  return trimmed.replace(/[),.;]+$/, '')
}
// Best-effort JSON.parse: returns null for blank input or invalid JSON
// instead of throwing.
function parseMaybeJson(value: string): unknown {
  if (value.trim() === '') {
    return null
  }
  try {
    return JSON.parse(value)
  } catch {
    return null
  }
}
// Heuristic: the string begins like an absolute/relative/home path or one of
// the URL schemes we extract artifacts from.
function looksLikePathOrUrl(value: string): boolean {
  const prefixes = ['http://', 'https://', 'file://', 'data:image/', '/', './', '../', '~/']
  return prefixes.some(prefix => value.startsWith(prefix))
}
// True when the string plausibly points at a concrete artifact: a data-URI
// image, a path/URL with a recognized extension, or any absolute path that
// contains a dot.
function looksLikeArtifact(value: string): boolean {
  if (value.startsWith('data:image/')) {
    return true
  }
  const hasKnownExtension = IMAGE_EXT_RE.test(value) || FILE_EXT_RE.test(value)
  if (looksLikePathOrUrl(value) && hasKnownExtension) {
    return true
  }
  return value.startsWith('/') && value.includes('.')
}
// Classify an artifact string as an image, a local file, or an external link.
function artifactKind(value: string): ArtifactKind {
  if (value.startsWith('data:image/') || IMAGE_EXT_RE.test(value)) {
    return 'image'
  }
  const fileLikePrefixes = ['/', './', '../', '~/', 'file://']
  const isFile = fileLikePrefixes.some(prefix => value.startsWith(prefix))
  return isFile ? 'file' : 'link'
}
// Convert an artifact value into an openable href. Absolute POSIX paths
// become URI-encoded file:// URLs; URLs and data URIs pass through untouched;
// anything else (relative/home paths) is returned as-is.
function artifactHref(value: string): string {
  const passthrough = ['http://', 'https://', 'file://', 'data:']
  if (passthrough.some(prefix => value.startsWith(prefix))) {
    return value
  }
  return value.startsWith('/') ? `file://${encodeURI(value)}` : value
}
// Human-readable label: the last path segment of a URL or filesystem path,
// falling back to the raw value when nothing sensible can be extracted.
function artifactLabel(value: string): string {
  try {
    const segments = new URL(value).pathname.split('/').filter(Boolean)
    const last = segments[segments.length - 1]
    return last || value
  } catch {
    // Not a parseable URL — treat it as a plain path (either separator).
    const parts = value.split(/[\\/]/).filter(Boolean)
    const last = parts[parts.length - 1]
    return last || value
  }
}
// First non-blank textual payload of a message, checked in priority order:
// content, then text, then context. Returns '' when none is usable.
function messageText(message: SessionMessage): string {
  for (const candidate of [message.content, message.text, message.context]) {
    if (typeof candidate === 'string' && candidate.trim()) {
      return candidate
    }
  }
  return ''
}
// Depth-first walk of an arbitrary JSON-ish value, invoking `collector` for
// every string leaf together with its dotted key path (array indices
// included, e.g. "tool_call.args.0.path").
function collectStringValues(
  value: unknown,
  keyPath: string,
  collector: (value: string, keyPath: string) => void
): void {
  if (typeof value === 'string') {
    collector(value, keyPath)
    return
  }
  if (Array.isArray(value)) {
    let index = 0
    for (const entry of value) {
      collectStringValues(entry, `${keyPath}.${index}`, collector)
      index += 1
    }
    return
  }
  if (value === null || typeof value !== 'object') {
    return
  }
  for (const [key, child] of Object.entries(value as Record<string, unknown>)) {
    const childPath = keyPath ? `${keyPath}.${key}` : key
    collectStringValues(child, childPath, collector)
  }
}
// Scan free text for artifact candidates: markdown image targets (always
// pushed), markdown links and bare URLs (pushed only when they look like
// artifacts), and filesystem-style paths (pushed raw for the caller's later
// normalize/looksLikeArtifact filtering).
function collectArtifactsFromText(text: string, pushValue: (value: string) => void): void {
  for (const match of text.matchAll(MARKDOWN_IMAGE_RE)) {
    pushValue(match[2] || '')
  }
  for (const match of text.matchAll(MARKDOWN_LINK_RE)) {
    const start = match.index ?? 0
    // Skip image syntax: the image regex already handled "![...](...)".
    const isImageSyntax = start > 0 && text[start - 1] === '!'
    if (isImageSyntax) {
      continue
    }
    const target = match[2] || ''
    if (looksLikeArtifact(target)) {
      pushValue(target)
    }
  }
  for (const match of text.matchAll(URL_RE)) {
    const url = match[0] || ''
    if (looksLikeArtifact(url)) {
      pushValue(url)
    }
  }
  for (const match of text.matchAll(PATH_RE)) {
    pushValue(match[2] || '')
  }
}
// Extract artifact candidates from one message: its prose text always, plus
// tool-call arguments and JSON tool results for tool-related messages.
function collectArtifactsFromMessage(message: SessionMessage, pushValue: (value: string) => void): void {
const text = messageText(message)
if (text) {
collectArtifactsFromText(text, pushValue)
}
// Beyond prose, only tool messages and assistant messages carrying tool
// calls are mined for structured values.
if (message.role !== 'tool' && !Array.isArray(message.tool_calls)) {
return
}
if (Array.isArray(message.tool_calls)) {
for (const call of message.tool_calls) {
// Tool-call args: accept strings under hinting keys (path/url/...) that
// look like paths/URLs or carry a recognized file extension.
collectStringValues(call, 'tool_call', (value, keyPath) => {
const normalized = normalizeValue(value)
if (!normalized) {
return
}
if (KEY_HINT_RE.test(keyPath) && (looksLikePathOrUrl(normalized) || FILE_EXT_RE.test(normalized))) {
pushValue(normalized)
}
})
}
}
// Tool results often arrive as JSON text; mine string leaves with a looser
// rule (hinting key OR path-like) but require looksLikeArtifact to pass.
const parsed = parseMaybeJson(text)
if (parsed !== null) {
collectStringValues(parsed, 'tool_result', (value, keyPath) => {
const normalized = normalizeValue(value)
if (!normalized) {
return
}
if ((KEY_HINT_RE.test(keyPath) || looksLikePathOrUrl(normalized)) && looksLikeArtifact(normalized)) {
pushValue(normalized)
}
})
}
}
// Build deduplicated ArtifactRecords for one session from its assistant and
// tool messages. Dedupe key is `${session.id}:${value}` — the first
// occurrence of a value wins (keeping its earliest timestamp).
function collectArtifactsForSession(session: SessionInfo, messages: SessionMessage[]): ArtifactRecord[] {
const found = new Map<string, ArtifactRecord>()
const title = sessionTitle(session)
for (const message of messages) {
// User messages are ignored: only model/tool output counts as an artifact.
if (message.role !== 'assistant' && message.role !== 'tool') {
continue
}
collectArtifactsFromMessage(message, candidate => {
// Candidates are re-normalized and re-validated here because some
// collection paths push raw, unfiltered matches (e.g. PATH_RE hits).
const value = normalizeValue(candidate)
if (!value || !looksLikeArtifact(value)) {
return
}
const key = `${session.id}:${value}`
if (found.has(key)) {
return
}
found.set(key, {
id: key,
kind: artifactKind(value),
value,
href: artifactHref(value),
label: artifactLabel(value),
sessionId: session.id,
sessionTitle: title,
// Fall back through session activity times when the message itself
// carries no timestamp.
timestamp: message.timestamp || session.last_active || session.started_at || Date.now()
})
})
}
return Array.from(found.values())
}
// Format an epoch-ms timestamp with the shared short date/time formatter.
function formatArtifactTime(timestamp: number): string {
  const when = new Date(timestamp)
  return ARTIFACT_TIME_FMT.format(when)
}
// "start-end of total" label for the current page; plain '0' when empty.
function pageRangeLabel(total: number, page: number, pageSize: number): string {
  if (total === 0) {
    return '0'
  }
  const first = (page - 1) * pageSize + 1
  const last = Math.min(total, page * pageSize)
  return `${first}-${last} of ${total}`
}
// Page-number strip with ellipses: every page when there are ≤7; otherwise
// page 1, a window of [page-1, page+1] clamped to the interior, and the last
// page, with 'ellipsis' markers for any gaps.
function paginationItems(page: number, pageCount: number): Array<number | 'ellipsis'> {
  if (pageCount <= 7) {
    return Array.from({ length: pageCount }, (_, index) => index + 1)
  }
  const windowStart = Math.max(2, page - 1)
  const windowEnd = Math.min(pageCount - 1, page + 1)
  const items: Array<number | 'ellipsis'> = [1]
  if (windowStart > 2) {
    items.push('ellipsis')
  }
  for (let current = windowStart; current <= windowEnd; current += 1) {
    items.push(current)
  }
  if (windowEnd < pageCount - 1) {
    items.push('ellipsis')
  }
  items.push(pageCount)
  return items
}
// Shell hooks for publishing titlebar/statusbar controls. The statusbar
// setter is accepted for interface parity but unused by this view.
interface ArtifactsViewProps extends React.ComponentProps<'section'> {
setStatusbarItemGroup?: SetStatusbarItemGroup
setTitlebarToolGroup?: SetTitlebarToolGroup
}
// Artifacts view: indexes the 30 most recent sessions for generated images,
// files, and links; offers kind filters, text search, and paged image grid /
// file table rendering.
export function ArtifactsView({
setStatusbarItemGroup: _setStatusbarItemGroup,
setTitlebarToolGroup,
...props
}: ArtifactsViewProps) {
const navigate = useNavigate()
// null = initial load in flight (shows PageLoader); [] = loaded but empty.
const [artifacts, setArtifacts] = useState<ArtifactRecord[] | null>(null)
const [query, setQuery] = useState('')
const [kindFilter, setKindFilter] = useState<'all' | ArtifactKind>('all')
const [refreshing, setRefreshing] = useState(false)
// Image ids whose <img> failed to load; cards degrade instead of retrying.
const [failedImageIds, setFailedImageIds] = useState<Set<string>>(() => new Set())
const [imagePage, setImagePage] = useState(1)
const [filePage, setFilePage] = useState(1)
// Fetch the 30 most recent sessions, mine each transcript, and flatten the
// results newest-first. allSettled: one failed session fetch is skipped
// rather than failing the whole refresh.
const refreshArtifacts = useCallback(async () => {
setRefreshing(true)
try {
const sessions = (await listSessions(30, 1)).sessions
const results = await Promise.allSettled(sessions.map(session => getSessionMessages(session.id)))
const nextArtifacts: ArtifactRecord[] = []
results.forEach((result, index) => {
if (result.status !== 'fulfilled') {
return
}
const session = sessions[index]
nextArtifacts.push(...collectArtifactsForSession(session, result.value.messages))
})
setArtifacts(nextArtifacts.sort((a, b) => b.timestamp - a.timestamp))
} catch (err) {
notifyError(err, 'Artifacts failed to load')
setArtifacts([])
} finally {
setRefreshing(false)
}
}, [])
// Initial load on mount.
useEffect(() => {
void refreshArtifacts()
}, [refreshArtifacts])
// Publish a refresh button into the titlebar while this view is mounted;
// cleared again on unmount.
useEffect(() => {
if (!setTitlebarToolGroup) {
return
}
setTitlebarToolGroup('artifacts', [
{
disabled: refreshing,
icon: <RefreshCw className={cn(refreshing && 'animate-spin')} />,
id: 'refresh-artifacts',
label: refreshing ? 'Refreshing artifacts' : 'Refresh artifacts',
onSelect: () => void refreshArtifacts()
}
])
return () => setTitlebarToolGroup('artifacts', [])
}, [refreshArtifacts, refreshing, setTitlebarToolGroup])
// Reset pagination whenever the data set or any filter changes.
useEffect(() => {
setImagePage(1)
setFilePage(1)
}, [artifacts, kindFilter, query])
// Apply the kind filter plus a case-insensitive text query over label,
// value, and session title.
const visibleArtifacts = useMemo(() => {
if (!artifacts) {
return []
}
const q = query.trim().toLowerCase()
return artifacts.filter(artifact => {
if (kindFilter !== 'all' && artifact.kind !== kindFilter) {
return false
}
if (!q) {
return true
}
return (
artifact.label.toLowerCase().includes(q) ||
artifact.value.toLowerCase().includes(q) ||
artifact.sessionTitle.toLowerCase().includes(q)
)
})
}, [artifacts, kindFilter, query])
const visibleImageArtifacts = useMemo(
() => visibleArtifacts.filter(artifact => artifact.kind === 'image'),
[visibleArtifacts]
)
const visibleFileArtifacts = useMemo(
() => visibleArtifacts.filter(artifact => artifact.kind !== 'image'),
[visibleArtifacts]
)
// Page sizes: 24 images per grid page, 100 rows per file-table page.
// Current pages are clamped so a shrinking filtered set can't strand us
// past the last page.
const imagePageCount = Math.max(1, Math.ceil(visibleImageArtifacts.length / 24))
const filePageCount = Math.max(1, Math.ceil(visibleFileArtifacts.length / 100))
const currentImagePage = Math.min(imagePage, imagePageCount)
const currentFilePage = Math.min(filePage, filePageCount)
const pagedImageArtifacts = useMemo(
() => visibleImageArtifacts.slice((currentImagePage - 1) * 24, currentImagePage * 24),
[currentImagePage, visibleImageArtifacts]
)
const pagedFileArtifacts = useMemo(
() => visibleFileArtifacts.slice((currentFilePage - 1) * 100, currentFilePage * 100),
[currentFilePage, visibleFileArtifacts]
)
// Per-kind totals for the filter-button labels (unfiltered counts).
const counts = useMemo(() => {
const all = artifacts || []
return {
all: all.length,
image: all.filter(artifact => artifact.kind === 'image').length,
file: all.filter(artifact => artifact.kind === 'file').length,
link: all.filter(artifact => artifact.kind === 'link').length
}
}, [artifacts])
// Prefer the desktop bridge (which can open file:// targets); fall back to
// a regular browser window.
const openArtifact = useCallback(async (href: string) => {
try {
if (window.hermesDesktop?.openExternal) {
await window.hermesDesktop.openExternal(href)
} else {
window.open(href, '_blank', 'noopener,noreferrer')
}
} catch (err) {
notifyError(err, 'Open failed')
}
}, [])
// Record a broken image id exactly once (copy-on-write keeps state stable).
const markImageFailed = useCallback((id: string) => {
setFailedImageIds(current => {
if (current.has(id)) {
return current
}
return new Set(current).add(id)
})
}, [])
return (
<section {...props} className="flex h-full min-w-0 flex-col overflow-hidden rounded-[0.9375rem] bg-background">
<header className={titlebarHeaderBaseClass}>
<h2 className="pointer-events-auto text-base font-semibold leading-none tracking-tight">Artifacts</h2>
<span className="pointer-events-auto text-xs text-muted-foreground">{counts.all} found</span>
</header>
<div className="min-h-0 flex-1 overflow-hidden rounded-[1.0625rem] border border-border/50 bg-background/85">
<div className="border-b border-border/50 px-4 py-3">
<div className="flex flex-wrap items-center gap-2">
<FilterButton
active={kindFilter === 'all'}
icon={Layers3}
label={`All (${counts.all})`}
onClick={() => setKindFilter('all')}
/>
<FilterButton
active={kindFilter === 'image'}
icon={FileImage}
label={`Images (${counts.image})`}
onClick={() => setKindFilter('image')}
/>
<FilterButton
active={kindFilter === 'file'}
icon={FileText}
label={`Files (${counts.file})`}
onClick={() => setKindFilter('file')}
/>
<FilterButton
active={kindFilter === 'link'}
icon={Link2}
label={`Links (${counts.link})`}
onClick={() => setKindFilter('link')}
/>
<div className="ml-auto w-full max-w-sm min-w-64">
<div className="relative">
<Search className="pointer-events-none absolute left-2.5 top-1/2 size-3.5 -translate-y-1/2 text-muted-foreground" />
<Input
className="h-8 rounded-lg pl-8 pr-8 text-sm"
onChange={event => setQuery(event.target.value)}
placeholder="Search artifacts..."
value={query}
/>
{query && (
<Button
aria-label="Clear search"
className="absolute right-1 top-1/2 h-6 w-6 -translate-y-1/2 text-muted-foreground hover:text-foreground"
onClick={() => setQuery('')}
size="icon"
type="button"
variant="ghost"
>
<X className="size-3.5" />
</Button>
)}
</div>
</div>
</div>
</div>
{!artifacts ? (
<PageLoader label="Indexing recent session artifacts" />
) : visibleArtifacts.length === 0 ? (
<div className="grid h-full place-items-center px-6 text-center">
<div>
<div className="text-sm font-medium">No artifacts found</div>
<div className="mt-1 text-xs text-muted-foreground">
Generated images and file outputs will appear here as sessions produce them.
</div>
</div>
</div>
) : (
<div className="h-full overflow-y-auto">
<div className="flex flex-col gap-4 px-2 pb-2">
{visibleImageArtifacts.length > 0 && (
<section aria-labelledby="artifacts-images-heading" className="flex flex-col">
<div className="sticky top-0 z-10 -mx-2 flex h-7 items-center justify-between gap-3 overflow-x-auto bg-background px-3">
<h3 className="shrink-0 text-xs font-semibold" id="artifacts-images-heading">
Images
</h3>
<ArtifactsPagination
className="justify-end px-0"
itemLabel="images"
onPageChange={setImagePage}
page={currentImagePage}
pageSize={24}
total={visibleImageArtifacts.length}
/>
</div>
<div className="grid grid-cols-[repeat(auto-fill,minmax(12rem,1fr))] items-start gap-2 pt-1.5">
{pagedImageArtifacts.map(artifact => (
<ArtifactImageCard
artifact={artifact}
failedImage={failedImageIds.has(artifact.id)}
key={artifact.id}
onImageError={markImageFailed}
onOpenChat={sessionId => navigate(sessionRoute(sessionId))}
/>
))}
</div>
</section>
)}
{visibleFileArtifacts.length > 0 && (
<section aria-labelledby="artifacts-files-heading" className="flex flex-col">
<div className="sticky top-0 z-10 -mx-2 flex h-7 items-center justify-between gap-3 overflow-x-auto bg-background px-3">
<h3 className="shrink-0 text-xs font-semibold" id="artifacts-files-heading">
{kindFilter === 'link' ? 'Links' : kindFilter === 'file' ? 'Files' : 'Files and links'}
</h3>
<ArtifactsPagination
className="justify-end px-0"
itemLabel="files"
onPageChange={setFilePage}
page={currentFilePage}
pageSize={100}
total={visibleFileArtifacts.length}
/>
</div>
<div className="overflow-x-auto rounded-lg border border-border/50 bg-background/70 shadow-[0_0.125rem_0.5rem_color-mix(in_srgb,black_3%,transparent)]">
<table className="w-full min-w-176 table-fixed text-left text-xs">
<thead className="border-b border-border/50 bg-muted/35 text-[0.62rem] uppercase tracking-[0.08em] text-muted-foreground">
<tr>
<th className="w-[31%] px-2.5 py-1.5 font-medium">Name</th>
<th className="w-[35%] px-2.5 py-1.5 font-medium">Location</th>
<th className="w-[22%] px-2.5 py-1.5 font-medium">Session</th>
<th className="w-[12%] px-2.5 py-1.5 text-right font-medium">Actions</th>
</tr>
</thead>
<tbody className="divide-y divide-border/45">
{pagedFileArtifacts.map(artifact => (
<ArtifactListRow
artifact={artifact}
key={artifact.id}
onOpen={openArtifact}
onOpenChat={sessionId => navigate(sessionRoute(sessionId))}
/>
))}
</tbody>
</table>
</div>
</section>
)}
</div>
</div>
)}
</div>
</section>
)
}
/** Props for {@link ArtifactsPagination}. */
interface ArtifactsPaginationProps {
  /** Extra classes merged onto the pagination row container. */
  className?: string
  /** Plural noun shown after the range label and inside page-button aria-labels (e.g. "images"). */
  itemLabel: string
  /** Invoked with the 1-based page number the user selected. */
  onPageChange: (page: number) => void
  /** Current 1-based page. */
  page: number
  /** Items per page; used to derive the page count from `total`. */
  pageSize: number
  /** Total number of items across all pages. */
  total: number
}
/**
 * Compact pagination header row: a "range of total" label on the left and,
 * only when more than one page exists, previous/next plus numbered page
 * buttons (with ellipses) on the right.
 */
function ArtifactsPagination({ className, itemLabel, onPageChange, page, pageSize, total }: ArtifactsPaginationProps) {
  // Always at least one page, even when there are zero items.
  const lastPage = Math.max(1, Math.ceil(total / pageSize))
  const goToPrevious = () => onPageChange(Math.max(1, page - 1))
  const goToNext = () => onPageChange(Math.min(lastPage, page + 1))
  return (
    <div className={cn('flex h-6 items-center justify-between gap-2 px-1', className)}>
      <div className="shrink-0 text-[0.62rem] text-muted-foreground">
        {pageRangeLabel(total, page, pageSize)} {itemLabel}
      </div>
      {lastPage > 1 && (
        <Pagination className="mx-0 w-auto min-w-0 justify-end">
          <PaginationContent className="gap-0.5">
            <PaginationItem>
              <PaginationPrevious disabled={page <= 1} onClick={goToPrevious} />
            </PaginationItem>
            {/* paginationItems yields page numbers interleaved with 'ellipsis' markers. */}
            {paginationItems(page, lastPage).map((entry, index) => (
              <PaginationItem key={`${entry}-${index}`}>
                {entry === 'ellipsis' ? (
                  <PaginationEllipsis />
                ) : (
                  <PaginationButton
                    aria-label={`Go to ${itemLabel} page ${entry}`}
                    isActive={page === entry}
                    onClick={() => onPageChange(entry)}
                  >
                    {entry}
                  </PaginationButton>
                )}
              </PaginationItem>
            ))}
            <PaginationItem>
              <PaginationNext disabled={page >= lastPage} onClick={goToNext} />
            </PaginationItem>
          </PaginationContent>
        </Pagination>
      )}
    </div>
  )
}
/**
 * Toggle-style filter chip for the artifacts toolbar: a ghost button that
 * gains an accent background while its filter is active.
 */
function FilterButton({
  active,
  icon: Icon,
  label,
  onClick
}: {
  active: boolean
  icon: typeof Layers3
  label: string
  onClick: () => void
}) {
  // Active chips read as selected; inactive ones stay muted until hovered.
  const stateClass = active ? 'bg-accent text-foreground' : 'text-muted-foreground hover:text-foreground'
  return (
    <Button
      className={cn('h-8 gap-1.5 rounded-md px-2.5 text-xs', stateClass)}
      onClick={onClick}
      size="sm"
      type="button"
      variant="ghost"
    >
      <Icon className="size-3.5" />
      {label}
    </Button>
  )
}
/** Props for {@link ArtifactImageCard}. */
interface ArtifactImageCardProps {
  /** The image artifact to render. */
  artifact: ArtifactRecord
  /** True when this artifact's image previously failed to load; the preview is then hidden. */
  failedImage: boolean
  /** Called with the artifact id when the image errors, so the parent can mark it failed. */
  onImageError: (id: string) => void
  /** Navigates to the chat session the artifact came from. */
  onOpenChat: (sessionId: string) => void
}
/**
 * Card for a single image artifact: a fixed-height zoomable preview on top,
 * with kind badge, label, raw value, session/time metadata, and a "Chat"
 * shortcut back to the originating session below.
 */
function ArtifactImageCard({ artifact, failedImage, onImageError, onOpenChat }: ArtifactImageCardProps) {
  return (
    <article
      className={cn(
        'group/artifact overflow-hidden rounded-lg border border-border/50 bg-background/70 shadow-[0_0.125rem_0.5rem_color-mix(in_srgb,black_3%,transparent)]',
        // NOTE(review): 'bg-muted/20' conflicts with 'bg-background/70' above; if cn is
        // tailwind-merge-backed the later class wins — confirm which background is intended.
        'bg-muted/20'
      )}
    >
      {/* Preview well — stays rendered (empty) after a load failure; there is no retry path. */}
      <div
        className={cn(
          'relative flex h-44 w-full items-center justify-center overflow-hidden border-b border-border/50 bg-[color-mix(in_srgb,var(--dt-muted)_58%,var(--dt-background))] p-1.5',
          failedImage && 'cursor-default'
        )}
      >
        {!failedImage && (
          <ZoomableImage
            alt={artifact.label}
            className="max-h-40 max-w-full rounded-md object-contain shadow-sm"
            containerClassName="max-h-full"
            decoding="async"
            loading="lazy"
            onError={() => onImageError(artifact.id)}
            slot="artifact-media"
            src={artifact.href}
          />
        )}
      </div>
      <div className="space-y-1.5 p-2">
        <div className="min-w-0">
          {/* Kind badge (e.g. "image"), then label and the raw source value. */}
          <div className="mb-0.5 flex items-center gap-1 text-[0.62rem] uppercase tracking-[0.08em] text-muted-foreground">
            <FileImage className="size-3" />
            {artifact.kind}
          </div>
          <div className="truncate text-xs font-medium">{artifact.label}</div>
          <div className="mt-0.5 truncate text-[0.62rem] text-muted-foreground">{artifact.value}</div>
        </div>
        <div className="truncate text-[0.62rem] text-muted-foreground">
          {artifact.sessionTitle} · {formatArtifactTime(artifact.timestamp)}
        </div>
        <div className="flex flex-wrap gap-1.5">
          <Button onClick={() => onOpenChat(artifact.sessionId)} size="xs" type="button" variant="outline">
            <FolderOpen className="size-3" />
            Chat
          </Button>
        </div>
      </div>
    </article>
  )
}
/** Props for {@link ArtifactListRow}. */
interface ArtifactListRowProps {
  /** The file or link artifact shown in this table row. */
  artifact: ArtifactRecord
  /** Opens the artifact itself; may be async (callers fire-and-forget with `void`). */
  onOpen: (href: string) => void | Promise<void>
  /** Navigates to the chat session the artifact came from. */
  onOpenChat: (sessionId: string) => void
}
/**
 * Table row for a non-image artifact: icon + name/kind, monospace location,
 * session title + timestamp, and hover-emphasized open / copy / open-chat
 * actions on the right.
 */
function ArtifactListRow({ artifact, onOpen, onOpenChat }: ArtifactListRowProps) {
  // Files get a document glyph; any other kind (links) gets a link glyph.
  const Icon = artifact.kind === 'file' ? FileText : Link2
  return (
    <tr className="group/artifact transition-colors hover:bg-muted/30">
      <td className="px-2.5 py-1.5 align-middle">
        <div className="flex min-w-0 items-center gap-2">
          <div className="grid size-7 shrink-0 place-items-center rounded-md bg-muted text-muted-foreground">
            <Icon className="size-3.5" />
          </div>
          <div className="min-w-0">
            {/* title attrs expose the full text when the cell truncates. */}
            <div className="truncate font-medium" title={artifact.label}>
              {artifact.label}
            </div>
            <div className="text-[0.6rem] uppercase tracking-[0.08em] text-muted-foreground">{artifact.kind}</div>
          </div>
        </div>
      </td>
      <td className="px-2.5 py-1.5 align-middle">
        <div className="truncate font-mono text-[0.68rem] text-muted-foreground/85" title={artifact.value}>
          {artifact.value}
        </div>
      </td>
      <td className="px-2.5 py-1.5 align-middle">
        <div className="min-w-0">
          <div className="truncate text-[0.68rem] text-muted-foreground" title={artifact.sessionTitle}>
            {artifact.sessionTitle}
          </div>
          <div className="text-[0.6rem] text-muted-foreground/75">{formatArtifactTime(artifact.timestamp)}</div>
        </div>
      </td>
      <td className="px-2.5 py-1.5 align-middle">
        {/* Actions are dimmed until the row is hovered. */}
        <div className="flex justify-end gap-0.5 opacity-70 transition-opacity group-hover/artifact:opacity-100">
          <Button
            className="text-muted-foreground hover:text-foreground"
            onClick={() => void onOpen(artifact.href)}
            size="icon-xs"
            title="Open"
            type="button"
            variant="ghost"
          >
            <ExternalLink className="size-3.5" />
          </Button>
          <CopyButton
            appearance="button"
            buttonSize="icon-xs"
            className="text-muted-foreground hover:text-foreground"
            iconClassName="size-3.5"
            label="Copy"
            text={artifact.value}
          />
          <Button
            className="text-muted-foreground hover:text-foreground"
            onClick={() => onOpenChat(artifact.sessionId)}
            size="icon-xs"
            title="Open chat"
            type="button"
            variant="ghost"
          >
            <FolderOpen className="size-3.5" />
          </Button>
        </div>
      </td>
    </tr>
  )
}

View File

@@ -1,109 +0,0 @@
import { useStore } from '@nanostores/react'
import { FileText, FolderOpen, ImageIcon, Link, X } from '@/lib/icons'
import { normalizeOrLocalPreviewTarget } from '@/lib/local-preview'
import type { ComposerAttachment } from '@/store/composer'
import { notifyError } from '@/store/notifications'
import { setCurrentSessionPreviewTarget } from '@/store/preview'
import { $currentCwd } from '@/store/session'
/**
 * Row of removable attachment pills rendered above the composer input.
 */
export function AttachmentList({
  attachments,
  onRemove
}: {
  attachments: ComposerAttachment[]
  onRemove?: (id: string) => void
}) {
  const pills = attachments.map(attachment => (
    <AttachmentPill attachment={attachment} key={attachment.id} onRemove={onRemove} />
  ))
  return (
    <div className="flex max-w-full flex-wrap gap-1.5 px-1 pt-1" data-slot="composer-attachments">
      {pills}
    </div>
  )
}
/**
 * Single attachment pill: icon or image thumbnail plus label, clickable to
 * open an inline preview (folders excluded), with a hover-revealed remove
 * button in the top-right corner.
 */
function AttachmentPill({ attachment, onRemove }: { attachment: ComposerAttachment; onRemove?: (id: string) => void }) {
  // Glyph per attachment kind.
  const Icon = { folder: FolderOpen, url: Link, image: ImageIcon, file: FileText }[attachment.kind]
  const cwd = useStore($currentCwd)
  // Folders have no previewable content; the pill button is disabled for them.
  const canPreview = attachment.kind !== 'folder'
  // Secondary line only when it adds information beyond the label.
  const detail = attachment.detail && attachment.detail !== attachment.label ? attachment.detail : undefined
  // Resolve the best available preview target and hand it to the preview pane.
  async function openPreview() {
    if (!canPreview) {
      return
    }
    // Fallback chain: explicit path → detail → @-reference with its
    // "@file:/@image:/@url:" prefix stripped → label.
    const rawTarget =
      attachment.path ||
      attachment.detail ||
      attachment.refText?.replace(/^@(file|image|url):/, '') ||
      attachment.label ||
      ''
    // Strip a single surrounding backtick on either end (from @-references).
    const target = rawTarget.replace(/^`|`$/g, '')
    if (!target) {
      return
    }
    try {
      // Resolves relative paths against the session cwd when one is set.
      const preview = await normalizeOrLocalPreviewTarget(target, cwd || undefined)
      if (!preview) {
        throw new Error(`Could not preview ${attachment.label}`)
      }
      setCurrentSessionPreviewTarget(preview, 'manual', target)
    } catch (error) {
      notifyError(error, 'Preview unavailable')
    }
  }
  return (
    <div
      className="group/attachment relative min-w-0 shrink-0"
      title={attachment.path || attachment.detail || attachment.label}
    >
      <button
        aria-label={canPreview ? `Preview ${attachment.label}` : attachment.label}
        className="flex max-w-56 items-center gap-2 border border-border/60 bg-background/50 px-2 py-1.5 text-left shadow-[inset_0_1px_0_rgba(255,255,255,0.25)] transition-colors hover:border-primary/35 hover:bg-accent/45 disabled:cursor-default"
        disabled={!canPreview}
        onClick={() => void openPreview()}
        title={canPreview ? `Preview ${attachment.label}` : attachment.label}
        type="button"
      >
        {/* Image attachments show their thumbnail; everything else shows a kind icon. */}
        {attachment.previewUrl && attachment.kind === 'image' ? (
          <img
            alt={attachment.label}
            className="size-8 shrink-0 border border-border/70 object-cover"
            draggable={false}
            src={attachment.previewUrl}
          />
        ) : (
          <span className="grid size-8 shrink-0 place-items-center border border-border/55 bg-muted/35 text-muted-foreground">
            <Icon className="size-3.5" />
          </span>
        )}
        <span className="min-w-0">
          <span className="block truncate text-[0.72rem] font-medium leading-4 text-foreground/90">
            {attachment.label}
          </span>
          {detail && (
            <span className="block truncate font-mono text-[0.6rem] leading-3 text-muted-foreground/65">{detail}</span>
          )}
        </span>
      </button>
      {/* Remove affordance appears on hover/focus; omitted entirely when the list is read-only. */}
      {onRemove && (
        <button
          aria-label={`Remove ${attachment.label}`}
          className="absolute -right-1 -top-1 grid size-3.5 place-items-center rounded-full border border-border/70 bg-background text-muted-foreground opacity-0 shadow-xs transition hover:bg-accent hover:text-foreground group-hover/attachment:opacity-100 focus-visible:opacity-100"
          onClick={() => onRemove(attachment.id)}
          type="button"
        >
          <X className="size-2.5" />
        </button>
      )}
    </div>
  )
}

View File

@@ -1,56 +0,0 @@
import type { Unstable_TriggerAdapter } from '@assistant-ui/core'
import { ComposerPrimitive } from '@assistant-ui/react'
import type { ReactNode } from 'react'
// Positioning, sizing, chrome, and open-state treatment for the completion
// drawer that slides up from the composer. Kept as labelled segments so each
// concern stays greppable; the exported value is the single joined string.
const completionDrawerParts: readonly string[] = [
  // placement above the composer
  'absolute inset-x-0 bottom-[calc(100%-0.5rem)] z-50',
  // scroll containment
  'max-h-[min(23rem,calc(100vh-8rem))] overflow-y-auto overscroll-contain',
  // frame and surface
  'rounded-t-(--composer-active-radius) border border-b-0',
  'border-[color-mix(in_srgb,var(--dt-ring)_45%,transparent)]',
  'bg-[color-mix(in_srgb,var(--dt-popover)_96%,transparent)]',
  'px-1.5 pb-3 pt-1.5 text-popover-foreground',
  // frosted-glass effect (plus the -webkit- fallback)
  'backdrop-blur-[0.75rem] backdrop-saturate-[1.1]',
  '[-webkit-backdrop-filter:blur(0.75rem)_saturate(1.1)]',
  // open-state overlap and layered shadow
  'data-[state=open]:-mb-2',
  'data-[state=open]:shadow-[0_-0.0625rem_0_0.0625rem_color-mix(in_srgb,var(--dt-ring)_35%,transparent),0_-1rem_2.25rem_-1.75rem_color-mix(in_srgb,var(--dt-foreground)_34%,transparent),0_-0.3125rem_0.875rem_-0.6875rem_color-mix(in_srgb,var(--dt-foreground)_22%,transparent)]'
]
export const COMPLETION_DRAWER_CLASS = completionDrawerParts.join(' ')
// Shared row styling for completion-drawer entries: a baseline-aligned flex
// row whose pointer-hover and keyboard-highlight backgrounds are identical.
export const COMPLETION_DRAWER_ROW_CLASS =
  'flex w-full min-w-0 items-baseline gap-2 rounded-md px-2.5 py-1 ' +
  'text-left text-xs transition-colors ' +
  'hover:bg-[color-mix(in_srgb,var(--dt-accent)_70%,transparent)] ' +
  'data-[highlighted]:bg-[color-mix(in_srgb,var(--dt-accent)_70%,transparent)]'
/**
 * Popover shell for trigger-character completion UIs ('@', '/', …): wires the
 * adapter and trigger char into assistant-ui's trigger popover and applies
 * the shared drawer chrome.
 */
export function ComposerCompletionDrawer(props: {
  adapter: Unstable_TriggerAdapter
  ariaLabel: string
  char: string
  children: ReactNode
}) {
  const { adapter, ariaLabel, char, children } = props
  return (
    <ComposerPrimitive.Unstable_TriggerPopover
      adapter={adapter}
      aria-label={ariaLabel}
      char={char}
      className={COMPLETION_DRAWER_CLASS}
      data-slot="composer-completion-drawer"
    >
      {children}
    </ComposerPrimitive.Unstable_TriggerPopover>
  )
}
export function CompletionDrawerEmpty({ children, title }: { children?: ReactNode; title: string }) {
return (
<div className="px-3 py-3 text-sm text-muted-foreground">
<p>{title}</p>
{children && <p className="mt-1 text-xs text-muted-foreground/80">{children}</p>}
</div>
)
}

View File

@@ -1,119 +0,0 @@
import { Button } from '@/components/ui/button'
import {
DropdownMenu,
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuLabel,
DropdownMenuSeparator,
DropdownMenuSub,
DropdownMenuSubContent,
DropdownMenuSubTrigger,
DropdownMenuTrigger
} from '@/components/ui/dropdown-menu'
import { Clipboard, FileText, FolderOpen, ImageIcon, Link, type LucideIcon, MessageSquareText, Plus } from '@/lib/icons'
import { cn } from '@/lib/utils'
import { GHOST_ICON_BTN } from './controls'
import type { ChatBarState } from './types'
/**
 * Reusable prompt snippets offered under "Prompt snippets" in the "+" menu.
 * Hoisted to module scope so the array (and its three objects) is not
 * rebuilt on every render of ContextMenu.
 */
const PROMPT_SNIPPETS = [
  { label: 'Code review', text: 'Please review this for bugs, regressions, and missing tests.' },
  { label: 'Implementation plan', text: 'Please make a concise implementation plan before changing code.' },
  { label: 'Explain this', text: 'Please explain how this works and point me to the key files.' }
] as const

/**
 * "+" menu on the chat bar: attachment pickers (files / folder / images /
 * clipboard image / URL), a prompt-snippet submenu, and an inline-@ tip.
 *
 * Each picker item is disabled when its callback is not provided; the whole
 * trigger is disabled when `state.tools.enabled` is false.
 */
export function ContextMenu({
  state,
  onInsertText,
  onOpenUrlDialog,
  onPasteClipboardImage,
  onPickFiles,
  onPickFolders,
  onPickImages
}: {
  state: ChatBarState
  onInsertText: (text: string) => void
  onOpenUrlDialog: () => void
  onPasteClipboardImage?: () => void
  onPickFiles?: () => void
  onPickFolders?: () => void
  onPickImages?: () => void
}) {
  return (
    <DropdownMenu>
      <DropdownMenuTrigger asChild>
        <Button
          aria-label={state.tools.label}
          className={cn(GHOST_ICON_BTN, 'data-[state=open]:bg-accent data-[state=open]:text-foreground')}
          disabled={!state.tools.enabled}
          size="icon"
          title={state.tools.label}
          type="button"
          variant="ghost"
        >
          <Plus size={18} />
        </Button>
      </DropdownMenuTrigger>
      <DropdownMenuContent align="start" className="w-60" side="top" sideOffset={10}>
        <DropdownMenuLabel className="text-[0.7rem] font-medium uppercase tracking-wide text-muted-foreground/85">
          Attach
        </DropdownMenuLabel>
        <ContextMenuItem disabled={!onPickFiles} icon={FileText} onSelect={onPickFiles}>
          Files
        </ContextMenuItem>
        <ContextMenuItem disabled={!onPickFolders} icon={FolderOpen} onSelect={onPickFolders}>
          Folder
        </ContextMenuItem>
        <ContextMenuItem disabled={!onPickImages} icon={ImageIcon} onSelect={onPickImages}>
          Images
        </ContextMenuItem>
        <ContextMenuItem disabled={!onPasteClipboardImage} icon={Clipboard} onSelect={onPasteClipboardImage}>
          Paste image
        </ContextMenuItem>
        <ContextMenuItem icon={Link} onSelect={onOpenUrlDialog}>
          URL
        </ContextMenuItem>
        <DropdownMenuSeparator />
        <DropdownMenuSub>
          <DropdownMenuSubTrigger>
            <MessageSquareText />
            <span>Prompt snippets</span>
          </DropdownMenuSubTrigger>
          <DropdownMenuSubContent className="w-72">
            {PROMPT_SNIPPETS.map(snippet => (
              <ContextMenuItem icon={MessageSquareText} key={snippet.label} onSelect={() => onInsertText(snippet.text)}>
                {snippet.label}
              </ContextMenuItem>
            ))}
          </DropdownMenuSubContent>
        </DropdownMenuSub>
        <DropdownMenuSeparator />
        <div className="px-2 py-1 text-[0.7rem] text-muted-foreground/80">
          Tip: type <kbd className="rounded bg-muted/70 px-1 py-px font-mono text-[0.65rem]">@</kbd> to reference files
          inline.
        </div>
      </DropdownMenuContent>
    </DropdownMenu>
  )
}
/**
 * Thin wrapper around DropdownMenuItem pairing a leading icon with a text
 * label, forwarding `disabled` and `onSelect` untouched.
 */
export function ContextMenuItem(props: {
  children: string
  disabled?: boolean
  icon: LucideIcon
  onSelect?: () => void
}) {
  const { children, disabled, icon: Icon, onSelect } = props
  return (
    <DropdownMenuItem disabled={disabled} onSelect={onSelect}>
      <Icon />
      <span>{children}</span>
    </DropdownMenuItem>
  )
}

Some files were not shown because too many files have changed in this diff Show More