Mirror of https://github.com/NousResearch/hermes-agent.git
Synced 2026-05-06 02:37:05 +08:00

Compare commits: feat/brows...bb/gui (373 commits, c9987f1e22 through 7b61f86529)
@@ -25,3 +25,7 @@ ui-tui/packages/hermes-ink/dist/
 # Runtime data (bind-mounted at /opt/data; must not leak into build context)
 data/
 
+# Compose/profile runtime state (bind-mounted; avoid ownership/secret issues)
+hermes-config/
+runtime/
+
@@ -384,9 +384,9 @@ IMAGE_TOOLS_DEBUG=false
 # Default STT provider is "local" (faster-whisper) — runs on your machine, no API key needed.
 # Install with: pip install faster-whisper
 # Model downloads automatically on first use (~150 MB for "base").
-# To use cloud providers instead, set GROQ_API_KEY or VOICE_TOOLS_OPENAI_KEY above.
-# Provider priority: local > groq > openai
-# Configure in config.yaml: stt.provider: local | groq | openai
+# To use cloud providers instead, set GROQ_API_KEY, VOICE_TOOLS_OPENAI_KEY, or ELEVENLABS_API_KEY above.
+# Provider priority: local > groq > openai > mistral > xai > elevenlabs
+# Configure in config.yaml: stt.provider: local | groq | openai | mistral | xai | elevenlabs
 
 # =============================================================================
 # STT ADVANCED OVERRIDES (optional)
@@ -394,10 +394,12 @@ IMAGE_TOOLS_DEBUG=false
 # Override default STT models per provider (normally set via stt.model in config.yaml)
 # STT_GROQ_MODEL=whisper-large-v3-turbo
 # STT_OPENAI_MODEL=whisper-1
+# STT_ELEVENLABS_MODEL=scribe_v2
 
 # Override STT provider endpoints (for proxies or self-hosted instances)
 # GROQ_BASE_URL=https://api.groq.com/openai/v1
 # STT_OPENAI_BASE_URL=https://api.openai.com/v1
+# ELEVENLABS_STT_BASE_URL=https://api.elevenlabs.io/v1
 
 # =============================================================================
 # MICROSOFT TEAMS INTEGRATION
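The provider-priority comment above implies a deterministic fallback: honor `stt.provider` if it is pinned in config, otherwise take the first provider that is actually usable. A minimal sketch of that resolution order follows. The Groq, OpenAI, and ElevenLabs key names appear in this file; `MISTRAL_API_KEY` and `XAI_API_KEY` are assumed stand-ins, not confirmed Hermes variable names, and the availability checks are illustrative rather than the actual Hermes implementation.

```python
# Hypothetical sketch of STT provider resolution by fixed priority.
import os

# Priority order from the .env comment: local > groq > openai > mistral > xai > elevenlabs
_STT_PRIORITY = ["local", "groq", "openai", "mistral", "xai", "elevenlabs"]

def _is_available(provider: str) -> bool:
    """Local needs the faster-whisper package; cloud providers need an API key."""
    if provider == "local":
        try:
            import faster_whisper  # noqa: F401
            return True
        except ImportError:
            return False
    key_var = {
        "groq": "GROQ_API_KEY",
        "openai": "VOICE_TOOLS_OPENAI_KEY",
        "mistral": "MISTRAL_API_KEY",   # assumed name, not confirmed by this file
        "xai": "XAI_API_KEY",           # assumed name, not confirmed by this file
        "elevenlabs": "ELEVENLABS_API_KEY",
    }[provider]
    return bool(os.environ.get(key_var))

def resolve_stt_provider(configured: str | None = None) -> str:
    """Honor an explicit stt.provider; otherwise take the first available by priority."""
    if configured:
        return configured
    for provider in _STT_PRIORITY:
        if _is_available(provider):
            return provider
    raise RuntimeError("No STT provider available: install faster-whisper or set an API key.")
```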
.github/dependabot.yml (new file, 44 lines, vendored)
@@ -0,0 +1,44 @@
+# Dependabot configuration for hermes-agent.
+#
+# Deliberately scoped to github-actions only.
+#
+# We do NOT enable Dependabot for pip / npm / any source-dependency ecosystem
+# because we pin source dependencies exactly (uv.lock, package-lock.json) as
+# part of our supply-chain posture. Automatic version-bump PRs against those
+# pins would undermine the strategy — pins are moved deliberately, after
+# review, not on a schedule.
+#
+# github-actions is the exception: action pins (we use full commit SHAs per
+# supply-chain policy) must be updated when upstream actions publish
+# patches — usually themselves security fixes. Dependabot opens a PR with
+# the new SHA and release notes; we review and merge like any other PR.
+#
+# Security-update PRs for source dependencies (opened ONLY when a CVE is
+# published affecting a currently-pinned version) are enabled separately
+# via the repo's Dependabot security updates setting
+# (Settings → Code security → Dependabot → Dependabot security updates).
+# Those are CVE-only, not schedule-driven, and do not conflict with our
+# pinning strategy — they fire when a pinned version becomes known-bad,
+# which is exactly when we want to move the pin.
+
+version: 2
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+      day: "monday"
+    open-pull-requests-limit: 5
+    labels:
+      - "dependencies"
+      - "github-actions"
+    commit-message:
+      prefix: "chore(actions)"
+      include: "scope"
+    groups:
+      # Batch routine action bumps into one PR per week to reduce noise.
+      # Security updates still open individually and bypass grouping.
+      actions-minor-patch:
+        update-types:
+          - "minor"
+          - "patch"
.github/workflows/desktop-release.yml (new file, 341 lines, vendored)
@@ -0,0 +1,341 @@
+name: Desktop Release
+
+on:
+  push:
+    branches: [main]
+  release:
+    types: [published]
+  workflow_dispatch:
+    inputs:
+      channel:
+        description: Release channel to build
+        required: true
+        default: nightly
+        type: choice
+        options:
+          - nightly
+          - stable
+      release_tag:
+        description: "Required when channel=stable (example: v2026.5.5)"
+        required: false
+        type: string
+
+permissions:
+  contents: write
+
+concurrency:
+  group: desktop-release-${{ github.ref }}
+  cancel-in-progress: false
+
+jobs:
+  prepare:
+    if: github.repository == 'NousResearch/hermes-agent'
+    runs-on: ubuntu-latest
+    outputs:
+      channel: ${{ steps.meta.outputs.channel }}
+      release_name: ${{ steps.meta.outputs.release_name }}
+      release_tag: ${{ steps.meta.outputs.release_tag }}
+      version: ${{ steps.meta.outputs.version }}
+      is_stable: ${{ steps.meta.outputs.is_stable }}
+    steps:
+      - id: meta
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          INPUT_CHANNEL: ${{ github.event.inputs.channel }}
+          INPUT_RELEASE_TAG: ${{ github.event.inputs.release_tag }}
+          RELEASE_TAG_FROM_EVENT: ${{ github.event.release.tag_name }}
+          GITHUB_SHA: ${{ github.sha }}
+        run: |
+          set -euo pipefail
+
+          channel="nightly"
+          release_tag="desktop-nightly"
+          is_stable="false"
+
+          if [[ "$EVENT_NAME" == "release" ]]; then
+            channel="stable"
+            release_tag="$RELEASE_TAG_FROM_EVENT"
+            is_stable="true"
+          elif [[ "$EVENT_NAME" == "workflow_dispatch" && "$INPUT_CHANNEL" == "stable" ]]; then
+            channel="stable"
+            release_tag="$INPUT_RELEASE_TAG"
+            is_stable="true"
+          fi
+
+          if [[ "$channel" == "stable" ]]; then
+            if [[ -z "$release_tag" ]]; then
+              echo "Stable desktop releases require a release tag." >&2
+              exit 1
+            fi
+
+            version="${release_tag#v}"
+            release_name="Hermes Desktop ${release_tag}"
+          else
+            stamp="$(date -u +%Y%m%d)"
+            short_sha="${GITHUB_SHA::7}"
+            version="0.0.0-nightly.${stamp}.${short_sha}"
+            release_name="Hermes Desktop Nightly ${stamp}-${short_sha}"
+          fi
+
+          {
+            echo "channel=$channel"
+            echo "release_name=$release_name"
+            echo "release_tag=$release_tag"
+            echo "version=$version"
+            echo "is_stable=$is_stable"
+          } >> "$GITHUB_OUTPUT"
+
+  build:
+    if: github.repository == 'NousResearch/hermes-agent'
+    needs: prepare
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - platform: mac
+            runner: macos-latest
+            build_args: --mac dmg zip
+          - platform: win
+            runner: windows-latest
+            build_args: --win nsis msi
+    runs-on: ${{ matrix.runner }}
+    env:
+      DESKTOP_CHANNEL: ${{ needs.prepare.outputs.channel }}
+      DESKTOP_VERSION: ${{ needs.prepare.outputs.version }}
+      MAC_CSC_LINK: ${{ secrets.CSC_LINK }}
+      MAC_CSC_KEY_PASSWORD: ${{ secrets.CSC_KEY_PASSWORD }}
+      APPLE_API_KEY: ${{ secrets.APPLE_API_KEY }}
+      APPLE_API_KEY_ID: ${{ secrets.APPLE_API_KEY_ID }}
+      APPLE_API_ISSUER: ${{ secrets.APPLE_API_ISSUER }}
+      WIN_CSC_LINK: ${{ secrets.WIN_CSC_LINK }}
+      WIN_CSC_KEY_PASSWORD: ${{ secrets.WIN_CSC_KEY_PASSWORD }}
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+
+      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
+        with:
+          node-version: 20
+          cache: npm
+          cache-dependency-path: package-lock.json
+
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
+        with:
+          python-version: "3.11"
+
+      - name: Enforce signing gates for stable releases
+        if: needs.prepare.outputs.is_stable == 'true'
+        shell: bash
+        run: |
+          set -euo pipefail
+          missing=()
+
+          if [[ "${{ matrix.platform }}" == "mac" ]]; then
+            [[ -z "${MAC_CSC_LINK:-}" ]] && missing+=("CSC_LINK")
+            [[ -z "${MAC_CSC_KEY_PASSWORD:-}" ]] && missing+=("CSC_KEY_PASSWORD")
+            [[ -z "${APPLE_API_KEY:-}" ]] && missing+=("APPLE_API_KEY")
+            [[ -z "${APPLE_API_KEY_ID:-}" ]] && missing+=("APPLE_API_KEY_ID")
+            [[ -z "${APPLE_API_ISSUER:-}" ]] && missing+=("APPLE_API_ISSUER")
+          else
+            [[ -z "${WIN_CSC_LINK:-}" ]] && missing+=("WIN_CSC_LINK")
+            [[ -z "${WIN_CSC_KEY_PASSWORD:-}" ]] && missing+=("WIN_CSC_KEY_PASSWORD")
+          fi
+
+          if (( ${#missing[@]} > 0 )); then
+            echo "::error::Stable desktop release missing required secrets: ${missing[*]}"
+            exit 1
+          fi
+
+      - name: Install workspace dependencies
+        run: npm ci
+
+      - name: Build bundled TUI payload
+        run: npm --prefix ui-tui run build
+
+      - name: Build desktop renderer
+        run: npm --prefix apps/desktop run build
+
+      - name: Stage Hermes payload
+        run: npm --prefix apps/desktop run stage:hermes
+
+      - name: Map macOS signing credentials
+        if: matrix.platform == 'mac'
+        shell: bash
+        run: |
+          set -euo pipefail
+          has_link=0
+          has_pass=0
+          [[ -n "${MAC_CSC_LINK:-}" ]] && has_link=1
+          [[ -n "${MAC_CSC_KEY_PASSWORD:-}" ]] && has_pass=1
+
+          if [[ $has_link -eq 1 && $has_pass -eq 1 ]]; then
+            echo "CSC_LINK=${MAC_CSC_LINK}" >> "$GITHUB_ENV"
+            echo "CSC_KEY_PASSWORD=${MAC_CSC_KEY_PASSWORD}" >> "$GITHUB_ENV"
+          elif [[ $has_link -eq 1 || $has_pass -eq 1 ]]; then
+            echo "::error::macOS signing secrets are partially configured. Set both CSC_LINK and CSC_KEY_PASSWORD."
+            exit 1
+          fi
+
+      - name: Map Windows signing credentials
+        if: matrix.platform == 'win'
+        shell: bash
+        run: |
+          set -euo pipefail
+          has_link=0
+          has_pass=0
+          [[ -n "${WIN_CSC_LINK:-}" ]] && has_link=1
+          [[ -n "${WIN_CSC_KEY_PASSWORD:-}" ]] && has_pass=1
+
+          if [[ $has_link -eq 1 && $has_pass -eq 1 ]]; then
+            echo "CSC_LINK=${WIN_CSC_LINK}" >> "$GITHUB_ENV"
+            echo "CSC_KEY_PASSWORD=${WIN_CSC_KEY_PASSWORD}" >> "$GITHUB_ENV"
+            echo "CSC_FOR_PULL_REQUEST=true" >> "$GITHUB_ENV"
+          elif [[ $has_link -eq 1 || $has_pass -eq 1 ]]; then
+            echo "::error::Windows signing secrets are partially configured. Set both WIN_CSC_LINK and WIN_CSC_KEY_PASSWORD."
+            exit 1
+          fi
+
+      - name: Build desktop installers
+        shell: bash
+        run: |
+          set -euo pipefail
+          npm --prefix apps/desktop exec electron-builder -- \
+            ${{ matrix.build_args }} \
+            --publish never \
+            --config.extraMetadata.version="${DESKTOP_VERSION}" \
+            --config.extraMetadata.desktopChannel="${DESKTOP_CHANNEL}" \
+            '--config.artifactName=Hermes-${version}-${env.DESKTOP_CHANNEL}-${os}-${arch}.${ext}'
+
+      - name: Notarize and staple macOS DMG
+        if: matrix.platform == 'mac' && needs.prepare.outputs.is_stable == 'true'
+        shell: bash
+        run: |
+          set -euo pipefail
+          dmg_path="$(ls apps/desktop/release/*.dmg | head -n 1)"
+          node apps/desktop/scripts/notarize-artifact.cjs "$dmg_path"
+
+      - name: Validate macOS notarization and Gatekeeper trust
+        if: matrix.platform == 'mac' && needs.prepare.outputs.is_stable == 'true'
+        shell: bash
+        run: |
+          set -euo pipefail
+          app_path="$(ls -d apps/desktop/release/mac*/Hermes.app | head -n 1)"
+          dmg_path="$(ls apps/desktop/release/*.dmg | head -n 1)"
+          xcrun stapler validate "$app_path"
+          xcrun stapler validate "$dmg_path"
+          spctl --assess --type execute --verbose=4 "$app_path"
+
+      - name: Generate desktop checksums
+        shell: bash
+        run: |
+          set -euo pipefail
+          node <<'EOF'
+          const crypto = require('node:crypto')
+          const fs = require('node:fs')
+          const path = require('node:path')
+
+          const releaseDir = path.resolve('apps/desktop/release')
+          const platform = process.env.PLATFORM
+          const extensions = platform === 'mac' ? ['.dmg', '.zip'] : ['.exe', '.msi']
+          const files = fs
+            .readdirSync(releaseDir)
+            .filter(name => extensions.some(ext => name.endsWith(ext)))
+            .sort()
+
+          if (!files.length) {
+            throw new Error(`No release artifacts were produced for ${platform}`)
+          }
+
+          const lines = files.map(name => {
+            const full = path.join(releaseDir, name)
+            const hash = crypto.createHash('sha256').update(fs.readFileSync(full)).digest('hex')
+            return `${hash} ${name}`
+          })
+          fs.writeFileSync(path.join(releaseDir, `SHA256SUMS-${platform}.txt`), `${lines.join('\n')}\n`)
+          EOF
+        env:
+          PLATFORM: ${{ matrix.platform }}
+
+      - name: Upload packaged desktop artifacts
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
+        with:
+          name: desktop-${{ matrix.platform }}
+          path: |
+            apps/desktop/release/*.dmg
+            apps/desktop/release/*.zip
+            apps/desktop/release/*.exe
+            apps/desktop/release/*.msi
+            apps/desktop/release/SHA256SUMS-${{ matrix.platform }}.txt
+          if-no-files-found: error
+
+  publish:
+    if: github.repository == 'NousResearch/hermes-agent'
+    needs: [prepare, build]
+    runs-on: ubuntu-latest
+    env:
+      GH_TOKEN: ${{ github.token }}
+      CHANNEL: ${{ needs.prepare.outputs.channel }}
+      RELEASE_NAME: ${{ needs.prepare.outputs.release_name }}
+      RELEASE_TAG: ${{ needs.prepare.outputs.release_tag }}
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+        with:
+          fetch-depth: 0
+
+      - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
+        with:
+          pattern: desktop-*
+          merge-multiple: true
+          path: dist/desktop
+
+      - name: Publish desktop assets to GitHub release
+        shell: bash
+        run: |
+          set -euo pipefail
+          shopt -s globstar nullglob
+
+          files=(
+            dist/desktop/**/*.dmg
+            dist/desktop/**/*.zip
+            dist/desktop/**/*.exe
+            dist/desktop/**/*.msi
+            dist/desktop/**/SHA256SUMS-*.txt
+          )
+
+          if (( ${#files[@]} == 0 )); then
+            echo "No desktop artifacts were downloaded for publishing." >&2
+            exit 1
+          fi
+
+          if [[ "$CHANNEL" == "nightly" ]]; then
+            git tag -f "$RELEASE_TAG" "$GITHUB_SHA"
+            git push origin "refs/tags/$RELEASE_TAG" --force
+
+            notes="Automated nightly desktop build from main. This prerelease is replaced on each new run."
+
+            if gh release view "$RELEASE_TAG" >/dev/null 2>&1; then
+              while IFS= read -r asset_name; do
+                gh release delete-asset "$RELEASE_TAG" "$asset_name" --yes
+              done < <(gh release view "$RELEASE_TAG" --json assets -q '.assets[].name')
+
+              gh release edit "$RELEASE_TAG" \
+                --title "$RELEASE_NAME" \
+                --prerelease \
+                --notes "$notes"
+            else
+              gh release create "$RELEASE_TAG" \
+                --target "$GITHUB_SHA" \
+                --title "$RELEASE_NAME" \
+                --notes "$notes" \
+                --prerelease
+            fi
+          else
+            if ! gh release view "$RELEASE_TAG" >/dev/null 2>&1; then
+              notes="Automated desktop artifacts attached by desktop-release workflow."
+              gh release create "$RELEASE_TAG" \
+                --target "$GITHUB_SHA" \
+                --title "$RELEASE_NAME" \
+                --notes "$notes"
+            fi
+          fi
+
+          gh release upload "$RELEASE_TAG" "${files[@]}" --clobber
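The "Generate desktop checksums" step above writes one `SHA256SUMS-<platform>.txt` per build, with a `<sha256> <filename>` line per artifact. A small standalone sketch of the matching verification a user could run against a downloaded installer; the script and function names here are ours for illustration, not part of the repo:

```python
# Verify a downloaded desktop artifact against its SHA256SUMS-<platform>.txt.
import hashlib
import pathlib
import sys

def verify(sums_file: str, artifact: str) -> bool:
    """Return True if the artifact's SHA-256 matches its entry in the sums file."""
    artifact_path = pathlib.Path(artifact)
    digest = hashlib.sha256(artifact_path.read_bytes()).hexdigest()
    for line in pathlib.Path(sums_file).read_text().splitlines():
        parts = line.split()  # "<hex digest> <filename>" (whitespace-separated)
        if len(parts) >= 2 and parts[-1] == artifact_path.name:
            return parts[0] == digest
    return False  # artifact not listed in the sums file

if __name__ == "__main__":
    ok = verify(sys.argv[1], sys.argv[2])
    print("OK" if ok else "MISMATCH")
    sys.exit(0 if ok else 1)
```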
.github/workflows/nix-lockfile-fix.yml (8 changed lines, vendored)
@@ -6,8 +6,8 @@ on:
     paths:
       - 'ui-tui/package-lock.json'
       - 'ui-tui/package.json'
-      - 'web/package-lock.json'
-      - 'web/package.json'
+      - 'apps/dashboard/package-lock.json'
+      - 'apps/dashboard/package.json'
   workflow_dispatch:
     inputs:
       pr_number:
@@ -28,7 +28,7 @@ concurrency:
 jobs:
   # ── Auto-fix on main ───────────────────────────────────────────────
   # Fires when a push to main touches package.json or package-lock.json
-  # in ui-tui/ or web/. Runs fix-lockfiles and pushes the hash
+  # in ui-tui/ or apps/dashboard/. Runs fix-lockfiles and pushes the hash
   # update commit directly to main so Nix builds never stay broken.
   #
   # Safety invariants:
@@ -110,7 +110,7 @@ jobs:
           # run recompute from the correct package-lock state.
           pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
             'ui-tui/package-lock.json' 'ui-tui/package.json' \
-            'web/package-lock.json' 'web/package.json' || true)"
+            'apps/dashboard/package-lock.json' 'apps/dashboard/package.json' || true)"
           if [ -n "$pkg_changed" ]; then
             echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
             exit 0
.github/workflows/osv-scanner.yml (new file, 67 lines, vendored)
@@ -0,0 +1,67 @@
+name: OSV-Scanner
+
+# Scans lockfiles (uv.lock, package-lock.json) against the OSV vulnerability
+# database. Runs on every PR that touches a lockfile and on a weekly schedule
+# against main.
+#
+# This is detection-only — OSV-Scanner does NOT open PRs or modify pins.
+# It reports known CVEs in currently-pinned dependency versions so we can
+# decide when and how to patch on our own schedule. Our pinning strategy
+# (full SHA / exact version) is preserved; only the notification signal
+# is added.
+#
+# Complements the existing supply-chain-audit.yml workflow (which scans
+# for malicious code patterns in PR diffs) by covering the orthogonal
+# "currently-pinned dep became known-vulnerable" case.
+#
+# Uses Google's officially-recommended reusable workflow, pinned by SHA.
+# Findings land in the repo's Security tab (Code Scanning > OSV-Scanner).
+# fail-on-vuln is disabled so the job does not block merges on pre-existing
+# vulnerabilities in pinned deps that we may need to patch deliberately.
+
+on:
+  pull_request:
+    branches: [main]
+    paths:
+      - 'uv.lock'
+      - 'pyproject.toml'
+      - 'package.json'
+      - 'package-lock.json'
+      - 'ui-tui/package.json'
+      - 'ui-tui/package-lock.json'
+      - 'website/package.json'
+      - 'website/package-lock.json'
+      - '.github/workflows/osv-scanner.yml'
+  push:
+    branches: [main]
+    paths:
+      - 'uv.lock'
+      - 'pyproject.toml'
+      - 'package.json'
+      - 'package-lock.json'
+      - 'ui-tui/package-lock.json'
+      - 'website/package-lock.json'
+  schedule:
+    # Weekly scan against main — catches CVEs published after merge for
+    # deps that haven't changed since.
+    - cron: '0 9 * * 1'
+  workflow_dispatch:
+
+permissions:
+  # Required by the reusable workflow to upload SARIF to the Security tab.
+  actions: read
+  contents: read
+  security-events: write
+
+jobs:
+  scan:
+    name: Scan lockfiles
+    uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@c51854704019a247608d928f370c98740469d4b5 # v2.3.5
+    with:
+      # Scan explicit lockfiles rather than recursing, so we only look at
+      # the three sources of truth and skip vendored / test / worktree dirs.
+      scan-args: |-
+        --lockfile=uv.lock
+        --lockfile=ui-tui/package-lock.json
+        --lockfile=website/package-lock.json
+      fail-on-vuln: false
.gitignore (13 changed lines, vendored)
@@ -54,6 +54,10 @@ environments/benchmarks/evals/
 
 # Web UI build output
 hermes_cli/web_dist/
+apps/desktop/build/
+apps/desktop/dist/
+apps/desktop/release/
+apps/desktop/*.tsbuildinfo
 
 # Web UI assets — synced from @nous-research/ui at build time via
 # `npm run sync-assets` (see web/package.json).
@@ -70,3 +74,12 @@ mini-swe-agent/
 result
 website/static/api/skills-index.json
 models-dev-upstream/
+
+# Local editor / agent tooling (machine-specific; keep in global config, not the repo)
+.codex/
+.cursor/
+.gemini/
+.zed/
+.mcp.json
+opencode.json
+config/mcporter.json
AGENTS.md (242 changed lines)
@@ -2,6 +2,8 @@
 
 Instructions for AI coding assistants and developers working on the hermes-agent codebase.
 
+**Never give up on the right solution.**
+
 ## Development Environment
 
 ```bash
@@ -37,12 +39,17 @@ hermes-agent/
 │   ├── platforms/       # Adapter per platform (telegram, discord, slack, whatsapp,
 │   │                    #   homeassistant, signal, matrix, mattermost, email, sms,
 │   │                    #   dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
-│   │                    #   webhook, api_server, ...). See ADDING_A_PLATFORM.md.
+│   │                    #   yuanbao, webhook, api_server, ...). See ADDING_A_PLATFORM.md.
 │   └── builtin_hooks/   # Extension point for always-registered gateway hooks (none shipped)
 ├── plugins/             # Plugin system (see "Plugins" section below)
 │   ├── memory/          # Memory-provider plugins (honcho, mem0, supermemory, ...)
 │   ├── context_engine/  # Context-engine plugins
-│   └── <others>/        # Dashboard, image-gen, disk-cleanup, examples, ...
+│   ├── kanban/          # Multi-agent board dispatcher + worker plugin
+│   ├── hermes-achievements/ # Gamified achievement tracking
+│   ├── observability/   # Metrics / traces / logs plugin
+│   ├── image_gen/       # Image-generation providers
+│   └── <others>/        # disk-cleanup, example-dashboard, google_meet, platforms,
+│                        #   spotify, strike-freedom-cockpit, ...
 ├── optional-skills/     # Heavier/niche skills shipped but NOT active by default
 ├── skills/              # Built-in skills bundled with the repo
 ├── ui-tui/              # Ink (React) terminal UI — `hermes --tui`
@@ -53,7 +60,7 @@ hermes-agent/
 ├── environments/        # RL training environments (Atropos)
 ├── scripts/             # run_tests.sh, release.py, auxiliary scripts
 ├── website/             # Docusaurus docs site
-└── tests/               # Pytest suite (~15k tests across ~700 files as of Apr 2026)
+└── tests/               # Pytest suite (~17k tests across ~900 files as of May 2026)
 ```
 
 **User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only).
@@ -61,6 +68,29 @@ hermes-agent/
 `gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`.
 Browse with `hermes logs [--follow] [--level ...] [--session ...]`.
 
+## TypeScript Style
+
+Applies to TypeScript across Hermes: desktop, TUI, website, and future TS packages.
+
+- Prefer small nanostores over component state when state is shared, reused, or read by distant UI.
+- Let each feature own its atoms. Chat state belongs near chat, shell state near shell, shared state in `src/store`.
+- Components that render from an atom should use `useStore`. Non-rendering actions should read with `$atom.get()`.
+- Do not pass state through three components when the leaf can subscribe to the atom.
+- Keep persistence beside the atom that owns it.
+- Keep route roots thin. They compose routes and shell; they should not become controllers.
+- No monolithic hooks. A hook should own one narrow job.
+- Prefer colocated action modules over hidden god hooks.
+- If a callback is pure side effect, use the terse void form:
+  `onState={st => void setGatewayState(st)}`.
+- Async UI handlers should make intent explicit:
+  `onClick={() => void save()}`.
+- Prefer interfaces for public props and shared object shapes. Avoid `type X = { ... }` for object props.
+- Extend React primitives for props: `React.ComponentProps<'button'>`, `React.ComponentProps<typeof Dialog>`, `Omit<...>`, `Pick<...>`.
+- Table-driven beats condition ladders when mapping ids, routes, or views.
+- `src/app` owns routes, pages, and page-specific components.
+- `src/store` owns shared atoms.
+- `src/lib` owns shared pure helpers.
+
 ## File Dependency Chain
 
 ```
@@ -244,7 +274,7 @@ npm test # vitest
 
 The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.
 
-- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
+- Browser loads `apps/dashboard/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
 - `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
 - The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
 - Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`.
@@ -257,7 +287,16 @@ The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes
 
 ## Adding New Tools
 
-Requires changes in **2 files**:
+For most custom or local-only tools, do **not** edit Hermes core. Use the plugin
+route instead: create `~/.hermes/plugins/<name>/plugin.yaml` and
+`~/.hermes/plugins/<name>/__init__.py`, then register tools with
+`ctx.register_tool(...)`. Plugin toolsets are discovered automatically and can be
+enabled or disabled without touching `tools/` or `toolsets.py`.
+
+Use the built-in route below only when the user is explicitly contributing a new
+core Hermes tool that should ship in the base system.
+
+Built-in/core tools require changes in **2 files**:
 
 **1. Create `tools/your_tool.py`:**
 ```python
@@ -280,9 +319,9 @@ registry.register(
 )
 ```
 
-**2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.
+**2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset. **This step is required:** auto-discovery imports the tool and registers its schema, but the tool is only *exposed to an agent* if its name appears in a toolset. `_HERMES_CORE_TOOLS` is not dead code — it's the default bundle every platform's base toolset inherits from.
 
-Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain.
+Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain. Wiring into a toolset is still a deliberate, manual step.
 
 The registry handles schema collection, dispatch, availability checking, and error wrapping. All handlers MUST return a JSON string.
 
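A minimal sketch of the plugin route described in the hunk above. The file paths and the `ctx.register_tool(...)` call come from the source; the `setup(ctx)` entry-point name, the exact `register_tool` keyword arguments, and the companion plugin.yaml schema are assumptions for illustration. The sketch also assumes plugin handlers follow the same contract as core registry handlers (return a JSON string):

```python
# ~/.hermes/plugins/weather/__init__.py — hypothetical local-only tool plugin.
# A companion ~/.hermes/plugins/weather/plugin.yaml would carry name/description
# metadata (its exact schema is assumed here, not taken from the source).
import json

def _weather_handler(city: str) -> str:
    # Core registry handlers must return a JSON string; we assume the same
    # contract holds for plugin-registered tools.
    return json.dumps({"city": city, "forecast": "sunny", "source": "stub"})

def setup(ctx):  # entry-point name is an assumption
    ctx.register_tool(
        name="weather_lookup",
        description="Return a stub weather forecast for a city.",
        parameters={
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
        handler=_weather_handler,
    )
```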
@@ -304,6 +343,22 @@ The registry handles schema collection, dispatch, availability checking, and err
 section is handled automatically by the deep-merge and does NOT require
 a version bump.
 
+### Top-level `config.yaml` sections (non-exhaustive):
+
+`model`, `agent`, `terminal`, `compression`, `display`, `stt`, `tts`,
+`memory`, `security`, `delegation`, `smart_model_routing`, `checkpoints`,
+`auxiliary`, `curator`, `skills`, `gateway`, `logging`, `cron`, `profiles`,
+`plugins`, `honcho`.
+
+`auxiliary` holds per-task overrides for side-LLM work (curator, vision,
+embedding, title generation, session_search, etc.) — each task can pin
+its own provider/model/base_url/max_tokens/reasoning_effort. See
+`agent/auxiliary_client.py::_resolve_auto` for resolution order.
+
+`curator` holds the background skill-maintenance config —
+`enabled`, `interval_hours`, `min_idle_hours`, `stale_after_days`,
+`archive_after_days`, `backup` (nested).
+
 ### .env variables (SECRETS ONLY — API keys, tokens, passwords):
 1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
 ```python
@@ -510,11 +565,176 @@ niche skills belong in `optional-skills/`.
 
 ### SKILL.md frontmatter
 
-Standard fields: `name`, `description`, `version`, `platforms`
-(OS-gating list: `[macos]`, `[linux, macos]`, ...),
-`metadata.hermes.tags`, `metadata.hermes.category`,
-`metadata.hermes.config` (config.yaml settings the skill needs — stored
-under `skills.config.<key>`, prompted during setup, injected at load time).
+Standard fields: `name`, `description`, `version`, `author`, `license`,
+`platforms` (OS-gating list: `[macos]`, `[linux, macos]`, ...),
+`metadata.hermes.tags`, `metadata.hermes.category`,
+`metadata.hermes.related_skills`, `metadata.hermes.config` (config.yaml
+settings the skill needs — stored under `skills.config.<key>`, prompted
+during setup, injected at load time).
+
+Top-level `tags:` and `category:` are also accepted and mirrored from
+`metadata.hermes.*` by the loader.
+
+---
+
+## Toolsets
+
+All toolsets are defined in `toolsets.py` as a single `TOOLSETS` dict.
+Each platform's adapter picks a base toolset (e.g. Telegram uses
+`"messaging"`); `_HERMES_CORE_TOOLS` is the default bundle most
+platforms inherit from.
+
+Current toolset keys: `browser`, `clarify`, `code_execution`, `cronjob`,
+`debugging`, `delegation`, `discord`, `discord_admin`, `feishu_doc`,
+`feishu_drive`, `file`, `homeassistant`, `image_gen`, `kanban`, `memory`,
+`messaging`, `moa`, `rl`, `safe`, `search`, `session_search`, `skills`,
+`spotify`, `terminal`, `todo`, `tts`, `video`, `vision`, `web`, `yuanbao`.
+
+Enable/disable per platform via `hermes tools` (the curses UI) or the
+`tools.<platform>.enabled` / `tools.<platform>.disabled` lists in
+`config.yaml`.
+
+---
+
+## Delegation (`delegate_task`)
+
+`tools/delegate_tool.py` spawns a subagent with an isolated
+context + terminal session. Synchronous: the parent waits for the
+child's summary before continuing its own loop — if the parent is
+interrupted, the child is cancelled.
+
+Two shapes:
+
+- **Single:** pass `goal` (+ optional `context`, `toolsets`).
+- **Batch (parallel):** pass `tasks: [...]` — each gets its own subagent
+  running concurrently. Concurrency is capped by
+  `delegation.max_concurrent_children` (default 3).
+
+Roles:
+
+- `role="leaf"` (default) — focused worker. Cannot call `delegate_task`,
+  `clarify`, `memory`, `send_message`, `execute_code`.
+- `role="orchestrator"` — retains `delegate_task` so it can spawn its
+  own workers. Gated by `delegation.orchestrator_enabled` (default true)
+  and bounded by `delegation.max_spawn_depth` (default 2).
+
+Key config knobs (under `delegation:` in `config.yaml`):
+`max_concurrent_children`, `max_spawn_depth`, `child_timeout_seconds`,
+`orchestrator_enabled`, `subagent_auto_approve`, `inherit_mcp_toolsets`,
+`max_iterations`.
+
+Synchronicity rule: delegate_task is **not** durable. For long-running
+work that must outlive the current turn, use `cronjob` or
+`terminal(background=True, notify_on_complete=True)` instead.
+
+---
+
+## Curator (skill lifecycle)
+
+Background skill-maintenance system that tracks usage on agent-created
+skills and auto-archives stale ones. Users never lose skills; archives
+go to `~/.hermes/skills/.archive/` and are restorable.
+
+- **Core:** `agent/curator.py` (review loop, auto-transitions, LLM review
+  prompt) + `agent/curator_backup.py` (pre-run tar.gz snapshots).
+- **CLI:** `hermes_cli/curator.py` wires `hermes curator <verb>` where
+  verbs are: `status`, `run`, `pause`, `resume`, `pin`, `unpin`,
+  `archive`, `restore`, `prune`, `backup`, `rollback`.
+- **Telemetry:** `tools/skill_usage.py` owns the sidecar
+  `~/.hermes/skills/.usage.json` — per-skill `use_count`, `view_count`,
+  `patch_count`, `last_activity_at`, `state` (active / stale /
+  archived), `pinned`.
+
+Invariants:
+- Curator only touches skills with `created_by: "agent"` provenance —
+  bundled + hub-installed skills are off-limits.
+- Never deletes; max destructive action is archive.
+- Pinned skills are exempt from every auto-transition and from the
+  LLM review pass.
+- `skill_manage(action="delete")` refuses pinned skills; patch/edit/
+  write_file/remove_file go through so the agent can keep improving
+  pinned skills.
+
+Config section (`curator:` in `config.yaml`):
+`enabled`, `interval_hours`, `min_idle_hours`, `stale_after_days`,
+`archive_after_days`, `backup.*`.
+
+Full user-facing docs: `website/docs/user-guide/features/curator.md`.
+
+---
+
+## Cron (scheduled jobs)
+
+`cron/jobs.py` (job store) + `cron/scheduler.py` (tick loop). Agents
+schedule jobs via the `cronjob` tool; users via `hermes cron <verb>`
+(`list`, `add`, `edit`, `pause`, `resume`, `run`, `remove`) or the
+`/cron` slash command.
+
+Supported schedule formats:
+- Duration: `"30m"`, `"2h"`, `"1d"`
+- "every" phrase: `"every 2h"`, `"every monday 9am"`
+- 5-field cron expression: `"0 9 * * *"`
+- ISO timestamp (one-shot): `"2026-06-01T09:00:00Z"`
+
+Per-job fields include `skills` (load specific skills), `model` /
+`provider` overrides, `script` (pre-run data-collection script whose
+stdout is injected into the prompt; `no_agent=True` turns the script
+into the entire job), `context_from` (chain job A's last output into
+job B's prompt), `workdir` (run in a specific directory with its
+`AGENTS.md`/`CLAUDE.md` loaded), and multi-platform delivery.
+
+Hardening invariants:
+- **3-minute hard interrupt** on cron sessions — runaway agent loops
+  cannot monopolize the scheduler.
+- Catchup window: half the job's period, clamped to 120s–2h.
+- Grace window: 120s for one-shot jobs whose fire time was missed.
+- File lock at `~/.hermes/cron/.tick.lock` prevents duplicate ticks
+  across processes.
+- Cron sessions pass `skip_memory=True` by default; memory providers
+  intentionally do not run during cron.
+
+Cron deliveries are **not** mirrored into the target gateway session —
+they land in their own cron session with a header/footer frame so the
+main conversation's message-role alternation stays intact.
+
+---
+
+## Kanban (multi-agent work queue)
+
+Durable SQLite-backed board that lets multiple profiles / workers
+collaborate on shared tasks. Users drive it via `hermes kanban <verb>`;
+workers spawned by the dispatcher drive it via a dedicated `kanban_*`
+toolset so their schema footprint is zero when they're not inside a
+kanban task.
+
+- **CLI:** `hermes_cli/kanban.py` wires `hermes kanban` with verbs
+  `init`, `create`, `list` (alias `ls`), `show`, `assign`, `link`,
+  `unlink`, `comment`, `complete`, `block`, `unblock`, `archive`,
+  `tail`, plus less-commonly-used `watch`, `stats`, `runs`, `log`,
+  `assignees`, `heartbeat`, `notify-*`, `dispatch`, `daemon`, `gc`.
+- **Worker toolset:** `tools/kanban_tools.py` exposes `kanban_show`,
+  `kanban_complete`, `kanban_block`, `kanban_heartbeat`, `kanban_comment`,
+  `kanban_create`, `kanban_link` — gated by `HERMES_KANBAN_TASK` so
+  the schema only appears for processes actually running as a worker.
+- **Dispatcher:** long-lived loop that (default every 60s) reclaims
+  stale claims, promotes ready tasks, atomically claims, and spawns
+  assigned profiles. Runs **inside the gateway** by default via
+  `kanban.dispatch_in_gateway: true`.
+- **Plugin assets:** `plugins/kanban/dashboard/` (web UI) +
+  `plugins/kanban/systemd/` (`hermes-kanban-dispatcher.service` for
+  standalone dispatcher deployment).
+
+Isolation model:
+- **Board** is the hard boundary — workers are spawned with
+  `HERMES_KANBAN_BOARD` pinned in their env so they can't see other
+  boards.
+- **Tenant** is a soft namespace *within* a board — one specialist
+  fleet can serve multiple businesses with workspace-path + memory-key
+  isolation.
+- After ~5 consecutive spawn failures on the same task the dispatcher
+  auto-blocks it to prevent spin loops.
+
+Full user-facing docs: `website/docs/user-guide/features/kanban.md`.
+
 ---
 
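The two call shapes and the role knob described in the Delegation section above, written out as the argument payloads an agent would pass to `delegate_task`. The field names (`goal`, `context`, `toolsets`, `tasks`, `role`) come from the source; the concrete goals and values are illustrative only:

```python
# Single delegation: one focused leaf worker (role="leaf" is the default).
single_call = {
    "goal": "Summarize open GitHub issues labeled 'bug'",
    "context": "Repo: NousResearch/hermes-agent",
    "toolsets": ["search", "web"],
}

# Batch delegation: each task gets its own subagent, run concurrently,
# capped by delegation.max_concurrent_children (default 3).
batch_call = {
    "tasks": [
        {"goal": "Audit the cron scheduler docs for missing schedule formats"},
        {"goal": "Draft release notes for the desktop nightly channel"},
        {"goal": "Check website docs links", "toolsets": ["web"]},
    ]
}

# Orchestrator role keeps delegate_task available to the child so it can
# spawn its own workers, bounded by delegation.max_spawn_depth (default 2).
orchestrator_call = {
    "goal": "Coordinate a three-part refactor of the STT provider stack",
    "role": "orchestrator",
}
```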
|||||||
@@ -466,17 +466,10 @@ class SessionManager:
|
|||||||
except Exception:
|
except Exception:
|
||||||
logger.debug("Failed to update ACP session metadata", exc_info=True)
|
logger.debug("Failed to update ACP session metadata", exc_info=True)
|
||||||
|
|
||||||
# Replace stored messages with current history.
|
# Replace stored messages with current history atomically so a
|
||||||
db.clear_messages(state.session_id)
|
# mid-rewrite failure rolls back and the previously persisted
|
||||||
for msg in state.history:
|
# conversation is preserved (salvaged from #13675).
|
||||||
db.append_message(
|
db.replace_messages(state.session_id, state.history)
|
||||||
session_id=state.session_id,
|
|
||||||
role=msg.get("role", "user"),
|
|
||||||
content=msg.get("content"),
|
|
||||||
tool_name=msg.get("tool_name") or msg.get("name"),
|
|
||||||
tool_calls=msg.get("tool_calls"),
|
|
||||||
tool_call_id=msg.get("tool_call_id"),
|
|
||||||
)
|
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.warning("Failed to persist ACP session %s", state.session_id, exc_info=True)
|
logger.warning("Failed to persist ACP session %s", state.session_id, exc_info=True)
|
||||||
|
|
||||||
|
|||||||
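The comment in the new code states the motivation: the old clear-then-append loop could fail midway and leave a session with a half-written history, while `replace_messages` performs the rewrite as one unit. A minimal sketch of what such an atomic replace can look like over SQLite; the real `replace_messages` is not shown in this diff, so the table name, columns, and serialization here are assumptions:

```python
# Hypothetical sketch: delete + re-insert inside one transaction so a failure
# rolls the whole rewrite back and the prior conversation survives.
import json
import sqlite3

def replace_messages(conn: sqlite3.Connection, session_id: str, history: list[dict]) -> None:
    with conn:  # sqlite3 context manager: commit on success, rollback on exception
        conn.execute("DELETE FROM messages WHERE session_id = ?", (session_id,))
        conn.executemany(
            "INSERT INTO messages (session_id, role, content, tool_call_id) "
            "VALUES (?, ?, ?, ?)",
            [
                (
                    session_id,
                    msg.get("role", "user"),
                    json.dumps(msg.get("content")),
                    msg.get("tool_call_id"),
                )
                for msg in history
            ],
        )
```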
@@ -76,6 +76,7 @@ _ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7")
|
|||||||
# Models where temperature/top_p/top_k return 400 if set to non-default values.
|
# Models where temperature/top_p/top_k return 400 if set to non-default values.
|
||||||
# This is the Opus 4.7 contract; future 4.x+ models are expected to follow it.
|
# This is the Opus 4.7 contract; future 4.x+ models are expected to follow it.
|
||||||
_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7")
|
_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7")
|
||||||
|
_FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6")
|
||||||
|
|
||||||
# ── Max output token limits per Anthropic model ───────────────────────
|
# ── Max output token limits per Anthropic model ───────────────────────
|
||||||
# Source: Anthropic docs + Cline model catalog. Anthropic's API requires
|
# Source: Anthropic docs + Cline model catalog. Anthropic's API requires
|
||||||
@@ -105,6 +106,9 @@ _ANTHROPIC_OUTPUT_LIMITS = {
|
|||||||
"claude-3-haiku": 4_096,
|
"claude-3-haiku": 4_096,
|
||||||
# Third-party Anthropic-compatible providers
|
# Third-party Anthropic-compatible providers
|
||||||
"minimax": 131_072,
|
"minimax": 131_072,
|
||||||
|
# Qwen models via DashScope Anthropic-compatible endpoint
|
||||||
|
# DashScope enforces max_tokens ∈ [1, 65536]
|
||||||
|
"qwen3": 65_536,
|
||||||
}
|
}
|
||||||
|
|
||||||
# For any model not in the table, assume the highest current limit.
|
# For any model not in the table, assume the highest current limit.
|
||||||
@@ -216,6 +220,17 @@ def _forbids_sampling_params(model: str) -> bool:
|
|||||||
return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS)
|
return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS)
|
||||||
|
|
||||||
|
|
||||||
|
def _supports_fast_mode(model: str) -> bool:
|
||||||
|
"""Return True for models that support Anthropic Fast Mode (speed=fast).
|
||||||
|
|
||||||
|
Per Anthropic docs, fast mode is currently supported on Opus 4.6 only.
|
||||||
|
Sending ``speed: "fast"`` to any other Claude model (including Opus 4.7)
|
||||||
|
returns HTTP 400. This guard prevents silently 400'ing when stale config
|
||||||
|
or older callers leave fast mode enabled across a model upgrade.
|
||||||
|
"""
|
||||||
|
return any(v in model for v in _FAST_MODE_SUPPORTED_SUBSTRINGS)
|
||||||
|
|
||||||
|
|
||||||
# Beta headers for enhanced features (sent with ALL auth types).
|
# Beta headers for enhanced features (sent with ALL auth types).
|
||||||
# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the
|
# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the
|
||||||
# beta headers are still accepted (harmless no-op) but not required. Kept
|
# beta headers are still accepted (harmless no-op) but not required. Kept
|
||||||
@@ -1222,6 +1237,14 @@ def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
     ``keep_nullable_hint=False`` because the Anthropic validator does not
     recognize the OpenAPI-style ``nullable: true`` extension and strict
     schema-to-grammar converters may reject unknown keywords.
+
+    Top-level ``oneOf``/``allOf``/``anyOf`` are also stripped here: the
+    Anthropic API rejects union keywords at the schema root with a generic
+    HTTP 400. Several upstream and plugin tools ship schemas with one of
+    these keywords at the top level (commonly for Pydantic discriminated
+    unions). If we land here with those keywords still present after
+    nullable-union stripping, drop them and fall back to a plain object
+    schema so the tool still validates at the Anthropic boundary.
     """
     if not schema:
         return {"type": "object", "properties": {}}
@@ -1231,6 +1254,12 @@ def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
     normalized = strip_nullable_unions(schema, keep_nullable_hint=False)
     if not isinstance(normalized, dict):
         return {"type": "object", "properties": {}}
+    # Strip top-level union keywords that Anthropic's validator rejects.
+    banned = {"oneOf", "allOf", "anyOf"}
+    if banned & normalized.keys():
+        normalized = {k: v for k, v in normalized.items() if k not in banned}
+        if "type" not in normalized:
+            normalized["type"] = "object"
     if normalized.get("type") == "object" and not isinstance(normalized.get("properties"), dict):
         normalized = {**normalized, "properties": {}}
     return normalized
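Review note: a before/after sketch of the union stripping (hypothetical plugin schema; the stripping logic is copied from the hunk above):

schema = {
    "oneOf": [
        {"type": "object", "properties": {"path": {"type": "string"}}},
        {"type": "object", "properties": {"url": {"type": "string"}}},
    ],
    "description": "open a file or a URL",
}
banned = {"oneOf", "allOf", "anyOf"}
if banned & schema.keys():
    schema = {k: v for k, v in schema.items() if k not in banned}
    if "type" not in schema:
        schema["type"] = "object"
# schema is now {"description": "open a file or a URL", "type": "object"},
# which passes the Anthropic validator at the cost of per-variant detail.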
@@ -1915,9 +1944,15 @@ def build_anthropic_kwargs(
 
     # ── Fast mode (Opus 4.6 only) ────────────────────────────────────
     # Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x
-    # output speed. Only for native Anthropic endpoints — third-party
-    # providers would reject the unknown beta header and speed parameter.
-    if fast_mode and not _is_third_party_anthropic_endpoint(base_url):
+    # output speed. Per Anthropic docs, fast mode is only supported on
+    # Opus 4.6 — Opus 4.7 and other models 400 on the speed parameter.
+    # Only for native Anthropic endpoints — third-party providers would
+    # reject the unknown beta header and speed parameter.
+    if (
+        fast_mode
+        and not _is_third_party_anthropic_endpoint(base_url)
+        and _supports_fast_mode(model)
+    ):
         kwargs.setdefault("extra_body", {})["speed"] = "fast"
         # Build extra_headers with ALL applicable betas (the per-request
         # extra_headers override the client-level anthropic-beta header).
@@ -259,10 +259,12 @@ _PROVIDERS_WITHOUT_VISION: frozenset = frozenset({
     "kimi-coding-cn",
 })
 
-# OpenRouter app attribution headers (base — always sent)
+# OpenRouter app attribution headers (base — always sent).
+# `X-Title` is the canonical attribution header OpenRouter's dashboard
+# reads; the previous `X-OpenRouter-Title` label was not recognized there.
 _OR_HEADERS_BASE = {
     "HTTP-Referer": "https://hermes-agent.nousresearch.com",
-    "X-OpenRouter-Title": "Hermes Agent",
+    "X-Title": "Hermes Agent",
     "X-OpenRouter-Categories": "productivity,cli-agent",
 }
 
@@ -567,7 +569,12 @@ class _CodexCompletionsAdapter:
                 # API allows it.
                 pass
             else:
-                effort = reasoning_cfg.get("effort", "medium")
+                # Truthy-only check mirrors agent/transports/codex.py
+                # build_kwargs(): falsy values (None, "", 0) fall back
+                # to the default rather than being forwarded to the
+                # Codex backend, which rejects e.g. {"effort": null}
+                # with a 400.
+                effort = reasoning_cfg.get("effort") or "medium"
                 # Codex backend rejects "minimal"; clamp to "low" to
                 # match the main-agent Codex transport behavior.
                 if effort == "minimal":
@@ -1529,7 +1536,7 @@ def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
     return CodexAuxiliaryClient(real_client, model), model
 
 
-def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
+def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optional[str]]:
     try:
         from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
     except ImportError:
@@ -1539,10 +1546,10 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
     if pool_present:
         if entry is None:
             return None, None
-        token = _pool_runtime_api_key(entry)
+        token = explicit_api_key or _pool_runtime_api_key(entry)
     else:
         entry = None
-        token = resolve_anthropic_token()
+        token = explicit_api_key or resolve_anthropic_token()
     if not token:
         return None, None
 
@@ -1643,6 +1650,39 @@ def _is_payment_error(exc: Exception) -> bool:
     return False
 
 
+def _is_rate_limit_error(exc: Exception) -> bool:
+    """Detect rate-limit errors that warrant provider fallback.
+
+    Returns True for HTTP 429 errors whose message indicates rate limiting
+    (as opposed to billing/quota exhaustion, which _is_payment_error handles).
+    Also catches OpenAI SDK RateLimitError instances that may not set
+    .status_code on the exception object.
+    """
+    status = getattr(exc, "status_code", None)
+    err_lower = str(exc).lower()
+
+    # OpenAI SDK's RateLimitError sometimes omits .status_code —
+    # detect by class name so we don't miss these. (PR #8023 pattern)
+    if type(exc).__name__ == "RateLimitError":
+        return True
+
+    if status == 429:
+        # Distinguish rate-limit from billing: billing keywords are handled
+        # by _is_payment_error, everything else on 429 is a rate limit.
+        if any(kw in err_lower for kw in (
+            "rate limit", "rate_limit", "too many requests",
+            "try again", "retry after", "resets in",
+        )):
+            return True
+        # Generic 429 without billing keywords = likely a rate limit
+        if not any(kw in err_lower for kw in (
+            "credits", "insufficient funds", "billing",
+            "payment required", "can only afford",
+        )):
+            return True
+    return False
+
+
 def _is_connection_error(exc: Exception) -> bool:
     """Detect connection/network errors that warrant provider fallback.
 
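Review note: how the new classifier behaves on representative errors (FakeAPIError is a hypothetical stand-in for provider SDK exceptions, defined here only for the sketch):

class FakeAPIError(Exception):
    def __init__(self, msg: str, status_code: int | None = None):
        super().__init__(msg)
        self.status_code = status_code

assert _is_rate_limit_error(FakeAPIError("Too many requests, retry after 3s", 429))
# Billing-flavored 429s stay with _is_payment_error:
assert not _is_rate_limit_error(FakeAPIError("insufficient funds for request", 429))
assert not _is_rate_limit_error(FakeAPIError("internal error", 500))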
@@ -2336,7 +2376,7 @@ def resolve_provider_client(
 
     if pconfig.auth_type == "api_key":
         if provider == "anthropic":
-            client, default_model = _try_anthropic()
+            client, default_model = _try_anthropic(explicit_api_key=explicit_api_key)
             if client is None:
                 logger.warning("resolve_provider_client: anthropic requested but no Anthropic credentials found")
                 return None, None
@@ -2648,8 +2688,11 @@ def resolve_vision_provider_client(
         return resolved_provider, sync_client, final_model
 
     if resolved_base_url:
+        provider_for_base_override = (
+            requested if requested and requested not in ("", "auto") else "custom"
+        )
         client, final_model = resolve_provider_client(
-            "custom",
+            provider_for_base_override,
             model=resolved_model,
             async_mode=async_mode,
             explicit_base_url=resolved_base_url,
@@ -2657,8 +2700,8 @@ def resolve_vision_provider_client(
             api_mode=resolved_api_mode,
         )
         if client is None:
-            return "custom", None, None
-        return "custom", client, final_model
+            return provider_for_base_override, None, None
+        return provider_for_base_override, client, final_model
 
     if requested == "auto":
         # Vision auto-detection order:
@@ -3124,8 +3167,14 @@ def _resolve_task_provider_model(
 
     if task:
         # Config.yaml is the primary source for per-task overrides.
-        if cfg_base_url:
+        if cfg_base_url and cfg_api_key:
+            # Both base_url and api_key explicitly set → custom endpoint.
             return "custom", resolved_model, cfg_base_url, cfg_api_key, resolved_api_mode
+        if cfg_base_url and cfg_provider and cfg_provider != "auto":
+            # base_url set without api_key but with a known provider — use
+            # the provider so it can resolve credentials from env vars
+            # (e.g. OPENROUTER_API_KEY) instead of locking into "custom".
+            return cfg_provider, resolved_model, cfg_base_url, None, resolved_api_mode
         if cfg_provider and cfg_provider != "auto":
             return cfg_provider, resolved_model, None, None, resolved_api_mode
 
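Review note: the resolution order above, traced on hypothetical config values (URL and key strings invented for illustration):

# (cfg_base_url,            cfg_api_key, cfg_provider)   -> resolved provider
# ("https://gw.example/v1", "sk-xyz",    anything)       -> "custom"      (explicit endpoint + key)
# ("https://gw.example/v1", None,        "openrouter")   -> "openrouter"  (env-var credentials)
# (None,                    None,        "openrouter")   -> "openrouter"
# (None,                    None,        None or "auto") -> falls through to later branches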
@@ -3526,7 +3575,7 @@ def call_llm(
         except Exception as retry_err:
             # If the max_tokens retry also hits a payment or connection
             # error, fall through to the fallback chain below.
-            if not (_is_payment_error(retry_err) or _is_connection_error(retry_err)):
+            if not (_is_payment_error(retry_err) or _is_connection_error(retry_err) or _is_rate_limit_error(retry_err)):
                 raise
             first_err = retry_err
 
@@ -3609,13 +3658,27 @@ def call_llm(
         # Codex/OAuth tokens that authenticate but whose endpoint is down,
         # and providers the user never configured that got picked up by
         # the auto-detection chain.
-        should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
+        #
+        # ── Rate-limit fallback (#13579) ─────────────────────────────
+        # When the provider returns a 429 rate-limit (not billing), fall
+        # back to an alternative provider instead of exhausting retries
+        # against the same rate-limited endpoint.
+        should_fallback = (
+            _is_payment_error(first_err)
+            or _is_connection_error(first_err)
+            or _is_rate_limit_error(first_err)
+        )
         # Only try alternative providers when the user didn't explicitly
         # configure this task's provider. Explicit provider = hard constraint;
         # auto (the default) = best-effort fallback chain. (#7559)
         is_auto = resolved_provider in ("auto", "", None)
         if should_fallback and is_auto:
-            reason = "payment error" if _is_payment_error(first_err) else "connection error"
+            if _is_payment_error(first_err):
+                reason = "payment error"
+            elif _is_rate_limit_error(first_err):
+                reason = "rate limit"
+            else:
+                reason = "connection error"
             logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
                         task or "call", reason, resolved_provider, first_err)
             fb_client, fb_model, fb_label = _try_payment_fallback(
@@ -3818,7 +3881,7 @@ async def async_call_llm(
         except Exception as retry_err:
             # If the max_tokens retry also hits a payment or connection
             # error, fall through to the fallback chain below.
-            if not (_is_payment_error(retry_err) or _is_connection_error(retry_err)):
+            if not (_is_payment_error(retry_err) or _is_connection_error(retry_err) or _is_rate_limit_error(retry_err)):
                 raise
             first_err = retry_err
 
@@ -3887,11 +3950,20 @@ async def async_call_llm(
                 return _validate_llm_response(
                     await retry_client.chat.completions.create(**retry_kwargs), task)
 
-    # ── Payment / connection fallback (mirrors sync call_llm) ─────
-    should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
+    # ── Payment / connection / rate-limit fallback (mirrors sync call_llm) ──
+    should_fallback = (
+        _is_payment_error(first_err)
+        or _is_connection_error(first_err)
+        or _is_rate_limit_error(first_err)
+    )
     is_auto = resolved_provider in ("auto", "", None)
     if should_fallback and is_auto:
-        reason = "payment error" if _is_payment_error(first_err) else "connection error"
+        if _is_payment_error(first_err):
+            reason = "payment error"
+        elif _is_rate_limit_error(first_err):
+            reason = "rate limit"
+        else:
+            reason = "connection error"
         logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",
                     task or "call", reason, resolved_provider, first_err)
         fb_client, fb_model, fb_label = _try_payment_fallback(
@@ -344,6 +344,7 @@ class ContextCompressor(ContextEngine):
         self._last_aux_model_failure_model = None
         self._last_compression_savings_pct = 100.0
         self._ineffective_compression_count = 0
+        self._summary_failure_cooldown_until = 0.0  # transient errors must not block a fresh session
 
     def update_model(
         self,
@@ -553,7 +554,16 @@ class ContextCompressor(ContextEngine):
                     break
                 accumulated += msg_tokens
                 boundary = i
-            prune_boundary = max(boundary, len(result) - min_protect)
+            # Translate the budget walk into a "protected count", apply the
+            # floor in count-space (where `max` reads naturally: protect at
+            # least `min_protect` messages or whatever the budget reserved,
+            # whichever is more), then convert back to a prune boundary.
+            # Doing this in index-space with `max` would invert the direction
+            # (smaller index = MORE protected), so a generous budget would
+            # silently get truncated back down to `min_protect`.
+            budget_protect_count = len(result) - boundary
+            protected_count = max(budget_protect_count, min_protect)
+            prune_boundary = len(result) - protected_count
         else:
             prune_boundary = len(result) - protect_tail_count
 
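Review note: a worked example of why the count-space floor matters. With 30 messages, a budget walk that stopped at boundary index 10, and min_protect = 5:

result_len, boundary, min_protect = 30, 10, 5
budget_protect_count = result_len - boundary               # 20 messages reserved by budget
protected_count = max(budget_protect_count, min_protect)   # 20; the floor only kicks in below 5
prune_boundary = result_len - protected_count              # 10

# The old index-space form, max(boundary, result_len - min_protect),
# evaluates to max(10, 25) = 25 and would silently prune 15 messages
# the budget walk had reserved.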
@@ -569,6 +579,8 @@ class ContextCompressor(ContextEngine):
             # Skip multimodal content (list of content blocks)
             if isinstance(content, list):
                 continue
+            if not isinstance(content, str):
+                continue
             if len(content) < 200:
                 continue
             h = hashlib.md5(content.encode("utf-8", errors="replace")).hexdigest()[:12]
@@ -588,6 +600,8 @@ class ContextCompressor(ContextEngine):
             # Skip multimodal content (list of content blocks)
             if isinstance(content, list):
                 continue
+            if not isinstance(content, str):
+                continue
             if not content or content == _PRUNED_TOOL_PLACEHOLDER:
                 continue
             # Skip already-deduplicated or previously-summarized results
@@ -903,15 +917,19 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                 or "does not exist" in _err_str
                 or "no available channel" in _err_str
             )
+            _is_timeout = (
+                _status in (408, 429, 502, 504)
+                or "timeout" in _err_str
+            )
             if (
-                _is_model_not_found
+                (_is_model_not_found or _is_timeout)
                 and self.summary_model
                 and self.summary_model != self.model
                 and not getattr(self, "_summary_model_fallen_back", False)
             ):
                 self._summary_model_fallen_back = True
                 logging.warning(
-                    "Summary model '%s' not available (%s). "
+                    "Summary model '%s' unavailable (%s). "
                     "Falling back to main model '%s' for compression.",
                     self.summary_model, e, self.model,
                 )
@@ -975,15 +993,39 @@ The user has requested that this compaction PRIORITISE preserving all informatio
         return None
 
     @staticmethod
-    def _with_summary_prefix(summary: str) -> str:
-        """Normalize summary text to the current compaction handoff format."""
+    def _strip_summary_prefix(summary: str) -> str:
+        """Return summary body without the current or legacy handoff prefix."""
         text = (summary or "").strip()
-        for prefix in (LEGACY_SUMMARY_PREFIX, SUMMARY_PREFIX):
+        for prefix in (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX):
             if text.startswith(prefix):
-                text = text[len(prefix):].lstrip()
-                break
+                return text[len(prefix):].lstrip()
+        return text
+
+    @classmethod
+    def _with_summary_prefix(cls, summary: str) -> str:
+        """Normalize summary text to the current compaction handoff format."""
+        text = cls._strip_summary_prefix(summary)
         return f"{SUMMARY_PREFIX}\n{text}" if text else SUMMARY_PREFIX
+
+    @staticmethod
+    def _is_context_summary_content(content: Any) -> bool:
+        text = _content_text_for_contains(content).lstrip()
+        return text.startswith(SUMMARY_PREFIX) or text.startswith(LEGACY_SUMMARY_PREFIX)
+
+    @classmethod
+    def _find_latest_context_summary(
+        cls,
+        messages: List[Dict[str, Any]],
+        start: int,
+        end: int,
+    ) -> tuple[Optional[int], str]:
+        """Find the newest handoff summary inside a compression window."""
+        for idx in range(end - 1, start - 1, -1):
+            content = messages[idx].get("content")
+            if cls._is_context_summary_content(content):
+                return idx, cls._strip_summary_prefix(_content_text_for_contains(content))
+        return None, ""
 
     # ------------------------------------------------------------------
     # Tool-call / tool-result pair integrity helpers
     # ------------------------------------------------------------------
@@ -1290,6 +1332,15 @@ The user has requested that this compaction PRIORITISE preserving all informatio
             return messages
 
         turns_to_summarize = messages[compress_start:compress_end]
+        summary_idx, summary_body = self._find_latest_context_summary(
+            messages,
+            compress_start,
+            compress_end,
+        )
+        if summary_idx is not None:
+            if summary_body and not self._previous_summary:
+                self._previous_summary = summary_body
+            turns_to_summarize = messages[summary_idx + 1:compress_end]
 
         if not self.quiet_mode:
             logger.info(
@@ -1367,6 +1418,19 @@ The user has requested that this compaction PRIORITISE preserving all informatio
             # Merge the summary into the first tail message instead
             # of inserting a standalone message that breaks alternation.
             _merge_summary_into_tail = True
 
+        # When the summary lands as a standalone role="user" message,
+        # weak models read the verbatim "## Active Task" quote of a past
+        # user request as fresh input (#11475, #14521). Append the explicit
+        # end marker — the same one used in the merge-into-tail path — so
+        # the model has a clear "summary above, not new input" signal.
+        if not _merge_summary_into_tail and summary_role == "user":
+            summary = (
+                summary
+                + "\n\n--- END OF CONTEXT SUMMARY — "
+                "respond to the message below, not the summary above ---"
+            )
+
         if not _merge_summary_into_tail:
             compressed.append({"role": summary_role, "content": summary})
 
145  agent/curator.py
@@ -24,11 +24,12 @@ from __future__ import annotations
 import json
 import logging
 import os
+import re
 import tempfile
 import threading
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Set
+from typing import Any, Callable, Dict, List, NamedTuple, Optional, Set
 
 from hermes_constants import get_hermes_home
 from tools import skill_usage
@@ -36,6 +37,22 @@ from tools import skill_usage
 logger = logging.getLogger(__name__)
 
 
+def _strip_aux_credential(value: Any) -> Optional[str]:
+    if value is None:
+        return None
+    text = str(value).strip()
+    return text or None
+
+
+class _ReviewRuntimeBinding(NamedTuple):
+    """Provider/model for the curator review fork plus optional per-slot overrides."""
+
+    provider: str
+    model: str
+    explicit_api_key: Optional[str]
+    explicit_base_url: Optional[str]
+
+
 DEFAULT_INTERVAL_HOURS = 24 * 7  # 7 days
 DEFAULT_MIN_IDLE_HOURS = 2
 DEFAULT_STALE_AFTER_DAYS = 30
@@ -453,6 +470,24 @@ def _reports_root() -> Path:
     return root
 
 
+def _needle_in_path_component(needle: str, path: str) -> bool:
+    """Check if *needle* is a complete filename stem or directory name in *path*.
+
+    Unlike simple substring matching, this avoids false positives where short
+    skill names are embedded in longer filenames (e.g. "api" matching
+    "references/api-design.md"). Hyphens and underscores are normalised so
+    "open-webui-setup" matches "open_webui_setup.md".
+    """
+    norm_needle = needle.replace("-", "_")
+    for part in path.replace("\\", "/").split("/"):
+        if not part:
+            continue
+        stem = part.rsplit(".", 1)[0] if "." in part else part
+        if stem.replace("-", "_") == norm_needle:
+            return True
+    return False
+
+
 def _classify_removed_skills(
     removed: List[str],
     added: List[str],
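Review note: matching behavior of the new helper on sample paths (paths invented for illustration; expected results follow from the function body above):

assert _needle_in_path_component("open-webui-setup", "skills/open_webui_setup.md")
assert _needle_in_path_component("api", "skills/api/README.md")          # whole directory component
assert not _needle_in_path_component("api", "references/api-design.md")  # substring no longer matches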
@@ -531,15 +566,29 @@ def _classify_removed_skills(
             continue
 
         # Look for the removed skill's name in file_path / content / raw.
-        haystacks: List[str] = []
+        # Matching strategy differs by field type:
+        #   file_path — needle must be a complete path component
+        #   (filename stem or directory name), so "api" does NOT
+        #   falsely match "references/api-design.md".
+        #   content fields — word-boundary regex so "test" does NOT
+        #   falsely match "latest" or "testing".
+        haystacks: List[tuple[str, str]] = []
         for key in ("file_path", "file_content", "content", "new_string", "_raw"):
             v = args.get(key)
             if isinstance(v, str):
-                haystacks.append(v)
+                haystacks.append((key, v))
         hit = False
-        for hay in haystacks:
+        for key, hay in haystacks:
             for needle in needles:
-                if needle and needle in hay:
+                if not needle:
+                    continue
+                if key == "file_path":
+                    matched = _needle_in_path_component(needle, hay)
+                else:
+                    matched = bool(
+                        re.search(rf'\b{re.escape(needle)}\b', hay)
+                    )
+                if matched:
                     hit = True
                     evidence = (
                         f"skill_manage action={args.get('action', '?')} "
@@ -1398,6 +1447,52 @@ def run_curator_review(
     }
 
 
+def _resolve_review_runtime(cfg: Dict[str, Any]) -> _ReviewRuntimeBinding:
+    """Resolve provider/model and per-slot credentials for the curator review fork.
+
+    Same precedence as `_resolve_review_model()`. Non-empty ``api_key`` /
+    ``base_url`` from the active slot are returned as explicit overrides so
+    ``resolve_runtime_provider`` does not silently reuse the main chat
+    credential chain for a routed auxiliary model.
+    """
+    _main = cfg.get("model", {}) if isinstance(cfg.get("model"), dict) else {}
+    _main_provider = _main.get("provider") or "auto"
+    _main_model = _main.get("default") or _main.get("model") or ""
+
+    # 1. Canonical aux task slot
+    _aux = cfg.get("auxiliary", {}) if isinstance(cfg.get("auxiliary"), dict) else {}
+    _cur_task = _aux.get("curator", {}) if isinstance(_aux.get("curator"), dict) else {}
+    _task_provider = (_cur_task.get("provider") or "").strip() or None
+    _task_model = (_cur_task.get("model") or "").strip() or None
+    if _task_provider and _task_provider != "auto" and _task_model:
+        return _ReviewRuntimeBinding(
+            _task_provider,
+            _task_model,
+            _strip_aux_credential(_cur_task.get("api_key")),
+            _strip_aux_credential(_cur_task.get("base_url")),
+        )
+
+    # 2. Legacy curator.auxiliary.{provider,model} (deprecated, pre-unification)
+    _cur = cfg.get("curator", {}) if isinstance(cfg.get("curator"), dict) else {}
+    _legacy = _cur.get("auxiliary", {}) if isinstance(_cur.get("auxiliary"), dict) else {}
+    _legacy_provider = _legacy.get("provider") or None
+    _legacy_model = _legacy.get("model") or None
+    if _legacy_provider and _legacy_model:
+        logger.info(
+            "curator: using deprecated curator.auxiliary.{provider,model} "
+            "config — please migrate to auxiliary.curator.{provider,model}"
+        )
+        return _ReviewRuntimeBinding(
+            str(_legacy_provider),
+            str(_legacy_model),
+            _strip_aux_credential(_legacy.get("api_key")),
+            _strip_aux_credential(_legacy.get("base_url")),
+        )
+
+    # 3. Fall through to the main chat model
+    return _ReviewRuntimeBinding(_main_provider, _main_model, None, None)
+
+
 def _resolve_review_model(cfg: Dict[str, Any]) -> tuple[str, str]:
     """Pick (provider, model) for the curator review fork.
 
@@ -1413,32 +1508,8 @@ def _resolve_review_model(cfg: Dict[str, Any]) -> tuple[str, str]:
     2. Legacy ``curator.auxiliary.{provider,model}`` when both are set
     3. Main ``model.{provider,default/model}`` pair
     """
-    _main = cfg.get("model", {}) if isinstance(cfg.get("model"), dict) else {}
-    _main_provider = _main.get("provider") or "auto"
-    _main_model = _main.get("default") or _main.get("model") or ""
-
-    # 1. Canonical aux task slot
-    _aux = cfg.get("auxiliary", {}) if isinstance(cfg.get("auxiliary"), dict) else {}
-    _cur_task = _aux.get("curator", {}) if isinstance(_aux.get("curator"), dict) else {}
-    _task_provider = (_cur_task.get("provider") or "").strip() or None
-    _task_model = (_cur_task.get("model") or "").strip() or None
-    if _task_provider and _task_provider != "auto" and _task_model:
-        return _task_provider, _task_model
-
-    # 2. Legacy curator.auxiliary.{provider,model} (deprecated, pre-unification)
-    _cur = cfg.get("curator", {}) if isinstance(cfg.get("curator"), dict) else {}
-    _legacy = _cur.get("auxiliary", {}) if isinstance(_cur.get("auxiliary"), dict) else {}
-    _legacy_provider = _legacy.get("provider") or None
-    _legacy_model = _legacy.get("model") or None
-    if _legacy_provider and _legacy_model:
-        logger.info(
-            "curator: using deprecated curator.auxiliary.{provider,model} "
-            "config — please migrate to auxiliary.curator.{provider,model}"
-        )
-        return _legacy_provider, _legacy_model
-
-    # 3. Fall through to the main chat model
-    return _main_provider, _main_model
+    b = _resolve_review_runtime(cfg)
+    return b.provider, b.model
 
 
 def _run_llm_review(prompt: str) -> Dict[str, Any]:
@@ -1477,10 +1548,10 @@ def _run_llm_review(prompt: str) -> Dict[str, Any]:
     # arguments hits an auto-resolution path that fails for OAuth-only
     # providers and for pool-backed credentials.
     #
-    # `_resolve_review_model()` honors `auxiliary.curator.{provider,model}`
+    # `_resolve_review_runtime()` honors `auxiliary.curator.{provider,model,...}`
     # (canonical aux-task slot, wired through `hermes model` → auxiliary
     # picker and the dashboard Models tab), with a legacy fallback to
-    # `curator.auxiliary.{provider,model}`. See docs/user-guide/features/curator.md.
+    # `curator.auxiliary.{provider,model,...}`. See docs/user-guide/features/curator.md.
     _api_key = None
     _base_url = None
     _api_mode = None
@@ -1490,9 +1561,13 @@ def _run_llm_review(prompt: str) -> Dict[str, Any]:
         from hermes_cli.config import load_config
         from hermes_cli.runtime_provider import resolve_runtime_provider
         _cfg = load_config()
-        _provider, _model_name = _resolve_review_model(_cfg)
+        _binding = _resolve_review_runtime(_cfg)
+        _provider, _model_name = _binding.provider, _binding.model
         _rp = resolve_runtime_provider(
-            requested=_provider, target_model=_model_name
+            requested=_provider,
+            target_model=_model_name,
+            explicit_api_key=_binding.explicit_api_key,
+            explicit_base_url=_binding.explicit_base_url,
         )
         _api_key = _rp.get("api_key")
         _base_url = _rp.get("base_url")
@@ -55,6 +55,7 @@ class FailoverReason(enum.Enum):
     thinking_signature = "thinking_signature"  # Anthropic thinking block sig invalid
     long_context_tier = "long_context_tier"  # Anthropic "extra usage" tier gate
     oauth_long_context_beta_forbidden = "oauth_long_context_beta_forbidden"  # Anthropic OAuth subscription rejects 1M context beta — disable beta and retry
+    llama_cpp_grammar_pattern = "llama_cpp_grammar_pattern"  # llama.cpp json-schema-to-grammar rejects regex escapes in `pattern` / `format` — strip from tools and retry
 
     # Catch-all
     unknown = "unknown"  # Unclassifiable — retry with backoff
@@ -470,6 +471,31 @@ def classify_api_error(
             should_compress=False,
         )
 
+    # llama.cpp's ``json-schema-to-grammar`` converter (used by its OAI
+    # server to build GBNF tool-call parsers) rejects regex escape classes
+    # like ``\d``/``\w``/``\s`` and most ``format`` values. MCP servers
+    # routinely emit ``"pattern": "\\d{4}-\\d{2}-\\d{2}"`` for date/phone/
+    # email params. llama.cpp surfaces this as HTTP 400 with one of a few
+    # recognizable phrases; on match we strip ``pattern``/``format`` from
+    # ``self.tools`` in the retry loop and retry once. Cloud providers are
+    # unaffected — they accept these keywords and we never hit this branch.
+    if (
+        status_code == 400
+        and (
+            "error parsing grammar" in error_msg
+            or "json-schema-to-grammar" in error_msg
+            or (
+                "unable to generate parser" in error_msg
+                and "template" in error_msg
+            )
+        )
+    ):
+        return _result(
+            FailoverReason.llama_cpp_grammar_pattern,
+            retryable=True,
+            should_compress=False,
+        )
+
     # ── 2. HTTP status code classification ──────────────────────────
 
     if status_code is not None:
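Review note: the retry-side mitigation this reason code gates is stripping pattern/format before resending. A minimal sketch under that assumption (helper name hypothetical, not the actual retry-loop code):

def strip_grammar_unsafe(node):
    # Recursively drop the JSON Schema keywords llama.cpp's grammar converter rejects.
    if isinstance(node, dict):
        return {k: strip_grammar_unsafe(v) for k, v in node.items()
                if k not in ("pattern", "format")}
    if isinstance(node, list):
        return [strip_grammar_unsafe(v) for v in node]
    return node

params = {"type": "object", "properties": {
    "date": {"type": "string", "pattern": "\\d{4}-\\d{2}-\\d{2}", "format": "date"}}}
assert strip_grammar_unsafe(params)["properties"]["date"] == {"type": "string"}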
@@ -520,7 +546,12 @@ def classify_api_error(
 
     is_disconnect = any(p in error_msg for p in _SERVER_DISCONNECT_PATTERNS)
     if is_disconnect and not status_code:
-        is_large = approx_tokens > context_length * 0.6 or approx_tokens > 120000 or num_messages > 200
+        # Absolute token/message-count thresholds are only a proxy for smaller
+        # context windows. Large-context sessions can have hundreds of
+        # messages while still being far below their actual token budget.
+        is_large = approx_tokens > context_length * 0.6 or (
+            context_length <= 256000 and (approx_tokens > 120000 or num_messages > 200)
+        )
         if is_large:
             return _result(
                 FailoverReason.context_overflow,
@@ -766,7 +797,12 @@ def _classify_400(
     if not err_body_msg:
         err_body_msg = str(body.get("message") or "").strip().lower()
     is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "")
-    is_large = approx_tokens > context_length * 0.4 or approx_tokens > 80000 or num_messages > 80
+    # Absolute token/message-count thresholds are only a proxy for smaller
+    # context windows. Large-context sessions can have many messages while
+    # still being far below their actual token budget.
+    is_large = approx_tokens > context_length * 0.4 or (
+        context_length <= 256000 and (approx_tokens > 80000 or num_messages > 80)
+    )
 
     if is_generic and is_large:
         return result_fn(
@@ -679,7 +679,21 @@ def translate_stream_event(event: Dict[str, Any], model: str, tool_call_indices:
     finish_reason_raw = str(cand.get("finishReason") or "")
     if finish_reason_raw:
         mapped = "tool_calls" if tool_call_indices else _map_gemini_finish_reason(finish_reason_raw)
-        chunks.append(_make_stream_chunk(model=model, finish_reason=mapped))
+        finish_chunk = _make_stream_chunk(model=model, finish_reason=mapped)
+        # Attach usage from this event's usageMetadata so the streaming
+        # loop in run_agent.py can record token counts (mirrors the
+        # non-streaming path in translate_gemini_response).
+        usage_meta = event.get("usageMetadata") or {}
+        if usage_meta:
+            finish_chunk.usage = SimpleNamespace(
+                prompt_tokens=int(usage_meta.get("promptTokenCount") or 0),
+                completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0),
+                total_tokens=int(usage_meta.get("totalTokenCount") or 0),
+                prompt_tokens_details=SimpleNamespace(
+                    cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0),
+                ),
+            )
+        chunks.append(finish_chunk)
     return chunks
 
 
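Review note: the usageMetadata field names read by the hunk above, traced on a hypothetical final Gemini stream event (field names come from the diff itself; the numbers are invented):

event = {
    "candidates": [{"finishReason": "STOP"}],
    "usageMetadata": {
        "promptTokenCount": 1200,
        "candidatesTokenCount": 250,
        "totalTokenCount": 1450,
        "cachedContentTokenCount": 800,
    },
}
usage_meta = event.get("usageMetadata") or {}
assert int(usage_meta.get("promptTokenCount") or 0) == 1200
assert int(usage_meta.get("cachedContentTokenCount") or 0) == 800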
@@ -489,16 +489,29 @@ def save_credentials(creds: GoogleCredentials) -> Path:
     """Atomically write creds to disk with 0o600 permissions."""
     path = _credentials_path()
     path.parent.mkdir(parents=True, exist_ok=True)
+    # Tighten parent dir to 0o700 so siblings can't traverse to the creds file.
+    # On Windows this is a no-op (POSIX mode bits aren't enforced); ignore failures.
+    try:
+        os.chmod(path.parent, 0o700)
+    except OSError:
+        pass
     payload = json.dumps(creds.to_dict(), indent=2, sort_keys=True) + "\n"
 
     with _credentials_lock():
         tmp_path = path.with_suffix(f".tmp.{os.getpid()}.{secrets.token_hex(4)}")
         try:
-            with open(tmp_path, "w", encoding="utf-8") as fh:
+            # Create with 0o600 atomically to close the TOCTOU window where the
+            # default umask (often 0o644) would briefly expose tokens to other
+            # local users between open() and chmod().
+            fd = os.open(
+                str(tmp_path),
+                os.O_WRONLY | os.O_CREAT | os.O_EXCL,
+                stat.S_IRUSR | stat.S_IWUSR,
+            )
+            with os.fdopen(fd, "w", encoding="utf-8") as fh:
                 fh.write(payload)
                 fh.flush()
                 os.fsync(fh.fileno())
-            os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR)
             atomic_replace(tmp_path, path)
         finally:
             try:
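Review note: the born-0600 pattern above as a self-contained sketch (function name hypothetical; only standard-library calls):

import os
import stat

def write_secret_0600(path: str, payload: str) -> None:
    # The temp file is created 0600 in the same os.open() call, so there is
    # no window where umask-default permissions expose the payload.
    tmp = f"{path}.tmp.{os.getpid()}"
    fd = os.open(tmp, os.O_WRONLY | os.O_CREAT | os.O_EXCL,
                 stat.S_IRUSR | stat.S_IWUSR)
    with os.fdopen(fd, "w", encoding="utf-8") as fh:
        fh.write(payload)
        fh.flush()
        os.fsync(fh.fileno())
    os.replace(tmp, path)  # atomic rename on POSIX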
230  agent/i18n.py  Normal file
@@ -0,0 +1,230 @@
+"""Lightweight internationalization (i18n) for Hermes static user-facing messages.
+
+Scope (thin slice, by design): only the highest-impact static strings shown
+to the user by Hermes itself -- approval prompts, a handful of gateway slash
+command replies, restart-drain notices. Agent-generated output, log lines,
+error tracebacks, tool outputs, and slash-command descriptions all stay in
+English.
+
+Catalog files live under ``locales/<lang>.yaml`` at the repo root. Each
+catalog is a flat dict keyed by dotted paths (e.g. ``approval.choose`` or
+``gateway.approval_expired``). Missing keys fall back to English; if English
+is missing too, the key path itself is returned so a broken catalog never
+crashes the agent.
+
+Usage::
+
+    from agent.i18n import t
+    print(t("approval.choose_long"))             # current lang
+    print(t("gateway.draining", count=3))        # {count} formatted
+    print(t("approval.choose_long", lang="zh"))  # explicit override
+
+Language resolution order:
+1. Explicit ``lang=`` argument passed to :func:`t`
+2. ``HERMES_LANGUAGE`` environment variable (for tests / quick override)
+3. ``display.language`` from config.yaml
+4. ``"en"`` (baseline)
+
+Supported languages: en, zh, ja, de, es. Unknown values fall back to en.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import threading
+from functools import lru_cache
+from pathlib import Path
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+SUPPORTED_LANGUAGES: tuple[str, ...] = ("en", "zh", "ja", "de", "es")
+DEFAULT_LANGUAGE = "en"
+
+# Accept a few natural aliases so users who type "chinese" / "zh-CN" / "jp"
+# get the right catalog instead of silently falling back to English.
+_LANGUAGE_ALIASES: dict[str, str] = {
+    "english": "en", "en-us": "en", "en-gb": "en",
+    "chinese": "zh", "mandarin": "zh", "zh-cn": "zh", "zh-tw": "zh", "zh-hans": "zh", "zh-hant": "zh",
+    "japanese": "ja", "jp": "ja", "ja-jp": "ja",
+    "german": "de", "deutsch": "de", "de-de": "de",
+    "spanish": "es", "español": "es", "espanol": "es", "es-es": "es", "es-mx": "es",
+}
+
+_catalog_cache: dict[str, dict[str, str]] = {}
+_catalog_lock = threading.Lock()
+
+
+def _locales_dir() -> Path:
+    """Return the directory containing locale YAML files.
+
+    Lives next to the repo root so both the bundled install and editable
+    checkouts find it without PYTHONPATH gymnastics.
+    """
+    # agent/i18n.py -> agent/ -> repo root
+    return Path(__file__).resolve().parent.parent / "locales"
+
+
+def _normalize_lang(value: Any) -> str:
+    """Normalize a user-supplied language value to a supported code.
+
+    Accepts supported codes directly, common aliases (``chinese`` -> ``zh``),
+    and case-insensitive regional tags (``zh-CN`` -> ``zh``). Returns the
+    default language for unknown values.
+    """
+    if not isinstance(value, str):
+        return DEFAULT_LANGUAGE
+    key = value.strip().lower()
+    if not key:
+        return DEFAULT_LANGUAGE
+    if key in SUPPORTED_LANGUAGES:
+        return key
+    if key in _LANGUAGE_ALIASES:
+        return _LANGUAGE_ALIASES[key]
+    # Try stripping a region suffix (e.g. "pt-br" -> "pt" won't be supported,
+    # but "zh-CN" -> "zh" will).
+    base = key.split("-", 1)[0]
+    if base in SUPPORTED_LANGUAGES:
+        return base
+    return DEFAULT_LANGUAGE
+
+
+def _load_catalog(lang: str) -> dict[str, str]:
+    """Load and flatten one locale YAML file into a dotted-key dict.
+
+    YAML files can be nested for human readability; this produces the flat
+    key space :func:`t` expects. Cached per-language for the process.
+    """
+    with _catalog_lock:
+        cached = _catalog_cache.get(lang)
+        if cached is not None:
+            return cached
+
+    path = _locales_dir() / f"{lang}.yaml"
+    if not path.is_file():
+        logger.debug("i18n catalog missing for %s at %s", lang, path)
+        with _catalog_lock:
+            _catalog_cache[lang] = {}
+        return {}
+
+    try:
+        import yaml  # PyYAML is already a hermes dependency
+        with path.open("r", encoding="utf-8") as f:
+            raw = yaml.safe_load(f) or {}
+    except Exception as exc:
+        logger.warning("Failed to load i18n catalog %s: %s", path, exc)
+        with _catalog_lock:
+            _catalog_cache[lang] = {}
+        return {}
+
+    flat: dict[str, str] = {}
+    _flatten_into(raw, "", flat)
+    with _catalog_lock:
+        _catalog_cache[lang] = flat
+    return flat
+
+
+def _flatten_into(node: Any, prefix: str, out: dict[str, str]) -> None:
+    if isinstance(node, dict):
+        for key, value in node.items():
+            child_key = f"{prefix}.{key}" if prefix else str(key)
+            _flatten_into(value, child_key, out)
+    elif isinstance(node, str):
+        out[prefix] = node
+    # Non-string, non-dict leaves are ignored -- catalogs are text-only.
+
+
+@lru_cache(maxsize=1)
+def _config_language_cached() -> str | None:
+    """Read ``display.language`` from config.yaml once per process.
+
+    Cached because ``t()`` is called in hot paths (every approval prompt,
+    every gateway reply) and re-reading YAML each call would be wasteful.
+    ``reset_language_cache()`` clears this when config changes at runtime
+    (e.g. after the setup wizard).
+    """
+    try:
+        from hermes_cli.config import load_config
+        cfg = load_config()
+        lang = (cfg.get("display") or {}).get("language")
+        if lang:
+            return _normalize_lang(lang)
+    except Exception as exc:
+        logger.debug("Could not read display.language from config: %s", exc)
+    return None
+
+
+def reset_language_cache() -> None:
+    """Invalidate cached language resolution and catalogs.
+
+    Call after :func:`hermes_cli.config.save_config` if a running process
+    needs to pick up a changed ``display.language`` without restart.
+    """
+    _config_language_cached.cache_clear()
+    with _catalog_lock:
+        _catalog_cache.clear()
+
+
+def get_language() -> str:
+    """Resolve the active language using env > config > default order."""
+    env_lang = os.environ.get("HERMES_LANGUAGE")
+    if env_lang:
+        return _normalize_lang(env_lang)
+    cfg_lang = _config_language_cached()
+    if cfg_lang:
+        return cfg_lang
+    return DEFAULT_LANGUAGE
+
+
+def t(key: str, lang: str | None = None, **format_kwargs: Any) -> str:
+    """Translate a dotted key to the active language.
+
+    Parameters
+    ----------
+    key
+        Dotted path into the catalog, e.g. ``"approval.choose_long"``.
+    lang
+        Explicit language override. Takes precedence over env + config.
+    **format_kwargs
+        ``str.format`` substitution arguments (``t("gateway.drain", count=3)``
+        expects a catalog entry with a ``{count}`` placeholder).
+
+    Returns
+    -------
+    The translated string, or the English fallback if the key is missing in
+    the target language, or the bare key if English is also missing.
+    """
+    target = _normalize_lang(lang) if lang else get_language()
+    catalog = _load_catalog(target)
+    value = catalog.get(key)
+
+    if value is None and target != DEFAULT_LANGUAGE:
+        # Fall through to English rather than showing a key path to the user.
+        value = _load_catalog(DEFAULT_LANGUAGE).get(key)
+
+    if value is None:
+        # Last-ditch: return the key itself. A broken catalog should not
+        # crash anything; it just looks ugly until someone fixes it.
+        logger.debug("i18n miss: key=%r lang=%r", key, target)
+        value = key
+
+    if format_kwargs:
+        try:
+            return value.format(**format_kwargs)
+        except (KeyError, IndexError, ValueError) as exc:
+            logger.warning(
+                "i18n format failed for key=%r lang=%r kwargs=%r: %s",
+                key, target, format_kwargs, exc,
+            )
+            return value
+    return value
+
+
+__all__ = [
+    "SUPPORTED_LANGUAGES",
+    "DEFAULT_LANGUAGE",
+    "t",
+    "get_language",
+    "reset_language_cache",
+]
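Review note: end-to-end usage of the new module, assuming a hypothetical locales/zh.yaml that contains a nested gateway -> draining entry with a {count} placeholder (the key names come from the module docstring; the catalog contents are invented):

import os
from agent.i18n import t, reset_language_cache

os.environ["HERMES_LANGUAGE"] = "zh-CN"   # region tag normalizes to "zh"
reset_language_cache()                    # drop any cached language/catalogs
print(t("gateway.draining", count=3))     # catalog hit, {count} substituted
print(t("no.such.key"))                   # falls back to en, then to the key itself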
@@ -183,8 +183,8 @@ SKILLS_GUIDANCE = (
 )
 
 KANBAN_GUIDANCE = (
-    "# You are a Kanban worker\n"
-    "You were spawned by the Hermes Kanban dispatcher to execute ONE task from "
+    "# Kanban task execution protocol\n"
+    "You have been assigned ONE task from "
     "the shared board at `~/.hermes/kanban.db`. Your task id is in "
     "`$HERMES_KANBAN_TASK`; your workspace is `$HERMES_KANBAN_WORKSPACE`. "
     "The `kanban_*` tools in your schema are your primary coordination surface — "
@@ -513,6 +513,12 @@ PLATFORM_HINTS = {
         "image and is the WRONG path. Bare Unicode emoji in text is also not a substitute "
         "— when a sticker is the right response, use yb_send_sticker."
     ),
+    "api_server": (
+        "You're responding through an API server. The rendering layer is unknown — "
+        "assume plain text. No markdown formatting (no asterisks, bullets, headers, "
+        "code fences). Treat this like a conversation, not a document. Keep responses "
+        "brief and natural."
+    ),
 }
 
 # ---------------------------------------------------------------------------
@@ -305,13 +305,18 @@ def _redact_form_body(text: str) -> str:
     return _redact_query_string(text.strip())
 
 
-def redact_sensitive_text(text: str, *, force: bool = False) -> str:
+def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = False) -> str:
     """Apply all redaction patterns to a block of text.
 
     Safe to call on any string -- non-matching text passes through unchanged.
     Disabled by default — enable via security.redact_secrets: true in config.yaml.
     Set force=True for safety boundaries that must never return raw secrets
     regardless of the user's global logging redaction preference.
+
+    Set code_file=True to skip the ENV-assignment and JSON-field regex
+    patterns when the text is known to be source code (e.g. MAX_TOKENS=***
+    constants, "apiKey": "test" fixtures). Prefix patterns, auth headers,
+    private keys, DB connstrings, JWTs, and URL secrets are still redacted.
     """
     if text is None:
         return None
@@ -325,13 +330,14 @@ def redact_sensitive_text(text: str, *, force: bool = False) -> str:
|
|||||||
# Known prefixes (sk-, ghp_, etc.)
|
# Known prefixes (sk-, ghp_, etc.)
|
||||||
text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text)
|
text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text)
|
||||||
|
|
||||||
# ENV assignments: OPENAI_API_KEY=sk-abc...
|
# ENV assignments: OPENAI_API_KEY=*** (skip for code files — false positives)
|
||||||
|
if not code_file:
|
||||||
def _redact_env(m):
|
def _redact_env(m):
|
||||||
name, quote, value = m.group(1), m.group(2), m.group(3)
|
name, quote, value = m.group(1), m.group(2), m.group(3)
|
||||||
return f"{name}={quote}{_mask_token(value)}{quote}"
|
return f"{name}={quote}{_mask_token(value)}{quote}"
|
||||||
text = _ENV_ASSIGN_RE.sub(_redact_env, text)
|
text = _ENV_ASSIGN_RE.sub(_redact_env, text)
|
||||||
|
|
||||||
# JSON fields: "apiKey": "value"
|
# JSON fields: "apiKey": "***" (skip for code files — false positives)
|
||||||
def _redact_json(m):
|
def _redact_json(m):
|
||||||
key, value = m.group(1), m.group(2)
|
key, value = m.group(1), m.group(2)
|
||||||
return f'{key}: "{_mask_token(value)}"'
|
return f'{key}: "{_mask_token(value)}"'
|
||||||
|
|||||||
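A sketch of the intended difference, with made-up secrets (the exact regexes are not shown in this hunk, so the masked output is illustrative rather than exact):

```python
src = 'OPENAI_API_KEY=sk-abc123\nMAX_TOKENS=4096\n"apiKey": "test"'

# Default path: ENV-assignment and JSON-field patterns both fire,
# so MAX_TOKENS=4096 and "apiKey": "test" get masked too.
redact_sensitive_text(src, force=True)

# code_file=True: those two pattern families are skipped, so constants
# and test fixtures in source code survive; known-prefix tokens like
# sk-... are still masked by _PREFIX_RE.
redact_sensitive_text(src, force=True, code_file=True)
```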
386 agent/think_scrubber.py Normal file
@@ -0,0 +1,386 @@
"""Stateful scrubber for reasoning/thinking blocks in streamed assistant text.
|
||||||
|
|
||||||
|
``run_agent._strip_think_blocks`` is regex-based and correct for a complete
|
||||||
|
string, but when it runs *per-delta* in ``_fire_stream_delta`` it destroys
|
||||||
|
the state that downstream consumers (CLI ``_stream_delta``, gateway
|
||||||
|
``GatewayStreamConsumer._filter_and_accumulate``) rely on.
|
||||||
|
|
||||||
|
Concretely, when MiniMax-M2.7 streams
|
||||||
|
|
||||||
|
delta1 = "<think>"
|
||||||
|
delta2 = "Let me check their config"
|
||||||
|
delta3 = "</think>"
|
||||||
|
|
||||||
|
the per-delta regex erases delta1 entirely (case 2: unterminated-open at
|
||||||
|
boundary matches ``^<think>...``), so the downstream state machine never
|
||||||
|
sees the open tag, treats delta2 as regular content, and leaks reasoning
|
||||||
|
to the user. Consumers that don't run their own state machine (ACP,
|
||||||
|
api_server, TTS) never had any defence at all — they just emitted
|
||||||
|
whatever survived the upstream regex.
|
||||||
|
|
||||||
|
This module centralises the tag-suppression state machine at the
|
||||||
|
upstream layer so every stream_delta_callback sees text that has
|
||||||
|
already had reasoning blocks removed. Partial tags at delta
|
||||||
|
boundaries are held back until the next delta resolves them, and
|
||||||
|
end-of-stream flushing surfaces any held-back prose that turned out
|
||||||
|
not to be a real tag.
|
||||||
|
|
||||||
|
Usage::
|
||||||
|
|
||||||
|
scrubber = StreamingThinkScrubber()
|
||||||
|
for delta in stream:
|
||||||
|
visible = scrubber.feed(delta)
|
||||||
|
if visible:
|
||||||
|
emit(visible)
|
||||||
|
tail = scrubber.flush() # at end of stream
|
||||||
|
if tail:
|
||||||
|
emit(tail)
|
||||||
|
|
||||||
|
The scrubber is re-entrant per agent instance. Call ``reset()`` at
|
||||||
|
the top of each new turn so a hung block from an interrupted prior
|
||||||
|
stream cannot taint the next turn's output.
|
||||||
|
|
||||||
|
Tag variants handled (case-insensitive):
|
||||||
|
``<think>``, ``<thinking>``, ``<reasoning>``, ``<thought>``,
|
||||||
|
``<REASONING_SCRATCHPAD>``.
|
||||||
|
|
||||||
|
Block-boundary rule for opens: an opening tag is only treated as a
|
||||||
|
reasoning-block opener when it appears at the start of the stream,
|
||||||
|
after a newline (optionally followed by whitespace), or when only
|
||||||
|
whitespace has been emitted on the current line. This prevents prose
|
||||||
|
that *mentions* the tag name (e.g. ``"use <think> tags here"``) from
|
||||||
|
being incorrectly suppressed. Closed pairs (``<think>X</think>``) are
|
||||||
|
always suppressed regardless of boundary; a closed pair is an
|
||||||
|
intentional, bounded construct.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Tuple
|
||||||
|
|
||||||
|
__all__ = ["StreamingThinkScrubber"]
|
||||||
|
|
||||||
|
|
||||||
|
class StreamingThinkScrubber:
|
||||||
|
"""Stateful scrubber for streaming reasoning/thinking blocks.
|
||||||
|
|
||||||
|
State machine:
|
||||||
|
- ``_in_block``: True while inside an opened block, waiting for
|
||||||
|
a close tag. All text inside is discarded.
|
||||||
|
- ``_buf``: held-back partial-tag tail. Emitted / discarded on
|
||||||
|
the next ``feed()`` call or by ``flush()``.
|
||||||
|
- ``_last_emitted_ended_newline``: True iff the most recent
|
||||||
|
emission to the consumer ended with ``\\n``, or nothing has
|
||||||
|
been emitted yet (start-of-stream counts as a boundary). Used
|
||||||
|
to decide whether an open tag at buffer position 0 is at a
|
||||||
|
block boundary.
|
||||||
|
"""
|
||||||
|
|
||||||
|
_OPEN_TAG_NAMES: Tuple[str, ...] = (
|
||||||
|
"think",
|
||||||
|
"thinking",
|
||||||
|
"reasoning",
|
||||||
|
"thought",
|
||||||
|
"REASONING_SCRATCHPAD",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Materialise literal tag strings so the hot path does string
|
||||||
|
# operations, not regex compilation per feed().
|
||||||
|
_OPEN_TAGS: Tuple[str, ...] = tuple(f"<{name}>" for name in _OPEN_TAG_NAMES)
|
||||||
|
_CLOSE_TAGS: Tuple[str, ...] = tuple(f"</{name}>" for name in _OPEN_TAG_NAMES)
|
||||||
|
|
||||||
|
# Pre-compute the longest tag (for partial-tag hold-back bound).
|
||||||
|
_MAX_TAG_LEN: int = max(len(tag) for tag in _OPEN_TAGS + _CLOSE_TAGS)
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self._in_block: bool = False
|
||||||
|
self._buf: str = ""
|
||||||
|
self._last_emitted_ended_newline: bool = True
|
||||||
|
|
||||||
|
def reset(self) -> None:
|
||||||
|
"""Reset all state. Call at the top of every new turn."""
|
||||||
|
self._in_block = False
|
||||||
|
self._buf = ""
|
||||||
|
self._last_emitted_ended_newline = True
|
||||||
|
|
||||||
|
def feed(self, text: str) -> str:
|
||||||
|
"""Feed one delta; return the scrubbed visible portion.
|
||||||
|
|
||||||
|
May return an empty string when the entire delta is reasoning
|
||||||
|
content or is being held back pending resolution of a partial
|
||||||
|
tag at the boundary.
|
||||||
|
"""
|
||||||
|
if not text:
|
||||||
|
return ""
|
||||||
|
buf = self._buf + text
|
||||||
|
self._buf = ""
|
||||||
|
out: list[str] = []
|
||||||
|
|
||||||
|
while buf:
|
||||||
|
if self._in_block:
|
||||||
|
# Hunt for the earliest close tag.
|
||||||
|
close_idx, close_len = self._find_first_tag(
|
||||||
|
buf, self._CLOSE_TAGS,
|
||||||
|
)
|
||||||
|
if close_idx == -1:
|
||||||
|
# No close yet — hold back a potential partial
|
||||||
|
# close-tag prefix; discard everything else.
|
||||||
|
held = self._max_partial_suffix(buf, self._CLOSE_TAGS)
|
||||||
|
self._buf = buf[-held:] if held else ""
|
||||||
|
return "".join(out)
|
||||||
|
# Found close: discard block content + tag, continue.
|
||||||
|
buf = buf[close_idx + close_len:]
|
||||||
|
self._in_block = False
|
||||||
|
else:
|
||||||
|
# Priority 1 — closed <tag>X</tag> pair anywhere in
|
||||||
|
# buf. Closed pairs are always an intentional,
|
||||||
|
# bounded construct (even mid-line prose containing
|
||||||
|
# an open/close pair is almost certainly a model
|
||||||
|
# leaking reasoning inline), so no boundary gating.
|
||||||
|
pair = self._find_earliest_closed_pair(buf)
|
||||||
|
# Priority 2 — unterminated open tag at a block
|
||||||
|
# boundary. Boundary-gated so prose that mentions
|
||||||
|
# '<think>' isn't over-stripped.
|
||||||
|
open_idx, open_len = self._find_open_at_boundary(
|
||||||
|
buf, out,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Pick whichever match comes earliest in the buffer.
|
||||||
|
if pair is not None and (
|
||||||
|
open_idx == -1 or pair[0] <= open_idx
|
||||||
|
):
|
||||||
|
start_idx, end_idx = pair
|
||||||
|
preceding = buf[:start_idx]
|
||||||
|
if preceding:
|
||||||
|
preceding = self._strip_orphan_close_tags(preceding)
|
||||||
|
if preceding:
|
||||||
|
out.append(preceding)
|
||||||
|
self._last_emitted_ended_newline = (
|
||||||
|
preceding.endswith("\n")
|
||||||
|
)
|
||||||
|
buf = buf[end_idx:]
|
||||||
|
continue
|
||||||
|
|
||||||
|
if open_idx != -1:
|
||||||
|
# Unterminated open at boundary — emit preceding,
|
||||||
|
# enter block, continue loop with remainder.
|
||||||
|
preceding = buf[:open_idx]
|
||||||
|
if preceding:
|
||||||
|
preceding = self._strip_orphan_close_tags(preceding)
|
||||||
|
if preceding:
|
||||||
|
out.append(preceding)
|
||||||
|
self._last_emitted_ended_newline = (
|
||||||
|
preceding.endswith("\n")
|
||||||
|
)
|
||||||
|
self._in_block = True
|
||||||
|
buf = buf[open_idx + open_len:]
|
||||||
|
continue
|
||||||
|
|
||||||
|
# No resolvable tag structure in buf. Hold back any
|
||||||
|
# partial-tag prefix at the tail so a split tag
|
||||||
|
# across deltas isn't missed, then emit the rest.
|
||||||
|
held = self._max_partial_suffix(buf, self._OPEN_TAGS)
|
||||||
|
held_close = self._max_partial_suffix(
|
||||||
|
buf, self._CLOSE_TAGS,
|
||||||
|
)
|
||||||
|
held = max(held, held_close)
|
||||||
|
if held:
|
||||||
|
emit_text = buf[:-held]
|
||||||
|
self._buf = buf[-held:]
|
||||||
|
else:
|
||||||
|
emit_text = buf
|
||||||
|
self._buf = ""
|
||||||
|
if emit_text:
|
||||||
|
emit_text = self._strip_orphan_close_tags(emit_text)
|
||||||
|
if emit_text:
|
||||||
|
out.append(emit_text)
|
||||||
|
self._last_emitted_ended_newline = (
|
||||||
|
emit_text.endswith("\n")
|
||||||
|
)
|
||||||
|
return "".join(out)
|
||||||
|
|
||||||
|
return "".join(out)
|
||||||
|
|
||||||
|
def flush(self) -> str:
|
||||||
|
"""End-of-stream flush.
|
||||||
|
|
||||||
|
If still inside an unterminated block, held-back content is
|
||||||
|
discarded — leaking partial reasoning is worse than a
|
||||||
|
truncated answer. Otherwise the held-back partial-tag tail is
|
||||||
|
emitted verbatim (it turned out not to be a real tag prefix).
|
||||||
|
"""
|
||||||
|
if self._in_block:
|
||||||
|
self._buf = ""
|
||||||
|
self._in_block = False
|
||||||
|
return ""
|
||||||
|
tail = self._buf
|
||||||
|
self._buf = ""
|
||||||
|
if not tail:
|
||||||
|
return ""
|
||||||
|
tail = self._strip_orphan_close_tags(tail)
|
||||||
|
if tail:
|
||||||
|
self._last_emitted_ended_newline = tail.endswith("\n")
|
||||||
|
return tail
|
||||||
|
|
||||||
|
# ── internal helpers ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _find_first_tag(
|
||||||
|
buf: str, tags: Tuple[str, ...],
|
||||||
|
) -> Tuple[int, int]:
|
||||||
|
"""Return (earliest_index, tag_length) over *tags*, or (-1, 0).
|
||||||
|
|
||||||
|
Case-insensitive match.
|
||||||
|
"""
|
||||||
|
buf_lower = buf.lower()
|
||||||
|
best_idx = -1
|
||||||
|
best_len = 0
|
||||||
|
for tag in tags:
|
||||||
|
idx = buf_lower.find(tag.lower())
|
||||||
|
if idx != -1 and (best_idx == -1 or idx < best_idx):
|
||||||
|
best_idx = idx
|
||||||
|
best_len = len(tag)
|
||||||
|
return best_idx, best_len
|
||||||
|
|
||||||
|
def _find_earliest_closed_pair(self, buf: str):
|
||||||
|
"""Return (start_idx, end_idx) of the earliest closed pair, else None.
|
||||||
|
|
||||||
|
A closed pair is ``<tag>...</tag>`` of any variant. Matches are
|
||||||
|
case-insensitive and non-greedy (the closest close tag after
|
||||||
|
an open tag wins), matching the regex ``<tag>.*?</tag>``
|
||||||
|
semantics of ``_strip_think_blocks`` case 1. When two tag
|
||||||
|
variants could both match, the one whose open tag appears
|
||||||
|
earlier wins.
|
||||||
|
"""
|
||||||
|
buf_lower = buf.lower()
|
||||||
|
best: "tuple[int, int] | None" = None
|
||||||
|
for open_tag, close_tag in zip(self._OPEN_TAGS, self._CLOSE_TAGS):
|
||||||
|
open_lower = open_tag.lower()
|
||||||
|
close_lower = close_tag.lower()
|
||||||
|
open_idx = buf_lower.find(open_lower)
|
||||||
|
if open_idx == -1:
|
||||||
|
continue
|
||||||
|
close_idx = buf_lower.find(
|
||||||
|
close_lower, open_idx + len(open_lower),
|
||||||
|
)
|
||||||
|
if close_idx == -1:
|
||||||
|
continue
|
||||||
|
end_idx = close_idx + len(close_lower)
|
||||||
|
if best is None or open_idx < best[0]:
|
||||||
|
best = (open_idx, end_idx)
|
||||||
|
return best
|
||||||
|
|
||||||
|
def _find_open_at_boundary(
|
||||||
|
self, buf: str, already_emitted: list[str],
|
||||||
|
) -> Tuple[int, int]:
|
||||||
|
"""Return the earliest block-boundary open-tag (idx, len).
|
||||||
|
|
||||||
|
Returns (-1, 0) if no boundary-legal opener is present.
|
||||||
|
"""
|
||||||
|
buf_lower = buf.lower()
|
||||||
|
best_idx = -1
|
||||||
|
best_len = 0
|
||||||
|
for tag in self._OPEN_TAGS:
|
||||||
|
tag_lower = tag.lower()
|
||||||
|
search_start = 0
|
||||||
|
while True:
|
||||||
|
idx = buf_lower.find(tag_lower, search_start)
|
||||||
|
if idx == -1:
|
||||||
|
break
|
||||||
|
if self._is_block_boundary(buf, idx, already_emitted):
|
||||||
|
if best_idx == -1 or idx < best_idx:
|
||||||
|
best_idx = idx
|
||||||
|
best_len = len(tag)
|
||||||
|
break # first boundary hit for this tag is enough
|
||||||
|
search_start = idx + 1
|
||||||
|
return best_idx, best_len
|
||||||
|
|
||||||
|
def _is_block_boundary(
|
||||||
|
self, buf: str, idx: int, already_emitted: list[str],
|
||||||
|
) -> bool:
|
||||||
|
"""True iff position *idx* in *buf* is a block boundary.
|
||||||
|
|
||||||
|
A block boundary is:
|
||||||
|
- buf position 0 AND the most recent emission ended with
|
||||||
|
a newline (or nothing has been emitted yet)
|
||||||
|
- any position whose preceding text on the current line
|
||||||
|
(since the last newline in buf) is whitespace-only, AND
|
||||||
|
if there is no newline in the preceding buf portion, the
|
||||||
|
most recent prior emission ended with a newline
|
||||||
|
"""
|
||||||
|
if idx == 0:
|
||||||
|
# Check whether the last already-emitted chunk in THIS
|
||||||
|
# feed() call ended with a newline, otherwise fall back
|
||||||
|
# to the cross-feed flag.
|
||||||
|
if already_emitted:
|
||||||
|
return already_emitted[-1].endswith("\n")
|
||||||
|
return self._last_emitted_ended_newline
|
||||||
|
preceding = buf[:idx]
|
||||||
|
last_nl = preceding.rfind("\n")
|
||||||
|
if last_nl == -1:
|
||||||
|
# No newline in buf before the tag — boundary only if the
|
||||||
|
# prior emission ended with a newline AND everything since
|
||||||
|
# is whitespace.
|
||||||
|
if already_emitted:
|
||||||
|
prior_newline = already_emitted[-1].endswith("\n")
|
||||||
|
else:
|
||||||
|
prior_newline = self._last_emitted_ended_newline
|
||||||
|
return prior_newline and preceding.strip() == ""
|
||||||
|
# Newline present — text between it and the tag must be
|
||||||
|
# whitespace-only.
|
||||||
|
return preceding[last_nl + 1:].strip() == ""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _max_partial_suffix(
|
||||||
|
cls, buf: str, tags: Tuple[str, ...],
|
||||||
|
) -> int:
|
||||||
|
"""Return the longest buf-suffix that is a prefix of any tag.
|
||||||
|
|
||||||
|
Only prefixes strictly shorter than the tag itself count
|
||||||
|
(full-length suffixes are the tag and are handled as matches,
|
||||||
|
not held-back partials). Case-insensitive.
|
||||||
|
"""
|
||||||
|
if not buf:
|
||||||
|
return 0
|
||||||
|
buf_lower = buf.lower()
|
||||||
|
max_check = min(len(buf_lower), cls._MAX_TAG_LEN - 1)
|
||||||
|
for i in range(max_check, 0, -1):
|
||||||
|
suffix = buf_lower[-i:]
|
||||||
|
for tag in tags:
|
||||||
|
tag_lower = tag.lower()
|
||||||
|
if len(tag_lower) > i and tag_lower.startswith(suffix):
|
||||||
|
return i
|
||||||
|
return 0
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _strip_orphan_close_tags(cls, text: str) -> str:
|
||||||
|
"""Remove any close tags from *text* (orphan-close handling).
|
||||||
|
|
||||||
|
An orphan close tag has no matching open in the current
|
||||||
|
scrubber state; it's always noise, stripped with any trailing
|
||||||
|
whitespace so the surrounding prose flows naturally.
|
||||||
|
"""
|
||||||
|
if "</" not in text:
|
||||||
|
return text
|
||||||
|
text_lower = text.lower()
|
||||||
|
out: list[str] = []
|
||||||
|
i = 0
|
||||||
|
while i < len(text):
|
||||||
|
matched = False
|
||||||
|
if text_lower[i:i + 2] == "</":
|
||||||
|
for tag in cls._CLOSE_TAGS:
|
||||||
|
tag_lower = tag.lower()
|
||||||
|
tag_len = len(tag_lower)
|
||||||
|
if text_lower[i:i + tag_len] == tag_lower:
|
||||||
|
# Skip the tag and any trailing whitespace,
|
||||||
|
# matching _strip_think_blocks case 3.
|
||||||
|
j = i + tag_len
|
||||||
|
while j < len(text) and text[j] in " \t\n\r":
|
||||||
|
j += 1
|
||||||
|
i = j
|
||||||
|
matched = True
|
||||||
|
break
|
||||||
|
if not matched:
|
||||||
|
out.append(text[i])
|
||||||
|
i += 1
|
||||||
|
return "".join(out)
|
||||||
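As a sanity check on the hold-back and boundary rules described in the module docstring, a few hypothetical feeds (expected values traced from the code above):

```python
s = StreamingThinkScrubber()
assert s.feed("<thi") == ""          # partial tag held back across the delta boundary
assert s.feed("nk>secret") == ""     # open resolved at a boundary; block content dropped
assert s.feed("</think>ok") == "ok"  # close consumed; trailing prose emitted
assert s.flush() == ""               # nothing left held back

s.reset()
# A mid-line *mention* of a tag is boundary-gated, so it passes through.
assert s.feed("use <think> tags here\n") == "use <think> tags here\n"

s.reset()
# A closed pair is always suppressed, even mid-line.
assert s.feed("a <think>x</think>b") == "a b"
```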
@@ -17,6 +17,7 @@ logger = logging.getLogger(__name__)
 # so silent-drops (e.g. OpenRouter 402 exhausting the fallback chain)
 # become visible instead of piling up as NULL session titles.
 FailureCallback = Callable[[str, BaseException], None]
+TitleCallback = Callable[[str], None]

 _TITLE_PROMPT = (
     "Generate a short, descriptive title (3-7 words) for a conversation that starts with the "

@@ -90,6 +91,7 @@ def auto_title_session(
     assistant_response: str,
     failure_callback: Optional[FailureCallback] = None,
     main_runtime: dict = None,
+    title_callback: Optional[TitleCallback] = None,
 ) -> None:
     """Generate and set a session title if one doesn't already exist.

@@ -119,6 +121,11 @@ def auto_title_session(
     try:
         session_db.set_session_title(session_id, title)
         logger.debug("Auto-generated session title: %s", title)
+        if title_callback is not None:
+            try:
+                title_callback(title)
+            except Exception:
+                logger.debug("Auto-title callback failed", exc_info=True)
     except Exception as e:
         logger.debug("Failed to set auto-generated title: %s", e)

@@ -131,6 +138,7 @@ def maybe_auto_title(
     conversation_history: list,
     failure_callback: Optional[FailureCallback] = None,
     main_runtime: dict = None,
+    title_callback: Optional[TitleCallback] = None,
 ) -> None:
     """Fire-and-forget title generation after the first exchange.

@@ -152,7 +160,11 @@ def maybe_auto_title(
     thread = threading.Thread(
         target=auto_title_session,
         args=(session_db, session_id, user_message, assistant_response),
-        kwargs={"failure_callback": failure_callback, "main_runtime": main_runtime},
+        kwargs={
+            "failure_callback": failure_callback,
+            "main_runtime": main_runtime,
+            "title_callback": title_callback,
+        },
         daemon=True,
         name="auto-title",
     )
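For a consumer that wants to react when a title lands (say, refreshing a sidebar), a hypothetical call; the leading positional parameters are assumptions inferred from the thread args above, not confirmed by this hunk:

```python
def on_title(title: str) -> None:
    # Hypothetical UI hook; any callable taking the title string works.
    print(f"session titled: {title}")

maybe_auto_title(
    session_db,               # assumed positional, mirroring auto_title_session
    session_id,               # assumed positional
    conversation_history,
    title_callback=on_title,  # invoked on the background thread after the DB write
)
```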
@@ -143,7 +143,18 @@ class ResponsesApiTransport(ProviderTransport):
         kwargs["max_output_tokens"] = max_tokens

     if is_xai_responses and session_id:
-        kwargs["extra_headers"] = {"x-grok-conv-id": session_id}
+        existing_extra_headers = kwargs.get("extra_headers")
+        merged_extra_headers: Dict[str, str] = {}
+        if isinstance(existing_extra_headers, dict):
+            merged_extra_headers.update(
+                {
+                    str(key): str(value)
+                    for key, value in existing_extra_headers.items()
+                    if key and value is not None
+                }
+            )
+        merged_extra_headers["x-grok-conv-id"] = session_id
+        kwargs["extra_headers"] = merged_extra_headers

     return kwargs
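The merge above preserves any caller-supplied `extra_headers` instead of clobbering them. A standalone sketch of the same behaviour (the helper name is made up for illustration):

```python
from typing import Dict

def merge_grok_headers(existing: object, session_id: str) -> Dict[str, str]:
    # Mirrors the transport logic: sanitise existing headers to str/str,
    # drop falsy keys and None values, then set x-grok-conv-id last so
    # it always wins.
    merged: Dict[str, str] = {}
    if isinstance(existing, dict):
        merged.update({
            str(k): str(v)
            for k, v in existing.items()
            if k and v is not None
        })
    merged["x-grok-conv-id"] = session_id
    return merged

assert merge_grok_headers({"x-tag": "demo", "bad": None}, "s-1") == {
    "x-tag": "demo",
    "x-grok-conv-id": "s-1",
}
```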
@@ -10,17 +10,34 @@ Browser-based dashboard for managing Hermes Agent configuration, API keys, and m

 ## Development

-```bash
-# Start the backend API server
-cd ../
-python -m hermes_cli.main web --no-open
-
-# In another terminal, start the Vite dev server (with HMR + API proxy)
-cd web/
+Install workspace dependencies from the repo root first:
+
+```bash
+npm install
+```
+
+Start the backend API server from the repo root:
+
+```bash
+hermes dashboard --tui --no-open
+```
+
+`--tui` exposes the in-browser Chat tab through `/api/pty`. Omit it if you only need the config/session dashboard.
+
+In another terminal, start the Vite dev server:
+
+```bash
+cd apps/dashboard
 npm run dev
 ```

-The Vite dev server proxies `/api` requests to `http://127.0.0.1:9119` (the FastAPI backend).
+The Vite dev server proxies `/api`, `/api/pty`, and `/dashboard-plugins` to `http://127.0.0.1:9119` (the FastAPI backend). It also fetches the backend's `index.html` on each dev page load so the ephemeral session token stays in sync.
+
+If the `hermes` entry point is not installed, use:
+
+```bash
+python -m hermes_cli.main dashboard --tui --no-open
+```

 ## Build

@@ -28,7 +45,7 @@ The Vite dev server proxies `/api` requests to `http://127.0.0.1:9119` (the Fast
 npm run build
 ```

-This outputs to `../hermes_cli/web_dist/`, which the FastAPI server serves as a static SPA. The built assets are included in the Python package via `pyproject.toml` package-data.
+This outputs to `../../hermes_cli/web_dist/`, which the FastAPI server serves as a static SPA. The built assets are included in the Python package via `pyproject.toml` package-data.

 ## Structure
@@ -1,13 +1,14 @@
 {
-  "name": "web",
+  "name": "dashboard",
   "version": "0.0.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
-      "name": "web",
+      "name": "dashboard",
       "version": "0.0.0",
       "dependencies": {
+        "@hermes/shared": "file:../shared",
         "@nous-research/ui": "^0.10.0",
         "@observablehq/plot": "^0.6.17",
         "@react-three/fiber": "^9.6.0",
@@ -45,6 +46,13 @@
         "vite": "^7.3.1"
       }
     },
+    "../shared": {
+      "name": "@hermes/shared",
+      "version": "0.0.0",
+      "devDependencies": {
+        "typescript": "^6.0.3"
+      }
+    },
     "node_modules/@babel/code-frame": {
       "version": "7.29.0",
       "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.29.0.tgz",
@@ -947,6 +955,10 @@
       "integrity": "sha512-RiB/yIh78pcIxl6lLMG0CgBXAZ2Y0eVHqMPYugu+9U0AeT6YBeiJpf7lbdJNIugFP5SIjwNRgo4DhR1Qxi26Gg==",
       "license": "MIT"
     },
+    "node_modules/@hermes/shared": {
+      "resolved": "../shared",
+      "link": true
+    },
     "node_modules/@humanfs/core": {
       "version": "0.19.2",
       "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.2.tgz",
@@ -2371,6 +2383,64 @@
         "node": ">=14.0.0"
       }
     },
+    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/core": {
+      "version": "1.8.1",
+      "inBundle": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "@emnapi/wasi-threads": "1.1.0",
+        "tslib": "^2.4.0"
+      }
+    },
+    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/runtime": {
+      "version": "1.8.1",
+      "inBundle": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "tslib": "^2.4.0"
+      }
+    },
+    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/wasi-threads": {
+      "version": "1.1.0",
+      "inBundle": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "tslib": "^2.4.0"
+      }
+    },
+    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@napi-rs/wasm-runtime": {
+      "version": "1.1.1",
+      "inBundle": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "@emnapi/core": "^1.7.1",
+        "@emnapi/runtime": "^1.7.1",
+        "@tybys/wasm-util": "^0.10.1"
+      },
+      "funding": {
+        "type": "github",
+        "url": "https://github.com/sponsors/Brooooooklyn"
+      }
+    },
+    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@tybys/wasm-util": {
+      "version": "0.10.1",
+      "inBundle": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "tslib": "^2.4.0"
+      }
+    },
+    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/tslib": {
+      "version": "2.8.1",
+      "inBundle": true,
+      "license": "0BSD",
+      "optional": true
+    },
     "node_modules/@tailwindcss/oxide-win32-arm64-msvc": {
       "version": "4.2.4",
       "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.2.4.tgz",
@@ -1,10 +1,10 @@
 {
-  "name": "web",
+  "name": "dashboard",
   "private": true,
   "version": "0.0.0",
   "type": "module",
   "scripts": {
-    "sync-assets": "rm -rf public/fonts public/ds-assets && cp -r node_modules/@nous-research/ui/dist/fonts public/fonts && cp -r node_modules/@nous-research/ui/dist/assets public/ds-assets",
+    "sync-assets": "node scripts/sync-assets.cjs",
     "predev": "npm run sync-assets",
     "prebuild": "npm run sync-assets",
     "dev": "vite",

@@ -13,6 +13,7 @@
     "preview": "vite preview"
   },
   "dependencies": {
+    "@hermes/shared": "file:../shared",
     "@nous-research/ui": "^0.10.0",
     "@observablehq/plot": "^0.6.17",
     "@react-three/fiber": "^9.6.0",
BIN apps/dashboard/public/ds-assets/filler-bg0.jpg Normal file (binary not shown; after: 3.7 MiB)
BIN (modified image, name not shown) (binary not shown; before: 8.3 KiB, after: 8.3 KiB)
BIN apps/dashboard/public/fonts/Collapse-BoldItalic.woff2 Normal file (binary not shown)
BIN apps/dashboard/public/fonts/Collapse-Italic.woff2 Normal file (binary not shown)
BIN apps/dashboard/public/fonts/Collapse-Light.woff2 Normal file (binary not shown)
BIN apps/dashboard/public/fonts/Collapse-LightItalic.woff2 Normal file (binary not shown)
BIN apps/dashboard/public/fonts/Collapse-Thin.woff2 Normal file (binary not shown)
BIN apps/dashboard/public/fonts/Collapse-ThinItalic.woff2 Normal file (binary not shown)
BIN apps/dashboard/public/fonts/Neuebit-Bold.woff2 Normal file (binary not shown)
46 apps/dashboard/scripts/sync-assets.cjs Normal file
@@ -0,0 +1,46 @@
#!/usr/bin/env node
/**
 * Copy font and asset folders from @nous-research/ui into public/ for Vite.
 *
 * Locates @nous-research/ui by walking up from this script looking for
 * node_modules/@nous-research/ui — works whether the dep is co-located
 * (non-workspace layout) or hoisted to the repo root (npm workspaces).
 */
const fs = require('node:fs')
const path = require('node:path')

const DASHBOARD_ROOT = path.resolve(__dirname, '..')

function locateUiPackage() {
  let dir = DASHBOARD_ROOT
  const { root } = path.parse(dir)
  while (true) {
    const candidate = path.join(dir, 'node_modules', '@nous-research', 'ui')
    if (fs.existsSync(path.join(candidate, 'package.json'))) {
      return candidate
    }
    if (dir === root) break
    dir = path.dirname(dir)
  }
  throw new Error(
    '@nous-research/ui not found. Run `npm install` from the repo root.'
  )
}

const uiRoot = locateUiPackage()
const distRoot = path.join(uiRoot, 'dist')

const mappings = [
  ['fonts', path.join(DASHBOARD_ROOT, 'public', 'fonts')],
  ['assets', path.join(DASHBOARD_ROOT, 'public', 'ds-assets')],
]

for (const [srcName, destPath] of mappings) {
  const srcPath = path.join(distRoot, srcName)
  if (!fs.existsSync(srcPath)) {
    throw new Error(`Missing ${srcPath} in @nous-research/ui — rebuild that package.`)
  }
  fs.rmSync(destPath, { recursive: true, force: true })
  fs.cpSync(srcPath, destPath, { recursive: true })
  console.log(`synced ${path.relative(DASHBOARD_ROOT, destPath)}`)
}
@@ -80,6 +80,14 @@ function RootRedirect() {
   return <Navigate to="/sessions" replace />;
 }

+function UnknownRouteFallback({ pluginsLoading }: { pluginsLoading: boolean }) {
+  if (pluginsLoading) {
+    // Render nothing during the plugin-load window — a spinner here would just flash.
+    return null;
+  }
+  return <Navigate to="/sessions" replace />;
+}

 const CHAT_NAV_ITEM: NavItem = {
   path: "/chat",
   labelKey: "chat",

@@ -582,7 +590,9 @@ export default function App() {
       ))}
       <Route
         path="*"
-        element={<Navigate to="/sessions" replace />}
+        element={
+          <UnknownRouteFallback pluginsLoading={pluginsLoading} />
+        }
       />
     </Routes>
@@ -4,6 +4,7 @@ import { Button } from "@nous-research/ui/ui/components/button";
 import { ListItem } from "@nous-research/ui/ui/components/list-item";
 import { Typography } from "@/components/NouiTypography";
 import { BUILTIN_THEMES, useTheme } from "@/themes";
+import type { DashboardTheme } from "@/themes";
 import { useI18n } from "@/i18n";
 import { cn } from "@/lib/utils";

@@ -11,8 +12,8 @@ import { cn } from "@/lib/utils";
  * Compact theme picker mounted next to the language switcher in the header.
  * Each dropdown row shows a 3-stop swatch (background / midground / warm
  * glow) so users can preview the palette before committing. User-defined
- * themes from `~/.hermes/dashboard-themes/*.yaml` that aren't in
- * `BUILTIN_THEMES` render without swatches and apply the default palette.
+ * themes from `~/.hermes/dashboard-themes/*.yaml` use their API-provided
+ * definitions so they show real palette swatches just like built-ins.
  *
  * When placed at the bottom of a container (e.g. the sidebar rail), pass
  * `dropUp` so the menu opens above the trigger instead of clipping below

@@ -95,7 +96,7 @@ export function ThemeSwitcher({ dropUp = false }: ThemeSwitcherProps) {

       {availableThemes.map((th) => {
         const isActive = th.name === themeName;
-        const preset = BUILTIN_THEMES[th.name];
+        const paletteTheme = BUILTIN_THEMES[th.name] ?? th.definition;

         return (
           <ListItem

@@ -109,8 +110,8 @@ export function ThemeSwitcher({ dropUp = false }: ThemeSwitcherProps) {
             }}
             className="gap-3"
           >
-            {preset ? (
-              <ThemeSwatch theme={preset.name} />
+            {paletteTheme ? (
+              <ThemeSwatch theme={paletteTheme} />
             ) : (
               <PlaceholderSwatch />
             )}

@@ -144,10 +145,8 @@ export function ThemeSwitcher({ dropUp = false }: ThemeSwitcherProps) {
   );
 }

-function ThemeSwatch({ theme }: { theme: string }) {
-  const preset = BUILTIN_THEMES[theme];
-  if (!preset) return <PlaceholderSwatch />;
-
-  const { background, midground, warmGlow } = preset.palette;
+function ThemeSwatch({ theme }: { theme: DashboardTheme }) {
+  const { background, midground, warmGlow } = theme.palette;

   return (
     <div
       aria-hidden
36 apps/dashboard/src/lib/gatewayClient.ts Normal file
@@ -0,0 +1,36 @@
import {
  JsonRpcGatewayClient,
  type ConnectionState,
  type GatewayEvent,
  type GatewayEventName,
} from "@hermes/shared";

export type { ConnectionState, GatewayEvent, GatewayEventName };

/**
 * Browser wrapper for the shared tui_gateway JSON-RPC client.
 *
 * Dashboard resolves its token and host from the served page. Desktop uses the
 * same shared protocol client, but supplies an absolute wsUrl from Electron.
 */
export class GatewayClient extends JsonRpcGatewayClient {
  async connect(token?: string): Promise<void> {
    const resolved = token ?? window.__HERMES_SESSION_TOKEN__ ?? "";
    if (!resolved) {
      throw new Error(
        "Session token not available — page must be served by the Hermes dashboard",
      );
    }

    const scheme = location.protocol === "https:" ? "wss:" : "ws:";
    await super.connect(
      `${scheme}//${location.host}/api/ws?token=${encodeURIComponent(resolved)}`,
    );
  }
}

declare global {
  interface Window {
    __HERMES_SESSION_TOKEN__?: string;
  }
}
@@ -27,6 +27,15 @@ import {
   Wrench,
   FileQuestion,
   Filter,
+  Cloud,
+  Sparkles,
+  LayoutDashboard,
+  BookOpen,
+  Route,
+  History,
+  Shield,
+  FileOutput,
+  RefreshCw,
 } from "lucide-react";
 import { api } from "@/lib/api";
 import { getNestedValue, setNestedValue } from "@/lib/nested";

@@ -66,6 +75,15 @@ const CATEGORY_ICONS: Record<
   logging: ClipboardList,
   discord: MessageCircle,
   auxiliary: Wrench,
+  bedrock: Cloud,
+  curator: Sparkles,
+  kanban: LayoutDashboard,
+  model_catalog: BookOpen,
+  openrouter: Route,
+  sessions: History,
+  tool_loop_guardrails: Shield,
+  tool_output: FileOutput,
+  updates: RefreshCw,
 };

 function CategoryIcon({
Some files were not shown because too many files have changed in this diff.