Compare commits

..

1 Commits

Author SHA1 Message Date
teknium1
fffbef0ec4 feat(mcp): adopt mcp__server__tool naming convention
Port from anomalyco/opencode#33533. Native MCP tools now register as
mcp__<server>__<tool> (double-underscore delimiter) instead of
mcp_<server>_<tool>, aligning with the convention used by Claude Code,
Codex, and OpenCode.

The double-underscore delimiter disambiguates the server/tool boundary
even when either component contains underscores (the single-underscore
form was ambiguous, which is why is_mcp_tool_parallel_safe already had to
track provenance in a side-map). It also unifies native registration with
the Anthropic-OAuth wire form (_MCP_TOOL_PREFIX = 'mcp__'), so the
single->double promotion that path performed is now a no-op for native
tools while still handling legacy replayed names.

- tools/mcp_tool.py: add MCP_TOOL_NAME_PREFIX + mcp_prefixed_tool_name()
  helper; route _convert_mcp_schema, utility schemas, refresh stale-set,
  and the parallel-safe prefix gate through it
- agent/transports/codex_event_projector.py: mirror convention in the
  deterministic call_id input for MCP server-executed tool calls
- tests: update produced-name assertions to the new convention
2026-06-25 17:08:44 -07:00
1110 changed files with 10652 additions and 68851 deletions

View File

@@ -66,12 +66,8 @@ runtime/
# ---------- Not needed inside the Docker image ----------
# Desktop app source (Tauri/Electron); never installed in the container.
# apps/shared is the dashboard↔desktop websocket helper and is linked from
# web/package.json as a file: workspace dep — keep it in the build context.
# Desktop app source (Tauri/Electron); never installed in the container
apps/
!apps/shared/
!apps/shared/**
# Test suite — not shipped in production images
tests/

2
.envrc
View File

@@ -1,5 +1,5 @@
watch_file pyproject.toml uv.lock
watch_file package-lock.json package.json web/package.json ui-tui/package.json website/package.json apps/shared/package.json apps/desktop/package.json ui-tui/packages/hermes-ink/package.json
watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix nix/hermes-agent.nix nix/desktop.nix
watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix
use flake

View File

@@ -0,0 +1,50 @@
name: Hermes smoke test
description: >
Run the image's built-in entrypoint against `--help` and `dashboard --help`
to catch basic runtime regressions before publishing. Requires the image
to already be loaded into the local Docker daemon under `image`.
Works identically on amd64 and arm64 runners.
inputs:
image:
description: Fully-qualified image tag (e.g. nousresearch/hermes-agent:test)
required: true
runs:
using: composite
steps:
- name: Ensure /tmp/hermes-test is hermes-writable
shell: bash
run: |
# The image runs as the hermes user (UID 10000). GitHub Actions
# creates /tmp/hermes-test root-owned by default, which hermes
# can't write to — chown it to match the in-container UID before
# bind-mounting. Real users doing `docker run -v ~/.hermes:...`
# with their own UID hit the same issue and have their own
# remediations (HERMES_UID env var, or chown locally).
mkdir -p /tmp/hermes-test
sudo chown -R 10000:10000 /tmp/hermes-test
- name: hermes --help
shell: bash
run: |
# Use the image's real ENTRYPOINT (/init + main-wrapper.sh) so
# this exercises the actual production startup path. PR #30136
# review caught that an --entrypoint override here had been
# silently neutered by the s6-overlay migration — stage2-hook
# ignores its CMD args, so the smoke test was a no-op.
docker run --rm \
-v /tmp/hermes-test:/opt/data \
"${{ inputs.image }}" --help
- name: hermes dashboard --help
shell: bash
run: |
# Regression guard for #9153: dashboard was present in source but
# missing from the published image. If this fails, something in
# the Dockerfile is excluding the dashboard subcommand from the
# installed package.
docker run --rm \
-v /tmp/hermes-test:/opt/data \
"${{ inputs.image }}" dashboard --help

View File

@@ -20,7 +20,6 @@ permissions:
pull-requests: write # needed by lint (PR comment) + supply-chain (PR comment)
actions: read # needed by osv-scanner (SARIF upload)
security-events: write # needed by osv-scanner (SARIF upload)
packages: write # needed by docker build
concurrency:
group: ci-${{ github.ref }}
@@ -33,7 +32,6 @@ jobs:
# (all lanes true) so post-merge validation is never weakened.
# ─────────────────────────────────────────────────────────────────────
detect:
name: Detect affected areas
runs-on: ubuntu-latest
outputs:
python: ${{ steps.classify.outputs.python }}
@@ -55,15 +53,11 @@ jobs:
# Skipped workflows (if condition is false) don't spin up runners.
# ─────────────────────────────────────────────────────────────────────
tests:
name: Python tests
needs: detect
if: needs.detect.outputs.python == 'true'
uses: ./.github/workflows/tests.yml
with:
slice_count: 8
lint:
name: Python lints
needs: detect
if: needs.detect.outputs.python == 'true'
uses: ./.github/workflows/lint.yml
@@ -71,49 +65,35 @@ jobs:
event_name: ${{ needs.detect.outputs.event_name }}
typecheck:
name: TypeScript
needs: detect
if: needs.detect.outputs.frontend == 'true'
uses: ./.github/workflows/typecheck.yml
docs-site:
name: Docs Site
needs: detect
if: needs.detect.outputs.site == 'true'
uses: ./.github/workflows/docs-site-checks.yml
history-check:
name: Deny unrelated histories
needs: detect
if: needs.detect.outputs.event_name == 'pull_request'
uses: ./.github/workflows/history-check.yml
contributor-check:
name: Check contributors
needs: detect
if: needs.detect.outputs.python == 'true'
uses: ./.github/workflows/contributor-check.yml
uv-lockfile:
name: Check uv.lock
needs: detect
uses: ./.github/workflows/uv-lockfile-check.yml
docker-lint:
name: Lint Docker scripts
needs: detect
if: needs.detect.outputs.docker_meta == 'true'
uses: ./.github/workflows/docker-lint.yml
docker:
name: Build&Test Docker image
needs: detect
if: needs.detect.outputs.python == 'true' || needs.detect.outputs.frontend == 'true' || needs.detect.outputs.docker_meta == 'true'
uses: ./.github/workflows/docker.yml
secrets: inherit
supply-chain:
name: Supply-chain scan
needs: detect
if: needs.detect.outputs.event_name == 'pull_request' && (needs.detect.outputs.scan == 'true' || needs.detect.outputs.deps == 'true' || needs.detect.outputs.mcp_catalog == 'true')
uses: ./.github/workflows/supply-chain-audit.yml
@@ -124,7 +104,7 @@ jobs:
mcp_catalog: ${{ needs.detect.outputs.mcp_catalog == 'true' }}
osv-scanner:
name: OSV scan
needs: detect
uses: ./.github/workflows/osv-scanner.yml
# ─────────────────────────────────────────────────────────────────────
@@ -147,8 +127,6 @@ jobs:
- docker-lint
- supply-chain
- osv-scanner
# we don't require docker to pass rn because it's so slow lol
# - docker
if: always()
runs-on: ubuntu-latest
steps:

View File

@@ -2,7 +2,7 @@ name: Docker / shell lint
# Lints the container build inputs: Dockerfile (via hadolint) and any shell
# scripts under docker/ (via shellcheck). These catch the class of regression
# the behavioral docker smoke test can't — unquoted variable
# the behavioral docker-publish smoke test can't — unquoted variable
# expansions, silently-failing RUN commands, etc.
#
# Rules and ignores are documented in .hadolint.yaml at the repo root.

View File

@@ -1,9 +1,24 @@
name: Docker Build, Test, and Publish
name: Docker Build and Publish
on:
push:
branches: [main]
paths:
- '**/*.py'
- 'pyproject.toml'
- 'uv.lock'
- 'Dockerfile'
- 'docker/**'
- '.github/workflows/docker-publish.yml'
- '.github/actions/hermes-smoke-test/**'
# No paths filter — the job must always run so the required check
# reports a status (path-gated workflows leave checks "pending" forever
# when no matching files change, which blocks merge).
pull_request:
release:
types: [published]
workflow_call:
permissions:
contents: read
@@ -24,7 +39,11 @@ env:
IMAGE_NAME: nousresearch/hermes-agent
jobs:
# Build, test, and optionally push the amd64 image.
# ---------------------------------------------------------------------------
# Build amd64 natively. This job also runs the smoke tests (basic --help
# and the dashboard subcommand regression guard from #9153), because amd64
# is the only arch we can `load` into the local daemon on an amd64 runner.
# ---------------------------------------------------------------------------
build-amd64:
# Only run on the upstream repository, not on forks
if: github.repository == 'NousResearch/hermes-agent'
@@ -34,19 +53,24 @@ jobs:
digest: ${{ steps.push.outputs.digest }}
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
# The image build + integration tests run on every event
# (PRs, push-to-main, release). Publish steps below are gated to
# push-to-main / release only.
# The image build + smoke test + integration tests run ONLY on
# push-to-main and release — never on PRs. They are the heaviest jobs
# in CI (~15-45 min) and a broken build surfaces on the main push (and
# is gated pre-merge by docker-lint + uv-lockfile-check). Every step
# below is skipped on PRs, so the job still reports green and the
# required check never hangs.
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
if: github.event_name != 'pull_request'
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
# Build once, load into the local daemon for testing. Cached
# Build once, load into the local daemon for smoke testing. Cached
# to gha with a per-arch scope; the push step below reuses every
# layer from this build.
- name: Build image (amd64)
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
- name: Build image (amd64, smoke test)
if: github.event_name != 'pull_request'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
with:
context: .
file: Dockerfile
@@ -58,12 +82,25 @@ jobs:
cache-from: type=gha,scope=docker-amd64
cache-to: type=gha,mode=max,scope=docker-amd64
- name: Smoke test image
if: github.event_name != 'pull_request'
uses: ./.github/actions/hermes-smoke-test
with:
image: ${{ env.IMAGE_NAME }}:test
# ---------------------------------------------------------------------
# Run the docker-integration test suite against the freshly-built
# image already loaded into the local daemon (`:test`).
# image already loaded into the local daemon (`:test`). These tests
# are excluded from the sharded `tests.yml :: test` matrix on purpose
# (see `_SKIP_PARTS` in scripts/run_tests_parallel.py) because each
# shard would otherwise reach the session-scoped ``built_image``
# fixture in ``tests/docker/conftest.py`` and start a 3-7min
# ``docker build`` — guaranteed to
# die in fixture setup.
#
# Piggybacking here avoids a second image build: the build step
# already loaded the image into the daemon under
# `${IMAGE_NAME}:test`, so we just point ``HERMES_TEST_IMAGE`` at
# Piggybacking here avoids a second image build: the smoke test
# already proved the image loads + runs, so the daemon has it under
# `${IMAGE_NAME}:test` and we just point ``HERMES_TEST_IMAGE`` at
# that. The fixture's ``HERMES_TEST_IMAGE`` branch (see
# tests/docker/conftest.py:62-63) short-circuits the rebuild.
#
@@ -73,20 +110,26 @@ jobs:
# cheapest path to coverage on every PR that touches docker code.
# ---------------------------------------------------------------------
- name: Install uv (for docker tests)
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
if: github.event_name != 'pull_request'
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
- name: Set up Python 3.11 (for docker tests)
if: github.event_name != 'pull_request'
run: uv python install 3.11
- name: Install Python dependencies (for docker tests)
if: github.event_name != 'pull_request'
run: |
uv venv .venv --python 3.11
source .venv/bin/activate
# ``dev`` extra pulls in pytest, pytest-asyncio —
# everything tests/docker/ needs. We deliberately avoid ``all``
# here because the docker tests only drive the container via
# subprocess and don't import hermes_agent's optional deps.
uv sync --locked --python 3.11 --extra dev
uv pip install -e ".[dev]"
- name: Run docker integration tests
if: github.event_name != 'pull_request'
env:
# Skip rebuild; use the image already loaded by the build step.
HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
@@ -96,11 +139,12 @@ jobs:
OPENAI_API_KEY: ""
NOUS_API_KEY: ""
run: |
scripts/run_tests.sh tests/docker/ --file-timeout 600
source .venv/bin/activate
python -m pytest tests/docker/ -v --tb=short
- name: Log in to Docker Hub
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -111,7 +155,7 @@ jobs:
- name: Push amd64 by digest
id: push
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
with:
context: .
file: Dockerfile
@@ -135,7 +179,7 @@ jobs:
- name: Upload digest artifact
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: digest-amd64
path: /tmp/digests/*
@@ -143,7 +187,10 @@ jobs:
retention-days: 1
# ---------------------------------------------------------------------------
# Build, test, and optionally push the arm64 image.
# Build arm64 natively on GitHub's free arm64 runner. This replaces the
# previous QEMU-emulated arm64 build, which was ~5-10x slower and shared
# a cache scope with amd64. Matches the amd64 job's shape: build+load,
# smoke test, then on push/release push by digest.
# ---------------------------------------------------------------------------
build-arm64:
if: github.repository == 'NousResearch/hermes-agent'
@@ -153,26 +200,29 @@ jobs:
digest: ${{ steps.push.outputs.digest }}
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
# arm64 build runs only on push-to-main and release (see build-amd64).
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
if: github.event_name != 'pull_request'
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
# Log in to ghcr.io so the registry-backed build cache below can be
# read (cache-from) on every event and written (cache-to) on
# push/release. Uses the workflow's GITHUB_TOKEN, which is valid for
# the whole job — unlike the gha cache backend's short-lived Azure SAS
# token, which expired mid-build on slow cold-cache arm64 runs and
# crashed the build before the tests ran (the reason the gha cache
# crashed the build before the smoke test (the reason the gha cache
# was removed from arm64 PRs in the first place).
- name: Log in to ghcr.io (build cache)
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
if: github.event_name != 'pull_request'
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# Build once, load into the local daemon for testing, then push
# Build once, load into the local daemon for smoke testing, then push
# by digest below. Reads AND writes the registry-backed cache so the
# push reuses layers from this build and the next build starts warm.
#
@@ -180,8 +230,9 @@ jobs:
# cache that previously broke here: its credential is the job-lifetime
# GITHUB_TOKEN, not a short-lived SAS token, so the cold-build-outlives-
# token failure mode cannot recur.
- name: Build image (arm64, cached publish)
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
- name: Build image (arm64, smoke test, cached publish)
if: github.event_name != 'pull_request'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
with:
context: .
file: Dockerfile
@@ -193,29 +244,15 @@ jobs:
cache-from: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64
cache-to: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64,mode=max
- name: Install uv for docker tests
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
- name: Set up Python 3.11 for docker tests
run: uv python install 3.11
- name: Install Python dependencies for docker tests
run: |
uv sync --locked --python 3.11 --extra dev
- name: Run docker tests
env:
# Skip rebuild; use the image already loaded by the build step.
HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
OPENROUTER_API_KEY: ""
OPENAI_API_KEY: ""
NOUS_API_KEY: ""
run: |
scripts/run_tests.sh tests/docker/ --file-timeout 600
- name: Smoke test image
if: github.event_name != 'pull_request'
uses: ./.github/actions/hermes-smoke-test
with:
image: ${{ env.IMAGE_NAME }}:test
- name: Log in to Docker Hub
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -223,7 +260,7 @@ jobs:
- name: Push arm64 by digest
id: push
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
with:
context: .
file: Dockerfile
@@ -245,7 +282,7 @@ jobs:
- name: Upload digest artifact
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: digest-arm64
path: /tmp/digests/*
@@ -267,17 +304,17 @@ jobs:
timeout-minutes: 10
steps:
- name: Download digests
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
with:
path: /tmp/digests
pattern: digest-*
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
- name: Log in to Docker Hub
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

View File

@@ -37,7 +37,7 @@ jobs:
fetch-depth: 0 # need full history for merge-base + worktree
- name: Install uv
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
- name: Install ruff + ty
uses: ./.github/actions/retry
@@ -110,7 +110,7 @@ jobs:
cat .lint-reports/summary.md >> "$GITHUB_STEP_SUMMARY"
- name: Upload reports as artifact
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: lint-reports
path: .lint-reports/
@@ -164,7 +164,7 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install uv
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
- name: Install ruff
uses: ./.github/actions/retry

View File

@@ -3,17 +3,17 @@ name: Build Skills Index
on:
schedule:
# Run twice daily: 6 AM and 6 PM UTC
- cron: "0 6,18 * * *"
workflow_dispatch: # Manual trigger
- cron: '0 6,18 * * *'
workflow_dispatch: # Manual trigger
push:
branches: [main]
paths:
- "scripts/build_skills_index.py"
- ".github/workflows/skills-index.yml"
- 'scripts/build_skills_index.py'
- '.github/workflows/skills-index.yml'
permissions:
contents: read
actions: write # to trigger deploy-site.yml on schedule
actions: write # to trigger deploy-site.yml on schedule
jobs:
build-index:
@@ -21,11 +21,11 @@ jobs:
if: github.repository == 'NousResearch/hermes-agent'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: "3.11"
python-version: '3.11'
- name: Install dependencies
run: pip install httpx==0.28.1 pyyaml==6.0.2
@@ -36,7 +36,7 @@ jobs:
run: python scripts/build_skills_index.py
- name: Upload index artifact
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: skills-index
path: website/static/api/skills-index.json

View File

@@ -2,11 +2,6 @@ name: Tests
on:
workflow_call:
inputs:
slice_count:
description: Number of parallel test slices
type: number
default: 8
permissions:
contents: read
@@ -17,11 +12,13 @@ concurrency:
cancel-in-progress: true
jobs:
generate:
name: "Generate slices"
test:
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.matrix.outputs.matrix }}
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
slice: [1, 2, 3, 4, 5, 6]
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -30,26 +27,13 @@ jobs:
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: test_durations.json
# main always writes a new suffix, but jobs pick the latest one with the same prefix
# quote from https://docs.github.com/en/actions/reference/workflows-and-actions/dependency-caching#cache-hits-and-misses
# If you provide restore-keys, the cache action sequentially searches for any caches that match the list of restore-keys.
# If there are no exact matches, the action searches for partial matches of the restore keys.
# When the action finds a partial match, the most recent cache is restored to the path directory.
key: test-durations
- name: Generate test slices
id: matrix
run: |
MATRIX=$(python3 scripts/run_tests_parallel.py --generate-slices ${{ inputs.slice_count }})
echo "matrix=$MATRIX" >> "$GITHUB_OUTPUT"
test:
name: Run tests slice ${{ matrix.slice.index }}/${{ inputs.slice_count }}
needs: generate
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix: ${{ fromJSON(needs.generate.outputs.matrix) }}
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install ripgrep (prebuilt binary)
run: |
set -euo pipefail
@@ -65,7 +49,7 @@ jobs:
rg --version
- name: Install uv
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
with:
# Persist uv's download/wheel cache (~/.cache/uv) across runs.
# Keyed on the dependency manifests, so the cache is reused until
@@ -94,19 +78,33 @@ jobs:
# re-download, keeping the persisted cache small and fast to restore.
run: uv cache prune --ci
- name: Run tests (slice ${{ matrix.slice.index }}/${{ inputs.slice_count }})
# Per-file isolation via scripts/run_tests.sh: each test file runs
# in its own freshly-spawned `python -m pytest <file>` subprocess
- name: Run tests (slice ${{ matrix.slice }}/6)
# Per-file isolation via scripts/run_tests_parallel.py: discovers
# every test_*.py file under tests/ (excluding integration/ + e2e/),
# then runs `python -m pytest <file>` in a freshly-spawned subprocess
# with bounded parallelism. No xdist, no shared workers, no
# module-level state leakage between files.
#
# File list is pre-computed by the generate job (--generate-slices)
# which runs LPT distribution once and passes the file list to each
# matrix job via --files. Previously each job re-discovered files and
# re-ran LPT independently — redundant N times.
# Why per-file (not per-test): per-test spawn cost (~250ms × 17k
# tests = 70min CPU minimum) blew the wall-clock budget. Per-file
# spawn (~250ms × ~850 files = ~3.5min) fits while still giving
# every file a fresh interpreter — the only isolation boundary
# that matters in practice (cross-file leakage was the original
# flake source; intra-file is the test author's responsibility).
#
# Why drop xdist entirely: xdist's persistent workers accumulate
# state across files, which is exactly the leakage we wanted to
# fix. ThreadPoolExecutor + subprocess.run is ~60 lines and does
# the job with cleaner semantics.
#
# Matrix slicing (--slice I/N): files are distributed across 6
# jobs by cached duration (LPT algorithm) so each job gets
# roughly equal wall time. Without a cache, files default to 2s
# estimate and get split roughly evenly by count — still correct,
# just not perfectly balanced.
run: |
source .venv/bin/activate
scripts/run_tests.sh --files '${{ matrix.slice.files }}'
python scripts/run_tests_parallel.py --slice ${{ matrix.slice }}/6
env:
# Ensure tests don't accidentally call real APIs
OPENROUTER_API_KEY: ""
@@ -116,7 +114,7 @@ jobs:
- name: Upload per-slice durations
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
name: test-durations-slice-${{ matrix.slice.index }}
name: test-durations-slice-${{ matrix.slice }}
path: test_durations.json
retention-days: 1
@@ -175,7 +173,7 @@ jobs:
rg --version
- name: Install uv
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
with:
# Persist uv's download/wheel cache (~/.cache/uv) across runs.
# Keyed on the dependency manifests, so the cache is reused until

View File

@@ -6,7 +6,6 @@ on:
jobs:
typecheck:
name: Check TypeScript
runs-on: ubuntu-latest
strategy:
matrix:
@@ -23,7 +22,8 @@ jobs:
# native builds. Skipping install scripts drops node-pty's node-gyp
# header fetch — the transient flake that killed this job pre-`tsc` — and
# is faster. retry covers the remaining registry blips.
- uses: ./.github/actions/retry
-
uses: ./.github/actions/retry
with:
command: npm ci --ignore-scripts
- run: npm run --prefix ${{ matrix.package }} typecheck
@@ -35,7 +35,6 @@ jobs:
# users build apps/desktop from source on install/update. Run the real
# `vite build` here so that class of break fails in CI instead.
desktop-build:
name: Build desktop app
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -45,7 +44,8 @@ jobs:
cache: npm
# Keep install scripts here: the production build may need node-pty's
# native binary. retry handles the transient install-time fetch flakes.
- uses: ./.github/actions/retry
-
uses: ./.github/actions/retry
with:
command: npm ci
- run: npm run --prefix apps/desktop build

View File

@@ -5,11 +5,11 @@ name: Publish to PyPI
on:
push:
tags:
- "v20*" # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
- 'v20*' # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
workflow_dispatch:
inputs:
confirm_tag:
description: "Tag to publish (e.g. v2026.5.15). Must already exist."
description: 'Tag to publish (e.g. v2026.5.15). Must already exist.'
required: true
type: string
@@ -27,7 +27,7 @@ jobs:
name: Build distribution 📦
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
# On workflow_dispatch, check out the confirmed tag.
@@ -43,17 +43,17 @@ jobs:
fi
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: "3.13"
python-version: '3.13'
- name: Install uv
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6
- name: Set up Node.js
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
with:
node-version: "22"
node-version: '22'
- name: Build web dashboard
run: cd web && npm ci && npm run build
@@ -81,7 +81,7 @@ jobs:
run: uv build --sdist --wheel
- name: Upload distribution artifacts
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: python-package-distributions
path: dist/
@@ -94,17 +94,17 @@ jobs:
name: pypi
url: https://pypi.org/p/hermes-agent
permissions:
id-token: write # OIDC trusted publishing
id-token: write # OIDC trusted publishing
steps:
- name: Download distribution artifacts
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
with:
name: python-package-distributions
path: dist/
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
with:
skip-existing: true
@@ -116,12 +116,12 @@ jobs:
needs: publish
runs-on: ubuntu-latest
permissions:
contents: write # attach assets to the existing release
id-token: write # sigstore signing
contents: write # attach assets to the existing release
id-token: write # sigstore signing
steps:
- name: Download distribution artifacts
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
with:
name: python-package-distributions
path: dist/
@@ -145,7 +145,7 @@ jobs:
- name: Sign with Sigstore
if: env.skip_sign != 'true'
uses: sigstore/gh-action-sigstore-python@04cffa1d795717b140764e8b640de88853c92acc # v3.3.0
uses: sigstore/gh-action-sigstore-python@04cffa1d795717b140764e8b640de88853c92acc # v3.3.0
with:
inputs: >-
./dist/*.tar.gz

View File

@@ -4,7 +4,7 @@ name: uv.lock check
# that modify pyproject.toml without regenerating uv.lock (or vice versa)
# must not merge, because the Docker build's `uv sync --frozen` step will
# fail on a stale lockfile and we'd rather catch it here than in the
# docker workflow on main.
# docker-publish workflow on main.
#
# ─────────────────────────────────────────────────────────────────────────
# IMPORTANT: this check runs against the MERGED state, not just your branch
@@ -63,7 +63,7 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install uv
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
# `uv lock --check` re-resolves the project from pyproject.toml and
# compares the result to uv.lock, exiting non-zero if they disagree.
@@ -100,7 +100,7 @@ jobs:
This check is blocking because the Docker image build uses
`uv sync --frozen --extra all`, which rejects stale lockfiles
— catching it here avoids a ~15 min failed docker run
— catching it here avoids a ~15 min failed docker-publish run
on `main` post-merge.
EOF
echo "::error title=uv.lock out of sync::Run \`uv lock\` locally and commit the result. If on a PR, sync with main first."

6
.gitignore vendored
View File

@@ -137,9 +137,3 @@ RELEASE_v*.md
# Desktop demo-run scratch output (hermes writes demo/*.txt during recorded
# walkthroughs). Throwaway artifacts, never part of the app.
apps/desktop/demo/
# PR infographics are rendered locally and embedded in PR descriptions via the
# image-provider (fal.media) URL — they are NEVER committed to the repo. The
# PR body is the archive. See the hermes-agent-dev skill's
# pr-infographic-workflow reference (storage rule + lapse #8 / #COMMIT-1).
infographic/

View File

@@ -123,17 +123,6 @@ conservative at the waist.
without E2E proof, and plugins that touch core files.** Plugins live in their
own directory and work within the ABCs/hooks we provide; if a plugin needs
more, widen the generic plugin surface, don't special-case it in core.
- **Third-party products / other people's projects integrated into the core
tree.** Observability backends, vendor SaaS integrations, analytics dashboards,
and similar "someone else's product" plugins do NOT land under `plugins/` in
this repo. They place an ongoing maintenance burden on us to keep them working
against a fast-moving core, for a backend we don't own. Ship them as a
**standalone plugin repo** users install into `~/.hermes/plugins/` (or via a
pip entry point), and promote them in the Nous Research Discord
(`#plugins-skills-and-skins`). This is a coupling-and-maintenance decision, not
a quality bar — the plugin can be excellent and still be a close. PRs that add
such a directory to the tree are closed with a pointer to publish it as its own
repo.
### Before you call it a bug — verify the premise (and when NOT to close)
@@ -491,7 +480,7 @@ The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes
### Electron Desktop Chat App (`apps/desktop/`)
A **separate** chat surface from both the classic CLI and the dashboard's embedded TUI. It is an Electron + React + nanostore renderer (`@assistant-ui/react`) that talks to a `tui_gateway` backend over JSON-RPC (`requestGateway(method, params)`). The WebSocket/JSON-RPC transport lives in the framework-agnostic `apps/shared` package (`@hermes/shared``JsonRpcGatewayClient` + WS URL helpers), which the web dashboard (`web/`) also consumes; **desktop has no build/runtime dependency on the dashboard frontend** — it spawns a headless `hermes serve` backend server (the same gateway `dashboard` serves, minus the browser UI). `dashboard` and `serve` share `cmd_dashboard`/`start_server` but are independent surfaces — neither launches the other. The one exception is a backward-compat *fallback*: `serve` is newer, so the desktop spawn (`electron/backend-command.cjs` + `backendSupportsServe()` in `main.cjs`) detects whether the resolved runtime registers `serve` and, only when it does not (an older managed install / PATH `hermes` the app hasn't updated yet), rewrites the argv to the legacy `dashboard --no-open`. Without that, a new app against an un-upgraded runtime would crash on an unknown subcommand and brick every mid-upgrade user. It does NOT embed `hermes --tui` — it has its own composer, transcript, and slash-command pipeline. Route desktop bugs to the `hermes-desktop-app-work` skill, not `hermes-dashboard-work`.
A **separate** chat surface from both the classic CLI and the dashboard's embedded TUI. It is an Electron + React + nanostore renderer (`@assistant-ui/react`) that talks to a `tui_gateway` backend over JSON-RPC (`requestGateway(method, params)`). It does NOT embed `hermes --tui` — it has its own composer, transcript, and slash-command pipeline. Route desktop bugs to the `hermes-desktop-app-work` skill, not `hermes-dashboard-work`.
**Slash commands in the desktop app are curated client-side, then dispatched to the backend.** The pipeline:
@@ -794,24 +783,6 @@ landing in this tree. PRs that add a new directory under
provider as its own repo. Existing in-tree providers stay; bug fixes
to them are welcome.
**No new third-party-product plugins in-tree (policy, June 2026):** the
same rule applies beyond memory providers. Plugins that integrate
someone else's product or project — observability/metrics backends,
vendor SaaS connectors, analytics dashboards, paid-service tie-ins —
must ship as **standalone plugin repos** that users install into
`~/.hermes/plugins/` (or via pip entry points). They register through
the existing plugin discovery path and use the ABCs/hooks/ctx surface
we expose; nothing special is needed in core. The reason is
maintenance load: every product we absorb into the tree becomes our
burden to keep working against a fast-moving core, for a backend we
don't own. Promote standalone plugins in the Nous Research Discord
(`#plugins-skills-and-skins`). PRs that add such a directory under
`plugins/` are closed with a pointer to publish it as its own repo —
this is a coupling decision, not a quality judgment. (The
`observability/`, `kanban/`, `disk-cleanup/`, etc. directories already
in the tree are existing precedent, not an invitation to add more
third-party-product plugins alongside them.)
### Model-provider plugins (`plugins/model-providers/<name>/`)
Every inference backend (openrouter, anthropic, gmi, deepseek, nvidia, …)

View File

@@ -85,23 +85,6 @@ This isn't a quality bar — it's a coupling-and-maintenance decision. Memory pr
---
## Third-Party Product Integrations: Ship as a Standalone Plugin
The same rule extends to **any plugin that integrates someone else's product or project** — observability/metrics backends, vendor SaaS connectors, analytics dashboards, paid-service tie-ins, and similar third-party integrations. **These do not land in this repo.**
The reason is maintenance load, not quality. Every external product absorbed into the core tree becomes ours to keep working against a fast-moving codebase, for a backend we don't own and can't control. Hermes ships a lot and the core moves quickly; coupling third-party products into it creates an open-ended burden on the maintainers.
Publish these as a **standalone plugin repo** instead:
- Implement the relevant ABC and use the existing plugin discovery path (`~/.hermes/plugins/`, project `.hermes/plugins/`, or a pip entry point) — see [Build a Hermes Plugin](https://hermes-agent.nousresearch.com/docs/guides/build-a-hermes-plugin)
- Register lifecycle hooks (`pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`, `on_session_start`, `on_session_end`), tools (`ctx.register_tool`), and CLI subcommands (`ctx.register_cli_command`) through the surface we already expose — no core changes needed
- If your plugin needs a capability the framework doesn't expose, that's a feature request to **widen the generic plugin surface** (a new hook or `ctx` method) — never special-case your plugin in core
- Promote it in the [Nous Research Discord](https://discord.gg/NousResearch) `#plugins-skills-and-skins` channel so users can find and install it
A well-built third-party-product plugin can clear automated review and still be closed for this reason — it's a placement decision, not a verdict on the code. PRs that add such a directory under `plugins/` will be closed with a pointer to publish it as its own repo.
---
## Development Setup
### Prerequisites
@@ -149,20 +132,13 @@ this way, make sure you run the `hermes` entrypoint from this venv; running the
system `python3 -m hermes_cli.main` can pick up unrelated system Python
packages.
Create the venv **outside** the cloned source tree. A venv that lives inside
the directory the agent operates from can be wiped by a relative-path command
the agent runs against its own checkout (`rm -rf venv`, `uv venv venv`, etc.),
which silently destroys the running runtime mid-session. Keeping it outside the
tree means no relative path from the workspace resolves to it.
```bash
git clone https://github.com/NousResearch/hermes-agent.git
cd hermes-agent
# Create venv with Python 3.11, OUTSIDE the source tree
uv venv ~/.hermes/venvs/hermes-dev --python 3.11
export VIRTUAL_ENV="$HOME/.hermes/venvs/hermes-dev"
export PATH="$VIRTUAL_ENV/bin:$PATH"
# Create venv with Python 3.11
uv venv venv --python 3.11
export VIRTUAL_ENV="$(pwd)/venv"
# Install with all extras (messaging, cron, CLI menus, dev tools)
uv pip install -e ".[all,dev]"

View File

@@ -119,9 +119,6 @@ COPY package.json package-lock.json ./
COPY web/package.json web/
COPY ui-tui/package.json ui-tui/
COPY ui-tui/packages/hermes-ink/ ui-tui/packages/hermes-ink/
# apps/shared/ is copied IN FULL because web/package.json references it as a
# `file:` workspace dependency (same pattern as hermes-ink above).
COPY apps/shared/ apps/shared/
# `npm_config_install_links=false` forces npm to install `file:` deps as
# symlinks instead of copies. This is the default since npm 10+, which is
@@ -187,19 +184,12 @@ RUN uv sync --frozen --no-install-project --extra all --extra messaging --extra
# invalidate the (relatively slow) web + ui-tui build layer.
COPY web/ web/
COPY ui-tui/ ui-tui/
COPY apps/shared/ apps/shared/
RUN cd web && npm run build && \
cd ../ui-tui && npm run build
# ---------- Source code ----------
# .dockerignore excludes node_modules, so the installs above survive.
# --link decouples this layer from parents for cache purposes; --chmod bakes
# the final read-only permissions at copy time so we skip the separate
# `chmod -R` pass that previously walked ~30k files across the venv +
# node_modules + source (21s amd64 / 222s arm64 — #49113). `a+rX,go-w`
# gives the non-root hermes user read + traverse but no write; root retains
# write so the build steps below don't need chmod u+w dances.
COPY --link --chmod=a+rX,go-w . .
COPY . .
# ---------- Permissions ----------
# Link hermes-agent itself (editable). Deps are already installed in the
@@ -207,15 +197,19 @@ COPY --link --chmod=a+rX,go-w . .
# resolution or downloads.
RUN uv pip install --no-cache-dir --no-deps -e "."
# Wire the exec shim and install-method stamp. Files under /opt/hermes are
# already root-owned (COPY, uv sync, npm install all run as root) and
# read-only for the hermes user (go-w from the --chmod above).
# Keep /opt/hermes immutable for the runtime hermes user. Hosted/container
# instances must not be able to self-edit the installed source or venv; user
# data, skills, plugins, config, logs, and dashboard uploads live under
# /opt/data instead. Root can still repair the image during build/boot, but
# supervised Hermes processes drop to the non-root hermes user.
USER root
RUN mkdir -p /opt/hermes/bin && \
cp /opt/hermes/docker/hermes-exec-shim.sh /opt/hermes/bin/hermes && \
chmod 0755 /opt/hermes/bin/hermes && \
printf 'docker\n' > /opt/hermes/.install_method
printf 'docker\n' > /opt/hermes/.install_method && \
chown -R root:root /opt/hermes && \
chmod -R a+rX /opt/hermes && \
chmod -R a-w /opt/hermes
# The ``.install_method`` stamp is baked next to the running code (the install
# tree), NOT into $HERMES_HOME. $HERMES_HOME (/opt/data) is a shared data
# volume that is commonly bind-mounted from the host and even shared with a
@@ -242,11 +236,13 @@ RUN mkdir -p /opt/hermes/bin && \
#
# The arg is optional — local `docker build` without --build-arg simply
# omits the file, and the runtime falls back to live-git lookup. CI
# (.github/workflows/docker.yml) passes ${{ github.sha }} so
# (.github/workflows/docker-publish.yml) passes ${{ github.sha }} so
# every published image has it.
ARG HERMES_GIT_SHA=
RUN if [ -n "${HERMES_GIT_SHA}" ]; then \
printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha; \
chmod u+w /opt/hermes && \
printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha && \
chmod a-w /opt/hermes /opt/hermes/.hermes_build_sha; \
fi
# ---------- s6-overlay service wiring ----------

View File

@@ -18,7 +18,7 @@
**The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
Use any model you want — [Nous Portal](https://portal.nousresearch.com), OpenRouter, OpenAI, your own endpoint, and [many others](https://hermes-agent.nousresearch.com/docs/integrations/providers). Switch with `hermes model` — no code changes, no lock-in.
Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NovitaAI](https://novita.ai) (AI-native cloud for Model API, Agent Sandbox, and GPU Cloud), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
<table>
<tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>
@@ -232,14 +232,10 @@ scripts/run_tests.sh
Manual clone fallback (for throwaway clones/CI where you intentionally do not
want the managed install layout):
Create the venv outside the cloned source tree — a venv inside the directory
the agent operates from can be wiped by a relative-path command the agent runs
against its own checkout, destroying the running runtime mid-session.
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
uv venv ~/.hermes/venvs/hermes-dev --python 3.11
source ~/.hermes/venvs/hermes-dev/bin/activate
uv venv .venv --python 3.11
source .venv/bin/activate
uv pip install -e ".[all,dev]"
scripts/run_tests.sh
```

View File

@@ -722,50 +722,10 @@ def init_agent(
elif agent.provider == "moa":
from agent.moa_loop import MoAClient
agent.api_mode = "chat_completions"
# Route reference-model outputs to the agent's tool_progress_callback so
# every surface that already consumes it (CLI spinner/scrollback, TUI,
# desktop, gateway) can show each reference's answer as a labelled block
# before the aggregator acts. The facade emits "moa.reference" and
# "moa.aggregating" events; we forward them through the same callback
# the tool lifecycle uses. Best-effort and cache-safe — these are
# display-only events, they never touch the message history.
def _moa_reference_relay(event: str, **kwargs: Any) -> None:
cb = getattr(agent, "tool_progress_callback", None)
if cb is None:
return
try:
if event == "moa.reference":
label = str(kwargs.get("label") or "")
text = str(kwargs.get("text") or "")
idx = kwargs.get("index")
count = kwargs.get("count")
cb(
"moa.reference",
label,
text,
None,
moa_index=idx,
moa_count=count,
)
elif event == "moa.aggregating":
cb(
"moa.aggregating",
str(kwargs.get("aggregator") or ""),
None,
None,
moa_ref_count=kwargs.get("ref_count"),
)
except Exception:
pass
agent.client = MoAClient(
agent.model or "default",
reference_callback=_moa_reference_relay,
)
agent.client = MoAClient(agent.model or "default")
agent._client_kwargs = {}
agent.api_key = api_key or "moa-virtual-provider"
agent.base_url = "moa://local"
agent.base_url = base_url or "moa://local"
if not agent.quiet_mode:
print(f"🤖 AI Agent initialized with MoA preset: {agent.model}")
elif agent.api_mode == "bedrock_converse":
@@ -1307,12 +1267,6 @@ def init_agent(
_agent_section = {}
agent._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")
# Intent-ack continuation config: "auto" (default — codex_responses only,
# the historical gate), true (all api_modes), false (never), or a list of
# model-name substrings. Resolved against the active api_mode/model in the
# conversation loop's intent-ack block.
agent._intent_ack_continuation = _agent_section.get("intent_ack_continuation", "auto")
# Universal task-completion guidance toggle. Default True. Surfaced
# as a separate flag from tool_use_enforcement because the guidance
# applies to ALL models, not just the model families enforcement
@@ -1676,10 +1630,8 @@ def init_agent(
f"Model {agent.model} has a context window of {_ctx:,} tokens, "
f"which is below the minimum {MINIMUM_CONTEXT_LENGTH:,} required "
f"by Hermes Agent. Choose a model with at least "
f"{MINIMUM_CONTEXT_LENGTH // 1000}K context. If your server "
f"reports a window smaller than the model's true window, set "
f"model.context_length in config.yaml to the real value "
f"(this must be at least {MINIMUM_CONTEXT_LENGTH // 1000}K)."
f"{MINIMUM_CONTEXT_LENGTH // 1000}K context, or set "
f"model.context_length in config.yaml to override."
)
# Inject context engine tool schemas (e.g. lcm_grep, lcm_describe, lcm_expand).

View File

@@ -42,14 +42,6 @@ from utils import base_url_host_matches, base_url_hostname, env_var_enabled, ato
logger = logging.getLogger(__name__)
# Max consecutive successful credential-pool token refreshes of the SAME entry
# on a persistent auth failure before we give up and let the fallback chain
# activate. A single-entry OAuth pool can re-mint a fresh token indefinitely
# even when the upstream keeps rejecting it, so without this cap the retry loop
# spins forever and never reaches ``_try_activate_fallback``. See #26080.
_MAX_AUTH_REFRESH_ATTEMPTS = 2
def _ra():
"""Lazy ``run_agent`` reference for test-patch routing."""
import run_agent
@@ -783,30 +775,6 @@ def recover_with_credential_pool(
return False, has_retried_429
refreshed = pool.try_refresh_current()
if refreshed is not None:
# ``try_refresh_current()`` re-mints a fresh OAuth token and reports
# success even when the upstream keeps rejecting it — a single-entry
# pool (common for OAuth/Max subscribers) has nothing to rotate to,
# so a bare "refreshed → retry" loop spins forever on the same dead
# token and the configured fallback never activates. Cap consecutive
# same-entry refreshes and fall through to fallback once exceeded.
# See #26080.
refreshed_id = getattr(refreshed, "id", None)
if refreshed_id is not None:
refresh_counts = getattr(agent, "_auth_pool_refresh_counts", None)
if refresh_counts is None:
refresh_counts = {}
agent._auth_pool_refresh_counts = refresh_counts
refresh_key = (agent.provider, refreshed_id)
refresh_counts[refresh_key] = refresh_counts.get(refresh_key, 0) + 1
if refresh_counts[refresh_key] > _MAX_AUTH_REFRESH_ATTEMPTS:
_ra().logger.warning(
"Credential auth failure persists after %s refreshes for "
"pool entry %s — treating as unrecoverable and allowing "
"fallback to activate.",
refresh_counts[refresh_key] - 1,
refreshed_id,
)
return False, has_retried_429
_ra().logger.info(f"Credential auth failure — refreshed pool entry {getattr(refreshed, 'id', '?')}")
agent._swap_credential(refreshed)
return True, has_retried_429
@@ -1078,34 +1046,6 @@ def restore_primary_runtime(agent) -> bool:
api_mode=rt.get("compressor_api_mode", ""),
)
# ── Re-select from the credential pool if one is available ──
# The snapshot's api_key was captured at construction time. Across
# turns the pool may have rotated (token revocation, billing/rate-limit
# exhaustion, cooldown), leaving the snapshot key stale. Restoring it
# blindly re-fails on the first request and burns through the remaining
# pool entries before cross-provider fallback even gets a chance. Ask
# the pool for its current best entry and swap the live credential in.
# When the pool is absent, empty, or the entry has no usable key, we
# keep the snapshot key (the existing behavior). Fixes #25205.
pool = getattr(agent, "_credential_pool", None)
if pool is not None and pool.has_available():
entry = pool.select()
if entry is not None:
entry_key = (
getattr(entry, "runtime_api_key", None)
or getattr(entry, "access_token", "")
)
if entry_key:
# ``_swap_credential`` rebuilds the OpenAI/Anthropic client,
# reapplies base-url-scoped headers, and carries the
# accumulated base_url / OAuth-detection fixes (#33163).
agent._swap_credential(entry)
logger.info(
"Restore re-selected pool entry %s (%s)",
getattr(entry, "id", "?"),
getattr(entry, "label", "?"),
)
# ── Reset fallback chain for the new turn ──
agent._fallback_activated = False
agent._fallback_index = 0
@@ -1281,11 +1221,7 @@ def dump_api_request_debug(
dump_payload["error"] = error_info
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
# Sanitize the session ID into a traversal-free path segment — it can
# originate from untrusted input (X-Hermes-Session-Id header), and an
# unsanitized "../"-shaped ID would write the dump outside logs_dir.
safe_sid = _ra()._safe_session_filename_component(agent.session_id)
dump_file = agent.logs_dir / f"request_dump_{safe_sid}_{timestamp}.json"
dump_file = agent.logs_dir / f"request_dump_{agent.session_id}_{timestamp}.json"
# Redact secrets before persisting/printing. This dump captures the
# full request body (system prompt, tool defs, context-embedded
@@ -1484,15 +1420,6 @@ def create_openai_client(agent, client_kwargs: dict, *, reason: str, shared: boo
keepalive_http = agent._build_keepalive_http_client(client_kwargs.get("base_url", ""))
if keepalive_http is not None:
client_kwargs["http_client"] = keepalive_http
# Delegate all rate-limit / 5xx retry to hermes's outer conversation loop,
# which honors Retry-After and applies adaptive/jittered backoff. The OpenAI
# SDK default (max_retries=2) uses its own 1-2s backoff that ignores
# Retry-After and double-retries inside our loop — the same deadlock the
# Anthropic clients hit (#26293). This is the single chokepoint every primary
# OpenAI/aggregator client passes through (init, switch_model, recovery,
# restore, request-scoped); auxiliary_client builds its own clients and keeps
# SDK retries because it is NOT wrapped by the conversation loop.
client_kwargs.setdefault("max_retries", 0)
# Uses the module-level `OpenAI` name, resolved lazily on first
# access via __getattr__ below. Tests patch via `run_agent.OpenAI`.
client = _ra().OpenAI(**client_kwargs)
@@ -1572,10 +1499,6 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
# _client_kwargs is a dict — snapshot a shallow copy so mutating the
# live dict doesn't poison the rollback target.
_snapshot["_client_kwargs"] = dict(getattr(agent, "_client_kwargs", {}) or {})
# Snapshot the credential pool reference so a failed client rebuild can
# restore the original pool (issue #52727: pool reload is part of this
# switch and must be reversible on rollback).
_snapshot["_credential_pool"] = getattr(agent, "_credential_pool", _MISSING)
try:
# Clear the per-config context_length override so the new model's
@@ -1600,36 +1523,8 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
if api_key:
agent.api_key = api_key
# ── Reload credential pool for the new provider (issue #52727) ──
# Without this, ``recover_with_credential_pool`` sees a
# ``pool.provider != agent.provider`` mismatch and short-circuits,
# leaving the new provider with no rotation/recovery on 401/429 and
# burning the original pool's entries. Only reload when the provider
# actually changed (or the pool was missing) — re-selecting the same
# provider must not churn the pool reference. A reload failure is
# logged + swallowed: the switch itself must still complete.
old_norm = (old_provider or "").strip().lower()
new_norm = (new_provider or "").strip().lower()
if old_norm != new_norm or getattr(agent, "_credential_pool", None) is None:
try:
from agent.credential_pool import load_pool
agent._credential_pool = load_pool(new_provider)
except Exception as _pool_exc: # noqa: BLE001
logger.warning(
"switch_model: credential pool reload failed for %s (%s); "
"continuing without pool rotation this turn",
new_provider, _pool_exc,
)
# ── Build new client ──
if (new_provider or "").strip().lower() == "moa":
from agent.moa_loop import MoAClient
agent.api_key = api_key or "moa-virtual-provider"
agent.base_url = "moa://local"
agent._client_kwargs = {}
agent.client = MoAClient(agent.model or "default")
elif api_mode == "anthropic_messages":
if api_mode == "anthropic_messages":
from agent.anthropic_adapter import (
build_anthropic_client,
resolve_anthropic_token,
@@ -2209,21 +2104,8 @@ def looks_like_codex_intermediate_ack(
user_message: str,
assistant_content: str,
messages: List[Dict[str, Any]],
require_workspace: bool = True,
) -> bool:
"""Detect a planning/ack message that should continue instead of ending the turn.
``require_workspace`` (default True) keeps the original codex-coding scope:
the ack must reference a filesystem/repo workspace. The conversation loop
passes ``require_workspace=False`` when the user has explicitly opted into
intent-ack continuation for all api_modes (``agent.intent_ack_continuation``
is ``true`` or a model-list), so general autonomous workflows ("I'll run a
health check on the server", "I'll start the deployment") — which carry a
future-ack and an action verb but no filesystem reference — are caught too.
The future-ack + short-content + no-prior-tools + action-verb requirements
always apply, which is what keeps conversational "I'll help you brainstorm"
replies from tripping it.
"""
"""Detect a planning/ack message that should continue instead of ending the turn."""
if any(isinstance(msg, dict) and msg.get("role") == "tool" for msg in messages):
return False
@@ -2276,67 +2158,17 @@ def looks_like_codex_intermediate_ack(
"path",
)
assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
if not assistant_mentions_action:
return False
# Opted-in (all-api_mode) path: a future-ack + action verb + no prior tool
# call is enough — the user asked us to keep going when the model only
# announces intent, regardless of whether a filesystem is involved.
if not require_workspace:
return True
user_text = (user_message or "").strip().lower()
user_targets_workspace = (
any(marker in user_text for marker in workspace_markers)
or "~/" in user_text
or "/" in user_text
)
assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
assistant_targets_workspace = any(
marker in assistant_text for marker in workspace_markers
)
return user_targets_workspace or assistant_targets_workspace
def intent_ack_continuation_mode(agent) -> str:
"""Classify the resolved intent-ack continuation mode for this turn.
Returns one of:
* ``"off"`` — never continue.
* ``"codex_only"`` — historical scope: continue only on the
``codex_responses`` api_mode, and only for codebase/workspace acks
(``require_workspace=True``).
* ``"all"`` — user opted in for every api_mode; continue on any
future-ack + action verb (``require_workspace=False``).
Mirrors the four-mode shape of ``agent.tool_use_enforcement``: ``"auto"``
(default) → codex_only; ``True``/"true"/"always"/"yes"/"on" → all;
``False``/"false"/"never"/"no"/"off" → off; ``list`` → all when a substring
matches the active model name, else off.
"""
mode = getattr(agent, "_intent_ack_continuation", "auto")
if mode is True or (isinstance(mode, str) and mode.lower() in {"true", "always", "yes", "on"}):
return "all"
if mode is False or (isinstance(mode, str) and mode.lower() in {"false", "never", "no", "off"}):
return "off"
if isinstance(mode, list):
model_lower = (agent.model or "").lower()
return "all" if any(p.lower() in model_lower for p in mode if isinstance(p, str)) else "off"
# "auto" or any unrecognised value — historical codex-only behavior.
return "codex_only" if agent.api_mode == "codex_responses" else "off"
def intent_ack_continuation_enabled(agent) -> bool:
"""Whether intent-ack continuation should fire at all for this turn.
The ``codex_ack_continuations < 2`` per-turn cap and the
``looks_like_codex_intermediate_ack`` detector are applied by the caller;
this only decides the on/off gate. Callers that also need to know whether
the workspace requirement applies should use ``intent_ack_continuation_mode``
directly (``"codex_only"`` ⇒ require_workspace=True, ``"all"`` ⇒ False).
"""
return intent_ack_continuation_mode(agent) != "off"
return (user_targets_workspace or assistant_targets_workspace) and assistant_mentions_action

View File

@@ -673,9 +673,6 @@ def _build_anthropic_client_with_bearer_hook(
kwargs = {
"timeout": timeout_obj,
"http_client": http_client,
# Delegate retry to hermes's outer loop (honors Retry-After); the SDK
# default max_retries=2 ignores it and double-retries. (#26293)
"max_retries": 0,
# The SDK requires *something* for api_key/auth_token. Our
# event hook overrides Authorization per request so this value
# is never sent. The sentinel string makes accidental leaks
@@ -760,12 +757,6 @@ def build_anthropic_client(
_read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
kwargs = {
"timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
# Delegate all rate-limit / 5xx retry to hermes's outer conversation
# loop, which honors Retry-After. The SDK default (max_retries=2) uses
# its own 1-2s backoff that ignores Retry-After and double-retries
# inside our loop — burning request slots against a bucket that won't
# refill for minutes. (#26293)
"max_retries": 0,
}
if normalized_base_url:
# Azure Anthropic endpoints require an ``api-version`` query parameter.
@@ -861,9 +852,6 @@ def build_anthropic_bedrock_client(region: str):
return _anthropic_sdk.AnthropicBedrock(
aws_region=region,
timeout=Timeout(timeout=900.0, connect=10.0),
# Delegate retry to hermes's outer loop (honors Retry-After); the SDK
# default max_retries=2 ignores it and double-retries. (#26293)
max_retries=0,
default_headers={"anthropic-beta": ",".join([*_COMMON_BETAS, _CONTEXT_1M_BETA])},
)
@@ -926,72 +914,44 @@ def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
return None
def _read_claude_code_credentials_from_file() -> Optional[Dict[str, Any]]:
"""Read Claude Code OAuth credentials from ~/.claude/.credentials.json.
Returns dict with {accessToken, refreshToken?, expiresAt?, source} or None.
"""
cred_path = Path.home() / ".claude" / ".credentials.json"
if not cred_path.exists():
return None
try:
data = json.loads(cred_path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError, IOError) as e:
logger.debug("Failed to read ~/.claude/.credentials.json: %s", e)
return None
oauth_data = data.get("claudeAiOauth")
if not (oauth_data and isinstance(oauth_data, dict)):
return None
access_token = oauth_data.get("accessToken", "")
if not access_token:
return None
return {
"accessToken": access_token,
"refreshToken": oauth_data.get("refreshToken", ""),
"expiresAt": oauth_data.get("expiresAt", 0),
"source": "claude_code_credentials_file",
}
def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
"""Read refreshable Claude Code OAuth credentials.
Reads from two possible sources and reconciles them:
Checks two sources in order:
1. macOS Keychain (Darwin only) — "Claude Code-credentials" entry
2. ~/.claude/.credentials.json file
Selection rules when both are present:
- If exactly one is non-expired, prefer that one. (Handles the case
where Claude Code refreshes one source but not the other — observed
in the wild on Claude Code 2.1.x.)
- Otherwise, prefer the source with the later ``expiresAt`` so that
any subsequent refresh uses the most recent ``refreshToken``.
This intentionally excludes ~/.claude.json primaryApiKey. Opencode's
subscription flow is OAuth/setup-token based with refreshable credentials,
and native direct Anthropic provider usage should follow that path rather
than auto-detecting Claude's first-party managed key.
Returns dict with {accessToken, refreshToken?, expiresAt?, source} or None.
Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
"""
# Try macOS Keychain first (covers Claude Code >=2.1.114)
kc_creds = _read_claude_code_credentials_from_keychain()
file_creds = _read_claude_code_credentials_from_file()
if kc_creds:
return kc_creds
if kc_creds and file_creds:
kc_valid = is_claude_code_token_valid(kc_creds)
file_valid = is_claude_code_token_valid(file_creds)
if kc_valid and not file_valid:
return kc_creds
if file_valid and not kc_valid:
return file_creds
# Both valid or both expired: prefer the later expiresAt so the
# downstream refresh path uses the freshest refresh_token.
kc_exp = kc_creds.get("expiresAt", 0) or 0
file_exp = file_creds.get("expiresAt", 0) or 0
return kc_creds if kc_exp >= file_exp else file_creds
# Fall back to JSON file
cred_path = Path.home() / ".claude" / ".credentials.json"
if cred_path.exists():
try:
data = json.loads(cred_path.read_text(encoding="utf-8"))
oauth_data = data.get("claudeAiOauth")
if oauth_data and isinstance(oauth_data, dict):
access_token = oauth_data.get("accessToken", "")
if access_token:
return {
"accessToken": access_token,
"refreshToken": oauth_data.get("refreshToken", ""),
"expiresAt": oauth_data.get("expiresAt", 0),
"source": "claude_code_credentials_file",
}
except (json.JSONDecodeError, OSError, IOError) as e:
logger.debug("Failed to read ~/.claude/.credentials.json: %s", e)
return kc_creds or file_creds
return None
def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool:
@@ -1074,40 +1034,8 @@ def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False)
def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]:
"""Attempt to refresh an expired Claude Code OAuth token.
Claude Code's OAuth refresh tokens are single-use: a successful refresh
rotates the pair and invalidates the old refresh token. Claude Code itself
also refreshes on its own schedule (IDE/CLI activity), so by the time
Hermes notices an expired token, Claude Code may have already rotated it.
POSTing our now-stale refresh token in that window races Claude Code and
fails with ``invalid_grant``.
So before refreshing, re-read the live credential sources. If Claude Code
has already produced a valid token, adopt it and skip the POST entirely.
Only fall back to refreshing ourselves when no fresh credential is found.
"""
# Claude Code may have already refreshed — adopt its token rather than
# racing it with our (possibly already-rotated) refresh token. Only adopt
# when the live re-read produced a DIFFERENT token with a real future
# expiry: re-adopting the same credential we were just handed would be a
# no-op, and a 0/absent ``expiresAt`` means "managed key / unknown expiry"
# (see is_claude_code_token_valid) which must NOT be treated as a fresh
# refresh here.
current = read_claude_code_credentials()
if current:
current_token = current.get("accessToken", "")
current_exp = current.get("expiresAt", 0) or 0
if (
current_token
and current_token != creds.get("accessToken", "")
and current_exp > 0
and is_claude_code_token_valid(current)
):
logger.debug("Adopted Claude Code's already-refreshed OAuth token")
return current_token
refresh_token = (current or {}).get("refreshToken", "") or creds.get("refreshToken", "")
"""Attempt to refresh an expired Claude Code OAuth token."""
refresh_token = creds.get("refreshToken", "")
if not refresh_token:
logger.debug("No refresh token available — cannot refresh")
return None

View File

@@ -102,7 +102,6 @@ OpenAI = _OpenAIProxy() # module-level name, resolves lazily on call/isinstance
from agent.credential_pool import load_pool
from agent.model_metadata import MINIMUM_CONTEXT_LENGTH, get_model_context_length
from agent.process_bootstrap import build_keepalive_http_client
from hermes_cli.config import get_hermes_home
from hermes_constants import OPENROUTER_BASE_URL
from utils import base_url_host_matches, base_url_hostname, env_float, model_forces_max_completion_tokens, normalize_proxy_env_vars
@@ -110,23 +109,6 @@ from utils import base_url_host_matches, base_url_hostname, env_float, model_for
logger = logging.getLogger(__name__)
def _openai_http_client_kwargs(
base_url: Optional[str],
*,
async_mode: bool = False,
) -> Dict[str, Any]:
"""Inject keepalive httpx client with env-only proxy (not macOS system proxy)."""
client = build_keepalive_http_client(str(base_url or ""), async_mode=async_mode)
if client is None:
return {}
return {"http_client": client}
def _create_openai_client(*, api_key: str, base_url: str, **kwargs: Any) -> Any:
kwargs = {**_openai_http_client_kwargs(base_url), **kwargs}
return OpenAI(api_key=api_key, base_url=base_url, **kwargs)
# ── Interrupt protection for atomic auxiliary tasks ──────────────────────
# Some auxiliary tasks must NOT be aborted mid-flight by a gateway interrupt
# (e.g. an incoming user message while the agent is busy). Context
@@ -684,28 +666,6 @@ def _pool_runtime_base_url(entry: Any, fallback: str = "") -> str:
return str(url or "").strip().rstrip("/")
# Hostnames (lowercase, exact) that the auxiliary Anthropic path is allowed to
# be pointed at via config.yaml model.base_url. Anything else falls back to the
# Anthropic default — operators routing main-session traffic through a
# non-Anthropic host (e.g. OpenRouter, OpenAI) with provider=anthropic in config
# must NOT have that foreign host leak into the auxiliary client. See #52608.
_ANTHROPIC_COMPATIBLE_HOSTS = frozenset({
"api.anthropic.com",
})
def _is_anthropic_compatible_host(url: str) -> bool:
"""Return True if ``url``'s hostname is an Anthropic endpoint we trust for aux calls."""
if not url:
return False
try:
from urllib.parse import urlparse
host = (urlparse(url).hostname or "").strip().lower().rstrip(".")
return host in _ANTHROPIC_COMPATIBLE_HOSTS
except Exception:
return False
def _nous_min_key_ttl_seconds() -> int:
try:
return max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800")))
@@ -1632,7 +1592,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
_merged_aux = _apply_user_default_headers(extra.get("default_headers"))
if _merged_aux:
extra["default_headers"] = _merged_aux
_client = _create_openai_client(api_key=api_key, base_url=base_url, **extra)
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
_client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
return _client, model
@@ -1672,7 +1632,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
_merged_aux2 = _apply_user_default_headers(extra.get("default_headers"))
if _merged_aux2:
extra["default_headers"] = _merged_aux2
_client = _create_openai_client(api_key=api_key, base_url=base_url, **extra)
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
_client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
return _client, model
@@ -1687,21 +1647,20 @@ def _try_openrouter(explicit_api_key: str = None, model: str = None) -> Tuple[Op
pool_present, entry = _select_pool_entry("openrouter")
if pool_present:
or_key = explicit_api_key or _pool_runtime_api_key(entry)
if or_key:
base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
logger.debug("Auxiliary client: OpenRouter via pool")
return _create_openai_client(api_key=or_key, base_url=base_url,
default_headers=build_or_headers()), model or _OPENROUTER_MODEL
# Pool exists but is exhausted (no usable runtime key) — fall through to
# the OPENROUTER_API_KEY env-var path rather than failing outright.
logger.debug("Auxiliary client: OpenRouter pool exhausted, trying OPENROUTER_API_KEY")
if not or_key:
_mark_provider_unhealthy("openrouter", ttl=60)
return None, None
base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
logger.debug("Auxiliary client: OpenRouter via pool")
return OpenAI(api_key=or_key, base_url=base_url,
default_headers=build_or_headers()), model or _OPENROUTER_MODEL
or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY")
if not or_key:
_mark_provider_unhealthy("openrouter", ttl=60)
return None, None
logger.debug("Auxiliary client: OpenRouter")
return _create_openai_client(api_key=or_key, base_url=OPENROUTER_BASE_URL,
return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
default_headers=build_or_headers()), model or _OPENROUTER_MODEL
@@ -1794,7 +1753,7 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
return None, None
base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/")
return (
_create_openai_client(
OpenAI(
api_key=api_key,
base_url=base_url,
),
@@ -2071,7 +2030,7 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
if _custom_headers:
_extra["default_headers"] = _custom_headers
if custom_mode == "codex_responses":
real_client = _create_openai_client(api_key=custom_key, base_url=_clean_base, **_extra)
real_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
return CodexAuxiliaryClient(real_client, model), model
if custom_mode == "anthropic_messages":
# Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
@@ -2085,14 +2044,14 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
"Custom endpoint declares api_mode=anthropic_messages but the "
"anthropic SDK is not installed — falling back to OpenAI-wire."
)
return _create_openai_client(api_key=custom_key, base_url=_clean_base, **_extra), model
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
return (
AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
model,
)
# URL-based anthropic detection for custom endpoints that didn't set
# api_mode explicitly (e.g. kimi.com/coding reached via custom config).
_fallback_client = _create_openai_client(api_key=custom_key, base_url=_clean_base, **_extra)
_fallback_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
_fallback_client = _maybe_wrap_anthropic(
_fallback_client, model, custom_key, custom_base, custom_mode,
)
@@ -2121,7 +2080,7 @@ def _build_xai_oauth_aux_client(model: str) -> Tuple[Optional[Any], Optional[str
return None, None
api_key, base_url = resolved
logger.debug("Auxiliary client: xAI OAuth (%s via Responses API)", model)
real_client = _create_openai_client(api_key=api_key, base_url=base_url)
real_client = OpenAI(api_key=api_key, base_url=base_url)
return CodexAuxiliaryClient(real_client, model), model
@@ -2158,7 +2117,7 @@ def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
return None, None
base_url = _CODEX_AUX_BASE_URL
logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", model)
real_client = _create_openai_client(
real_client = OpenAI(
api_key=codex_token,
base_url=base_url,
default_headers=_codex_cloudflare_headers(codex_token),
@@ -2258,7 +2217,7 @@ def _try_azure_foundry(
if _dq:
extra["default_query"] = _dq
client = _create_openai_client(api_key=api_key, base_url=_clean_base, **extra)
client = OpenAI(api_key=api_key, base_url=_clean_base, **extra)
if runtime_api_mode == "codex_responses":
# GPT-5.x / o-series / codex models on Azure Foundry are
@@ -2297,16 +2256,9 @@ def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optiona
if not token:
return None, None
# Allow base URL override from config.yaml model.base_url, but only when:
# 1. the configured provider is anthropic (otherwise a non-Anthropic
# base_url, e.g. Codex endpoint, would leak into Anthropic requests), AND
# 2. the override URL actually points at an Anthropic-compatible endpoint.
# Without gate (2), operators who route main-session traffic through a
# non-Anthropic provider that accepts Anthropic-format requests (e.g.
# OpenRouter at openrouter.ai/api/v1, with provider=anthropic in config.yaml)
# would have every auxiliary side-channel call (memory extractors,
# reflection, vision, title generation) 401 from the foreign host —
# see issue #52608.
# Allow base URL override from config.yaml model.base_url, but only
# when the configured provider is anthropic otherwise a non-Anthropic
# base_url (e.g. Codex endpoint) would leak into Anthropic requests.
base_url = _pool_runtime_base_url(entry, _ANTHROPIC_DEFAULT_BASE_URL) if pool_present else _ANTHROPIC_DEFAULT_BASE_URL
try:
from hermes_cli.config import load_config
@@ -2316,7 +2268,7 @@ def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optiona
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
if cfg_provider == "anthropic":
cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
if cfg_base_url and _is_anthropic_compatible_host(cfg_base_url):
if cfg_base_url:
base_url = cfg_base_url
except Exception:
pass
@@ -2802,25 +2754,6 @@ def _is_model_incompatible_error(exc: Exception) -> bool:
))
def _is_invalid_aux_response_error(exc: Exception) -> bool:
"""Detect provider responses that authenticated but cannot serve aux shape.
Some OpenAI-compatible routes return HTTP 200 with an empty/malformed
ChatCompletion instead of a normal provider error. That is still a
provider/model capability failure for auxiliary tasks: downstream callers
need ``choices[0].message`` and should be able to continue through the
same fallback path as explicit model-incompatibility errors.
"""
if not isinstance(exc, RuntimeError):
return False
msg = str(exc).lower()
return (
"auxiliary " in msg
and "llm returned invalid response" in msg
and "choices[0].message" in msg
)
def _evict_cached_clients(provider: str) -> None:
"""Drop cached auxiliary clients for a provider so fresh creds are used."""
normalized = _normalize_aux_provider(provider)
@@ -3643,37 +3576,6 @@ def _resolve_auto(
# config.yaml (auxiliary.<task>.provider) still win over this.
main_provider = str(runtime_provider or _read_main_provider() or "")
main_model = str(runtime_model or _read_main_model() or "")
# MoA virtual provider: the "model" is a preset name (e.g. "opus-gpt") and
# there is no real "moa" HTTP endpoint, so resolving an aux client against
# provider="moa"/model=<preset> sends the preset name as the model id and
# the provider 400s ("opus-gpt is not a valid model ID"). Auxiliary tasks
# (title generation, compression, vision, …) don't need the reference
# fan-out — they should run on the aggregator, which is the preset's acting
# model. Resolve the MoA preset to its aggregator slot and continue Step 1
# with that real provider+model. Mirrors the MoA context-length resolution.
if main_provider == "moa":
try:
from hermes_cli.config import load_config
from hermes_cli.moa_config import resolve_moa_preset
_preset = resolve_moa_preset(load_config().get("moa") or {}, main_model)
_agg = _preset.get("aggregator") or {}
_agg_provider = str(_agg.get("provider") or "").strip()
_agg_model = str(_agg.get("model") or "").strip()
if _agg_provider and _agg_model and _agg_provider.lower() != "moa":
main_provider = _agg_provider
main_model = _agg_model
# The MoA virtual runtime carries a non-HTTP base_url
# ("moa://local") and a placeholder api_key; they belong to the
# facade, not the aggregator's real provider. Drop them so the
# aggregator resolves through its own provider credentials.
runtime_base_url = ""
runtime_api_key = ""
runtime_api_mode = ""
except Exception:
logger.debug("MoA aux resolution to aggregator failed", exc_info=True)
if (main_provider and main_model
and main_provider not in {"auto", ""}):
resolved_provider = main_provider
@@ -3820,10 +3722,6 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
_merged_async = _apply_user_default_headers(async_kwargs.get("default_headers"))
if _merged_async:
async_kwargs["default_headers"] = _merged_async
async_kwargs = {
**_openai_http_client_kwargs(sync_base_url, async_mode=True),
**async_kwargs,
}
return AsyncOpenAI(**async_kwargs), model
@@ -4034,7 +3932,7 @@ def resolve_provider_client(
"but no Codex OAuth token found (run: hermes model)")
return None, None
final_model = _normalize_resolved_model(model, provider)
raw_client = _create_openai_client(
raw_client = OpenAI(
api_key=codex_token,
base_url=_CODEX_AUX_BASE_URL,
default_headers=_codex_cloudflare_headers(codex_token),
@@ -4115,7 +4013,7 @@ def resolve_provider_client(
_merged_custom = _apply_user_default_headers(extra.get("default_headers"))
if _merged_custom:
extra["default_headers"] = _merged_custom
client = _create_openai_client(api_key=custom_key, base_url=_clean_base, **extra)
client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
client = _wrap_if_needed(client, final_model, custom_base, custom_key)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
else (client, final_model))
@@ -4219,7 +4117,7 @@ def resolve_provider_client(
_fb_headers = _apply_user_default_headers(_fb_extra.get("default_headers"))
if _fb_headers:
_fb_extra["default_headers"] = _fb_headers
client = _create_openai_client(api_key=custom_key, base_url=_fb_clean, **_fb_extra)
client = OpenAI(api_key=custom_key, base_url=_fb_clean, **_fb_extra)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
else (client, final_model))
sync_anthropic = AnthropicAuxiliaryClient(
@@ -4228,7 +4126,7 @@ def resolve_provider_client(
if async_mode:
return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model
return sync_anthropic, final_model
client = _create_openai_client(api_key=custom_key, base_url=_clean_base2, **_extra2)
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
# codex_responses or inherited auto-detect (via _wrap_if_needed).
# _wrap_if_needed reads the closed-over `api_mode` (the task-level
# override). Named-provider entry api_mode=codex_responses also
@@ -4370,7 +4268,7 @@ def resolve_provider_client(
_merged_main = _apply_user_default_headers(headers)
if _merged_main:
headers = _merged_main
client = _create_openai_client(api_key=api_key, base_url=base_url,
client = OpenAI(api_key=api_key, base_url=base_url,
**({"default_headers": headers} if headers else {}))
# Copilot GPT-5+ models (except gpt-5-mini) require the Responses
@@ -4906,7 +4804,7 @@ def _refresh_nous_auxiliary_client(
return None, model
fresh_key, fresh_base_url = runtime
sync_client = _create_openai_client(api_key=fresh_key, base_url=fresh_base_url)
sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url)
final_model = model
current_loop = None
@@ -5489,24 +5387,10 @@ def _build_call_kwargs(
# ``/anthropic`` endpoint reached through the OpenAI SDK wrapper), where
# max_tokens is a MANDATORY field — omitting it is a hard 400. Keep it only
# there.
#
# NVIDIA NIM (integrate.api.nvidia.com and local NIM endpoints) is a
# second exception: some models—notably minimaxai/minimax-m3—return HTTP
# 200 with an empty choices[] payload when max_tokens is omitted. The main
# NVIDIA chat path already sends an output cap via the provider profile;
# preserve it on the auxiliary path too.
_effective_base = base_url or (
_current_custom_base_url() if provider == "custom" else ""
)
_provider_norm = str(provider or "").strip().lower()
_is_nvidia_nim = (
_provider_norm in {"nvidia", "nvidia-nim", "nim", "build-nvidia", "nemotron"}
or base_url_host_matches(_effective_base, "integrate.api.nvidia.com")
)
if (
_is_anthropic_compat_endpoint(provider, _effective_base)
or _is_nvidia_nim
):
if _is_anthropic_compat_endpoint(provider, _effective_base):
kwargs["max_tokens"] = max_tokens
if tools:
@@ -5561,9 +5445,6 @@ def _validate_llm_response(response: Any, task: str = None) -> Any:
if not choices or not hasattr(choices[0], "message"):
raise AttributeError("missing choices[0].message")
except (AttributeError, TypeError, IndexError) as exc:
recovered = _recover_aux_response_message(response)
if recovered is not None:
return recovered
response_type = type(response).__name__
response_preview = str(response)[:120]
raise RuntimeError(
@@ -5575,64 +5456,6 @@ def _validate_llm_response(response: Any, task: str = None) -> Any:
return response
def _recover_aux_response_message(response: Any) -> Optional[Any]:
"""Synthesize chat-completions shape from Responses-style text fields.
Auxiliary callers consume ``choices[0].message``. Some compatible
endpoints return text outside ``choices`` (for example ``output_text`` or
``output`` items). Preserve that response before declaring it malformed.
"""
text = _extract_aux_response_text(response)
if not text:
return None
choice = SimpleNamespace(
message=SimpleNamespace(content=text),
finish_reason=getattr(response, "finish_reason", None) or "stop",
)
try:
response.choices = [choice]
return response
except Exception:
return SimpleNamespace(
id=getattr(response, "id", ""),
model=getattr(response, "model", ""),
object=getattr(response, "object", "chat.completion"),
choices=[choice],
usage=getattr(response, "usage", None),
)
def _extract_aux_response_text(response: Any) -> str:
output_text = _obj_get(response, "output_text")
if isinstance(output_text, str) and output_text.strip():
return output_text.strip()
output = _obj_get(response, "output")
if not isinstance(output, list):
return ""
parts: List[str] = []
for item in output:
item_type = _obj_get(item, "type")
if item_type and item_type != "message":
continue
for part in (_obj_get(item, "content") or []):
part_type = _obj_get(part, "type")
if part_type in {"output_text", "text", None}:
text = _obj_get(part, "text")
if isinstance(text, str) and text.strip():
parts.append(text.strip())
return "\n".join(parts).strip()
def _obj_get(obj: Any, key: str, default: Any = None) -> Any:
value = getattr(obj, key, default)
if value is default and isinstance(obj, dict):
value = obj.get(key, default)
return value
def call_llm(
task: str = None,
*,
@@ -6030,21 +5853,11 @@ def call_llm(
# When the provider returns a 429 rate-limit (not billing), fall
# back to an alternative provider instead of exhausting retries
# against the same rate-limited endpoint.
#
# ── Auth error fallback (#21165) ─────────────────────────────
# When the resolved provider returns 401 and neither the Nous
# refresh path nor explicit provider credential refresh applies,
# fall back to an alternative provider instead of dropping the
# auxiliary task on the floor (silent compression failure /
# message loss). Auth is NOT a capacity error: it only bypasses
# the explicit-provider gate when the user is in auto mode.
should_fallback = (
_is_auth_error(first_err)
or _is_payment_error(first_err)
_is_payment_error(first_err)
or _is_connection_error(first_err)
or _is_rate_limit_error(first_err)
or _is_model_incompatible_error(first_err)
or _is_invalid_aux_response_error(first_err)
)
# Respect explicit provider choice for transient errors (auth, request
# validation, etc.) but allow fallback when the provider clearly cannot
@@ -6067,12 +5880,9 @@ def call_llm(
or _is_connection_error(first_err)
or _is_rate_limit_error(first_err)
or _is_model_incompatible_error(first_err)
or _is_invalid_aux_response_error(first_err)
)
if should_fallback and (is_auto or is_capacity_error):
if _is_auth_error(first_err):
reason = "auth error"
elif _is_payment_error(first_err):
if _is_payment_error(first_err):
reason = "payment error"
# Resolve the actual provider label (resolved_provider may be
# "auto"; the client's base_url tells us which backend got the
@@ -6085,8 +5895,6 @@ def call_llm(
reason = "rate limit"
elif _is_model_incompatible_error(first_err):
reason = "model incompatible with route"
elif _is_invalid_aux_response_error(first_err):
reason = "invalid provider response"
else:
reason = "connection error"
logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
@@ -6521,17 +6329,11 @@ async def async_call_llm(
raise
# ── Payment / connection / rate-limit fallback (mirrors sync call_llm) ──
# Auth error fallback (#21165): a 401 that survived the refresh path
# falls back in auto mode just like the sync call_llm() path. Auth is
# NOT a capacity error, so on an explicit provider it still respects
# the user's choice (handled by the is_auto/is_capacity_error gate).
should_fallback = (
_is_auth_error(first_err)
or _is_payment_error(first_err)
_is_payment_error(first_err)
or _is_connection_error(first_err)
or _is_rate_limit_error(first_err)
or _is_model_incompatible_error(first_err)
or _is_invalid_aux_response_error(first_err)
)
# Capacity errors (payment/quota/connection/rate-limit) bypass the
# explicit-provider gate — the provider cannot serve the request
@@ -6546,12 +6348,9 @@ async def async_call_llm(
or _is_connection_error(first_err)
or _is_rate_limit_error(first_err)
or _is_model_incompatible_error(first_err)
or _is_invalid_aux_response_error(first_err)
)
if should_fallback and (is_auto or is_capacity_error):
if _is_auth_error(first_err):
reason = "auth error"
elif _is_payment_error(first_err):
if _is_payment_error(first_err):
reason = "payment error"
_mark_provider_unhealthy(
_recoverable_pool_provider(resolved_provider, client) or resolved_provider
@@ -6560,8 +6359,6 @@ async def async_call_llm(
reason = "rate limit"
elif _is_model_incompatible_error(first_err):
reason = "model incompatible with route"
elif _is_invalid_aux_response_error(first_err):
reason = "invalid provider response"
else:
reason = "connection error"
logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",

View File

@@ -28,7 +28,6 @@ from typing import Any, Dict, Optional
from hermes_cli.timeouts import get_provider_request_timeout, get_provider_stale_timeout
from hermes_constants import PARTIAL_STREAM_STUB_ID, FINISH_REASON_LENGTH
from agent.error_classifier import FailoverReason
from agent.gemini_native_adapter import is_native_gemini_base_url
from agent.model_metadata import is_local_endpoint
from agent.message_sanitization import (
_sanitize_surrogates,
@@ -38,18 +37,6 @@ from tools.terminal_tool import is_persistent_env
from utils import base_url_host_matches, base_url_hostname, env_float, env_int
logger = logging.getLogger(__name__)
_OPENROUTER_PROVIDER_SORT_VALUES = {"throughput", "latency", "price"}
# When the fallback chain is fully exhausted on a non-rate-limit failure
# (e.g. every provider returns a non-retryable client error like HTTP 400),
# arm a short cooldown so the NEXT turn's restore_primary_runtime stays gated
# and does not reset _fallback_index=0 to replay the entire chain again.
# Without this, a client/gateway that re-submits immediately would re-marshal
# the full (potentially 80k-token) context once per provider every turn and
# can drive a constrained host into memory/swap exhaustion. Rate-limit /
# billing reasons keep their own 60s cooldown (set above); this is the
# narrower non-rate-limit case. See issue #24996.
_FALLBACK_EXHAUSTED_COOLDOWN_S = 5.0
def _ra():
@@ -128,23 +115,6 @@ def _is_openai_codex_backend(agent) -> bool:
)
def _validated_openrouter_provider_sort(raw_sort: Any) -> Optional[str]:
"""Return a normalized OpenRouter provider.sort value or None."""
if not isinstance(raw_sort, str):
return None
sort_value = raw_sort.strip().lower()
if not sort_value:
return None
if sort_value in _OPENROUTER_PROVIDER_SORT_VALUES:
return sort_value
logger.warning(
"Ignoring invalid OpenRouter provider.sort value %r (allowed: %s)",
raw_sort,
", ".join(sorted(_OPENROUTER_PROVIDER_SORT_VALUES)),
)
return None
def _env_float(name: str, default: float) -> float:
try:
return float(os.getenv(name, str(default)))
@@ -259,11 +229,6 @@ def interruptible_api_call(agent, api_kwargs: dict):
invalidate_runtime_client(region)
raise
result["response"] = normalize_converse_response(raw_response)
elif agent.provider == "moa":
# MoA is a virtual chat-completions provider backed by the
# in-process MoAClient facade. Do not rebuild a request-local
# OpenAI client from the virtual runtime metadata.
result["response"] = agent.client.chat.completions.create(**api_kwargs)
else:
request_client = _set_request_client(
agent._create_request_openai_client(
@@ -733,9 +698,8 @@ def build_api_kwargs(agent, api_messages: list) -> dict:
_prefs["ignore"] = agent.providers_ignored
if agent.providers_order:
_prefs["order"] = agent.providers_order
_provider_sort = _validated_openrouter_provider_sort(agent.provider_sort)
if _provider_sort:
_prefs["sort"] = _provider_sort
if agent.provider_sort:
_prefs["sort"] = agent.provider_sort
if agent.provider_require_parameters:
_prefs["require_parameters"] = True
if agent.provider_data_collection:
@@ -1051,23 +1015,18 @@ def build_assistant_message(agent, assistant_message, finish_reason: str) -> dic
"arguments": tool_call.function.arguments
},
}
# Tool-call arguments are intentionally NOT redacted here. This
# dict enters the in-memory conversation history that is replayed
# to the model on every subsequent turn AND persisted to state.db,
# which is itself replayed verbatim on session resume
# (get_messages_as_conversation). Masking a credential to `***`
# here poisons that replay: the model reads back its own
# `PGPASSWORD='***' psql ...` call and copies the placeholder into
# the next tool call, breaking every credential-dependent command
# on the second turn (#43083). The masking also provided no real
# protection — the same secret still leaks verbatim through tool
# OUTPUT (file contents, command output, diffs, the compaction
# block), none of which this pass ever touched. Keeping secrets
# out of the replayable store is a separate tokenization/vault
# concern, not something arg-redaction can deliver without
# breaking replay. Storage-time redaction remains governed by the
# `security.redact_secrets` toggle. (#19798 introduced this;
# #43083 removed it.)
# Defence-in-depth: redact credentials from tool call arguments
# before they enter conversation history. Tool execution uses the
# raw API response object, not this dict, so redacting the
# persisted shape is safe and only affects storage. Catches the
# case where a model accidentally inlines a secret into a tool
# call (e.g. `terminal(command="curl -H 'Authorization: Bearer
# sk-...'")`). (#19798)
if isinstance(tc_dict["function"]["arguments"], str):
from agent.redact import redact_sensitive_text
tc_dict["function"]["arguments"] = redact_sensitive_text(
tc_dict["function"]["arguments"]
)
# Preserve extra_content (e.g. Gemini thought_signature) so it
# is sent back on subsequent API calls. Without this, Gemini 3
# thinking models reject the request with a 400 error.
@@ -1134,22 +1093,8 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
if (not fallback_already_active) or (primary_provider and current_provider == primary_provider):
agent._rate_limited_until = time.monotonic() + 60
if agent._fallback_index >= len(agent._fallback_chain):
# Chain exhausted. If we actually walked a non-empty chain and the
# failure was NOT a rate-limit/billing event (those already armed
# their own 60s cooldown above), arm a short cooldown so the next
# turn's restore_primary_runtime stays gated instead of resetting
# _fallback_index=0 and re-marshaling the whole context across every
# provider again. Guards the cross-turn replay storm in #24996.
if (
len(agent._fallback_chain) > 0
and reason not in {FailoverReason.rate_limit, FailoverReason.billing}
):
_existing_cooldown = getattr(agent, "_rate_limited_until", 0) or 0
agent._rate_limited_until = max(
_existing_cooldown,
time.monotonic() + _FALLBACK_EXHAUSTED_COOLDOWN_S,
)
return False
fb = agent._fallback_chain[agent._fallback_index]
agent._fallback_index += 1
fb_provider = (fb.get("provider") or "").strip().lower()
@@ -1265,16 +1210,14 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
agent._transport_cache.clear()
agent._fallback_activated = True
# Rebind the credential pool to the fallback provider when the provider
# changes. Keeping the primary pool attached would make downstream
# recovery (rate_limit / billing / auth) mutate the wrong credential
# set and can overwrite the fallback's base_url back to the primary
# endpoint. See #33163.
#
# Clear the credential pool when the fallback provider doesn't match
# the pool's provider. The pool was seeded for the primary provider;
# leaving it attached means downstream recovery (rate_limit / billing /
# auth) calls ``_swap_credential`` with a primary entry which overwrites
# the agent's ``base_url`` back to the primary's endpoint — every
# fallback request then 404s against the wrong host. See #33163.
# When the fallback shares the pool's provider (e.g. both openrouter
# entries with different routing) the pool is preserved. When the
# providers differ, load the fallback provider's own pool if one exists
# so provider-specific rotation continues to work after the switch.
# entries with different routing) the pool is preserved.
_existing_pool = getattr(agent, "_credential_pool", None)
if _existing_pool is not None:
_pool_provider = (getattr(_existing_pool, "provider", "") or "").strip().lower()
@@ -1285,22 +1228,6 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
fb_provider, fb_model, _pool_provider,
)
agent._credential_pool = None
if getattr(agent, "_credential_pool", None) is None:
try:
from agent.credential_pool import load_pool
fallback_pool = load_pool(fb_provider)
if fallback_pool and fallback_pool.has_credentials():
agent._credential_pool = fallback_pool
logger.info(
"Fallback to %s/%s: attached fallback credential pool",
fb_provider, fb_model,
)
except Exception as exc:
logger.debug(
"Fallback to %s/%s: could not attach credential pool: %s",
fb_provider, fb_model, exc,
)
# Honor per-provider / per-model request_timeout_seconds for the
# fallback target (same knob the primary client uses). None = use
@@ -1531,9 +1458,8 @@ def handle_max_iterations(agent, messages: list, api_call_count: int) -> str:
provider_preferences["ignore"] = agent.providers_ignored
if agent.providers_order:
provider_preferences["order"] = agent.providers_order
_provider_sort = _validated_openrouter_provider_sort(agent.provider_sort)
if _provider_sort:
provider_preferences["sort"] = _provider_sort
if agent.provider_sort:
provider_preferences["sort"] = agent.provider_sort
if provider_preferences and (
(agent.provider or "").strip().lower() == "openrouter"
or agent._is_openrouter_url()
@@ -1912,6 +1838,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
stream_kwargs = {
**api_kwargs,
"stream": True,
"stream_options": {"include_usage": True},
"timeout": _httpx.Timeout(
connect=_conn_cap,
read=_stream_read_timeout,
@@ -1919,14 +1846,6 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
pool=_conn_cap,
),
}
# OpenAI's `stream_options={"include_usage": True}` drives usage
# accounting on OpenAI-compatible endpoints (incl. the Gemini OpenAI
# compat shim and aggregators like OpenRouter). Google's *native*
# Gemini REST endpoint rejects the keyword outright
# (`Completions.create() got an unexpected keyword argument
# 'stream_options'`), so omit it only for that endpoint.
if not is_native_gemini_base_url(agent.base_url):
stream_kwargs["stream_options"] = {"include_usage": True}
request_client = _set_request_client(
agent._create_request_openai_client(
reason="chat_completion_stream_request",
@@ -2327,15 +2246,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
_fire_first_delta()
agent._fire_reasoning_delta(thinking_text)
# Return the native Anthropic Message for downstream processing.
# If the stream was interrupted (the event loop broke out above on
# agent._interrupt_requested), do NOT call get_final_message() — on
# a partially-consumed stream the SDK may hang draining remaining
# events or return a Message with incomplete tool_use blocks (partial
# JSON in `input`). The outer poll loop raises InterruptedError, so
# this return value is discarded anyway.
if agent._interrupt_requested:
return None
# Return the native Anthropic Message for downstream processing
return stream.get_final_message()
def _call():
@@ -2480,19 +2391,12 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
diag=request_client_holder.get("diag"),
)
_close_request_client_once("stream_mid_tool_retry_cleanup")
if agent.api_mode == "anthropic_messages":
try:
agent._anthropic_client.close()
agent._rebuild_anthropic_client()
except Exception:
pass
else:
try:
agent._replace_primary_openai_client(
reason="stream_mid_tool_retry_pool_cleanup"
)
except Exception:
pass
try:
agent._replace_primary_openai_client(
reason="stream_mid_tool_retry_pool_cleanup"
)
except Exception:
pass
continue
# SSE error events from proxies (e.g. OpenRouter sends
@@ -2540,19 +2444,12 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
_close_request_client_once("stream_retry_cleanup")
# Also rebuild the primary client to purge
# any dead connections from the pool.
if agent.api_mode == "anthropic_messages":
try:
agent._anthropic_client.close()
agent._rebuild_anthropic_client()
except Exception:
pass
else:
try:
agent._replace_primary_openai_client(
reason="stream_retry_pool_cleanup"
)
except Exception:
pass
try:
agent._replace_primary_openai_client(
reason="stream_retry_pool_cleanup"
)
except Exception:
pass
continue
# Retries exhausted. Log the final failure with
# full diagnostic detail (chain, headers,
@@ -2664,17 +2561,6 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
_stream_stale_timeout = max(_stream_stale_timeout_base, 240.0)
else:
_stream_stale_timeout = _stream_stale_timeout_base
# Reasoning-model floor: known reasoning models (Nemotron 3 Ultra,
# OpenAI o1/o3, Anthropic Opus 4.x thinking, DeepSeek R1, Qwen QwQ,
# xAI Grok reasoning, etc.) routinely exceed the default 180s chat-
# model threshold during their thinking phase. The cloud gateway
# upstream kills the socket first, surfacing as BrokenPipeError.
# Raises the floor only — never overrides explicit user config
# (handled by get_provider_stale_timeout above).
from agent.reasoning_timeouts import get_reasoning_stale_timeout_floor
_reasoning_floor = get_reasoning_stale_timeout_floor(api_kwargs.get("model"))
if _reasoning_floor is not None:
_stream_stale_timeout = max(_stream_stale_timeout, _reasoning_floor)
t = threading.Thread(target=_call, daemon=True)
t.start()
@@ -2723,17 +2609,10 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
pass
# Rebuild the primary client too — its connection pool
# may hold dead sockets from the same provider outage.
if agent.api_mode == "anthropic_messages":
try:
agent._anthropic_client.close()
agent._rebuild_anthropic_client()
except Exception:
pass
else:
try:
agent._replace_primary_openai_client(reason="stale_stream_pool_cleanup")
except Exception:
pass
try:
agent._replace_primary_openai_client(reason="stale_stream_pool_cleanup")
except Exception:
pass
# Reset the timer so we don't kill repeatedly while
# the inner thread processes the closure.
last_chunk_time["t"] = time.time()
@@ -2809,30 +2688,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
role="assistant", content=_partial_text, tool_calls=None,
reasoning_content=None,
)
# Detect provider output-layer content filtering (e.g. MiniMax
# "output new_sensitive (1027)", Azure/OpenAI content_filter,
# Anthropic safety refusal). The raw error is about to be
# swallowed into a finish_reason=length stub, so classify it HERE
# while we still have it and stamp the stub. Retrying such a
# content-deterministic filter on the same primary just re-hits
# the filter — the conversation loop reads this tag and activates
# the fallback chain instead of burning continuation retries.
# error_classifier is the single source of truth for "what counts
# as a content filter" (#32421).
_content_filter_terminated = False
try:
from agent.error_classifier import classify_api_error, FailoverReason
_cls = classify_api_error(
result["error"],
provider=str(getattr(agent, "provider", "") or ""),
model=str(getattr(agent, "model", "") or ""),
)
_content_filter_terminated = (
_cls.reason == FailoverReason.content_policy_blocked
)
except Exception:
_content_filter_terminated = False
_stub = SimpleNamespace(
return SimpleNamespace(
id=PARTIAL_STREAM_STUB_ID,
model=getattr(agent, "model", "unknown"),
choices=[SimpleNamespace(
@@ -2841,9 +2697,6 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
usage=None,
_dropped_tool_names=_partial_names or None,
)
if _content_filter_terminated:
_stub._content_filter_terminated = True
return _stub
raise result["error"]
return result["response"]

View File

@@ -60,8 +60,6 @@ from dataclasses import dataclass
from pathlib import Path
from typing import Any, Optional
from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags
logger = logging.getLogger("hermes.coding_context")
CODING_TOOLSET = "coding"
@@ -649,14 +647,12 @@ def _enabled_mcp_servers(config: Optional[dict[str, Any]]) -> list[str]:
def _git(cwd: Path, *args: str) -> str:
_popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
try:
out = subprocess.run(
["git", "-C", str(cwd), *args],
capture_output=True,
text=True,
timeout=_GIT_TIMEOUT,
**_popen_kwargs,
)
except (OSError, subprocess.SubprocessError):
return ""

View File

@@ -1,156 +0,0 @@
"""Live session context-window breakdown for UI surfaces.
Estimates how the next provider request is composed: system prompt tiers,
tool schemas, and conversation history. Uses the same rough char/4 heuristic
as ``agent.model_metadata.estimate_request_tokens_rough`` so numbers align
with compression thresholds — not exact tokenizer counts.
"""
from __future__ import annotations
import json
import re
from typing import Any, Dict, List, Optional, Sequence, Tuple
_SKILLS_BLOCK_RE = re.compile(r"<available_skills>.*?</available_skills>", re.DOTALL)
_SUBAGENT_TOOL_NAMES = frozenset({"delegate_task"})
_CATEGORY_COLORS = {
"system_prompt": "var(--context-usage-system)",
"tool_definitions": "var(--context-usage-tools)",
"rules": "var(--context-usage-rules)",
"skills": "var(--context-usage-skills)",
"mcp": "var(--context-usage-mcp)",
"subagent_definitions": "var(--context-usage-subagents)",
"memory": "var(--context-usage-memory)",
"conversation": "var(--context-usage-conversation)",
}
def _chars_to_tokens(text: str) -> int:
if not text:
return 0
return (len(text) + 3) // 4
def _json_tokens(value: Any) -> int:
if not value:
return 0
return _chars_to_tokens(json.dumps(value, ensure_ascii=False))
def _tool_name(tool: dict) -> str:
fn = tool.get("function") if isinstance(tool, dict) else None
if isinstance(fn, dict):
return str(fn.get("name") or "")
return str(tool.get("name") or "")
def _split_tools(tools: Sequence[dict]) -> Tuple[List[dict], List[dict], List[dict]]:
builtin: List[dict] = []
mcp: List[dict] = []
subagent: List[dict] = []
for tool in tools:
name = _tool_name(tool)
if name.startswith("mcp_"):
mcp.append(tool)
elif name in _SUBAGENT_TOOL_NAMES:
subagent.append(tool)
else:
builtin.append(tool)
return builtin, mcp, subagent
def _memory_blocks(agent: Any) -> Tuple[str, str]:
memory_block = ""
user_block = ""
store = getattr(agent, "_memory_store", None)
if store is None:
return memory_block, user_block
try:
if getattr(agent, "_memory_enabled", True):
memory_block = store.format_for_system_prompt("memory") or ""
if getattr(agent, "_user_profile_enabled", True):
user_block = store.format_for_system_prompt("user") or ""
except Exception:
pass
return memory_block, user_block
def _strip_blocks(text: str, *blocks: str) -> str:
out = text
for block in blocks:
if block:
out = out.replace(block, "")
return out.strip()
def compute_session_context_breakdown(
agent: Any,
messages: Optional[List[dict]] = None,
) -> Dict[str, Any]:
"""Return a Cursor-style context usage breakdown for one live agent."""
from agent.model_metadata import estimate_messages_tokens_rough
from agent.system_prompt import build_system_prompt_parts
parts = build_system_prompt_parts(agent)
stable = parts.get("stable", "") or ""
context = parts.get("context", "") or ""
volatile = parts.get("volatile", "") or ""
skills_match = _SKILLS_BLOCK_RE.search(stable)
skills_index = skills_match.group(0) if skills_match else ""
memory_block, user_block = _memory_blocks(agent)
memory_text = "\n\n".join(part for part in (memory_block, user_block) if part).strip()
system_core = _strip_blocks(stable, skills_index)
system_tail = _strip_blocks(volatile, memory_block, user_block)
system_prompt_text = "\n\n".join(part for part in (system_core, system_tail) if part).strip()
tools = list(getattr(agent, "tools", None) or [])
builtin_tools, mcp_tools, subagent_tools = _split_tools(tools)
conversation_tokens = estimate_messages_tokens_rough(messages or [])
categories = [
("system_prompt", "System prompt", _chars_to_tokens(system_prompt_text)),
("tool_definitions", "Tool definitions", _json_tokens(builtin_tools)),
("rules", "Rules", _chars_to_tokens(context)),
("skills", "Skills", _chars_to_tokens(skills_index)),
("mcp", "MCP", _json_tokens(mcp_tools)),
("subagent_definitions", "Subagent definitions", _json_tokens(subagent_tools)),
("memory", "Memory", _chars_to_tokens(memory_text)),
("conversation", "Conversation", conversation_tokens),
]
estimated_total = sum(tokens for _, _, tokens in categories)
comp = getattr(agent, "context_compressor", None)
context_max = int(getattr(comp, "context_length", 0) or 0) if comp else 0
measured_used = int(getattr(comp, "last_prompt_tokens", 0) or 0) if comp else 0
context_used = measured_used if measured_used > 0 else estimated_total
context_percent = (
max(0, min(100, round(context_used / context_max * 100)))
if context_max
else 0
)
return {
"categories": [
{
"color": _CATEGORY_COLORS.get(category_id, "var(--ui-text-tertiary)"),
"id": category_id,
"label": label,
"tokens": tokens,
}
for category_id, label, tokens in categories
if tokens > 0
],
"context_max": context_max,
"context_percent": context_percent,
"context_used": context_used,
"estimated_total": estimated_total,
"model": getattr(agent, "model", "") or "",
}

View File

@@ -12,7 +12,6 @@ from pathlib import Path
from typing import Awaitable, Callable
from agent.model_metadata import estimate_tokens_rough
from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags
_QUOTED_REFERENCE_VALUE = r'(?:`[^`\n]+`|"[^"\n]+"|\'[^\'\n]+\')'
REFERENCE_PATTERN = re.compile(
@@ -291,7 +290,6 @@ def _expand_git_reference(
args: list[str],
label: str,
) -> tuple[str | None, str | None]:
_popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
try:
result = subprocess.run(
["git", *args],
@@ -300,7 +298,6 @@ def _expand_git_reference(
text=True,
timeout=30,
stdin=subprocess.DEVNULL,
**_popen_kwargs,
)
except subprocess.TimeoutExpired:
return f"{ref.raw}: git command timed out (30s)", None
@@ -328,9 +325,9 @@ async def _fetch_url_content(
async def _default_url_fetcher(url: str) -> str:
from tools.web_tools import web_extract_tool
raw = await web_extract_tool([url], format="markdown")
raw = await web_extract_tool([url], format="markdown", use_llm_processing=True)
payload = json.loads(raw)
docs = payload.get("results", [])
docs = payload.get("data", {}).get("documents", [])
if not docs:
return ""
doc = docs[0]
@@ -486,7 +483,6 @@ def _iter_visible_entries(path: Path, cwd: Path, limit: int) -> list[Path]:
def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
_popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
try:
result = subprocess.run(
["rg", "--files", str(path.relative_to(cwd))],
@@ -495,7 +491,6 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
text=True,
timeout=10,
stdin=subprocess.DEVNULL,
**_popen_kwargs,
)
except (FileNotFoundError, OSError, subprocess.TimeoutExpired):
return None

View File

@@ -288,29 +288,6 @@ def replay_compression_warning(agent: Any) -> None:
pass
def conversation_history_after_compression(agent: Any, messages: list) -> Optional[list]:
"""Return the correct flush baseline after a compression boundary.
Legacy compression rotates to a fresh child session. That child has not
seen the compacted transcript through the normal same-turn flush path yet,
so callers must clear ``conversation_history`` to ``None`` and let the next
persistence call write the whole compacted list.
In-place compaction is different: ``archive_and_compact()`` has already
soft-archived the previous active rows and inserted ``messages`` as the new
active live transcript under the same session id. If the same agent turn
continues with ``conversation_history=None``, the identity-based flush path
treats those already-persisted compacted dicts as new and appends them a
second time, doubling the active context and retriggering compression.
A shallow copy is intentional: it captures the current compacted dict
identities as history while allowing later same-turn appends to remain new.
"""
if bool(getattr(agent, "_last_compaction_in_place", False)):
return list(messages)
return None
def compress_context(
agent: Any,
messages: list,

View File

@@ -28,7 +28,6 @@ import uuid
from typing import Any, Dict, List, Optional
from agent.codex_responses_adapter import _summarize_user_message_for_log
from agent.conversation_compression import conversation_history_after_compression
from agent.display import KawaiiSpinner
from agent.error_classifier import FailoverReason, classify_api_error
from agent.iteration_budget import IterationBudget
@@ -588,13 +587,6 @@ def run_conversation(
compression_attempts = 0
_turn_exit_reason = "unknown" # Diagnostic: why the loop ended
# Per-turn tally of consecutive successful credential-pool token refreshes,
# keyed by (provider, pool-entry-id). A persistent upstream 401 lets
# ``try_refresh_current()`` "succeed" forever on a single-entry OAuth pool,
# so this tally caps same-entry refreshes and lets the fallback chain take
# over instead of spinning. Reset here so each turn starts fresh. See #26080.
agent._auth_pool_refresh_counts = {}
# Optional opt-in runtime: if api_mode == codex_app_server, hand the
# turn to the codex app-server subprocess (terminal/file ops/patching
# all run inside Codex). Default Hermes path is bypassed entirely.
@@ -835,6 +827,7 @@ def run_conversation(
aggregator=moa_config.get("aggregator") or {},
temperature=float(moa_config.get("reference_temperature", 0.6) or 0.6),
aggregator_temperature=float(moa_config.get("aggregator_temperature", 0.4) or 0.4),
max_tokens=int(moa_config.get("max_tokens", 4096) or 4096),
)
if _moa_context:
for _msg in reversed(api_messages):
@@ -1699,56 +1692,6 @@ def run_conversation(
if agent.api_mode in {"chat_completions", "bedrock_converse", "anthropic_messages"}:
assistant_message = _trunc_msg
# ── Content-filter stream stall → fallback (#32421) ──
# When the provider's output-layer safety filter (e.g.
# MiniMax "output new_sensitive (1027)", Azure
# content_filter) kills the stream mid-delivery, the
# raw error was classified at the swallow point and the
# stub tagged ``_content_filter_terminated``. This
# filter is content-deterministic — continuation
# retries against the SAME primary just re-hit it and
# burn paid attempts (the loop used to give up with
# "Response remained truncated after 3 continuation
# attempts" and never consult the fallback chain).
# Escalate to the configured fallback BEFORE retrying.
_cf_terminated = getattr(
response, "_content_filter_terminated", False
)
if (
_cf_terminated
and agent._fallback_index < len(agent._fallback_chain)
):
agent._vprint(
f"{agent.log_prefix}🛡️ Content filter terminated "
f"stream — activating fallback provider...",
force=True,
)
agent._emit_status(
"Content filter terminated stream; switching to fallback..."
)
if agent._try_activate_fallback():
# Roll the partial content (if any was already
# appended in a prior continuation pass) back to
# the last clean turn so the fallback provider
# gets a coherent continuation point.
if truncated_response_parts:
messages = agent._get_messages_up_to_last_assistant(messages)
agent._session_messages = messages
length_continue_retries = 0
truncated_response_parts = []
retry_count = 0
compression_attempts = 0
_retry.primary_recovery_attempted = False
_retry.restart_with_rebuilt_messages = True
break
# No fallback available — fall through to normal
# continuation (best-effort, may loop).
agent._vprint(
f"{agent.log_prefix}⚠️ No fallback provider "
f"configured — retrying with same provider "
f"(may re-hit filter)...",
force=True,
)
if assistant_message is not None and not _trunc_has_tool_calls:
length_continue_retries += 1
interim_msg = agent._build_assistant_message(assistant_message, finish_reason)
@@ -2068,21 +2011,9 @@ def run_conversation(
agent.thinking_callback("")
api_elapsed = time.time() - api_start_time
agent._vprint(f"{agent.log_prefix}⚡ Interrupted during API call.", force=True)
interrupted = True
# Preserve any assistant text already streamed to the user
# before the stop landed. Dropping it leaves history with no
# record of the half-finished reply on screen, so the next turn
# the model "forgets" what it just said — exactly what users hit
# when they stop to redirect mid-response.
_partial = agent._strip_think_blocks(
getattr(agent, "_current_streamed_assistant_text", "") or ""
).strip()
if _partial:
messages.append({"role": "assistant", "content": _partial})
final_response = _partial
else:
final_response = f"{INTERRUPT_WAITING_FOR_MODEL_PREFIX}{api_elapsed:.1f}s elapsed)."
agent._persist_session(messages, conversation_history)
interrupted = True
final_response = f"{INTERRUPT_WAITING_FOR_MODEL_PREFIX}{api_elapsed:.1f}s elapsed)."
break
except Exception as api_error:
@@ -2316,15 +2247,6 @@ def run_conversation(
# "unknown variant `image_url`, expected `text`".
"unknown variant `image_url`, expected `text`",
"unknown variant image_url, expected text",
# OpenRouter routes a request to upstream endpoints and,
# when none of the candidate endpoints for the model accept
# image input, returns HTTP 404 "No endpoints found that
# support image input". Without this phrase the agent never
# strips the images, the retry loop re-sends the same
# rejected request until exhaustion, and the gateway leaves
# every subsequent message queued behind the stuck turn —
# the P1 in issue #21160. The 404 passes the 4xx gate below.
"no endpoints found that support image input",
)
_err_lower = _err_body.lower()
_looks_like_image_rejection = any(
@@ -2896,9 +2818,10 @@ def run_conversation(
approx_tokens=approx_tokens,
task_id=effective_task_id,
)
conversation_history = conversation_history_after_compression(
agent, messages
)
# Compression created a new session — clear history
# so _flush_messages_to_session_db writes compressed
# messages to the new session, not skipping them.
conversation_history = None
if len(messages) < original_len or old_ctx > _reduced_ctx:
agent._buffer_status(
f"🗜️ Context reduced to {_reduced_ctx:,} tokens "
@@ -2910,25 +2833,15 @@ def run_conversation(
# Fall through to normal error handling if compression
# is exhausted or didn't help.
# Eager fallback for rate-limit errors (429 or quota exhaustion)
# and transport errors (connection failure / timeout / provider
# overloaded). Rate limits and billing: switch immediately —
# the primary provider won't recover within the retry window.
# Transport errors: allow 1 retry first (transient hiccups
# recover), then fall back if the provider is truly unreachable.
# Eager fallback for rate-limit errors (429 or quota exhaustion).
# When a fallback model is configured, switch immediately instead
# of burning through retries with exponential backoff -- the
# primary provider won't recover within the retry window.
is_rate_limited = classified.reason in {
FailoverReason.rate_limit,
FailoverReason.billing,
}
_is_transport_failure = classified.reason in {
FailoverReason.timeout,
FailoverReason.overloaded,
}
_should_fallback = (
is_rate_limited
or (_is_transport_failure and retry_count >= 2)
)
if _should_fallback and agent._fallback_index < len(agent._fallback_chain):
if is_rate_limited and agent._fallback_index < len(agent._fallback_chain):
# Don't eagerly fallback if credential pool rotation may
# still recover. See _pool_may_recover_from_rate_limit
# for the single-credential-pool and CloudCode-quota
@@ -2943,10 +2856,6 @@ def run_conversation(
agent._buffer_status(
"⚠️ Billing or credits exhausted — switching to fallback provider..."
)
elif _is_transport_failure:
agent._buffer_status(
"⚠️ Provider unreachable — switching to fallback provider..."
)
else:
agent._buffer_status("⚠️ Rate limited — switching to fallback provider...")
if agent._try_activate_fallback(reason=classified.reason):
@@ -3121,9 +3030,10 @@ def run_conversation(
messages, system_message, approx_tokens=approx_tokens,
task_id=effective_task_id,
)
conversation_history = conversation_history_after_compression(
agent, messages
)
# Compression created a new session — clear history
# so _flush_messages_to_session_db writes compressed
# messages to the new session, not skipping them.
conversation_history = None
# Re-estimate tokens after compression. Same-message-count
# compression (tool-result pruning, in-place summarization)
@@ -3287,9 +3197,10 @@ def run_conversation(
messages, system_message, approx_tokens=approx_tokens,
task_id=effective_task_id,
)
conversation_history = conversation_history_after_compression(
agent, messages
)
# Compression created a new session — clear history
# so _flush_messages_to_session_db writes compressed
# messages to the new session, not skipping them.
conversation_history = None
# Re-estimate tokens after compression. Same-message-count
# compression (tool-result pruning, in-place summarization)
@@ -3551,13 +3462,6 @@ def run_conversation(
):
_retry.primary_recovery_attempted = True
retry_count = 0
# Primary transport recovery starts a fresh attempt
# cycle. Re-open fallback state so a follow-on 429 can
# still activate fallback_providers after stale
# pre-recovery fallback/credential-pool bookkeeping.
_retry.has_retried_429 = False
agent._fallback_index = 0
agent._fallback_activated = False
continue
# Try fallback before giving up entirely
if agent._has_pending_fallback():
@@ -3623,65 +3527,6 @@ def run_conversation(
force=True,
)
# Detect thinking-timeout pattern: a known reasoning model
# hit a transport-layer error before the first content
# token arrived. Distinct from _is_stream_drop above
# (which fires for large file-write stream drops) and
# from any classifier reason that's not a transport
# timeout. Reuses the reasoning-model allowlist from
# agent/reasoning_timeouts.py (Fixes #52217) so the
# trigger is consistent with what the per-model
# stale-timeout floor covers. After the classifier
# override at agent/error_classifier.py:720-738 (this
# PR), transport disconnects on reasoning models route
# to FailoverReason.timeout rather than
# context_overflow, so this branch actually fires.
# Detection and message text live in
# agent.thinking_timeout_guidance so they're
# unit-testable without driving the full retry loop.
# (Part 2 of Fixes #52310.)
from agent.thinking_timeout_guidance import (
is_thinking_timeout,
)
_is_thinking_timeout = is_thinking_timeout(
classified,
_model,
error_msg,
)
if _is_thinking_timeout:
agent._vprint(
f"{agent.log_prefix} 💡 The model's thinking "
f"phase exceeded the upstream proxy's idle "
f"timeout before the first content token "
f"arrived. This is a known issue with "
f"reasoning models behind cloud gateways "
f"(NVIDIA NIM, OpenAI, Anthropic, DeepSeek).",
force=True,
)
agent._vprint(
f"{agent.log_prefix} Workarounds in priority order:",
force=True,
)
agent._vprint(
f"{agent.log_prefix} 1. Set "
f"`providers.{_provider}.models.{_model}.stale_timeout_seconds: 900` "
f"in `~/.hermes/config.yaml` to extend the per-call "
f"timeout. (Hermes's built-in floor is 600s for "
f"known reasoning models — if you still see this "
f"after raising, the upstream cap is even shorter.)",
force=True,
)
agent._vprint(
f"{agent.log_prefix} 2. Lower `reasoning_budget` or set "
f"`reasoning_effort: medium` on this model if the provider supports it.",
force=True,
)
agent._vprint(
f"{agent.log_prefix} 3. Use a smaller / faster reasoning "
f"model if the task doesn't require deep thinking.",
force=True,
)
logger.error(
"%sAPI call failed after %s retries. %s | provider=%s model=%s msgs=%s tokens=~%s",
agent.log_prefix, max_retries, _final_summary,
@@ -3698,22 +3543,7 @@ def run_conversation(
_final_response += f"\n\n{_billing_guidance}"
else:
_final_response = f"API call failed after {max_retries} retries: {_final_summary}"
if _is_thinking_timeout:
# Thinking-timeout guidance overrides the generic
# stream-drop guidance — the latter is wrong for
# this case (it suggests splitting large file
# writes, which isn't what happened). See the
# reasoning-model override at
# agent/error_classifier.py:720-738 and the
# detection block above for context.
from agent.thinking_timeout_guidance import (
build_thinking_timeout_guidance,
)
_final_response += build_thinking_timeout_guidance(
provider=_provider,
model=_model,
)
elif _is_stream_drop:
if _is_stream_drop:
_final_response += (
"\n\nThe provider's stream connection keeps "
"dropping — this often happens when generating "
@@ -3745,12 +3575,7 @@ def run_conversation(
_ra_raw = _resp_headers.get("retry-after") or _resp_headers.get("Retry-After")
if _ra_raw:
try:
# Cap at 10 minutes. Anthropic Tier 1 input-token
# buckets reset in ~171s, so a 120s cap caused us to
# retry before the actual reset window and re-trip the
# limit. 600s covers all realistic provider reset
# windows while still rejecting pathological values. (#26293)
_retry_after = min(float(_ra_raw), 600)
_retry_after = min(float(_ra_raw), 120) # Cap at 2 minutes
except (TypeError, ValueError):
pass
wait_time = _retry_after if _retry_after else jittered_backoff(retry_count, base_delay=2.0, max_delay=60.0)
@@ -3831,17 +3656,6 @@ def run_conversation(
_retry.restart_with_compressed_messages = False
continue
if _retry.restart_with_rebuilt_messages:
# A content-filter stream stall (#32421) was escalated to the
# fallback chain and the partial content rolled back. Re-issue
# the API call against the now-active fallback provider. Refund
# the budget/count for the stalled attempt so the fallback gets a
# fair turn.
api_call_count -= 1
agent.iteration_budget.refund()
_retry.restart_with_rebuilt_messages = False
continue
if _retry.restart_with_length_continuation:
# Progressively boost the output token budget on each retry.
# Retry 1 → 2× base, retry 2 → 3× base, capped at 32 768.
@@ -4416,9 +4230,10 @@ def run_conversation(
approx_tokens=agent.context_compressor.last_prompt_tokens,
task_id=effective_task_id,
)
conversation_history = conversation_history_after_compression(
agent, messages
)
# Compression created a new session — clear history so
# _flush_messages_to_session_db writes compressed messages
# to the new session (see preflight compression comment).
conversation_history = None
# Save session log incrementally (so progress is visible even if interrupted)
agent._session_messages = messages
@@ -4460,11 +4275,7 @@ def run_conversation(
"as final response"
)
final_response = _recovered
# Streaming delivered a fragment, not a confirmed
# final preview. Leave response_previewed false so
# gateway fallback delivery can send the recovered
# text plus the abnormal-turn explanation.
agent._response_was_previewed = False
agent._response_was_previewed = True
break
# If the previous turn already delivered real content alongside
@@ -4709,20 +4520,14 @@ def run_conversation(
# status from earlier failed attempts in this turn.
agent._clear_status_buffer()
from agent.agent_runtime_helpers import (
intent_ack_continuation_mode,
)
_ack_mode = intent_ack_continuation_mode(agent)
if (
_ack_mode != "off"
agent.api_mode == "codex_responses"
and agent.valid_tool_names
and codex_ack_continuations < 2
and agent._looks_like_codex_intermediate_ack(
user_message=user_message,
assistant_content=final_response,
messages=messages,
require_workspace=(_ack_mode == "codex_only"),
)
):
codex_ack_continuations += 1
@@ -4803,11 +4608,7 @@ def run_conversation(
"_verification_stop_synthetic": True,
})
agent._session_messages = messages
# Run the verification-stop loop silently — the nudge is an
# internal turn that should not add noise to the user's
# terminal. Keep a debug breadcrumb in agent.log for tracing.
logger.debug("verification stop-loop nudge issued (attempt %d)",
agent._verification_stop_nudges)
agent._emit_status("↻ Verification required before finishing")
continue
messages.append(final_msg)

View File

@@ -21,14 +21,8 @@ from pathlib import Path
from types import SimpleNamespace
from typing import Any
from openai.types.chat.chat_completion_message_tool_call import (
ChatCompletionMessageToolCall,
Function,
)
from agent.file_safety import get_read_block_error, is_write_denied
from agent.redact import redact_sensitive_text
from tools.environments.local import hermes_subprocess_env
ACP_MARKER_BASE_URL = "acp://copilot"
_DEFAULT_TIMEOUT_SECONDS = 900.0
@@ -100,10 +94,7 @@ def _resolve_home_dir() -> str:
def _build_subprocess_env() -> dict[str, str]:
# Copilot ACP is a model-driving CLI executor: it legitimately needs LLM
# provider credentials. Route through the central helper so Tier-1 secrets
# (gateway bot tokens, GitHub auth, infra) are still stripped (#29157).
env = hermes_subprocess_env(inherit_credentials=True)
env = os.environ.copy()
home = _resolve_home_dir()
env["HOME"] = home
from hermes_constants import apply_subprocess_home_env
@@ -233,73 +224,11 @@ def _render_message_content(content: Any) -> str:
return str(content).strip()
def _build_openai_tool_call(
*,
call_id: str,
name: str,
arguments: str,
) -> ChatCompletionMessageToolCall:
"""Build an OpenAI-compatible tool-call object for downstream handling."""
return ChatCompletionMessageToolCall(
id=call_id,
call_id=call_id,
response_item_id=None,
type="function",
function=Function(name=name, arguments=arguments),
)
def _completion_to_stream_chunks(completion: SimpleNamespace) -> list[SimpleNamespace]:
"""Convert a one-shot ACP response into OpenAI-style stream chunks."""
choice = completion.choices[0]
message = choice.message
tool_call_deltas = None
if message.tool_calls:
tool_call_deltas = []
for index, tool_call in enumerate(message.tool_calls):
tool_call_deltas.append(
SimpleNamespace(
index=index,
id=getattr(tool_call, "id", None),
type=getattr(tool_call, "type", "function"),
function=SimpleNamespace(
name=getattr(tool_call.function, "name", None),
arguments=getattr(tool_call.function, "arguments", None),
),
)
)
delta = SimpleNamespace(
role="assistant",
content=message.content or None,
tool_calls=tool_call_deltas,
reasoning_content=message.reasoning_content,
reasoning=message.reasoning,
)
data_chunk = SimpleNamespace(
choices=[
SimpleNamespace(
index=0,
delta=delta,
finish_reason=choice.finish_reason,
)
],
model=completion.model,
usage=None,
)
usage_chunk = SimpleNamespace(
choices=[],
model=completion.model,
usage=completion.usage,
)
return [data_chunk, usage_chunk]
def _extract_tool_calls_from_text(text: str) -> tuple[list[ChatCompletionMessageToolCall], str]:
def _extract_tool_calls_from_text(text: str) -> tuple[list[SimpleNamespace], str]:
if not isinstance(text, str) or not text.strip():
return [], ""
extracted: list[ChatCompletionMessageToolCall] = []
extracted: list[SimpleNamespace] = []
consumed_spans: list[tuple[int, int]] = []
def _try_add_tool_call(raw_json: str) -> None:
@@ -323,10 +252,12 @@ def _extract_tool_calls_from_text(text: str) -> tuple[list[ChatCompletionMessage
call_id = f"acp_call_{len(extracted)+1}"
extracted.append(
_build_openai_tool_call(
SimpleNamespace(
id=call_id,
call_id=call_id,
name=fn_name.strip(),
arguments=fn_args,
response_item_id=None,
type="function",
function=SimpleNamespace(name=fn_name.strip(), arguments=fn_args),
)
)
@@ -445,7 +376,6 @@ class CopilotACPClient:
timeout: float | None = None,
tools: list[dict[str, Any]] | None = None,
tool_choice: Any = None,
stream: bool = False,
**_: Any,
) -> Any:
prompt_text = _format_messages_as_prompt(
@@ -492,14 +422,11 @@ class CopilotACPClient:
)
finish_reason = "tool_calls" if tool_calls else "stop"
choice = SimpleNamespace(message=assistant_message, finish_reason=finish_reason)
completion = SimpleNamespace(
return SimpleNamespace(
choices=[choice],
usage=usage,
model=model or "copilot-acp",
)
if stream:
return _completion_to_stream_chunks(completion)
return completion
def _run_prompt(self, prompt_text: str, *, timeout_seconds: float) -> tuple[str, str]:
try:

View File

@@ -11,7 +11,6 @@ import uuid
import re
from dataclasses import dataclass, fields, replace
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
from hermes_constants import OPENROUTER_BASE_URL
@@ -448,63 +447,6 @@ def get_pool_strategy(provider: str) -> str:
DEFAULT_MAX_CONCURRENT_PER_CREDENTIAL = 1
def _write_through_provider_state_to_global_root(
provider_id: str, state: Dict[str, Any]
) -> None:
"""Persist a rotated OAuth ``state`` into the global-root auth.json.
Best-effort write-through for the multi-profile rotation hazard
(#48415 / #43589): nous, openai-codex, and xai-oauth rotate the
refresh_token on refresh, so when a profile pool refresh rotates a grant
it resolved from the root fallback, the rotated chain must land back in
root. Otherwise root keeps a now-revoked refresh token and every other
profile reading the stale root grant dies with ``refresh_token_reused`` /
``invalid_grant`` once its access token expires.
Only updates ``providers.<provider_id>`` in the root store; never touches
the profile store (the caller already saved that). Swallows all errors — a
failed write-through degrades to the pre-existing behavior (root stale), it
must never break the profile's own successful save. Mirrors
``hermes_cli.auth._write_through_xai_oauth_to_global_root`` (which covers
the non-pool xAI refresh path) for the credential-pool refresh path.
"""
try:
global_path = auth_mod._global_auth_file_path()
except Exception:
return
if global_path is None:
# Classic mode (profile == root); the profile save already hit root.
return
# Seat belt: under pytest, refuse to write the real user's
# ~/.hermes/auth.json even when HERMES_HOME points at a profile path
# (mirrors the read-side guard in _load_global_auth_store). Uses the
# unmodified HOME env, not Path.home() which fixtures may monkeypatch.
if os.environ.get("PYTEST_CURRENT_TEST"):
real_home_env = os.environ.get("HOME", "")
if real_home_env:
real_root = Path(real_home_env) / ".hermes" / "auth.json"
try:
if global_path.resolve(strict=False) == real_root.resolve(strict=False):
return
except Exception:
return
try:
if global_path.exists():
global_store = _load_auth_store(global_path)
else:
global_store = {}
if not isinstance(global_store, dict):
return
_store_provider_state(global_store, provider_id, dict(state), set_active=False)
auth_mod._save_auth_store(global_store, global_path)
except Exception as exc: # pragma: no cover - best effort
logger.debug(
"%s pool refresh: write-through to global root failed: %s",
provider_id,
exc,
)
class CredentialPool:
def __init__(self, provider: str, entries: List[PooledCredential]):
self.provider = provider
@@ -537,11 +479,10 @@ class CredentialPool:
self._entries[idx] = new
return
def _persist(self, *, removed_ids: Optional[List[str]] = None) -> None:
def _persist(self) -> None:
write_credential_pool(
self.provider,
[entry.to_dict() for entry in self._entries],
removed_ids=removed_ids,
)
def _is_terminal_auth_failure(
@@ -859,28 +800,6 @@ class CredentialPool:
try:
with _auth_store_lock():
auth_store = _load_auth_store()
# Decide BEFORE writing whether this profile is reading the
# grant from the global root (no own providers.<id> block) vs.
# genuinely shadowing it. A pool refresh rotates single-use
# OAuth refresh tokens, so a profile that resolved the grant
# from root MUST write the rotated chain back to root too —
# otherwise root keeps a revoked refresh token and every other
# profile reading the stale root grant dies with
# refresh_token_reused / invalid_grant once its access token
# expires. This mirrors the xAI write-through in
# hermes_cli.auth._save_xai_oauth_tokens (#43589); the pool
# refresh path is the Codex/xAI analog reported in #48415.
_wt_provider_id = {
"nous": "nous",
"openai-codex": "openai-codex",
"xai-oauth": "xai-oauth",
}.get(self.provider)
write_through_to_root = bool(_wt_provider_id) and not (
isinstance(auth_store.get("providers"), dict)
and isinstance(
auth_store["providers"].get(_wt_provider_id), dict
)
)
if self.provider == "nous":
state = _load_provider_state(auth_store, "nous")
if state is None:
@@ -936,10 +855,6 @@ class CredentialPool:
return
_save_auth_store(auth_store)
if write_through_to_root and _wt_provider_id:
_write_through_provider_state_to_global_root(
_wt_provider_id, state
)
except Exception as exc:
logger.debug("Failed to sync %s pool entry back to auth store: %s", self.provider, exc)
@@ -1125,17 +1040,13 @@ class CredentialPool:
logger.debug(
"Failed to clear terminal xAI OAuth state: %s", clear_exc
)
removed_ids = [
item.id for item in self._entries
if item.source == "loopback_pkce"
]
self._entries = [
item for item in self._entries
if item.source != "loopback_pkce"
]
if self._current_id == entry.id:
self._current_id = None
self._persist(removed_ids=removed_ids)
self._persist()
return None
# For openai-codex: same race as xAI/nous — another Hermes process
# may have consumed the refresh token between our proactive sync
@@ -1195,17 +1106,13 @@ class CredentialPool:
logger.debug(
"Failed to clear terminal Codex OAuth state: %s", clear_exc
)
removed_ids = [
item.id for item in self._entries
if item.source == "device_code"
]
self._entries = [
item for item in self._entries
if item.source != "device_code"
]
if self._current_id == entry.id:
self._current_id = None
self._persist(removed_ids=removed_ids)
self._persist()
return None
# For nous: another process may have consumed the refresh token
# between our proactive sync and the HTTP call. Re-sync from
@@ -1262,17 +1169,13 @@ class CredentialPool:
auth_mod.NOUS_DEVICE_CODE_SOURCE,
f"manual:{auth_mod.NOUS_DEVICE_CODE_SOURCE}",
}
removed_ids = [
item.id for item in self._entries
if item.source in singleton_sources
]
self._entries = [
item for item in self._entries
if item.source not in singleton_sources
]
if self._current_id == entry.id:
self._current_id = None
self._persist(removed_ids=removed_ids)
self._persist()
return None
self._mark_exhausted(entry, None)
return None
@@ -1434,7 +1337,7 @@ class CredentialPool:
pruned_ids = set(entries_to_prune)
self._entries = [e for e in self._entries if e.id not in pruned_ids]
if cleared_any:
self._persist(removed_ids=entries_to_prune)
self._persist()
return available
def _select_unlocked(self) -> Optional[PooledCredential]:
@@ -1608,11 +1511,7 @@ class CredentialPool:
replace(entry, priority=new_priority)
for new_priority, entry in enumerate(self._entries)
]
write_credential_pool(
self.provider,
[entry.to_dict() for entry in self._entries],
removed_ids=[removed.id],
)
self._persist()
if self._current_id == removed.id:
self._current_id = None
return removed
@@ -2274,11 +2173,6 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
def load_pool(provider: str) -> CredentialPool:
provider = (provider or "").strip().lower()
raw_entries = read_credential_pool(provider)
disk_ids = {
entry.get("id")
for entry in raw_entries
if isinstance(entry, dict) and entry.get("id")
}
raw_needs_sanitization = any(
isinstance(payload, dict)
and sanitize_borrowed_credential_payload(payload, provider) != payload
@@ -2307,10 +2201,8 @@ def load_pool(provider: str) -> CredentialPool:
changed |= _normalize_pool_priorities(provider, entries)
if changed:
new_ids = {entry.id for entry in entries}
write_credential_pool(
provider,
[entry.to_dict() for entry in sorted(entries, key=lambda item: item.priority)],
removed_ids=disk_ids - new_ids,
)
return CredentialPool(provider, entries)

View File

@@ -273,21 +273,6 @@ def should_run_now(now: Optional[datetime] = None) -> bool:
# Automatic state transitions (pure function, no LLM)
# ---------------------------------------------------------------------------
def _cron_referenced_skills() -> Set[str]:
"""Skill names referenced by any cron job (incl. paused/disabled).
Best-effort: a cron-module import error or corrupt jobs store must never
break the curator, so any failure yields an empty set (no protection,
but no crash).
"""
try:
from cron.jobs import referenced_skill_names as _refs
return _refs()
except Exception as e:
logger.debug("Curator could not read cron skill references: %s", e, exc_info=True)
return set()
def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int]:
"""Walk every curator-managed skill and move active/stale/archived based on
the latest real activity timestamp. Pinned skills are never touched.
@@ -307,8 +292,6 @@ def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int
stale_cutoff = now - timedelta(days=get_stale_after_days())
archive_cutoff = now - timedelta(days=get_archive_after_days())
cron_referenced = _cron_referenced_skills()
counts = {"marked_stale": 0, "archived": 0, "reactivated": 0, "checked": 0, "seeded": 0}
for row in _u.agent_created_report():
@@ -317,15 +300,6 @@ def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int
if row.get("pinned"):
continue
# A skill referenced by any cron job (incl. paused/disabled) is in
# use by definition — resuming or the next fire must find it. The
# scheduler only bumps usage when a job actually fires, so jobs that
# fire less often than archive_after_days, paused jobs, and far-future
# one-shots would otherwise have their skills aged out from under
# them. Treat referenced skills like pinned: never auto-transition.
if name in cron_referenced:
continue
# First sight of a curation-eligible skill with no persisted record
# (e.g. a newly-eligible built-in): anchor its clock to now and defer.
if not row.get("_persisted", True):
@@ -342,18 +316,6 @@ def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int
current = row.get("state", _u.STATE_ACTIVE)
# Never-used skills (use_count == 0) get a grace floor: don't archive
# one until it is at least stale_after_days old. A use=0 skill is
# absence of evidence, not evidence of staleness — a skill created
# recently may simply not have had its trigger come up yet.
never_used = int(row.get("use_count", 0) or 0) == 0
if never_used and anchor > stale_cutoff:
# Younger than the stale window — leave it alone entirely.
if current == _u.STATE_STALE:
_u.set_state(name, _u.STATE_ACTIVE)
counts["reactivated"] += 1
continue
if anchor <= archive_cutoff and current != _u.STATE_ARCHIVED:
ok, _msg = _u.archive_skill(name)
if ok:
@@ -415,10 +377,8 @@ CURATOR_REVIEW_PROMPT = (
"bodies + `references/`, `templates/`, and `scripts/` subfiles for "
"session-specific detail — not one-session-one-skill micro-entries.\n\n"
"Hard rules — do not violate:\n"
"1. DO NOT touch bundled, hub-installed, or external-dir skills "
"(`skills.external_dirs`). The candidate list below is already filtered "
"to local curator-managed skills only; external skills are externally "
"owned and read-only to this background curator.\n"
"1. DO NOT touch bundled or hub-installed skills. The candidate list "
"below is already filtered to agent-created skills only.\n"
"2. DO NOT delete any skill. Archiving (moving the skill's directory "
"into ~/.hermes/skills/.archive/) is the maximum destructive action. "
"Archives are recoverable; deletion is not.\n"
@@ -428,19 +388,10 @@ CURATOR_REVIEW_PROMPT = (
"back load-bearing UX (slash-command entry points referenced in docs and "
"tips) and are filtered out of the candidate list below — never resurrect "
"one as an archive or absorb target.\n"
"3c. DO NOT archive or prune any skill marked `cron=yes` in the candidate "
"list. A cron job depends on it and will fail to load it on its next "
"run. You MAY still consolidate it into an umbrella — but only because "
"the curator rewrites cron job skill references to follow consolidations; "
"never simply prune it.\n"
"4. DO NOT use usage counters as a reason to skip consolidation. The "
"counters are new and often mostly zero. Judge overlap on CONTENT, "
"not on use_count. 'use=0' is not evidence a skill is valuable; it's "
"absence of evidence either way. Corollary: 'use=0' is ALSO not a "
"reason to PRUNE a skill. Never archive a never-used skill (use=0) "
"unless it is at least 30 days old (check last_activity / created date) "
"AND its content is genuinely obsolete or fully absorbed elsewhere — a "
"recently-created skill simply may not have had its trigger come up yet.\n"
"absence of evidence either way.\n"
"5. DO NOT reject consolidation on the grounds that 'each skill has "
"a distinct trigger'. Pairwise distinctness is the wrong bar. The "
"right bar is: 'would a human maintainer write this as N separate "
@@ -518,9 +469,8 @@ CURATOR_REVIEW_PROMPT = (
"skill, or `absorbed_into=\"\"` when you're truly pruning with no "
"forwarding target. This drives cron-job skill-reference migration — "
"guessing from your YAML summary after the fact is fragile.\n"
" - terminal — move LOCAL candidate content into "
"a support subfile when package integrity requires it; never mv, cp, rm, "
"patch, or rewrite bundled, hub-installed, or external-dir skills\n\n"
" - terminal — mv a sibling into the archive "
"OR move its content into a support subfile\n\n"
"'keep' is a legitimate decision ONLY when the skill is already a "
"class-level umbrella and none of the proposed merges would improve "
"discoverability. 'This is narrow but distinct from its siblings' "
@@ -1460,14 +1410,12 @@ def _render_candidate_list() -> str:
rows = skill_usage.agent_created_report()
if not rows:
return "No agent-created skills to review."
cron_referenced = _cron_referenced_skills()
lines = [f"Agent-created skills ({len(rows)}):\n"]
for r in rows:
lines.append(
f"- {r['name']} "
f"state={r['state']} "
f"pinned={'yes' if r.get('pinned') else 'no'} "
f"cron={'yes' if r['name'] in cron_referenced else 'no'} "
f"activity={r.get('activity_count', 0)} "
f"use={r.get('use_count', 0)} "
f"view={r.get('view_count', 0)} "
@@ -1895,14 +1843,6 @@ def _run_llm_review(prompt: str) -> Dict[str, Any]:
# Disable recursive nudges — the curator must never spawn its own review.
review_agent._memory_nudge_interval = 0
review_agent._skill_nudge_interval = 0
# Tag this fork as autonomous background curation so skill_manage's
# background-review write guard fires. Without this the fork inherits
# the default "assistant_tool" origin, is_background_review() is False,
# and the external/bundled/hub-installed skill_manage guards never
# trigger during the curation pass they exist to protect against.
# turn_context.py binds this onto the write-origin ContextVar at turn
# start (see agent/turn_context.py).
review_agent._memory_write_origin = "background_review"
# Redirect the forked agent's stdout/stderr to /dev/null while it
# runs so its tool-call chatter doesn't pollute the foreground

View File

@@ -16,7 +16,6 @@ from pathlib import Path
from typing import Any
from utils import safe_json_loads
from agent.redact import redact_sensitive_text
from agent.tool_result_classification import file_mutation_result_landed
# ANSI escape codes for coloring tool failure indicators
@@ -340,62 +339,6 @@ def _read_file_line_label(args: dict) -> str:
return f"L{offset}-{offset + limit - 1}"
def redact_browser_typed_text_for_display(value: Any, typed_text: Any) -> Any:
"""Apply secret redaction to browser_type text in display-facing payloads.
Backends sometimes echo the attempted input in error strings or fallback
metadata. When the raw typed value contains a recognizable secret (API
key, token, JWT, etc.) the redacted form differs from the raw value, so we
replace every occurrence of the raw value with its redacted form before a
browser_type result reaches logs, callbacks, the model, or chat history.
Normal typed text (search queries, addresses, form fields) matches no
secret pattern, so it passes through unchanged and stays readable.
Redaction is forced here regardless of the global ``security.redact_secrets``
preference: a typed credential leaking into chat history is a security
boundary, not mere log hygiene.
"""
if typed_text is None:
return value
needle = str(typed_text)
if needle == "":
return value
redacted = redact_sensitive_text(needle, force=True)
if redacted == needle:
# Nothing secret-looking in the typed text; leave payload untouched.
return value
if isinstance(value, str):
return value.replace(needle, redacted)
if isinstance(value, dict):
return {
key: redact_browser_typed_text_for_display(item, typed_text)
for key, item in value.items()
}
if isinstance(value, list):
return [redact_browser_typed_text_for_display(item, typed_text) for item in value]
if isinstance(value, tuple):
return tuple(redact_browser_typed_text_for_display(item, typed_text) for item in value)
return value
def redact_tool_args_for_display(tool_name: str, args: dict | None) -> dict | None:
"""Return a copy of tool args safe for logs/progress UI.
For ``browser_type`` the ``text`` argument is run through the same
secret-pattern redactor used for logs. Recognizable credentials (API
keys, tokens) are masked before the value reaches tool progress
notifications; normal typed text is left intact for debuggability.
"""
if not isinstance(args, dict):
return args
if tool_name == "browser_type" and isinstance(args.get("text"), str):
safe_args = dict(args)
safe_args["text"] = redact_sensitive_text(args["text"], force=True)
return safe_args
return args
def _delegate_task_goal_parts(tasks: Any, *, per_goal_len: int) -> tuple[int, list[str]]:
if not isinstance(tasks, list):
return 0, []
@@ -419,7 +362,6 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
max_len = _tool_preview_max_len
if not args:
return None
args = redact_tool_args_for_display(tool_name, args) or args
primary_args = {
"terminal": "command", "web_search": "query", "web_extract": "urls",
"read_file": "path", "write_file": "path", "patch": "path",
@@ -1143,7 +1085,6 @@ def get_cute_tool_message(
When *result* is provided the line is checked for failure indicators.
Failed tool calls get a red prefix and an informational suffix.
"""
args = redact_tool_args_for_display(tool_name, args) or args
dur = f"{duration:.1f}s"
is_failure, failure_suffix = _detect_tool_failure(tool_name, result)
skin_prefix = get_skin_tool_prefix()

View File

@@ -133,31 +133,6 @@ _RATE_LIMIT_PATTERNS = [
"servicequotaexceededexception",
]
# Patterns that indicate provider-side overload, NOT a per-credential rate
# limit or billing problem. The credential is valid — the server is just
# busy — so the correct recovery is "back off and retry the same key", never
# "rotate the credential" (rotating exhausts the pool while the endpoint is
# still busy; a single-key user has nothing to rotate to). Some providers
# (notably Z.AI / Zhipu) reuse HTTP 429 for server-wide overload, so the 429
# status path matches the body against this list before falling through to
# the rate_limit default. Phrases are kept narrow and overload-flavoured so a
# normal rate-limit message ("you have been rate-limited") doesn't hit this
# bucket. (#14038, #15297)
_OVERLOADED_PATTERNS = [
"overloaded",
"temporarily overloaded",
"service is temporarily overloaded",
"service may be temporarily overloaded",
"server is overloaded",
"server overloaded",
"service overloaded",
"service is overloaded",
"upstream overloaded",
"currently overloaded",
"at capacity",
"over capacity",
]
# Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
_USAGE_LIMIT_PATTERNS = [
"usage limit",
@@ -355,14 +330,6 @@ _CONTENT_POLICY_BLOCKED_PATTERNS = [
# echo back; the underscore form is provider-specific enough.
"content_filter",
"responsibleaipolicyviolation",
# MiniMax output-layer safety filter. The error string is surfaced
# verbatim by MiniMax SDK / OpenAI-compatible endpoints, usually in the
# form "output new_sensitive (1027)" when the model's *output* (often a
# large tool-call argument block) trips the upstream safety filter and
# the SSE stream is truncated mid-flight. ``new_sensitive`` is the
# filter name and is narrow enough that billing / format / auth error
# strings will not collide. See #32421.
"new_sensitive",
]
# Auth patterns (non-status-code signals)
@@ -750,26 +717,6 @@ def classify_api_error(
is_disconnect = any(p in error_msg for p in _SERVER_DISCONNECT_PATTERNS)
if is_disconnect and not status_code:
# Reasoning-model override: a transport disconnect on a reasoning
# model is much more likely the upstream proxy idle-killing a
# long thinking stream than a true context overflow — even on
# large sessions. The default disconnect+large-session routing
# below would otherwise send the user into the compression
# branch (should_compress=True) and silently delete
# conversation history on a phantom context-length error.
# Reasoning models have multi-minute thinking phases that
# routinely exceed the cloud gateway's idle window (NVIDIA
# NIM ~120s — first-party repro at NVIDIA/NemoClaw#4846;
# OpenAI worker / Anthropic stream-idle similar). The
# per-reasoning-model stale-timeout floor in
# agent/reasoning_timeouts.py raises the stale-detector
# threshold to tolerate long thinking, so a true
# transport-layer failure here is recoverable via the retry
# path — not via context compression. Reclassify as timeout.
# (Part 1 of Fixes #52310.)
from agent.reasoning_timeouts import get_reasoning_stale_timeout_floor
if get_reasoning_stale_timeout_floor(model) is not None:
return _result(FailoverReason.timeout, retryable=True)
# Absolute token/message-count thresholds are only a proxy for smaller
# context windows. Large-context sessions can have hundreds of
# messages while still being far below their actual token budget.
@@ -896,19 +843,7 @@ def _classify_by_status(
)
if status_code == 429:
# Already checked long_context_tier above. Some providers (notably
# Z.AI / Zhipu) reuse HTTP 429 for server-wide overload — same status
# code as a true per-credential rate limit, but the credential is
# valid and the correct recovery is "back off and retry the same key",
# NOT "rotate the credential" (which exhausts the pool while the
# endpoint is still busy, and does nothing for a single-key user).
# Disambiguate on the error body so an overload 429 takes the
# transient-overload path instead of burning the pool. (#14038)
if any(p in error_msg for p in _OVERLOADED_PATTERNS):
return result_fn(
FailoverReason.overloaded,
retryable=True,
)
# Already checked long_context_tier above; this is a normal rate limit
return result_fn(
FailoverReason.rate_limit,
retryable=True,
@@ -1259,17 +1194,6 @@ def _classify_by_message(
should_fallback=True,
)
# Overloaded / server-busy patterns — must come BEFORE the rate_limit and
# billing checks so that a message-only "overloaded" (no 503/529 status,
# e.g. some Anthropic-compatible proxies) classifies as a transient
# overload (backoff + retry) instead of falling through to `unknown` or
# incorrectly triggering credential rotation.
if any(p in error_msg for p in _OVERLOADED_PATTERNS):
return result_fn(
FailoverReason.overloaded,
retryable=True,
)
# Billing patterns
if any(p in error_msg for p in _BILLING_PATTERNS):
return result_fn(
@@ -1359,25 +1283,19 @@ def _extract_status_code(error: Exception) -> Optional[int]:
def _extract_error_body(error: Exception) -> dict:
"""Extract the structured error body from an SDK exception or its cause chain."""
current = error
for _ in range(5): # Match _extract_status_code() traversal depth.
body = getattr(current, "body", None)
if isinstance(body, dict):
return body
# Some errors have .response.json()
response = getattr(current, "response", None)
if response is not None:
try:
json_body = response.json()
if isinstance(json_body, dict):
return json_body
except Exception:
pass
cause = getattr(current, "__cause__", None) or getattr(current, "__context__", None)
if cause is None or cause is current:
break
current = cause
"""Extract the structured error body from an SDK exception."""
body = getattr(error, "body", None)
if isinstance(body, dict):
return body
# Some errors have .response.json()
response = getattr(error, "response", None)
if response is not None:
try:
json_body = response.json()
if isinstance(json_body, dict):
return json_body
except Exception:
pass
return {}

View File

@@ -77,22 +77,15 @@ def build_write_denied_prefixes(home: str) -> list[str]:
]
def get_safe_write_roots() -> set[str]:
"""Return resolved HERMES_WRITE_SAFE_ROOT paths. Supports multiple directories
separated by ``os.pathsep`` (``:`` on Unix, ``;`` on Windows).
E.g., ``/opt/data:/var/www/html`` on Unix, ``C:\\data;D:\\www`` on Windows."""
env = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
if not env:
return set()
roots: set[str] = set()
for path in env.split(os.pathsep):
if path:
try:
resolved = os.path.realpath(os.path.expanduser(path))
roots.add(resolved)
except (OSError, ValueError):
continue
return roots
def get_safe_write_root() -> Optional[str]:
"""Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset."""
root = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
if not root:
return None
try:
return os.path.realpath(os.path.expanduser(root))
except Exception:
return None
def is_write_denied(path: str) -> bool:
@@ -131,15 +124,9 @@ def is_write_denied(path: str) -> bool:
except Exception:
pass
safe_roots = get_safe_write_roots()
if safe_roots:
allowed = False
for safe_root in safe_roots:
if resolved == safe_root or resolved.startswith(safe_root + os.sep):
allowed = True
break
if not allowed:
return True
safe_root = get_safe_write_root()
if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
return True
return False

View File

@@ -251,78 +251,6 @@ def _supports_vision_override(
return None
def _resolve_inference_base_url(
cfg: Optional[Dict[str, Any]],
provider: str,
) -> str:
"""Best-effort base URL for the active inference provider."""
try:
from agent.auxiliary_client import _RUNTIME_MAIN_BASE_URL
runtime = str(_RUNTIME_MAIN_BASE_URL or "").strip()
if runtime:
return runtime
except Exception:
pass
if not isinstance(cfg, dict):
return ""
model_cfg_raw = cfg.get("model")
model_cfg: Dict[str, Any] = model_cfg_raw if isinstance(model_cfg_raw, dict) else {}
base_url = str(model_cfg.get("base_url") or "").strip()
if base_url:
return base_url
config_provider = str(model_cfg.get("provider") or "").strip()
candidate_names: set[str] = set()
for p in filter(None, (provider, config_provider)):
candidate_names.add(p)
if p.lower().startswith("custom:"):
candidate_names.add(p.split(":", 1)[1])
else:
candidate_names.add(f"custom:{p}")
providers_cfg = cfg.get("providers")
if isinstance(providers_cfg, dict):
for name in candidate_names:
entry = providers_cfg.get(name)
if isinstance(entry, dict):
bu = str(entry.get("base_url") or "").strip()
if bu:
return bu
custom_providers = cfg.get("custom_providers")
if isinstance(custom_providers, list):
lowered = {n.lower() for n in candidate_names}
for entry_raw in custom_providers:
if not isinstance(entry_raw, dict):
continue
entry_name = str(entry_raw.get("name") or "").strip()
if entry_name not in candidate_names and entry_name.lower() not in lowered:
continue
bu = str(entry_raw.get("base_url") or "").strip()
if bu:
return bu
return ""
def _should_probe_ollama_vision(provider: str, base_url: str) -> bool:
"""True when the active provider likely fronts a local Ollama server."""
p = (provider or "").strip().lower()
if p == "ollama":
return True
if not base_url:
return False
try:
from agent.model_metadata import detect_local_server_type
return detect_local_server_type(base_url) == "ollama"
except Exception:
return False
def _coerce_mode(raw: Any) -> str:
"""Normalize a config value into one of the valid modes."""
if not isinstance(raw, str):
@@ -374,33 +302,15 @@ def _lookup_supports_vision(
return override
if not provider or not model:
return None
caps = None
try:
from agent.models_dev import get_model_capabilities
caps = get_model_capabilities(provider, model)
except Exception as exc: # pragma: no cover - defensive
logger.debug("image_routing: caps lookup failed for %s:%s%s", provider, model, exc)
if caps is not None:
return bool(caps.supports_vision)
base_url = _resolve_inference_base_url(cfg, provider)
if not base_url and (provider or "").strip().lower() == "ollama":
base_url = "http://localhost:11434/v1"
if _should_probe_ollama_vision(provider, base_url):
try:
from agent.model_metadata import query_ollama_supports_vision
ollama_vision = query_ollama_supports_vision(model, base_url)
if ollama_vision is not None:
return ollama_vision
except Exception as exc: # pragma: no cover - defensive
logger.debug(
"image_routing: ollama vision probe failed for %s:%s%s",
provider,
model,
exc,
)
return None
return None
if caps is None:
return None
return bool(caps.supports_vision)
def decide_image_input_mode(
@@ -478,98 +388,14 @@ def _sniff_mime_from_bytes(raw: bytes) -> Optional[str]:
# BMP: "BM"
if raw.startswith(b"BM"):
return "image/bmp"
# ISO-BMFF family (HEIC/HEIF/AVIF): bytes 4..8 == 'ftyp', major brand at 8..12
if len(raw) >= 12 and raw[4:8] == b"ftyp":
brand = raw[8:12]
if brand in {b"avif", b"avis"}:
return "image/avif"
if brand in {
b"heic", b"heix", b"hevc", b"hevx",
b"mif1", b"msf1", b"heim", b"heis",
}:
return "image/heic"
# TIFF: II*\0 (little-endian) or MM\0* (big-endian)
if raw[:4] in {b"II*\x00", b"MM\x00*"}:
return "image/tiff"
# ICO: 00 00 01 00 (reserved=0, type=1=icon)
if raw[:4] == b"\x00\x00\x01\x00":
return "image/x-icon"
# SVG: text-based, look for an <svg tag near the start (skip BOM/whitespace)
head = raw[:512].lstrip().lower()
if head.startswith(b"<?xml") or head.startswith(b"<svg"):
if b"<svg" in head:
return "image/svg+xml"
# HEIC/HEIF: ftypheic / ftypheix / ftypmif1 / ftypmsf1 etc.
if len(raw) >= 12 and raw[4:8] == b"ftyp" and raw[8:12] in {
b"heic", b"heix", b"hevc", b"hevx", b"mif1", b"msf1", b"heim", b"heis",
}:
return "image/heic"
return None
# Formats every major vision provider (Anthropic, OpenAI, Gemini, Bedrock)
# accepts natively. Anything outside this set has to be transcoded to PNG
# before we declare media_type, otherwise the provider returns HTTP 400
# ("Could not process image" / "Unsupported image media type") and the
# whole turn fails with no salvage path.
#
# Discord (and a few other chat platforms) freely accept attachments in
# formats outside this set -- AVIF screenshots from Chromium, HEIC from
# iPhones, TIFF from scanners, BMP from old Windows tools, ICO -- so users
# do hit this in practice. SVG is vector and Pillow cannot rasterize it;
# it is skipped (logged) rather than transcoded.
_UNIVERSALLY_SUPPORTED_MIMES = frozenset({
"image/png", "image/jpeg", "image/gif", "image/webp",
})
def _transcode_to_png(raw: bytes) -> Optional[bytes]:
"""Decode arbitrary image bytes with Pillow and re-encode as PNG.
Returns None if Pillow isn't installed or can't decode the input
(rare formats, corrupted bytes, missing optional decoder plugin for
HEIC/AVIF, or vector formats like SVG). Caller falls back to skipping
the image so the rest of the turn still works.
HEIC/HEIF and AVIF need optional Pillow plugins; we try to register
them on demand and swallow ImportError so a missing plugin just
looks like 'Pillow can't decode this' rather than crashing.
"""
try:
from PIL import Image
except ImportError:
logger.info(
"image_routing: Pillow not installed; cannot transcode "
"non-standard image format to PNG. Install with `pip install Pillow` "
"(and `pillow-heif` / `pillow-avif-plugin` for those formats)."
)
return None
# Optional plugin registration. Silent on failure: an unsupported
# format will just fall through to Image.open raising below.
try:
import pillow_heif # type: ignore
pillow_heif.register_heif_opener()
except Exception:
pass
try:
import pillow_avif # type: ignore # noqa: F401 -- registers AVIF on import
except Exception:
pass
try:
from io import BytesIO
with Image.open(BytesIO(raw)) as im:
# Pick an output mode PNG can serialise. Anything other than
# the standard set gets normalised to RGBA so transparency is
# preserved where the source had it.
if im.mode not in {"RGB", "RGBA", "L", "LA", "P"}:
im = im.convert("RGBA")
buf = BytesIO()
im.save(buf, format="PNG", optimize=False)
return buf.getvalue()
except Exception as exc:
logger.info(
"image_routing: Pillow could not transcode image to PNG -- %s", exc
)
return None
def _guess_mime(path: Path, raw: Optional[bytes] = None) -> str:
"""Return image MIME type for *path*.
@@ -605,18 +431,8 @@ def _file_to_data_url(path: Path) -> Optional[str]:
accept large images (OpenAI 49 MB+, Gemini 100 MB) don't pay a silent
quality tax just because one other provider is stricter.
Format compatibility IS handled here: if the sniffed MIME isn't one
of ``_UNIVERSALLY_SUPPORTED_MIMES`` (i.e. it's something like AVIF,
HEIC, BMP, TIFF, or ICO that some providers reject outright), we
transcode to PNG with Pillow before declaring media_type. This fixes
the user-visible "Could not process image" HTTP 400 from Anthropic on
Discord-attached AVIF/HEIC/BMP files.
Returns None if the file can't be read OR if the format isn't
universally supported AND Pillow can't transcode it (Pillow missing,
HEIC/AVIF plugin missing, vector format like SVG, corrupt bytes). The
caller reports those paths in ``skipped`` and the rest of the turn
proceeds.
Returns None only if the file can't be read (missing, permission
denied, etc.); the caller reports those paths in ``skipped``.
"""
try:
raw = path.read_bytes()
@@ -624,22 +440,6 @@ def _file_to_data_url(path: Path) -> Optional[str]:
logger.warning("image_routing: failed to read %s%s", path, exc)
return None
mime = _guess_mime(path, raw=raw)
if mime not in _UNIVERSALLY_SUPPORTED_MIMES:
transcoded = _transcode_to_png(raw)
if transcoded is None:
logger.warning(
"image_routing: %s is %s which is not accepted by all major "
"vision providers and could not be transcoded to PNG; "
"skipping this attachment.",
path, mime,
)
return None
logger.info(
"image_routing: transcoded %s (%s) -> image/png for provider compatibility",
path.name, mime,
)
raw = transcoded
mime = "image/png"
b64 = base64.b64encode(raw).decode("ascii")
return f"data:{mime};base64,{b64}"

View File

@@ -8,7 +8,6 @@ iteration.
from __future__ import annotations
import hashlib
import logging
from concurrent.futures import ThreadPoolExecutor
from typing import Any
@@ -26,112 +25,20 @@ logger = logging.getLogger(__name__)
# opening dozens of sockets at once.
_MAX_REFERENCE_WORKERS = 8
# Per-tool-result character budget for the advisory reference view. Tool
# results can be huge (a full diff, a 5000-line file dump); replaying them
# verbatim per reference per tool-loop step would blow the reference model's
# context window and cost. We keep the agent's *actions* (tool calls) in full —
# they are cheap, high-signal, and tell the reference what the agent did — but
# preview each tool *result* head+tail so the reference still sees what came
# back without replaying megabytes. The acting aggregator always gets the full,
# untrimmed transcript; this budget only shapes the advisory copy.
_REFERENCE_TOOL_RESULT_BUDGET = 4000
# System prompt prepended to every reference-model call. References are
# advisory — they do NOT act, call tools, or own the task. Without this
# framing a reference receives the bare trimmed conversation and assumes it is
# the acting agent: it then refuses ("I can't access repositories / URLs from
# here") or tries to call tools it doesn't have. The prompt reframes the model
# as an analyst whose job is to reason about the presented state and hand its
# best thinking to the aggregator/orchestrator that will actually act.
_REFERENCE_SYSTEM_PROMPT = (
"You are a reference advisor in a Mixture of Agents (MoA) process. You are "
"NOT the acting agent and you do NOT execute anything: you cannot call "
"tools, run commands, browse, or access files, repositories, or URLs, and "
"you should not try to or apologize for being unable to. A separate "
"aggregator/orchestrator model holds those capabilities and will take the "
"actual actions.\n\n"
"The conversation below is the current state of a task handled by that "
"acting agent. Your job is to give your most intelligent analysis of that "
"state: understand the goal, reason about the problem, and advise on what "
"to do next. Surface the best approach, concrete next steps and tool-use "
"strategy, likely pitfalls and risks, and anything the acting agent may "
"have missed or gotten wrong. Assume any referenced files, URLs, or "
"systems exist and reason about them from the context given rather than "
"asking for access.\n\n"
"Respond with your advice directly — no preamble, no disclaimers about "
"tools or access. Your response is private guidance handed to the "
"aggregator, not an answer shown to the user."
)
def _slot_label(slot: dict[str, str]) -> str:
return f"{slot.get('provider', '').strip()}:{slot.get('model', '').strip()}"
def _slot_runtime(slot: dict[str, str]) -> dict[str, Any]:
"""Resolve a reference/aggregator slot to real runtime call kwargs.
A MoA slot is just a model selection — it must be called the same way any
model is called elsewhere, not through a bare ``call_llm(provider=...,
model=...)`` that leaves base_url/api_key/api_mode unresolved and lets the
auxiliary auto-detector guess. We route the slot's provider through
``resolve_runtime_provider`` (the canonical provider→api_mode/base_url/
api_key resolver the CLI, gateway, and delegate_task all use), so the slot
gets its provider's real API surface — e.g. MiniMax → anthropic_messages,
GPT-5/o-series → max_completion_tokens, custom endpoints → their base_url.
Returns the kwargs to pass through to ``call_llm`` (provider/model plus the
resolved base_url/api_key when available). Falls back to the bare
provider/model on any resolution error so a misconfigured slot still
attempts the call rather than aborting the whole MoA turn.
"""
provider = str(slot.get("provider") or "").strip()
model = str(slot.get("model") or "").strip()
out: dict[str, Any] = {"provider": provider, "model": model}
try:
from hermes_cli.runtime_provider import resolve_runtime_provider
rt = resolve_runtime_provider(requested=provider, target_model=model)
resolved_provider = str(rt.get("provider") or provider).strip().lower()
# call_llm treats an explicit base_url as a custom endpoint. That is
# correct for ordinary OpenAI-compatible targets, but wrong for OAuth /
# provider-backed targets whose provider branch adds auth refresh,
# request metadata, or request-shape adapters. Keep those providers
# identified by name.
if resolved_provider in {"nous", "openai-codex", "xai-oauth"}:
return out
# Pass the resolved endpoint through so call_llm builds the request for
# the provider's actual API surface instead of auto-detecting. base_url
# routes call_llm to the right adapter (incl. anthropic_messages mode);
# api_key is the resolved credential for that provider.
if rt.get("base_url"):
out["base_url"] = rt["base_url"]
if rt.get("api_key"):
out["api_key"] = rt["api_key"]
except Exception as exc: # pragma: no cover - defensive
logger.debug("MoA slot runtime resolution failed for %s: %s", _slot_label(slot), exc)
return out
def _run_reference(
slot: dict[str, str],
ref_messages: list[dict[str, Any]],
*,
temperature: float | None = None,
max_tokens: int | None = None,
temperature: float,
max_tokens: int,
) -> tuple[str, str]:
"""Call one reference model and return ``(label, text)``.
The slot is resolved to its provider's real runtime (via ``_slot_runtime``)
and called through the same ``call_llm`` request-building path any model
uses, so per-model wire-format handling (anthropic_messages,
max_completion_tokens, fixed/forbidden temperature) applies identically to
a reference as it would if that model were the acting model. MoA imposes no
cap of its own (``max_tokens`` defaults to ``None`` → omitted → the model's
real maximum); ``temperature`` is only the user's configured preset value,
which call_llm may still override per model.
Never raises: a failed reference becomes a labelled note so the aggregator
can still act with partial context. Designed to run inside a thread pool —
``call_llm`` is synchronous/blocking, so threads (not asyncio) are the right
@@ -139,17 +46,13 @@ def _run_reference(
"""
label = _slot_label(slot)
try:
# Prepend the advisory-role system prompt so the reference understands
# it is analyzing state for an aggregator, not acting on the task. The
# trimmed view (_reference_messages) already strips the agent's own
# system prompt, so this is the only system message the reference sees.
messages = [{"role": "system", "content": _REFERENCE_SYSTEM_PROMPT}, *ref_messages]
response = call_llm(
task="moa_reference",
messages=messages,
provider=slot["provider"],
model=slot["model"],
messages=ref_messages,
temperature=temperature,
max_tokens=max_tokens,
**_slot_runtime(slot),
)
return label, _extract_text(response) or "(empty response)"
except Exception as exc:
@@ -161,8 +64,8 @@ def _run_references_parallel(
reference_models: list[dict[str, str]],
ref_messages: list[dict[str, Any]],
*,
temperature: float | None = None,
max_tokens: int | None = None,
temperature: float,
max_tokens: int,
) -> list[tuple[str, str]]:
"""Fan out all reference models in parallel, returning outputs in order.
@@ -203,140 +106,40 @@ def _run_references_parallel(
return [r for r in results if r is not None]
def _truncate_tool_result(text: str, budget: int = _REFERENCE_TOOL_RESULT_BUDGET) -> str:
"""Head+tail preview of a tool result for the advisory view.
Keeps the first and last halves of the budget with a ``[... N chars
omitted ...]`` marker between them, so a reference sees both how the result
started and how it ended without replaying the whole payload.
"""
if not text or len(text) <= budget:
return text
half = budget // 2
omitted = len(text) - 2 * half
return f"{text[:half]}\n[... {omitted} chars omitted ...]\n{text[-half:]}"
def _render_tool_calls(tool_calls: Any) -> str:
"""Render an assistant turn's tool_calls as readable text lines.
The advisory view cannot carry real ``tool_calls`` payloads (strict
providers reject tool_calls the reference never produced), so the agent's
actions are flattened to text the reference can read and reason about.
"""
lines: list[str] = []
for tc in tool_calls or []:
fn = (tc.get("function") or {}) if isinstance(tc, dict) else {}
name = fn.get("name") or (tc.get("name") if isinstance(tc, dict) else "") or "tool"
args = fn.get("arguments")
if isinstance(args, str):
args_text = args
elif args is not None:
try:
import json
args_text = json.dumps(args, ensure_ascii=False)
except Exception:
args_text = str(args)
else:
args_text = ""
lines.append(f"[called tool: {name}({args_text})]" if args_text else f"[called tool: {name}]")
return "\n".join(lines)
def _reference_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""Build an advisory view of the conversation for reference models.
"""Build an advisory-safe view of the conversation for reference models.
A reference gives an INFORMED judgement on the current state, so it must
see what the agent actually did — its tool calls AND the tool results that
came back — not just the agent's narration. We therefore preserve the whole
conversation flow, but flatten it into clean user/assistant *text* turns:
- system prompt: dropped (8K of Hermes boilerplate, not advisory signal).
- assistant turns: kept; any ``tool_calls`` are rendered inline as
``[called tool: name(args)]`` text lines appended to the turn's text.
- ``tool``-role results: NOT dropped. Each is folded (head+tail preview,
see ``_truncate_tool_result``) into the *preceding* assistant turn as a
``[tool result: ...]`` block, so the reference sees what came back.
This emits ZERO ``tool``-role messages and ZERO ``tool_calls`` arrays — only
plain user/assistant text — so strict providers (Mistral, Fireworks) that
reject orphan tool messages / unproduced tool_calls don't 400, while the
reference still has the full picture.
The view MUST end with a ``user`` turn. Anthropic (and OpenRouter→Anthropic)
interpret a trailing assistant turn as an assistant *prefill* to continue,
and no-prefill models (e.g. Claude Opus 4.8) reject it with
``400 ... must end with a user message``. Rather than DELETE the agent's
latest context to satisfy that (which would blind the reference to the
current state), we APPEND a synthetic user turn asking the reference to
judge the state above. End-on-user is satisfied and no context is lost.
The acting aggregator always receives the full, untrimmed transcript; this
function only shapes the disposable advisory copy.
Reference calls are advisory: they never call tools and never emit the
``tool_calls`` the main model did. Replaying the full transcript verbatim
(a) re-bills the ~8K-token Hermes system prompt per reference per
iteration and (b) risks 400s from strict providers (Mistral, Fireworks)
that reject orphan ``tool`` messages or ``tool_calls`` the reference never
produced. We keep only the user/assistant *text* turns, dropping the
system prompt, any ``tool``-role messages, and any ``tool_calls`` payloads.
"""
advisory_instruction = (
"[The conversation above is the current state of the task. Give your "
"most intelligent judgement: what is going on, what should happen next, "
"what risks or mistakes you see, and how the acting agent should "
"proceed.]"
)
rendered: list[dict[str, Any]] = []
last_user_content: str | None = None
trimmed: list[dict[str, Any]] = []
for msg in messages:
role = msg.get("role")
content = msg.get("content")
text = content if isinstance(content, str) else ""
if role == "system":
if role not in ("user", "assistant"):
# Drop system prompt and tool-result messages.
continue
if role == "user":
if text.strip():
last_user_content = text
rendered.append({"role": "user", "content": text})
elif role == "assistant":
parts: list[str] = []
if text.strip():
parts.append(text.strip())
calls_text = _render_tool_calls(msg.get("tool_calls"))
if calls_text:
parts.append(calls_text)
# Empty assistant turns (no text, no calls) carry nothing advisory.
if parts:
rendered.append({"role": "assistant", "content": "\n".join(parts)})
elif role == "tool":
# Fold the tool result into the preceding assistant turn as text so
# the reference sees what came back, without emitting a tool-role
# message a reference never produced.
result_text = _truncate_tool_result(text)
block = f"[tool result: {result_text}]"
if rendered and rendered[-1].get("role") == "assistant":
rendered[-1]["content"] = rendered[-1]["content"] + "\n" + block
else:
# No assistant turn to attach to (e.g. a leading tool result);
# keep it as advisory context on its own assistant-role line.
rendered.append({"role": "assistant", "content": block})
# Any other role is ignored.
# End on a user turn: append a synthetic advisory request rather than
# deleting the agent's latest assistant context. This satisfies Anthropic's
# no-trailing-assistant-prefill rule while preserving full state.
if rendered and rendered[-1].get("role") == "assistant":
rendered.append({"role": "user", "content": advisory_instruction})
elif rendered and rendered[-1].get("role") == "user":
# Already ends on a user turn (fresh user prompt, no agent action yet).
# Leave it — the reference answers that prompt directly.
pass
if not rendered:
# Degenerate case: nothing rendered. Fall back to the latest user turn.
if last_user_content is not None:
return [{"role": "user", "content": last_user_content}]
content = msg.get("content")
if not isinstance(content, str):
# Skip non-text (multimodal/tool-call-only) assistant turns.
if not content:
continue
text = content if isinstance(content, str) else ""
if role == "assistant" and not text.strip():
# Assistant turn that was purely tool calls — nothing advisory.
continue
trimmed.append({"role": role, "content": text})
if not trimmed:
# Degenerate case (e.g. first turn was stripped): fall back to a
# minimal user turn so the reference still has something to answer.
for msg in reversed(messages):
if msg.get("role") == "user" and isinstance(msg.get("content"), str):
return [{"role": "user", "content": msg["content"]}]
return rendered
return trimmed
@@ -366,18 +169,12 @@ def aggregate_moa_context(
aggregator: dict[str, str],
temperature: float = 0.6,
aggregator_temperature: float = 0.4,
max_tokens: int | None = None,
max_tokens: int = 4096,
) -> str:
"""Run configured reference models and synthesize their advice.
Failures are returned as model-specific notes instead of aborting the normal
agent loop; the main model can still act with partial context.
``max_tokens`` is ``None`` by default: MoA does not cap reference or
aggregator output, so each model uses its own maximum. ``call_llm`` omits
the parameter entirely when it is ``None`` (see its docstring), which also
sidesteps providers that reject ``max_tokens`` outright. A hardcoded cap
here previously truncated long aggregator syntheses.
"""
reference_outputs: list[tuple[str, str]] = []
ref_messages = _reference_messages(api_messages)
@@ -406,10 +203,11 @@ def aggregate_moa_context(
try:
response = call_llm(
task="moa_aggregator",
provider=aggregator["provider"],
model=aggregator["model"],
messages=[{"role": "user", "content": synth_prompt}],
temperature=aggregator_temperature,
max_tokens=max_tokens,
**_slot_runtime(aggregator),
)
synthesis = _extract_text(response)
except Exception as exc:
@@ -432,38 +230,8 @@ def aggregate_moa_context(
class MoAChatCompletions:
"""OpenAI-chat-compatible facade where the aggregator is the acting model."""
def __init__(self, preset_name: str, reference_callback: Any = None):
def __init__(self, preset_name: str):
self.preset_name = preset_name or "default"
# Optional display hook. Called as reference outputs become available so
# frontends can show each reference model's answer as a labelled block
# before the aggregator acts. Signature:
# reference_callback(event, **kwargs)
# where event is one of:
# "moa.reference" kwargs: index, count, label, text
# "moa.aggregating" kwargs: aggregator (label), ref_count
# Never raises into the model call — display is best-effort.
self.reference_callback = reference_callback
# State-scoped reference cache. The agent loop calls create() once per
# tool-loop iteration; references should re-run whenever the task STATE
# advances — i.e. on every new user message AND every new tool result —
# so each reference judges the latest state. The advisory view
# (_reference_messages) now renders tool calls + results as text, so its
# signature changes on every new tool response; the cache key is that
# signature, so a new tool result is a cache MISS (references re-run)
# while a redundant create() call with identical state is a HIT (no
# re-run, no re-emit). This gives "fire on every user/tool response"
# for free, without re-firing on a pure no-op re-call.
self._ref_cache_key: tuple | None = None
self._ref_cache_outputs: list[tuple[str, str]] = []
def _emit(self, event: str, **kwargs: Any) -> None:
cb = self.reference_callback
if cb is None:
return
try:
cb(event, **kwargs)
except Exception as exc: # pragma: no cover - display must never break the turn
logger.debug("MoA reference_callback failed for %s: %s", event, exc)
def create(self, **api_kwargs: Any) -> Any:
from hermes_cli.config import load_config
@@ -473,10 +241,7 @@ class MoAChatCompletions:
messages = list(api_kwargs.get("messages") or [])
reference_models = preset.get("reference_models") or []
aggregator = preset.get("aggregator") or {}
# MoA does not cap reference or aggregator output: each model uses its
# own maximum. Passing max_tokens=None makes call_llm omit the parameter
# (it never caps by default), so a long aggregator synthesis is never
# truncated and providers that reject max_tokens don't 400.
max_tokens = int(preset.get("max_tokens", api_kwargs.get("max_tokens") or 4096) or 4096)
temperature = float(preset.get("reference_temperature", 0.6) or 0.6)
aggregator_temperature = float(preset.get("aggregator_temperature", api_kwargs.get("temperature") or 0.4) or 0.4)
@@ -488,52 +253,12 @@ class MoAChatCompletions:
reference_outputs: list[tuple[str, str]] = []
ref_messages = _reference_messages(messages)
# Turn-scoped cache: only run + display references when the advisory
# view changed (i.e. a new user turn). Within one turn the agent loop
# calls create() once per tool iteration with the same advisory view;
# reuse the cached outputs and skip both the re-run and the re-emit.
_sig = hashlib.sha256(
"\u0000".join(
f"{m.get('role')}:{m.get('content')}" for m in ref_messages
).encode("utf-8", "replace")
).hexdigest()
_cache_key = (self.preset_name, _sig, tuple(_slot_label(s) for s in reference_models))
_refs_from_cache = _cache_key == self._ref_cache_key and bool(self._ref_cache_outputs)
if _refs_from_cache:
reference_outputs = list(self._ref_cache_outputs)
else:
reference_outputs = _run_references_parallel(
reference_models,
ref_messages,
temperature=temperature,
max_tokens=None,
)
self._ref_cache_key = _cache_key
self._ref_cache_outputs = list(reference_outputs)
# Surface each reference model's answer to the display BEFORE the
# aggregator acts — once per turn (only on the iteration that
# actually ran them). The user sees one labelled block per
# reference (rendered like a thinking block) so the MoA process is
# visible rather than a silent pause. Best-effort: never blocks the
# turn.
_ref_count = len(reference_outputs)
for _idx, (_label, _text) in enumerate(reference_outputs, start=1):
self._emit(
"moa.reference",
index=_idx,
count=_ref_count,
label=_label,
text=_text,
)
if _ref_count:
self._emit(
"moa.aggregating",
aggregator=_slot_label(aggregator),
ref_count=_ref_count,
)
reference_outputs = _run_references_parallel(
reference_models,
ref_messages,
temperature=temperature,
max_tokens=max_tokens,
)
agg_messages = [dict(m) for m in messages]
if reference_outputs:
@@ -561,26 +286,21 @@ class MoAChatCompletions:
raise RuntimeError("MoA aggregator cannot be another MoA preset")
agg_kwargs = dict(api_kwargs)
agg_kwargs["messages"] = agg_messages
# The aggregator is the acting model. Resolve its slot to the provider's
# real runtime (base_url/api_key/api_mode) and call it through the same
# request-building path any model uses — so per-model wire-format
# handling (anthropic_messages, max_completion_tokens, fixed/forbidden
# temperature) applies identically to it. MoA imposes no output cap:
# max_tokens is passed through from the caller (normally None → omitted
# → the model's real maximum). The preset's old hardcoded 4096 default
# is gone — it truncated long syntheses.
agg_kwargs["model"] = aggregator.get("model")
agg_kwargs["temperature"] = aggregator_temperature
return call_llm(
task="moa_aggregator",
provider=aggregator.get("provider"),
model=aggregator.get("model"),
messages=agg_messages,
temperature=aggregator_temperature,
max_tokens=agg_kwargs.get("max_tokens"),
tools=agg_kwargs.get("tools"),
extra_body=agg_kwargs.get("extra_body"),
**_slot_runtime(aggregator),
)
class MoAClient:
def __init__(self, preset_name: str, reference_callback: Any = None):
def __init__(self, preset_name: str):
self.chat = type("_MoAChat", (), {})()
self.chat.completions = MoAChatCompletions(preset_name, reference_callback=reference_callback)
self.chat.completions = MoAChatCompletions(preset_name)

View File

@@ -478,16 +478,6 @@ def _infer_provider_from_url(base_url: str) -> Optional[str]:
return None
def _lmstudio_server_root(base_url: str) -> str:
"""Return the LM Studio server root for native ``/api/v1`` endpoints."""
root = _normalize_base_url(base_url).rstrip("/")
for suffix in ("/api/v1", "/api", "/v1"):
if root.endswith(suffix):
root = root[: -len(suffix)].rstrip("/")
break
return root
def _is_known_provider_base_url(base_url: str) -> bool:
return _infer_provider_from_url(base_url) is not None
@@ -559,7 +549,6 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
server_url = normalized
if server_url.endswith("/v1"):
server_url = server_url[:-3]
lmstudio_url = _lmstudio_server_root(base_url)
headers = _auth_headers(api_key)
@@ -567,7 +556,7 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
with httpx.Client(timeout=2.0, headers=headers) as client:
# LM Studio exposes /api/v1/models — check first (most specific)
try:
r = client.get(f"{lmstudio_url}/api/v1/models")
r = client.get(f"{server_url}/api/v1/models")
if r.status_code == 200:
return "lm-studio"
except Exception:
@@ -785,7 +774,7 @@ def fetch_endpoint_model_metadata(
if is_local_endpoint(normalized):
try:
if detect_local_server_type(normalized, api_key=api_key) == "lm-studio":
server_url = _lmstudio_server_root(normalized)
server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized
response = requests.get(
server_url.rstrip("/") + "/api/v1/models",
headers=headers,
@@ -1199,56 +1188,6 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
return None
def query_ollama_supports_vision(model: str, base_url: str, api_key: str = "") -> Optional[bool]:
"""Return True/False when Ollama ``/api/show`` reports vision support.
Uses the ``capabilities`` field on Ollama 0.6.0+ and falls back to
``model_info.*.vision.block_count`` on older servers. Returns None when
the server is unreachable, not Ollama, or the model is unknown.
"""
import httpx
bare_model = _strip_provider_prefix(model)
if not bare_model or not base_url:
return None
try:
if detect_local_server_type(base_url, api_key=api_key) != "ollama":
return None
except Exception:
return None
server_url = base_url.rstrip("/")
if server_url.endswith("/v1"):
server_url = server_url[:-3]
headers = _auth_headers(api_key)
try:
with httpx.Client(timeout=3.0, headers=headers) as client:
resp = client.post(f"{server_url}/api/show", json={"name": bare_model})
if resp.status_code != 200:
return None
data = resp.json()
except Exception:
return None
caps = data.get("capabilities")
if isinstance(caps, list):
if any(str(cap).lower() == "vision" for cap in caps):
return True
if caps:
return False
model_info = data.get("model_info")
if isinstance(model_info, dict):
for key in model_info:
if "vision.block_count" in str(key).lower():
return True
return None
def _query_ollama_api_show(model: str, base_url: str, api_key: str = "") -> Optional[int]:
"""Query an Ollama server's native ``/api/show`` for context length.
@@ -1358,7 +1297,6 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
server_url = base_url.rstrip("/")
if server_url.endswith("/v1"):
server_url = server_url[:-3]
lmstudio_url = _lmstudio_server_root(base_url)
headers = _auth_headers(api_key)
@@ -1402,7 +1340,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
# Use _model_id_matches for fuzzy matching: LM Studio stores models as
# "publisher/slug" but users configure only "slug" after "local:" prefix.
if server_type == "lm-studio":
resp = client.get(f"{lmstudio_url}/api/v1/models")
resp = client.get(f"{server_url}/api/v1/models")
if resp.status_code == 200:
data = resp.json()
for m in data.get("models", []):
@@ -1708,34 +1646,6 @@ def get_model_context_length(
if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
return config_context_length
# 0a. MoA virtual provider — ``model`` is a preset name, not a real model,
# and ``base_url`` is the local virtual endpoint, so every probe below would
# miss and fall through to the 256K default. The aggregator is the acting
# model, so resolve the context window from the aggregator slot's real
# provider+model instead. References are advisory-only and never bound the
# acting context, so they're ignored here.
if (provider or "").strip().lower() == "moa":
try:
from hermes_cli.config import load_config
from hermes_cli.moa_config import resolve_moa_preset
from hermes_cli.runtime_provider import resolve_runtime_provider
preset = resolve_moa_preset(load_config().get("moa") or {}, model)
agg = preset.get("aggregator") or {}
agg_provider = str(agg.get("provider") or "").strip()
agg_model = str(agg.get("model") or "").strip()
if agg_model and agg_provider and agg_provider.lower() != "moa":
rt = resolve_runtime_provider(requested=agg_provider, target_model=agg_model)
return get_model_context_length(
agg_model,
base_url=rt.get("base_url", "") or "",
api_key=rt.get("api_key", "") or "",
provider=agg_provider,
)
except Exception:
logger.debug("MoA aggregator context-length resolution failed", exc_info=True)
# Fall through to the generic default if aggregator resolution failed.
# 0b. custom_providers per-model override — check before any probe.
# This closes the gap where /model switch and display paths used to fall
# back to 128K despite the user having a per-model context_length set.

View File

@@ -26,7 +26,7 @@ from __future__ import annotations
import os
import sys
import urllib.request
from typing import Any, Optional
from typing import Optional
from utils import base_url_hostname, normalize_proxy_url
@@ -142,46 +142,6 @@ def _get_proxy_for_base_url(base_url: Optional[str]) -> Optional[str]:
return proxy
def build_keepalive_http_client(
base_url: str = "",
*,
async_mode: bool = False,
) -> Optional[Any]:
"""Build an httpx client for OpenAI SDK calls with env-only proxy policy.
Uses explicit ``HTTPS_PROXY`` / ``NO_PROXY`` env vars via
``_get_proxy_for_base_url``. A custom transport disables httpx's default
``trust_env`` path, so macOS system proxy settings from
``urllib.request.getproxies()`` (which omit the ExceptionsList) are not
applied. Mirrors ``AIAgent._build_keepalive_http_client``.
"""
try:
import httpx
import socket
if "api.githubcopilot.com" in str(base_url or "").lower():
client_cls = httpx.AsyncClient if async_mode else httpx.Client
return client_cls()
sock_opts = [(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)]
if hasattr(socket, "TCP_KEEPIDLE"):
sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 30))
sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 10))
sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 3))
elif hasattr(socket, "TCP_KEEPALIVE"):
sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPALIVE, 30))
proxy = _get_proxy_for_base_url(base_url)
transport_cls = httpx.AsyncHTTPTransport if async_mode else httpx.HTTPTransport
client_cls = httpx.AsyncClient if async_mode else httpx.Client
return client_cls(
transport=transport_cls(socket_options=sock_opts),
proxy=proxy,
)
except Exception:
return None
def _install_safe_stdio() -> None:
"""Wrap stdout/stderr so best-effort console output cannot crash the agent."""
for stream_name in ("stdout", "stderr"):
@@ -204,5 +164,4 @@ __all__ = [
"_install_safe_stdio",
"_get_proxy_from_env",
"_get_proxy_for_base_url",
"build_keepalive_http_client",
]

View File

@@ -88,15 +88,12 @@ def _find_hermes_md(cwd: Path) -> Optional[Path]:
stop_at = _find_git_root(cwd)
current = cwd.resolve()
# When there is no git root, only check cwd itself walking parents
# could pick up a .hermes.md planted in /tmp, /home, etc.
search_dirs = [current, *current.parents] if stop_at else [current]
for directory in search_dirs:
for directory in [current, *current.parents]:
for name in _HERMES_MD_NAMES:
candidate = directory / name
if candidate.is_file():
return candidate
# Stop walking at the git root (or filesystem root).
if stop_at and directory == stop_at:
break
return None
@@ -620,12 +617,7 @@ DEVELOPER_ROLE_MODELS = ("gpt-5", "codex")
PLATFORM_HINTS = {
"whatsapp": (
"You are on a text messaging communication platform, WhatsApp. "
"Standard markdown (**bold**, *italic*, ~~strike~~, # headers, "
"`code`, ```code blocks```, [links](url)) is auto-converted to "
"WhatsApp's native syntax (*bold*, _italic_, ~strike~, monospace) — "
"feel free to write in markdown, and use bullet lists ('- item') "
"freely. Tables are NOT supported — prefer bullet lists or labeled "
"key:value pairs. "
"Please do not use markdown as it does not render. "
"You can send media files natively: to deliver a file to the user, "
"include MEDIA:/absolute/path/to/file in your response. The file "
"will be sent as a native WhatsApp attachment — images (.jpg, .png, "
@@ -690,11 +682,7 @@ PLATFORM_HINTS = {
),
"signal": (
"You are on a text messaging communication platform, Signal. "
"Standard markdown (**bold**, *italic*, ~~strike~~, # headers, "
"`code`, ```code blocks```) is auto-converted to Signal's native "
"rich formatting — feel free to write in markdown, and use bullet "
"lists ('- item') freely (they render as • bullets). Tables are NOT "
"supported — prefer bullet lists or labeled key:value pairs. "
"Please do not use markdown as it does not render. "
"You can send media files natively: to deliver a file to the user, "
"include MEDIA:/absolute/path/to/file in your response. Images "
"(.png, .jpg, .webp) appear as photos, audio as attachments, and other "
@@ -929,7 +917,8 @@ def _probe_remote_backend(env_type: str) -> str | None:
try:
# Import locally: tools/ imports are heavy and only relevant when a
# non-local backend is actually configured.
from tools.terminal_tool import _create_environment, _get_env_config # type: ignore
from tools.terminal_tool import _get_env_config # type: ignore
from tools.environments import get_environment # type: ignore
except Exception as e:
logger.debug("Backend probe unavailable (import failed): %s", e)
_BACKEND_PROBE_CACHE[cache_key] = ""
@@ -937,59 +926,7 @@ def _probe_remote_backend(env_type: str) -> str | None:
try:
config = _get_env_config()
# Build the environment the same way tools/terminal_tool.py does for a
# live command: select the backend image, then assemble ssh/container
# config from the env-derived dict. (There is no `get_environment`
# factory — the real entry point is `_create_environment`.)
if env_type == "docker":
image = config.get("docker_image", "")
elif env_type == "singularity":
image = config.get("singularity_image", "")
elif env_type == "modal":
image = config.get("modal_image", "")
elif env_type == "daytona":
image = config.get("daytona_image", "")
else:
image = ""
ssh_config = None
if env_type == "ssh":
ssh_config = {
"host": config.get("ssh_host", ""),
"user": config.get("ssh_user", ""),
"port": config.get("ssh_port", 22),
"key": config.get("ssh_key", ""),
"persistent": config.get("ssh_persistent", False),
}
container_config = None
if env_type in {"docker", "singularity", "modal", "daytona"}:
container_config = {
"container_cpu": config.get("container_cpu", 1),
"container_memory": config.get("container_memory", 5120),
"container_disk": config.get("container_disk", 51200),
"container_persistent": config.get("container_persistent", True),
"modal_mode": config.get("modal_mode", "auto"),
"docker_volumes": config.get("docker_volumes", []),
"docker_mount_cwd_to_workspace": config.get("docker_mount_cwd_to_workspace", False),
"docker_forward_env": config.get("docker_forward_env", []),
"docker_env": config.get("docker_env", {}),
"docker_run_as_host_user": config.get("docker_run_as_host_user", False),
"docker_extra_args": config.get("docker_extra_args", []),
"docker_persist_across_processes": config.get("docker_persist_across_processes", True),
"docker_orphan_reaper": config.get("docker_orphan_reaper", True),
}
env = _create_environment(
env_type=env_type,
image=image,
cwd=config.get("cwd", ""),
timeout=config.get("timeout", 180),
ssh_config=ssh_config,
container_config=container_config,
task_id="prompt-backend-probe",
host_cwd=config.get("host_cwd"),
)
env = get_environment(config)
# Single-line POSIX probe — works on any Unixy backend. Wrapped in
# `2>/dev/null` so a missing binary doesn't pollute the output.
probe_cmd = (

View File

@@ -1,216 +0,0 @@
"""Per-reasoning-model stale-timeout floor for known reasoning models.
Reasoning models (those that emit extended thinking blocks before their
first content token) routinely exceed Hermes's default chat-model
stale detectors:
* Stream stale detector: ``HERMES_STREAM_STALE_TIMEOUT`` default 180s
``agent/chat_completion_helpers.py:2544``
* Non-stream stale detector: ``HERMES_API_CALL_STALE_TIMEOUT`` default 90s
``run_agent.py:1140``
For NVIDIA Nemotron 3 Ultra on the hosted NIM gateway the empirical
upstream idle kill is ~120s (first-party reproduction at
NVIDIA/NemoClaw#4846 — TTFB ~31s, stream dies at 120s). The same
failure mode exists on OpenAI o1/o3, Anthropic Opus 4.x thinking,
DeepSeek R1, Qwen QwQ, xAI Grok reasoning — every cloud reasoning
model hits upstream-proxies / load-balancers with idle timeouts
shorter than the model's thinking phase. Result: the stale detector
kills the connection mid-think, surfacing as
``BrokenPipeError``/``RemoteProtocolError`` on the next read.
This module provides a floor that the existing stale-detector scaling
blocks consult via :func:`get_reasoning_stale_timeout_floor` and
apply as ``max(default, floor)``. It is a FLOOR:
* Never overrides explicit user config (``providers.<id>.models.<model>.stale_timeout_seconds``
or ``request_timeout_seconds`` already wins — this code never runs
in that branch).
* Never lowers an existing threshold.
* Has zero effect on non-reasoning models — they are not in the
allowlist and the resolver returns ``None``.
Matching uses start-anchored regex on the slug-only component of
the model name (after stripping any aggregator prefix like
``openai/``, ``x-ai/``, ``anthropic/``). The right-anchor matches
end-of-string or a ``-``/``.``/``_`` slug separator, so ``qwen3-235b``
matches the ``qwen3`` family entry (a future model slug would be
``qwen3-235b-instruct`` and would also match) but ``some-other-qwen3``
does NOT match ``qwen3`` (the ``-qwen3`` is not at start of slug).
The ``o1`` case is the most delicate: a model named
``llama-4-70b-o1-preview`` is a hypothetical community derivative that
should NOT trigger the reasoning-model floor for the user (the user
chose a non-OpenAI model, not a reasoning model). The start-of-slug
anchor naturally excludes this — the matched ``o1-preview`` is at
position 11 of the slug, not at position 0. The previous substring-
with-trailing-hyphen design would have over-matched here, which is
why start-of-slug anchoring is the right shape.
Fixes #52217.
"""
from __future__ import annotations
import re
from typing import Optional
# (slug, floor_seconds). Each slug is matched as a discrete
# word-boundary component via the wrapper regex in ``_match_any``
# below. Order is irrelevant — the first regex match wins.
_REASONING_STALE_TIMEOUT_FLOORS: tuple[tuple[str, int], ...] = (
# NVIDIA Nemotron — reasoning models behind hosted NIM with
# documented 60-180s upstream idle kill (NVIDIA/NemoClaw#4846:
# 120s measured).
("nemotron-3-ultra", 600),
("nemotron-3-super", 600),
("nemotron-3-nano", 300),
# DeepSeek — R1 reasoning model on hosted NIM / DeepSeek direct.
("deepseek-r1", 600),
("deepseek-reasoner", 600),
# Qwen — QwQ reasoning + Qwen3 thinking variants. QwQ-32B
# preview is the stable slug; ``qwen3`` covers the family of
# thinking-mode Qwen3 models (qwen3-235b-a22b, qwen3-32b, etc.)
# without over-matching every Qwen3 instruct variant — the
# right-anchor requires the slug to be at the start of the
# remaining model name, so ``qwen3-235b-instruct`` (instruct is
# NOT a thinking variant) would still match. Acceptable
# trade-off: instruct variants of qwen3 get the 180s floor
# even though they don't reason. The cost is a slightly longer
# wait on a hung provider; the alternative (matching only
# ``qwen3-.*-thinking``) breaks the moment NVIDIA or Alibaba
# ships a slightly different naming shape.
("qwq-32b", 300),
("qwen3", 180),
# OpenAI o-series — known multi-minute TTFB. Each variant
# enumerated explicitly so bare ``o1`` doesn't over-match
# ``olmo-1`` or hypothetical future community derivatives.
("o1", 600),
("o1-mini", 600),
("o1-pro", 600),
("o1-preview", 600),
("o3", 600),
("o3-pro", 600),
("o3-mini", 300),
("o4-mini", 300),
# Anthropic Claude 4.x thinking variants. Anchored at
# ``claude-opus-4`` so non-thinking Claude 3.x or future
# non-reasoning Claude variants don't match.
("claude-opus-4", 240),
("claude-sonnet-4.5", 180),
("claude-sonnet-4.6", 180),
# xAI Grok reasoning variants. Explicit reasoning-only keys
# plus one for the ``non-reasoning`` variant so users picking
# the fast variant don't get the 300s floor. Bare ``grok-3``,
# ``grok-4`` etc. don't match — only the explicit reasoning /
# non-reasoning pairs.
("grok-4-fast-reasoning", 300),
("grok-4.20-reasoning", 300),
("grok-4-fast-non-reasoning", 180),
)
# Pre-compile each pattern. Wrapper = start-of-slug + slug + end-or-
# separator, where ``start-of-slug`` means start-of-string OR
# immediately after the last ``/`` (aggregator separator) and
# ``end-or-separator`` means end-of-string OR a ``-``/``.``/``_``.
#
# Why start-of-slug and not start-of-string: aggregator prefixes
# like ``openai/`` should not affect matching — the slug identity is
# the part after the last ``/``. Stripping the aggregator prefix in
# :func:`get_reasoning_stale_timeout_floor` before regex matching
# gives the wrapper a clean start-of-string anchor.
#
# Why end-or-separator on the right: ``openai/o3-mini`` must match
# the ``o3-mini`` slug (the right anchor is end-of-string). And
# ``openai/o3-mini-2025-01-31`` must also match ``o3-mini`` (the right
# anchor is the ``-`` separator). But ``openai/o3-mini-fork`` should
# NOT match ``o3-mini`` if we wanted to exclude forks — though the
# pattern ``o3-mini-fork`` would be matched as a derivative anyway,
# so we accept that community forks inheriting the same prefix are
# treated as reasoning models (a reasonable default — the upstream
# gateway timing is the same).
_PATTERN_CACHE: dict[str, re.Pattern[str]] = {}
def _get_pattern(slug: str) -> re.Pattern[str]:
compiled = _PATTERN_CACHE.get(slug)
if compiled is None:
compiled = re.compile(
r"^"
+ re.escape(slug)
+ r"(?:$|[\-._])"
)
_PATTERN_CACHE[slug] = compiled
return compiled
def _match_any(model_lower: str) -> Optional[float]:
"""Return the floor for the first matching slug, else None.
Each table entry is matched as a start-of-slug prefix with the
slug-separator-or-end-of-string right-anchor. Table iteration
order is irrelevant: longest slug wins (so ``o3-mini`` beats
``o3`` on a model like ``openai/o3-mini``).
"""
# Sort by slug length descending so longer / more-specific slugs
# win on shared prefixes (o3-mini beats o3).
sorted_floors = sorted(
_REASONING_STALE_TIMEOUT_FLOORS, key=lambda kv: -len(kv[0])
)
for slug, floor in sorted_floors:
if _get_pattern(slug).search(model_lower):
return float(floor)
return None
def get_reasoning_stale_timeout_floor(model: object) -> Optional[float]:
"""Return the stale-timeout floor (seconds) for a known reasoning model.
Returns ``None`` when the model is not in the allowlist or the
argument is empty / not a string. Matching uses
word-boundary-anchored regex on the lowercased model name, so
``openai/o3-mini`` matches the ``o3-mini`` slug but
``olmo-1`` does NOT match ``o1`` (the ``o1`` substring is not
at a word boundary inside ``olmo-1``).
Aggregator prefixes (``openai/``, ``x-ai/``, ``anthropic/`` etc.)
are preserved through matching — the ``/`` is itself a word
boundary, so ``openai/o3-mini`` matches ``o3-mini`` because the
``/`` before ``o3-mini`` satisfies the left-anchor alternation.
This is a FLOOR — callers must apply it as ``max(default, floor)``
and only when no explicit user-configured per-model
``stale_timeout_seconds`` exists.
>>> get_reasoning_stale_timeout_floor("nvidia/nemotron-3-ultra-550b-a55b")
600.0
>>> get_reasoning_stale_timeout_floor("openai/o3-mini")
300.0
>>> get_reasoning_stale_timeout_floor("deepseek/deepseek-r1")
600.0
>>> get_reasoning_stale_timeout_floor("qwen/qwen3-235b-a22b-thinking")
180.0
>>> get_reasoning_stale_timeout_floor("x-ai/grok-4-fast-reasoning")
300.0
>>> get_reasoning_stale_timeout_floor("anthropic/claude-opus-4-6")
240.0
>>> get_reasoning_stale_timeout_floor("gpt-4o") is None
True
>>> get_reasoning_stale_timeout_floor("olmo-1") is None
True
>>> get_reasoning_stale_timeout_floor(None) is None
True
"""
if not model or not isinstance(model, str):
return None
name = model.strip().lower()
if not name:
return None
# Strip aggregator prefix (everything before and including the
# last ``/``). The wrapper regex anchors at start-of-string, so
# the slug identity is the bare model name.
if "/" in name:
name = name.rsplit("/", 1)[1]
return _match_any(name)

View File

@@ -10,7 +10,6 @@ the first 6 and last 4 characters for debuggability.
import logging
import os
import re
import shlex
logger = logging.getLogger(__name__)
@@ -108,60 +107,12 @@ _PREFIX_PATTERNS = [
r"ntn_[A-Za-z0-9]{10,}", # Notion internal integration token
]
# ENV assignment patterns: KEY=value where KEY contains a secret-like name.
# Uppercase keys tolerate spaces around "=" (e.g. ``FOO_SECRET = bar``) because
# an all-caps key is almost never prose/code.
# ENV assignment patterns: KEY=value where KEY contains a secret-like name
_SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)"
_ENV_ASSIGN_RE = re.compile(
rf"([A-Z0-9_]{{0,50}}{_SECRET_ENV_NAMES}[A-Z0-9_]{{0,50}})\s*=\s*(['\"]?)(\S+)\2",
)
# Lowercase / dotted / hyphenated config keys from config files
# (application.properties, .env, YAML-ish dumps): ``spring.datasource.password=secret``,
# ``app.api.key=xyz``, ``password=secret``. The uppercase _ENV_ASSIGN_RE above
# never matched these, so config-file passwords leaked verbatim (issue #16413).
#
# These run only in a config-file context, NOT in prose, code, or URLs — three
# carve-outs preserved from the original design (#4367 + the documented
# web-URL passthrough below):
# 1. The value is bounded by ``[^\s&]`` (stops at whitespace AND ``&``) so
# form-urlencoded bodies are handled pair-by-pair (by _redact_form_body),
# not greedily swallowed.
# 2. _CFG_DOTTED_RE only matches when the key is NAMESPACED (contains a dot),
# which is unambiguously a config key — never a prose word.
# 3. _CFG_ANCHORED_RE matches a bare secret-word key only at line start
# (optionally after ``export``), so conversational ``I have password=foo``
# mid-sentence is left alone.
# The colon-form URL guard (skip when ``://`` present) lives at the call site.
_SECRET_CFG_NAMES = r"(?:api[ _.\-]?key|token|secret|passwd|password|credential|auth)"
_CFG_VALUE = r"(['\"]?)([^\s&]+?)\2(?=[\s&]|$)"
# Namespaced (dotted) key: the secret word may sit anywhere in a dotted path.
_CFG_DOTTED_RE = re.compile(
rf"((?:[A-Za-z0-9_\-]+\.)+[A-Za-z0-9_.\-]*{_SECRET_CFG_NAMES}[A-Za-z0-9_.\-]*"
rf"|[A-Za-z0-9_.\-]*{_SECRET_CFG_NAMES}[A-Za-z0-9_.\-]*\.[A-Za-z0-9_.\-]+)"
rf"={_CFG_VALUE}",
re.IGNORECASE,
)
# Line-anchored bare key: ``password=…`` / ``export api_key=…`` at start of line.
_CFG_ANCHORED_RE = re.compile(
rf"(^[ \t]*(?:export[ \t]+)?[A-Za-z0-9_\-]*{_SECRET_CFG_NAMES}[A-Za-z0-9_\-]*)={_CFG_VALUE}",
re.IGNORECASE | re.MULTILINE,
)
# Unquoted YAML / colon config (e.g. ``password: secret``,
# ``spring.datasource.password: hunter2``). The secret keyword must be part of
# the KEY (anchored to the start of the line/indent), and the value is a single
# whitespace-free token — so prose like ``note: secret meeting`` (keyword in the
# value) and ``error: token expired`` are left alone. Bare ``auth`` is excluded
# from the key set so ``Authorization:`` / ``author:`` don't match (the former
# is masked by _AUTH_HEADER_RE); ``auth_token``/``auth-token`` still match via
# the ``token`` keyword. Quoted values defer to _JSON_FIELD_RE via the lookahead.
_YAML_CFG_NAMES = r"(?:api[ _.\-]?key|token|secret|passwd|password|credential)"
_YAML_ASSIGN_RE = re.compile(
rf"(^[ \t]*[A-Za-z0-9_.\-]*{_YAML_CFG_NAMES}[A-Za-z0-9_.\-]*)(:[ \t]*)(?!['\"])([^\s&]+)",
re.IGNORECASE | re.MULTILINE,
)
# JSON field patterns: "apiKey": "value", "token": "value", etc.
_JSON_KEY_NAMES = r"(?:api_?[Kk]ey|token|secret|password|access_token|refresh_token|auth_token|bearer|secret_value|raw_secret|secret_input|key_material)"
_JSON_FIELD_RE = re.compile(
@@ -174,15 +125,8 @@ _JSON_FIELD_RE = re.compile(
# while the header name and scheme word are preserved for debuggability. The
# previous rule only matched ``Bearer``, so ``Basic <base64 user:pass>`` and
# ``token <pat>`` leaked verbatim into logs/transcripts.
#
# The credential class excludes quote characters (``"`` / ``'``): a token sitting
# flush against a closing quote (``"Authorization: Bearer sk-..."``) must not pull
# that quote into the match, or masking turns value corruption into *syntax*
# corruption — the closing quote vanishes and the command/string no longer parses
# (unterminated quote → shell EOF / Python SyntaxError). Real credentials never
# contain ``"`` or ``'``, so excluding them is safe. See #43083.
_AUTH_HEADER_RE = re.compile(
r"((?:Proxy-)?Authorization:\s*)([A-Za-z][\w.+-]*\s+)?([^\s\"']+)",
r"((?:Proxy-)?Authorization:\s*)([A-Za-z][\w.+-]*\s+)?(\S+)",
re.IGNORECASE,
)
@@ -210,37 +154,9 @@ _PRIVATE_KEY_RE = re.compile(
)
# Database connection strings: protocol://user:PASSWORD@host
# Catches postgres, mysql, mongodb, redis, amqp URLs and redacts the password.
# The userinfo and password groups forbid whitespace ([^:\s]+ / [^@\s]+) so the
# match can never span a line break. A real DSN password never contains
# whitespace; without this bound the greedy [^@]+ would scan past the end of a
# code line to the next stray "@" (e.g. a Python decorator), swallowing
# intervening lines and corrupting tool OUTPUT for any source containing a
# postgresql:// f-string template. See issue #33801.
# Catches postgres, mysql, mongodb, redis, amqp URLs and redacts the password
_DB_CONNSTR_RE = re.compile(
r"((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:\s]+:)([^@\s]+)(@)",
re.IGNORECASE,
)
# Bare-token credential in a web/transport URL: ``scheme://TOKEN@host``.
# This is the ``git remote set-url origin https://PASSWORD@github.com/...``
# shape from issue #6396 — a single opaque credential in the userinfo position
# with NO ``user:pass`` colon. It is unambiguously a secret: legitimate
# round-trip URLs (OAuth callbacks, magic links, pre-signed shares — see the
# "Web-URL redaction is intentionally OFF" note in redact_sensitive_text) carry
# their tokens in the QUERY STRING, never in bare userinfo. The colon form
# ``user:pass@`` is deliberately left to pass through (commit "pass web URLs
# through unchanged", #34029) and is NOT matched here — the token class forbids
# ``:``. DB schemes are handled by _DB_CONNSTR_RE above and excluded here.
#
# Guards against false positives:
# - 8+ char floor skips short usernames (git, admin, root, deploy, ubuntu).
# - The token class ``[^\s:@/]`` cannot cross ``/``, so an ``@`` sitting in a
# path or query (e.g. ``?q=user@example.com``) is never treated as userinfo.
_URL_BARE_TOKEN_RE = re.compile(
r"((?:https?|wss?|git|ssh|ftp|ftps|sftp)://)" # scheme
r"([^\s:@/]{8,})" # bare token (no colon/slash/@), 8+ chars
r"(@[^\s]+)", # @host...
r"((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:]+:)([^@]+)(@)",
re.IGNORECASE,
)
@@ -424,40 +340,7 @@ def _redact_form_body(text: str) -> str:
return _redact_query_string(text.strip())
def _mask_token_nonreusable(token: str) -> str:
"""Redact a prefix-matched credential to a NON-REUSABLE sentinel.
Unlike :func:`_mask_token` (which keeps head/tail chars — fine for logs
that are never fed back into a config), this emits a marker that:
* cannot be mistaken for a usable-but-truncated key, so an agent that
reads it from a config file and writes it back does NOT corrupt the
stored credential into a dead 13-char string (issue #35519); and
* still does not leak the secret material (no head/tail chars).
The vendor prefix label is preserved for debuggability so the agent can
still tell *which* credential is present (e.g. a GitHub PAT vs an OpenAI
key) without seeing any of its bytes.
"""
if not token:
return "«redacted-secret»"
# Preserve only the recognizable vendor prefix label (e.g. "ghp_", "sk-"),
# never any of the random secret body.
label = ""
for sub in _PREFIX_SUBSTRINGS:
if token.startswith(sub):
label = sub
break
return f"«redacted:{label}…»" if label else "«redacted-secret»"
def redact_sensitive_text(
text: str,
*,
force: bool = False,
code_file: bool = False,
file_read: bool = False,
) -> str:
def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = False) -> str:
"""Apply all redaction patterns to a block of text.
Safe to call on any string -- non-matching text passes through unchanged.
@@ -470,17 +353,6 @@ def redact_sensitive_text(
constants, "apiKey": "test" fixtures). Prefix patterns, auth headers,
private keys, DB connstrings, JWTs, and URL secrets are still redacted.
Set file_read=True for file *content* returned to the agent (read_file /
search_files / cat). Secrets are STILL redacted — they are never exposed —
but prefix-matched credentials are replaced with a non-reusable sentinel
(``«redacted:ghp_…»``) instead of a head/tail-preserving mask
(``ghp_S1...Pn2T``). The old mask looked like a real-but-truncated key, so
an agent reading it from config.yaml and writing it back silently corrupted
the stored credential into a dead 13-char value → 401 (issue #35519). The
sentinel is syntactically invalid as a token, so it can't be mistaken for a
usable key or written back as one. Implies code_file=True (config/data
files shouldn't trigger the source-code ENV/JSON false-positive paths).
Performance: each regex pattern is gated behind a cheap substring
pre-check (e.g. ``"=" in text`` for ENV assignments, ``"://" in text``
for URLs, ``"eyJ" in text`` for JWTs). On a typical hermes log line
@@ -499,15 +371,9 @@ def redact_sensitive_text(
if not (force or _REDACT_ENABLED):
return text
# file_read content shouldn't hit the source-code ENV/JSON false-positive
# paths either (it's config/data, not log lines).
if file_read:
code_file = True
# Known prefixes (sk-, ghp_, etc.) — gate on substring presence
if _has_known_prefix_substring(text):
_prefix_sub = _mask_token_nonreusable if file_read else _mask_token
text = _PREFIX_RE.sub(lambda m: _prefix_sub(m.group(1)), text)
text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text)
# ENV assignments: OPENAI_API_KEY=*** (skip for code files — false positives)
if not code_file:
@@ -516,13 +382,6 @@ def redact_sensitive_text(
name, quote, value = m.group(1), m.group(2), m.group(3)
return f"{name}={quote}{_mask_token(value)}{quote}"
text = _ENV_ASSIGN_RE.sub(_redact_env, text)
# Lowercase/dotted config keys (issue #16413). Skip URLs entirely —
# web-URL query params are intentionally passed through (see note
# near the bottom of this function); _DB_CONNSTR_RE still guards
# connection-string passwords.
if "://" not in text:
text = _CFG_DOTTED_RE.sub(_redact_env, text)
text = _CFG_ANCHORED_RE.sub(_redact_env, text)
# JSON fields: "apiKey": "***" (skip for code files — false positives)
if ":" in text and '"' in text:
@@ -531,15 +390,6 @@ def redact_sensitive_text(
return f'{key}: "{_mask_token(value)}"'
text = _JSON_FIELD_RE.sub(_redact_json, text)
# Unquoted YAML / colon config: password: *** (after JSON so quoted
# values are handled there; the lookahead in _YAML_ASSIGN_RE skips
# quotes). Skip URLs — web-URL query params pass through by design.
if ":" in text and "://" not in text:
def _redact_yaml(m):
key, sep, value = m.group(1), m.group(2), m.group(3)
return f"{key}{sep}{_mask_token(value)}"
text = _YAML_ASSIGN_RE.sub(_redact_yaml, text)
# Authorization headers — _AUTH_HEADER_RE matches any scheme after
# "[Proxy-]Authorization:" case-insensitively, so "uthorization" is the
# cheapest substring gate that covers every casing without a casefold().
@@ -569,32 +419,9 @@ def redact_sensitive_text(
if "BEGIN" in text and "-----" in text:
text = _PRIVATE_KEY_RE.sub("[REDACTED PRIVATE KEY]", text)
# Database connection string passwords. With code_file=True, a password
# group that is a pure ``{...}`` brace expression is an f-string template
# reference (e.g. f"postgresql://{user}:{pass}@{host}"), not a literal
# credential — preserve it. Literal passwords are still redacted. The regex
# forbids whitespace in the password group, so a single-line template's
# group(2) is exactly the brace expression. See issue #33801.
# Database connection string passwords
if "://" in text:
if code_file:
def _redact_db(m):
pw = m.group(2)
if pw.startswith("{") and pw.endswith("}"):
return m.group(0)
return f"{m.group(1)}***{m.group(3)}"
text = _DB_CONNSTR_RE.sub(_redact_db, text)
else:
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
# Bare-token userinfo in web/transport URLs: ``scheme://TOKEN@host``.
# The git-remote-with-embedded-password shape from #6396. Only the
# colon-less bare-token form is redacted — ``user:pass@`` and
# query-string tokens are left to pass through (see the web-URL note
# below). See _URL_BARE_TOKEN_RE for the false-positive guards.
text = _URL_BARE_TOKEN_RE.sub(
lambda m: f"{m.group(1)}{_mask_token(m.group(2))}{m.group(3)}",
text,
)
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
# JWT tokens (eyJ... — base64-encoded JSON headers)
if "eyJ" in text:
@@ -607,12 +434,7 @@ def redact_sensitive_text(
# blanket-redacting param values by name breaks those skills mid-flow.
# Known credential shapes (sk-, ghp_, JWTs, etc.) inside URLs are still
# caught by _PREFIX_RE and _JWT_RE above. DB connection-string passwords
# are still caught by _DB_CONNSTR_RE. The ONE userinfo case still redacted
# is the colon-less bare-token form ``scheme://TOKEN@host`` (#6396, handled
# by _URL_BARE_TOKEN_RE in the ``://`` block above): a bare credential in
# userinfo is never a round-trip workflow token (those live in the query
# string), so masking it can't break a skill. The ``user:pass@`` form is
# left to pass through per #34029.
# are still caught by _DB_CONNSTR_RE.
# Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
if "&" in text and "=" in text:
@@ -630,66 +452,6 @@ def redact_sensitive_text(
return text
# Commands whose stdout is an environment-variable dump (KEY=value lines),
# NOT source code. For these, terminal-output redaction must run the
# ENV-assignment pass (code_file=False) so opaque tokens with no recognized
# vendor prefix (e.g. ``MY_SERVICE_TOKEN=abc123randomstring``) are still
# masked. For all other commands, code_file=True is used to avoid mangling
# legitimate source/config dumps (``MAX_TOKENS=100``, ``"apiKey": "x"``
# fixtures, ``postgresql://{user}`` f-string templates). See issue #43025.
_ENV_DUMP_COMMANDS = frozenset({"env", "printenv", "set", "export", "declare"})
def is_env_dump_command(command: str | None) -> bool:
"""Return True if ``command`` dumps environment variables to stdout.
Detects ``env`` / ``printenv`` / ``set`` / ``export`` / ``declare`` as the
first token of any segment in a pipeline or sequence (``;`` / ``&&`` /
``||`` / ``|``). Conservative: a parse failure or anything unrecognized
returns False (callers then fall back to the safer code_file=True path,
which still masks prefix-shaped keys).
"""
if not command or not isinstance(command, str):
return False
# Split on shell separators, then inspect the first token of each segment.
segments = re.split(r"[|;&]+", command)
for seg in segments:
seg = seg.strip()
if not seg:
continue
try:
tokens = shlex.split(seg)
except ValueError:
tokens = seg.split()
if tokens and tokens[0] in _ENV_DUMP_COMMANDS:
return True
return False
def redact_terminal_output(
output: str, command: str | None = None, *, force: bool = False
) -> str:
"""Redact secrets from terminal/process stdout.
Single redaction policy for ALL terminal-output surfaces — foreground
``terminal`` results AND background ``process(action=poll/log/wait)``
output — so they can't diverge. Picks ``code_file`` based on whether
``command`` is an environment dump:
- env-dump command (``env``/``printenv``/``set``/``export``/``declare``)
→ ``code_file=False`` so the ENV-assignment pass masks opaque tokens.
- anything else (or unknown command) → ``code_file=True`` to avoid
false positives on source/config dumps.
``force=True`` bypasses the global ``security.redact_secrets`` preference
for safety boundaries that must never emit raw credentials.
"""
if not output:
return output
code_file = not is_env_dump_command(command or "")
return redact_sensitive_text(output, force=force, code_file=code_file)
# Substrings used to gate ``_PREFIX_RE`` execution. If none of these appear in
# the input string, the prefix regex cannot match anything, so we skip it.
# False positives are fine (they just run the regex, which then matches

View File

@@ -1,140 +0,0 @@
"""Replay-history sanitization shared across resume code paths.
When a session's last turn dies mid-tool-loop — the process is killed by a
restart/shutdown command, a stale-timeout fires, or an interrupt lands before
the tool result is written — the persisted transcript can end with a dangling
``assistant(tool_calls)`` (no matching ``tool`` answer) or an interrupted
``assistant→tool`` block. On resume the model sees that broken tail and
re-issues the unanswered call, producing an endless "thinking"/reboot loop
(#49201, #29086).
These pure helpers strip those tails before the history is replayed to the
model. They were originally local to ``gateway/run.py`` (which fixed the
messaging-gateway path) and are extracted here so every resume surface — the
messaging gateway AND the TUI/WebUI gateway — shares the same cleanup instead
of the WebUI path silently skipping it.
"""
from __future__ import annotations
import logging
from typing import Any, Dict, List
logger = logging.getLogger(__name__)
def is_interrupted_tool_result(content: Any) -> bool:
"""Return True if a tool result indicates the tool was interrupted."""
if not isinstance(content, str):
return False
lowered = content.lower()
if "[command interrupted]" in lowered:
return True
if "exit_code" in lowered and ("130" in lowered or "-1" in lowered):
return "interrupt" in lowered
return False
def strip_interrupted_tool_tails(
agent_history: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
"""Strip interrupted assistant→tool sequences from replay history.
Older interrupted gateway turns can be followed by a queued real user
message, so the interrupted assistant/tool block is not necessarily the
final tail by the time we rebuild replay history. Remove any contiguous
assistant(tool_calls) + tool-result block that contains an interrupted tool
result, while preserving successful tool-call sequences intact.
"""
if not agent_history:
return agent_history
cleaned: List[Dict[str, Any]] = []
i = 0
n = len(agent_history)
while i < n:
msg = agent_history[i]
if msg.get("role") == "assistant" and "tool_calls" in msg:
j = i + 1
tool_results: List[Dict[str, Any]] = []
while j < n and agent_history[j].get("role") == "tool":
tool_results.append(agent_history[j])
j += 1
if tool_results and any(
is_interrupted_tool_result(m.get("content", ""))
for m in tool_results
):
logger.debug(
"Stripping interrupted assistant→tool replay block "
"(indices %d%d, tool_results=%d)",
i, j - 1, len(tool_results),
)
i = j
continue
if msg.get("role") == "tool" and is_interrupted_tool_result(msg.get("content", "")):
logger.debug("Stripping orphan interrupted tool result from replay history")
i += 1
continue
cleaned.append(msg)
i += 1
return cleaned
def strip_dangling_tool_call_tail(
agent_history: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
"""Strip a trailing ``assistant(tool_calls)`` block left with NO answers.
When a tool call itself kills the gateway process (``docker restart``,
``systemctl restart``, ``kill``, ``hermes gateway restart``), the process
is terminated by SIGKILL *mid-call* — before the tool result is ever
written and before the orderly shutdown rewind
(``_drop_trailing_empty_response_scaffolding``) can run. The last thing
persisted is the ``assistant`` message that issued the ``tool_calls``,
with zero matching ``tool`` rows.
On resume the model sees an unanswered tool call at the tail and naturally
re-issues it — which restarts the gateway again, producing the infinite
reboot loop in #49201. ``strip_interrupted_tool_tails`` does not catch
this because there is no tool result to inspect for an interrupt marker.
This strips that dangling tail at the source so there is nothing for the
model to re-execute. It only acts when the tail is an
``assistant(tool_calls)`` whose calls have NO corresponding ``tool``
results — a completed assistant→tool pair (any tool answers present) is
left untouched so genuine mid-progress tool loops still resume.
"""
if not agent_history:
return agent_history
last = agent_history[-1]
if not (
isinstance(last, dict)
and last.get("role") == "assistant"
and last.get("tool_calls")
):
return agent_history
logger.debug(
"Stripping dangling unanswered assistant(tool_calls) tail "
"(%d call(s)) — process likely killed mid-tool-call by a "
"restart/shutdown command (#49201)",
len(last.get("tool_calls") or []),
)
return agent_history[:-1]
def sanitize_replay_history(
agent_history: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
"""Apply both replay-tail strippers in the canonical order.
Convenience entry point for resume code paths: removes interrupted
assistant→tool blocks anywhere in the history, then removes a dangling
unanswered ``assistant(tool_calls)`` tail. Returns the same list object
when there is nothing to strip.
"""
if not agent_history:
return agent_history
return strip_dangling_tool_call_tail(strip_interrupted_tool_tails(agent_history))

View File

@@ -122,8 +122,6 @@ from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple
from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags
try:
import fcntl # POSIX only; Windows falls back to best-effort without flock.
except ImportError: # pragma: no cover
@@ -443,7 +441,6 @@ def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
return result
t0 = time.monotonic()
_popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
try:
proc = subprocess.run(
argv,
@@ -452,7 +449,6 @@ def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
timeout=spec.timeout,
text=True,
shell=False,
**_popen_kwargs,
)
except subprocess.TimeoutExpired:
result["timed_out"] = True

View File

@@ -5,8 +5,6 @@ import re
import subprocess
from pathlib import Path
from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags
logger = logging.getLogger(__name__)
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
@@ -68,7 +66,6 @@ def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
Failures return a short ``[inline-shell error: ...]`` marker instead of
raising, so one bad snippet can't wreck the whole skill message.
"""
_popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
try:
completed = subprocess.run(
["bash", "-c", command],
@@ -78,7 +75,6 @@ def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
timeout=max(1, int(timeout)),
check=False,
stdin=subprocess.DEVNULL,
**_popen_kwargs,
)
except subprocess.TimeoutExpired:
return f"[inline-shell timeout after {timeout}s: {command}]"

View File

@@ -507,34 +507,6 @@ def get_all_skills_dirs() -> List[Path]:
return dirs
def _resolve_for_skill_ownership(path) -> Path:
path_obj = path if isinstance(path, Path) else Path(str(path))
try:
return path_obj.expanduser().resolve()
except (OSError, RuntimeError):
return path_obj.expanduser().absolute()
def is_external_skill_path(path) -> bool:
"""Return True when ``path`` lives under a configured external skills dir.
``skills.external_dirs`` are externally owned: Hermes can discover and view
their skills, and foreground user-directed tool calls may still edit them,
but autonomous lifecycle maintenance must treat them as read-only. This
helper centralizes the ownership boundary so curator/reporting/tool paths do
not each need to re-interpret the config.
"""
candidate = _resolve_for_skill_ownership(path)
for root in get_external_skills_dirs():
resolved_root = _resolve_for_skill_ownership(root)
try:
candidate.relative_to(resolved_root)
return True
except ValueError:
continue
return False
# ── Condition extraction ──────────────────────────────────────────────────

View File

@@ -1,136 +0,0 @@
"""Thinking-timeout detection and user-facing guidance for reasoning models.
When a known reasoning model (NVIDIA Nemotron 3 Ultra, OpenAI o1/o3,
Anthropic Opus 4.x thinking, DeepSeek R1, Qwen QwQ, xAI Grok reasoning)
hits a transport-layer error before the first content token arrives, the
upstream proxy has almost certainly idle-killed a long thinking stream —
not a true context overflow or a configuration error. The user needs
distinct guidance for this case:
"The model's thinking phase exceeded the upstream proxy's idle
timeout before the first content token arrived. This is a known
issue with reasoning models behind cloud gateways (NVIDIA NIM,
OpenAI, Anthropic, DeepSeek). Workarounds in priority order:
1. Set `providers.<provider>.models.<model>.stale_timeout_seconds: 900`
in `~/.hermes/config.yaml` to extend the per-call timeout...
2. Lower `reasoning_budget` or set `reasoning_effort: medium`...
3. Use a smaller / faster reasoning model..."
The existing `_is_stream_drop` guidance at
``agent/conversation_loop.py:3464-3486`` fires for large-file-write
stream drops ("try execute_code with Python's open() for large files")
which is the WRONG advice for the thinking-timeout case. This module
provides the detection and the message as standalone helpers so the
detection logic is unit-testable without driving the full retry loop,
and the message text can be regression-tested for spelling and accuracy.
Part 2 of Fixes #52310.
"""
from __future__ import annotations
from typing import Optional
# Substring set that identifies a transport-layer failure on the
# response stream. Same shape as the existing
# ``_SERVER_DISCONNECT_PATTERNS`` in ``agent/error_classifier.py:394``
# but extended to also catch the OSS-level error signature
# (``broken pipe`` / ``errno 32``) that the upstream kill surfaces
# to the OpenAI SDK wrapper.
_THINKING_TIMEOUT_SUBSTRINGS: tuple[str, ...] = (
"broken pipe",
"errno 32",
"remote protocol",
"connection reset",
"connection lost",
"peer closed",
"server disconnected",
)
def is_thinking_timeout(classified: object, model: str, error_msg: str) -> bool:
"""Return True when a reasoning model's thinking phase hit a transport kill.
Args:
classified: a :class:`agent.error_classifier.ClassifiedError` instance
(duck-typed here to avoid an import cycle in unit tests).
model: the model slug at failure time (e.g.
``"nvidia/nemotron-3-ultra-550b-a55b"``).
error_msg: lowercased string representation of the underlying
exception (typically ``str(api_error).lower()``).
Returns True when ALL conditions hold:
1. ``classified.reason == FailoverReason.timeout`` (the classifier
override at ``agent/error_classifier.py:720-738`` ensures this
is the case for reasoning models even on large sessions).
2. ``api_error`` has no ``.status_code`` attribute set (transport
disconnect, not an HTTP error).
3. ``model`` is in the reasoning-model allowlist (reuses
``agent.reasoning_timeouts.get_reasoning_stale_timeout_floor``).
4. ``error_msg`` contains one of the transport-kill substrings.
Non-reasoning models always return False. Non-transport errors
(billing / rate_limit / auth / context_overflow / format_error)
always return False. HTTP-status errors always return False.
"""
# Import here (not at module top) to keep this helper cheap to
# import even from callers that don't need it. ``agent.reasoning_timeouts``
# is small and dependency-free.
from agent.reasoning_timeouts import get_reasoning_stale_timeout_floor
# Condition 1: classifier says timeout. Use a string/value check
# rather than importing FailoverReason so this module has zero
# import cycles from the error_classifier package.
reason = getattr(classified, "reason", None)
reason_value = getattr(reason, "value", None)
if reason_value != "timeout":
return False
# Condition 2: no HTTP status code (transport, not API error).
# Caller is expected to gate on ``getattr(api_error, "status_code", None) is None``
# before calling this helper; the surface here is just the post-gate
# boolean so the caller can pass an already-prepped error_msg.
# Condition 3: reasoning model allowlist.
if get_reasoning_stale_timeout_floor(model) is None:
return False
# Condition 4: transport-kill substring in the error message.
error_msg_lower = (error_msg or "").lower()
return any(p in error_msg_lower for p in _THINKING_TIMEOUT_SUBSTRINGS)
def build_thinking_timeout_guidance(
provider: str, model: str, model_label: Optional[str] = None,
) -> str:
"""Return the user-facing guidance string appended to ``_final_response``.
Args:
provider: provider slug (e.g. ``"nvidia"``, ``"openai"``).
model: bare model slug the user would put in their config
(e.g. ``"nemotron-3-ultra-550b-a55b"`` if the user uses
NVIDIA direct, or the full ``"nvidia/nemotron-3-ultra-550b-a55b"``
if they go through an aggregator). Used verbatim in the
config snippet so the user can copy-paste.
model_label: optional short label for the model name in the
prose (e.g. ``"Nemotron 3 Ultra"``). Falls back to the
slug if not provided.
"""
label = model_label or model
return (
"\n\nThe model's thinking phase exceeded the upstream proxy's "
"idle timeout before the first content token arrived. This is a "
f"known issue with reasoning models (like {label}) behind cloud "
"gateways (NVIDIA NIM, OpenAI, Anthropic, DeepSeek). Workarounds "
"in priority order:\n"
f"1. Set `providers.{provider}.models.{model}.stale_timeout_seconds: 900` "
"in `~/.hermes/config.yaml` to extend the per-call timeout. "
"(Hermes's built-in floor is 600s for known reasoning models — "
"if you still see this after raising, the upstream cap is even "
"shorter.)\n"
"2. Lower `reasoning_budget` or set `reasoning_effort: medium` on this "
"model if the provider supports it.\n"
"3. Use a smaller / faster reasoning model if the task doesn't "
"require deep thinking."
)

View File

@@ -26,7 +26,6 @@ from agent.display import (
build_tool_preview as _build_tool_preview,
get_cute_tool_message as _get_cute_tool_message_impl,
get_tool_emoji as _get_tool_emoji,
redact_tool_args_for_display as _redact_tool_args_for_display,
_detect_tool_failure,
)
from agent.tool_guardrails import ToolGuardrailDecision
@@ -470,11 +469,10 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off":
print(f" ⚡ Concurrent: {num_tools} tool calls — {tool_names_str}")
for i, (tc, name, args, middleware_trace, block_result, blocked_by_guardrail) in enumerate(parsed_calls, 1):
display_args = _redact_tool_args_for_display(name, args) or args
args_str = json.dumps(display_args, ensure_ascii=False)
args_str = json.dumps(args, ensure_ascii=False)
if agent.verbose_logging:
print(f" 📞 Tool {i}: {name}({list(display_args.keys())})")
print(agent._wrap_verbose("Args: ", json.dumps(display_args, indent=2, ensure_ascii=False)))
print(f" 📞 Tool {i}: {name}({list(args.keys())})")
print(agent._wrap_verbose("Args: ", json.dumps(args, indent=2, ensure_ascii=False)))
else:
args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
print(f" 📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}")
@@ -484,9 +482,8 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
continue
if agent.tool_progress_callback:
try:
display_args = _redact_tool_args_for_display(name, args) or args
preview = _build_tool_preview(name, display_args)
agent.tool_progress_callback("tool.started", name, preview, display_args)
preview = _build_tool_preview(name, args)
agent.tool_progress_callback("tool.started", name, preview, args)
except Exception as cb_err:
logging.debug(f"Tool progress callback error: {cb_err}")
@@ -495,8 +492,7 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
continue
if agent.tool_start_callback:
try:
display_args = _redact_tool_args_for_display(name, args) or args
agent.tool_start_callback(tc.id, name, display_args)
agent.tool_start_callback(tc.id, name, args)
except Exception as cb_err:
logging.debug(f"Tool start callback error: {cb_err}")
@@ -796,8 +792,7 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
if not blocked and agent.tool_complete_callback:
try:
display_args = _redact_tool_args_for_display(name, args) or args
agent.tool_complete_callback(tc.id, name, display_args, function_result)
agent.tool_complete_callback(tc.id, name, args, function_result)
except Exception as cb_err:
logging.debug(f"Tool complete callback error: {cb_err}")
@@ -959,11 +954,10 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
agent._iters_since_skill = 0
if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off":
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
args_str = json.dumps(display_args, ensure_ascii=False)
args_str = json.dumps(function_args, ensure_ascii=False)
if agent.verbose_logging:
print(f" 📞 Tool {i}: {function_name}({list(display_args.keys())})")
print(agent._wrap_verbose("Args: ", json.dumps(display_args, indent=2, ensure_ascii=False)))
print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())})")
print(agent._wrap_verbose("Args: ", json.dumps(function_args, indent=2, ensure_ascii=False)))
else:
args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")
@@ -984,16 +978,14 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
if not _execution_blocked and agent.tool_progress_callback:
try:
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
preview = _build_tool_preview(function_name, display_args)
agent.tool_progress_callback("tool.started", function_name, preview, display_args)
preview = _build_tool_preview(function_name, function_args)
agent.tool_progress_callback("tool.started", function_name, preview, function_args)
except Exception as cb_err:
logging.debug(f"Tool progress callback error: {cb_err}")
if not _execution_blocked and agent.tool_start_callback:
try:
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
agent.tool_start_callback(tool_call.id, function_name, display_args)
agent.tool_start_callback(tool_call.id, function_name, function_args)
except Exception as cb_err:
logging.debug(f"Tool start callback error: {cb_err}")
@@ -1223,8 +1215,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
if agent._should_emit_quiet_tool_messages():
face = random.choice(KawaiiSpinner.get_waiting_faces())
emoji = _get_tool_emoji(function_name)
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
preview = _build_tool_preview(function_name, display_args) or function_name
preview = _build_tool_preview(function_name, function_args) or function_name
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
spinner.start()
_ce_result = None
@@ -1257,8 +1248,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
face = random.choice(KawaiiSpinner.get_waiting_faces())
emoji = _get_tool_emoji(function_name)
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
preview = _build_tool_preview(function_name, display_args) or function_name
preview = _build_tool_preview(function_name, function_args) or function_name
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
spinner.start()
_mem_result = None
@@ -1289,8 +1279,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
face = random.choice(KawaiiSpinner.get_waiting_faces())
emoji = _get_tool_emoji(function_name)
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
preview = _build_tool_preview(function_name, display_args) or function_name
preview = _build_tool_preview(function_name, function_args) or function_name
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
spinner.start()
_spinner_result = None
@@ -1452,8 +1441,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
if not _execution_blocked and agent.tool_complete_callback:
try:
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
agent.tool_complete_callback(tool_call.id, function_name, display_args, function_result)
agent.tool_complete_callback(tool_call.id, function_name, function_args, function_result)
except Exception as cb_err:
logging.debug(f"Tool complete callback error: {cb_err}")

View File

@@ -217,7 +217,9 @@ class CodexEventProjector:
def _project_mcp_tool_call(self, item: dict, item_id: str) -> ProjectionResult:
server = item.get("server") or "mcp"
tool = item.get("tool") or "unknown"
call_id = _deterministic_call_id(f"mcp_{server}_{tool}", item_id)
# Mirror the native MCP tool-name convention (mcp__server__tool) so the
# deterministic call_id input stays consistent with registration names.
call_id = _deterministic_call_id(f"mcp__{server}__{tool}", item_id)
args = item.get("arguments") or {}
if not isinstance(args, dict):
args = {"arguments": args}

View File

@@ -28,7 +28,6 @@ import uuid
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
from agent.conversation_compression import conversation_history_after_compression
from agent.iteration_budget import IterationBudget
from agent.model_metadata import (
estimate_messages_tokens_rough,
@@ -401,9 +400,7 @@ def build_turn_context(
_orig_len, len(messages), _orig_tokens, _preflight_tokens
):
break # Cannot compress further: neither rows nor tokens moved
conversation_history = conversation_history_after_compression(
agent, messages
)
conversation_history = None
agent._empty_content_retries = 0
agent._thinking_prefill_retries = 0
agent._last_content_with_tools = None

View File

@@ -289,14 +289,7 @@ def finalize_turn(
and len(_stripped) <= 24
and _stripped[-1:] not in {".", "!", "?", "", "", "", "`", ")"}
)
_is_partial_stream_recovery = (
str(_turn_exit_reason) == "partial_stream_recovery"
)
if (
_is_empty_terminal
or _is_partial_fragment
or _is_partial_stream_recovery
):
if _is_empty_terminal or _is_partial_fragment:
_explanation = agent._format_turn_completion_explanation(
_turn_exit_reason
)

View File

@@ -67,11 +67,6 @@ class TurnRetryState:
# ── Restart signals (read by the outer loop after the attempt) ───────
restart_with_compressed_messages: bool = False
restart_with_length_continuation: bool = False
# Set when a content-filter stream stall (e.g. MiniMax "new_sensitive")
# has been escalated to the fallback chain: the partial-stream content
# was rolled back off ``messages`` and the loop should re-issue the API
# call against the newly-activated provider (#32421).
restart_with_rebuilt_messages: bool = False
def __iter__(self):
# Convenience for debugging / tests: iterate (name, value) pairs.

View File

@@ -15,135 +15,9 @@ from typing import Any, Iterable
_MAX_CHANGED_PATHS_IN_NUDGE = 8
# Non-code file extensions whose edits carry no verifiable runtime behavior:
# documentation, prose, and data/markup that no test/build exercises. When a
# turn touches ONLY these, verify-on-stop has nothing to check, so the nudge is
# suppressed (this is fix "C" for the doc/markdown/skill false-positive — a
# SKILL.md or README edit must never demand a /tmp verification script). A turn
# that edits any non-listed path (a real source/code/config file) still nudges.
_NON_CODE_VERIFY_EXTENSIONS = frozenset(
{
".md",
".markdown",
".mdx",
".rst",
".txt",
".text",
".adoc",
".asciidoc",
".org",
".log",
".csv",
".tsv",
}
)
# Filenames (case-insensitive, extension-less or otherwise) that are pure prose
# even without a recognized doc extension.
_NON_CODE_VERIFY_FILENAMES = frozenset(
{
"license",
"licence",
"notice",
"authors",
"contributors",
"changelog",
"codeowners",
}
)
def _is_non_code_path(raw: str) -> bool:
"""Return True when a changed path is documentation/prose with nothing to verify."""
try:
p = Path(str(raw))
except Exception:
return False
suffix = p.suffix.lower()
if suffix in _NON_CODE_VERIFY_EXTENSIONS:
return True
if not suffix and p.name.lower() in _NON_CODE_VERIFY_FILENAMES:
return True
return False
def _filter_verifiable_paths(paths: Iterable[str]) -> list[str]:
"""Drop documentation/prose paths; keep paths that could have verifiable behavior."""
return [p for p in paths if p and not _is_non_code_path(p)]
# Session identities (platform or source) that are NOT human conversational
# messaging surfaces: interactive coding surfaces (CLI, TUI, desktop, codex,
# local, gateway) and programmatic callers (API server, webhooks, tools).
# Verify-on-stop stays ON by default for these. Any other resolved gateway
# platform is a conversational messaging surface (Telegram, Discord, WhatsApp,
# Signal, Slack, etc.) where the verification narrative would reach a human as
# chat noise, so it defaults OFF. Mirrors LOCAL_SESSION_SOURCE_IDS in
# apps/desktop/src/lib/session-source.ts; keep roughly in sync when adding a
# local or programmatic surface. Default-deny by design: an unrecognized
# identity is treated as messaging (OFF) so a new chat platform never leaks the
# verification receipt before this set is updated.
_NON_MESSAGING_SESSION_SURFACES = frozenset(
{
"",
"cli",
"codex",
"desktop",
"gateway",
"local",
"tui",
"tool",
"api_server",
"webhook",
"msgraph_webhook",
}
)
def _session_is_messaging_surface() -> bool:
"""Return whether this turn is delivered over a human messaging channel.
The gateway binds the platform value (e.g. ``telegram``) to
``HERMES_SESSION_PLATFORM``; the CLI and TUI set ``HERMES_SESSION_SOURCE``
(e.g. ``cli``, ``tui``) instead. Both are consulted via the session-context
helper (with an ``os.environ`` fallback), alongside the ``HERMES_PLATFORM``
override, matching the sibling platform resolution in
``agent/skill_commands.py`` and ``agent/prompt_builder.py``. A turn is a
messaging surface when a resolved identity is present and is not a known
non-messaging surface.
"""
try:
from gateway.session_context import get_session_env
platform = (
os.getenv("HERMES_PLATFORM")
or get_session_env("HERMES_SESSION_PLATFORM", "")
)
source = get_session_env("HERMES_SESSION_SOURCE", "")
except Exception:
platform = os.getenv("HERMES_PLATFORM", "") or os.environ.get(
"HERMES_SESSION_PLATFORM", ""
)
source = os.environ.get("HERMES_SESSION_SOURCE", "")
for identity in (platform, source):
identity = str(identity or "").strip().lower()
if identity and identity not in _NON_MESSAGING_SESSION_SURFACES:
return True
return False
def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool:
"""Return whether edit -> verify-before-finish behavior is enabled.
Precedence: an explicit ``HERMES_VERIFY_ON_STOP`` env var wins, then an
explicit ``agent.verify_on_stop`` config value. The config default is
``False`` (see ``DEFAULT_CONFIG``) — verify-on-stop is OFF unless the user
opts in. The legacy ``"auto"`` sentinel is still honored for anyone who
sets it explicitly: it resolves to ON for interactive coding surfaces
(CLI, TUI, desktop) and programmatic callers, and OFF for conversational
messaging surfaces (Telegram, Discord, etc.). A missing/unknown value
falls back to OFF.
"""
"""Return whether edit -> verify-before-finish behavior is enabled."""
env = os.environ.get("HERMES_VERIFY_ON_STOP")
if env is not None:
return env.strip().lower() not in {"0", "false", "no", "off"}
@@ -155,20 +29,9 @@ def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool:
except Exception:
config = {}
agent_cfg = (config or {}).get("agent") if isinstance(config, dict) else None
cfg_val = agent_cfg.get("verify_on_stop") if isinstance(agent_cfg, dict) else None
if isinstance(cfg_val, bool):
return cfg_val
if isinstance(cfg_val, str):
token = cfg_val.strip().lower()
if token in {"1", "true", "yes", "on"}:
return True
if token in {"0", "false", "no", "off"}:
return False
if token == "auto":
# Explicit opt-in to the legacy surface-aware behavior.
return not _session_is_messaging_surface()
# Missing or unknown value -> OFF (the new default).
return False
if isinstance(agent_cfg, dict) and "verify_on_stop" in agent_cfg:
return bool(agent_cfg.get("verify_on_stop"))
return True
def _candidate_cwds(paths: Iterable[str]) -> list[Path]:
@@ -251,10 +114,7 @@ def build_verify_on_stop_nudge(
max_attempts: int = 2,
) -> str | None:
"""Return a synthetic follow-up when edited code lacks fresh verification."""
# Drop documentation/prose paths (markdown, skills, README, LICENSE, ...) —
# they carry no verifiable behavior, so a turn that touched only those has
# nothing to verify and must not nudge.
paths = sorted({str(p) for p in _filter_verifiable_paths(changed_paths)})
paths = sorted({str(p) for p in changed_paths if p})
if not paths or attempts >= max_attempts:
return None

View File

@@ -85,7 +85,7 @@ Installers are built and uploaded to GitHub Releases manually. macOS/Windows sig
### How it works
The packaged app ships the Electron shell and a native React chat surface. On first launch it can install the Hermes Agent runtime into `HERMES_HOME` (`~/.hermes`, or `%LOCALAPPDATA%\hermes` on Windows) — the **same layout a CLI install uses**, so the two are interchangeable. Backend resolution first honours `HERMES_DESKTOP_HERMES_ROOT`, then a completed managed install, then a probed `hermes` on `PATH` (unless `HERMES_DESKTOP_IGNORE_EXISTING=1` is set), and finally an explicit `HERMES_DESKTOP_HERMES` command override for packagers/troubleshooting. The renderer (React, in `src/`) talks to a headless backend the app launches for you — a `hermes serve` process that serves the `tui_gateway` JSON-RPC/WebSocket API — through the framework-agnostic client in [`apps/shared`](../shared/) (the same client the web dashboard consumes), and reuses the agent runtime rather than embedding `hermes --tui`. The app is **self-contained**: it runs its own `hermes serve` backend and never opens or requires the web dashboard UI. (For backward compatibility, a runtime that predates the `serve` command automatically falls back to a headless `dashboard --no-open` — see `electron/backend-command.cjs` — so mid-upgrade installs never break.) The install, backend-resolution, and self-update logic all live in `electron/main.cjs`.
The packaged app ships the Electron shell and a native React chat surface. On first launch it can install the Hermes Agent runtime into `HERMES_HOME` (`~/.hermes`, or `%LOCALAPPDATA%\hermes` on Windows) — the **same layout a CLI install uses**, so the two are interchangeable. Backend resolution first honours `HERMES_DESKTOP_HERMES_ROOT`, then a completed managed install, then a probed `hermes` on `PATH` (unless `HERMES_DESKTOP_IGNORE_EXISTING=1` is set), and finally an explicit `HERMES_DESKTOP_HERMES` command override for packagers/troubleshooting. The renderer (React, in `src/`) talks to a `hermes dashboard` backend over the `tui_gateway`/dashboard APIs and reuses the agent runtime rather than embedding `hermes --tui`. The install, backend-resolution, and self-update logic all live in `electron/main.cjs`.
### Verification

View File

@@ -1,51 +0,0 @@
'use strict'
// Backend subcommand routing for the desktop-managed Hermes process.
//
// The desktop app launches its own headless backend via `hermes serve` — it
// must NEVER depend on or launch the browser `dashboard`. But `serve` is a
// newer subcommand: a runtime that predates it (an older managed install the
// app hasn't updated yet, or an older `hermes` resolved from PATH) only knows
// `dashboard --no-open`. To avoid bricking those users mid-upgrade we detect
// whether the resolved runtime understands `serve` and, only when it does not,
// fall back to the legacy `dashboard --no-open` invocation. Both produce the
// exact same headless gateway; `serve` is just the decoupled name.
//
// These helpers are pure so they can be unit-tested without Electron.
/**
* Build the canonical headless backend argv (always `serve`).
* @param {string} [profile] optional Hermes profile to pin via `--profile`.
*/
function serveBackendArgs(profile) {
const head = profile ? ['--profile', profile] : []
return [...head, 'serve', '--host', '127.0.0.1', '--port', '0']
}
/**
* Rewrite a resolved backend argv from `serve` to the legacy
* `dashboard --no-open` form, preserving every other argument (incl. a leading
* `-m hermes_cli.main` and any `--profile <name>`). Returns a copy; if there is
* no `serve` token the argv is returned unchanged.
*/
function dashboardFallbackArgs(args) {
const i = args.indexOf('serve')
if (i === -1) return args.slice()
return [...args.slice(0, i), 'dashboard', '--no-open', ...args.slice(i + 1)]
}
/**
* True when a runtime's `hermes_cli/subcommands/dashboard.py` source registers
* the `serve` subcommand. Matches `add_parser("serve"` / `add_parser('serve'`
* specifically so the substring "server" (e.g. "start_server", "web server")
* never produces a false positive.
*/
function sourceDeclaresServe(dashboardPySource) {
return /add_parser\(\s*["']serve["']/.test(String(dashboardPySource || ''))
}
module.exports = {
serveBackendArgs,
dashboardFallbackArgs,
sourceDeclaresServe,
}

View File

@@ -1,83 +0,0 @@
'use strict'
const test = require('node:test')
const assert = require('node:assert/strict')
const {
serveBackendArgs,
dashboardFallbackArgs,
sourceDeclaresServe,
} = require('./backend-command.cjs')
test('serveBackendArgs builds a headless serve invocation', () => {
assert.deepEqual(serveBackendArgs(), [
'serve',
'--host',
'127.0.0.1',
'--port',
'0',
])
})
test('serveBackendArgs pins a profile when provided', () => {
assert.deepEqual(serveBackendArgs('worker'), [
'--profile',
'worker',
'serve',
'--host',
'127.0.0.1',
'--port',
'0',
])
})
test('dashboardFallbackArgs rewrites serve -> dashboard --no-open, keeping the -m prefix', () => {
const serve = ['-m', 'hermes_cli.main', 'serve', '--host', '127.0.0.1', '--port', '0']
assert.deepEqual(dashboardFallbackArgs(serve), [
'-m',
'hermes_cli.main',
'dashboard',
'--no-open',
'--host',
'127.0.0.1',
'--port',
'0',
])
})
test('dashboardFallbackArgs preserves a --profile flag ahead of serve', () => {
const serve = ['-m', 'hermes_cli.main', '--profile', 'worker', 'serve', '--host', '127.0.0.1', '--port', '0']
assert.deepEqual(dashboardFallbackArgs(serve), [
'-m',
'hermes_cli.main',
'--profile',
'worker',
'dashboard',
'--no-open',
'--host',
'127.0.0.1',
'--port',
'0',
])
})
test('dashboardFallbackArgs is a no-op (copy) when there is no serve token', () => {
const args = ['-m', 'hermes_cli.main', 'dashboard', '--no-open']
const out = dashboardFallbackArgs(args)
assert.deepEqual(out, args)
assert.notEqual(out, args, 'should return a copy, not the same reference')
})
test('sourceDeclaresServe detects the serve subparser registration', () => {
assert.equal(sourceDeclaresServe('subparsers.add_parser("serve", help="...")'), true)
assert.equal(sourceDeclaresServe("subparsers.add_parser('serve')"), true)
assert.equal(sourceDeclaresServe('subparsers.add_parser(\n "serve",\n)'), true)
})
test('sourceDeclaresServe does not false-positive on the substring "server"', () => {
const oldSource = `
dashboard_parser = subparsers.add_parser("dashboard", help="Start the web UI dashboard")
from hermes_cli.web_server import start_server # web server
`
assert.equal(sourceDeclaresServe(oldSource), false)
})

View File

@@ -61,7 +61,10 @@ function buildDesktopBackendPath({
const venvBin = venvRoot ? pathModule.join(venvRoot, platform === 'win32' ? 'Scripts' : 'bin') : null
const saneEntries = platform === 'win32' ? [] : POSIX_SANE_PATH_ENTRIES
return appendUniquePathEntries([hermesNodeBin, venvBin, currentPath, saneEntries], { delimiter })
return appendUniquePathEntries(
[hermesNodeBin, venvBin, currentPath, saneEntries],
{ delimiter }
)
}
function normalizeHermesHomeRoot(hermesHome, { pathModule = pathModuleForPlatform(process.platform) } = {}) {

View File

@@ -76,7 +76,10 @@ test('normalizeHermesHomeRoot maps profile homes back to the global Hermes root'
normalizeHermesHomeRoot('C:\\Users\\test\\AppData\\Local\\hermes\\profiles\\oracle', { pathModule: path.win32 }),
'C:\\Users\\test\\AppData\\Local\\hermes'
)
assert.equal(normalizeHermesHomeRoot('/Users/test/.hermes', { pathModule: path.posix }), '/Users/test/.hermes')
assert.equal(
normalizeHermesHomeRoot('/Users/test/.hermes', { pathModule: path.posix }),
'/Users/test/.hermes'
)
})
test('Windows PATH casing and delimiter are preserved without POSIX sane entries', () => {
@@ -101,5 +104,8 @@ test('Windows PATH casing and delimiter are preserved without POSIX sane entries
})
test('appendUniquePathEntries drops empty entries and keeps first occurrence', () => {
assert.equal(appendUniquePathEntries([':/a::/b', ['/a', '/c']], { delimiter: ':' }), '/a:/b:/c')
assert.equal(
appendUniquePathEntries([':/a::/b', ['/a', '/c']], { delimiter: ':' }),
'/a:/b:/c'
)
})

View File

@@ -37,18 +37,7 @@ const { execFileSync } = require('node:child_process')
const PROBE_TIMEOUT_MS = 5000
/**
* Return the Python snippet used to verify Hermes can import far enough to
* launch the CLI. Kept exported for tests so dependency regressions are
* caught without needing a real broken venv fixture.
*
* @returns {string}
*/
function hermesRuntimeImportProbe() {
return 'import yaml; import hermes_cli.config'
}
/**
* Return true iff the Hermes runtime import probe exits 0.
* Return true iff `python -c "import hermes_cli"` exits 0.
*
* Used to gate the "fallback to system Python with hermes_cli installed"
* rung of resolveHermesBackend. Without this, a system Python 3.11-3.13
@@ -57,20 +46,13 @@ function hermesRuntimeImportProbe() {
* site-packages -- and the resolver returns a backend that immediately
* dies on spawn.
*
* The probe intentionally imports hermes_cli.config, not just the top-level
* package: a broken/empty Windows launcher venv can still see the source tree
* through PYTHONPATH but lack PyYAML, then die on the first real CLI import.
*
* @param {string} pythonPath - Absolute path to a python.exe / python.
* @param {object} [opts]
* @param {object} [opts.env] - Additional environment for the probe.
* @returns {boolean}
*/
function canImportHermesCli(pythonPath, opts = {}) {
function canImportHermesCli(pythonPath) {
if (!pythonPath) return false
try {
execFileSync(pythonPath, ['-c', hermesRuntimeImportProbe()], {
env: { ...process.env, ...(opts.env || {}) },
execFileSync(pythonPath, ['-c', 'import hermes_cli'], {
stdio: 'ignore',
timeout: PROBE_TIMEOUT_MS,
windowsHide: true
@@ -119,7 +101,6 @@ function verifyHermesCli(hermesCommand, opts = {}) {
module.exports = {
canImportHermesCli,
hermesRuntimeImportProbe,
verifyHermesCli,
PROBE_TIMEOUT_MS
}

View File

@@ -11,7 +11,7 @@ const fs = require('node:fs')
const os = require('node:os')
const path = require('node:path')
const { canImportHermesCli, hermesRuntimeImportProbe, verifyHermesCli } = require('./backend-probes.cjs')
const { canImportHermesCli, verifyHermesCli } = require('./backend-probes.cjs')
// Resolve the host's own Node binary -- guaranteed to be on disk and
// runnable. We use it as both a stand-in for "a python that doesn't
@@ -40,12 +40,6 @@ test('canImportHermesCli returns false when binary does not exist', () => {
assert.equal(canImportHermesCli(ghost), false)
})
test('hermes runtime import probe checks config dependencies', () => {
const probe = hermesRuntimeImportProbe()
assert.match(probe, /\bimport yaml\b/)
assert.match(probe, /\bimport hermes_cli\.config\b/)
})
test('verifyHermesCli returns false when command is falsy', () => {
assert.equal(verifyHermesCli(''), false)
assert.equal(verifyHermesCli(null), false)

View File

@@ -167,5 +167,5 @@ module.exports = {
readDashboardReadyFile,
resolvePortAnnounceTimeoutMs,
DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS,
MIN_PORT_ANNOUNCE_TIMEOUT_MS
MIN_PORT_ANNOUNCE_TIMEOUT_MS,
}

View File

@@ -25,7 +25,7 @@ const {
waitForDashboardReadyFile,
resolvePortAnnounceTimeoutMs,
DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS,
MIN_PORT_ANNOUNCE_TIMEOUT_MS
MIN_PORT_ANNOUNCE_TIMEOUT_MS,
} = require('./backend-ready.cjs')
// A minimal stand-in for a spawned child process: an EventEmitter with a

View File

@@ -179,13 +179,7 @@ function downloadInstallScript(commit, destPath) {
})
}
async function resolveInstallScript({
installStamp,
sourceRepoRoot,
hermesHome,
emit,
_download = downloadInstallScript
}) {
async function resolveInstallScript({ installStamp, sourceRepoRoot, hermesHome, emit, _download = downloadInstallScript }) {
// 1. Dev shortcut: prefer a local checkout's installer so we can iterate
// without pushing. SOURCE_REPO_ROOT comes from main.cjs (path.resolve
// of APP_ROOT/../..).
@@ -299,19 +293,15 @@ function spawnPowerShell(scriptPath, args, { emit, stageName, abortSignal, herme
const ps = process.platform === 'win32' ? resolveWindowsPowerShell() : 'pwsh'
const fullArgs = ['-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', scriptPath, ...args]
const child = spawn(
ps,
fullArgs,
hiddenWindowsChildOptions({
stdio: ['ignore', 'pipe', 'pipe'],
env: {
...process.env,
// Pass HERMES_HOME through so install.ps1 respects the caller's
// choice rather than re-computing the default.
HERMES_HOME: hermesHome || process.env.HERMES_HOME || ''
}
})
)
const child = spawn(ps, fullArgs, hiddenWindowsChildOptions({
stdio: ['ignore', 'pipe', 'pipe'],
env: {
...process.env,
// Pass HERMES_HOME through so install.ps1 respects the caller's
// choice rather than re-computing the default.
HERMES_HOME: hermesHome || process.env.HERMES_HOME || ''
}
}))
let stdout = ''
let stderr = ''

View File

@@ -37,20 +37,6 @@
const AT_COOKIE_VARIANTS = ['__Host-hermes_session_at', '__Secure-hermes_session_at', 'hermes_session_at']
const RT_COOKIE_VARIANTS = ['__Host-hermes_session_rt', '__Secure-hermes_session_rt', 'hermes_session_rt']
// The Nous portal (NAS) does NOT use Hermes gateway session cookies — it is a
// Privy-authed Next.js app. NAS `auth()` (src/server/auth/session.ts) reads the
// `privy-token` access-token cookie (with `privy-id-token` alongside), which is
// also exactly what the `/api/agents` cookie-auth path validates. So portal
// sign-in / discovery liveness must look for the Privy cookie, NOT the gateway
// cookies above. `privy-token` is the access token (the required signal);
// variants cover the secured-prefix forms and the older `privy-session` name.
const PRIVY_SESSION_COOKIE_VARIANTS = [
'__Host-privy-token',
'__Secure-privy-token',
'privy-token',
'privy-session'
]
function normalizeRemoteBaseUrl(rawUrl) {
const value = String(rawUrl || '').trim()
@@ -156,30 +142,19 @@ function normAuthMode(mode) {
return mode === 'oauth' ? 'oauth' : 'token'
}
// True for connection modes that resolve to a REMOTE backend. 'cloud' is a
// Hermes Cloud connection (cloud-auto-discovery Q3/Q6): it carries a
// remote-shaped block and reuses the entire remote connect/probe/reconnect
// path, so every resolution site treats it exactly like 'remote'. The only
// places that distinguish cloud from remote are the settings UI (which card to
// show) and config persistence (remembering the provenance). Centralized here
// so no resolution site forgets the third arm.
function modeIsRemoteLike(mode) {
return mode === 'remote' || mode === 'cloud'
}
/**
* Select a profile's explicit remote override from a connection config, or null
* when it has none (so the caller falls back to env → global remote → local).
*
* The config may carry a `profiles` map keyed by name; an entry counts as an
* override only with a remote-like `mode` (remote or cloud) and a non-empty
* `url`. Pure: `token` is the raw stored secret; main.cjs decrypts it. Returns
* override only with `mode === 'remote'` and a non-empty `url`. Pure: `token`
* is the raw stored secret; main.cjs decrypts it. Returns
* `{ url, authMode, token } | null`.
*/
function profileRemoteOverride(config, profile) {
const key = connectionScopeKey(profile)
const entry = key ? config?.profiles?.[key] : null
if (!entry || typeof entry !== 'object' || !modeIsRemoteLike(entry.mode)) {
if (!entry || typeof entry !== 'object' || entry.mode !== 'remote') {
return null
}
@@ -286,34 +261,23 @@ function cookiesHaveSession(cookies) {
*/
function cookiesHaveLiveSession(cookies) {
if (!Array.isArray(cookies)) return false
return cookies.some(c => c && c.value && (AT_COOKIE_VARIANTS.includes(c.name) || RT_COOKIE_VARIANTS.includes(c.name)))
}
/**
* True if the cookie jar holds a live Nous PORTAL (Privy) session — a non-empty
* `privy-token` (access-token) cookie, or a variant. This is the portal
* analogue of `cookiesHaveLiveSession`: the portal authenticates via Privy, not
* the Hermes gateway session cookies, so cloud sign-in / discovery liveness
* must check THIS, not the gateway helpers. (NAS `auth()` and the `/api/agents`
* cookie path both key off `privy-token`.)
*/
function cookiesHavePrivySession(cookies) {
if (!Array.isArray(cookies)) return false
return cookies.some(c => c && c.value && PRIVY_SESSION_COOKIE_VARIANTS.includes(c.name))
return cookies.some(
c =>
c &&
c.value &&
(AT_COOKIE_VARIANTS.includes(c.name) || RT_COOKIE_VARIANTS.includes(c.name))
)
}
module.exports = {
AT_COOKIE_VARIANTS,
RT_COOKIE_VARIANTS,
PRIVY_SESSION_COOKIE_VARIANTS,
authModeFromStatus,
buildGatewayWsUrl,
buildGatewayWsUrlWithTicket,
connectionScopeKey,
cookiesHaveSession,
cookiesHaveLiveSession,
cookiesHavePrivySession,
modeIsRemoteLike,
normAuthMode,
normalizeRemoteBaseUrl,
pathWithGlobalRemoteProfile,

View File

@@ -22,8 +22,6 @@ const {
connectionScopeKey,
cookiesHaveSession,
cookiesHaveLiveSession,
cookiesHavePrivySession,
modeIsRemoteLike,
normAuthMode,
normalizeRemoteBaseUrl,
pathWithGlobalRemoteProfile,
@@ -49,19 +47,6 @@ test('normAuthMode coerces to token unless explicitly oauth', () => {
assert.equal(normAuthMode('weird'), 'token')
})
// --- modeIsRemoteLike ---
test('modeIsRemoteLike is true for remote and cloud, false otherwise', () => {
// cloud resolves to a remote backend under the hood (Q6), so every resolution
// site treats it like remote.
assert.equal(modeIsRemoteLike('remote'), true)
assert.equal(modeIsRemoteLike('cloud'), true)
assert.equal(modeIsRemoteLike('local'), false)
assert.equal(modeIsRemoteLike(undefined), false)
assert.equal(modeIsRemoteLike(null), false)
assert.equal(modeIsRemoteLike('weird'), false)
})
// --- profileRemoteOverride ---
test('profileRemoteOverride returns null when no profile is given', () => {
@@ -100,21 +85,6 @@ test('profileRemoteOverride preserves an explicit oauth auth mode', () => {
assert.equal(profileRemoteOverride(config, 'coder').authMode, 'oauth')
})
test('profileRemoteOverride treats a cloud entry as a remote override', () => {
// A 'cloud' per-profile entry resolves to the same remote backend a 'remote'
// entry would (Q6) — the override must be returned, not dropped.
const config = {
profiles: {
coder: { mode: 'cloud', url: 'https://agent-1.agents.nousresearch.com', authMode: 'oauth' }
}
}
assert.deepEqual(profileRemoteOverride(config, 'coder'), {
url: 'https://agent-1.agents.nousresearch.com',
authMode: 'oauth',
token: undefined
})
})
test('profileRemoteOverride tolerates a missing/!object profiles map', () => {
assert.equal(profileRemoteOverride({}, 'coder'), null)
assert.equal(profileRemoteOverride({ profiles: null }, 'coder'), null)
@@ -361,35 +331,6 @@ test('cookiesHaveLiveSession is false for unrelated cookies and non-arrays', ()
assert.equal(cookiesHaveLiveSession([]), false)
})
// --- cookiesHavePrivySession (Nous portal / Privy auth, NOT gateway cookies) ---
test('cookiesHavePrivySession detects the privy-token access cookie', () => {
assert.equal(cookiesHavePrivySession([{ name: 'privy-token', value: 'jwt' }]), true)
})
test('cookiesHavePrivySession detects __Host-/__Secure- prefixes and the legacy privy-session name', () => {
assert.equal(cookiesHavePrivySession([{ name: '__Host-privy-token', value: 'x' }]), true)
assert.equal(cookiesHavePrivySession([{ name: '__Secure-privy-token', value: 'x' }]), true)
assert.equal(cookiesHavePrivySession([{ name: 'privy-session', value: 'x' }]), true)
})
test('cookiesHavePrivySession is false for an empty value', () => {
assert.equal(cookiesHavePrivySession([{ name: 'privy-token', value: '' }]), false)
})
test('cookiesHavePrivySession does NOT treat hermes gateway cookies as a portal session', () => {
// The whole point of Q7: a gateway session cookie is NOT a portal sign-in.
assert.equal(cookiesHavePrivySession([{ name: 'hermes_session_at', value: 'x' }]), false)
assert.equal(cookiesHavePrivySession([{ name: '__Host-hermes_session_rt', value: 'x' }]), false)
})
test('cookiesHavePrivySession is false for unrelated cookies and non-arrays', () => {
assert.equal(cookiesHavePrivySession([{ name: 'other', value: 'x' }]), false)
assert.equal(cookiesHavePrivySession(null), false)
assert.equal(cookiesHavePrivySession(undefined), false)
assert.equal(cookiesHavePrivySession([]), false)
})
// --- tokenPreview ---
test('tokenPreview returns null for empty', () => {

View File

@@ -138,7 +138,10 @@ function buildPosixCleanupScript({ desktopPid, pythonExe, pythonPath, agentRoot,
if (pythonPath) {
lines.push(`export PYTHONPATH=${q(pythonPath)}\${PYTHONPATH:+:$PYTHONPATH}`)
}
lines.push(`cd ${q(agentRoot)} 2>/dev/null || true`, `${q(pythonExe)} ${uninstallArgs.map(q).join(' ')} || true`)
lines.push(
`cd ${q(agentRoot)} 2>/dev/null || true`,
`${q(pythonExe)} ${uninstallArgs.map(q).join(' ')} || true`
)
if (appPath) {
lines.push(`rm -rf ${q(appPath)} || true`)
}
@@ -166,15 +169,7 @@ function buildPosixCleanupScript({ desktopPid, pythonExe, pythonPath, agentRoot,
* Removal: even after the desktop PID is gone, Windows releases directory
* handles lazily, so a single `rmdir /s /q` can half-fail — retry up to 10x.
*/
function buildWindowsCleanupScript({
desktopPid,
pythonExe,
pythonPath,
agentRoot,
uninstallArgs,
appPath,
hermesHome
}) {
function buildWindowsCleanupScript({ desktopPid, pythonExe, pythonPath, agentRoot, uninstallArgs, appPath, hermesHome }) {
const pid = Number(desktopPid) || 0
// cmd.exe has no string escaping inside quotes; strip embedded quotes (paths
// under %LOCALAPPDATA% never contain them). `&`/`^` in a path would still be

View File

@@ -101,7 +101,10 @@ test('resolveRemovableAppPath uses APPIMAGE on Linux when set', () => {
})
test('resolveRemovableAppPath finds the unpacked dir on Linux', () => {
assert.equal(resolveRemovableAppPath('/opt/hermes/linux-unpacked/hermes', 'linux', {}), '/opt/hermes/linux-unpacked')
assert.equal(
resolveRemovableAppPath('/opt/hermes/linux-unpacked/hermes', 'linux', {}),
'/opt/hermes/linux-unpacked'
)
// A system-package install (/usr/bin) → null, left to apt/dnf.
assert.equal(resolveRemovableAppPath('/usr/bin/hermes', 'linux', {}), null)
})

View File

@@ -1,48 +0,0 @@
'use strict'
const { session } = require('electron')
const EMBED_SESSION_PARTITION = 'persist:hermes-embed'
const EMBED_REFERER = 'https://www.youtube.com/'
const YOUTUBE_REFERER_HOST_RE =
/(^|\.)(youtube\.com|youtube-nocookie\.com|googlevideo\.com|ytimg\.com|youtubei\.googleapis\.com)$/i
function installEmbedRefererForSession(embedSession) {
if (!embedSession) {
return
}
embedSession.webRequest.onBeforeSendHeaders((details, callback) => {
let host = ''
try {
host = new URL(details.url).hostname
} catch {
host = ''
}
if (!YOUTUBE_REFERER_HOST_RE.test(host)) {
callback({ requestHeaders: details.requestHeaders })
return
}
const headers = { ...details.requestHeaders }
if (!headers.Referer && !headers.referer) {
headers.Referer = EMBED_REFERER
}
callback({ requestHeaders: headers })
})
}
/** Stamp Referer on YouTube requests in the embed webview partition only. */
function installEmbedReferer() {
try {
installEmbedRefererForSession(session.fromPartition(EMBED_SESSION_PARTITION))
} catch {
// Non-fatal: embeds still render; YouTube may show referer errors.
}
}
module.exports = { installEmbedReferer }

View File

@@ -92,7 +92,9 @@ async function readDirForIpc(dirPath, options = {}) {
try {
const dirents = await fsImpl.promises.readdir(resolved, { withFileTypes: true })
const visibleDirents = dirents.filter(dirent => !FS_READDIR_HIDDEN.has(dirent.name))
const entries = await mapWithStatConcurrency(visibleDirents, dirent => entryForDirent(dirent, resolved, fsImpl))
const entries = await mapWithStatConcurrency(visibleDirents, dirent =>
entryForDirent(dirent, resolved, fsImpl)
)
entries.sort((a, b) => Number(b.isDirectory) - Number(a.isDirectory) || a.name.localeCompare(b.name))

View File

@@ -349,10 +349,7 @@ test('readDirForIpc bounds concurrent stats while preserving complete sorted out
assert.equal(result.error, undefined)
assert.equal(result.entries.length, names.length)
assert.equal(statCalls.length, names.length)
assert.equal(
statCalls.some(fullPath => fullPath.endsWith(`${path.sep}node_modules`)),
false
)
assert.equal(statCalls.some(fullPath => fullPath.endsWith(`${path.sep}node_modules`)), false)
assert.ok(peak > 1, `expected concurrent stats, observed peak ${peak}`)
assert.ok(peak <= 16, `expected at most 16 concurrent stats, observed peak ${peak}`)
assert.deepEqual(
@@ -360,5 +357,8 @@ test('readDirForIpc bounds concurrent stats while preserving complete sorted out
expectedNames
)
assert.equal(result.entries.find(entry => entry.name === failedName)?.isDirectory, false)
assert.equal(result.entries.filter(entry => entry.isDirectory).length, successfulDirectoryNames.size)
assert.equal(
result.entries.filter(entry => entry.isDirectory).length,
successfulDirectoryNames.size
)
})

View File

@@ -86,8 +86,10 @@ async function scanGitRepos(roots, options = {}) {
await mapLimit(subdirs, MAX_CONCURRENCY, sub => walk(sub, depth + 1))
}
await mapLimit(searchRoots.map(root => String(root || '').trim()).filter(Boolean), MAX_CONCURRENCY, root =>
walk(root, 0)
await mapLimit(
searchRoots.map(root => String(root || '').trim()).filter(Boolean),
MAX_CONCURRENCY,
root => walk(root, 0)
)
return [...found.entries()].map(([root, label]) => ({ label, root }))

View File

@@ -10,26 +10,7 @@ const { execFile } = require('node:child_process')
const fs = require('node:fs/promises')
const path = require('node:path')
// `simple-git` is a pure-JS runtime dep that workspace dedup hoists into the
// repo-root node_modules. Packaged builds set `files:` in package.json, which
// excludes node_modules from the asar, so the normal require() fails at launch
// (issue #52735: "Cannot find module 'simple-git'"). We ship the dep's
// closure under resources/native-deps/vendor/node_modules/ via extraResources
// + scripts/stage-native-deps.cjs, and resolve from there when the hoisted
// require() isn't reachable. The `vendor/` nesting matters: electron-builder
// drops a node_modules dir at the root of an extraResources copy but keeps a
// nested one. Dev mode never hits the fallback -- Node's normal lookup finds
// the hoisted copy.
let simpleGit
try {
simpleGit = require('simple-git')
} catch {
const resourcesPath = process.resourcesPath
if (!resourcesPath) {
throw new Error("git-review IPC: 'simple-git' not found and no resourcesPath to fall back to")
}
simpleGit = require(path.join(resourcesPath, 'native-deps', 'vendor', 'node_modules', 'simple-git'))
}
const simpleGit = require('simple-git')
const { resolveRequestedPathForIpc } = require('./hardening.cjs')
@@ -207,12 +188,7 @@ async function defaultBranchName(git) {
// Prefer a local trunk, then a remote-only one (returns the clean name either
// way) so "branch off main" works even before main is checked out locally.
for (const ref of [
'refs/heads/main',
'refs/heads/master',
'refs/remotes/origin/main',
'refs/remotes/origin/master'
]) {
for (const ref of ['refs/heads/main', 'refs/heads/master', 'refs/remotes/origin/main', 'refs/remotes/origin/master']) {
try {
await git.raw(['rev-parse', '--verify', '--quiet', ref])

View File

@@ -45,10 +45,7 @@ function parseWorktrees(out) {
} else if (!cur) {
continue
} else if (line.startsWith('branch ')) {
cur.branch = line
.slice(7)
.trim()
.replace(/^refs\/heads\//, '')
cur.branch = line.slice(7).trim().replace(/^refs\/heads\//, '')
} else if (line === 'detached') {
cur.detached = true
} else if (line === 'bare') {
@@ -125,9 +122,10 @@ async function gitLine(gitBin, args, cwd) {
}
async function defaultBranch(gitBin, cwd) {
const remote = (
await gitLine(gitBin, ['symbolic-ref', '--quiet', '--short', 'refs/remotes/origin/HEAD'], cwd)
).replace(/^origin\//, '')
const remote = (await gitLine(gitBin, ['symbolic-ref', '--quiet', '--short', 'refs/remotes/origin/HEAD'], cwd)).replace(
/^origin\//,
''
)
if (remote) {
return remote
@@ -179,16 +177,7 @@ async function ensureGitRepo(gitBin, dir) {
// Inline identity so the seed commit lands even with no global git config.
await runGit(
gitBin,
[
'-c',
'user.email=hermes@localhost',
'-c',
'user.name=Hermes',
'commit',
'--allow-empty',
'-m',
'Initial commit'
],
['-c', 'user.email=hermes@localhost', '-c', 'user.name=Hermes', 'commit', '--allow-empty', '-m', 'Initial commit'],
dir
)
}

View File

@@ -186,10 +186,7 @@ async function statForIpc(fsImpl, resolvedPath, purpose, typeLabel) {
if (code === 'ENOENT' || code === 'ENOTDIR') {
throw ipcPathError(code || 'ENOENT', `${purpose} failed: ${typeLabel} does not exist.`)
}
throw ipcPathError(
code || 'read-error',
`${purpose} failed: ${error instanceof Error ? error.message : String(error)}`
)
throw ipcPathError(code || 'read-error', `${purpose} failed: ${error instanceof Error ? error.message : String(error)}`)
}
}
@@ -204,10 +201,7 @@ async function realpathForIpc(fsImpl, resolvedPath, purpose) {
return realPath
} catch (error) {
const code = error && typeof error === 'object' ? error.code : ''
throw ipcPathError(
code || 'read-error',
`${purpose} failed: ${error instanceof Error ? error.message : String(error)}`
)
throw ipcPathError(code || 'read-error', `${purpose} failed: ${error instanceof Error ? error.message : String(error)}`)
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -30,8 +30,5 @@ test('setJsonRequestHeaders does not set Electron-restricted Content-Length', ()
setJsonRequestHeaders(request)
assert.deepEqual(headers, [['Content-Type', 'application/json']])
assert.equal(
headers.some(([name]) => name.toLowerCase() === 'content-length'),
false
)
assert.equal(headers.some(([name]) => name.toLowerCase() === 'content-length'), false)
})

View File

@@ -41,16 +41,6 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
probeConnectionConfig: remoteUrl => ipcRenderer.invoke('hermes:connection-config:probe', remoteUrl),
oauthLoginConnectionConfig: remoteUrl => ipcRenderer.invoke('hermes:connection-config:oauth-login', remoteUrl),
oauthLogoutConnectionConfig: remoteUrl => ipcRenderer.invoke('hermes:connection-config:oauth-logout', remoteUrl),
// Hermes Cloud: one portal login powers discovery + silent per-agent sign-in
// (cloud-auto-discovery Phase 3).
cloud: {
status: () => ipcRenderer.invoke('hermes:cloud:status'),
betaEnabled: () => ipcRenderer.invoke('hermes:cloud:beta-enabled'),
login: () => ipcRenderer.invoke('hermes:cloud:login'),
logout: () => ipcRenderer.invoke('hermes:cloud:logout'),
discover: org => ipcRenderer.invoke('hermes:cloud:discover', org),
agentSignIn: dashboardUrl => ipcRenderer.invoke('hermes:cloud:agent-sign-in', dashboardUrl)
},
profile: {
get: () => ipcRenderer.invoke('hermes:profile:get'),
set: name => ipcRenderer.invoke('hermes:profile:set', name)

View File

@@ -1,24 +0,0 @@
'use strict'
const OVERLAY_FALLBACK_WIDTH = 144
/**
* Static pre-layout reservation (px) for the right-side native window-controls
* overlay (min/max/close). Only a FALLBACK — once laid out the renderer reads
* the exact width from navigator.windowControlsOverlay
* (use-window-controls-overlay-width.ts) and uses this value only when the WCO
* API is unavailable.
*
* macOS uses traffic lights positioned via trafficLightPosition, not a WCO
* overlay, so it reserves nothing here. Every other desktop platform now paints
* the Electron overlay (Windows, WSLg, and plain Linux KDE/GNOME), so they all
* reserve the fallback width.
*
* @param {{ isWindows?: boolean, isWsl?: boolean, isMac?: boolean }} opts
*/
function nativeOverlayWidth({ isWindows = false, isWsl = false, isMac = false } = {}) {
if (isMac) return 0
return OVERLAY_FALLBACK_WIDTH
}
module.exports = { OVERLAY_FALLBACK_WIDTH, nativeOverlayWidth }

View File

@@ -1,36 +0,0 @@
const assert = require('node:assert/strict')
const test = require('node:test')
const { OVERLAY_FALLBACK_WIDTH, nativeOverlayWidth } = require('./titlebar-overlay-width.cjs')
// This static reservation is only the pre-layout FALLBACK. Once laid out the
// renderer reads the exact width from navigator.windowControlsOverlay
// (use-window-controls-overlay-width.ts) and uses these values only when the WCO
// API is unavailable.
test('Windows reserves the overlay fallback width', () => {
assert.equal(nativeOverlayWidth({ isWindows: true }), OVERLAY_FALLBACK_WIDTH)
})
test('WSLg paints the same WCO, so it reserves the same fallback width', () => {
// The original bug: WSL fell through to 0, so the right tools sat under the
// controls and the title overran into them.
assert.equal(nativeOverlayWidth({ isWsl: true }), OVERLAY_FALLBACK_WIDTH)
})
test('plain Linux paints the WCO too, so it reserves the fallback width', () => {
// Regression #53185: re-enabling the overlay on plain Linux (KDE/GNOME)
// without reserving its width left the native min/max/close buttons painting
// on top of the app's right-edge titlebar tools.
assert.equal(nativeOverlayWidth({ isWindows: false, isWsl: false }), OVERLAY_FALLBACK_WIDTH)
assert.equal(nativeOverlayWidth(), OVERLAY_FALLBACK_WIDTH)
assert.equal(nativeOverlayWidth({}), OVERLAY_FALLBACK_WIDTH)
})
test('macOS uses traffic lights, not a WCO overlay, so it reserves nothing', () => {
assert.equal(nativeOverlayWidth({ isMac: true }), 0)
})
test('the fallback width is a sane positive pixel value', () => {
assert.ok(Number.isInteger(OVERLAY_FALLBACK_WIDTH) && OVERLAY_FALLBACK_WIDTH > 0)
})

View File

@@ -7,81 +7,45 @@ const { resolveBehindCount, shouldCountCommits } = require('./update-count.cjs')
// unconditionally, so a shallow checkout with no merge-base surfaced the bogus
// rev-list count (e.g. 12104). This asserts the new shallow/no-merge-base branch.
test('shallow checkout with no merge-base does NOT trust the bogus rev-list count', () => {
assert.equal(
resolveBehindCount({
countStr: '12104',
currentSha: 'aaa',
targetSha: 'bbb',
isShallow: true,
hasMergeBase: false
}),
1
)
assert.equal(resolveBehindCount({
countStr: '12104', currentSha: 'aaa', targetSha: 'bbb',
isShallow: true, hasMergeBase: false,
}), 1)
})
test('shallow checkout with no merge-base but identical SHA reports up-to-date', () => {
assert.equal(
resolveBehindCount({
countStr: '12104',
currentSha: 'abc',
targetSha: 'abc',
isShallow: true,
hasMergeBase: false
}),
0
)
assert.equal(resolveBehindCount({
countStr: '12104', currentSha: 'abc', targetSha: 'abc',
isShallow: true, hasMergeBase: false,
}), 0)
})
test('shallow checkout WITH a merge-base keeps the exact count (reliable)', () => {
assert.equal(
resolveBehindCount({
countStr: '3',
currentSha: 'aaa',
targetSha: 'bbb',
isShallow: true,
hasMergeBase: true
}),
3
)
assert.equal(resolveBehindCount({
countStr: '3', currentSha: 'aaa', targetSha: 'bbb',
isShallow: true, hasMergeBase: true,
}), 3)
})
test('full (non-shallow) clone keeps the exact count path unchanged', () => {
assert.equal(
resolveBehindCount({
countStr: '7',
currentSha: 'aaa',
targetSha: 'bbb',
isShallow: false,
hasMergeBase: true
}),
7
)
assert.equal(resolveBehindCount({
countStr: '7', currentSha: 'aaa', targetSha: 'bbb',
isShallow: false, hasMergeBase: true,
}), 7)
})
test('up-to-date full clone reports 0', () => {
assert.equal(
resolveBehindCount({
countStr: '0',
currentSha: 'x',
targetSha: 'x',
isShallow: false,
hasMergeBase: true
}),
0
)
assert.equal(resolveBehindCount({
countStr: '0', currentSha: 'x', targetSha: 'x',
isShallow: false, hasMergeBase: true,
}), 0)
})
test('non-numeric count falls back to 0 (defensive, unchanged behaviour)', () => {
assert.equal(
resolveBehindCount({
countStr: '',
currentSha: 'aaa',
targetSha: 'bbb',
isShallow: false,
hasMergeBase: true
}),
0
)
assert.equal(resolveBehindCount({
countStr: '', currentSha: 'aaa', targetSha: 'bbb',
isShallow: false, hasMergeBase: true,
}), 0)
})
// shouldCountCommits gates the expensive `rev-list --count` in checkUpdates().
@@ -104,24 +68,12 @@ test('full (non-shallow) clone always runs the count', () => {
// The skip path produces an empty countStr; resolveBehindCount must NOT trust
// it and must fall through to the SHA compare (mirrors the live call site).
test('skipped-count path resolves via SHA compare, never via empty countStr', () => {
assert.equal(
resolveBehindCount({
countStr: '',
currentSha: 'aaa',
targetSha: 'bbb',
isShallow: true,
hasMergeBase: false
}),
1
)
assert.equal(
resolveBehindCount({
countStr: '',
currentSha: 'same',
targetSha: 'same',
isShallow: true,
hasMergeBase: false
}),
0
)
assert.equal(resolveBehindCount({
countStr: '', currentSha: 'aaa', targetSha: 'bbb',
isShallow: true, hasMergeBase: false,
}), 1)
assert.equal(resolveBehindCount({
countStr: '', currentSha: 'same', targetSha: 'same',
isShallow: true, hasMergeBase: false,
}), 0)
})

View File

@@ -62,10 +62,7 @@ test('resolveUnpackedRelease is null for AppImage / .deb / .rpm / dev / unresolv
assert.equal(resolveUnpackedRelease('/usr/lib/hermes/hermes', ROOT, 'linux'), null)
assert.equal(resolveUnpackedRelease('/opt/Hermes/hermes', ROOT, 'linux'), null)
// dev electron
assert.equal(
resolveUnpackedRelease('/home/u/.hermes/hermes-agent/node_modules/electron/dist/electron', ROOT, 'linux'),
null
)
assert.equal(resolveUnpackedRelease('/home/u/.hermes/hermes-agent/node_modules/electron/dist/electron', ROOT, 'linux'), null)
// empty / missing
assert.equal(resolveUnpackedRelease('', ROOT, 'linux'), null)
assert.equal(resolveUnpackedRelease(path.join(UNPACKED, 'hermes'), '', 'linux'), null)

View File

@@ -39,9 +39,7 @@ function canonicalGitHubRemote(url) {
}
function isSshRemote(url) {
const value = String(url || '')
.trim()
.toLowerCase()
const value = String(url || '').trim().toLowerCase()
return value.startsWith('git@') || value.startsWith('ssh://')
}

View File

@@ -26,11 +26,7 @@ const REQUEST_TIMEOUT_MS = 20_000
const ID_RE = /^[\w-]+\.[\w-]+$/
/** Minimal HTTPS helper with redirect-following, timeout, and a size cap. */
function request(
url,
{ method = 'GET', headers = {}, body = null, maxBytes = MAX_VSIX_BYTES } = {},
redirectsLeft = MAX_REDIRECTS
) {
function request(url, { method = 'GET', headers = {}, body = null, maxBytes = MAX_VSIX_BYTES } = {}, redirectsLeft = MAX_REDIRECTS) {
return new Promise((resolve, reject) => {
const req = https.request(url, { method, headers }, res => {
const status = res.statusCode ?? 0
@@ -46,13 +42,7 @@ function request(
const next = new URL(res.headers.location, url).toString()
res.resume()
// Redirects to the CDN are plain GETs (drop the POST body).
resolve(
request(
next,
{ method: 'GET', headers: { 'User-Agent': headers['User-Agent'] }, maxBytes },
redirectsLeft - 1
)
)
resolve(request(next, { method: 'GET', headers: { 'User-Agent': headers['User-Agent'] }, maxBytes }, redirectsLeft - 1))
return
}

View File

@@ -26,16 +26,7 @@ const LAPTOP = [{ workArea: { x: 0, y: 0, width: 1366, height: 728 } }]
// ─── sanitizeWindowState ───────────────────────────────────────────────────
test('sanitizeWindowState rejects missing/garbage input', () => {
for (const bad of [
null,
undefined,
'nope',
42,
{},
{ width: 'x', height: 800 },
{ width: NaN, height: 800 },
{ width: 1000 }
]) {
for (const bad of [null, undefined, 'nope', 42, {}, { width: 'x', height: 800 }, { width: NaN, height: 800 }, { width: 1000 }]) {
assert.equal(sanitizeWindowState(bad), null)
}
})
@@ -121,13 +112,9 @@ test('computeWindowOptions does not clamp when displays are unknown', () => {
test('debounce coalesces a burst into one trailing run', t => {
t.mock.timers.enable({ apis: ['setTimeout'] })
let calls = 0
const d = debounce(() => {
calls += 1
}, 250)
const d = debounce(() => { calls += 1 }, 250)
d()
d()
d()
d(); d(); d()
assert.equal(calls, 0)
t.mock.timers.tick(249)
assert.equal(calls, 0)
@@ -138,9 +125,7 @@ test('debounce coalesces a burst into one trailing run', t => {
test('debounce.flush runs now and cancels the pending timer', t => {
t.mock.timers.enable({ apis: ['setTimeout'] })
let calls = 0
const d = debounce(() => {
calls += 1
}, 250)
const d = debounce(() => { calls += 1 }, 250)
d()
d.flush()

View File

@@ -13,7 +13,7 @@ function readElectronFile(name) {
function requireHiddenChildOptions(source, needle) {
const match = needle instanceof RegExp ? needle.exec(source) : null
const index = needle instanceof RegExp ? (match?.index ?? -1) : source.indexOf(needle)
const index = needle instanceof RegExp ? match?.index ?? -1 : source.indexOf(needle)
assert.notEqual(index, -1, `missing call site: ${needle}`)
const snippet = source.slice(index, index + 700)
assert.match(
@@ -38,40 +38,19 @@ test('desktop background child processes opt into hidden Windows consoles', () =
requireHiddenChildOptions(source, /hermesProcess = spawn\(\s*backend\.command,\s*backend\.args/)
requireHiddenChildOptions(source, /spawn\(\s*py,\s*\['-m', 'hermes_cli\.main', 'uninstall', '--gui-summary'\]/)
assert.match(source, /function unwrapWindowsVenvHermesCommand\(command, backendArgs\)/)
assert.match(source, /function unwrapWindowsVenvHermesCommand\(command, dashboardArgs\)/)
assert.match(source, /existing Hermes no-console Python at/)
assert.match(source, /function getNoConsoleVenvPython\(venvRoot\)/)
assert.match(source, /function toNoConsolePython\(pythonPath\)/)
assert.match(source, /function applyWindowsNoConsoleSpawnHints\(backend\)/)
assert.match(source, /function readVenvHome\(venvRoot\)/)
assert.match(source, /path\.join\(venvRoot, 'Scripts', 'pythonw\.exe'\)/)
assert.match(source, /backendStartFailure/)
assert.match(source, /HERMES_DESKTOP_READY_FILE/)
assert.match(source, /readyFile: true/)
assert.match(source, /function getVenvSitePackagesEntries\(venvRoot\)/)
assert.match(source, /path\.join\(venvRoot, 'Lib', 'site-packages'\)/)
assert.match(source, /args: \['-m', 'hermes_cli\.main', \.\.\.backendArgs\]/)
})
test('desktop backend launches console python so child consoles are inherited, not pythonw', () => {
const source = readElectronFile('main.cjs')
// The flash fix is structural: the backend runs as a console-subsystem
// python.exe under hiddenWindowsChildOptions() (-> CREATE_NO_WINDOW), so it
// owns ONE windowless console that every descendant spawn inherits. Launching
// it as GUI-subsystem pythonw.exe is what made each child allocate (and flash)
// its own console, so the backend command must never be pythonw.
assert.doesNotMatch(source, /pythonw\.exe'\)/, 'backend must not be launched via pythonw.exe')
assert.doesNotMatch(
source,
/function getNoConsoleVenvPython\b/,
'pythonw-conversion helper should be gone; console python is launched directly'
)
assert.doesNotMatch(
source,
/function applyWindowsNoConsoleSpawnHints\b/,
'pythonw spawn-hint rewriter should be gone'
)
// Console python restores stdout, so the port is announced on the normal
// HERMES_DASHBOARD_READY stdout line — no ready-file side channel is set.
assert.doesNotMatch(source, /readyFile: true/, 'no backend should opt into the pythonw ready-file path')
// Both desktop backend launches must still go through hiddenWindowsChildOptions
// so the single backend console is created windowless.
requireHiddenChildOptions(source, /spawn\(\s*backend\.command,\s*backend\.args/)
requireHiddenChildOptions(source, /hermesProcess = spawn\(\s*backend\.command,\s*backend\.args/)
assert.match(source, /args: \['-m', 'hermes_cli\.main', \.\.\.dashboardArgs\]/)
})
test('intentional or interactive desktop child processes stay documented', () => {
@@ -89,5 +68,5 @@ test('bootstrap PowerShell runner hides Windows console children', () => {
const source = readElectronFile('bootstrap-runner.cjs')
assert.match(source, /function hiddenWindowsChildOptions\(options = \{\}\)/)
requireHiddenChildOptions(source, /spawn\(\s*ps,\s*fullArgs/)
requireHiddenChildOptions(source, 'spawn(ps, fullArgs')
})

View File

@@ -1,67 +0,0 @@
'use strict'
// Regression guards for Windows `hermes` resolution in main.cjs.
//
// main.cjs has no module.exports, so these follow the repo's source-assertion
// test pattern (see windows-child-process.test.cjs). They pin the two Windows
// resolution bugs that caused desktop reinstall loops:
// 1. findOnPath() tried the empty extension FIRST, so an extensionless
// Git-Bash `hermes` shim shadowed the real hermes.cmd/hermes.exe; the
// shim then failed the --version probe and the desktop fell through to a
// spurious bootstrap/repair.
// 2. handOffWindowsBootstrapRecovery() chose --update vs the destructive
// --repair by checking ONLY venv\Scripts\hermes.exe (the console-script
// shim, written at the END of venv setup and absent in interrupted
// states), so it escalated to a full venv recreate even on healthy
// installs.
const test = require('node:test')
const assert = require('node:assert/strict')
const fs = require('node:fs')
const path = require('node:path')
function readMain() {
return fs.readFileSync(path.join(__dirname, 'main.cjs'), 'utf8').replace(/\r\n/g, '\n')
}
test('findOnPath tries PATHEXT extensions before the bare (empty) name on Windows', () => {
const source = readMain()
// Fixed order: PATHEXT first, empty string LAST.
assert.match(
source,
/\(process\.env\.PATHEXT \|\| '\.COM;\.EXE;\.BAT;\.CMD'\)\.split\(';'\)\.filter\(Boolean\), ''\]/,
'extensions array must end with the empty string, not start with it'
)
// The buggy empty-first order must not return.
assert.doesNotMatch(
source,
/\['', \.\.\.\(process\.env\.PATHEXT/,
'empty-extension-first order regressed: an extensionless shim can shadow hermes.cmd/.exe'
)
})
test('Windows bootstrap recovery chooses --update when any real-install signal is present', () => {
const source = readMain()
assert.match(source, /const haveRealInstall =/, 'recovery must compute haveRealInstall')
assert.match(
source,
/fileExists\(venvPython\)/,
'recovery must accept the venv interpreter as a real-install signal'
)
assert.match(
source,
/\.hermes-bootstrap-complete/,
'recovery must accept the bootstrap-complete marker as a real-install signal'
)
assert.match(
source,
/updaterArgs = haveRealInstall \? \['--update'/,
'updaterArgs must gate on haveRealInstall'
)
// The old too-narrow check (only venv\Scripts\hermes.exe) must not return.
assert.doesNotMatch(
source,
/updaterArgs = fileExists\(venvHermes\) \?/,
'recovery regressed to gating only on the hermes.exe shim, which forces destructive --repair'
)
})

View File

@@ -21,7 +21,8 @@ const { execFileSync } = require('node:child_process')
// the requested value line isn't present.
function parseRegQueryValue(stdout, name) {
if (!stdout || !name) return null
const typePattern = /^(\S+)\s+(?:REG_SZ|REG_EXPAND_SZ|REG_MULTI_SZ|REG_DWORD|REG_QWORD|REG_BINARY|REG_NONE)\s+(.*)$/
const typePattern =
/^(\S+)\s+(?:REG_SZ|REG_EXPAND_SZ|REG_MULTI_SZ|REG_DWORD|REG_QWORD|REG_BINARY|REG_NONE)\s+(.*)$/
for (const rawLine of String(stdout).split(/\r?\n/)) {
const line = rawLine.trim()
const match = line.match(typePattern)
@@ -46,7 +47,10 @@ function expandWindowsEnvRefs(value, env = process.env) {
// Read a User-scoped env var from HKCU\Environment. Windows-only: returns null
// off-Windows (without spawning), on any spawn error, when `reg` exits non-zero
// (the value doesn't exist), or when the value is empty.
function readWindowsUserEnvVar(name, { platform = process.platform, env = process.env, exec = execFileSync } = {}) {
function readWindowsUserEnvVar(
name,
{ platform = process.platform, env = process.env, exec = execFileSync } = {}
) {
if (platform !== 'win32' || !name) return null
let stdout
try {

View File

@@ -1,12 +1,21 @@
const assert = require('node:assert/strict')
const { test } = require('node:test')
const { expandWindowsEnvRefs, parseRegQueryValue, readWindowsUserEnvVar } = require('./windows-user-env.cjs')
const {
expandWindowsEnvRefs,
parseRegQueryValue,
readWindowsUserEnvVar
} = require('./windows-user-env.cjs')
// ── parseRegQueryValue ─────────────────────────────────────────────────────
test('parseRegQueryValue extracts a REG_SZ value', () => {
const out = ['', 'HKEY_CURRENT_USER\\Environment', ' HERMES_HOME REG_SZ F:\\Hermes\\data', ''].join('\r\n')
const out = [
'',
'HKEY_CURRENT_USER\\Environment',
' HERMES_HOME REG_SZ F:\\Hermes\\data',
''
].join('\r\n')
assert.equal(parseRegQueryValue(out, 'HERMES_HOME'), 'F:\\Hermes\\data')
})
@@ -30,7 +39,10 @@ test('parseRegQueryValue returns null when the value line is absent', () => {
// ── expandWindowsEnvRefs ───────────────────────────────────────────────────
test('expandWindowsEnvRefs expands %VAR% case-insensitively', () => {
assert.equal(expandWindowsEnvRefs('%UserProfile%\\h', { USERPROFILE: 'C:\\Users\\jeff' }), 'C:\\Users\\jeff\\h')
assert.equal(
expandWindowsEnvRefs('%UserProfile%\\h', { USERPROFILE: 'C:\\Users\\jeff' }),
'C:\\Users\\jeff\\h'
)
})
test('expandWindowsEnvRefs leaves literal paths and unknown refs intact', () => {

View File

@@ -14,7 +14,11 @@ function isPackagedInstallPath(dir, { installRoots, isPackaged }) {
return false
}
const roots = new Set((installRoots ?? []).filter(Boolean).map(candidate => path.resolve(String(candidate))))
const roots = new Set(
(installRoots ?? [])
.filter(Boolean)
.map(candidate => path.resolve(String(candidate)))
)
for (const root of roots) {
if (resolved === root) {

View File

@@ -13,21 +13,33 @@ const { isPackagedInstallPath } = require('./workspace-cwd.cjs')
const installRoot = path.resolve('/opt/Hermes')
test('isPackagedInstallPath returns false when not packaged', () => {
assert.equal(isPackagedInstallPath(installRoot, { isPackaged: false, installRoots: [installRoot] }), false)
assert.equal(
isPackagedInstallPath(installRoot, { isPackaged: false, installRoots: [installRoot] }),
false
)
})
test('isPackagedInstallPath flags the install root itself', () => {
assert.equal(isPackagedInstallPath(installRoot, { isPackaged: true, installRoots: [installRoot] }), true)
assert.equal(
isPackagedInstallPath(installRoot, { isPackaged: true, installRoots: [installRoot] }),
true
)
})
test('isPackagedInstallPath flags paths nested under the install root', () => {
const nested = path.join(installRoot, 'resources', 'app.asar')
assert.equal(isPackagedInstallPath(nested, { isPackaged: true, installRoots: [installRoot] }), true)
assert.equal(
isPackagedInstallPath(nested, { isPackaged: true, installRoots: [installRoot] }),
true
)
})
test('isPackagedInstallPath ignores paths outside the install root', () => {
const homeProject = path.resolve('/home/user/projects/demo')
assert.equal(isPackagedInstallPath(homeProject, { isPackaged: true, installRoots: [installRoot] }), false)
assert.equal(
isPackagedInstallPath(homeProject, { isPackaged: true, installRoots: [installRoot] }),
false
)
})

View File

@@ -1,92 +0,0 @@
// Pull a Windows-host clipboard image from inside WSL2 via PowerShell (WSLg
// bridges text but not images). Returns PNG bytes or null; exec injectable.
const { execFileSync } = require('node:child_process')
// STA is mandatory: System.Windows.Forms.Clipboard throws ThreadStateException
// off a single-threaded apartment. We emit base64 (not raw bytes) so the PNG
// survives stdout's text decoding intact, and write with [Console]::Out.Write
// to avoid a trailing newline.
const PS_SCRIPT = [
'Add-Type -AssemblyName System.Windows.Forms,System.Drawing',
'$img = [System.Windows.Forms.Clipboard]::GetImage()',
'if ($null -eq $img) { exit 0 }',
'$ms = New-Object System.IO.MemoryStream',
'$img.Save($ms, [System.Drawing.Imaging.ImageFormat]::Png)',
'[Console]::Out.Write([System.Convert]::ToBase64String($ms.ToArray()))'
].join('\n')
// PowerShell's -EncodedCommand takes UTF-16LE base64. Encoding the whole script
// this way sidesteps every layer of WSL→Windows quoting (spaces, quotes,
// brackets, newlines) that plain -Command arguments would mangle.
function encodePowerShellCommand(script) {
return Buffer.from(String(script), 'utf16le').toString('base64')
}
// Locate powershell.exe. The bare name resolves through WSL's Windows-interop
// PATH on every standard WSL2 setup; the absolute fallback covers a stripped
// PATH. Returns the first candidate — execFile surfaces ENOENT if it's wrong
// and we fall back to null.
function powershellCandidates() {
return ['powershell.exe', '/mnt/c/Windows/System32/WindowsPowerShell/v1.0/powershell.exe']
}
function decodeClipboardImageBase64(stdout) {
const b64 = String(stdout || '').trim()
if (!b64) return null
let buffer
try {
buffer = Buffer.from(b64, 'base64')
} catch {
return null
}
// Guard against partial / garbage output: require a real PNG signature.
const PNG_SIGNATURE = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])
if (buffer.length < PNG_SIGNATURE.length || !buffer.subarray(0, PNG_SIGNATURE.length).equals(PNG_SIGNATURE)) {
return null
}
return buffer
}
// Read the Windows clipboard image from inside WSL. Returns a PNG Buffer, or
// null when there's no image, PowerShell is unreachable, or output is invalid.
// Linux-only by contract (caller gates on IS_WSL); never throws.
function readWslWindowsClipboardImage({ exec = execFileSync, candidates = powershellCandidates() } = {}) {
const encoded = encodePowerShellCommand(PS_SCRIPT)
for (const ps of candidates) {
try {
const stdout = exec(
ps,
['-NoProfile', '-NonInteractive', '-STA', '-ExecutionPolicy', 'Bypass', '-EncodedCommand', encoded],
{
encoding: 'utf8',
windowsHide: true,
timeout: 8000,
// A 4K screenshot base64s to a few MB; give stdout generous headroom.
maxBuffer: 64 * 1024 * 1024,
// PowerShell writes progress/CLIXML noise to stderr — ignore it.
stdio: ['ignore', 'pipe', 'ignore']
}
)
const decoded = decodeClipboardImageBase64(stdout)
if (decoded) return decoded
// Empty stdout = no image on the clipboard; stop, don't try fallbacks.
if (String(stdout || '').trim() === '') return null
} catch {
// This powershell.exe candidate is missing/failed — try the next one.
}
}
return null
}
module.exports = {
decodeClipboardImageBase64,
encodePowerShellCommand,
powershellCandidates,
readWslWindowsClipboardImage
}

View File

@@ -1,114 +0,0 @@
const assert = require('node:assert/strict')
const test = require('node:test')
const {
decodeClipboardImageBase64,
encodePowerShellCommand,
powershellCandidates,
readWslWindowsClipboardImage
} = require('./wsl-clipboard-image.cjs')
const PNG_SIGNATURE = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])
function fakePngBuffer(extraBytes = 16) {
return Buffer.concat([PNG_SIGNATURE, Buffer.alloc(extraBytes, 0x42)])
}
test('encodePowerShellCommand produces UTF-16LE base64 PowerShell can decode', () => {
const encoded = encodePowerShellCommand('Write-Output "hi"')
const roundTripped = Buffer.from(encoded, 'base64').toString('utf16le')
assert.equal(roundTripped, 'Write-Output "hi"')
})
test('decodeClipboardImageBase64 returns a Buffer for valid PNG base64', () => {
const png = fakePngBuffer()
const decoded = decodeClipboardImageBase64(png.toString('base64'))
assert.ok(Buffer.isBuffer(decoded))
assert.ok(decoded.equals(png))
})
test('decodeClipboardImageBase64 trims surrounding whitespace before decoding', () => {
const png = fakePngBuffer()
const decoded = decodeClipboardImageBase64(`\n ${png.toString('base64')} \r\n`)
assert.ok(decoded && decoded.equals(png))
})
test('decodeClipboardImageBase64 returns null for empty / whitespace input', () => {
assert.equal(decodeClipboardImageBase64(''), null)
assert.equal(decodeClipboardImageBase64(' \n '), null)
assert.equal(decodeClipboardImageBase64(null), null)
assert.equal(decodeClipboardImageBase64(undefined), null)
})
test('decodeClipboardImageBase64 rejects base64 without a PNG signature', () => {
// Valid base64, but the decoded bytes are not a PNG.
const notPng = Buffer.from('this is not a png at all').toString('base64')
assert.equal(decodeClipboardImageBase64(notPng), null)
})
test('readWslWindowsClipboardImage decodes the first candidate that returns a PNG', () => {
const png = fakePngBuffer()
const calls = []
const exec = (cmd, args) => {
calls.push({ cmd, args })
return png.toString('base64')
}
const result = readWslWindowsClipboardImage({ exec, candidates: ['powershell.exe'] })
assert.ok(result && result.equals(png))
assert.equal(calls.length, 1)
assert.equal(calls[0].cmd, 'powershell.exe')
// -STA is mandatory for System.Windows.Forms.Clipboard.
assert.ok(calls[0].args.includes('-STA'))
assert.ok(calls[0].args.includes('-EncodedCommand'))
})
test('readWslWindowsClipboardImage returns null and stops when stdout is empty (no image)', () => {
let count = 0
const exec = () => {
count += 1
return ''
}
const result = readWslWindowsClipboardImage({
exec,
candidates: ['powershell.exe', '/mnt/c/Windows/System32/WindowsPowerShell/v1.0/powershell.exe']
})
assert.equal(result, null)
// Empty stdout means "no image on the clipboard" — don't probe further candidates.
assert.equal(count, 1)
})
test('readWslWindowsClipboardImage falls through to the next candidate when one throws', () => {
const png = fakePngBuffer()
const seen = []
const exec = cmd => {
seen.push(cmd)
if (cmd === 'powershell.exe') {
throw Object.assign(new Error('not found'), { code: 'ENOENT' })
}
return png.toString('base64')
}
const result = readWslWindowsClipboardImage({
exec,
candidates: ['powershell.exe', '/mnt/c/Windows/System32/WindowsPowerShell/v1.0/powershell.exe']
})
assert.ok(result && result.equals(png))
assert.deepEqual(seen, ['powershell.exe', '/mnt/c/Windows/System32/WindowsPowerShell/v1.0/powershell.exe'])
})
test('readWslWindowsClipboardImage returns null when every candidate throws', () => {
const exec = () => {
throw new Error('boom')
}
const result = readWslWindowsClipboardImage({ exec, candidates: ['a', 'b'] })
assert.equal(result, null)
})
test('powershellCandidates lists the bare name first, then the absolute fallback', () => {
const candidates = powershellCandidates()
assert.equal(candidates[0], 'powershell.exe')
assert.ok(candidates.some(c => c.endsWith('WindowsPowerShell/v1.0/powershell.exe')))
})

View File

@@ -18,7 +18,7 @@
"profile:main": "wait-on http://127.0.0.1:5174 && cross-env XCURSOR_SIZE=24 HERMES_DESKTOP_DEV_SERVER=http://127.0.0.1:5174 electron --inspect=9229 .",
"profile:main:cpu": "wait-on http://127.0.0.1:5174 && cross-env XCURSOR_SIZE=24 NODE_OPTIONS=--cpu-prof HERMES_DESKTOP_DEV_SERVER=http://127.0.0.1:5174 electron .",
"start": "npm run build && electron .",
"build": "node scripts/assert-root-install.cjs && node scripts/write-build-stamp.cjs && node scripts/stage-native-deps.cjs && tsc -b && vite build && npm run postbuild",
"build": "node scripts/assert-root-install.cjs && node scripts/write-build-stamp.cjs && node scripts/stage-native-deps.cjs && tsc -b && vite build && node scripts/bundle-electron-main.mjs && npm run postbuild",
"postbuild": "node scripts/assert-dist-built.cjs",
"prebuilder": "node scripts/patch-electron-builder-mac-binary.cjs",
"builder": "cross-env NODE_OPTIONS=--max-old-space-size=16384 node scripts/run-electron-builder.cjs",
@@ -37,7 +37,7 @@
"test:desktop:nsis": "node scripts/test-desktop.mjs nsis",
"test:desktop:existing": "node scripts/test-desktop.mjs existing",
"test:desktop:fresh": "node scripts/test-desktop.mjs fresh",
"test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/backend-ready.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/link-title-window.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/git-worktree-ops.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs electron/update-count.test.cjs electron/update-rebuild.test.cjs electron/update-marker.test.cjs electron/update-relaunch.test.cjs electron/windows-user-env.test.cjs electron/wsl-clipboard-image.test.cjs electron/titlebar-overlay-width.test.cjs electron/window-state.test.cjs electron/windows-hermes-resolution.test.cjs",
"test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/backend-ready.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/link-title-window.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/git-worktree-ops.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs electron/update-count.test.cjs electron/update-rebuild.test.cjs electron/update-marker.test.cjs electron/update-relaunch.test.cjs electron/windows-user-env.test.cjs electron/window-state.test.cjs",
"typecheck": "tsc -p . --noEmit",
"lint": "eslint src/ electron/",
"lint:fix": "eslint src/ electron/ --fix",
@@ -51,17 +51,11 @@
"@assistant-ui/react-streamdown": "^0.1.11",
"@audiowave/react": "^0.6.2",
"@chenglou/pretext": "^0.0.6",
"@codemirror/commands": "^6.10.4",
"@codemirror/language": "^6.12.4",
"@codemirror/language-data": "^6.5.2",
"@codemirror/state": "^6.7.0",
"@codemirror/view": "^6.43.3",
"@dnd-kit/core": "^6.3.1",
"@dnd-kit/sortable": "^10.0.0",
"@dnd-kit/utilities": "^3.2.2",
"@hermes/shared": "file:../shared",
"@icons-pack/react-simple-icons": "=13.11.1",
"@lezer/highlight": "^1.2.3",
"@nanostores/react": "^1.1.0",
"@nous-research/ui": "^0.13.0",
"@radix-ui/react-slot": "^1.2.4",
@@ -73,7 +67,6 @@
"@tanstack/react-virtual": "^3.13.24",
"@vscode/codicons": "^0.0.45",
"@xterm/addon-fit": "^0.11.0",
"@xterm/addon-serialize": "^0.14.0",
"@xterm/addon-unicode11": "^0.9.0",
"@xterm/addon-web-links": "^0.12.0",
"@xterm/addon-webgl": "^0.19.0",
@@ -82,13 +75,11 @@
"clsx": "^2.1.1",
"cmdk": "^1.1.1",
"dnd-core": "^14.0.1",
"dompurify": "^3.4.11",
"hast-util-from-html-isomorphic": "^2.0.0",
"hast-util-to-text": "^4.0.2",
"ignore": "^7.0.5",
"katex": "^0.16.45",
"leva": "^0.10.1",
"mermaid": "^11.15.0",
"motion": "^12.38.0",
"nanostores": "^1.3.0",
"node-pty": "1.1.0",

View File

@@ -0,0 +1,33 @@
#!/usr/bin/env node
// bundle-electron-main.mjs — bundles electron/main.cjs into a single
// self-contained file so the nix build doesn't need to ship node_modules/.
//
// `electron` is provided by the runtime; `node-pty` is staged separately
// via stage-native-deps.cjs. `preload.cjs` is NOT require()'d by main —
// Electron loads it via path.join(__dirname, 'preload.cjs') — so it stays
// as a separate file and doesn't need bundling.
import { build } from 'esbuild'
import { resolve, dirname } from 'node:path'
import { fileURLToPath } from 'node:url'
import { renameSync } from 'node:fs'
const here = dirname(fileURLToPath(import.meta.url))
const root = resolve(here, '..')
const entry = resolve(root, 'electron/main.cjs')
const tmp = resolve(root, 'electron/main.bundled.cjs')
await build({
entryPoints: [entry],
bundle: true,
platform: 'node',
format: 'cjs',
target: 'node20',
outfile: tmp,
external: ['electron', 'node-pty'],
logLevel: 'info'
})
// Overwrite the original with the bundled version.
renameSync(tmp, entry)
console.log(`bundled ${entry}`)

View File

@@ -66,31 +66,6 @@ const NATIVE_DEPS = [
}
]
// Pure-JS runtime dependencies that the packaged electron main require()s but
// that workspace dedup hoists into the repo-root node_modules -- out of reach
// of electron-builder's file collector, exactly like node-pty above. Unlike
// node-pty there is no native binary to select; we stage each package's whole
// directory into build/native-deps/vendor/node_modules/<name> so the dep's own
// internal require()s resolve against a real node_modules tree, and the
// requiring file (electron/git-review-ops.cjs) falls back to that path via
// process.resourcesPath when the normal require() fails. See issue #52735
// (packaged app crashed at launch on `Cannot find module 'simple-git'`).
//
// The closure is resolved at stage time by walking dependencies +
// optionalDependencies, so a simple-git version bump that pulls in a new
// transitive dep can't silently re-introduce the crash.
//
// Layout note: the closure lands in build/native-deps/vendor/node_modules/,
// NOT build/native-deps/node_modules/. electron-builder's file collector
// hard-drops a `node_modules` directory that sits at the ROOT of an
// extraResources copy (app-builder-lib/out/util/filter.js: `if (relative ===
// "node_modules") return false`), but keeps a NESTED one. Nesting under
// `vendor/` makes node_modules a subdirectory so it survives packing; the
// require() fallback in git-review-ops.cjs resolves the matching
// vendor/node_modules path.
const JS_DEP_ROOTS = ['simple-git']
const JS_DEP_STAGE_ROOT = path.join(STAGE_ROOT, 'vendor', 'node_modules')
function rmrf(target) {
fs.rmSync(target, { recursive: true, force: true })
}
@@ -173,111 +148,12 @@ function stageOne(spec) {
console.log(`[stage-native-deps] ${path.relative(APP_ROOT, spec.to)}: ${copied} files`)
}
// Resolve a package's directory by name, searching the repo-root node_modules
// first (where workspace dedup hoists everything) and then the requiring
// package's own node_modules for any non-hoisted nested copy.
//
// We deliberately do NOT use require.resolve(`${name}/package.json`): packages
// with an "exports" map that doesn't list "./package.json" (e.g. simple-git
// 3.x) make that subpath unresolvable under Node's exports enforcement
// (ERR_PACKAGE_PATH_NOT_EXPORTED), which fails on CI even though it happened to
// work locally. Instead resolve the package's main entry (exports-aware) and
// walk up to the directory whose package.json's "name" matches.
function resolvePkgDir(name, fromDir) {
const searchPaths = [fromDir, REPO_ROOT, path.join(REPO_ROOT, 'node_modules')]
let entry
try {
entry = require.resolve(name, { paths: searchPaths })
} catch {
return null
}
// Walk up from the resolved entry file to the package root: the first
// ancestor dir whose package.json declares this package's name.
let dir = path.dirname(entry)
while (true) {
const pjPath = path.join(dir, 'package.json')
try {
const pj = JSON.parse(fs.readFileSync(pjPath, 'utf8'))
if (pj.name === name) {
return dir
}
} catch {
// no package.json here (or unreadable) — keep walking up
}
const parent = path.dirname(dir)
if (parent === dir) {
return null
}
dir = parent
}
}
// Walk dependencies + optionalDependencies from each root package and return
// the set of resolved package directories in the runtime closure. Keyed by
// package name so a dep reached via two paths is staged once.
function resolveJsClosure(roots) {
const closure = new Map() // name -> absolute package dir
const stack = roots.map(name => ({ name, fromDir: REPO_ROOT }))
while (stack.length) {
const { name, fromDir } = stack.pop()
if (closure.has(name)) continue
const dir = resolvePkgDir(name, fromDir)
if (!dir) {
throw new Error(
`stage-native-deps: could not resolve '${name}' for the simple-git ` +
`closure. Run \`npm install\` at the workspace root first.`
)
}
closure.set(name, dir)
let pj
try {
pj = JSON.parse(fs.readFileSync(path.join(dir, 'package.json'), 'utf8'))
} catch {
continue
}
const deps = { ...(pj.dependencies || {}), ...(pj.optionalDependencies || {}) }
for (const depName of Object.keys(deps)) {
stack.push({ name: depName, fromDir: dir })
}
}
return closure
}
// Stage the resolved JS dependency closure into build/native-deps/vendor/node_modules/
// so the packaged app (and the nix output) can require() it from
// process.resourcesPath when the hoisted-root require() isn't reachable. Each
// package is copied whole (minus node_modules/ — the closure is flattened so
// every dep already has its own top-level entry) into a real node_modules
// layout, which keeps the deps' own internal require()s working unchanged.
function stageJsClosure(roots) {
const closure = resolveJsClosure(roots)
rmrf(JS_DEP_STAGE_ROOT)
ensureDir(JS_DEP_STAGE_ROOT)
let staged = 0
for (const [name, fromDir] of closure) {
const dest = path.join(JS_DEP_STAGE_ROOT, name)
ensureDir(path.dirname(dest))
// Copy the package directory but skip any nested node_modules/ — the
// closure is flattened, so nested copies would just bloat the bundle.
fs.cpSync(fromDir, dest, {
recursive: true,
filter: src => path.basename(src) !== 'node_modules'
})
staged += 1
}
console.log(
`[stage-native-deps] vendor/node_modules/: ${staged} package(s) ` +
`(${[...closure.keys()].sort().join(', ')})`
)
}
function main() {
rmrf(STAGE_ROOT)
ensureDir(STAGE_ROOT)
for (const spec of NATIVE_DEPS) {
stageOne(spec)
}
stageJsClosure(JS_DEP_ROOTS)
}
main()

View File

@@ -3,8 +3,8 @@ import { type ReactNode, useEffect, useMemo, useState } from 'react'
import { useElapsedSeconds } from '@/components/chat/activity-timer'
import { ActivityTimerText } from '@/components/chat/activity-timer-text'
import { Codicon } from '@/components/ui/codicon'
import { FadeText } from '@/components/ui/fade-text'
import { Codicon } from '@/components/ui/codicon'
import { GlyphSpinner } from '@/components/ui/glyph-spinner'
import { type Translations, useI18n } from '@/i18n'
import { AlertCircle, CheckCircle2 } from '@/lib/icons'
@@ -19,7 +19,7 @@ import {
type SubagentStreamEntry
} from '@/store/subagents'
import { Panel, PanelEmpty, PanelHeader } from '../overlays/panel'
import { OverlayView } from '../overlays/overlay-view'
// Mirrors statusGlyph() in tool-fallback.tsx so subagent rows speak the
// same visual vocabulary as the chat tool blocks.
@@ -86,16 +86,18 @@ export function AgentsView({ onClose }: AgentsViewProps) {
const tree = useMemo(() => buildSubagentTree(allSubagents(subagentsBySession)), [subagentsBySession])
return (
<Panel closeLabel={t.agents.close} onClose={onClose}>
{tree.length === 0 ? (
<PanelEmpty description={t.agents.emptyDesc} icon="hubot" title={t.agents.emptyTitle} />
) : (
<>
<PanelHeader subtitle={t.agents.subtitle} title={t.agents.title} />
<SubagentTree tree={tree} />
</>
)}
</Panel>
<OverlayView
closeLabel={t.agents.close}
contentClassName="px-5 pt-5 pb-4 sm:px-6"
onClose={onClose}
rootClassName="mx-auto max-w-3xl"
>
<header className="mb-3 shrink-0">
<h2 className="text-sm font-semibold text-foreground">{t.agents.title}</h2>
<p className="text-xs text-muted-foreground/80">{t.agents.subtitle}</p>
</header>
<SubagentTree tree={tree} />
</OverlayView>
)
}

View File

@@ -477,20 +477,17 @@ export function ArtifactsView({ setStatusbarItemGroup: _setStatusbarItemGroup, .
}
}, [artifacts])
const openArtifact = useCallback(
async (href: string) => {
try {
if (window.hermesDesktop?.openExternal) {
await window.hermesDesktop.openExternal(href)
} else {
window.open(href, '_blank', 'noopener,noreferrer')
}
} catch (err) {
notifyError(err, a.openFailed)
const openArtifact = useCallback(async (href: string) => {
try {
if (window.hermesDesktop?.openExternal) {
await window.hermesDesktop.openExternal(href)
} else {
window.open(href, '_blank', 'noopener,noreferrer')
}
},
[a]
)
} catch (err) {
notifyError(err, a.openFailed)
}
}, [a])
const markImageFailed = useCallback((id: string) => {
setFailedImageIds(current => {
@@ -842,8 +839,7 @@ const ARTIFACT_COLUMNS: readonly ArtifactColumn[] = [
{
Cell: PrimaryCell,
bodyClassName: 'p-0',
header: (filter, a) =>
filter === 'link' ? a.colTitleLink : filter === 'file' ? a.colTitleFile : a.colTitleDefault,
header: (filter, a) => (filter === 'link' ? a.colTitleLink : filter === 'file' ? a.colTitleFile : a.colTitleDefault),
id: 'primary',
width: filter => (filter === 'link' ? 'w-[50%]' : 'w-[35%]')
},

View File

@@ -2,9 +2,9 @@ import { cleanup, render, screen } from '@testing-library/react'
import { afterEach, describe, expect, it } from 'vitest'
import { I18nProvider } from '@/i18n/context'
import type { ComposerAttachment } from '@/store/composer'
import { AttachmentList } from './attachments'
import type { ComposerAttachment } from '@/store/composer'
function makeAttachment(id: string, label = 'test.pdf'): ComposerAttachment {
return { id, kind: 'file', label }
@@ -32,10 +32,7 @@ describe('AttachmentList', () => {
it('renders empty list without error', () => {
renderWithI18n(<AttachmentList attachments={[]} />)
const container =
screen.getByTestId?.('composer-attachments') ?? document.querySelector('[data-slot="composer-attachments"]')
const container = screen.getByTestId?.('composer-attachments') ?? document.querySelector('[data-slot="composer-attachments"]')
expect(container).toBeDefined()
})
@@ -58,7 +55,10 @@ describe('AttachmentList', () => {
})
it('does not crash when attachments array contains null entries', () => {
const attachments = [null as unknown as ComposerAttachment, makeAttachment('a', 'valid.txt')]
const attachments = [
null as unknown as ComposerAttachment,
makeAttachment('a', 'valid.txt')
]
expect(() => {
renderWithI18n(<AttachmentList attachments={attachments} />)

View File

@@ -73,11 +73,7 @@ export function ContextMenu({
<ContextMenuItem disabled={!onPickImages} icon={ImageIcon} onSelect={onPickImages}>
{c.images}
</ContextMenuItem>
<ContextMenuItem
disabled={!onPasteClipboardImage}
icon={Clipboard}
onSelect={onPasteClipboardImage ? () => void onPasteClipboardImage() : undefined}
>
<ContextMenuItem disabled={!onPasteClipboardImage} icon={Clipboard} onSelect={onPasteClipboardImage}>
{c.pasteImage}
</ContextMenuItem>
<ContextMenuItem icon={Link} onSelect={onOpenUrlDialog}>
@@ -171,7 +167,7 @@ interface ContextMenuItemProps {
interface ContextMenuProps {
onInsertText: (text: string) => void
onOpenUrlDialog: () => void
onPasteClipboardImage?: (opts?: { silent?: boolean }) => Promise<boolean> | void
onPasteClipboardImage?: () => void
onPickFiles?: () => void
onPickFolders?: () => void
onPickImages?: () => void

View File

@@ -4,7 +4,7 @@ import { KbdCombo } from '@/components/ui/kbd'
import { Tip } from '@/components/ui/tooltip'
import { useI18n } from '@/i18n'
import { triggerHaptic } from '@/lib/haptics'
import { AudioLines, Layers3, Loader2, Square, SteeringWheel, Volume2, VolumeX } from '@/lib/icons'
import { AudioLines, Layers3, Loader2, Square, SteeringWheel } from '@/lib/icons'
import { formatCombo } from '@/lib/keybinds/combo'
import { cn } from '@/lib/utils'
@@ -39,7 +39,6 @@ interface ConversationProps {
}
export function ComposerControls({
autoSpeak,
busy,
busyAction,
canSteer,
@@ -51,10 +50,8 @@ export function ComposerControls({
state,
voiceStatus,
onDictate,
onSteer,
onToggleAutoSpeak
onSteer
}: {
autoSpeak: boolean
busy: boolean
busyAction: 'queue' | 'stop'
canSteer: boolean
@@ -67,7 +64,6 @@ export function ComposerControls({
voiceStatus: VoiceStatus
onDictate: () => void
onSteer: () => void
onToggleAutoSpeak: () => void
}) {
const { t } = useI18n()
const c = t.composer
@@ -109,7 +105,6 @@ export function ComposerControls({
) : (
<DictationButton disabled={disabled} onToggle={onDictate} state={state.voice} status={voiceStatus} />
)}
<AutoSpeakButton active={autoSpeak} disabled={disabled} onToggle={onToggleAutoSpeak} />
{showVoicePrimary ? (
<Tip label={c.startVoice}>
<Button
@@ -259,47 +254,6 @@ function ConversationIndicator({
)
}
// Pure-TTS toggle: type normally, but have every assistant reply read aloud —
// no dictation, no full conversation loop. Filled/accent when on, mirroring the
// muted-mic pressed state above. Driven by (and persisted to) `voice.auto_tts`.
function AutoSpeakButton({
active,
disabled,
onToggle
}: {
active: boolean
disabled: boolean
onToggle: () => void
}) {
const { t } = useI18n()
const c = t.composer
const label = active ? c.stopSpeakingReplies : c.speakReplies
return (
<Tip label={label}>
<Button
aria-label={label}
aria-pressed={active}
className={cn(
GHOST_ICON_BTN,
'p-0',
active && 'bg-primary/10 text-primary hover:bg-primary/15 hover:text-primary'
)}
disabled={disabled}
onClick={() => {
triggerHaptic(active ? 'close' : 'open')
onToggle()
}}
size="icon"
type="button"
variant="ghost"
>
{active ? <Volume2 size={14} /> : <VolumeX size={14} />}
</Button>
</Tip>
)
}
function DictationButton({
disabled,
state,

View File

@@ -59,10 +59,8 @@ function Harness({
}
const editor = editorRef.current
if (editor) {
const domText = composerPlainText(editor)
if (domText !== draftRef.current) {
draftRef.current = domText
setDraft(domText)
@@ -129,11 +127,9 @@ function Harness({
describe('composer Enter submit — live DOM vs stale composer state (#39630)', () => {
it('sends the just-typed text on Enter even when composer state has not synced', async () => {
const onSubmit = vi.fn()
const { getByTestId } = render(
<Harness onCancel={vi.fn()} onDrain={vi.fn()} onQueue={vi.fn()} onSubmit={onSubmit} />
)
const editor = getByTestId('editor')
// Fast typing: the DOM has the text but NO input event fired, so `draft`
@@ -150,11 +146,9 @@ describe('composer Enter submit — live DOM vs stale composer state (#39630)',
const onQueue = vi.fn()
const onDrain = vi.fn()
const onCancel = vi.fn()
const { getByTestId } = render(
<Harness busy onCancel={onCancel} onDrain={onDrain} onQueue={onQueue} onSubmit={vi.fn()} queued={['queued-1']} />
)
const editor = getByTestId('editor')
await act(async () => {
@@ -171,11 +165,9 @@ describe('composer Enter submit — live DOM vs stale composer state (#39630)',
const onCancel = vi.fn()
const onSubmit = vi.fn()
const onQueue = vi.fn()
const { getByTestId } = render(
<Harness busy onCancel={onCancel} onDrain={vi.fn()} onQueue={onQueue} onSubmit={onSubmit} />
)
const editor = getByTestId('editor')
await act(async () => {
@@ -191,11 +183,9 @@ describe('composer Enter submit — live DOM vs stale composer state (#39630)',
it('drains the next queued prompt on Enter when idle with a truly empty editor', async () => {
const onDrain = vi.fn()
const onSubmit = vi.fn()
const { getByTestId } = render(
<Harness onCancel={vi.fn()} onDrain={onDrain} onQueue={vi.fn()} onSubmit={onSubmit} queued={['queued-1']} />
)
const editor = getByTestId('editor')
await act(async () => {
@@ -210,18 +200,9 @@ describe('composer Enter submit — live DOM vs stale composer state (#39630)',
it('keeps reconnect drafts editable but blocks Enter submit until the gateway returns', async () => {
const onSubmit = vi.fn()
const onDrain = vi.fn()
const { getByTestId } = render(
<Harness
disabled
onCancel={vi.fn()}
onDrain={onDrain}
onQueue={vi.fn()}
onSubmit={onSubmit}
queued={['queued-1']}
/>
<Harness disabled onCancel={vi.fn()} onDrain={onDrain} onQueue={vi.fn()} onSubmit={onSubmit} queued={['queued-1']} />
)
const editor = getByTestId('editor')
await act(async () => {

View File

@@ -33,7 +33,7 @@ export function HelpHint() {
<Section title={c.hotkeys}>
{COMPOSER_HOTKEY_ROWS.map(row => (
<HotkeyRow combos={[...row.combos]} description={c.hotkeyDescs[row.id] ?? ''} key={row.id} />
<HotkeyRow description={c.hotkeyDescs[row.id] ?? ''} combos={[...row.combos]} key={row.id} />
))}
</Section>

Some files were not shown because too many files have changed in this diff Show More