Compare commits

..

1 Commits

Author SHA1 Message Date
Brooklyn Nicholson
876964f5a8 fix(computer-use): vision capture returns an image on cua-driver >=0.5.x
Vision mode called a `screenshot` MCP tool that cua-driver dropped in
0.5.x (full-window PNG capture was folded into `get_window_state`). The
driver replied "Unknown tool: screenshot", so `images` came back empty,
`png_b64` stayed None, and capture returned a 0x0 result with no image
on every call. `som`/`ax` were unaffected because they already use
`get_window_state`, which masked the regression.

Route vision by capability:
- driver advertises `screenshot` (older builds) -> use it (no AX walk)
- otherwise -> call `get_window_state` but discard the AX tree/elements,
  returning only the PNG so vision stays free of element noise
- capabilities not yet discovered -> try `screenshot`, fall back to
  `get_window_state` on an empty image, so the path self-heals

Add `_image_from_tool_result` to pull the PNG from either an MCP image
content-part or `structuredContent.screenshot_png_b64`, and use it on
the som path too so the image won't silently drop on driver builds that
deliver it via structuredContent instead of a content part.

Verified live (vision: 1568x954, 0 elements; som: image + 527 elements)
and with unit coverage of all four routing cases.
2026-06-22 17:40:18 -05:00
1374 changed files with 14148 additions and 126825 deletions

2
.envrc
View File

@@ -1,5 +1,5 @@
watch_file pyproject.toml uv.lock
watch_file package-lock.json package.json web/package.json ui-tui/package.json website/package.json apps/shared/package.json apps/desktop/package.json ui-tui/packages/hermes-ink/package.json
watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix nix/hermes-agent.nix nix/desktop.nix
watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix
use flake

View File

@@ -1,62 +0,0 @@
name: Detect affected areas
description: >-
Classify a PR's changed files into CI work lanes (python, frontend, site,
scan, deps, mcp_catalog) so the orchestrator can conditionally call only
the sub-workflows a PR can affect. Outputs are always "true" on push/dispatch
events and fail open (everything "true") when the diff cannot be computed.
outputs:
python:
description: Run Python tests / ruff / ty / windows-footguns.
value: ${{ steps.classify.outputs.python }}
frontend:
description: Run the TypeScript typecheck matrix + desktop build.
value: ${{ steps.classify.outputs.frontend }}
docker_meta:
description: Docker setup and meta files have changed.
value: ${{ steps.classify.outputs.docker_meta }}
site:
description: Build the Docusaurus docs site.
value: ${{ steps.classify.outputs.site }}
scan:
description: Run the supply-chain critical-pattern scanner.
value: ${{ steps.classify.outputs.scan }}
deps:
description: Check pyproject.toml dependency upper bounds.
value: ${{ steps.classify.outputs.deps }}
mcp_catalog:
description: Require MCP catalog security review label.
value: ${{ steps.classify.outputs.mcp_catalog }}
runs:
using: composite
steps:
- name: Classify changed files
id: classify
shell: bash
env:
GH_TOKEN: ${{ github.token }}
REPO: ${{ github.repository }}
EVENT_NAME: ${{ github.event_name }}
BASE_SHA: ${{ github.event.pull_request.base.sha }}
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
run: |
set -euo pipefail
# Only pull_request events are gated. Other events (push, release,
# dispatch) leave CHANGED empty, so the classifier fails open and every
# lane runs. Post-merge / on-demand validation is never weakened.
if [ "$EVENT_NAME" = "pull_request" ]; then
# Use the compare endpoint with the pinned base/head SHAs from the
# event payload instead of the "current PR files" endpoint. The SHAs
# are frozen at trigger time, so the file list is deterministic even
# if the PR receives a new push between trigger and detect.
CHANGED="$(gh api \
--paginate \
"repos/${REPO}/compare/${BASE_SHA}...${HEAD_SHA}" \
--jq '.files[].filename' || true)"
fi
echo "Changed files:"
printf '%s\n' "${CHANGED:-(none)}"
printf '%s\n' "${CHANGED:-}" | python3 scripts/ci/classify_changes.py

View File

@@ -0,0 +1,50 @@
name: Hermes smoke test
description: >
Run the image's built-in entrypoint against `--help` and `dashboard --help`
to catch basic runtime regressions before publishing. Requires the image
to already be loaded into the local Docker daemon under `image`.
Works identically on amd64 and arm64 runners.
inputs:
image:
description: Fully-qualified image tag (e.g. nousresearch/hermes-agent:test)
required: true
runs:
using: composite
steps:
- name: Ensure /tmp/hermes-test is hermes-writable
shell: bash
run: |
# The image runs as the hermes user (UID 10000). GitHub Actions
# creates /tmp/hermes-test root-owned by default, which hermes
# can't write to — chown it to match the in-container UID before
# bind-mounting. Real users doing `docker run -v ~/.hermes:...`
# with their own UID hit the same issue and have their own
# remediations (HERMES_UID env var, or chown locally).
mkdir -p /tmp/hermes-test
sudo chown -R 10000:10000 /tmp/hermes-test
- name: hermes --help
shell: bash
run: |
# Use the image's real ENTRYPOINT (/init + main-wrapper.sh) so
# this exercises the actual production startup path. PR #30136
# review caught that an --entrypoint override here had been
# silently neutered by the s6-overlay migration — stage2-hook
# ignores its CMD args, so the smoke test was a no-op.
docker run --rm \
-v /tmp/hermes-test:/opt/data \
"${{ inputs.image }}" --help
- name: hermes dashboard --help
shell: bash
run: |
# Regression guard for #9153: dashboard was present in source but
# missing from the published image. If this fails, something in
# the Dockerfile is excluding the dashboard subcommand from the
# installed package.
docker run --rm \
-v /tmp/hermes-test:/opt/data \
"${{ inputs.image }}" dashboard --help

View File

@@ -1,50 +0,0 @@
name: Retry a flaky command
description: >-
Run a shell command, retrying on non-zero exit. For dependency installs
(npm ci, uv sync) whose only failures are transient network/toolchain
flakes — a node-gyp header fetch, a registry blip — so CI self-heals
instead of needing a manual re-run.
inputs:
command:
description: Shell command to run (and retry).
required: true
attempts:
description: Max attempts before giving up.
default: "3"
delay:
description: Seconds to wait between attempts.
default: "10"
working-directory:
description: Directory to run in.
default: "."
runs:
using: composite
steps:
- shell: bash
working-directory: ${{ inputs.working-directory }}
# command goes through env, never interpolated into the script body, so
# a command with quotes/specials can't break or inject into the runner.
env:
_CMD: ${{ inputs.command }}
_ATTEMPTS: ${{ inputs.attempts }}
_DELAY: ${{ inputs.delay }}
run: |
set -uo pipefail
n=0
while :; do
n=$((n + 1))
echo "::group::attempt $n/$_ATTEMPTS: $_CMD"
if bash -c "$_CMD"; then
echo "::endgroup::"
exit 0
fi
echo "::endgroup::"
if [ "$n" -ge "$_ATTEMPTS" ]; then
echo "::error::failed after $n attempts: $_CMD"
exit 1
fi
echo "::warning::attempt $n failed; retrying in ${_DELAY}s: $_CMD"
sleep "$_DELAY"
done

View File

@@ -0,0 +1,100 @@
name: Build Windows Installer
on:
workflow_dispatch:
permissions:
contents: read
jobs:
# Gate: workflow_dispatch is already restricted to users with write access,
# but we want ADMIN-only. Explicitly check the triggering actor's repo
# permission via the API and fail fast for anyone below admin.
authorize:
name: Authorize (admins only)
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- name: Check actor is a repo admin
env:
GH_TOKEN: ${{ github.token }}
ACTOR: ${{ github.actor }}
run: |
set -euo pipefail
perm=$(gh api \
"repos/${{ github.repository }}/collaborators/${ACTOR}/permission" \
--jq '.permission')
echo "Actor '${ACTOR}' has permission: ${perm}"
if [ "${perm}" != "admin" ]; then
echo "::error::'${ACTOR}' is not a repo admin (permission=${perm}). Refusing to build/sign."
exit 1
fi
echo "Authorized: '${ACTOR}' is an admin."
build:
name: Hermes-Setup.exe
needs: authorize
runs-on: windows-latest
timeout-minutes: 30
permissions:
contents: read
# Required for OIDC auth to Azure (azure/login federated credentials).
id-token: write
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Setup Node.js
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
with:
node-version: 22
cache: npm
- name: Install npm dependencies
run: npm ci
- name: Setup Rust
uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable
- name: Cache Rust targets
uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2
with:
workspaces: apps/bootstrap-installer/src-tauri
- name: Build installer
run: npm run tauri:build
working-directory: apps/bootstrap-installer
- name: Azure login (OIDC)
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2
with:
client-id: ${{ secrets.AZURE_CLIENT_ID }}
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
- name: Sign Hermes-Setup.exe with Azure Artifact Signing
uses: azure/artifact-signing-action@c7ab2a863ab5f9a846ddb8265964877ef296ee82 # v2
with:
endpoint: ${{ vars.AZURE_SIGNING_ENDPOINT }}
signing-account-name: ${{ vars.AZURE_SIGNING_ACCOUNT_NAME }}
certificate-profile-name: ${{ vars.AZURE_SIGNING_CERTIFICATE_PROFILE }}
# Sign both the raw exe and the bundled NSIS installer.
files-folder: ${{ github.workspace }}\apps\bootstrap-installer\src-tauri\target\release
files-folder-filter: exe
files-folder-recurse: true
file-digest: SHA256
timestamp-rfc3161: http://timestamp.acs.microsoft.com
timestamp-digest: SHA256
- name: Upload NSIS installer
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
name: Hermes-Setup-installer
path: apps/bootstrap-installer/src-tauri/target/release/bundle/nsis/*.exe
- name: Upload raw exe
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
name: Hermes-Setup-exe
path: apps/bootstrap-installer/src-tauri/target/release/Hermes-Setup.exe

View File

@@ -1,167 +0,0 @@
name: CI
# Orchestrator workflow. Runs ``detect-changes`` once, then conditionally
# calls the sub-workflows that a PR can actually affect. A final
# ``all-checks-pass`` gate job aggregates results so branch protection only
# needs to require a single check.
#
# Sub-workflows are triggered via ``workflow_call`` and keep their own job
# definitions, matrices, and concurrency settings. They no longer have
# ``push:`` / ``pull_request:`` triggers of their own — everything flows
# through this file.
on:
pull_request:
push:
branches: [main]
permissions:
contents: read
pull-requests: write # needed by lint (PR comment) + supply-chain (PR comment)
actions: read # needed by osv-scanner (SARIF upload)
security-events: write # needed by osv-scanner (SARIF upload)
packages: write # needed by docker build
concurrency:
group: ci-${{ github.ref }}
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
jobs:
# ─────────────────────────────────────────────────────────────────────
# detect: run the classifier once. Every downstream job reads its outputs
# to decide whether to run. On push/dispatch the classifier fails open
# (all lanes true) so post-merge validation is never weakened.
# ─────────────────────────────────────────────────────────────────────
detect:
name: Detect affected areas
runs-on: ubuntu-latest
outputs:
python: ${{ steps.classify.outputs.python }}
frontend: ${{ steps.classify.outputs.frontend }}
site: ${{ steps.classify.outputs.site }}
scan: ${{ steps.classify.outputs.scan }}
deps: ${{ steps.classify.outputs.deps }}
docker_meta: ${{ steps.classify.outputs.docker_meta }}
mcp_catalog: ${{ steps.classify.outputs.mcp_catalog }}
event_name: ${{ github.event_name }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Detect affected areas
id: classify
uses: ./.github/actions/detect-changes
# ─────────────────────────────────────────────────────────────────────
# Lane-gated sub-workflows. Each runs in parallel after detect finishes.
# Skipped workflows (if condition is false) don't spin up runners.
# ─────────────────────────────────────────────────────────────────────
tests:
name: Python tests
needs: detect
if: needs.detect.outputs.python == 'true'
uses: ./.github/workflows/tests.yml
with:
slice_count: 8
lint:
name: Python lints
needs: detect
if: needs.detect.outputs.python == 'true'
uses: ./.github/workflows/lint.yml
with:
event_name: ${{ needs.detect.outputs.event_name }}
typecheck:
name: TypeScript
needs: detect
if: needs.detect.outputs.frontend == 'true'
uses: ./.github/workflows/typecheck.yml
docs-site:
name: Docs Site
needs: detect
if: needs.detect.outputs.site == 'true'
uses: ./.github/workflows/docs-site-checks.yml
history-check:
name: Deny unrelated histories
needs: detect
if: needs.detect.outputs.event_name == 'pull_request'
uses: ./.github/workflows/history-check.yml
contributor-check:
name: Check contributors
needs: detect
if: needs.detect.outputs.python == 'true'
uses: ./.github/workflows/contributor-check.yml
uv-lockfile:
name: Check uv.lock
needs: detect
uses: ./.github/workflows/uv-lockfile-check.yml
docker-lint:
name: Lint Docker scripts
needs: detect
if: needs.detect.outputs.docker_meta == 'true'
uses: ./.github/workflows/docker-lint.yml
docker:
name: Build&Test Docker image
needs: detect
if: needs.detect.outputs.python == 'true' || needs.detect.outputs.frontend == 'true' || needs.detect.outputs.docker_meta == 'true'
uses: ./.github/workflows/docker.yml
secrets: inherit
supply-chain:
name: Supply-chain scan
needs: detect
if: needs.detect.outputs.event_name == 'pull_request' && (needs.detect.outputs.scan == 'true' || needs.detect.outputs.deps == 'true' || needs.detect.outputs.mcp_catalog == 'true')
uses: ./.github/workflows/supply-chain-audit.yml
with:
event_name: ${{ needs.detect.outputs.event_name }}
scan: ${{ needs.detect.outputs.scan == 'true' }}
deps: ${{ needs.detect.outputs.deps == 'true' }}
mcp_catalog: ${{ needs.detect.outputs.mcp_catalog == 'true' }}
osv-scanner:
name: OSV scan
uses: ./.github/workflows/osv-scanner.yml
# ─────────────────────────────────────────────────────────────────────
# Gate: runs after everything. ``if: always()`` ensures it reports a
# status even when some deps were skipped. Only actual ``failure``
# results cause it to fail; ``skipped`` is treated as success.
#
# Branch protection should require ONLY this check.
# ─────────────────────────────────────────────────────────────────────
all-checks-pass:
name: All required checks pass
needs:
- tests
- lint
- typecheck
- docs-site
- history-check
- contributor-check
- uv-lockfile
- docker-lint
- supply-chain
- osv-scanner
# we don't require docker to pass rn because it's so slow lol
# - docker
if: always()
runs-on: ubuntu-latest
steps:
- name: Evaluate job results
env:
RESULTS: ${{ toJSON(needs.*.result) }}
run: |
echo "$RESULTS" | python3 -c "
import json, sys
results = json.load(sys.stdin)
failed = [r for r in results if r == 'failure']
if failed:
print(f'::error::{len(failed)} job(s) failed')
sys.exit(1)
print('All checks passed (or were skipped)')
"

View File

@@ -1,8 +1,11 @@
name: Contributor Attribution Check
on:
workflow_call:
# No paths filter — the job must always run so the required check
# reports a status (path-gated workflows leave checks "pending" forever
# when no matching files change, which blocks merge).
pull_request:
branches: [main]
permissions:
contents: read
@@ -14,7 +17,21 @@ jobs:
with:
fetch-depth: 0 # Full history needed for git log
- name: Check if relevant files changed
id: filter
run: |
BASE="${{ github.event.pull_request.base.sha }}"
HEAD="${{ github.event.pull_request.head.sha }}"
CHANGED=$(git diff --name-only "$BASE"..."$HEAD" -- '*.py' '**/*.py' '.github/workflows/contributor-check.yml' || true)
if [ -n "$CHANGED" ]; then
echo "run=true" >> "$GITHUB_OUTPUT"
else
echo "run=false" >> "$GITHUB_OUTPUT"
echo "No Python files changed, skipping attribution check."
fi
- name: Check for unmapped contributor emails
if: steps.filter.outputs.run == 'true'
run: |
# Get the merge base between this PR and main
MERGE_BASE=$(git merge-base origin/main HEAD)

View File

@@ -2,7 +2,7 @@ name: Docker / shell lint
# Lints the container build inputs: Dockerfile (via hadolint) and any shell
# scripts under docker/ (via shellcheck). These catch the class of regression
# the behavioral docker smoke test can't — unquoted variable
# the behavioral docker-publish smoke test can't — unquoted variable
# expansions, silently-failing RUN commands, etc.
#
# Rules and ignores are documented in .hadolint.yaml at the repo root.
@@ -11,7 +11,19 @@ name: Docker / shell lint
# activate script doesn't exist at lint time.
on:
workflow_call:
push:
branches: [main]
paths:
- Dockerfile
- docker/**
- .hadolint.yaml
- .github/workflows/docker-lint.yml
# No paths filter — the job must always run so the required check
# reports a status (path-gated workflows leave checks "pending" forever
# when no matching files change, which blocks merge).
pull_request:
branches: [main]
permissions:
contents: read

View File

@@ -1,9 +1,25 @@
name: Docker Build, Test, and Publish
name: Docker Build and Publish
on:
push:
branches: [main]
paths:
- '**/*.py'
- 'pyproject.toml'
- 'uv.lock'
- 'Dockerfile'
- 'docker/**'
- '.github/workflows/docker-publish.yml'
- '.github/actions/hermes-smoke-test/**'
# No paths filter — the job must always run so the required check
# reports a status (path-gated workflows leave checks "pending" forever
# when no matching files change, which blocks merge).
pull_request:
branches: [main]
release:
types: [published]
workflow_call:
permissions:
contents: read
@@ -24,7 +40,11 @@ env:
IMAGE_NAME: nousresearch/hermes-agent
jobs:
# Build, test, and optionally push the amd64 image.
# ---------------------------------------------------------------------------
# Build amd64 natively. This job also runs the smoke tests (basic --help
# and the dashboard subcommand regression guard from #9153), because amd64
# is the only arch we can `load` into the local daemon on an amd64 runner.
# ---------------------------------------------------------------------------
build-amd64:
# Only run on the upstream repository, not on forks
if: github.repository == 'NousResearch/hermes-agent'
@@ -34,19 +54,16 @@ jobs:
digest: ${{ steps.push.outputs.digest }}
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
# The image build + integration tests run on every event
# (PRs, push-to-main, release). Publish steps below are gated to
# push-to-main / release only.
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
# Build once, load into the local daemon for testing. Cached
# Build once, load into the local daemon for smoke testing. Cached
# to gha with a per-arch scope; the push step below reuses every
# layer from this build.
- name: Build image (amd64)
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
- name: Build image (amd64, smoke test)
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
with:
context: .
file: Dockerfile
@@ -58,12 +75,24 @@ jobs:
cache-from: type=gha,scope=docker-amd64
cache-to: type=gha,mode=max,scope=docker-amd64
- name: Smoke test image
uses: ./.github/actions/hermes-smoke-test
with:
image: ${{ env.IMAGE_NAME }}:test
# ---------------------------------------------------------------------
# Run the docker-integration test suite against the freshly-built
# image already loaded into the local daemon (`:test`).
# image already loaded into the local daemon (`:test`). These tests
# are excluded from the sharded `tests.yml :: test` matrix on purpose
# (see `_SKIP_PARTS` in scripts/run_tests_parallel.py) because each
# shard would otherwise reach the session-scoped ``built_image``
# fixture in ``tests/docker/conftest.py`` and start a 3-7min
# ``docker build`` — guaranteed to
# die in fixture setup.
#
# Piggybacking here avoids a second image build: the build step
# already loaded the image into the daemon under
# `${IMAGE_NAME}:test`, so we just point ``HERMES_TEST_IMAGE`` at
# Piggybacking here avoids a second image build: the smoke test
# already proved the image loads + runs, so the daemon has it under
# `${IMAGE_NAME}:test` and we just point ``HERMES_TEST_IMAGE`` at
# that. The fixture's ``HERMES_TEST_IMAGE`` branch (see
# tests/docker/conftest.py:62-63) short-circuits the rebuild.
#
@@ -73,18 +102,20 @@ jobs:
# cheapest path to coverage on every PR that touches docker code.
# ---------------------------------------------------------------------
- name: Install uv (for docker tests)
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
- name: Set up Python 3.11 (for docker tests)
run: uv python install 3.11
- name: Install Python dependencies (for docker tests)
run: |
uv venv .venv --python 3.11
source .venv/bin/activate
# ``dev`` extra pulls in pytest, pytest-asyncio —
# everything tests/docker/ needs. We deliberately avoid ``all``
# here because the docker tests only drive the container via
# subprocess and don't import hermes_agent's optional deps.
uv sync --locked --python 3.11 --extra dev
uv pip install -e ".[dev]"
- name: Run docker integration tests
env:
@@ -96,11 +127,12 @@ jobs:
OPENAI_API_KEY: ""
NOUS_API_KEY: ""
run: |
scripts/run_tests.sh tests/docker/ --file-timeout 600
source .venv/bin/activate
python -m pytest tests/docker/ -v --tb=short
- name: Log in to Docker Hub
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -111,7 +143,7 @@ jobs:
- name: Push amd64 by digest
id: push
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
with:
context: .
file: Dockerfile
@@ -135,7 +167,7 @@ jobs:
- name: Upload digest artifact
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: digest-amd64
path: /tmp/digests/*
@@ -143,7 +175,10 @@ jobs:
retention-days: 1
# ---------------------------------------------------------------------------
# Build, test, and optionally push the arm64 image.
# Build arm64 natively on GitHub's free arm64 runner. This replaces the
# previous QEMU-emulated arm64 build, which was ~5-10x slower and shared
# a cache scope with amd64. Matches the amd64 job's shape: build+load,
# smoke test, then on push/release push by digest.
# ---------------------------------------------------------------------------
build-arm64:
if: github.repository == 'NousResearch/hermes-agent'
@@ -153,35 +188,57 @@ jobs:
digest: ${{ steps.push.outputs.digest }}
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
# Log in to ghcr.io so the registry-backed build cache below can be
# read (cache-from) on every event and written (cache-to) on
# push/release. Uses the workflow's GITHUB_TOKEN, which is valid for
# the whole job — unlike the gha cache backend's short-lived Azure SAS
# token, which expired mid-build on slow cold-cache arm64 runs and
# crashed the build before the tests ran (the reason the gha cache
# crashed the build before the smoke test (the reason the gha cache
# was removed from arm64 PRs in the first place).
- name: Log in to ghcr.io (build cache)
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# Build once, load into the local daemon for testing, then push
# by digest below. Reads AND writes the registry-backed cache so the
# push reuses layers from this build and the next build starts warm.
# Build once, load into the local daemon for smoke testing.
#
# PR builds use the registry-backed cache READ-ONLY (cache-from only):
# they pull warm layers pushed by the most recent main build but never
# write, so rapid PR pushes don't race on cache writes or pollute the
# cache ref. This restores warm-cache speed to arm64 PR builds (which
# were running fully uncached and were ~45% slower than amd64, making
# them the job most often cancelled on supersede).
#
# Registry cache (type=registry on ghcr.io) is used instead of the gha
# cache that previously broke here: its credential is the job-lifetime
# GITHUB_TOKEN, not a short-lived SAS token, so the cold-build-outlives-
# token failure mode cannot recur.
- name: Build image (arm64, cached publish)
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
- name: Build image (arm64, smoke test, cache read-only PR)
if: github.event_name == 'pull_request'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
with:
context: .
file: Dockerfile
load: true
platforms: linux/arm64
tags: ${{ env.IMAGE_NAME }}:test
build-args: |
HERMES_GIT_SHA=${{ github.sha }}
cache-from: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64
# Main/release builds read AND write the registry cache so the digest
# push below reuses layers from this smoke-test build, and so the next
# PR/main build starts warm.
- name: Build image (arm64, smoke test, cached publish)
if: github.event_name != 'pull_request'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
with:
context: .
file: Dockerfile
@@ -193,29 +250,14 @@ jobs:
cache-from: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64
cache-to: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64,mode=max
- name: Install uv for docker tests
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
- name: Set up Python 3.11 for docker tests
run: uv python install 3.11
- name: Install Python dependencies for docker tests
run: |
uv sync --locked --python 3.11 --extra dev
- name: Run docker tests
env:
# Skip rebuild; use the image already loaded by the build step.
HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
OPENROUTER_API_KEY: ""
OPENAI_API_KEY: ""
NOUS_API_KEY: ""
run: |
scripts/run_tests.sh tests/docker/ --file-timeout 600
- name: Smoke test image
uses: ./.github/actions/hermes-smoke-test
with:
image: ${{ env.IMAGE_NAME }}:test
- name: Log in to Docker Hub
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -223,7 +265,7 @@ jobs:
- name: Push arm64 by digest
id: push
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
with:
context: .
file: Dockerfile
@@ -245,7 +287,7 @@ jobs:
- name: Upload digest artifact
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: digest-arm64
path: /tmp/digests/*
@@ -267,17 +309,17 @@ jobs:
timeout-minutes: 10
steps:
- name: Download digests
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
with:
path: /tmp/digests
pattern: digest-*
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
- name: Log in to Docker Hub
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

View File

@@ -1,7 +1,13 @@
name: Docs Site Checks
on:
workflow_call:
# No paths filter — the job must always run so the required check
# reports a status (path-gated workflows leave checks "pending" forever
# when no matching files change, which blocks merge).
pull_request:
branches: [main]
workflow_dispatch:
permissions:
contents: read
@@ -19,19 +25,15 @@ jobs:
cache-dependency-path: website/package-lock.json
- name: Install website dependencies
uses: ./.github/actions/retry
with:
command: npm ci
working-directory: website
run: npm ci
working-directory: website
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: "3.11"
- name: Install ascii-guard
uses: ./.github/actions/retry
with:
command: python -m pip install ascii-guard==2.3.0 pyyaml==6.0.3
run: python -m pip install ascii-guard==2.3.0 pyyaml==6.0.3
- name: Extract skill metadata for dashboard
run: python3 website/scripts/extract-skills.py

View File

@@ -14,7 +14,11 @@ name: History Check
# the PR head and main to be non-empty.
on:
workflow_call:
# No paths filter — the job must always run so the required check
# reports a status (path-gated workflows leave checks "pending" forever
# when no matching files change, which blocks merge).
pull_request:
branches: [main]
permissions:
contents: read

View File

@@ -9,12 +9,18 @@ name: Lint (ruff + ty)
# enforcement fails.
on:
workflow_call:
inputs:
event_name:
description: The event name from the calling orchestrator (pull_request or push).
type: string
required: true
push:
branches: [main]
paths-ignore:
- "**/*.md"
- "docs/**"
- "website/**"
# No paths filter — the job must always run so the required check
# reports a status (path-gated workflows leave checks "pending" forever
# when no matching files change, which blocks merge).
pull_request:
branches: [main]
permissions:
contents: read
@@ -27,7 +33,6 @@ concurrency:
jobs:
lint-diff:
name: ruff + ty diff
if: inputs.event_name == 'pull_request'
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
@@ -37,19 +42,19 @@ jobs:
fetch-depth: 0 # need full history for merge-base + worktree
- name: Install uv
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
- name: Install ruff + ty
uses: ./.github/actions/retry
with:
command: uv tool install ruff && uv tool install ty
run: |
uv tool install ruff
uv tool install ty
- name: Determine base ref
id: base
run: |
# For PRs, diff against the merge base with the target branch.
# For pushes to main, diff against the previous commit on main.
if [ "${{ inputs.event_name }}" = "pull_request" ]; then
if [ "${{ github.event_name }}" = "pull_request" ]; then
BASE_SHA=$(git merge-base "origin/${{ github.base_ref }}" HEAD)
BASE_REF="origin/${{ github.base_ref }}"
else
@@ -105,19 +110,19 @@ jobs:
--base-ty .lint-reports/base/ty.json \
--head-ty .lint-reports/head/ty.json \
--base-ref "${{ steps.base.outputs.ref }}" \
--head-ref "${{ inputs.event_name == 'pull_request' && github.head_ref || github.ref_name }}" \
--head-ref "${{ github.event_name == 'pull_request' && github.head_ref || github.ref_name }}" \
--output .lint-reports/summary.md
cat .lint-reports/summary.md >> "$GITHUB_STEP_SUMMARY"
- name: Upload reports as artifact
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: lint-reports
path: .lint-reports/
retention-days: 14
- name: Post / update PR comment
if: inputs.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
continue-on-error: true
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7
with:
@@ -164,12 +169,10 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install uv
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
- name: Install ruff
uses: ./.github/actions/retry
with:
command: uv tool install ruff
run: uv tool install ruff
- name: ruff check .
# No --exit-zero, no || true. Exit code propagates to the job,

View File

@@ -1,8 +1,8 @@
name: OSV-Scanner
# Scans lockfiles (uv.lock, package-lock.json) against the OSV vulnerability
# database. Runs on every PR/push (via the ci.yml orchestrator's workflow_call)
# and on a weekly schedule against main.
# database. Runs on every PR that touches a lockfile and on a weekly schedule
# against main.
#
# This is detection-only — OSV-Scanner does NOT open PRs or modify pins.
# It reports known CVEs in currently-pinned dependency versions so we can
@@ -10,9 +10,9 @@ name: OSV-Scanner
# (full SHA / exact version) is preserved; only the notification signal
# is added.
#
# Complements the supply-chain-audit.yml workflow (which scans for malicious
# code patterns in PR diffs) by covering the orthogonal "currently-pinned
# dep became known-vulnerable" case.
# Complements the existing supply-chain-audit.yml workflow (which scans
# for malicious code patterns in PR diffs) by covering the orthogonal
# "currently-pinned dep became known-vulnerable" case.
#
# Uses Google's officially-recommended reusable workflow, pinned by SHA.
# Findings land in the repo's Security tab (Code Scanning > OSV-Scanner).
@@ -20,7 +20,19 @@ name: OSV-Scanner
# vulnerabilities in pinned deps that we may need to patch deliberately.
on:
workflow_call:
# No paths filter — the job must always run so the required check
# reports a status (path-gated workflows leave checks "pending" forever
# when no matching files change, which blocks merge).
pull_request:
branches: [main]
push:
branches: [main]
paths:
- "uv.lock"
- "pyproject.toml"
- "package.json"
- "package-lock.json"
- "website/package-lock.json"
schedule:
# Weekly scan against main — catches CVEs published after merge for
# deps that haven't changed since.

View File

@@ -3,17 +3,17 @@ name: Build Skills Index
on:
schedule:
# Run twice daily: 6 AM and 6 PM UTC
- cron: "0 6,18 * * *"
workflow_dispatch: # Manual trigger
- cron: '0 6,18 * * *'
workflow_dispatch: # Manual trigger
push:
branches: [main]
paths:
- "scripts/build_skills_index.py"
- ".github/workflows/skills-index.yml"
- 'scripts/build_skills_index.py'
- '.github/workflows/skills-index.yml'
permissions:
contents: read
actions: write # to trigger deploy-site.yml on schedule
actions: write # to trigger deploy-site.yml on schedule
jobs:
build-index:
@@ -21,11 +21,11 @@ jobs:
if: github.repository == 'NousResearch/hermes-agent'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: "3.11"
python-version: '3.11'
- name: Install dependencies
run: pip install httpx==0.28.1 pyyaml==6.0.2
@@ -36,7 +36,7 @@ jobs:
run: python scripts/build_skills_index.py
- name: Upload index artifact
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: skills-index
path: website/static/api/skills-index.json

View File

@@ -1,5 +1,16 @@
name: Supply Chain Audit
on:
# No paths filter — the jobs must always run so required checks
# report a status (path-gated workflows leave checks "pending" forever
# when no matching files change, which blocks merge).
pull_request:
types: [opened, synchronize, reopened]
permissions:
pull-requests: write
contents: read
# Narrow, high-signal scanner. Only fires on critical indicators of supply
# chain attacks (e.g. the litellm-style payloads). Low-signal heuristics
# (plain base64, plain exec/eval, dependency/Dockerfile/workflow edits,
@@ -8,40 +19,56 @@ name: Supply Chain Audit
# the scanner. Keep this file's checks ruthlessly narrow: if you find
# yourself adding WARNING-tier patterns here again, make a separate
# advisory-only workflow instead.
#
# Path-gating is handled centrally by the ``ci.yml`` orchestrator's
# ``detect`` job. The orchestrator passes ``scan`` / ``deps`` /
# ``mcp_catalog`` booleans as inputs; this workflow's jobs gate on those
# inputs instead of re-computing the diff.
on:
workflow_call:
inputs:
event_name:
description: The event name from the calling orchestrator.
type: string
required: true
scan:
description: Whether supply-chain-relevant files changed.
type: boolean
required: true
deps:
description: Whether pyproject.toml changed.
type: boolean
required: true
mcp_catalog:
description: Whether the MCP catalog / installer changed.
type: boolean
required: true
permissions:
pull-requests: write
contents: read
jobs:
# ── Path filter (shared by both scan and dep-bounds) ───────────────
changes:
runs-on: ubuntu-latest
outputs:
# True when any file the scanner cares about changed in this PR
scan: ${{ steps.filter.outputs.scan }}
# True when pyproject.toml changed in this PR
deps: ${{ steps.filter.outputs.deps }}
# True when the curated MCP catalog / bundled MCP manifests changed.
mcp_catalog: ${{ steps.filter.outputs.mcp_catalog }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 0
- name: Check for relevant file changes
id: filter
run: |
BASE="${{ github.event.pull_request.base.sha }}"
HEAD="${{ github.event.pull_request.head.sha }}"
SCAN_FILES=$(git diff --name-only "$BASE"..."$HEAD" -- \
'*.py' '**/*.py' '*.pth' '**/*.pth' \
'setup.py' 'setup.cfg' \
'sitecustomize.py' 'usercustomize.py' '__init__.pth' \
'pyproject.toml' || true)
if [ -n "$SCAN_FILES" ]; then
echo "scan=true" >> "$GITHUB_OUTPUT"
else
echo "scan=false" >> "$GITHUB_OUTPUT"
fi
DEPS_FILES=$(git diff --name-only "$BASE"..."$HEAD" -- 'pyproject.toml' || true)
if [ -n "$DEPS_FILES" ]; then
echo "deps=true" >> "$GITHUB_OUTPUT"
else
echo "deps=false" >> "$GITHUB_OUTPUT"
fi
MCP_CATALOG_FILES=$(git diff --name-only "$BASE"..."$HEAD" -- \
'optional-mcps/**' \
'hermes_cli/mcp_catalog.py' || true)
if [ -n "$MCP_CATALOG_FILES" ]; then
echo "mcp_catalog=true" >> "$GITHUB_OUTPUT"
else
echo "mcp_catalog=false" >> "$GITHUB_OUTPUT"
fi
scan:
name: Scan PR for critical supply chain risks
if: inputs.scan
needs: changes
if: needs.changes.outputs.scan == 'true'
runs-on: ubuntu-latest
steps:
- name: Checkout
@@ -84,7 +111,7 @@ jobs:
fi
# --- base64 decode + exec/eval on the same line (the litellm attack pattern) ---
B64_EXEC_HITS=$(echo "$DIFF" | grep -n '^+' | grep -iE 'base64\.(b64decode|decodebytes|urlsafe_b64decode)' | grep -iE 'exec\(|eval\(' | head -10 || true)
B64_EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|decodebytes|urlsafe_b64decode)' | grep -iE 'exec\(|eval\(' | head -10 || true)
if [ -n "$B64_EXEC_HITS" ]; then
FINDINGS="${FINDINGS}
### 🚨 CRITICAL: base64 decode + exec/eval combo
@@ -98,7 +125,7 @@ jobs:
fi
# --- subprocess with encoded/obfuscated command argument ---
PROC_HITS=$(echo "$DIFF" | grep -n '^+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|\\x[0-9a-f]{2}|chr\(' | head -10 || true)
PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|\\x[0-9a-f]{2}|chr\(' | head -10 || true)
if [ -n "$PROC_HITS" ]; then
FINDINGS="${FINDINGS}
### 🚨 CRITICAL: subprocess with encoded/obfuscated command
@@ -160,9 +187,23 @@ jobs:
echo "::error::CRITICAL supply chain risk patterns detected in this PR. See the PR comment for details."
exit 1
# Gate: reports success when scan was skipped (no relevant files changed).
# This ensures the required check always gets a status.
scan-gate:
name: Scan PR for critical supply chain risks
needs: changes
# always() so the gate still reports SUCCESS even if `changes` fails/is
# skipped — without it, a failed dependency would leave the required
# check unreported (i.e. "pending"), the exact failure mode this fixes.
if: always() && needs.changes.outputs.scan != 'true'
runs-on: ubuntu-latest
steps:
- run: echo "No supply-chain-relevant files changed, skipping scan."
dep-bounds:
name: Check PyPI dependency upper bounds
if: inputs.deps
needs: changes
if: needs.changes.outputs.deps == 'true'
runs-on: ubuntu-latest
steps:
- name: Checkout
@@ -212,7 +253,7 @@ jobs:
$(cat /tmp/unbounded.txt)
\`\`\`
**Fix:** Add an upper bound, e.g. \`"package>=1.2.0,<2"\`
**Fix:** Add an upper bound, e.g. \`\"package>=1.2.0,<2\"\`
---
*See PR #2810 and CONTRIBUTING.md for the full policy rationale.*"
@@ -225,9 +266,23 @@ jobs:
echo "::error::PyPI dependencies without upper bounds detected. Add <next_major ceiling per CONTRIBUTING.md policy."
exit 1
# Gate: reports success when dep-bounds was skipped (no pyproject.toml changed).
# This ensures the required check always gets a status.
dep-bounds-gate:
name: Check PyPI dependency upper bounds
needs: changes
# always() so the gate still reports SUCCESS even if `changes` fails/is
# skipped — without it, a failed dependency would leave the required
# check unreported (i.e. "pending"), the exact failure mode this fixes.
if: always() && needs.changes.outputs.deps != 'true'
runs-on: ubuntu-latest
steps:
- run: echo "No pyproject.toml changes, skipping dependency bounds check."
mcp-catalog-review:
name: MCP catalog security review
if: inputs.mcp_catalog
needs: changes
if: needs.changes.outputs.mcp_catalog == 'true'
runs-on: ubuntu-latest
steps:
- name: Checkout
@@ -262,3 +317,11 @@ jobs:
gh pr comment "$PR" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs)"
echo "::error::MCP catalog changes require the mcp-catalog-reviewed label."
exit 1
mcp-catalog-review-gate:
name: MCP catalog security review
needs: changes
if: always() && needs.changes.outputs.mcp_catalog != 'true'
runs-on: ubuntu-latest
steps:
- run: echo "No MCP catalog changes, skipping MCP catalog security review."

View File

@@ -1,27 +1,33 @@
name: Tests
on:
workflow_call:
inputs:
slice_count:
description: Number of parallel test slices
type: number
default: 8
push:
branches: [main]
paths-ignore:
- "**/*.md"
- "docs/**"
# No paths filter — the job must always run so the required check
# reports a status (path-gated workflows leave checks "pending" forever
# when no matching files change, which blocks merge).
pull_request:
branches: [main]
permissions:
contents: read
# Cancel in-progress runs for the same ref
# Cancel in-progress runs for the same PR/branch
concurrency:
group: tests-${{ github.ref }}
cancel-in-progress: true
jobs:
generate:
name: "Generate slices"
test:
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.matrix.outputs.matrix }}
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
slice: [1, 2, 3, 4, 5, 6]
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -30,33 +36,20 @@ jobs:
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: test_durations.json
# main always writes a new suffix, but jobs pick the latest one with the same prefix
# quote from https://docs.github.com/en/actions/reference/workflows-and-actions/dependency-caching#cache-hits-and-misses
# If you provide restore-keys, the cache action sequentially searches for any caches that match the list of restore-keys.
# If there are no exact matches, the action searches for partial matches of the restore keys.
# When the action finds a partial match, the most recent cache is restored to the path directory.
key: test-durations
- name: Generate test slices
id: matrix
run: |
MATRIX=$(python3 scripts/run_tests_parallel.py --generate-slices ${{ inputs.slice_count }})
echo "matrix=$MATRIX" >> "$GITHUB_OUTPUT"
test:
name: Run tests slice ${{ matrix.slice.index }}/${{ inputs.slice_count }}
needs: generate
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix: ${{ fromJSON(needs.generate.outputs.matrix) }}
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install ripgrep (prebuilt binary)
run: |
set -euo pipefail
RG_VERSION=15.1.0
RG_SHA256=1c9297be4a084eea7ecaedf93eb03d058d6faae29bbc57ecdaf5063921491599
RG_TARBALL=ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl.tar.gz
curl -sSfL --retry 3 --retry-delay 5 -o "$RG_TARBALL" \
curl -sSfL -o "$RG_TARBALL" \
"https://github.com/BurntSushi/ripgrep/releases/download/${RG_VERSION}/${RG_TARBALL}"
echo "${RG_SHA256} ${RG_TARBALL}" | sha256sum -c -
tar -xzf "$RG_TARBALL"
@@ -65,7 +58,7 @@ jobs:
rg --version
- name: Install uv
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
with:
# Persist uv's download/wheel cache (~/.cache/uv) across runs.
# Keyed on the dependency manifests, so the cache is reused until
@@ -85,28 +78,40 @@ jobs:
# fails if the lock is out of sync with pyproject.toml), giving a
# reproducible env. It also creates .venv itself, so no separate
# `uv venv` step is needed.
uses: ./.github/actions/retry
with:
command: uv sync --locked --python 3.11 --extra all --extra dev
run: uv sync --locked --python 3.11 --extra all --extra dev
- name: Minimize uv cache
# Optimized for CI: prunes pre-built wheels that are cheap to
# re-download, keeping the persisted cache small and fast to restore.
run: uv cache prune --ci
- name: Run tests (slice ${{ matrix.slice.index }}/${{ inputs.slice_count }})
# Per-file isolation via scripts/run_tests.sh: each test file runs
# in its own freshly-spawned `python -m pytest <file>` subprocess
- name: Run tests (slice ${{ matrix.slice }}/6)
# Per-file isolation via scripts/run_tests_parallel.py: discovers
# every test_*.py file under tests/ (excluding integration/ + e2e/),
# then runs `python -m pytest <file>` in a freshly-spawned subprocess
# with bounded parallelism. No xdist, no shared workers, no
# module-level state leakage between files.
#
# File list is pre-computed by the generate job (--generate-slices)
# which runs LPT distribution once and passes the file list to each
# matrix job via --files. Previously each job re-discovered files and
# re-ran LPT independently — redundant N times.
# Why per-file (not per-test): per-test spawn cost (~250ms × 17k
# tests = 70min CPU minimum) blew the wall-clock budget. Per-file
# spawn (~250ms × ~850 files = ~3.5min) fits while still giving
# every file a fresh interpreter — the only isolation boundary
# that matters in practice (cross-file leakage was the original
# flake source; intra-file is the test author's responsibility).
#
# Why drop xdist entirely: xdist's persistent workers accumulate
# state across files, which is exactly the leakage we wanted to
# fix. ThreadPoolExecutor + subprocess.run is ~60 lines and does
# the job with cleaner semantics.
#
# Matrix slicing (--slice I/N): files are distributed across 6
# jobs by cached duration (LPT algorithm) so each job gets
# roughly equal wall time. Without a cache, files default to 2s
# estimate and get split roughly evenly by count — still correct,
# just not perfectly balanced.
run: |
source .venv/bin/activate
scripts/run_tests.sh --files '${{ matrix.slice.files }}'
python scripts/run_tests_parallel.py --slice ${{ matrix.slice }}/6
env:
# Ensure tests don't accidentally call real APIs
OPENROUTER_API_KEY: ""
@@ -116,7 +121,7 @@ jobs:
- name: Upload per-slice durations
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
name: test-durations-slice-${{ matrix.slice.index }}
name: test-durations-slice-${{ matrix.slice }}
path: test_durations.json
retention-days: 1
@@ -166,7 +171,7 @@ jobs:
RG_VERSION=15.1.0
RG_SHA256=1c9297be4a084eea7ecaedf93eb03d058d6faae29bbc57ecdaf5063921491599
RG_TARBALL=ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl.tar.gz
curl -sSfL --retry 3 --retry-delay 5 -o "$RG_TARBALL" \
curl -sSfL -o "$RG_TARBALL" \
"https://github.com/BurntSushi/ripgrep/releases/download/${RG_VERSION}/${RG_TARBALL}"
echo "${RG_SHA256} ${RG_TARBALL}" | sha256sum -c -
tar -xzf "$RG_TARBALL"
@@ -175,7 +180,7 @@ jobs:
rg --version
- name: Install uv
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
with:
# Persist uv's download/wheel cache (~/.cache/uv) across runs.
# Keyed on the dependency manifests, so the cache is reused until
@@ -195,9 +200,7 @@ jobs:
# fails if the lock is out of sync with pyproject.toml), giving a
# reproducible env. It also creates .venv itself, so no separate
# `uv venv` step is needed.
uses: ./.github/actions/retry
with:
command: uv sync --locked --python 3.11 --extra all --extra dev
run: uv sync --locked --python 3.11 --extra all --extra dev
- name: Minimize uv cache
# Optimized for CI: prunes pre-built wheels that are cheap to

View File

@@ -2,11 +2,16 @@
name: Typecheck
on:
workflow_call:
push:
branches: [main]
# No paths filter — the job must always run so the required check
# reports a status (path-gated workflows leave checks "pending" forever
# when no matching files change, which blocks merge).
pull_request:
branches: [main]
jobs:
typecheck:
name: Check TypeScript
runs-on: ubuntu-latest
strategy:
matrix:
@@ -19,13 +24,7 @@ jobs:
with:
node-version: 22
cache: npm
# --ignore-scripts: typecheck only needs the TS sources + type defs, not
# native builds. Skipping install scripts drops node-pty's node-gyp
# header fetch — the transient flake that killed this job pre-`tsc` — and
# is faster. retry covers the remaining registry blips.
- uses: ./.github/actions/retry
with:
command: npm ci --ignore-scripts
- run: npm ci
- run: npm run --prefix ${{ matrix.package }} typecheck
# Production build of the desktop renderer. `typecheck` runs `tsc` only,
@@ -35,7 +34,6 @@ jobs:
# users build apps/desktop from source on install/update. Run the real
# `vite build` here so that class of break fails in CI instead.
desktop-build:
name: Build desktop app
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -43,9 +41,5 @@ jobs:
with:
node-version: 22
cache: npm
# Keep install scripts here: the production build may need node-pty's
# native binary. retry handles the transient install-time fetch flakes.
- uses: ./.github/actions/retry
with:
command: npm ci
- run: npm ci
- run: npm run --prefix apps/desktop build

View File

@@ -5,11 +5,11 @@ name: Publish to PyPI
on:
push:
tags:
- "v20*" # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
- 'v20*' # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
workflow_dispatch:
inputs:
confirm_tag:
description: "Tag to publish (e.g. v2026.5.15). Must already exist."
description: 'Tag to publish (e.g. v2026.5.15). Must already exist.'
required: true
type: string
@@ -27,7 +27,7 @@ jobs:
name: Build distribution 📦
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
# On workflow_dispatch, check out the confirmed tag.
@@ -43,17 +43,17 @@ jobs:
fi
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: "3.13"
python-version: '3.13'
- name: Install uv
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6
- name: Set up Node.js
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
with:
node-version: "22"
node-version: '22'
- name: Build web dashboard
run: cd web && npm ci && npm run build
@@ -81,7 +81,7 @@ jobs:
run: uv build --sdist --wheel
- name: Upload distribution artifacts
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: python-package-distributions
path: dist/
@@ -94,17 +94,17 @@ jobs:
name: pypi
url: https://pypi.org/p/hermes-agent
permissions:
id-token: write # OIDC trusted publishing
id-token: write # OIDC trusted publishing
steps:
- name: Download distribution artifacts
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
with:
name: python-package-distributions
path: dist/
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
with:
skip-existing: true
@@ -116,12 +116,12 @@ jobs:
needs: publish
runs-on: ubuntu-latest
permissions:
contents: write # attach assets to the existing release
id-token: write # sigstore signing
contents: write # attach assets to the existing release
id-token: write # sigstore signing
steps:
- name: Download distribution artifacts
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
with:
name: python-package-distributions
path: dist/
@@ -145,7 +145,7 @@ jobs:
- name: Sign with Sigstore
if: env.skip_sign != 'true'
uses: sigstore/gh-action-sigstore-python@04cffa1d795717b140764e8b640de88853c92acc # v3.3.0
uses: sigstore/gh-action-sigstore-python@04cffa1d795717b140764e8b640de88853c92acc # v3.3.0
with:
inputs: >-
./dist/*.tar.gz

View File

@@ -4,7 +4,7 @@ name: uv.lock check
# that modify pyproject.toml without regenerating uv.lock (or vice versa)
# must not merge, because the Docker build's `uv sync --frozen` step will
# fail on a stale lockfile and we'd rather catch it here than in the
# docker workflow on main.
# docker-publish workflow on main.
#
# ─────────────────────────────────────────────────────────────────────────
# IMPORTANT: this check runs against the MERGED state, not just your branch
@@ -44,14 +44,25 @@ name: uv.lock check
# the same way. Better to catch it here than after merge.
on:
workflow_call:
push:
branches: [main]
paths:
- "pyproject.toml"
- "uv.lock"
- ".github/workflows/uv-lockfile-check.yml"
# No paths filter — the job must always run so the required check
# reports a status (path-gated workflows leave checks "pending" forever
# when no matching files change, which blocks merge).
pull_request:
branches: [main]
permissions:
contents: read
concurrency:
group: uv-lockfile-check-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
jobs:
check:
@@ -63,7 +74,7 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install uv
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
# `uv lock --check` re-resolves the project from pyproject.toml and
# compares the result to uv.lock, exiting non-zero if they disagree.
@@ -100,7 +111,7 @@ jobs:
This check is blocking because the Docker image build uses
`uv sync --frozen --extra all`, which rejects stale lockfiles
— catching it here avoids a ~15 min failed docker run
— catching it here avoids a ~15 min failed docker-publish run
on `main` post-merge.
EOF
echo "::error title=uv.lock out of sync::Run \`uv lock\` locally and commit the result. If on a PR, sync with main first."

View File

@@ -123,17 +123,6 @@ conservative at the waist.
without E2E proof, and plugins that touch core files.** Plugins live in their
own directory and work within the ABCs/hooks we provide; if a plugin needs
more, widen the generic plugin surface, don't special-case it in core.
- **Third-party products / other people's projects integrated into the core
tree.** Observability backends, vendor SaaS integrations, analytics dashboards,
and similar "someone else's product" plugins do NOT land under `plugins/` in
this repo. They place an ongoing maintenance burden on us to keep them working
against a fast-moving core, for a backend we don't own. Ship them as a
**standalone plugin repo** users install into `~/.hermes/plugins/` (or via a
pip entry point), and promote them in the Nous Research Discord
(`#plugins-skills-and-skins`). This is a coupling-and-maintenance decision, not
a quality bar — the plugin can be excellent and still be a close. PRs that add
such a directory to the tree are closed with a pointer to publish it as its own
repo.
### Before you call it a bug — verify the premise (and when NOT to close)
@@ -794,24 +783,6 @@ landing in this tree. PRs that add a new directory under
provider as its own repo. Existing in-tree providers stay; bug fixes
to them are welcome.
**No new third-party-product plugins in-tree (policy, June 2026):** the
same rule applies beyond memory providers. Plugins that integrate
someone else's product or project — observability/metrics backends,
vendor SaaS connectors, analytics dashboards, paid-service tie-ins —
must ship as **standalone plugin repos** that users install into
`~/.hermes/plugins/` (or via pip entry points). They register through
the existing plugin discovery path and use the ABCs/hooks/ctx surface
we expose; nothing special is needed in core. The reason is
maintenance load: every product we absorb into the tree becomes our
burden to keep working against a fast-moving core, for a backend we
don't own. Promote standalone plugins in the Nous Research Discord
(`#plugins-skills-and-skins`). PRs that add such a directory under
`plugins/` are closed with a pointer to publish it as its own repo —
this is a coupling decision, not a quality judgment. (The
`observability/`, `kanban/`, `disk-cleanup/`, etc. directories already
in the tree are existing precedent, not an invitation to add more
third-party-product plugins alongside them.)
### Model-provider plugins (`plugins/model-providers/<name>/`)
Every inference backend (openrouter, anthropic, gmi, deepseek, nvidia, …)

View File

@@ -85,23 +85,6 @@ This isn't a quality bar — it's a coupling-and-maintenance decision. Memory pr
---
## Third-Party Product Integrations: Ship as a Standalone Plugin
The same rule extends to **any plugin that integrates someone else's product or project** — observability/metrics backends, vendor SaaS connectors, analytics dashboards, paid-service tie-ins, and similar third-party integrations. **These do not land in this repo.**
The reason is maintenance load, not quality. Every external product absorbed into the core tree becomes ours to keep working against a fast-moving codebase, for a backend we don't own and can't control. Hermes ships a lot and the core moves quickly; coupling third-party products into it creates an open-ended burden on the maintainers.
Publish these as a **standalone plugin repo** instead:
- Implement the relevant ABC and use the existing plugin discovery path (`~/.hermes/plugins/`, project `.hermes/plugins/`, or a pip entry point) — see [Build a Hermes Plugin](https://hermes-agent.nousresearch.com/docs/guides/build-a-hermes-plugin)
- Register lifecycle hooks (`pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`, `on_session_start`, `on_session_end`), tools (`ctx.register_tool`), and CLI subcommands (`ctx.register_cli_command`) through the surface we already expose — no core changes needed
- If your plugin needs a capability the framework doesn't expose, that's a feature request to **widen the generic plugin surface** (a new hook or `ctx` method) — never special-case your plugin in core
- Promote it in the [Nous Research Discord](https://discord.gg/NousResearch) `#plugins-skills-and-skins` channel so users can find and install it
A well-built third-party-product plugin can clear automated review and still be closed for this reason — it's a placement decision, not a verdict on the code. PRs that add such a directory under `plugins/` will be closed with a pointer to publish it as its own repo.
---
## Development Setup
### Prerequisites

View File

@@ -189,13 +189,7 @@ RUN cd web && npm run build && \
# ---------- Source code ----------
# .dockerignore excludes node_modules, so the installs above survive.
# --link decouples this layer from parents for cache purposes; --chmod bakes
# the final read-only permissions at copy time so we skip the separate
# `chmod -R` pass that previously walked ~30k files across the venv +
# node_modules + source (21s amd64 / 222s arm64 — #49113). `a+rX,go-w`
# gives the non-root hermes user read + traverse but no write; root retains
# write so the build steps below don't need chmod u+w dances.
COPY --link --chmod=a+rX,go-w . .
COPY . .
# ---------- Permissions ----------
# Link hermes-agent itself (editable). Deps are already installed in the
@@ -203,15 +197,19 @@ COPY --link --chmod=a+rX,go-w . .
# resolution or downloads.
RUN uv pip install --no-cache-dir --no-deps -e "."
# Wire the exec shim and install-method stamp. Files under /opt/hermes are
# already root-owned (COPY, uv sync, npm install all run as root) and
# read-only for the hermes user (go-w from the --chmod above).
# Keep /opt/hermes immutable for the runtime hermes user. Hosted/container
# instances must not be able to self-edit the installed source or venv; user
# data, skills, plugins, config, logs, and dashboard uploads live under
# /opt/data instead. Root can still repair the image during build/boot, but
# supervised Hermes processes drop to the non-root hermes user.
USER root
RUN mkdir -p /opt/hermes/bin && \
cp /opt/hermes/docker/hermes-exec-shim.sh /opt/hermes/bin/hermes && \
chmod 0755 /opt/hermes/bin/hermes && \
printf 'docker\n' > /opt/hermes/.install_method
printf 'docker\n' > /opt/hermes/.install_method && \
chown -R root:root /opt/hermes && \
chmod -R a+rX /opt/hermes && \
chmod -R a-w /opt/hermes
# The ``.install_method`` stamp is baked next to the running code (the install
# tree), NOT into $HERMES_HOME. $HERMES_HOME (/opt/data) is a shared data
# volume that is commonly bind-mounted from the host and even shared with a
@@ -238,11 +236,13 @@ RUN mkdir -p /opt/hermes/bin && \
#
# The arg is optional — local `docker build` without --build-arg simply
# omits the file, and the runtime falls back to live-git lookup. CI
# (.github/workflows/docker.yml) passes ${{ github.sha }} so
# (.github/workflows/docker-publish.yml) passes ${{ github.sha }} so
# every published image has it.
ARG HERMES_GIT_SHA=
RUN if [ -n "${HERMES_GIT_SHA}" ]; then \
printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha; \
chmod u+w /opt/hermes && \
printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha && \
chmod a-w /opt/hermes /opt/hermes/.hermes_build_sha; \
fi
# ---------- s6-overlay service wiring ----------
@@ -290,19 +290,6 @@ ENV HERMES_TUI_DIR=/opt/hermes/ui-tui
ENV HERMES_HOME=/opt/data
ENV HERMES_WRITE_SAFE_ROOT=/opt/data
ENV HERMES_DISABLE_LAZY_INSTALLS=1
# The published image seals /opt/hermes (root-owned, read-only) so a runtime
# lazy install can't mutate the agent's own venv and brick it. But opt-in
# backends (Firecrawl web search, Exa, Feishu, …) keep their SDKs in
# tools/lazy_deps.py — deliberately NOT baked into [all] (see pyproject.toml
# policy 2026-05-12: one quarantined release must not break every install).
# Redirect those lazy installs to a writable dir on the durable data volume.
# lazy_deps appends this dir to the END of sys.path, so a package installed
# here can only ADD modules — it can never shadow or downgrade a core module,
# so the sealed-venv guarantee holds even with installs re-enabled. The dir
# is seeded + chowned to the hermes user by docker/stage2-hook.sh and lives
# on the /opt/data volume, so it persists across container recreates / image
# updates (an ABI stamp invalidates it if a rebuild bumps the interpreter).
ENV HERMES_LAZY_INSTALL_TARGET=/opt/data/lazy-packages
# `docker exec` privilege-drop shim. When operators run
# `docker exec <c> hermes ...` they default to root, and any file the

View File

@@ -18,7 +18,7 @@
**The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
Use any model you want — [Nous Portal](https://portal.nousresearch.com), OpenRouter, OpenAI, your own endpoint, and [many others](https://hermes-agent.nousresearch.com/docs/integrations/providers). Switch with `hermes model` — no code changes, no lock-in.
Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NovitaAI](https://novita.ai) (AI-native cloud for Model API, Agent Sandbox, and GPU Cloud), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
<table>
<tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>

View File

@@ -23,11 +23,6 @@ except ModuleNotFoundError:
# new code but ``uv pip install -e .`` didn't finish. Missing bootstrap
# means UTF-8 stdio setup is skipped on Windows; POSIX is unaffected.
pass
else:
# Stop a ``utils/``/``proxy/``/``ui/`` package in the launch directory from
# shadowing Hermes's own modules — ``hermes acp`` can be started from any
# cwd, including a project that has same-named packages on its path.
hermes_bootstrap.harden_import_path()
import argparse
import asyncio

View File

@@ -74,7 +74,7 @@ _POLISHED_TOOLS = {
"kanban_create", "kanban_show", "kanban_comment", "kanban_complete",
"kanban_block", "kanban_link", "kanban_heartbeat",
"yb_query_group_info", "yb_query_group_members", "yb_search_sticker",
"yb_send_dm", "yb_send_sticker",
"yb_send_dm", "yb_send_sticker", "mixture_of_agents",
}

View File

@@ -106,12 +106,7 @@ def _custom_provider_extra_body_for_agent(
base_url: str,
custom_providers: List[Dict[str, Any]],
) -> Optional[Dict[str, Any]]:
provider_norm = (provider or "").strip().lower()
if provider_norm == "custom":
provider_key_filter = ""
elif provider_norm.startswith("custom:"):
provider_key_filter = provider_norm.split(":", 1)[1].strip()
else:
if (provider or "").strip().lower() != "custom":
return None
target_url = _normalized_custom_base_url(base_url)
@@ -122,13 +117,6 @@ def _custom_provider_extra_body_for_agent(
for entry in custom_providers or []:
if not isinstance(entry, dict):
continue
if provider_key_filter:
entry_keys = {
str(entry.get("provider_key", "") or "").strip().lower(),
str(entry.get("name", "") or "").strip().lower(),
}
if provider_key_filter not in entry_keys:
continue
if _normalized_custom_base_url(entry.get("base_url")) != target_url:
continue
extra_body = entry.get("extra_body")
@@ -719,55 +707,6 @@ def init_agent(
print("🔑 Using credentials: Microsoft Entra ID")
elif isinstance(effective_key, str) and len(effective_key) > 12:
print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}")
elif agent.provider == "moa":
from agent.moa_loop import MoAClient
agent.api_mode = "chat_completions"
# Route reference-model outputs to the agent's tool_progress_callback so
# every surface that already consumes it (CLI spinner/scrollback, TUI,
# desktop, gateway) can show each reference's answer as a labelled block
# before the aggregator acts. The facade emits "moa.reference" and
# "moa.aggregating" events; we forward them through the same callback
# the tool lifecycle uses. Best-effort and cache-safe — these are
# display-only events, they never touch the message history.
def _moa_reference_relay(event: str, **kwargs: Any) -> None:
cb = getattr(agent, "tool_progress_callback", None)
if cb is None:
return
try:
if event == "moa.reference":
label = str(kwargs.get("label") or "")
text = str(kwargs.get("text") or "")
idx = kwargs.get("index")
count = kwargs.get("count")
cb(
"moa.reference",
label,
text,
None,
moa_index=idx,
moa_count=count,
)
elif event == "moa.aggregating":
cb(
"moa.aggregating",
str(kwargs.get("aggregator") or ""),
None,
None,
moa_ref_count=kwargs.get("ref_count"),
)
except Exception:
pass
agent.client = MoAClient(
agent.model or "default",
reference_callback=_moa_reference_relay,
)
agent._client_kwargs = {}
agent.api_key = api_key or "moa-virtual-provider"
agent.base_url = "moa://local"
if not agent.quiet_mode:
print(f"🤖 AI Agent initialized with MoA preset: {agent.model}")
elif agent.api_mode == "bedrock_converse":
# AWS Bedrock — uses boto3 directly, no OpenAI client needed.
# Region is extracted from the base_url or defaults to us-east-1.
@@ -1307,12 +1246,6 @@ def init_agent(
_agent_section = {}
agent._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")
# Intent-ack continuation config: "auto" (default — codex_responses only,
# the historical gate), true (all api_modes), false (never), or a list of
# model-name substrings. Resolved against the active api_mode/model in the
# conversation loop's intent-ack block.
agent._intent_ack_continuation = _agent_section.get("intent_ack_continuation", "auto")
# Universal task-completion guidance toggle. Default True. Surfaced
# as a separate flag from tool_use_enforcement because the guidance
# applies to ALL models, not just the model families enforcement
@@ -1573,7 +1506,6 @@ def init_agent(
# 3. Check general plugin system (user-installed plugins)
# 4. Fall back to built-in ContextCompressor
_selected_engine = None
_copy_failed = False
_engine_name = "compressor" # default
try:
_ctx_cfg = _agent_cfg.get("context", {}) if isinstance(_agent_cfg, dict) else {}
@@ -1591,35 +1523,15 @@ def init_agent(
# Try general plugin system as fallback
if _selected_engine is None:
_candidate = None
try:
from hermes_cli.plugins import get_plugin_context_engine
_candidate = get_plugin_context_engine()
if _candidate and _candidate.name == _engine_name:
_selected_engine = _candidate
except Exception:
_candidate = None
if _candidate is not None and _candidate.name == _engine_name:
# Deep-copy the shared plugin singleton so a child agent's
# update_model() can't mutate the parent's compressor (#42449).
# Copy can fail for engines holding uncopyable state (locks, DB
# connections, clients); in that case fall back to the built-in
# compressor with an ACCURATE message rather than silently
# mislabelling it "not found".
import copy
try:
_selected_engine = copy.deepcopy(_candidate)
except Exception as _copy_err:
_copy_failed = True
_ra().logger.warning(
"Context engine '%s' could not be safely copied for this "
"agent (%s) — falling back to built-in compressor. Plugin "
"engines that hold uncopyable state (locks, DB connections) "
"should implement __deepcopy__ to copy only mutable budget "
"state.",
_engine_name, _copy_err,
)
_selected_engine = None
pass
if _selected_engine is None and not _copy_failed:
if _selected_engine is None:
_ra().logger.warning(
"Context engine '%s' not found — falling back to built-in compressor",
_engine_name,
@@ -1676,10 +1588,8 @@ def init_agent(
f"Model {agent.model} has a context window of {_ctx:,} tokens, "
f"which is below the minimum {MINIMUM_CONTEXT_LENGTH:,} required "
f"by Hermes Agent. Choose a model with at least "
f"{MINIMUM_CONTEXT_LENGTH // 1000}K context. If your server "
f"reports a window smaller than the model's true window, set "
f"model.context_length in config.yaml to the real value "
f"(this must be at least {MINIMUM_CONTEXT_LENGTH // 1000}K)."
f"{MINIMUM_CONTEXT_LENGTH // 1000}K context, or set "
f"model.context_length in config.yaml to override."
)
# Inject context engine tool schemas (e.g. lcm_grep, lcm_describe, lcm_expand).
@@ -1711,27 +1621,16 @@ def init_agent(
for t in agent.tools
if isinstance(t, dict)
}
from agent.memory_manager import normalize_tool_schema as _normalize_tool_schema
for _raw_schema in agent.context_compressor.get_tool_schemas():
_schema = _normalize_tool_schema(_raw_schema)
if _schema is None:
# A schema with no resolvable name (e.g. an already-wrapped
# entry) would append a nameless tool that strict providers
# 400 on, disabling the whole toolset (#47707). Skip it.
_ra().logger.warning(
"Context engine returned a tool schema with no resolvable "
"name; skipping to avoid poisoning the request (%r)",
_raw_schema,
)
continue
_tname = _schema["name"]
if _tname in _existing_tool_names:
for _schema in agent.context_compressor.get_tool_schemas():
_tname = _schema.get("name", "")
if _tname and _tname in _existing_tool_names:
continue # already registered via plugin/cache path
_wrapped = {"type": "function", "function": _schema}
agent.tools.append(_wrapped)
agent.valid_tool_names.add(_tname)
agent._context_engine_tool_names.add(_tname)
_existing_tool_names.add(_tname)
if _tname:
agent.valid_tool_names.add(_tname)
agent._context_engine_tool_names.add(_tname)
_existing_tool_names.add(_tname)
# Notify context engine of session start
if hasattr(agent, "context_compressor") and agent.context_compressor:

View File

@@ -42,14 +42,6 @@ from utils import base_url_host_matches, base_url_hostname, env_var_enabled, ato
logger = logging.getLogger(__name__)
# Max consecutive successful credential-pool token refreshes of the SAME entry
# on a persistent auth failure before we give up and let the fallback chain
# activate. A single-entry OAuth pool can re-mint a fresh token indefinitely
# even when the upstream keeps rejecting it, so without this cap the retry loop
# spins forever and never reaches ``_try_activate_fallback``. See #26080.
_MAX_AUTH_REFRESH_ATTEMPTS = 2
def _ra():
"""Lazy ``run_agent`` reference for test-patch routing."""
import run_agent
@@ -783,30 +775,6 @@ def recover_with_credential_pool(
return False, has_retried_429
refreshed = pool.try_refresh_current()
if refreshed is not None:
# ``try_refresh_current()`` re-mints a fresh OAuth token and reports
# success even when the upstream keeps rejecting it — a single-entry
# pool (common for OAuth/Max subscribers) has nothing to rotate to,
# so a bare "refreshed → retry" loop spins forever on the same dead
# token and the configured fallback never activates. Cap consecutive
# same-entry refreshes and fall through to fallback once exceeded.
# See #26080.
refreshed_id = getattr(refreshed, "id", None)
if refreshed_id is not None:
refresh_counts = getattr(agent, "_auth_pool_refresh_counts", None)
if refresh_counts is None:
refresh_counts = {}
agent._auth_pool_refresh_counts = refresh_counts
refresh_key = (agent.provider, refreshed_id)
refresh_counts[refresh_key] = refresh_counts.get(refresh_key, 0) + 1
if refresh_counts[refresh_key] > _MAX_AUTH_REFRESH_ATTEMPTS:
_ra().logger.warning(
"Credential auth failure persists after %s refreshes for "
"pool entry %s — treating as unrecoverable and allowing "
"fallback to activate.",
refresh_counts[refresh_key] - 1,
refreshed_id,
)
return False, has_retried_429
_ra().logger.info(f"Credential auth failure — refreshed pool entry {getattr(refreshed, 'id', '?')}")
agent._swap_credential(refreshed)
return True, has_retried_429
@@ -1078,34 +1046,6 @@ def restore_primary_runtime(agent) -> bool:
api_mode=rt.get("compressor_api_mode", ""),
)
# ── Re-select from the credential pool if one is available ──
# The snapshot's api_key was captured at construction time. Across
# turns the pool may have rotated (token revocation, billing/rate-limit
# exhaustion, cooldown), leaving the snapshot key stale. Restoring it
# blindly re-fails on the first request and burns through the remaining
# pool entries before cross-provider fallback even gets a chance. Ask
# the pool for its current best entry and swap the live credential in.
# When the pool is absent, empty, or the entry has no usable key, we
# keep the snapshot key (the existing behavior). Fixes #25205.
pool = getattr(agent, "_credential_pool", None)
if pool is not None and pool.has_available():
entry = pool.select()
if entry is not None:
entry_key = (
getattr(entry, "runtime_api_key", None)
or getattr(entry, "access_token", "")
)
if entry_key:
# ``_swap_credential`` rebuilds the OpenAI/Anthropic client,
# reapplies base-url-scoped headers, and carries the
# accumulated base_url / OAuth-detection fixes (#33163).
agent._swap_credential(entry)
logger.info(
"Restore re-selected pool entry %s (%s)",
getattr(entry, "id", "?"),
getattr(entry, "label", "?"),
)
# ── Reset fallback chain for the new turn ──
agent._fallback_activated = False
agent._fallback_index = 0
@@ -1480,15 +1420,6 @@ def create_openai_client(agent, client_kwargs: dict, *, reason: str, shared: boo
keepalive_http = agent._build_keepalive_http_client(client_kwargs.get("base_url", ""))
if keepalive_http is not None:
client_kwargs["http_client"] = keepalive_http
# Delegate all rate-limit / 5xx retry to hermes's outer conversation loop,
# which honors Retry-After and applies adaptive/jittered backoff. The OpenAI
# SDK default (max_retries=2) uses its own 1-2s backoff that ignores
# Retry-After and double-retries inside our loop — the same deadlock the
# Anthropic clients hit (#26293). This is the single chokepoint every primary
# OpenAI/aggregator client passes through (init, switch_model, recovery,
# restore, request-scoped); auxiliary_client builds its own clients and keeps
# SDK retries because it is NOT wrapped by the conversation loop.
client_kwargs.setdefault("max_retries", 0)
# Uses the module-level `OpenAI` name, resolved lazily on first
# access via __getattr__ below. Tests patch via `run_agent.OpenAI`.
client = _ra().OpenAI(**client_kwargs)
@@ -1568,10 +1499,6 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
# _client_kwargs is a dict — snapshot a shallow copy so mutating the
# live dict doesn't poison the rollback target.
_snapshot["_client_kwargs"] = dict(getattr(agent, "_client_kwargs", {}) or {})
# Snapshot the credential pool reference so a failed client rebuild can
# restore the original pool (issue #52727: pool reload is part of this
# switch and must be reversible on rollback).
_snapshot["_credential_pool"] = getattr(agent, "_credential_pool", _MISSING)
try:
# Clear the per-config context_length override so the new model's
@@ -1596,36 +1523,8 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
if api_key:
agent.api_key = api_key
# ── Reload credential pool for the new provider (issue #52727) ──
# Without this, ``recover_with_credential_pool`` sees a
# ``pool.provider != agent.provider`` mismatch and short-circuits,
# leaving the new provider with no rotation/recovery on 401/429 and
# burning the original pool's entries. Only reload when the provider
# actually changed (or the pool was missing) — re-selecting the same
# provider must not churn the pool reference. A reload failure is
# logged + swallowed: the switch itself must still complete.
old_norm = (old_provider or "").strip().lower()
new_norm = (new_provider or "").strip().lower()
if old_norm != new_norm or getattr(agent, "_credential_pool", None) is None:
try:
from agent.credential_pool import load_pool
agent._credential_pool = load_pool(new_provider)
except Exception as _pool_exc: # noqa: BLE001
logger.warning(
"switch_model: credential pool reload failed for %s (%s); "
"continuing without pool rotation this turn",
new_provider, _pool_exc,
)
# ── Build new client ──
if (new_provider or "").strip().lower() == "moa":
from agent.moa_loop import MoAClient
agent.api_key = api_key or "moa-virtual-provider"
agent.base_url = "moa://local"
agent._client_kwargs = {}
agent.client = MoAClient(agent.model or "default")
elif api_mode == "anthropic_messages":
if api_mode == "anthropic_messages":
from agent.anthropic_adapter import (
build_anthropic_client,
resolve_anthropic_token,
@@ -1798,27 +1697,6 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
old_model, old_provider, new_model, new_provider,
)
# ── Persist billing route to session DB ──
# The agent's _session_db / session_id may not be set in all contexts
# (tests, bare agents without a session DB, etc.). This ensures the
# dashboard Model cards show the actual provider after a mid-session
# /model switch instead of the stale session-creation provider.
# See #48248 for the full bug description.
_session_db = getattr(agent, "_session_db", None)
_session_id = getattr(agent, "session_id", None)
if _session_db is not None and _session_id:
try:
_session_db.update_session_billing_route(
_session_id,
provider=agent.provider,
base_url=agent.base_url,
billing_mode=getattr(agent, "api_mode", None),
)
except Exception:
logger.warning(
"Failed to persist billing route after model switch",
exc_info=True,
)
def invoke_tool(agent, function_name: str, function_args: dict, effective_task_id: str,
@@ -2205,21 +2083,8 @@ def looks_like_codex_intermediate_ack(
user_message: str,
assistant_content: str,
messages: List[Dict[str, Any]],
require_workspace: bool = True,
) -> bool:
"""Detect a planning/ack message that should continue instead of ending the turn.
``require_workspace`` (default True) keeps the original codex-coding scope:
the ack must reference a filesystem/repo workspace. The conversation loop
passes ``require_workspace=False`` when the user has explicitly opted into
intent-ack continuation for all api_modes (``agent.intent_ack_continuation``
is ``true`` or a model-list), so general autonomous workflows ("I'll run a
health check on the server", "I'll start the deployment") — which carry a
future-ack and an action verb but no filesystem reference — are caught too.
The future-ack + short-content + no-prior-tools + action-verb requirements
always apply, which is what keeps conversational "I'll help you brainstorm"
replies from tripping it.
"""
"""Detect a planning/ack message that should continue instead of ending the turn."""
if any(isinstance(msg, dict) and msg.get("role") == "tool" for msg in messages):
return False
@@ -2272,67 +2137,17 @@ def looks_like_codex_intermediate_ack(
"path",
)
assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
if not assistant_mentions_action:
return False
# Opted-in (all-api_mode) path: a future-ack + action verb + no prior tool
# call is enough — the user asked us to keep going when the model only
# announces intent, regardless of whether a filesystem is involved.
if not require_workspace:
return True
user_text = (user_message or "").strip().lower()
user_targets_workspace = (
any(marker in user_text for marker in workspace_markers)
or "~/" in user_text
or "/" in user_text
)
assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
assistant_targets_workspace = any(
marker in assistant_text for marker in workspace_markers
)
return user_targets_workspace or assistant_targets_workspace
def intent_ack_continuation_mode(agent) -> str:
"""Classify the resolved intent-ack continuation mode for this turn.
Returns one of:
* ``"off"`` — never continue.
* ``"codex_only"`` — historical scope: continue only on the
``codex_responses`` api_mode, and only for codebase/workspace acks
(``require_workspace=True``).
* ``"all"`` — user opted in for every api_mode; continue on any
future-ack + action verb (``require_workspace=False``).
Mirrors the four-mode shape of ``agent.tool_use_enforcement``: ``"auto"``
(default) → codex_only; ``True``/"true"/"always"/"yes"/"on" → all;
``False``/"false"/"never"/"no"/"off" → off; ``list`` → all when a substring
matches the active model name, else off.
"""
mode = getattr(agent, "_intent_ack_continuation", "auto")
if mode is True or (isinstance(mode, str) and mode.lower() in {"true", "always", "yes", "on"}):
return "all"
if mode is False or (isinstance(mode, str) and mode.lower() in {"false", "never", "no", "off"}):
return "off"
if isinstance(mode, list):
model_lower = (agent.model or "").lower()
return "all" if any(p.lower() in model_lower for p in mode if isinstance(p, str)) else "off"
# "auto" or any unrecognised value — historical codex-only behavior.
return "codex_only" if agent.api_mode == "codex_responses" else "off"
def intent_ack_continuation_enabled(agent) -> bool:
"""Whether intent-ack continuation should fire at all for this turn.
The ``codex_ack_continuations < 2`` per-turn cap and the
``looks_like_codex_intermediate_ack`` detector are applied by the caller;
this only decides the on/off gate. Callers that also need to know whether
the workspace requirement applies should use ``intent_ack_continuation_mode``
directly (``"codex_only"`` ⇒ require_workspace=True, ``"all"`` ⇒ False).
"""
return intent_ack_continuation_mode(agent) != "off"
return (user_targets_workspace or assistant_targets_workspace) and assistant_mentions_action

View File

@@ -673,9 +673,6 @@ def _build_anthropic_client_with_bearer_hook(
kwargs = {
"timeout": timeout_obj,
"http_client": http_client,
# Delegate retry to hermes's outer loop (honors Retry-After); the SDK
# default max_retries=2 ignores it and double-retries. (#26293)
"max_retries": 0,
# The SDK requires *something* for api_key/auth_token. Our
# event hook overrides Authorization per request so this value
# is never sent. The sentinel string makes accidental leaks
@@ -760,12 +757,6 @@ def build_anthropic_client(
_read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
kwargs = {
"timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
# Delegate all rate-limit / 5xx retry to hermes's outer conversation
# loop, which honors Retry-After. The SDK default (max_retries=2) uses
# its own 1-2s backoff that ignores Retry-After and double-retries
# inside our loop — burning request slots against a bucket that won't
# refill for minutes. (#26293)
"max_retries": 0,
}
if normalized_base_url:
# Azure Anthropic endpoints require an ``api-version`` query parameter.
@@ -861,9 +852,6 @@ def build_anthropic_bedrock_client(region: str):
return _anthropic_sdk.AnthropicBedrock(
aws_region=region,
timeout=Timeout(timeout=900.0, connect=10.0),
# Delegate retry to hermes's outer loop (honors Retry-After); the SDK
# default max_retries=2 ignores it and double-retries. (#26293)
max_retries=0,
default_headers={"anthropic-beta": ",".join([*_COMMON_BETAS, _CONTEXT_1M_BETA])},
)
@@ -926,72 +914,44 @@ def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
return None
def _read_claude_code_credentials_from_file() -> Optional[Dict[str, Any]]:
"""Read Claude Code OAuth credentials from ~/.claude/.credentials.json.
Returns dict with {accessToken, refreshToken?, expiresAt?, source} or None.
"""
cred_path = Path.home() / ".claude" / ".credentials.json"
if not cred_path.exists():
return None
try:
data = json.loads(cred_path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError, IOError) as e:
logger.debug("Failed to read ~/.claude/.credentials.json: %s", e)
return None
oauth_data = data.get("claudeAiOauth")
if not (oauth_data and isinstance(oauth_data, dict)):
return None
access_token = oauth_data.get("accessToken", "")
if not access_token:
return None
return {
"accessToken": access_token,
"refreshToken": oauth_data.get("refreshToken", ""),
"expiresAt": oauth_data.get("expiresAt", 0),
"source": "claude_code_credentials_file",
}
def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
"""Read refreshable Claude Code OAuth credentials.
Reads from two possible sources and reconciles them:
Checks two sources in order:
1. macOS Keychain (Darwin only) — "Claude Code-credentials" entry
2. ~/.claude/.credentials.json file
Selection rules when both are present:
- If exactly one is non-expired, prefer that one. (Handles the case
where Claude Code refreshes one source but not the other — observed
in the wild on Claude Code 2.1.x.)
- Otherwise, prefer the source with the later ``expiresAt`` so that
any subsequent refresh uses the most recent ``refreshToken``.
This intentionally excludes ~/.claude.json primaryApiKey. Opencode's
subscription flow is OAuth/setup-token based with refreshable credentials,
and native direct Anthropic provider usage should follow that path rather
than auto-detecting Claude's first-party managed key.
Returns dict with {accessToken, refreshToken?, expiresAt?, source} or None.
Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
"""
# Try macOS Keychain first (covers Claude Code >=2.1.114)
kc_creds = _read_claude_code_credentials_from_keychain()
file_creds = _read_claude_code_credentials_from_file()
if kc_creds:
return kc_creds
if kc_creds and file_creds:
kc_valid = is_claude_code_token_valid(kc_creds)
file_valid = is_claude_code_token_valid(file_creds)
if kc_valid and not file_valid:
return kc_creds
if file_valid and not kc_valid:
return file_creds
# Both valid or both expired: prefer the later expiresAt so the
# downstream refresh path uses the freshest refresh_token.
kc_exp = kc_creds.get("expiresAt", 0) or 0
file_exp = file_creds.get("expiresAt", 0) or 0
return kc_creds if kc_exp >= file_exp else file_creds
# Fall back to JSON file
cred_path = Path.home() / ".claude" / ".credentials.json"
if cred_path.exists():
try:
data = json.loads(cred_path.read_text(encoding="utf-8"))
oauth_data = data.get("claudeAiOauth")
if oauth_data and isinstance(oauth_data, dict):
access_token = oauth_data.get("accessToken", "")
if access_token:
return {
"accessToken": access_token,
"refreshToken": oauth_data.get("refreshToken", ""),
"expiresAt": oauth_data.get("expiresAt", 0),
"source": "claude_code_credentials_file",
}
except (json.JSONDecodeError, OSError, IOError) as e:
logger.debug("Failed to read ~/.claude/.credentials.json: %s", e)
return kc_creds or file_creds
return None
def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool:
@@ -1074,40 +1034,8 @@ def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False)
def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]:
"""Attempt to refresh an expired Claude Code OAuth token.
Claude Code's OAuth refresh tokens are single-use: a successful refresh
rotates the pair and invalidates the old refresh token. Claude Code itself
also refreshes on its own schedule (IDE/CLI activity), so by the time
Hermes notices an expired token, Claude Code may have already rotated it.
POSTing our now-stale refresh token in that window races Claude Code and
fails with ``invalid_grant``.
So before refreshing, re-read the live credential sources. If Claude Code
has already produced a valid token, adopt it and skip the POST entirely.
Only fall back to refreshing ourselves when no fresh credential is found.
"""
# Claude Code may have already refreshed — adopt its token rather than
# racing it with our (possibly already-rotated) refresh token. Only adopt
# when the live re-read produced a DIFFERENT token with a real future
# expiry: re-adopting the same credential we were just handed would be a
# no-op, and a 0/absent ``expiresAt`` means "managed key / unknown expiry"
# (see is_claude_code_token_valid) which must NOT be treated as a fresh
# refresh here.
current = read_claude_code_credentials()
if current:
current_token = current.get("accessToken", "")
current_exp = current.get("expiresAt", 0) or 0
if (
current_token
and current_token != creds.get("accessToken", "")
and current_exp > 0
and is_claude_code_token_valid(current)
):
logger.debug("Adopted Claude Code's already-refreshed OAuth token")
return current_token
refresh_token = (current or {}).get("refreshToken", "") or creds.get("refreshToken", "")
"""Attempt to refresh an expired Claude Code OAuth token."""
refresh_token = creds.get("refreshToken", "")
if not refresh_token:
logger.debug("No refresh token available — cannot refresh")
return None
@@ -1369,15 +1297,7 @@ def run_oauth_setup_token() -> Optional[str]:
# Stores credentials in ~/.hermes/.anthropic_oauth.json (our own file).
_OAUTH_CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
# Anthropic migrated the OAuth token endpoint to platform.claude.com;
# console.anthropic.com now 404s. Callers should iterate _OAUTH_TOKEN_URLS
# (new host first, console fallback). _OAUTH_TOKEN_URL is kept as the primary
# for backward compatibility with existing imports and now points at the live host.
_OAUTH_TOKEN_URLS = [
"https://platform.claude.com/v1/oauth/token",
"https://console.anthropic.com/v1/oauth/token",
]
_OAUTH_TOKEN_URL = _OAUTH_TOKEN_URLS[0]
_OAUTH_TOKEN_URL = "https://console.anthropic.com/v1/oauth/token"
_OAUTH_REDIRECT_URI = "https://console.anthropic.com/oauth/code/callback"
_OAUTH_SCOPES = "org:create_api_key user:profile user:inference"
_HERMES_OAUTH_FILE = get_hermes_home() / ".anthropic_oauth.json"
@@ -1475,34 +1395,18 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
"code_verifier": verifier,
}).encode()
# Anthropic migrated the OAuth token endpoint to platform.claude.com;
# console.anthropic.com now 404s. Try the new host first, then fall
# back to console for older deployments (mirrors the refresh path).
result = None
last_error = None
for endpoint in _OAUTH_TOKEN_URLS:
req = urllib.request.Request(
endpoint,
data=exchange_data,
headers={
"Content-Type": "application/json",
"User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
},
method="POST",
)
try:
with urllib.request.urlopen(req, timeout=15) as resp:
result = json.loads(resp.read().decode())
break
except Exception as exc:
last_error = exc
logger.debug("Anthropic token exchange failed at %s: %s", endpoint, exc)
continue
req = urllib.request.Request(
_OAUTH_TOKEN_URL,
data=exchange_data,
headers={
"Content-Type": "application/json",
"User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
},
method="POST",
)
if result is None:
raise last_error if last_error is not None else ValueError(
"Anthropic token exchange failed"
)
with urllib.request.urlopen(req, timeout=15) as resp:
result = json.loads(resp.read().decode())
except Exception as e:
print(f"Token exchange failed: {e}")
return None

View File

@@ -101,8 +101,6 @@ class _OpenAIProxy:
OpenAI = _OpenAIProxy() # module-level name, resolves lazily on call/isinstance
from agent.credential_pool import load_pool
from agent.model_metadata import MINIMUM_CONTEXT_LENGTH, get_model_context_length
from agent.process_bootstrap import build_keepalive_http_client
from hermes_cli.config import get_hermes_home
from hermes_constants import OPENROUTER_BASE_URL
from utils import base_url_host_matches, base_url_hostname, env_float, model_forces_max_completion_tokens, normalize_proxy_env_vars
@@ -110,23 +108,6 @@ from utils import base_url_host_matches, base_url_hostname, env_float, model_for
logger = logging.getLogger(__name__)
def _openai_http_client_kwargs(
base_url: Optional[str],
*,
async_mode: bool = False,
) -> Dict[str, Any]:
"""Inject keepalive httpx client with env-only proxy (not macOS system proxy)."""
client = build_keepalive_http_client(str(base_url or ""), async_mode=async_mode)
if client is None:
return {}
return {"http_client": client}
def _create_openai_client(*, api_key: str, base_url: str, **kwargs: Any) -> Any:
kwargs = {**_openai_http_client_kwargs(base_url), **kwargs}
return OpenAI(api_key=api_key, base_url=base_url, **kwargs)
# ── Interrupt protection for atomic auxiliary tasks ──────────────────────
# Some auxiliary tasks must NOT be aborted mid-flight by a gateway interrupt
# (e.g. an incoming user message while the agent is busy). Context
@@ -684,28 +665,6 @@ def _pool_runtime_base_url(entry: Any, fallback: str = "") -> str:
return str(url or "").strip().rstrip("/")
# Hostnames (lowercase, exact) that the auxiliary Anthropic path is allowed to
# be pointed at via config.yaml model.base_url. Anything else falls back to the
# Anthropic default — operators routing main-session traffic through a
# non-Anthropic host (e.g. OpenRouter, OpenAI) with provider=anthropic in config
# must NOT have that foreign host leak into the auxiliary client. See #52608.
_ANTHROPIC_COMPATIBLE_HOSTS = frozenset({
"api.anthropic.com",
})
def _is_anthropic_compatible_host(url: str) -> bool:
"""Return True if ``url``'s hostname is an Anthropic endpoint we trust for aux calls."""
if not url:
return False
try:
from urllib.parse import urlparse
host = (urlparse(url).hostname or "").strip().lower().rstrip(".")
return host in _ANTHROPIC_COMPATIBLE_HOSTS
except Exception:
return False
def _nous_min_key_ttl_seconds() -> int:
try:
return max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800")))
@@ -1632,7 +1591,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
_merged_aux = _apply_user_default_headers(extra.get("default_headers"))
if _merged_aux:
extra["default_headers"] = _merged_aux
_client = _create_openai_client(api_key=api_key, base_url=base_url, **extra)
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
_client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
return _client, model
@@ -1672,7 +1631,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
_merged_aux2 = _apply_user_default_headers(extra.get("default_headers"))
if _merged_aux2:
extra["default_headers"] = _merged_aux2
_client = _create_openai_client(api_key=api_key, base_url=base_url, **extra)
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
_client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
return _client, model
@@ -1687,21 +1646,20 @@ def _try_openrouter(explicit_api_key: str = None, model: str = None) -> Tuple[Op
pool_present, entry = _select_pool_entry("openrouter")
if pool_present:
or_key = explicit_api_key or _pool_runtime_api_key(entry)
if or_key:
base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
logger.debug("Auxiliary client: OpenRouter via pool")
return _create_openai_client(api_key=or_key, base_url=base_url,
default_headers=build_or_headers()), model or _OPENROUTER_MODEL
# Pool exists but is exhausted (no usable runtime key) — fall through to
# the OPENROUTER_API_KEY env-var path rather than failing outright.
logger.debug("Auxiliary client: OpenRouter pool exhausted, trying OPENROUTER_API_KEY")
if not or_key:
_mark_provider_unhealthy("openrouter", ttl=60)
return None, None
base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
logger.debug("Auxiliary client: OpenRouter via pool")
return OpenAI(api_key=or_key, base_url=base_url,
default_headers=build_or_headers()), model or _OPENROUTER_MODEL
or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY")
if not or_key:
_mark_provider_unhealthy("openrouter", ttl=60)
return None, None
logger.debug("Auxiliary client: OpenRouter")
return _create_openai_client(api_key=or_key, base_url=OPENROUTER_BASE_URL,
return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
default_headers=build_or_headers()), model or _OPENROUTER_MODEL
@@ -1794,7 +1752,7 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
return None, None
base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/")
return (
_create_openai_client(
OpenAI(
api_key=api_key,
base_url=base_url,
),
@@ -2071,7 +2029,7 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
if _custom_headers:
_extra["default_headers"] = _custom_headers
if custom_mode == "codex_responses":
real_client = _create_openai_client(api_key=custom_key, base_url=_clean_base, **_extra)
real_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
return CodexAuxiliaryClient(real_client, model), model
if custom_mode == "anthropic_messages":
# Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
@@ -2085,14 +2043,14 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
"Custom endpoint declares api_mode=anthropic_messages but the "
"anthropic SDK is not installed — falling back to OpenAI-wire."
)
return _create_openai_client(api_key=custom_key, base_url=_clean_base, **_extra), model
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
return (
AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
model,
)
# URL-based anthropic detection for custom endpoints that didn't set
# api_mode explicitly (e.g. kimi.com/coding reached via custom config).
_fallback_client = _create_openai_client(api_key=custom_key, base_url=_clean_base, **_extra)
_fallback_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
_fallback_client = _maybe_wrap_anthropic(
_fallback_client, model, custom_key, custom_base, custom_mode,
)
@@ -2121,7 +2079,7 @@ def _build_xai_oauth_aux_client(model: str) -> Tuple[Optional[Any], Optional[str
return None, None
api_key, base_url = resolved
logger.debug("Auxiliary client: xAI OAuth (%s via Responses API)", model)
real_client = _create_openai_client(api_key=api_key, base_url=base_url)
real_client = OpenAI(api_key=api_key, base_url=base_url)
return CodexAuxiliaryClient(real_client, model), model
@@ -2158,7 +2116,7 @@ def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
return None, None
base_url = _CODEX_AUX_BASE_URL
logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", model)
real_client = _create_openai_client(
real_client = OpenAI(
api_key=codex_token,
base_url=base_url,
default_headers=_codex_cloudflare_headers(codex_token),
@@ -2258,7 +2216,7 @@ def _try_azure_foundry(
if _dq:
extra["default_query"] = _dq
client = _create_openai_client(api_key=api_key, base_url=_clean_base, **extra)
client = OpenAI(api_key=api_key, base_url=_clean_base, **extra)
if runtime_api_mode == "codex_responses":
# GPT-5.x / o-series / codex models on Azure Foundry are
@@ -2297,16 +2255,9 @@ def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optiona
if not token:
return None, None
# Allow base URL override from config.yaml model.base_url, but only when:
# 1. the configured provider is anthropic (otherwise a non-Anthropic
# base_url, e.g. Codex endpoint, would leak into Anthropic requests), AND
# 2. the override URL actually points at an Anthropic-compatible endpoint.
# Without gate (2), operators who route main-session traffic through a
# non-Anthropic provider that accepts Anthropic-format requests (e.g.
# OpenRouter at openrouter.ai/api/v1, with provider=anthropic in config.yaml)
# would have every auxiliary side-channel call (memory extractors,
# reflection, vision, title generation) 401 from the foreign host —
# see issue #52608.
# Allow base URL override from config.yaml model.base_url, but only
# when the configured provider is anthropic otherwise a non-Anthropic
# base_url (e.g. Codex endpoint) would leak into Anthropic requests.
base_url = _pool_runtime_base_url(entry, _ANTHROPIC_DEFAULT_BASE_URL) if pool_present else _ANTHROPIC_DEFAULT_BASE_URL
try:
from hermes_cli.config import load_config
@@ -2316,7 +2267,7 @@ def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optiona
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
if cfg_provider == "anthropic":
cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
if cfg_base_url and _is_anthropic_compatible_host(cfg_base_url):
if cfg_base_url:
base_url = cfg_base_url
except Exception:
pass
@@ -2519,7 +2470,7 @@ def _is_payment_error(exc: Exception) -> bool:
# but sometimes wrap them in 429 or other codes.
# Daily quota exhaustion from Bedrock, Vertex AI, and similar providers
# uses different language but is semantically identical to credit exhaustion.
if status in {402, 403, 404, 429, None}:
if status in {402, 404, 429, None}:
if any(kw in err_lower for kw in (
"credits", "insufficient funds",
"can only afford", "billing",
@@ -2528,8 +2479,6 @@ def _is_payment_error(exc: Exception) -> bool:
"balance_depleted", "no usable credits",
"model_not_supported_on_free_tier",
"not available on the free tier",
"requires a subscription", "upgrade for access",
"upgrade for higher limits", "reached your session usage limit",
# Daily / monthly / weekly quota exhaustion keywords
"quota exceeded", "quota_exceeded",
"too many tokens per day", "daily limit",
@@ -2748,79 +2697,6 @@ def _is_model_not_found_error(exc: Exception) -> bool:
))
def _is_model_incompatible_error(exc: Exception) -> bool:
"""Detect "this route cannot serve this model" 400s (capability mismatch).
Distinct from :func:`_is_model_not_found_error` (the model does not exist
anywhere): here the model name is valid but the *current provider/account*
is structurally unable to run it. The canonical case is a configured
fallback that cannot run the main model — e.g. an ``openai-codex`` /
ChatGPT-account fallback asked to compress a ``glm-5.2`` conversation::
Error code: 400 - {'detail': "The 'glm-5.2' model is not supported
when using Codex with a ChatGPT account."}
The candidate authenticates fine and builds a client, so the auth and
payment predicates don't fire and the call would otherwise raise and
abort the whole auxiliary task (commonly compression — which then drops
middle turns and churns the session, destroying the prompt cache).
Treating it as a fallback-worthy capability error lets the chain skip the
incapable route and continue to the next candidate, mirroring the
context-window feasibility screen (#52392).
Billing/quota 400s belong to :func:`_is_payment_error`; "model does not
exist" 400s belong to :func:`_is_model_not_found_error`. This predicate
explicitly excludes both so the three don't overlap.
"""
status = getattr(exc, "status_code", None)
if status not in {400, None}:
return False
err_lower = str(exc).lower()
# Not-found 400s ("invalid model ID", "model does not exist") are owned by
# _is_model_not_found_error. Billing/free-tier 400s are owned by the
# payment path — key on the billing keywords directly here rather than
# calling _is_payment_error(), because that predicate is status-gated
# ({402,403,404,429,None}) and would not recognise a 400-coded billing
# body, letting it leak into this capability bucket.
if _is_model_not_found_error(exc):
return False
if any(kw in err_lower for kw in (
"credits", "insufficient funds", "billing", "out of funds",
"balance_depleted", "no usable credits", "payment required",
"free tier", "free-tier", "not available on the free tier",
"model_not_supported_on_free_tier", "quota",
)):
return False
return any(kw in err_lower for kw in (
"is not supported when using", # codex/ChatGPT-account model gating
"model is not supported",
"not supported with this",
"not supported for this account",
"model_not_supported",
"does not support this model",
"unsupported model",
))
def _is_invalid_aux_response_error(exc: Exception) -> bool:
"""Detect provider responses that authenticated but cannot serve aux shape.
Some OpenAI-compatible routes return HTTP 200 with an empty/malformed
ChatCompletion instead of a normal provider error. That is still a
provider/model capability failure for auxiliary tasks: downstream callers
need ``choices[0].message`` and should be able to continue through the
same fallback path as explicit model-incompatibility errors.
"""
if not isinstance(exc, RuntimeError):
return False
msg = str(exc).lower()
return (
"auxiliary " in msg
and "llm returned invalid response" in msg
and "choices[0].message" in msg
)
def _evict_cached_clients(provider: str) -> None:
"""Drop cached auxiliary clients for a provider so fresh creds are used."""
normalized = _normalize_aux_provider(provider)
@@ -3271,88 +3147,6 @@ def _try_main_agent_model_fallback(
return client, resolved_model or main_model, label
# ── Context-window screening for runtime fallback chains (issue #52392) ──
#
# When the runtime auxiliary fallback chain selects a candidate that is
# reachable but has a context window smaller than the compression task
# requires, the call errors out instead of continuing to the next, viable
# candidate. The startup feasibility check in
# ``agent.conversation_compression.check_compression_model_feasibility``
# already filters too-small auxiliary models at startup, but the runtime
# fallback chain (``_try_configured_fallback_chain`` and
# ``_try_main_fallback_chain``) does not apply the same filter, so
# compression can stop at the first alive door even if the room behind it
# is too small.
#
# The helpers below screen each candidate by its effective context window
# before it is returned. ``None`` results from ``get_model_context_length``
# are passed through (we cannot prove a model is too small, so we do not
# block it). This preserves the existing fallback surface for
# unrecognised/custom models while closing the gap on the well-known ones.
def _task_minimum_context_length(task: Optional[str]) -> Optional[int]:
"""Return the minimum context length required for an auxiliary task.
Only ``compression`` carries an explicit minimum today (the same
``MINIMUM_CONTEXT_LENGTH`` (64K) floor that
``check_compression_model_feasibility`` already enforces at startup).
Other tasks (``vision``, ``title_generation``, ``web_extract``,
``skills_hub``, ``mcp``, ``session_search``) return ``None`` — they
have no per-task context floor and the runtime chain must remain
permissive for them.
Returns ``None`` for an empty/``None`` task name so the helper is a
safe no-op when called from generic sites.
"""
if not task:
return None
if task == "compression":
return MINIMUM_CONTEXT_LENGTH
return None
def _candidate_context_window(
provider: str,
model: str,
base_url: str = "",
api_key: str = "",
) -> Optional[int]:
"""Resolve the effective context window for a fallback candidate.
Thin wrapper around :func:`agent.model_metadata.get_model_context_length`
that swallows probe failures (returns ``None``). Callers treat
``None`` as "unknown — pass through" so the existing fallback
surface is preserved when the context-length resolver chain cannot
determine a value (custom endpoints, models not in the registry,
offline endpoints).
Best-effort, never raises — the runtime fallback chain must keep
moving even if the resolver hits a probe error.
"""
if not model:
return None
try:
ctx = get_model_context_length(
model,
base_url=base_url,
api_key=api_key,
provider=provider,
)
except Exception as exc:
logger.debug(
"Auxiliary fallback: could not resolve context window for %s/%s: %s",
provider, model, exc,
)
return None
# ``get_model_context_length`` returns an int (with a 256K default
# fallback when nothing else matches). We still propagate ``None`` if
# a future change returns ``Optional[int]`` — being explicit is
# cheap and the test suite covers both shapes.
if isinstance(ctx, int) and ctx > 0:
return ctx
return None
def _try_configured_fallback_chain(
task: str,
failed_provider: str,
@@ -3377,7 +3171,6 @@ def _try_configured_fallback_chain(
skip = failed_provider.lower().strip()
tried = []
min_ctx = _task_minimum_context_length(task)
for i, entry in enumerate(chain):
if not isinstance(entry, dict):
@@ -3395,20 +3188,6 @@ def _try_configured_fallback_chain(
fb_client, resolved_model = None, None
if fb_client is not None:
if min_ctx is not None and resolved_model:
fb_ctx = _candidate_context_window(
fb_provider,
resolved_model,
base_url=str(entry.get("base_url") or ""),
api_key=_fallback_entry_api_key(entry) or "",
)
if fb_ctx is not None and fb_ctx < min_ctx:
logger.info(
"Auxiliary %s: skipping %s (%s context=%d < min=%d), continuing chain",
task, label, resolved_model, fb_ctx, min_ctx,
)
tried.append(f"{label} (context too small: {fb_ctx}<{min_ctx})")
continue
logger.info(
"Auxiliary %s: %s on %s — configured fallback to %s (%s)",
task, reason, failed_provider, label, resolved_model or fb_model or "default",
@@ -3424,28 +3203,6 @@ def _try_configured_fallback_chain(
return None, None, ""
def _try_configured_fallback_for_unavailable_client(
task: Optional[str],
failed_provider: str,
) -> Tuple[Optional[Any], Optional[str], str]:
"""Try task fallback_chain when an explicit aux provider cannot build.
This covers the "no client" case before any request is sent: missing
raw env key, unavailable OAuth/pool credentials, or provider resolver
returning ``(None, None)``. It deliberately stops at the configured
per-task fallback chain; the main-agent model remains the last-resort
runtime fallback for request-time capacity errors.
"""
explicit = (failed_provider or "").strip().lower()
if not task or not explicit or explicit in {"auto"}:
return None, None, ""
return _try_configured_fallback_chain(
task,
explicit,
reason="provider unavailable",
)
def _fallback_entry_api_key(entry: Dict[str, Any]) -> Optional[str]:
"""Resolve inline or env-backed API key from a fallback-chain entry."""
explicit = str(entry.get("api_key") or "").strip()
@@ -3504,7 +3261,6 @@ def _try_main_fallback_chain(
main_norm = (_read_main_provider() or "").strip().lower()
skip = {p for p in (failed_norm, main_norm, "auto") if p}
tried: List[str] = []
min_ctx = _task_minimum_context_length(task)
for i, entry in enumerate(chain):
if not isinstance(entry, dict):
@@ -3528,20 +3284,6 @@ def _try_main_fallback_chain(
logger.debug("Auxiliary %s: main fallback %s failed to resolve: %s", task or "call", label, exc)
fb_client, resolved_model = None, None
if fb_client is not None:
if min_ctx is not None:
fb_ctx = _candidate_context_window(
fb_provider,
resolved_model or fb_model,
base_url=str(entry.get("base_url") or ""),
api_key=_fallback_entry_api_key(entry) or "",
)
if fb_ctx is not None and fb_ctx < min_ctx:
logger.info(
"Auxiliary %s: skipping %s (context=%d < min=%d), continuing chain",
task or "call", label, fb_ctx, min_ctx,
)
tried.append(f"{label} (context too small: {fb_ctx}<{min_ctx})")
continue
logger.info(
"Auxiliary %s: %s on %s — main fallback chain to %s (%s)",
task or "call", reason, failed_provider or "auto", label,
@@ -3643,37 +3385,6 @@ def _resolve_auto(
# config.yaml (auxiliary.<task>.provider) still win over this.
main_provider = str(runtime_provider or _read_main_provider() or "")
main_model = str(runtime_model or _read_main_model() or "")
# MoA virtual provider: the "model" is a preset name (e.g. "opus-gpt") and
# there is no real "moa" HTTP endpoint, so resolving an aux client against
# provider="moa"/model=<preset> sends the preset name as the model id and
# the provider 400s ("opus-gpt is not a valid model ID"). Auxiliary tasks
# (title generation, compression, vision, …) don't need the reference
# fan-out — they should run on the aggregator, which is the preset's acting
# model. Resolve the MoA preset to its aggregator slot and continue Step 1
# with that real provider+model. Mirrors the MoA context-length resolution.
if main_provider == "moa":
try:
from hermes_cli.config import load_config
from hermes_cli.moa_config import resolve_moa_preset
_preset = resolve_moa_preset(load_config().get("moa") or {}, main_model)
_agg = _preset.get("aggregator") or {}
_agg_provider = str(_agg.get("provider") or "").strip()
_agg_model = str(_agg.get("model") or "").strip()
if _agg_provider and _agg_model and _agg_provider.lower() != "moa":
main_provider = _agg_provider
main_model = _agg_model
# The MoA virtual runtime carries a non-HTTP base_url
# ("moa://local") and a placeholder api_key; they belong to the
# facade, not the aggregator's real provider. Drop them so the
# aggregator resolves through its own provider credentials.
runtime_base_url = ""
runtime_api_key = ""
runtime_api_mode = ""
except Exception:
logger.debug("MoA aux resolution to aggregator failed", exc_info=True)
if (main_provider and main_model
and main_provider not in {"auto", ""}):
resolved_provider = main_provider
@@ -3820,10 +3531,6 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
_merged_async = _apply_user_default_headers(async_kwargs.get("default_headers"))
if _merged_async:
async_kwargs["default_headers"] = _merged_async
async_kwargs = {
**_openai_http_client_kwargs(sync_base_url, async_mode=True),
**async_kwargs,
}
return AsyncOpenAI(**async_kwargs), model
@@ -4034,7 +3741,7 @@ def resolve_provider_client(
"but no Codex OAuth token found (run: hermes model)")
return None, None
final_model = _normalize_resolved_model(model, provider)
raw_client = _create_openai_client(
raw_client = OpenAI(
api_key=codex_token,
base_url=_CODEX_AUX_BASE_URL,
default_headers=_codex_cloudflare_headers(codex_token),
@@ -4115,7 +3822,7 @@ def resolve_provider_client(
_merged_custom = _apply_user_default_headers(extra.get("default_headers"))
if _merged_custom:
extra["default_headers"] = _merged_custom
client = _create_openai_client(api_key=custom_key, base_url=_clean_base, **extra)
client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
client = _wrap_if_needed(client, final_model, custom_base, custom_key)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
else (client, final_model))
@@ -4219,7 +3926,7 @@ def resolve_provider_client(
_fb_headers = _apply_user_default_headers(_fb_extra.get("default_headers"))
if _fb_headers:
_fb_extra["default_headers"] = _fb_headers
client = _create_openai_client(api_key=custom_key, base_url=_fb_clean, **_fb_extra)
client = OpenAI(api_key=custom_key, base_url=_fb_clean, **_fb_extra)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
else (client, final_model))
sync_anthropic = AnthropicAuxiliaryClient(
@@ -4228,7 +3935,7 @@ def resolve_provider_client(
if async_mode:
return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model
return sync_anthropic, final_model
client = _create_openai_client(api_key=custom_key, base_url=_clean_base2, **_extra2)
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
# codex_responses or inherited auto-detect (via _wrap_if_needed).
# _wrap_if_needed reads the closed-over `api_mode` (the task-level
# override). Named-provider entry api_mode=codex_responses also
@@ -4370,7 +4077,7 @@ def resolve_provider_client(
_merged_main = _apply_user_default_headers(headers)
if _merged_main:
headers = _merged_main
client = _create_openai_client(api_key=api_key, base_url=base_url,
client = OpenAI(api_key=api_key, base_url=base_url,
**({"default_headers": headers} if headers else {}))
# Copilot GPT-5+ models (except gpt-5-mini) require the Responses
@@ -4906,7 +4613,7 @@ def _refresh_nous_auxiliary_client(
return None, model
fresh_key, fresh_base_url = runtime
sync_client = _create_openai_client(api_key=fresh_key, base_url=fresh_base_url)
sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url)
final_model = model
current_loop = None
@@ -5547,9 +5254,6 @@ def _validate_llm_response(response: Any, task: str = None) -> Any:
if not choices or not hasattr(choices[0], "message"):
raise AttributeError("missing choices[0].message")
except (AttributeError, TypeError, IndexError) as exc:
recovered = _recover_aux_response_message(response)
if recovered is not None:
return recovered
response_type = type(response).__name__
response_preview = str(response)[:120]
raise RuntimeError(
@@ -5561,64 +5265,6 @@ def _validate_llm_response(response: Any, task: str = None) -> Any:
return response
def _recover_aux_response_message(response: Any) -> Optional[Any]:
"""Synthesize chat-completions shape from Responses-style text fields.
Auxiliary callers consume ``choices[0].message``. Some compatible
endpoints return text outside ``choices`` (for example ``output_text`` or
``output`` items). Preserve that response before declaring it malformed.
"""
text = _extract_aux_response_text(response)
if not text:
return None
choice = SimpleNamespace(
message=SimpleNamespace(content=text),
finish_reason=getattr(response, "finish_reason", None) or "stop",
)
try:
response.choices = [choice]
return response
except Exception:
return SimpleNamespace(
id=getattr(response, "id", ""),
model=getattr(response, "model", ""),
object=getattr(response, "object", "chat.completion"),
choices=[choice],
usage=getattr(response, "usage", None),
)
def _extract_aux_response_text(response: Any) -> str:
output_text = _obj_get(response, "output_text")
if isinstance(output_text, str) and output_text.strip():
return output_text.strip()
output = _obj_get(response, "output")
if not isinstance(output, list):
return ""
parts: List[str] = []
for item in output:
item_type = _obj_get(item, "type")
if item_type and item_type != "message":
continue
for part in (_obj_get(item, "content") or []):
part_type = _obj_get(part, "type")
if part_type in {"output_text", "text", None}:
text = _obj_get(part, "text")
if isinstance(text, str) and text.strip():
parts.append(text.strip())
return "\n".join(parts).strip()
def _obj_get(obj: Any, key: str, default: Any = None) -> Any:
value = getattr(obj, key, default)
if value is default and isinstance(obj, dict):
value = obj.get(key, default)
return value
def call_llm(
task: str = None,
*,
@@ -5698,30 +5344,21 @@ def call_llm(
)
if client is None:
# When the user explicitly chose a non-OpenRouter provider but no
# credentials were found, honor the task fallback_chain before
# raising. Missing raw env keys are recoverable for auxiliary
# tasks because fallback entries may use OAuth / credential-pool
# auth (for example openai-codex).
# credentials were found, fail fast instead of silently routing
# through OpenRouter (which causes confusing 404s).
_explicit = (resolved_provider or "").strip().lower()
if _explicit and _explicit not in {"auto", "openrouter", "custom"}:
fb_client, fb_model, fb_label = _try_configured_fallback_for_unavailable_client(
task, _explicit,
raise RuntimeError(
f"Provider '{_explicit}' is set in config.yaml but no API key "
f"was found. Set the {_explicit.upper()}_API_KEY environment "
f"variable, or switch to a different provider with `hermes model`."
)
if fb_client is not None:
client, final_model = fb_client, fb_model
resolved_provider = fb_label or resolved_provider
else:
raise RuntimeError(
f"Provider '{_explicit}' is set in config.yaml but no API key "
f"was found. Set the {_explicit.upper()}_API_KEY environment "
f"variable, or switch to a different provider with `hermes model`."
)
# For auto/custom with no credentials, try the full auto chain
# rather than hardcoding OpenRouter (which may be depleted).
# Pass model=None so each provider uses its own default —
# resolved_model may be an OpenRouter-format slug that doesn't
# work on other providers.
if client is None and not resolved_base_url:
if not resolved_base_url:
logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
task or "call", resolved_provider)
client, final_model = _get_cached_client("auto", main_runtime=main_runtime, task=task)
@@ -6016,21 +5653,10 @@ def call_llm(
# When the provider returns a 429 rate-limit (not billing), fall
# back to an alternative provider instead of exhausting retries
# against the same rate-limited endpoint.
#
# ── Auth error fallback (#21165) ─────────────────────────────
# When the resolved provider returns 401 and neither the Nous
# refresh path nor explicit provider credential refresh applies,
# fall back to an alternative provider instead of dropping the
# auxiliary task on the floor (silent compression failure /
# message loss). Auth is NOT a capacity error: it only bypasses
# the explicit-provider gate when the user is in auto mode.
should_fallback = (
_is_auth_error(first_err)
or _is_payment_error(first_err)
_is_payment_error(first_err)
or _is_connection_error(first_err)
or _is_rate_limit_error(first_err)
or _is_model_incompatible_error(first_err)
or _is_invalid_aux_response_error(first_err)
)
# Respect explicit provider choice for transient errors (auth, request
# validation, etc.) but allow fallback when the provider clearly cannot
@@ -6041,24 +5667,9 @@ def call_llm(
is_auto = resolved_provider in {"auto", "", None}
# Capacity errors bypass the explicit-provider gate: the provider
# literally cannot serve this request regardless of user intent.
# Rate limits are included: after retries are exhausted, a 429 means
# the provider cannot serve this request — fall back. See #52228.
# Model-incompatibility 400s are also a hard capability mismatch (the
# route cannot run this model at all — e.g. a codex/ChatGPT-account
# fallback asked to compress a glm-5.2 conversation), so they bypass
# the explicit-provider gate and continue to the next candidate
# instead of aborting the auxiliary task and churning the session.
is_capacity_error = (
_is_payment_error(first_err)
or _is_connection_error(first_err)
or _is_rate_limit_error(first_err)
or _is_model_incompatible_error(first_err)
or _is_invalid_aux_response_error(first_err)
)
is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err)
if should_fallback and (is_auto or is_capacity_error):
if _is_auth_error(first_err):
reason = "auth error"
elif _is_payment_error(first_err):
if _is_payment_error(first_err):
reason = "payment error"
# Resolve the actual provider label (resolved_provider may be
# "auto"; the client's base_url tells us which backend got the
@@ -6069,10 +5680,6 @@ def call_llm(
)
elif _is_rate_limit_error(first_err):
reason = "rate limit"
elif _is_model_incompatible_error(first_err):
reason = "model incompatible with route"
elif _is_invalid_aux_response_error(first_err):
reason = "invalid provider response"
else:
reason = "connection error"
logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
@@ -6247,21 +5854,12 @@ async def async_call_llm(
if client is None:
_explicit = (resolved_provider or "").strip().lower()
if _explicit and _explicit not in {"auto", "openrouter", "custom"}:
fb_client, fb_model, fb_label = _try_configured_fallback_for_unavailable_client(
task, _explicit,
raise RuntimeError(
f"Provider '{_explicit}' is set in config.yaml but no API key "
f"was found. Set the {_explicit.upper()}_API_KEY environment "
f"variable, or switch to a different provider with `hermes model`."
)
if fb_client is not None:
client, final_model = _to_async_client(
fb_client, fb_model or "", is_vision=(task == "vision")
)
resolved_provider = fb_label or resolved_provider
else:
raise RuntimeError(
f"Provider '{_explicit}' is set in config.yaml but no API key "
f"was found. Set the {_explicit.upper()}_API_KEY environment "
f"variable, or switch to a different provider with `hermes model`."
)
if client is None and not resolved_base_url:
if not resolved_base_url:
logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
task or "call", resolved_provider)
client, final_model = _get_cached_client("auto", async_mode=True, main_runtime=main_runtime, task=task)
@@ -6507,47 +6105,24 @@ async def async_call_llm(
raise
# ── Payment / connection / rate-limit fallback (mirrors sync call_llm) ──
# Auth error fallback (#21165): a 401 that survived the refresh path
# falls back in auto mode just like the sync call_llm() path. Auth is
# NOT a capacity error, so on an explicit provider it still respects
# the user's choice (handled by the is_auto/is_capacity_error gate).
should_fallback = (
_is_auth_error(first_err)
or _is_payment_error(first_err)
or _is_connection_error(first_err)
or _is_rate_limit_error(first_err)
or _is_model_incompatible_error(first_err)
or _is_invalid_aux_response_error(first_err)
)
# Capacity errors (payment/quota/connection/rate-limit) bypass the
# explicit-provider gate — the provider cannot serve the request
# regardless of user intent. Rate limits are included: after retries
# are exhausted, a 429 means the provider is at capacity. See #52228.
# See #26803: daily token quota must fall back like a 402 credit error.
# Model-incompatibility 400s (route cannot run this model at all)
# bypass the gate too — see the sync call_llm() path for rationale.
is_auto = resolved_provider in {"auto", "", None}
is_capacity_error = (
_is_payment_error(first_err)
or _is_connection_error(first_err)
or _is_rate_limit_error(first_err)
or _is_model_incompatible_error(first_err)
or _is_invalid_aux_response_error(first_err)
)
# Capacity errors (payment/quota/connection) bypass the explicit-provider
# gate — the provider cannot serve the request regardless of user intent.
# See #26803: daily token quota must fall back like a 402 credit error.
is_auto = resolved_provider in {"auto", "", None}
is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err)
if should_fallback and (is_auto or is_capacity_error):
if _is_auth_error(first_err):
reason = "auth error"
elif _is_payment_error(first_err):
if _is_payment_error(first_err):
reason = "payment error"
_mark_provider_unhealthy(
_recoverable_pool_provider(resolved_provider, client) or resolved_provider
)
elif _is_rate_limit_error(first_err):
reason = "rate limit"
elif _is_model_incompatible_error(first_err):
reason = "model incompatible with route"
elif _is_invalid_aux_response_error(first_err):
reason = "invalid provider response"
else:
reason = "connection error"
logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",

View File

@@ -37,18 +37,6 @@ from tools.terminal_tool import is_persistent_env
from utils import base_url_host_matches, base_url_hostname, env_float, env_int
logger = logging.getLogger(__name__)
_OPENROUTER_PROVIDER_SORT_VALUES = {"throughput", "latency", "price"}
# When the fallback chain is fully exhausted on a non-rate-limit failure
# (e.g. every provider returns a non-retryable client error like HTTP 400),
# arm a short cooldown so the NEXT turn's restore_primary_runtime stays gated
# and does not reset _fallback_index=0 to replay the entire chain again.
# Without this, a client/gateway that re-submits immediately would re-marshal
# the full (potentially 80k-token) context once per provider every turn and
# can drive a constrained host into memory/swap exhaustion. Rate-limit /
# billing reasons keep their own 60s cooldown (set above); this is the
# narrower non-rate-limit case. See issue #24996.
_FALLBACK_EXHAUSTED_COOLDOWN_S = 5.0
def _ra():
@@ -127,23 +115,6 @@ def _is_openai_codex_backend(agent) -> bool:
)
def _validated_openrouter_provider_sort(raw_sort: Any) -> Optional[str]:
"""Return a normalized OpenRouter provider.sort value or None."""
if not isinstance(raw_sort, str):
return None
sort_value = raw_sort.strip().lower()
if not sort_value:
return None
if sort_value in _OPENROUTER_PROVIDER_SORT_VALUES:
return sort_value
logger.warning(
"Ignoring invalid OpenRouter provider.sort value %r (allowed: %s)",
raw_sort,
", ".join(sorted(_OPENROUTER_PROVIDER_SORT_VALUES)),
)
return None
def _env_float(name: str, default: float) -> float:
try:
return float(os.getenv(name, str(default)))
@@ -258,11 +229,6 @@ def interruptible_api_call(agent, api_kwargs: dict):
invalidate_runtime_client(region)
raise
result["response"] = normalize_converse_response(raw_response)
elif agent.provider == "moa":
# MoA is a virtual chat-completions provider backed by the
# in-process MoAClient facade. Do not rebuild a request-local
# OpenAI client from the virtual runtime metadata.
result["response"] = agent.client.chat.completions.create(**api_kwargs)
else:
request_client = _set_request_client(
agent._create_request_openai_client(
@@ -732,9 +698,8 @@ def build_api_kwargs(agent, api_messages: list) -> dict:
_prefs["ignore"] = agent.providers_ignored
if agent.providers_order:
_prefs["order"] = agent.providers_order
_provider_sort = _validated_openrouter_provider_sort(agent.provider_sort)
if _provider_sort:
_prefs["sort"] = _provider_sort
if agent.provider_sort:
_prefs["sort"] = agent.provider_sort
if agent.provider_require_parameters:
_prefs["require_parameters"] = True
if agent.provider_data_collection:
@@ -1050,23 +1015,18 @@ def build_assistant_message(agent, assistant_message, finish_reason: str) -> dic
"arguments": tool_call.function.arguments
},
}
# Tool-call arguments are intentionally NOT redacted here. This
# dict enters the in-memory conversation history that is replayed
# to the model on every subsequent turn AND persisted to state.db,
# which is itself replayed verbatim on session resume
# (get_messages_as_conversation). Masking a credential to `***`
# here poisons that replay: the model reads back its own
# `PGPASSWORD='***' psql ...` call and copies the placeholder into
# the next tool call, breaking every credential-dependent command
# on the second turn (#43083). The masking also provided no real
# protection — the same secret still leaks verbatim through tool
# OUTPUT (file contents, command output, diffs, the compaction
# block), none of which this pass ever touched. Keeping secrets
# out of the replayable store is a separate tokenization/vault
# concern, not something arg-redaction can deliver without
# breaking replay. Storage-time redaction remains governed by the
# `security.redact_secrets` toggle. (#19798 introduced this;
# #43083 removed it.)
# Defence-in-depth: redact credentials from tool call arguments
# before they enter conversation history. Tool execution uses the
# raw API response object, not this dict, so redacting the
# persisted shape is safe and only affects storage. Catches the
# case where a model accidentally inlines a secret into a tool
# call (e.g. `terminal(command="curl -H 'Authorization: Bearer
# sk-...'")`). (#19798)
if isinstance(tc_dict["function"]["arguments"], str):
from agent.redact import redact_sensitive_text
tc_dict["function"]["arguments"] = redact_sensitive_text(
tc_dict["function"]["arguments"]
)
# Preserve extra_content (e.g. Gemini thought_signature) so it
# is sent back on subsequent API calls. Without this, Gemini 3
# thinking models reject the request with a 400 error.
@@ -1133,22 +1093,8 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
if (not fallback_already_active) or (primary_provider and current_provider == primary_provider):
agent._rate_limited_until = time.monotonic() + 60
if agent._fallback_index >= len(agent._fallback_chain):
# Chain exhausted. If we actually walked a non-empty chain and the
# failure was NOT a rate-limit/billing event (those already armed
# their own 60s cooldown above), arm a short cooldown so the next
# turn's restore_primary_runtime stays gated instead of resetting
# _fallback_index=0 and re-marshaling the whole context across every
# provider again. Guards the cross-turn replay storm in #24996.
if (
len(agent._fallback_chain) > 0
and reason not in {FailoverReason.rate_limit, FailoverReason.billing}
):
_existing_cooldown = getattr(agent, "_rate_limited_until", 0) or 0
agent._rate_limited_until = max(
_existing_cooldown,
time.monotonic() + _FALLBACK_EXHAUSTED_COOLDOWN_S,
)
return False
fb = agent._fallback_chain[agent._fallback_index]
agent._fallback_index += 1
fb_provider = (fb.get("provider") or "").strip().lower()
@@ -1264,16 +1210,14 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
agent._transport_cache.clear()
agent._fallback_activated = True
# Rebind the credential pool to the fallback provider when the provider
# changes. Keeping the primary pool attached would make downstream
# recovery (rate_limit / billing / auth) mutate the wrong credential
# set and can overwrite the fallback's base_url back to the primary
# endpoint. See #33163.
#
# Clear the credential pool when the fallback provider doesn't match
# the pool's provider. The pool was seeded for the primary provider;
# leaving it attached means downstream recovery (rate_limit / billing /
# auth) calls ``_swap_credential`` with a primary entry which overwrites
# the agent's ``base_url`` back to the primary's endpoint — every
# fallback request then 404s against the wrong host. See #33163.
# When the fallback shares the pool's provider (e.g. both openrouter
# entries with different routing) the pool is preserved. When the
# providers differ, load the fallback provider's own pool if one exists
# so provider-specific rotation continues to work after the switch.
# entries with different routing) the pool is preserved.
_existing_pool = getattr(agent, "_credential_pool", None)
if _existing_pool is not None:
_pool_provider = (getattr(_existing_pool, "provider", "") or "").strip().lower()
@@ -1284,22 +1228,6 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
fb_provider, fb_model, _pool_provider,
)
agent._credential_pool = None
if getattr(agent, "_credential_pool", None) is None:
try:
from agent.credential_pool import load_pool
fallback_pool = load_pool(fb_provider)
if fallback_pool and fallback_pool.has_credentials():
agent._credential_pool = fallback_pool
logger.info(
"Fallback to %s/%s: attached fallback credential pool",
fb_provider, fb_model,
)
except Exception as exc:
logger.debug(
"Fallback to %s/%s: could not attach credential pool: %s",
fb_provider, fb_model, exc,
)
# Honor per-provider / per-model request_timeout_seconds for the
# fallback target (same knob the primary client uses). None = use
@@ -1530,9 +1458,8 @@ def handle_max_iterations(agent, messages: list, api_call_count: int) -> str:
provider_preferences["ignore"] = agent.providers_ignored
if agent.providers_order:
provider_preferences["order"] = agent.providers_order
_provider_sort = _validated_openrouter_provider_sort(agent.provider_sort)
if _provider_sort:
provider_preferences["sort"] = _provider_sort
if agent.provider_sort:
provider_preferences["sort"] = agent.provider_sort
if provider_preferences and (
(agent.provider or "").strip().lower() == "openrouter"
or agent._is_openrouter_url()
@@ -2464,19 +2391,12 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
diag=request_client_holder.get("diag"),
)
_close_request_client_once("stream_mid_tool_retry_cleanup")
if agent.api_mode == "anthropic_messages":
try:
agent._anthropic_client.close()
agent._rebuild_anthropic_client()
except Exception:
pass
else:
try:
agent._replace_primary_openai_client(
reason="stream_mid_tool_retry_pool_cleanup"
)
except Exception:
pass
try:
agent._replace_primary_openai_client(
reason="stream_mid_tool_retry_pool_cleanup"
)
except Exception:
pass
continue
# SSE error events from proxies (e.g. OpenRouter sends
@@ -2524,19 +2444,12 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
_close_request_client_once("stream_retry_cleanup")
# Also rebuild the primary client to purge
# any dead connections from the pool.
if agent.api_mode == "anthropic_messages":
try:
agent._anthropic_client.close()
agent._rebuild_anthropic_client()
except Exception:
pass
else:
try:
agent._replace_primary_openai_client(
reason="stream_retry_pool_cleanup"
)
except Exception:
pass
try:
agent._replace_primary_openai_client(
reason="stream_retry_pool_cleanup"
)
except Exception:
pass
continue
# Retries exhausted. Log the final failure with
# full diagnostic detail (chain, headers,
@@ -2648,17 +2561,6 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
_stream_stale_timeout = max(_stream_stale_timeout_base, 240.0)
else:
_stream_stale_timeout = _stream_stale_timeout_base
# Reasoning-model floor: known reasoning models (Nemotron 3 Ultra,
# OpenAI o1/o3, Anthropic Opus 4.x thinking, DeepSeek R1, Qwen QwQ,
# xAI Grok reasoning, etc.) routinely exceed the default 180s chat-
# model threshold during their thinking phase. The cloud gateway
# upstream kills the socket first, surfacing as BrokenPipeError.
# Raises the floor only — never overrides explicit user config
# (handled by get_provider_stale_timeout above).
from agent.reasoning_timeouts import get_reasoning_stale_timeout_floor
_reasoning_floor = get_reasoning_stale_timeout_floor(api_kwargs.get("model"))
if _reasoning_floor is not None:
_stream_stale_timeout = max(_stream_stale_timeout, _reasoning_floor)
t = threading.Thread(target=_call, daemon=True)
t.start()
@@ -2707,17 +2609,10 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
pass
# Rebuild the primary client too — its connection pool
# may hold dead sockets from the same provider outage.
if agent.api_mode == "anthropic_messages":
try:
agent._anthropic_client.close()
agent._rebuild_anthropic_client()
except Exception:
pass
else:
try:
agent._replace_primary_openai_client(reason="stale_stream_pool_cleanup")
except Exception:
pass
try:
agent._replace_primary_openai_client(reason="stale_stream_pool_cleanup")
except Exception:
pass
# Reset the timer so we don't kill repeatedly while
# the inner thread processes the closure.
last_chunk_time["t"] = time.time()
@@ -2793,30 +2688,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
role="assistant", content=_partial_text, tool_calls=None,
reasoning_content=None,
)
# Detect provider output-layer content filtering (e.g. MiniMax
# "output new_sensitive (1027)", Azure/OpenAI content_filter,
# Anthropic safety refusal). The raw error is about to be
# swallowed into a finish_reason=length stub, so classify it HERE
# while we still have it and stamp the stub. Retrying such a
# content-deterministic filter on the same primary just re-hits
# the filter — the conversation loop reads this tag and activates
# the fallback chain instead of burning continuation retries.
# error_classifier is the single source of truth for "what counts
# as a content filter" (#32421).
_content_filter_terminated = False
try:
from agent.error_classifier import classify_api_error, FailoverReason
_cls = classify_api_error(
result["error"],
provider=str(getattr(agent, "provider", "") or ""),
model=str(getattr(agent, "model", "") or ""),
)
_content_filter_terminated = (
_cls.reason == FailoverReason.content_policy_blocked
)
except Exception:
_content_filter_terminated = False
_stub = SimpleNamespace(
return SimpleNamespace(
id=PARTIAL_STREAM_STUB_ID,
model=getattr(agent, "model", "unknown"),
choices=[SimpleNamespace(
@@ -2825,9 +2697,6 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
usage=None,
_dropped_tool_names=_partial_names or None,
)
if _content_filter_terminated:
_stub._content_filter_terminated = True
return _stub
raise result["error"]
return result["response"]

View File

@@ -60,8 +60,6 @@ from dataclasses import dataclass
from pathlib import Path
from typing import Any, Optional
from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags
logger = logging.getLogger("hermes.coding_context")
CODING_TOOLSET = "coding"
@@ -85,59 +83,6 @@ _PROJECT_MARKERS = (
# Agent-instruction files surfaced separately from manifests in the snapshot.
_CONTEXT_FILES = ("AGENTS.md", "CLAUDE.md", ".cursorrules")
# Source-file extensions that make a git repo a *code* workspace even with no
# manifest. Without this, `git init` on a notes/writing/research folder (a huge
# non-coding use case) would flip the whole session into the coding posture just
# for having a `.git`. A manifest still wins on its own (see `_PROJECT_MARKERS`).
_CODE_EXTENSIONS = frozenset({
".py", ".pyi", ".ipynb", ".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs",
".go", ".rs", ".java", ".kt", ".kts", ".scala", ".rb", ".php", ".c", ".h",
".cc", ".cpp", ".hpp", ".cs", ".swift", ".m", ".mm", ".dart", ".ex", ".exs",
".lua", ".sh", ".bash", ".zsh", ".sql", ".vue", ".svelte", ".r", ".jl",
".hs", ".clj", ".erl", ".pl",
})
# Dirs never worth scanning for the code check (deps/build/vcs/venv noise).
_CODE_SCAN_SKIP_DIRS = frozenset({
".git", "node_modules", "venv", ".venv", "__pycache__", "dist", "build",
"target", ".next", ".turbo", "vendor",
})
# Bounded sweep: a code workspace reveals itself in the first handful of entries.
_CODE_SCAN_MAX_ENTRIES = 500
def _has_code_files(root: Path) -> bool:
"""Cheap, bounded check for source files in a repo's top two levels.
Lets a git repo of loose scripts (no manifest) still read as a code
workspace while a bare notes/writing repo does not. Scans the root and its
immediate subdirectories only, capped at ``_CODE_SCAN_MAX_ENTRIES`` stats —
a handful of readdirs at session start, not a full walk.
"""
seen = 0
stack = [(root, True)]
while stack:
directory, is_root = stack.pop()
try:
with os.scandir(directory) as entries:
for entry in entries:
seen += 1
if seen > _CODE_SCAN_MAX_ENTRIES:
return False
name = entry.name
try:
if entry.is_file():
if os.path.splitext(name)[1].lower() in _CODE_EXTENSIONS:
return True
elif is_root and entry.is_dir() and name not in _CODE_SCAN_SKIP_DIRS and not name.startswith("."):
stack.append((Path(entry.path), False))
except OSError:
continue
except OSError:
continue
return False
# Lockfile → package manager, checked in priority order.
_PY_LOCKFILES = (("uv.lock", "uv"), ("poetry.lock", "poetry"), ("Pipfile.lock", "pipenv"))
_JS_LOCKFILES = (
@@ -423,16 +368,10 @@ def _detect_profile_name(mode: str, platform: str, cwd_str: str) -> str:
if platform and platform.strip().lower() not in INTERACTIVE_CODING_PLATFORMS:
return GENERAL_PROFILE.name
cwd = Path(cwd_str)
# A recognized project root (manifest / AGENTS.md / .cursorrules) is a code
# workspace on its own — cheap stat checks, no scan.
if _marker_root(cwd) is not None:
return CODING_PROFILE.name
git_root = _git_root(cwd)
if git_root is not None and git_root == _home():
git_root = None # dotfiles repo at $HOME — not a code workspace
# A bare git repo only counts when it actually holds code, so `git init` on a
# notes/writing/research folder stays in the general posture.
if git_root is not None and _has_code_files(git_root):
if git_root is not None or _marker_root(cwd) is not None:
return CODING_PROFILE.name
return GENERAL_PROFILE.name
@@ -649,14 +588,12 @@ def _enabled_mcp_servers(config: Optional[dict[str, Any]]) -> list[str]:
def _git(cwd: Path, *args: str) -> str:
_popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
try:
out = subprocess.run(
["git", "-C", str(cwd), *args],
capture_output=True,
text=True,
timeout=_GIT_TIMEOUT,
**_popen_kwargs,
)
except (OSError, subprocess.SubprocessError):
return ""
@@ -698,32 +635,25 @@ def _read_small(path: Path) -> str:
return ""
@dataclass(frozen=True)
class ProjectFacts:
"""Structured project facts — the model's verify loop, detected once.
def _project_facts(root: Path) -> list[str]:
"""Detected project facts for the workspace snapshot.
The same data that feeds the workspace snapshot, exposed structurally so
non-prompt consumers (e.g. the desktop verify UI) read it instead of
re-detecting and drifting from the prompt.
The point is to hand the model its *verify loop* up front — which manifest,
which package manager, and the exact test/lint/build commands — instead of
making it rediscover them every session. Cheap: stat calls plus reads of a
couple of small files; built once at prompt-build time (cache-safe).
"""
facts: list[str] = []
manifests: list[str]
package_managers: list[str]
verify_commands: list[str]
context_files: list[str]
def detect_project_facts(root: Path) -> ProjectFacts:
"""Detect manifests, package manager(s), verify commands, and context files.
Cheap: stat calls plus reads of a couple of small files. The single source
of truth for both the prompt snapshot (:func:`_project_facts`) and the
gateway's ``project.facts`` — so the UI never re-sniffs verify commands.
"""
manifests = [m for m in _PROJECT_MARKERS if m not in _CONTEXT_FILES and (root / m).is_file()]
package_managers = list(
dict.fromkeys(pm for lock, pm in (*_PY_LOCKFILES, *_JS_LOCKFILES) if (root / lock).is_file())
)
package_managers = [
pm for lock, pm in (*_PY_LOCKFILES, *_JS_LOCKFILES) if (root / lock).is_file()
]
if manifests:
line = f"- Project: {', '.join(manifests[:6])}"
if package_managers:
line += f" ({'/'.join(dict.fromkeys(package_managers))})"
facts.append(line)
verify: list[str] = []
if (root / "scripts" / "run_tests.sh").is_file():
@@ -743,61 +673,17 @@ def detect_project_facts(root: Path) -> ProjectFacts:
f"make {name}" for name in _VERIFY_TARGETS
if re.search(rf"^{re.escape(name)}\s*:", makefile, re.MULTILINE)
)
if verify:
deduped = list(dict.fromkeys(verify))[:_MAX_VERIFY_COMMANDS]
facts.append(f"- Verify: {'; '.join(deduped)}")
return ProjectFacts(
manifests=manifests,
package_managers=package_managers,
verify_commands=list(dict.fromkeys(verify))[:_MAX_VERIFY_COMMANDS],
context_files=[c for c in _CONTEXT_FILES if (root / c).is_file()],
)
def _project_facts(root: Path) -> list[str]:
"""Render :func:`detect_project_facts` as workspace-snapshot lines.
Hands the model its *verify loop* up front — which manifest, which package
manager, and the exact test/lint/build commands — instead of making it
rediscover them every session. Built once at prompt-build time; the string
output must stay byte-stable to preserve the prompt cache.
"""
f = detect_project_facts(root)
facts: list[str] = []
if f.manifests:
line = f"- Project: {', '.join(f.manifests[:6])}"
if f.package_managers:
line += f" ({'/'.join(f.package_managers)})"
facts.append(line)
if f.verify_commands:
facts.append(f"- Verify: {'; '.join(f.verify_commands)}")
if f.context_files:
facts.append(f"- Context files: {', '.join(f.context_files)}")
context_files = [c for c in _CONTEXT_FILES if (root / c).is_file()]
if context_files:
facts.append(f"- Context files: {', '.join(context_files)}")
return facts
def project_facts_for(cwd: Optional[str | Path] = None) -> Optional[dict[str, Any]]:
"""Structured project facts for ``cwd`` — ``None`` outside a workspace.
Same detection the system-prompt snapshot uses (git root, else marker root),
exposed for non-prompt consumers (the desktop verify UI) so they never
re-derive "are we coding?" or duplicate the verify-command sniffing.
"""
resolved = _resolve_cwd(cwd)
root = _git_root(resolved) or _marker_root(resolved)
if root is None:
return None
f = detect_project_facts(root)
return {
"root": str(root),
"manifests": f.manifests,
"packageManagers": f.package_managers,
"verifyCommands": f.verify_commands,
"contextFiles": f.context_files,
}
def build_coding_workspace_block(cwd: Optional[str | Path] = None) -> str:
"""Workspace snapshot for the system prompt (empty outside a workspace).

View File

@@ -890,15 +890,7 @@ class ContextCompressor(ContextEngine):
# This is independent of the abort_on_summary_failure config flag:
# rotating on a broken credential is never the right behavior.
self._last_summary_auth_failure: bool = False
# Set when summary generation ultimately fails due to a transient
# network/connection error (httpx/httpcore connection drop, premature
# stream close, etc.) — distinct from auth failures but treated the
# same way by compress(): ABORT and preserve the session unchanged
# rather than destroy the middle window for a deterministic
# "summary unavailable" marker. Retrying once the network recovers is
# strictly better than discarding context for a transient blip
# (#29559, #25585). Independent of abort_on_summary_failure.
self._last_summary_network_failure: bool = False
# When a user-configured summary model fails and we recover by
# retrying on the main model, record the failure so gateway /
# CLI callers can still warn the user even though compression
# succeeded. Silent recovery would hide the broken config.
@@ -1695,7 +1687,6 @@ This compaction should PRIORITISE preserving all information related to the focu
self._summary_model_fallen_back = False
self._last_summary_error = None
self._last_summary_auth_failure = False
self._last_summary_network_failure = False
return self._with_summary_prefix(summary)
except Exception as e:
# ``call_llm`` raises ``RuntimeError`` for two very different cases:
@@ -1828,15 +1819,6 @@ This compaction should PRIORITISE preserving all information related to the focu
if len(err_text) > 220:
err_text = err_text[:217].rstrip() + "..."
self._last_summary_error = err_text
# A terminal connection/network failure (we reach this branch only
# after any main-model fallback has already been tried or is
# unavailable). Flag it so compress() ABORTS and preserves the
# session unchanged instead of destroying the middle window for a
# placeholder marker — retrying once the network recovers is
# strictly better than dropping context (#29559, #25585). Mirrors
# the auth-failure carve-out; independent of abort_on_summary_failure.
if _is_streaming_closed:
self._last_summary_network_failure = True
logger.warning(
"Failed to generate context summary: %s. "
"Further summary attempts paused for %d seconds.",
@@ -2400,7 +2382,6 @@ This compaction should PRIORITISE preserving all information related to the focu
self._last_aux_model_failure_model = None
self._last_compress_aborted = False
self._last_summary_auth_failure = False
self._last_summary_network_failure = False
# Manual /compress (force=True) bypasses the failure cooldown so the
# user can retry immediately after an auto-compress abort. Without
@@ -2517,21 +2498,15 @@ This compaction should PRIORITISE preserving all information related to the focu
# surface a warning.
# Default is False (historical behavior).
#
# EXCEPTION — auth AND transient network failures always abort. A
# 401/403 from the summary call means the credential or endpoint is
# broken (invalid/blocked key, or a token pointed at the wrong
# inference host). A connection/stream-close error means the network
# blipped at the compaction moment (#29559). In BOTH cases rotating into
# EXCEPTION — auth failures always abort. A 401/403 from the summary
# call means the credential or endpoint is broken (invalid/blocked
# key, or a token pointed at the wrong inference host). Rotating into
# a child session with a placeholder summary on a broken credential
# strands the user on a degraded session for zero benefit — every
# subsequent call fails the same way. So when the failure was an auth
# error we abort regardless of abort_on_summary_failure, preserving
# the conversation unchanged until the credential is fixed.
if not summary and (
self.abort_on_summary_failure
or self._last_summary_auth_failure
or self._last_summary_network_failure
):
if not summary and (self.abort_on_summary_failure or self._last_summary_auth_failure):
n_skipped = compress_end - compress_start
self._last_summary_dropped_count = 0 # nothing actually dropped
self._last_summary_fallback_used = False
@@ -2546,15 +2521,6 @@ This compaction should PRIORITISE preserving all information related to the focu
"with /compress or start fresh with /new.",
n_skipped,
)
elif self._last_summary_network_failure:
logger.warning(
"Summary generation failed with a network/connection "
"error — aborting compression. %d message(s) preserved "
"unchanged; the session was NOT rotated. This is "
"transient: retry with /compress once connectivity "
"recovers, or continue the conversation as-is.",
n_skipped,
)
else:
logger.warning(
"Summary generation failed — aborting compression "

View File

@@ -12,7 +12,6 @@ from pathlib import Path
from typing import Awaitable, Callable
from agent.model_metadata import estimate_tokens_rough
from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags
_QUOTED_REFERENCE_VALUE = r'(?:`[^`\n]+`|"[^"\n]+"|\'[^\'\n]+\')'
REFERENCE_PATTERN = re.compile(
@@ -291,7 +290,6 @@ def _expand_git_reference(
args: list[str],
label: str,
) -> tuple[str | None, str | None]:
_popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
try:
result = subprocess.run(
["git", *args],
@@ -300,7 +298,6 @@ def _expand_git_reference(
text=True,
timeout=30,
stdin=subprocess.DEVNULL,
**_popen_kwargs,
)
except subprocess.TimeoutExpired:
return f"{ref.raw}: git command timed out (30s)", None
@@ -486,7 +483,6 @@ def _iter_visible_entries(path: Path, cwd: Path, limit: int) -> list[Path]:
def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
_popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
try:
result = subprocess.run(
["rg", "--files", str(path.relative_to(cwd))],
@@ -495,7 +491,6 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
text=True,
timeout=10,
stdin=subprocess.DEVNULL,
**_popen_kwargs,
)
except (FileNotFoundError, OSError, subprocess.TimeoutExpired):
return None

View File

@@ -90,7 +90,6 @@ def check_compression_model_feasibility(agent: Any) -> None:
try:
from agent.auxiliary_client import (
_resolve_task_provider_model,
_try_configured_fallback_for_unavailable_client,
get_text_auxiliary_client,
)
from agent.model_metadata import (
@@ -98,6 +97,10 @@ def check_compression_model_feasibility(agent: Any) -> None:
get_model_context_length,
)
client, aux_model = get_text_auxiliary_client(
"compression",
main_runtime=agent._current_main_runtime(),
)
# Best-effort aux provider label for the warning message. The
# configured provider may be "auto", in which case we fall back
# to the client's base_url hostname so the user can still tell
@@ -106,19 +109,6 @@ def check_compression_model_feasibility(agent: Any) -> None:
_aux_cfg_provider, _, _, _, _ = _resolve_task_provider_model("compression")
except Exception:
_aux_cfg_provider = ""
client, aux_model = get_text_auxiliary_client(
"compression",
main_runtime=agent._current_main_runtime(),
)
if client is None or not aux_model:
fb_client, fb_model, fb_label = _try_configured_fallback_for_unavailable_client(
"compression",
_aux_cfg_provider,
)
if fb_client is not None and fb_model:
client, aux_model = fb_client, fb_model
if "(" in fb_label and fb_label.endswith(")"):
_aux_cfg_provider = fb_label.rsplit("(", 1)[1][:-1]
if client is None or not aux_model:
if _aux_cfg_provider and _aux_cfg_provider != "auto":
msg = (
@@ -288,29 +278,6 @@ def replay_compression_warning(agent: Any) -> None:
pass
def conversation_history_after_compression(agent: Any, messages: list) -> Optional[list]:
"""Return the correct flush baseline after a compression boundary.
Legacy compression rotates to a fresh child session. That child has not
seen the compacted transcript through the normal same-turn flush path yet,
so callers must clear ``conversation_history`` to ``None`` and let the next
persistence call write the whole compacted list.
In-place compaction is different: ``archive_and_compact()`` has already
soft-archived the previous active rows and inserted ``messages`` as the new
active live transcript under the same session id. If the same agent turn
continues with ``conversation_history=None``, the identity-based flush path
treats those already-persisted compacted dicts as new and appends them a
second time, doubling the active context and retriggering compression.
A shallow copy is intentional: it captures the current compacted dict
identities as history while allowing later same-turn appends to remain new.
"""
if bool(getattr(agent, "_last_compaction_in_place", False)):
return list(messages)
return None
def compress_context(
agent: Any,
messages: list,
@@ -838,11 +805,10 @@ def try_shrink_image_parts_in_messages(
Pillow couldn't help (caller should surface the original error).
Strategy: look for ``image_url`` / ``input_image`` parts carrying a
``data:image/...;base64,...`` payload, plus Anthropic-native
``{"type": "image", "source": {"type": "base64", ...}}`` blocks.
For each one whose encoded size exceeds 4 MB (a safe target that slides
under Anthropic's 5 MB ceiling with header overhead) or whose longest side
exceeds ``max_dimension``, write the base64 to a tempfile, call
``data:image/...;base64,...`` payload. For each one whose encoded
size exceeds 4 MB (a safe target that slides under Anthropic's 5 MB
ceiling with header overhead) or whose longest side exceeds
``max_dimension``, write the base64 to a tempfile, call
``vision_tools._resize_image_for_vision`` to produce a smaller data
URL, and substitute it in place.
@@ -998,28 +964,6 @@ def try_shrink_image_parts_in_messages(
logger.warning("image-shrink recovery: re-encode failed — %s", exc)
return None, triggered_by is not None
def _source_to_data_url(source: Any) -> Optional[str]:
if not isinstance(source, dict) or source.get("type") != "base64":
return None
data = source.get("data")
if not isinstance(data, str) or not data:
return None
media_type = str(source.get("media_type") or "image/jpeg").strip()
if not media_type.startswith("image/"):
media_type = "image/jpeg"
return f"data:{media_type};base64,{data}"
def _write_data_url_to_source(source: dict, data_url: str) -> None:
header, _, data = data_url.partition(",")
media_type = "image/jpeg"
if header.startswith("data:"):
candidate = header[len("data:"):].split(";", 1)[0].strip()
if candidate.startswith("image/"):
media_type = candidate
source["type"] = "base64"
source["media_type"] = media_type
source["data"] = data
for msg in api_messages:
if not isinstance(msg, dict):
continue
@@ -1030,16 +974,6 @@ def try_shrink_image_parts_in_messages(
if not isinstance(part, dict):
continue
ptype = part.get("type")
if ptype == "image":
source = part.get("source")
url = _source_to_data_url(source)
resized, unshrinkable = _shrink_data_url(url or "")
if resized and isinstance(source, dict):
_write_data_url_to_source(source, resized)
changed_count += 1
elif unshrinkable:
unshrinkable_oversized += 1
continue
if ptype not in {"image_url", "input_image"}:
continue
image_value = part.get("image_url")

View File

@@ -28,7 +28,6 @@ import uuid
from typing import Any, Dict, List, Optional
from agent.codex_responses_adapter import _summarize_user_message_for_log
from agent.conversation_compression import conversation_history_after_compression
from agent.display import KawaiiSpinner
from agent.error_classifier import FailoverReason, classify_api_error
from agent.iteration_budget import IterationBudget
@@ -36,7 +35,6 @@ from agent.turn_context import build_turn_context
from agent.turn_retry_state import TurnRetryState
from agent.memory_manager import build_memory_context_block
from agent.message_sanitization import (
close_interrupted_tool_sequence,
_repair_tool_call_arguments,
_sanitize_messages_non_ascii,
_sanitize_messages_surrogates,
@@ -57,7 +55,7 @@ from agent.model_metadata import (
)
from agent.process_bootstrap import _install_safe_stdio
from agent.prompt_caching import apply_anthropic_cache_control
from agent.retry_utils import adaptive_rate_limit_backoff, jittered_backoff
from agent.retry_utils import jittered_backoff
from agent.trajectory import has_incomplete_scratchpad
from agent.usage_pricing import estimate_usage_cost, normalize_usage
from hermes_constants import PARTIAL_STREAM_STUB_ID
@@ -503,7 +501,6 @@ def run_conversation(
stream_callback: Optional[callable] = None,
persist_user_message: Optional[str] = None,
persist_user_timestamp: Optional[float] = None,
moa_config: Optional[dict[str, Any]] = None,
) -> Dict[str, Any]:
"""
Run a complete conversation with tool calling until completion.
@@ -526,19 +523,6 @@ def run_conversation(
Returns:
Dict: Complete conversation result with final response and message history
"""
if moa_config is None:
try:
from hermes_cli.moa_config import decode_moa_turn
_decoded_message, _decoded_moa_config = decode_moa_turn(user_message)
if _decoded_moa_config is not None:
user_message = _decoded_message
moa_config = _decoded_moa_config
if persist_user_message is None:
persist_user_message = _decoded_message
except Exception:
pass
# ── Per-turn setup (the prologue) ──
# All once-per-turn setup — stdio guarding, retry-counter resets, user
# message sanitization, todo/nudge hydration, system-prompt restore-or-
@@ -588,13 +572,6 @@ def run_conversation(
compression_attempts = 0
_turn_exit_reason = "unknown" # Diagnostic: why the loop ended
# Per-turn tally of consecutive successful credential-pool token refreshes,
# keyed by (provider, pool-entry-id). A persistent upstream 401 lets
# ``try_refresh_current()`` "succeed" forever on a single-entry OAuth pool,
# so this tally caps same-entry refreshes and lets the fallback chain take
# over instead of spinning. Reset here so each turn starts fresh. See #26080.
agent._auth_pool_refresh_counts = {}
# Optional opt-in runtime: if api_mode == codex_app_server, hand the
# turn to the codex app-server subprocess (terminal/file ops/patching
# all run inside Codex). Default Hermes path is bypassed entirely.
@@ -824,28 +801,6 @@ def run_conversation(
if effective_system:
api_messages = [{"role": "system", "content": effective_system}] + api_messages
if moa_config:
try:
from agent.moa_loop import aggregate_moa_context
_moa_context = aggregate_moa_context(
user_prompt=original_user_message if isinstance(original_user_message, str) else str(original_user_message),
api_messages=api_messages,
reference_models=moa_config.get("reference_models") or [],
aggregator=moa_config.get("aggregator") or {},
temperature=float(moa_config.get("reference_temperature", 0.6) or 0.6),
aggregator_temperature=float(moa_config.get("aggregator_temperature", 0.4) or 0.4),
)
if _moa_context:
for _msg in reversed(api_messages):
if _msg.get("role") == "user":
_base = _msg.get("content", "")
if isinstance(_base, str):
_msg["content"] = _base + "\n\n" + _moa_context
break
except Exception as _moa_exc:
logger.warning("MoA context aggregation failed: %s", _moa_exc)
# Inject ephemeral prefill messages right after the system prompt
# but before conversation history. Same API-call-time-only pattern.
if agent.prefill_messages:
@@ -1167,7 +1122,7 @@ def run_conversation(
# stream. Mirror the ACP exclusion used for Responses
# API upgrade (lines ~1083-1085).
elif (
agent.provider in {"copilot-acp", "moa"}
agent.provider == "copilot-acp"
or str(agent.base_url or "").lower().startswith("acp://copilot")
or str(agent.base_url or "").lower().startswith("acp+tcp://")
):
@@ -1441,12 +1396,10 @@ def run_conversation(
while time.time() < sleep_end:
if agent._interrupt_requested:
agent._vprint(f"{agent.log_prefix}⚡ Interrupt detected during retry wait, aborting.", force=True)
_interrupt_text = f"Operation interrupted during retry ({_failure_hint}, attempt {retry_count}/{max_retries})."
close_interrupted_tool_sequence(messages, _interrupt_text)
agent._persist_session(messages, conversation_history)
agent.clear_interrupt()
return {
"final_response": _interrupt_text,
"final_response": f"Operation interrupted during retry ({_failure_hint}, attempt {retry_count}/{max_retries}).",
"messages": messages,
"api_calls": api_call_count,
"completed": False,
@@ -1699,56 +1652,6 @@ def run_conversation(
if agent.api_mode in {"chat_completions", "bedrock_converse", "anthropic_messages"}:
assistant_message = _trunc_msg
# ── Content-filter stream stall → fallback (#32421) ──
# When the provider's output-layer safety filter (e.g.
# MiniMax "output new_sensitive (1027)", Azure
# content_filter) kills the stream mid-delivery, the
# raw error was classified at the swallow point and the
# stub tagged ``_content_filter_terminated``. This
# filter is content-deterministic — continuation
# retries against the SAME primary just re-hit it and
# burn paid attempts (the loop used to give up with
# "Response remained truncated after 3 continuation
# attempts" and never consult the fallback chain).
# Escalate to the configured fallback BEFORE retrying.
_cf_terminated = getattr(
response, "_content_filter_terminated", False
)
if (
_cf_terminated
and agent._fallback_index < len(agent._fallback_chain)
):
agent._vprint(
f"{agent.log_prefix}🛡️ Content filter terminated "
f"stream — activating fallback provider...",
force=True,
)
agent._emit_status(
"Content filter terminated stream; switching to fallback..."
)
if agent._try_activate_fallback():
# Roll the partial content (if any was already
# appended in a prior continuation pass) back to
# the last clean turn so the fallback provider
# gets a coherent continuation point.
if truncated_response_parts:
messages = agent._get_messages_up_to_last_assistant(messages)
agent._session_messages = messages
length_continue_retries = 0
truncated_response_parts = []
retry_count = 0
compression_attempts = 0
_retry.primary_recovery_attempted = False
_retry.restart_with_rebuilt_messages = True
break
# No fallback available — fall through to normal
# continuation (best-effort, may loop).
agent._vprint(
f"{agent.log_prefix}⚠️ No fallback provider "
f"configured — retrying with same provider "
f"(may re-hit filter)...",
force=True,
)
if assistant_message is not None and not _trunc_has_tool_calls:
length_continue_retries += 1
interim_msg = agent._build_assistant_message(assistant_message, finish_reason)
@@ -2068,21 +1971,9 @@ def run_conversation(
agent.thinking_callback("")
api_elapsed = time.time() - api_start_time
agent._vprint(f"{agent.log_prefix}⚡ Interrupted during API call.", force=True)
interrupted = True
# Preserve any assistant text already streamed to the user
# before the stop landed. Dropping it leaves history with no
# record of the half-finished reply on screen, so the next turn
# the model "forgets" what it just said — exactly what users hit
# when they stop to redirect mid-response.
_partial = agent._strip_think_blocks(
getattr(agent, "_current_streamed_assistant_text", "") or ""
).strip()
if _partial:
messages.append({"role": "assistant", "content": _partial})
final_response = _partial
else:
final_response = f"{INTERRUPT_WAITING_FOR_MODEL_PREFIX}{api_elapsed:.1f}s elapsed)."
agent._persist_session(messages, conversation_history)
interrupted = True
final_response = f"{INTERRUPT_WAITING_FOR_MODEL_PREFIX}{api_elapsed:.1f}s elapsed)."
break
except Exception as api_error:
@@ -2316,15 +2207,6 @@ def run_conversation(
# "unknown variant `image_url`, expected `text`".
"unknown variant `image_url`, expected `text`",
"unknown variant image_url, expected text",
# OpenRouter routes a request to upstream endpoints and,
# when none of the candidate endpoints for the model accept
# image input, returns HTTP 404 "No endpoints found that
# support image input". Without this phrase the agent never
# strips the images, the retry loop re-sends the same
# rejected request until exhaustion, and the gateway leaves
# every subsequent message queued behind the stuck turn —
# the P1 in issue #21160. The 404 passes the 4xx gate below.
"no endpoints found that support image input",
)
_err_lower = _err_body.lower()
_looks_like_image_rejection = any(
@@ -2781,12 +2663,10 @@ def run_conversation(
# Check for interrupt before deciding to retry
if agent._interrupt_requested:
agent._vprint(f"{agent.log_prefix}⚡ Interrupt detected during error handling, aborting retries.", force=True)
_interrupt_text = f"Operation interrupted: handling API error ({error_type}: {agent._clean_error_message(str(api_error))})."
close_interrupted_tool_sequence(messages, _interrupt_text)
agent._persist_session(messages, conversation_history)
agent.clear_interrupt()
return {
"final_response": _interrupt_text,
"final_response": f"Operation interrupted: handling API error ({error_type}: {agent._clean_error_message(str(api_error))}).",
"messages": messages,
"api_calls": api_call_count,
"completed": False,
@@ -2896,9 +2776,10 @@ def run_conversation(
approx_tokens=approx_tokens,
task_id=effective_task_id,
)
conversation_history = conversation_history_after_compression(
agent, messages
)
# Compression created a new session — clear history
# so _flush_messages_to_session_db writes compressed
# messages to the new session, not skipping them.
conversation_history = None
if len(messages) < original_len or old_ctx > _reduced_ctx:
agent._buffer_status(
f"🗜️ Context reduced to {_reduced_ctx:,} tokens "
@@ -2910,25 +2791,15 @@ def run_conversation(
# Fall through to normal error handling if compression
# is exhausted or didn't help.
# Eager fallback for rate-limit errors (429 or quota exhaustion)
# and transport errors (connection failure / timeout / provider
# overloaded). Rate limits and billing: switch immediately —
# the primary provider won't recover within the retry window.
# Transport errors: allow 1 retry first (transient hiccups
# recover), then fall back if the provider is truly unreachable.
# Eager fallback for rate-limit errors (429 or quota exhaustion).
# When a fallback model is configured, switch immediately instead
# of burning through retries with exponential backoff -- the
# primary provider won't recover within the retry window.
is_rate_limited = classified.reason in {
FailoverReason.rate_limit,
FailoverReason.billing,
}
_is_transport_failure = classified.reason in {
FailoverReason.timeout,
FailoverReason.overloaded,
}
_should_fallback = (
is_rate_limited
or (_is_transport_failure and retry_count >= 2)
)
if _should_fallback and agent._fallback_index < len(agent._fallback_chain):
if is_rate_limited and agent._fallback_index < len(agent._fallback_chain):
# Don't eagerly fallback if credential pool rotation may
# still recover. See _pool_may_recover_from_rate_limit
# for the single-credential-pool and CloudCode-quota
@@ -2943,10 +2814,6 @@ def run_conversation(
agent._buffer_status(
"⚠️ Billing or credits exhausted — switching to fallback provider..."
)
elif _is_transport_failure:
agent._buffer_status(
"⚠️ Provider unreachable — switching to fallback provider..."
)
else:
agent._buffer_status("⚠️ Rate limited — switching to fallback provider...")
if agent._try_activate_fallback(reason=classified.reason):
@@ -3121,9 +2988,10 @@ def run_conversation(
messages, system_message, approx_tokens=approx_tokens,
task_id=effective_task_id,
)
conversation_history = conversation_history_after_compression(
agent, messages
)
# Compression created a new session — clear history
# so _flush_messages_to_session_db writes compressed
# messages to the new session, not skipping them.
conversation_history = None
# Re-estimate tokens after compression. Same-message-count
# compression (tool-result pruning, in-place summarization)
@@ -3287,9 +3155,10 @@ def run_conversation(
messages, system_message, approx_tokens=approx_tokens,
task_id=effective_task_id,
)
conversation_history = conversation_history_after_compression(
agent, messages
)
# Compression created a new session — clear history
# so _flush_messages_to_session_db writes compressed
# messages to the new session, not skipping them.
conversation_history = None
# Re-estimate tokens after compression. Same-message-count
# compression (tool-result pruning, in-place summarization)
@@ -3551,13 +3420,6 @@ def run_conversation(
):
_retry.primary_recovery_attempted = True
retry_count = 0
# Primary transport recovery starts a fresh attempt
# cycle. Re-open fallback state so a follow-on 429 can
# still activate fallback_providers after stale
# pre-recovery fallback/credential-pool bookkeeping.
_retry.has_retried_429 = False
agent._fallback_index = 0
agent._fallback_activated = False
continue
# Try fallback before giving up entirely
if agent._has_pending_fallback():
@@ -3623,65 +3485,6 @@ def run_conversation(
force=True,
)
# Detect thinking-timeout pattern: a known reasoning model
# hit a transport-layer error before the first content
# token arrived. Distinct from _is_stream_drop above
# (which fires for large file-write stream drops) and
# from any classifier reason that's not a transport
# timeout. Reuses the reasoning-model allowlist from
# agent/reasoning_timeouts.py (Fixes #52217) so the
# trigger is consistent with what the per-model
# stale-timeout floor covers. After the classifier
# override at agent/error_classifier.py:720-738 (this
# PR), transport disconnects on reasoning models route
# to FailoverReason.timeout rather than
# context_overflow, so this branch actually fires.
# Detection and message text live in
# agent.thinking_timeout_guidance so they're
# unit-testable without driving the full retry loop.
# (Part 2 of Fixes #52310.)
from agent.thinking_timeout_guidance import (
is_thinking_timeout,
)
_is_thinking_timeout = is_thinking_timeout(
classified,
_model,
error_msg,
)
if _is_thinking_timeout:
agent._vprint(
f"{agent.log_prefix} 💡 The model's thinking "
f"phase exceeded the upstream proxy's idle "
f"timeout before the first content token "
f"arrived. This is a known issue with "
f"reasoning models behind cloud gateways "
f"(NVIDIA NIM, OpenAI, Anthropic, DeepSeek).",
force=True,
)
agent._vprint(
f"{agent.log_prefix} Workarounds in priority order:",
force=True,
)
agent._vprint(
f"{agent.log_prefix} 1. Set "
f"`providers.{_provider}.models.{_model}.stale_timeout_seconds: 900` "
f"in `~/.hermes/config.yaml` to extend the per-call "
f"timeout. (Hermes's built-in floor is 600s for "
f"known reasoning models — if you still see this "
f"after raising, the upstream cap is even shorter.)",
force=True,
)
agent._vprint(
f"{agent.log_prefix} 2. Lower `reasoning_budget` or set "
f"`reasoning_effort: medium` on this model if the provider supports it.",
force=True,
)
agent._vprint(
f"{agent.log_prefix} 3. Use a smaller / faster reasoning "
f"model if the task doesn't require deep thinking.",
force=True,
)
logger.error(
"%sAPI call failed after %s retries. %s | provider=%s model=%s msgs=%s tokens=~%s",
agent.log_prefix, max_retries, _final_summary,
@@ -3698,22 +3501,7 @@ def run_conversation(
_final_response += f"\n\n{_billing_guidance}"
else:
_final_response = f"API call failed after {max_retries} retries: {_final_summary}"
if _is_thinking_timeout:
# Thinking-timeout guidance overrides the generic
# stream-drop guidance — the latter is wrong for
# this case (it suggests splitting large file
# writes, which isn't what happened). See the
# reasoning-model override at
# agent/error_classifier.py:720-738 and the
# detection block above for context.
from agent.thinking_timeout_guidance import (
build_thinking_timeout_guidance,
)
_final_response += build_thinking_timeout_guidance(
provider=_provider,
model=_model,
)
elif _is_stream_drop:
if _is_stream_drop:
_final_response += (
"\n\nThe provider's stream connection keeps "
"dropping — this often happens when generating "
@@ -3745,47 +3533,20 @@ def run_conversation(
_ra_raw = _resp_headers.get("retry-after") or _resp_headers.get("Retry-After")
if _ra_raw:
try:
# Cap at 10 minutes. Anthropic Tier 1 input-token
# buckets reset in ~171s, so a 120s cap caused us to
# retry before the actual reset window and re-trip the
# limit. 600s covers all realistic provider reset
# windows while still rejecting pathological values. (#26293)
_retry_after = min(float(_ra_raw), 600)
_retry_after = min(float(_ra_raw), 120) # Cap at 2 minutes
except (TypeError, ValueError):
pass
wait_time = _retry_after if _retry_after else jittered_backoff(retry_count, base_delay=2.0, max_delay=60.0)
_backoff_policy = None
if is_rate_limited and not _retry_after:
wait_time, _backoff_policy = adaptive_rate_limit_backoff(
retry_count,
base_url=str(_base),
model=_model,
error=api_error,
default_wait=wait_time,
)
if is_rate_limited:
_policy_note = ""
if _backoff_policy == "zai_coding_overload_long":
_policy_note = " (Z.AI Coding overload adaptive long backoff)"
elif _backoff_policy == "zai_coding_overload_short":
_policy_note = " (Z.AI Coding overload short retry)"
_rate_limit_status = f"⏱️ Rate limited. Waiting {wait_time:.1f}s (attempt {retry_count + 1}/{max_retries}){_policy_note}..."
# Normal retries are buffered to avoid noisy transient chatter. Long
# Z.AI Coding waits are different: they can last minutes, so surface
# progress immediately instead of making the TUI look frozen.
if _backoff_policy == "zai_coding_overload_long":
agent._emit_status(_rate_limit_status)
else:
agent._buffer_status(_rate_limit_status)
agent._buffer_status(f"⏱️ Rate limited. Waiting {wait_time:.1f}s (attempt {retry_count + 1}/{max_retries})...")
else:
agent._buffer_status(f"⏳ Retrying in {wait_time:.1f}s (attempt {retry_count}/{max_retries})...")
logger.warning(
"Retrying API call in %ss (attempt %s/%s) %s policy=%s error=%s",
"Retrying API call in %ss (attempt %s/%s) %s error=%s",
wait_time,
retry_count,
max_retries,
agent._client_log_context(),
_backoff_policy or "default",
api_error,
)
# Sleep in small increments so we can respond to interrupts quickly
@@ -3795,12 +3556,10 @@ def run_conversation(
while time.time() < sleep_end:
if agent._interrupt_requested:
agent._vprint(f"{agent.log_prefix}⚡ Interrupt detected during retry wait, aborting.", force=True)
_interrupt_text = f"Operation interrupted: retrying API call after error (retry {retry_count}/{max_retries})."
close_interrupted_tool_sequence(messages, _interrupt_text)
agent._persist_session(messages, conversation_history)
agent.clear_interrupt()
return {
"final_response": _interrupt_text,
"final_response": f"Operation interrupted: retrying API call after error (retry {retry_count}/{max_retries}).",
"messages": messages,
"api_calls": api_call_count,
"completed": False,
@@ -3831,17 +3590,6 @@ def run_conversation(
_retry.restart_with_compressed_messages = False
continue
if _retry.restart_with_rebuilt_messages:
# A content-filter stream stall (#32421) was escalated to the
# fallback chain and the partial content rolled back. Re-issue
# the API call against the now-active fallback provider. Refund
# the budget/count for the stalled attempt so the fallback gets a
# fair turn.
api_call_count -= 1
agent.iteration_budget.refund()
_retry.restart_with_rebuilt_messages = False
continue
if _retry.restart_with_length_continuation:
# Progressively boost the output token budget on each retry.
# Retry 1 → 2× base, retry 2 → 3× base, capped at 32 768.
@@ -4302,19 +4050,6 @@ def run_conversation(
messages.append(assistant_msg)
agent._emit_interim_assistant_message(assistant_msg)
try:
# Persist the assistant tool-call turn before any tool
# side effects run. If a destructive tool restarts or
# terminates Hermes mid-turn, resume logic still sees the
# exact tool-call block that already executed.
agent._flush_messages_to_session_db(messages, conversation_history)
except Exception as exc:
logger.warning(
"Incremental tool-call persistence failed before execution "
"(session=%s): %s",
agent.session_id or "none",
exc,
)
# Close any open streaming display (response box, reasoning
# box) before tool execution begins. Intermediate turns may
@@ -4416,9 +4151,10 @@ def run_conversation(
approx_tokens=agent.context_compressor.last_prompt_tokens,
task_id=effective_task_id,
)
conversation_history = conversation_history_after_compression(
agent, messages
)
# Compression created a new session — clear history so
# _flush_messages_to_session_db writes compressed messages
# to the new session (see preflight compression comment).
conversation_history = None
# Save session log incrementally (so progress is visible even if interrupted)
agent._session_messages = messages
@@ -4460,11 +4196,7 @@ def run_conversation(
"as final response"
)
final_response = _recovered
# Streaming delivered a fragment, not a confirmed
# final preview. Leave response_previewed false so
# gateway fallback delivery can send the recovered
# text plus the abnormal-turn explanation.
agent._response_was_previewed = False
agent._response_was_previewed = True
break
# If the previous turn already delivered real content alongside
@@ -4709,20 +4441,14 @@ def run_conversation(
# status from earlier failed attempts in this turn.
agent._clear_status_buffer()
from agent.agent_runtime_helpers import (
intent_ack_continuation_mode,
)
_ack_mode = intent_ack_continuation_mode(agent)
if (
_ack_mode != "off"
agent.api_mode == "codex_responses"
and agent.valid_tool_names
and codex_ack_continuations < 2
and agent._looks_like_codex_intermediate_ack(
user_message=user_message,
assistant_content=final_response,
messages=messages,
require_workspace=(_ack_mode == "codex_only"),
)
):
codex_ack_continuations += 1
@@ -4753,10 +4479,9 @@ def run_conversation(
final_msg = agent._build_assistant_message(assistant_message, finish_reason)
# Pop thinking-only prefill and empty-response retry
# scaffolding before appending either a final response or a
# verification-stop follow-up. These internal turns are only
# for the next API retry and should not become durable
# transcript context.
# scaffolding before appending the final response. These
# internal turns are only for the next API retry and should
# not become durable transcript context.
while (
messages
and isinstance(messages[-1], dict)
@@ -4768,48 +4493,6 @@ def run_conversation(
):
messages.pop()
try:
from agent.verification_stop import (
build_verify_on_stop_nudge,
verify_on_stop_enabled,
)
if verify_on_stop_enabled():
_verify_nudge = build_verify_on_stop_nudge(
session_id=getattr(agent, "session_id", None),
changed_paths=getattr(agent, "_turn_file_mutation_paths", set()),
attempts=getattr(agent, "_verification_stop_nudges", 0),
)
else:
_verify_nudge = None
except Exception:
logger.debug("verification stop-loop check failed", exc_info=True)
_verify_nudge = None
if _verify_nudge:
agent._verification_stop_nudges = (
getattr(agent, "_verification_stop_nudges", 0) + 1
)
final_msg["finish_reason"] = "verification_required"
messages.append(final_msg)
# Keep the attempted final answer in model history so the
# synthetic user nudge preserves role alternation, but do
# not surface it to the user as an interim answer. The
# whole point of this guard is to prevent premature
# "done" claims before checks run.
messages.append({
"role": "user",
"content": _verify_nudge,
"_verification_stop_synthetic": True,
})
agent._session_messages = messages
# Run the verification-stop loop silently — the nudge is an
# internal turn that should not add noise to the user's
# terminal. Keep a debug breadcrumb in agent.log for tracing.
logger.debug("verification stop-loop nudge issued (attempt %d)",
agent._verification_stop_nudges)
continue
messages.append(final_msg)
_turn_exit_reason = f"text_response(finish_reason={finish_reason})"

View File

@@ -23,7 +23,6 @@ from typing import Any
from agent.file_safety import get_read_block_error, is_write_denied
from agent.redact import redact_sensitive_text
from tools.environments.local import hermes_subprocess_env
ACP_MARKER_BASE_URL = "acp://copilot"
_DEFAULT_TIMEOUT_SECONDS = 900.0
@@ -95,10 +94,7 @@ def _resolve_home_dir() -> str:
def _build_subprocess_env() -> dict[str, str]:
# Copilot ACP is a model-driving CLI executor: it legitimately needs LLM
# provider credentials. Route through the central helper so Tier-1 secrets
# (gateway bot tokens, GitHub auth, infra) are still stripped (#29157).
env = hermes_subprocess_env(inherit_credentials=True)
env = os.environ.copy()
home = _resolve_home_dir()
env["HOME"] = home
from hermes_constants import apply_subprocess_home_env

View File

@@ -11,7 +11,6 @@ import uuid
import re
from dataclasses import dataclass, fields, replace
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
from hermes_constants import OPENROUTER_BASE_URL
@@ -448,63 +447,6 @@ def get_pool_strategy(provider: str) -> str:
DEFAULT_MAX_CONCURRENT_PER_CREDENTIAL = 1
def _write_through_provider_state_to_global_root(
provider_id: str, state: Dict[str, Any]
) -> None:
"""Persist a rotated OAuth ``state`` into the global-root auth.json.
Best-effort write-through for the multi-profile rotation hazard
(#48415 / #43589): nous, openai-codex, and xai-oauth rotate the
refresh_token on refresh, so when a profile pool refresh rotates a grant
it resolved from the root fallback, the rotated chain must land back in
root. Otherwise root keeps a now-revoked refresh token and every other
profile reading the stale root grant dies with ``refresh_token_reused`` /
``invalid_grant`` once its access token expires.
Only updates ``providers.<provider_id>`` in the root store; never touches
the profile store (the caller already saved that). Swallows all errors — a
failed write-through degrades to the pre-existing behavior (root stale), it
must never break the profile's own successful save. Mirrors
``hermes_cli.auth._write_through_xai_oauth_to_global_root`` (which covers
the non-pool xAI refresh path) for the credential-pool refresh path.
"""
try:
global_path = auth_mod._global_auth_file_path()
except Exception:
return
if global_path is None:
# Classic mode (profile == root); the profile save already hit root.
return
# Seat belt: under pytest, refuse to write the real user's
# ~/.hermes/auth.json even when HERMES_HOME points at a profile path
# (mirrors the read-side guard in _load_global_auth_store). Uses the
# unmodified HOME env, not Path.home() which fixtures may monkeypatch.
if os.environ.get("PYTEST_CURRENT_TEST"):
real_home_env = os.environ.get("HOME", "")
if real_home_env:
real_root = Path(real_home_env) / ".hermes" / "auth.json"
try:
if global_path.resolve(strict=False) == real_root.resolve(strict=False):
return
except Exception:
return
try:
if global_path.exists():
global_store = _load_auth_store(global_path)
else:
global_store = {}
if not isinstance(global_store, dict):
return
_store_provider_state(global_store, provider_id, dict(state), set_active=False)
auth_mod._save_auth_store(global_store, global_path)
except Exception as exc: # pragma: no cover - best effort
logger.debug(
"%s pool refresh: write-through to global root failed: %s",
provider_id,
exc,
)
class CredentialPool:
def __init__(self, provider: str, entries: List[PooledCredential]):
self.provider = provider
@@ -537,11 +479,10 @@ class CredentialPool:
self._entries[idx] = new
return
def _persist(self, *, removed_ids: Optional[List[str]] = None) -> None:
def _persist(self) -> None:
write_credential_pool(
self.provider,
[entry.to_dict() for entry in self._entries],
removed_ids=removed_ids,
)
def _is_terminal_auth_failure(
@@ -859,28 +800,6 @@ class CredentialPool:
try:
with _auth_store_lock():
auth_store = _load_auth_store()
# Decide BEFORE writing whether this profile is reading the
# grant from the global root (no own providers.<id> block) vs.
# genuinely shadowing it. A pool refresh rotates single-use
# OAuth refresh tokens, so a profile that resolved the grant
# from root MUST write the rotated chain back to root too —
# otherwise root keeps a revoked refresh token and every other
# profile reading the stale root grant dies with
# refresh_token_reused / invalid_grant once its access token
# expires. This mirrors the xAI write-through in
# hermes_cli.auth._save_xai_oauth_tokens (#43589); the pool
# refresh path is the Codex/xAI analog reported in #48415.
_wt_provider_id = {
"nous": "nous",
"openai-codex": "openai-codex",
"xai-oauth": "xai-oauth",
}.get(self.provider)
write_through_to_root = bool(_wt_provider_id) and not (
isinstance(auth_store.get("providers"), dict)
and isinstance(
auth_store["providers"].get(_wt_provider_id), dict
)
)
if self.provider == "nous":
state = _load_provider_state(auth_store, "nous")
if state is None:
@@ -936,10 +855,6 @@ class CredentialPool:
return
_save_auth_store(auth_store)
if write_through_to_root and _wt_provider_id:
_write_through_provider_state_to_global_root(
_wt_provider_id, state
)
except Exception as exc:
logger.debug("Failed to sync %s pool entry back to auth store: %s", self.provider, exc)
@@ -1125,17 +1040,13 @@ class CredentialPool:
logger.debug(
"Failed to clear terminal xAI OAuth state: %s", clear_exc
)
removed_ids = [
item.id for item in self._entries
if item.source == "loopback_pkce"
]
self._entries = [
item for item in self._entries
if item.source != "loopback_pkce"
]
if self._current_id == entry.id:
self._current_id = None
self._persist(removed_ids=removed_ids)
self._persist()
return None
# For openai-codex: same race as xAI/nous — another Hermes process
# may have consumed the refresh token between our proactive sync
@@ -1195,17 +1106,13 @@ class CredentialPool:
logger.debug(
"Failed to clear terminal Codex OAuth state: %s", clear_exc
)
removed_ids = [
item.id for item in self._entries
if item.source == "device_code"
]
self._entries = [
item for item in self._entries
if item.source != "device_code"
]
if self._current_id == entry.id:
self._current_id = None
self._persist(removed_ids=removed_ids)
self._persist()
return None
# For nous: another process may have consumed the refresh token
# between our proactive sync and the HTTP call. Re-sync from
@@ -1262,17 +1169,13 @@ class CredentialPool:
auth_mod.NOUS_DEVICE_CODE_SOURCE,
f"manual:{auth_mod.NOUS_DEVICE_CODE_SOURCE}",
}
removed_ids = [
item.id for item in self._entries
if item.source in singleton_sources
]
self._entries = [
item for item in self._entries
if item.source not in singleton_sources
]
if self._current_id == entry.id:
self._current_id = None
self._persist(removed_ids=removed_ids)
self._persist()
return None
self._mark_exhausted(entry, None)
return None
@@ -1434,7 +1337,7 @@ class CredentialPool:
pruned_ids = set(entries_to_prune)
self._entries = [e for e in self._entries if e.id not in pruned_ids]
if cleared_any:
self._persist(removed_ids=entries_to_prune)
self._persist()
return available
def _select_unlocked(self) -> Optional[PooledCredential]:
@@ -1608,11 +1511,7 @@ class CredentialPool:
replace(entry, priority=new_priority)
for new_priority, entry in enumerate(self._entries)
]
write_credential_pool(
self.provider,
[entry.to_dict() for entry in self._entries],
removed_ids=[removed.id],
)
self._persist()
if self._current_id == removed.id:
self._current_id = None
return removed
@@ -2274,11 +2173,6 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
def load_pool(provider: str) -> CredentialPool:
provider = (provider or "").strip().lower()
raw_entries = read_credential_pool(provider)
disk_ids = {
entry.get("id")
for entry in raw_entries
if isinstance(entry, dict) and entry.get("id")
}
raw_needs_sanitization = any(
isinstance(payload, dict)
and sanitize_borrowed_credential_payload(payload, provider) != payload
@@ -2307,10 +2201,8 @@ def load_pool(provider: str) -> CredentialPool:
changed |= _normalize_pool_priorities(provider, entries)
if changed:
new_ids = {entry.id for entry in entries}
write_credential_pool(
provider,
[entry.to_dict() for entry in sorted(entries, key=lambda item: item.priority)],
removed_ids=disk_ids - new_ids,
)
return CredentialPool(provider, entries)

View File

@@ -273,21 +273,6 @@ def should_run_now(now: Optional[datetime] = None) -> bool:
# Automatic state transitions (pure function, no LLM)
# ---------------------------------------------------------------------------
def _cron_referenced_skills() -> Set[str]:
"""Skill names referenced by any cron job (incl. paused/disabled).
Best-effort: a cron-module import error or corrupt jobs store must never
break the curator, so any failure yields an empty set (no protection,
but no crash).
"""
try:
from cron.jobs import referenced_skill_names as _refs
return _refs()
except Exception as e:
logger.debug("Curator could not read cron skill references: %s", e, exc_info=True)
return set()
def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int]:
"""Walk every curator-managed skill and move active/stale/archived based on
the latest real activity timestamp. Pinned skills are never touched.
@@ -307,8 +292,6 @@ def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int
stale_cutoff = now - timedelta(days=get_stale_after_days())
archive_cutoff = now - timedelta(days=get_archive_after_days())
cron_referenced = _cron_referenced_skills()
counts = {"marked_stale": 0, "archived": 0, "reactivated": 0, "checked": 0, "seeded": 0}
for row in _u.agent_created_report():
@@ -317,15 +300,6 @@ def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int
if row.get("pinned"):
continue
# A skill referenced by any cron job (incl. paused/disabled) is in
# use by definition — resuming or the next fire must find it. The
# scheduler only bumps usage when a job actually fires, so jobs that
# fire less often than archive_after_days, paused jobs, and far-future
# one-shots would otherwise have their skills aged out from under
# them. Treat referenced skills like pinned: never auto-transition.
if name in cron_referenced:
continue
# First sight of a curation-eligible skill with no persisted record
# (e.g. a newly-eligible built-in): anchor its clock to now and defer.
if not row.get("_persisted", True):
@@ -342,18 +316,6 @@ def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int
current = row.get("state", _u.STATE_ACTIVE)
# Never-used skills (use_count == 0) get a grace floor: don't archive
# one until it is at least stale_after_days old. A use=0 skill is
# absence of evidence, not evidence of staleness — a skill created
# recently may simply not have had its trigger come up yet.
never_used = int(row.get("use_count", 0) or 0) == 0
if never_used and anchor > stale_cutoff:
# Younger than the stale window — leave it alone entirely.
if current == _u.STATE_STALE:
_u.set_state(name, _u.STATE_ACTIVE)
counts["reactivated"] += 1
continue
if anchor <= archive_cutoff and current != _u.STATE_ARCHIVED:
ok, _msg = _u.archive_skill(name)
if ok:
@@ -415,10 +377,8 @@ CURATOR_REVIEW_PROMPT = (
"bodies + `references/`, `templates/`, and `scripts/` subfiles for "
"session-specific detail — not one-session-one-skill micro-entries.\n\n"
"Hard rules — do not violate:\n"
"1. DO NOT touch bundled, hub-installed, or external-dir skills "
"(`skills.external_dirs`). The candidate list below is already filtered "
"to local curator-managed skills only; external skills are externally "
"owned and read-only to this background curator.\n"
"1. DO NOT touch bundled or hub-installed skills. The candidate list "
"below is already filtered to agent-created skills only.\n"
"2. DO NOT delete any skill. Archiving (moving the skill's directory "
"into ~/.hermes/skills/.archive/) is the maximum destructive action. "
"Archives are recoverable; deletion is not.\n"
@@ -428,19 +388,10 @@ CURATOR_REVIEW_PROMPT = (
"back load-bearing UX (slash-command entry points referenced in docs and "
"tips) and are filtered out of the candidate list below — never resurrect "
"one as an archive or absorb target.\n"
"3c. DO NOT archive or prune any skill marked `cron=yes` in the candidate "
"list. A cron job depends on it and will fail to load it on its next "
"run. You MAY still consolidate it into an umbrella — but only because "
"the curator rewrites cron job skill references to follow consolidations; "
"never simply prune it.\n"
"4. DO NOT use usage counters as a reason to skip consolidation. The "
"counters are new and often mostly zero. Judge overlap on CONTENT, "
"not on use_count. 'use=0' is not evidence a skill is valuable; it's "
"absence of evidence either way. Corollary: 'use=0' is ALSO not a "
"reason to PRUNE a skill. Never archive a never-used skill (use=0) "
"unless it is at least 30 days old (check last_activity / created date) "
"AND its content is genuinely obsolete or fully absorbed elsewhere — a "
"recently-created skill simply may not have had its trigger come up yet.\n"
"absence of evidence either way.\n"
"5. DO NOT reject consolidation on the grounds that 'each skill has "
"a distinct trigger'. Pairwise distinctness is the wrong bar. The "
"right bar is: 'would a human maintainer write this as N separate "
@@ -518,9 +469,8 @@ CURATOR_REVIEW_PROMPT = (
"skill, or `absorbed_into=\"\"` when you're truly pruning with no "
"forwarding target. This drives cron-job skill-reference migration — "
"guessing from your YAML summary after the fact is fragile.\n"
" - terminal — move LOCAL candidate content into "
"a support subfile when package integrity requires it; never mv, cp, rm, "
"patch, or rewrite bundled, hub-installed, or external-dir skills\n\n"
" - terminal — mv a sibling into the archive "
"OR move its content into a support subfile\n\n"
"'keep' is a legitimate decision ONLY when the skill is already a "
"class-level umbrella and none of the proposed merges would improve "
"discoverability. 'This is narrow but distinct from its siblings' "
@@ -1460,14 +1410,12 @@ def _render_candidate_list() -> str:
rows = skill_usage.agent_created_report()
if not rows:
return "No agent-created skills to review."
cron_referenced = _cron_referenced_skills()
lines = [f"Agent-created skills ({len(rows)}):\n"]
for r in rows:
lines.append(
f"- {r['name']} "
f"state={r['state']} "
f"pinned={'yes' if r.get('pinned') else 'no'} "
f"cron={'yes' if r['name'] in cron_referenced else 'no'} "
f"activity={r.get('activity_count', 0)} "
f"use={r.get('use_count', 0)} "
f"view={r.get('view_count', 0)} "
@@ -1895,14 +1843,6 @@ def _run_llm_review(prompt: str) -> Dict[str, Any]:
# Disable recursive nudges — the curator must never spawn its own review.
review_agent._memory_nudge_interval = 0
review_agent._skill_nudge_interval = 0
# Tag this fork as autonomous background curation so skill_manage's
# background-review write guard fires. Without this the fork inherits
# the default "assistant_tool" origin, is_background_review() is False,
# and the external/bundled/hub-installed skill_manage guards never
# trigger during the curation pass they exist to protect against.
# turn_context.py binds this onto the write-origin ContextVar at turn
# start (see agent/turn_context.py).
review_agent._memory_write_origin = "background_review"
# Redirect the forked agent's stdout/stderr to /dev/null while it
# runs so its tool-call chatter doesn't pollute the foreground

View File

@@ -6,7 +6,6 @@ Used by AIAgent._execute_tool_calls for CLI feedback.
import logging
import os
import re
import sys
import threading
import time
@@ -16,7 +15,6 @@ from pathlib import Path
from typing import Any
from utils import safe_json_loads
from agent.redact import redact_sensitive_text
from agent.tool_result_classification import file_mutation_result_landed
# ANSI escape codes for coloring tool failure indicators
@@ -179,223 +177,6 @@ def _truncate_preview(text: str, max_len: int | None) -> str:
return text
_SHELL_SILENT_HEADS = {"cd", "pushd", "popd", "export", "set", "unset", "source", ".", "true", "false", ":"}
_SHELL_PIPE_TAIL_HEADS = {"head", "tail", "wc", "sort", "uniq"}
def _shell_basename(head: str) -> str:
return head.rsplit("/", 1)[-1] if head else ""
def _split_shell_words(segment: str) -> list[str]:
words: list[str] = []
buf: list[str] = []
quote: str | None = None
for i, ch in enumerate(segment):
if quote:
buf.append(ch)
if ch == quote and (i == 0 or segment[i - 1] != "\\"):
quote = None
continue
if ch in {"'", '"'}:
quote = ch
buf.append(ch)
continue
if ch.isspace():
if buf:
words.append("".join(buf))
buf = []
continue
buf.append(ch)
if buf:
words.append("".join(buf))
return words
def _strip_shell_pipe_tail(segment: str) -> str:
words = _split_shell_words(segment)
out: list[str] = []
for i, word in enumerate(words):
if word == "|" and _shell_basename(words[i + 1] if i + 1 < len(words) else "") in _SHELL_PIPE_TAIL_HEADS:
break
out.append(word)
return " ".join(out).strip()
def _split_shell_compound(command: str) -> list[str]:
segments: list[str] = []
buf: list[str] = []
quote: str | None = None
i = 0
while i < len(command):
ch = command[i]
if quote:
buf.append(ch)
if ch == quote and (i == 0 or command[i - 1] != "\\"):
quote = None
i += 1
continue
if ch in {"'", '"'}:
quote = ch
buf.append(ch)
i += 1
continue
op_len = 2 if command.startswith("&&", i) or command.startswith("||", i) else 1 if ch in {";", "\n"} else 0
if op_len:
segment = _strip_shell_pipe_tail("".join(buf).strip())
if segment:
segments.append(segment)
buf = []
i += op_len
continue
buf.append(ch)
i += 1
segment = _strip_shell_pipe_tail("".join(buf).strip())
if segment:
segments.append(segment)
return segments
def _shell_head_word(segment: str) -> str:
words = _split_shell_words(segment)
index = 0
while index < len(words) and re.match(r"^[A-Za-z_]\w*=", words[index]):
index += 1
return _shell_basename(words[index] if index < len(words) else "")
def _clean_shell_segment(segment: str) -> str:
words = _split_shell_words(segment)
out: list[str] = []
i = 0
while i < len(words):
word = words[i]
if re.match(r"^\d*(?:>>?|<)$", word):
i += 2
continue
if re.match(r"^\d*(?:>&|<&)\d+$", word) or re.match(r"^\d*>&\d+$", word):
i += 1
continue
out.append(word)
i += 1
return " ".join(out).strip()
def _is_shell_boundary_echo(segment: str) -> bool:
words = _split_shell_words(segment)
if _shell_basename(words[0] if words else "") != "echo":
return False
rest = " ".join(words[1:])
return bool(re.search(r"-{2,}|_exit=|(?:^|\s|=)\$[?{]|PIPESTATUS", rest))
def summarize_shell_command(command: str) -> str:
"""Compact shell wrapper/plumbing for display while preserving raw command elsewhere."""
original = _oneline(command)
if not original:
return ""
segments = _split_shell_compound(original)
if len(segments) <= 1:
return _clean_shell_segment(segments[0] if segments else original) or original
core: list[str] = []
for segment in segments:
cleaned = _clean_shell_segment(segment)
head = _shell_head_word(cleaned)
if cleaned and head not in _SHELL_SILENT_HEADS and not _is_shell_boundary_echo(cleaned):
core.append(cleaned)
if not core:
return original
if len(core) == 1:
return core[0]
count = len(core) - 1
return f"{core[0]} + {count} {'command' if count == 1 else 'commands'}"
def _read_file_line_label(args: dict) -> str:
offset = args.get("offset")
limit = args.get("limit")
if not isinstance(offset, int) or offset <= 0:
return ""
if not isinstance(limit, int) or limit <= 1:
return f"L{offset}"
return f"L{offset}-{offset + limit - 1}"
def redact_browser_typed_text_for_display(value: Any, typed_text: Any) -> Any:
"""Apply secret redaction to browser_type text in display-facing payloads.
Backends sometimes echo the attempted input in error strings or fallback
metadata. When the raw typed value contains a recognizable secret (API
key, token, JWT, etc.) the redacted form differs from the raw value, so we
replace every occurrence of the raw value with its redacted form before a
browser_type result reaches logs, callbacks, the model, or chat history.
Normal typed text (search queries, addresses, form fields) matches no
secret pattern, so it passes through unchanged and stays readable.
Redaction is forced here regardless of the global ``security.redact_secrets``
preference: a typed credential leaking into chat history is a security
boundary, not mere log hygiene.
"""
if typed_text is None:
return value
needle = str(typed_text)
if needle == "":
return value
redacted = redact_sensitive_text(needle, force=True)
if redacted == needle:
# Nothing secret-looking in the typed text; leave payload untouched.
return value
if isinstance(value, str):
return value.replace(needle, redacted)
if isinstance(value, dict):
return {
key: redact_browser_typed_text_for_display(item, typed_text)
for key, item in value.items()
}
if isinstance(value, list):
return [redact_browser_typed_text_for_display(item, typed_text) for item in value]
if isinstance(value, tuple):
return tuple(redact_browser_typed_text_for_display(item, typed_text) for item in value)
return value
def redact_tool_args_for_display(tool_name: str, args: dict | None) -> dict | None:
"""Return a copy of tool args safe for logs/progress UI.
For ``browser_type`` the ``text`` argument is run through the same
secret-pattern redactor used for logs. Recognizable credentials (API
keys, tokens) are masked before the value reaches tool progress
notifications; normal typed text is left intact for debuggability.
"""
if not isinstance(args, dict):
return args
if tool_name == "browser_type" and isinstance(args.get("text"), str):
safe_args = dict(args)
safe_args["text"] = redact_sensitive_text(args["text"], force=True)
return safe_args
return args
def _delegate_task_goal_parts(tasks: Any, *, per_goal_len: int) -> tuple[int, list[str]]:
if not isinstance(tasks, list):
return 0, []
@@ -419,14 +200,13 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
max_len = _tool_preview_max_len
if not args:
return None
args = redact_tool_args_for_display(tool_name, args) or args
primary_args = {
"terminal": "command", "web_search": "query", "web_extract": "urls",
"read_file": "path", "write_file": "path", "patch": "path",
"search_files": "pattern", "browser_navigate": "url",
"browser_click": "ref", "browser_type": "text",
"image_generate": "prompt", "text_to_speech": "text",
"vision_analyze": "question",
"vision_analyze": "question", "mixture_of_agents": "user_prompt",
"skill_view": "name", "skills_list": "category",
"cronjob": "action",
"execute_code": "code", "delegate_task": "goal",
@@ -473,23 +253,6 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
else:
return f"planning {len(todos_arg)} task(s)"
if tool_name in {"terminal", "execute_code"}:
key = "code" if tool_name == "execute_code" else "command"
command = args.get(key)
if command is None:
return None
preview = summarize_shell_command(str(command))
return _truncate_preview(preview, max_len) if preview else None
if tool_name == "read_file":
path = args.get("path") or args.get("file") or args.get("filepath")
if path is None:
return None
label = Path(str(path).replace("\\", "/")).name or str(path)
line_label = _read_file_line_label(args)
preview = f"{label} {line_label}".strip()
return _truncate_preview(preview, max_len) if preview else None
if tool_name == "session_search":
query = _oneline(args.get("query", ""))
return f"recall: \"{query[:25]}{'...' if len(query) > 25 else ''}\""
@@ -1143,7 +906,6 @@ def get_cute_tool_message(
When *result* is provided the line is checked for failure indicators.
Failed tool calls get a red prefix and an informational suffix.
"""
args = redact_tool_args_for_display(tool_name, args) or args
dur = f"{duration:.1f}s"
is_failure, failure_suffix = _detect_tool_failure(tool_name, result)
skin_prefix = get_skin_tool_prefix()
@@ -1181,7 +943,7 @@ def get_cute_tool_message(
return _wrap(f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}")
return _wrap(f"┊ 📄 fetch pages {dur}")
if tool_name == "terminal":
return _wrap(f"┊ 💻 $ {_trunc(build_tool_preview(tool_name, args) or args.get('command', ''), 42)} {dur}")
return _wrap(f"┊ 💻 $ {_trunc(args.get('command', ''), 42)} {dur}")
if tool_name == "process":
action = args.get("action", "?")
sid = args.get("session_id", "")[:12]
@@ -1189,7 +951,7 @@ def get_cute_tool_message(
"wait": f"wait {sid}", "kill": f"kill {sid}", "write": f"write {sid}", "submit": f"submit {sid}"}
return _wrap(f"┊ ⚙️ proc {labels.get(action, f'{action} {sid}')} {dur}")
if tool_name == "read_file":
return _wrap(f"┊ 📖 read {_trunc(build_tool_preview(tool_name, args) or args.get('path', ''), 42)} {dur}")
return _wrap(f"┊ 📖 read {_path(args.get('path', ''))} {dur}")
if tool_name == "write_file":
return _wrap(f"┊ ✍️ write {_path(args.get('path', ''))} {dur}")
if tool_name == "patch":
@@ -1275,6 +1037,8 @@ def get_cute_tool_message(
return _wrap(f"┊ 🔊 speak {_trunc(args.get('text', ''), 30)} {dur}")
if tool_name == "vision_analyze":
return _wrap(f"┊ 👁️ vision {_trunc(args.get('question', ''), 30)} {dur}")
if tool_name == "mixture_of_agents":
return _wrap(f"┊ 🧠 reason {_trunc(args.get('user_prompt', ''), 30)} {dur}")
if tool_name == "send_message":
return _wrap(f"┊ 📨 send {args.get('target', '?')}: \"{_trunc(args.get('message', ''), 25)}\" {dur}")
if tool_name == "cronjob":

View File

@@ -133,31 +133,6 @@ _RATE_LIMIT_PATTERNS = [
"servicequotaexceededexception",
]
# Patterns that indicate provider-side overload, NOT a per-credential rate
# limit or billing problem. The credential is valid — the server is just
# busy — so the correct recovery is "back off and retry the same key", never
# "rotate the credential" (rotating exhausts the pool while the endpoint is
# still busy; a single-key user has nothing to rotate to). Some providers
# (notably Z.AI / Zhipu) reuse HTTP 429 for server-wide overload, so the 429
# status path matches the body against this list before falling through to
# the rate_limit default. Phrases are kept narrow and overload-flavoured so a
# normal rate-limit message ("you have been rate-limited") doesn't hit this
# bucket. (#14038, #15297)
_OVERLOADED_PATTERNS = [
"overloaded",
"temporarily overloaded",
"service is temporarily overloaded",
"service may be temporarily overloaded",
"server is overloaded",
"server overloaded",
"service overloaded",
"service is overloaded",
"upstream overloaded",
"currently overloaded",
"at capacity",
"over capacity",
]
# Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
_USAGE_LIMIT_PATTERNS = [
"usage limit",
@@ -355,14 +330,6 @@ _CONTENT_POLICY_BLOCKED_PATTERNS = [
# echo back; the underscore form is provider-specific enough.
"content_filter",
"responsibleaipolicyviolation",
# MiniMax output-layer safety filter. The error string is surfaced
# verbatim by MiniMax SDK / OpenAI-compatible endpoints, usually in the
# form "output new_sensitive (1027)" when the model's *output* (often a
# large tool-call argument block) trips the upstream safety filter and
# the SSE stream is truncated mid-flight. ``new_sensitive`` is the
# filter name and is narrow enough that billing / format / auth error
# strings will not collide. See #32421.
"new_sensitive",
]
# Auth patterns (non-status-code signals)
@@ -750,26 +717,6 @@ def classify_api_error(
is_disconnect = any(p in error_msg for p in _SERVER_DISCONNECT_PATTERNS)
if is_disconnect and not status_code:
# Reasoning-model override: a transport disconnect on a reasoning
# model is much more likely the upstream proxy idle-killing a
# long thinking stream than a true context overflow — even on
# large sessions. The default disconnect+large-session routing
# below would otherwise send the user into the compression
# branch (should_compress=True) and silently delete
# conversation history on a phantom context-length error.
# Reasoning models have multi-minute thinking phases that
# routinely exceed the cloud gateway's idle window (NVIDIA
# NIM ~120s — first-party repro at NVIDIA/NemoClaw#4846;
# OpenAI worker / Anthropic stream-idle similar). The
# per-reasoning-model stale-timeout floor in
# agent/reasoning_timeouts.py raises the stale-detector
# threshold to tolerate long thinking, so a true
# transport-layer failure here is recoverable via the retry
# path — not via context compression. Reclassify as timeout.
# (Part 1 of Fixes #52310.)
from agent.reasoning_timeouts import get_reasoning_stale_timeout_floor
if get_reasoning_stale_timeout_floor(model) is not None:
return _result(FailoverReason.timeout, retryable=True)
# Absolute token/message-count thresholds are only a proxy for smaller
# context windows. Large-context sessions can have hundreds of
# messages while still being far below their actual token budget.
@@ -896,19 +843,7 @@ def _classify_by_status(
)
if status_code == 429:
# Already checked long_context_tier above. Some providers (notably
# Z.AI / Zhipu) reuse HTTP 429 for server-wide overload — same status
# code as a true per-credential rate limit, but the credential is
# valid and the correct recovery is "back off and retry the same key",
# NOT "rotate the credential" (which exhausts the pool while the
# endpoint is still busy, and does nothing for a single-key user).
# Disambiguate on the error body so an overload 429 takes the
# transient-overload path instead of burning the pool. (#14038)
if any(p in error_msg for p in _OVERLOADED_PATTERNS):
return result_fn(
FailoverReason.overloaded,
retryable=True,
)
# Already checked long_context_tier above; this is a normal rate limit
return result_fn(
FailoverReason.rate_limit,
retryable=True,
@@ -1259,17 +1194,6 @@ def _classify_by_message(
should_fallback=True,
)
# Overloaded / server-busy patterns — must come BEFORE the rate_limit and
# billing checks so that a message-only "overloaded" (no 503/529 status,
# e.g. some Anthropic-compatible proxies) classifies as a transient
# overload (backoff + retry) instead of falling through to `unknown` or
# incorrectly triggering credential rotation.
if any(p in error_msg for p in _OVERLOADED_PATTERNS):
return result_fn(
FailoverReason.overloaded,
retryable=True,
)
# Billing patterns
if any(p in error_msg for p in _BILLING_PATTERNS):
return result_fn(
@@ -1359,25 +1283,19 @@ def _extract_status_code(error: Exception) -> Optional[int]:
def _extract_error_body(error: Exception) -> dict:
"""Extract the structured error body from an SDK exception or its cause chain."""
current = error
for _ in range(5): # Match _extract_status_code() traversal depth.
body = getattr(current, "body", None)
if isinstance(body, dict):
return body
# Some errors have .response.json()
response = getattr(current, "response", None)
if response is not None:
try:
json_body = response.json()
if isinstance(json_body, dict):
return json_body
except Exception:
pass
cause = getattr(current, "__cause__", None) or getattr(current, "__context__", None)
if cause is None or cause is current:
break
current = cause
"""Extract the structured error body from an SDK exception."""
body = getattr(error, "body", None)
if isinstance(body, dict):
return body
# Some errors have .response.json()
response = getattr(error, "response", None)
if response is not None:
try:
json_body = response.json()
if isinstance(json_body, dict):
return json_body
except Exception:
pass
return {}

View File

@@ -77,22 +77,15 @@ def build_write_denied_prefixes(home: str) -> list[str]:
]
def get_safe_write_roots() -> set[str]:
"""Return resolved HERMES_WRITE_SAFE_ROOT paths. Supports multiple directories
separated by ``os.pathsep`` (``:`` on Unix, ``;`` on Windows).
E.g., ``/opt/data:/var/www/html`` on Unix, ``C:\\data;D:\\www`` on Windows."""
env = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
if not env:
return set()
roots: set[str] = set()
for path in env.split(os.pathsep):
if path:
try:
resolved = os.path.realpath(os.path.expanduser(path))
roots.add(resolved)
except (OSError, ValueError):
continue
return roots
def get_safe_write_root() -> Optional[str]:
"""Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset."""
root = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
if not root:
return None
try:
return os.path.realpath(os.path.expanduser(root))
except Exception:
return None
def is_write_denied(path: str) -> bool:
@@ -131,15 +124,9 @@ def is_write_denied(path: str) -> bool:
except Exception:
pass
safe_roots = get_safe_write_roots()
if safe_roots:
allowed = False
for safe_root in safe_roots:
if resolved == safe_root or resolved.startswith(safe_root + os.sep):
allowed = True
break
if not allowed:
return True
safe_root = get_safe_write_root()
if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
return True
return False

View File

@@ -388,98 +388,14 @@ def _sniff_mime_from_bytes(raw: bytes) -> Optional[str]:
# BMP: "BM"
if raw.startswith(b"BM"):
return "image/bmp"
# ISO-BMFF family (HEIC/HEIF/AVIF): bytes 4..8 == 'ftyp', major brand at 8..12
if len(raw) >= 12 and raw[4:8] == b"ftyp":
brand = raw[8:12]
if brand in {b"avif", b"avis"}:
return "image/avif"
if brand in {
b"heic", b"heix", b"hevc", b"hevx",
b"mif1", b"msf1", b"heim", b"heis",
}:
return "image/heic"
# TIFF: II*\0 (little-endian) or MM\0* (big-endian)
if raw[:4] in {b"II*\x00", b"MM\x00*"}:
return "image/tiff"
# ICO: 00 00 01 00 (reserved=0, type=1=icon)
if raw[:4] == b"\x00\x00\x01\x00":
return "image/x-icon"
# SVG: text-based, look for an <svg tag near the start (skip BOM/whitespace)
head = raw[:512].lstrip().lower()
if head.startswith(b"<?xml") or head.startswith(b"<svg"):
if b"<svg" in head:
return "image/svg+xml"
# HEIC/HEIF: ftypheic / ftypheix / ftypmif1 / ftypmsf1 etc.
if len(raw) >= 12 and raw[4:8] == b"ftyp" and raw[8:12] in {
b"heic", b"heix", b"hevc", b"hevx", b"mif1", b"msf1", b"heim", b"heis",
}:
return "image/heic"
return None
# Formats every major vision provider (Anthropic, OpenAI, Gemini, Bedrock)
# accepts natively. Anything outside this set has to be transcoded to PNG
# before we declare media_type, otherwise the provider returns HTTP 400
# ("Could not process image" / "Unsupported image media type") and the
# whole turn fails with no salvage path.
#
# Discord (and a few other chat platforms) freely accept attachments in
# formats outside this set -- AVIF screenshots from Chromium, HEIC from
# iPhones, TIFF from scanners, BMP from old Windows tools, ICO -- so users
# do hit this in practice. SVG is vector and Pillow cannot rasterize it;
# it is skipped (logged) rather than transcoded.
_UNIVERSALLY_SUPPORTED_MIMES = frozenset({
"image/png", "image/jpeg", "image/gif", "image/webp",
})
def _transcode_to_png(raw: bytes) -> Optional[bytes]:
"""Decode arbitrary image bytes with Pillow and re-encode as PNG.
Returns None if Pillow isn't installed or can't decode the input
(rare formats, corrupted bytes, missing optional decoder plugin for
HEIC/AVIF, or vector formats like SVG). Caller falls back to skipping
the image so the rest of the turn still works.
HEIC/HEIF and AVIF need optional Pillow plugins; we try to register
them on demand and swallow ImportError so a missing plugin just
looks like 'Pillow can't decode this' rather than crashing.
"""
try:
from PIL import Image
except ImportError:
logger.info(
"image_routing: Pillow not installed; cannot transcode "
"non-standard image format to PNG. Install with `pip install Pillow` "
"(and `pillow-heif` / `pillow-avif-plugin` for those formats)."
)
return None
# Optional plugin registration. Silent on failure: an unsupported
# format will just fall through to Image.open raising below.
try:
import pillow_heif # type: ignore
pillow_heif.register_heif_opener()
except Exception:
pass
try:
import pillow_avif # type: ignore # noqa: F401 -- registers AVIF on import
except Exception:
pass
try:
from io import BytesIO
with Image.open(BytesIO(raw)) as im:
# Pick an output mode PNG can serialise. Anything other than
# the standard set gets normalised to RGBA so transparency is
# preserved where the source had it.
if im.mode not in {"RGB", "RGBA", "L", "LA", "P"}:
im = im.convert("RGBA")
buf = BytesIO()
im.save(buf, format="PNG", optimize=False)
return buf.getvalue()
except Exception as exc:
logger.info(
"image_routing: Pillow could not transcode image to PNG -- %s", exc
)
return None
def _guess_mime(path: Path, raw: Optional[bytes] = None) -> str:
"""Return image MIME type for *path*.
@@ -515,18 +431,8 @@ def _file_to_data_url(path: Path) -> Optional[str]:
accept large images (OpenAI 49 MB+, Gemini 100 MB) don't pay a silent
quality tax just because one other provider is stricter.
Format compatibility IS handled here: if the sniffed MIME isn't one
of ``_UNIVERSALLY_SUPPORTED_MIMES`` (i.e. it's something like AVIF,
HEIC, BMP, TIFF, or ICO that some providers reject outright), we
transcode to PNG with Pillow before declaring media_type. This fixes
the user-visible "Could not process image" HTTP 400 from Anthropic on
Discord-attached AVIF/HEIC/BMP files.
Returns None if the file can't be read OR if the format isn't
universally supported AND Pillow can't transcode it (Pillow missing,
HEIC/AVIF plugin missing, vector format like SVG, corrupt bytes). The
caller reports those paths in ``skipped`` and the rest of the turn
proceeds.
Returns None only if the file can't be read (missing, permission
denied, etc.); the caller reports those paths in ``skipped``.
"""
try:
raw = path.read_bytes()
@@ -534,22 +440,6 @@ def _file_to_data_url(path: Path) -> Optional[str]:
logger.warning("image_routing: failed to read %s%s", path, exc)
return None
mime = _guess_mime(path, raw=raw)
if mime not in _UNIVERSALLY_SUPPORTED_MIMES:
transcoded = _transcode_to_png(raw)
if transcoded is None:
logger.warning(
"image_routing: %s is %s which is not accepted by all major "
"vision providers and could not be transcoded to PNG; "
"skipping this attachment.",
path, mime,
)
return None
logger.info(
"image_routing: transcoded %s (%s) -> image/png for provider compatibility",
path.name, mime,
)
raw = transcoded
mime = "image/png"
b64 = base64.b64encode(raw).decode("ascii")
return f"data:{mime};base64,{b64}"

View File

@@ -81,19 +81,6 @@ def _bar_chart(values: List[int], max_width: int = 20) -> List[str]:
return ["" * max(1, int(v / peak * max_width)) if v > 0 else "" for v in values]
def _fmt_ms(ms: float) -> str:
"""Compact human duration from milliseconds (e.g. 850ms, 2.4s, 1.5m)."""
try:
ms = float(ms or 0)
except (TypeError, ValueError):
return "0ms"
if ms < 1000:
return f"{int(ms)}ms"
if ms < 60_000:
return f"{ms / 1000:.1f}s"
return f"{ms / 60_000:.1f}m"
class InsightsEngine:
"""
Analyzes session history and produces usage insights.
@@ -151,7 +138,6 @@ class InsightsEngine:
},
"activity": {},
"top_sessions": [],
"telemetry": {},
}
# Compute insights
@@ -162,7 +148,6 @@ class InsightsEngine:
skills = self._compute_skill_breakdown(skill_usage)
activity = self._compute_activity_patterns(sessions)
top_sessions = self._compute_top_sessions(sessions)
telemetry = self._compute_telemetry(cutoff)
return {
"days": days,
@@ -176,37 +161,8 @@ class InsightsEngine:
"skills": skills,
"activity": activity,
"top_sessions": top_sessions,
"telemetry": telemetry,
}
# =========================================================================
# Telemetry (observability) — from the tel_* tables (local telemetry)
# =========================================================================
def _compute_telemetry(self, cutoff: float) -> Dict[str, Any]:
"""Roll up the local telemetry tables for the same window.
Reuses the engine's existing connection. Fully fail-soft: if the tel_*
tables are empty or absent (telemetry.local disabled, fresh install), this
returns an empty dict and the renderer skips the section.
"""
try:
from agent.telemetry import metrics
except Exception:
return {}
try:
since_ns = int(cutoff * 1e9)
if not metrics.has_data(conn=self._conn):
return {}
return {
"workflows": metrics.workflow_summary(since_ns=since_ns, conn=self._conn),
"model_calls": metrics.model_call_summary(since_ns=since_ns, conn=self._conn),
"tool_calls": metrics.tool_call_summary(conn=self._conn),
"errors": metrics.error_summary(conn=self._conn),
}
except Exception:
return {}
# =========================================================================
# Data gathering (SQL queries)
# =========================================================================
@@ -896,80 +852,8 @@ class InsightsEngine:
lines.append(f" {ts['label']:<20} {ts['value']:<18} ({ts['date']}, {ts['session_id']})")
lines.append("")
# Telemetry / observability (local telemetry) — only when data exists
tel = report.get("telemetry") or {}
if tel:
self._append_telemetry_section(lines, tel)
return "\n".join(lines)
def _append_telemetry_section(self, lines: List[str], tel: Dict[str, Any]) -> None:
"""Render the observability rollups (workflows, tools, providers, errors)."""
wf = tel.get("workflows", {})
mc = tel.get("model_calls", {})
tc = tel.get("tool_calls", {})
errs = tel.get("errors", {}).get("by_class", {})
lines.append(" 📡 Observability (local telemetry)")
lines.append(" " + "" * 56)
total_runs = wf.get("total_runs", 0)
if total_runs:
sr = wf.get("success_rate", 0.0) * 100
p50 = wf.get("duration_ms_p50", 0)
p95 = wf.get("duration_ms_p95", 0)
lines.append(
f" Workflows: {total_runs:,} Success: {sr:.1f}% "
f"Duration p50/p95: {_fmt_ms(p50)} / {_fmt_ms(p95)}"
)
by_entry = wf.get("by_entrypoint", {})
if by_entry:
entry_str = ", ".join(
f"{k}: {v}" for k, v in sorted(by_entry.items(), key=lambda x: -x[1])
)
lines.append(f" Entrypoints: {entry_str}")
# Tool reliability
if tc.get("total"):
fail_pct = tc.get("failure_rate", 0.0) * 100
lines.append(
f" Tool calls: {tc['total']:,} Failure rate: {fail_pct:.1f}%"
)
tools = tc.get("by_tool", {})
fails = tc.get("failures_by_tool", {})
top = sorted(tools.items(), key=lambda x: -x[1])[:6]
if top:
parts = []
for name, n in top:
f = fails.get(name, 0)
parts.append(f"{name}: {n}" + (f" ({f} failed)" if f else ""))
lines.append(" " + " ".join(parts))
# Provider / model mix + cache (real names)
by_provider = mc.get("by_provider", {})
if by_provider:
prov_str = ", ".join(
f"{k}: {v}" for k, v in sorted(by_provider.items(), key=lambda x: -x[1])
)
lines.append(f" Providers: {prov_str}")
by_model = mc.get("by_model", {})
if by_model:
model_str = ", ".join(
f"{k}: {v}" for k, v in sorted(by_model.items(), key=lambda x: -x[1])[:8]
)
cache = mc.get("cache_hit_rate", 0.0) * 100
suffix = f" Cache hit: {cache:.1f}%" if cache else ""
lines.append(f" Models: {model_str}{suffix}")
# Error classes
if errs:
err_str = ", ".join(
f"{k}: {v}" for k, v in sorted(errs.items(), key=lambda x: -x[1])[:6]
)
lines.append(f" Errors: {err_str}")
lines.append("")
def format_gateway(self, report: Dict) -> str:
"""Format the insights report for gateway/messaging (shorter)."""
if report.get("empty"):

View File

@@ -1,136 +0,0 @@
#!/usr/bin/env python3
"""``/learn`` — build the standards-guided prompt that turns whatever the user
described into a reusable skill.
``/learn`` is open-ended. The user can point it at anything they can describe:
a directory of code, an API doc URL, a workflow they just walked the agent
through in this conversation, or pasted notes. This module builds ONE prompt
that instructs the live agent to:
1. Gather the sources the user named, using the tools it already has
(``read_file`` / ``search_files`` for dirs, ``web_extract`` for URLs, the
current conversation for "what I just did", the user's text for pasted
material).
2. Author a single ``SKILL.md`` via ``skill_manage`` that follows the Hermes
skill-authoring standards (description <=60 chars, the modern section
order, Hermes-tool framing, no invented commands).
There is no separate distillation engine and no model-tool footprint: the
agent does the work with its existing toolset, so this works identically on
local, Docker, and remote terminal backends. Every surface (CLI ``/learn``,
gateway ``/learn``, the dashboard "Learn a skill" panel) calls
:func:`build_learn_prompt` and feeds the result to the agent as a normal turn.
"""
from __future__ import annotations
# The house-style rules, distilled from AGENTS.md "Skill authoring standards
# (HARDLINE)" and the hermes-agent-dev new-skill salvage reference. Embedded in
# the prompt so the agent authors skills the way a maintainer would by hand.
_AUTHORING_STANDARDS = """\
Follow the Hermes skill-authoring standards exactly. These are the same
HARDLINE rules a maintainer enforces in review:
Frontmatter:
- name: lowercase-hyphenated, <=64 chars, no spaces.
- description: ONE sentence, **<=60 characters**, ends with a period. State the
capability, not the implementation. No marketing words (powerful,
comprehensive, seamless, advanced, robust). Do NOT repeat the skill name. If
the description contains a colon, wrap the whole value in double quotes.
This is the most-violated rule and it is NOT cosmetic: the system-prompt
skill index truncates the description to 60 chars and loads it every
session, so anything past char 60 is silently cut and never routes. After
you write the description, COUNT the characters; if it is over 60, cut it
down before saving — do not ship a sentence and hope.
Good (<=60): `Search arXiv papers by keyword, author, or ID.`
Bad (123): `A comprehensive skill that lets the agent search arXiv for
academic papers using keywords, authors, and categories.`
- version: 0.1.0
- author: always the literal value `Hermes`. NEVER fill it from the host
environment — the OS/login username (e.g. the `user=` line in your
environment hints), git config, or any identity you can probe must not be
written. Skills get shared and published, so an environment-derived name is
a privacy leak the user never opted into; the skill names itself as Hermes.
- platforms: declare `[macos]`, `[linux]`, and/or `[windows]` IF the skill
uses OS-bound primitives (osascript/apt/systemctl => the matching OS; /proc,
os.setsid, signal.SIGKILL => linux; fcntl/termios => POSIX). Prefer fixing it
cross-platform first (tempfile.gettempdir(), pathlib.Path, psutil); gate only
when the dependency is genuinely platform-bound. Omit the field for portable
skills.
- metadata.hermes.tags: a few Capitalized, Relevant, Tags.
Body section order (omit a section only if it genuinely has no content):
1. "# <Human Title>" then a 2-3 sentence intro: what it does, what it does NOT
do, and the key dependency stance (e.g. "stdlib only").
2. "## When to Use" — bullet list of concrete trigger phrases.
3. "## Prerequisites" — exact env vars, install steps, credentials.
4. "## How to Run" — the canonical invocation, framed through Hermes tools.
5. "## Quick Reference" — a flat command/endpoint list, no narration.
6. "## Procedure" — numbered steps with copy-paste-exact commands.
7. "## Pitfalls" — known limits, rate limits, things that look broken but aren't.
8. "## Verification" — a single command/check that proves the skill worked.
Hermes-tool framing (this is what makes it a skill, not shell docs):
- Frame running scripts as "invoke through the `terminal` tool".
- Reference Hermes tools by name in backticks: `terminal`, `read_file`,
`write_file`, `search_files`, `patch`, `web_extract`, `web_search`,
`vision_analyze`, `browser_navigate`, `delegate_task`, `image_generate`,
`text_to_speech`, `cronjob`, `memory`, `skill_view`, `execute_code`.
- Do NOT name shell utilities the agent already has wrapped: say `read_file`
not cat/head/tail, `search_files` not grep/rg/find/ls, `patch` not sed/awk,
`web_extract` not curl-to-scrape, `write_file` not echo>file or heredocs.
- Third-party CLIs (ffmpeg, gh, an SDK) are fine inside a script file, but the
prose still frames them as "invoke through the `terminal` tool". If the
skill needs an MCP server, name it and document its setup in Prerequisites.
Quality bar:
- Prefer exact commands, endpoint URLs, function signatures, and config keys
that appear VERBATIM in the source. NEVER invent flags, paths, or APIs — if
you didn't see it in the source, don't write it.
- Keep it tight and scannable: ~100 lines for a simple skill, ~200 for a
complex one. Don't re-paste the source docs.
- Don't write a router/index/hub skill that only points at other skills.
- Larger scripts/parsers belong in a `scripts/` file (add via
`skill_manage` write_file), referenced from SKILL.md by relative path — not
inlined for the agent to re-type every run. References go in `references/`,
templates in `templates/`."""
def build_learn_prompt(user_request: str) -> str:
"""Build the agent prompt for an open-ended ``/learn`` request.
Args:
user_request: the free-text the user gave after ``/learn`` — a
description of the workflow, paths, URLs, or "what I just did".
Returns:
A complete instruction the agent runs as a normal turn. The agent
gathers the described sources with its existing tools and authors the
skill via ``skill_manage``.
"""
req = (user_request or "").strip()
if not req:
req = (
"the workflow we just went through in this conversation — review "
"the steps taken and distill them into a reusable skill"
)
return (
"[/learn] The user wants you to learn a reusable skill from the "
"source(s) they described below, and save it.\n\n"
f"WHAT TO LEARN FROM:\n{req}\n\n"
"Do this:\n"
"1. Gather the material. Resolve whatever the user named using the "
"tools you already have — `read_file`/`search_files` for local files "
"or directories, `web_extract` for URLs, the current conversation "
"history if they referred to something you just did, and the text "
"they pasted as-is. If the request is ambiguous about scope, make a "
"reasonable choice and note it; do not stall.\n"
"2. Author ONE SKILL.md and save it with the `skill_manage` tool "
"(action=\"create\"). Pick a sensible category. If the procedure needs "
"a non-trivial script, add it under the skill's `scripts/` with "
"`skill_manage` write_file and reference it by relative path.\n\n"
f"{_AUTHORING_STANDARDS}\n\n"
"When done, tell the user the skill name, its category, and a "
"one-line summary of what it captured."
)

View File

@@ -46,39 +46,6 @@ logger = logging.getLogger(__name__)
_SYNC_DRAIN_TIMEOUT_S = 5.0
def normalize_tool_schema(schema: Any) -> Optional[Dict[str, Any]]:
"""Return a function-tool dict with a resolvable top-level ``name``.
Context engines and memory providers expose tool schemas via
``get_tool_schemas()``. The expected shape is a bare function schema
(``{"name": ..., "description": ..., "parameters": ...}``) which callers
wrap as ``{"type": "function", "function": schema}``.
Some providers instead return an entry that is *already* in OpenAI tool
form (``{"type": "function", "function": {"name": ...}}``). Wrapping that
a second time produces ``{"type": "function", "function": {"type":
"function", "function": {...}}}`` whose ``function`` has no top-level
``name``. Strict providers (e.g. DeepSeek) reject the *entire* request
with ``tools[N].function: missing field name`` (HTTP 400), so one bad
schema disables the whole toolset and breaks every turn (#47707).
This helper normalizes both shapes to the bare function schema and
returns ``None`` for anything without a resolvable name, so callers can
skip-with-warning rather than appending a nameless tool.
"""
if not isinstance(schema, dict):
return None
# Unwrap an already-wrapped OpenAI tool entry.
if schema.get("type") == "function" and isinstance(schema.get("function"), dict):
schema = schema["function"]
if not isinstance(schema, dict):
return None
name = schema.get("name", "")
if not name or not isinstance(name, str):
return None
return schema
def memory_provider_tools_enabled(enabled_toolsets: Optional[List[str]]) -> bool:
"""Return whether external memory-provider tools should be exposed."""
if enabled_toolsets is None:
@@ -125,17 +92,11 @@ def inject_memory_provider_tools(agent: Any) -> int:
agent.valid_tool_names = valid_tool_names
added = 0
for raw_schema in get_schemas():
schema = normalize_tool_schema(raw_schema)
if schema is None:
logger.warning(
"Memory provider returned a tool schema with no resolvable "
"name; skipping to avoid poisoning the request (%r)",
raw_schema,
)
for schema in get_schemas():
if not isinstance(schema, dict):
continue
tool_name = schema["name"]
if tool_name in existing_tool_names:
tool_name = schema.get("name", "")
if not tool_name or tool_name in existing_tool_names:
continue
tools.append({"type": "function", "function": schema})
valid_tool_names.add(tool_name)
@@ -409,11 +370,8 @@ class MemoryManager:
_core_tool_names = set(_HERMES_CORE_TOOLS)
# Index tool names → provider for routing
for raw_schema in provider.get_tool_schemas():
schema = normalize_tool_schema(raw_schema)
if schema is None:
continue
tool_name = schema["name"]
for schema in provider.get_tool_schemas():
tool_name = schema.get("name", "")
if tool_name in _core_tool_names:
logger.warning(
"Memory provider '%s' tool '%s' shadows a reserved core "
@@ -700,19 +658,11 @@ class MemoryManager:
seen = set()
for provider in self._providers:
try:
for raw_schema in provider.get_tool_schemas():
schema = normalize_tool_schema(raw_schema)
if schema is None:
logger.warning(
"Memory provider '%s' returned a tool schema with "
"no resolvable name; skipping (%r)",
provider.name, raw_schema,
)
continue
name = schema["name"]
for schema in provider.get_tool_schemas():
name = schema.get("name", "")
if name in _core_tool_names:
continue
if name not in seen:
if name and name not in seen:
schemas.append(schema)
seen.add(name)
except Exception as e:

View File

@@ -279,38 +279,6 @@ def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
return "{}"
def close_interrupted_tool_sequence(messages: list, final_response: Any = None) -> bool:
"""Append a synthetic assistant turn when an interrupted tail is a tool result.
A turn cut short by ``/stop`` can leave the transcript ending on a raw
``tool`` message (a tool finished, or its execution was cancelled, but the
model never streamed a closing assistant turn). Persisting that tail means
the next user message lands as ``… tool → user`` — a role-alternation
violation that strict providers (Gemini, Claude) react to by hallucinating
a continuation of the user's message and ignoring prior context, which
reads to the user as "lost context" (#48879).
``finalize_turn`` closes this on the happy interrupt path, but the
retry/backoff/error interrupt aborts in ``conversation_loop`` ``return``
early and never reach it — this shared helper closes the sequence on all of
them. ``final_response`` is usually empty on an interrupt, so an explicit
placeholder is used rather than an empty-content assistant turn.
Mutates ``messages`` in place. Returns True if a closing turn was appended.
"""
if not messages:
return False
last = messages[-1]
if not isinstance(last, dict) or last.get("role") != "tool":
return False
text = final_response if isinstance(final_response, str) else ""
messages.append({
"role": "assistant",
"content": text.strip() or "Operation interrupted.",
})
return True
def _strip_non_ascii(text: str) -> str:
"""Remove non-ASCII characters, replacing with closest ASCII equivalent or removing.
@@ -463,7 +431,6 @@ def _sanitize_structure_non_ascii(payload: Any) -> bool:
__all__ = [
"_SURROGATE_RE",
"close_interrupted_tool_sequence",
"_sanitize_surrogates",
"_sanitize_structure_surrogates",
"_sanitize_messages_surrogates",

View File

@@ -1,586 +0,0 @@
"""Mixture-of-Agents runtime helpers for /moa turns.
The slash command is deliberately not a model tool. It marks one user turn as
MoA-enabled; the normal Hermes agent loop still owns tool calling and turn
termination, while this module gathers reference-model context before each model
iteration.
"""
from __future__ import annotations
import hashlib
import logging
from concurrent.futures import ThreadPoolExecutor
from typing import Any
from agent.auxiliary_client import call_llm
from agent.transports import get_transport
logger = logging.getLogger(__name__)
# Upper bound on concurrent reference-model calls. References are independent
# advisory calls (no tools, no inter-dependence), so we fan them out the same
# way delegate_task runs a batch: all in flight at once, results collected when
# every reference finishes. Presets rarely list more than a handful of
# references; this cap just protects against a pathologically large preset
# opening dozens of sockets at once.
_MAX_REFERENCE_WORKERS = 8
# Per-tool-result character budget for the advisory reference view. Tool
# results can be huge (a full diff, a 5000-line file dump); replaying them
# verbatim per reference per tool-loop step would blow the reference model's
# context window and cost. We keep the agent's *actions* (tool calls) in full —
# they are cheap, high-signal, and tell the reference what the agent did — but
# preview each tool *result* head+tail so the reference still sees what came
# back without replaying megabytes. The acting aggregator always gets the full,
# untrimmed transcript; this budget only shapes the advisory copy.
_REFERENCE_TOOL_RESULT_BUDGET = 4000
# System prompt prepended to every reference-model call. References are
# advisory — they do NOT act, call tools, or own the task. Without this
# framing a reference receives the bare trimmed conversation and assumes it is
# the acting agent: it then refuses ("I can't access repositories / URLs from
# here") or tries to call tools it doesn't have. The prompt reframes the model
# as an analyst whose job is to reason about the presented state and hand its
# best thinking to the aggregator/orchestrator that will actually act.
_REFERENCE_SYSTEM_PROMPT = (
"You are a reference advisor in a Mixture of Agents (MoA) process. You are "
"NOT the acting agent and you do NOT execute anything: you cannot call "
"tools, run commands, browse, or access files, repositories, or URLs, and "
"you should not try to or apologize for being unable to. A separate "
"aggregator/orchestrator model holds those capabilities and will take the "
"actual actions.\n\n"
"The conversation below is the current state of a task handled by that "
"acting agent. Your job is to give your most intelligent analysis of that "
"state: understand the goal, reason about the problem, and advise on what "
"to do next. Surface the best approach, concrete next steps and tool-use "
"strategy, likely pitfalls and risks, and anything the acting agent may "
"have missed or gotten wrong. Assume any referenced files, URLs, or "
"systems exist and reason about them from the context given rather than "
"asking for access.\n\n"
"Respond with your advice directly — no preamble, no disclaimers about "
"tools or access. Your response is private guidance handed to the "
"aggregator, not an answer shown to the user."
)
def _slot_label(slot: dict[str, str]) -> str:
return f"{slot.get('provider', '').strip()}:{slot.get('model', '').strip()}"
def _slot_runtime(slot: dict[str, str]) -> dict[str, Any]:
"""Resolve a reference/aggregator slot to real runtime call kwargs.
A MoA slot is just a model selection — it must be called the same way any
model is called elsewhere, not through a bare ``call_llm(provider=...,
model=...)`` that leaves base_url/api_key/api_mode unresolved and lets the
auxiliary auto-detector guess. We route the slot's provider through
``resolve_runtime_provider`` (the canonical provider→api_mode/base_url/
api_key resolver the CLI, gateway, and delegate_task all use), so the slot
gets its provider's real API surface — e.g. MiniMax → anthropic_messages,
GPT-5/o-series → max_completion_tokens, custom endpoints → their base_url.
Returns the kwargs to pass through to ``call_llm`` (provider/model plus the
resolved base_url/api_key when available). Falls back to the bare
provider/model on any resolution error so a misconfigured slot still
attempts the call rather than aborting the whole MoA turn.
"""
provider = str(slot.get("provider") or "").strip()
model = str(slot.get("model") or "").strip()
out: dict[str, Any] = {"provider": provider, "model": model}
try:
from hermes_cli.runtime_provider import resolve_runtime_provider
rt = resolve_runtime_provider(requested=provider, target_model=model)
resolved_provider = str(rt.get("provider") or provider).strip().lower()
# call_llm treats an explicit base_url as a custom endpoint. That is
# correct for ordinary OpenAI-compatible targets, but wrong for OAuth /
# provider-backed targets whose provider branch adds auth refresh,
# request metadata, or request-shape adapters. Keep those providers
# identified by name.
if resolved_provider in {"nous", "openai-codex", "xai-oauth"}:
return out
# Pass the resolved endpoint through so call_llm builds the request for
# the provider's actual API surface instead of auto-detecting. base_url
# routes call_llm to the right adapter (incl. anthropic_messages mode);
# api_key is the resolved credential for that provider.
if rt.get("base_url"):
out["base_url"] = rt["base_url"]
if rt.get("api_key"):
out["api_key"] = rt["api_key"]
except Exception as exc: # pragma: no cover - defensive
logger.debug("MoA slot runtime resolution failed for %s: %s", _slot_label(slot), exc)
return out
def _run_reference(
slot: dict[str, str],
ref_messages: list[dict[str, Any]],
*,
temperature: float | None = None,
max_tokens: int | None = None,
) -> tuple[str, str]:
"""Call one reference model and return ``(label, text)``.
The slot is resolved to its provider's real runtime (via ``_slot_runtime``)
and called through the same ``call_llm`` request-building path any model
uses, so per-model wire-format handling (anthropic_messages,
max_completion_tokens, fixed/forbidden temperature) applies identically to
a reference as it would if that model were the acting model. MoA imposes no
cap of its own (``max_tokens`` defaults to ``None`` → omitted → the model's
real maximum); ``temperature`` is only the user's configured preset value,
which call_llm may still override per model.
Never raises: a failed reference becomes a labelled note so the aggregator
can still act with partial context. Designed to run inside a thread pool —
``call_llm`` is synchronous/blocking, so threads (not asyncio) are the right
concurrency primitive, mirroring ``delegate_task``'s batch fan-out.
"""
label = _slot_label(slot)
try:
# Prepend the advisory-role system prompt so the reference understands
# it is analyzing state for an aggregator, not acting on the task. The
# trimmed view (_reference_messages) already strips the agent's own
# system prompt, so this is the only system message the reference sees.
messages = [{"role": "system", "content": _REFERENCE_SYSTEM_PROMPT}, *ref_messages]
response = call_llm(
task="moa_reference",
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
**_slot_runtime(slot),
)
return label, _extract_text(response) or "(empty response)"
except Exception as exc:
logger.warning("MoA reference model %s failed: %s", label, exc)
return label, f"[failed: {exc}]"
def _run_references_parallel(
reference_models: list[dict[str, str]],
ref_messages: list[dict[str, Any]],
*,
temperature: float | None = None,
max_tokens: int | None = None,
) -> list[tuple[str, str]]:
"""Fan out all reference models in parallel, returning outputs in order.
Like ``delegate_task``'s batch mode, every reference is dispatched at once
and we block until all of them finish before handing the joined results to
the aggregator. Output order matches ``reference_models`` so the
``Reference {idx}`` labelling stays stable. MoA presets that reference
another MoA preset are skipped here (recursion guard) with a labelled note.
"""
if not reference_models:
return []
results: list[tuple[str, str] | None] = [None] * len(reference_models)
futures = {}
workers = min(_MAX_REFERENCE_WORKERS, len(reference_models))
with ThreadPoolExecutor(max_workers=workers) as executor:
for idx, slot in enumerate(reference_models):
if slot.get("provider") == "moa":
results[idx] = (
_slot_label(slot),
"[skipped: MoA presets cannot recursively reference MoA]",
)
continue
futures[
executor.submit(
_run_reference,
slot,
ref_messages,
temperature=temperature,
max_tokens=max_tokens,
)
] = idx
# Collect every reference before returning — the aggregator needs the
# complete set, so there is no early-exit / first-completed path here.
for future, idx in futures.items():
results[idx] = future.result()
return [r for r in results if r is not None]
def _truncate_tool_result(text: str, budget: int = _REFERENCE_TOOL_RESULT_BUDGET) -> str:
"""Head+tail preview of a tool result for the advisory view.
Keeps the first and last halves of the budget with a ``[... N chars
omitted ...]`` marker between them, so a reference sees both how the result
started and how it ended without replaying the whole payload.
"""
if not text or len(text) <= budget:
return text
half = budget // 2
omitted = len(text) - 2 * half
return f"{text[:half]}\n[... {omitted} chars omitted ...]\n{text[-half:]}"
def _render_tool_calls(tool_calls: Any) -> str:
"""Render an assistant turn's tool_calls as readable text lines.
The advisory view cannot carry real ``tool_calls`` payloads (strict
providers reject tool_calls the reference never produced), so the agent's
actions are flattened to text the reference can read and reason about.
"""
lines: list[str] = []
for tc in tool_calls or []:
fn = (tc.get("function") or {}) if isinstance(tc, dict) else {}
name = fn.get("name") or (tc.get("name") if isinstance(tc, dict) else "") or "tool"
args = fn.get("arguments")
if isinstance(args, str):
args_text = args
elif args is not None:
try:
import json
args_text = json.dumps(args, ensure_ascii=False)
except Exception:
args_text = str(args)
else:
args_text = ""
lines.append(f"[called tool: {name}({args_text})]" if args_text else f"[called tool: {name}]")
return "\n".join(lines)
def _reference_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""Build an advisory view of the conversation for reference models.
A reference gives an INFORMED judgement on the current state, so it must
see what the agent actually did — its tool calls AND the tool results that
came back — not just the agent's narration. We therefore preserve the whole
conversation flow, but flatten it into clean user/assistant *text* turns:
- system prompt: dropped (8K of Hermes boilerplate, not advisory signal).
- assistant turns: kept; any ``tool_calls`` are rendered inline as
``[called tool: name(args)]`` text lines appended to the turn's text.
- ``tool``-role results: NOT dropped. Each is folded (head+tail preview,
see ``_truncate_tool_result``) into the *preceding* assistant turn as a
``[tool result: ...]`` block, so the reference sees what came back.
This emits ZERO ``tool``-role messages and ZERO ``tool_calls`` arrays — only
plain user/assistant text — so strict providers (Mistral, Fireworks) that
reject orphan tool messages / unproduced tool_calls don't 400, while the
reference still has the full picture.
The view MUST end with a ``user`` turn. Anthropic (and OpenRouter→Anthropic)
interpret a trailing assistant turn as an assistant *prefill* to continue,
and no-prefill models (e.g. Claude Opus 4.8) reject it with
``400 ... must end with a user message``. Rather than DELETE the agent's
latest context to satisfy that (which would blind the reference to the
current state), we APPEND a synthetic user turn asking the reference to
judge the state above. End-on-user is satisfied and no context is lost.
The acting aggregator always receives the full, untrimmed transcript; this
function only shapes the disposable advisory copy.
"""
advisory_instruction = (
"[The conversation above is the current state of the task. Give your "
"most intelligent judgement: what is going on, what should happen next, "
"what risks or mistakes you see, and how the acting agent should "
"proceed.]"
)
rendered: list[dict[str, Any]] = []
last_user_content: str | None = None
for msg in messages:
role = msg.get("role")
content = msg.get("content")
text = content if isinstance(content, str) else ""
if role == "system":
continue
if role == "user":
if text.strip():
last_user_content = text
rendered.append({"role": "user", "content": text})
elif role == "assistant":
parts: list[str] = []
if text.strip():
parts.append(text.strip())
calls_text = _render_tool_calls(msg.get("tool_calls"))
if calls_text:
parts.append(calls_text)
# Empty assistant turns (no text, no calls) carry nothing advisory.
if parts:
rendered.append({"role": "assistant", "content": "\n".join(parts)})
elif role == "tool":
# Fold the tool result into the preceding assistant turn as text so
# the reference sees what came back, without emitting a tool-role
# message a reference never produced.
result_text = _truncate_tool_result(text)
block = f"[tool result: {result_text}]"
if rendered and rendered[-1].get("role") == "assistant":
rendered[-1]["content"] = rendered[-1]["content"] + "\n" + block
else:
# No assistant turn to attach to (e.g. a leading tool result);
# keep it as advisory context on its own assistant-role line.
rendered.append({"role": "assistant", "content": block})
# Any other role is ignored.
# End on a user turn: append a synthetic advisory request rather than
# deleting the agent's latest assistant context. This satisfies Anthropic's
# no-trailing-assistant-prefill rule while preserving full state.
if rendered and rendered[-1].get("role") == "assistant":
rendered.append({"role": "user", "content": advisory_instruction})
elif rendered and rendered[-1].get("role") == "user":
# Already ends on a user turn (fresh user prompt, no agent action yet).
# Leave it — the reference answers that prompt directly.
pass
if not rendered:
# Degenerate case: nothing rendered. Fall back to the latest user turn.
if last_user_content is not None:
return [{"role": "user", "content": last_user_content}]
for msg in reversed(messages):
if msg.get("role") == "user" and isinstance(msg.get("content"), str):
return [{"role": "user", "content": msg["content"]}]
return rendered
def _extract_text(response: Any) -> str:
try:
transport = get_transport("chat_completions")
if transport is None:
raise RuntimeError("chat_completions transport unavailable")
normalized = transport.normalize_response(response)
text = (normalized.content or "").strip()
if text:
return text
except Exception:
pass
try:
content = response.choices[0].message.content
return (content or "").strip()
except Exception:
return ""
def aggregate_moa_context(
*,
user_prompt: str,
api_messages: list[dict[str, Any]],
reference_models: list[dict[str, str]],
aggregator: dict[str, str],
temperature: float = 0.6,
aggregator_temperature: float = 0.4,
max_tokens: int | None = None,
) -> str:
"""Run configured reference models and synthesize their advice.
Failures are returned as model-specific notes instead of aborting the normal
agent loop; the main model can still act with partial context.
``max_tokens`` is ``None`` by default: MoA does not cap reference or
aggregator output, so each model uses its own maximum. ``call_llm`` omits
the parameter entirely when it is ``None`` (see its docstring), which also
sidesteps providers that reject ``max_tokens`` outright. A hardcoded cap
here previously truncated long aggregator syntheses.
"""
reference_outputs: list[tuple[str, str]] = []
ref_messages = _reference_messages(api_messages)
reference_outputs = _run_references_parallel(
reference_models,
ref_messages,
temperature=temperature,
max_tokens=max_tokens,
)
joined = "\n\n".join(
f"Reference {idx}{label}:\n{text}"
for idx, (label, text) in enumerate(reference_outputs, start=1)
)
synth_prompt = (
"You are the aggregator in a Mixture of Agents process. Synthesize the "
"reference responses into concise, actionable guidance for the main "
"Hermes agent. Focus on next steps, tool-use strategy, risks, and any "
"disagreements. Do not answer the user directly unless that is all that "
"is needed; produce context the main agent should use in its normal loop.\n\n"
f"Original user prompt:\n{user_prompt}\n\n"
f"Reference responses:\n{joined}"
)
agg_label = _slot_label(aggregator)
try:
response = call_llm(
task="moa_aggregator",
messages=[{"role": "user", "content": synth_prompt}],
temperature=aggregator_temperature,
max_tokens=max_tokens,
**_slot_runtime(aggregator),
)
synthesis = _extract_text(response)
except Exception as exc:
logger.warning("MoA aggregator model %s failed: %s", agg_label, exc)
synthesis = ""
if not synthesis:
synthesis = joined
return (
"[Mixture of Agents context — use this as private guidance for the "
"normal Hermes agent loop. You may call tools, continue reasoning, or "
"finish normally.]\n"
f"Aggregator: {agg_label}\n"
f"References: {', '.join(_slot_label(slot) for slot in reference_models)}\n\n"
f"{synthesis.strip()}"
)
class MoAChatCompletions:
"""OpenAI-chat-compatible facade where the aggregator is the acting model."""
def __init__(self, preset_name: str, reference_callback: Any = None):
self.preset_name = preset_name or "default"
# Optional display hook. Called as reference outputs become available so
# frontends can show each reference model's answer as a labelled block
# before the aggregator acts. Signature:
# reference_callback(event, **kwargs)
# where event is one of:
# "moa.reference" kwargs: index, count, label, text
# "moa.aggregating" kwargs: aggregator (label), ref_count
# Never raises into the model call — display is best-effort.
self.reference_callback = reference_callback
# State-scoped reference cache. The agent loop calls create() once per
# tool-loop iteration; references should re-run whenever the task STATE
# advances — i.e. on every new user message AND every new tool result —
# so each reference judges the latest state. The advisory view
# (_reference_messages) now renders tool calls + results as text, so its
# signature changes on every new tool response; the cache key is that
# signature, so a new tool result is a cache MISS (references re-run)
# while a redundant create() call with identical state is a HIT (no
# re-run, no re-emit). This gives "fire on every user/tool response"
# for free, without re-firing on a pure no-op re-call.
self._ref_cache_key: tuple | None = None
self._ref_cache_outputs: list[tuple[str, str]] = []
def _emit(self, event: str, **kwargs: Any) -> None:
cb = self.reference_callback
if cb is None:
return
try:
cb(event, **kwargs)
except Exception as exc: # pragma: no cover - display must never break the turn
logger.debug("MoA reference_callback failed for %s: %s", event, exc)
def create(self, **api_kwargs: Any) -> Any:
from hermes_cli.config import load_config
from hermes_cli.moa_config import resolve_moa_preset
preset = resolve_moa_preset(load_config().get("moa") or {}, self.preset_name)
messages = list(api_kwargs.get("messages") or [])
reference_models = preset.get("reference_models") or []
aggregator = preset.get("aggregator") or {}
# MoA does not cap reference or aggregator output: each model uses its
# own maximum. Passing max_tokens=None makes call_llm omit the parameter
# (it never caps by default), so a long aggregator synthesis is never
# truncated and providers that reject max_tokens don't 400.
temperature = float(preset.get("reference_temperature", 0.6) or 0.6)
aggregator_temperature = float(preset.get("aggregator_temperature", api_kwargs.get("temperature") or 0.4) or 0.4)
# When the preset is disabled, skip the reference fan-out and let the
# configured aggregator act alone — it is the preset's acting model, so
# a disabled MoA preset is simply "use the aggregator directly."
if not preset.get("enabled", True):
reference_models = []
reference_outputs: list[tuple[str, str]] = []
ref_messages = _reference_messages(messages)
# Turn-scoped cache: only run + display references when the advisory
# view changed (i.e. a new user turn). Within one turn the agent loop
# calls create() once per tool iteration with the same advisory view;
# reuse the cached outputs and skip both the re-run and the re-emit.
_sig = hashlib.sha256(
"\u0000".join(
f"{m.get('role')}:{m.get('content')}" for m in ref_messages
).encode("utf-8", "replace")
).hexdigest()
_cache_key = (self.preset_name, _sig, tuple(_slot_label(s) for s in reference_models))
_refs_from_cache = _cache_key == self._ref_cache_key and bool(self._ref_cache_outputs)
if _refs_from_cache:
reference_outputs = list(self._ref_cache_outputs)
else:
reference_outputs = _run_references_parallel(
reference_models,
ref_messages,
temperature=temperature,
max_tokens=None,
)
self._ref_cache_key = _cache_key
self._ref_cache_outputs = list(reference_outputs)
# Surface each reference model's answer to the display BEFORE the
# aggregator acts — once per turn (only on the iteration that
# actually ran them). The user sees one labelled block per
# reference (rendered like a thinking block) so the MoA process is
# visible rather than a silent pause. Best-effort: never blocks the
# turn.
_ref_count = len(reference_outputs)
for _idx, (_label, _text) in enumerate(reference_outputs, start=1):
self._emit(
"moa.reference",
index=_idx,
count=_ref_count,
label=_label,
text=_text,
)
if _ref_count:
self._emit(
"moa.aggregating",
aggregator=_slot_label(aggregator),
ref_count=_ref_count,
)
agg_messages = [dict(m) for m in messages]
if reference_outputs:
joined = "\n\n".join(
f"Reference {idx}{label}:\n{text}"
for idx, (label, text) in enumerate(reference_outputs, start=1)
)
guidance = (
"[Mixture of Agents reference context]\n"
f"Preset: {self.preset_name}\n"
f"Aggregator/acting model: {_slot_label(aggregator)}\n"
f"References: {', '.join(label for label, _ in reference_outputs)}\n\n"
"Use the reference responses below as private context. You are the aggregator and acting model: "
"answer the user directly or call tools as needed.\n\n"
f"{joined}"
)
for msg in reversed(agg_messages):
if msg.get("role") == "user" and isinstance(msg.get("content"), str):
msg["content"] = msg["content"] + "\n\n" + guidance
break
else:
agg_messages.append({"role": "user", "content": guidance})
if aggregator.get("provider") == "moa":
raise RuntimeError("MoA aggregator cannot be another MoA preset")
agg_kwargs = dict(api_kwargs)
agg_kwargs["messages"] = agg_messages
# The aggregator is the acting model. Resolve its slot to the provider's
# real runtime (base_url/api_key/api_mode) and call it through the same
# request-building path any model uses — so per-model wire-format
# handling (anthropic_messages, max_completion_tokens, fixed/forbidden
# temperature) applies identically to it. MoA imposes no output cap:
# max_tokens is passed through from the caller (normally None → omitted
# → the model's real maximum). The preset's old hardcoded 4096 default
# is gone — it truncated long syntheses.
return call_llm(
task="moa_aggregator",
messages=agg_messages,
temperature=aggregator_temperature,
max_tokens=agg_kwargs.get("max_tokens"),
tools=agg_kwargs.get("tools"),
extra_body=agg_kwargs.get("extra_body"),
**_slot_runtime(aggregator),
)
class MoAClient:
def __init__(self, preset_name: str, reference_callback: Any = None):
self.chat = type("_MoAChat", (), {})()
self.chat.completions = MoAChatCompletions(preset_name, reference_callback=reference_callback)

View File

@@ -1646,34 +1646,6 @@ def get_model_context_length(
if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
return config_context_length
# 0a. MoA virtual provider — ``model`` is a preset name, not a real model,
# and ``base_url`` is the local virtual endpoint, so every probe below would
# miss and fall through to the 256K default. The aggregator is the acting
# model, so resolve the context window from the aggregator slot's real
# provider+model instead. References are advisory-only and never bound the
# acting context, so they're ignored here.
if (provider or "").strip().lower() == "moa":
try:
from hermes_cli.config import load_config
from hermes_cli.moa_config import resolve_moa_preset
from hermes_cli.runtime_provider import resolve_runtime_provider
preset = resolve_moa_preset(load_config().get("moa") or {}, model)
agg = preset.get("aggregator") or {}
agg_provider = str(agg.get("provider") or "").strip()
agg_model = str(agg.get("model") or "").strip()
if agg_model and agg_provider and agg_provider.lower() != "moa":
rt = resolve_runtime_provider(requested=agg_provider, target_model=agg_model)
return get_model_context_length(
agg_model,
base_url=rt.get("base_url", "") or "",
api_key=rt.get("api_key", "") or "",
provider=agg_provider,
)
except Exception:
logger.debug("MoA aggregator context-length resolution failed", exc_info=True)
# Fall through to the generic default if aggregator resolution failed.
# 0b. custom_providers per-model override — check before any probe.
# This closes the gap where /model switch and display paths used to fall
# back to 128K despite the user having a per-model context_length set.

View File

@@ -1,158 +0,0 @@
"""Shared one-off LLM requests for non-conversational helpers.
A "one-shot" is a single, stateless model call that runs *outside* any
conversation: it never touches a session's history, never breaks prompt
caching, and returns plain text. UI surfaces use it for small generative
chores — a commit message from a diff, a rename suggestion, a summary —
where spinning up an agent turn would be wrong (it would pollute the thread)
and hand-rolling an LLM call at every call site would be worse.
Two ways to call it:
* ``run_oneshot(instructions=..., user_input=...)`` — caller supplies the
full prompt.
* ``run_oneshot(template="commit_message", variables={...})`` — caller
names a registered template and passes its variables; the template owns
the prompt engineering so it stays consistent across CLI/TUI/desktop.
Model selection rides the same auxiliary plumbing as title generation
(:func:`agent.auxiliary_client.call_llm`): pass ``main_runtime`` to inherit
the live session's provider/model, otherwise the configured ``task`` (default
``title_generation``) resolves a cheap/fast backend.
"""
import logging
from typing import Any, Callable, Dict, Optional, Tuple
from agent.auxiliary_client import call_llm, extract_content_or_reasoning
logger = logging.getLogger(__name__)
# A template turns a variables dict into a (instructions, user_input) pair.
# Templates are plain callables (not str.format) so diff/code payloads with
# literal "{" / "}" pass through untouched.
PromptTemplate = Callable[[Dict[str, Any]], Tuple[str, str]]
def _truncate(text: str, limit: int) -> str:
text = text or ""
if len(text) <= limit:
return text
return text[:limit].rstrip() + "\n…(truncated)"
_COMMIT_INSTRUCTIONS = (
"You write git commit messages. Given a diff of staged changes, write ONE "
"concise Conventional Commits message describing what the change does and why.\n"
"Rules:\n"
"- Subject line: type(scope): summary — imperative mood, lower-case, no "
"trailing period, ≤ 72 characters. Types: feat, fix, refactor, perf, docs, "
"test, build, chore, style, ci.\n"
"- Omit the scope if it isn't obvious.\n"
"- Add a short body (wrapped at ~72 cols) ONLY when the change needs "
"explanation; skip it for small/obvious changes.\n"
"- Describe the actual change, never restate the diff line-by-line.\n"
"- Return ONLY the commit message text — no quotes, no markdown fences, no "
"preamble."
)
def _commit_message_template(variables: Dict[str, Any]) -> Tuple[str, str]:
diff = _truncate(str(variables.get("diff") or ""), 12000)
recent = _truncate(str(variables.get("recent_commits") or ""), 1500)
parts = []
if recent.strip():
parts.append(
"Recent commit subjects from this repo (match their style/conventions):\n"
f"{recent}"
)
parts.append("Diff to describe:\n" + (diff or "(no textual diff available)"))
# "Regenerate" must yield something new even on models that decode greedily
# / pin temperature server-side. A trailing nonce isn't enough, so we hand
# back the previous message and require a genuinely different one.
avoid = _truncate(str(variables.get("avoid") or "").strip(), 1000)
if avoid:
parts.append(
"You already proposed the message below and the user wants a "
"different one. Write a NEW message with different wording (and, if "
"reasonable, a different emphasis or scope framing) — do not repeat "
f"it:\n{avoid}"
)
return _COMMIT_INSTRUCTIONS, "\n\n".join(parts)
# Registry of named templates. Add an entry here to give a new surface a
# consistent, reusable prompt without teaching every caller the prompt text.
PROMPT_TEMPLATES: Dict[str, PromptTemplate] = {
"commit_message": _commit_message_template,
}
def render_template(name: str, variables: Optional[Dict[str, Any]] = None) -> Tuple[str, str]:
"""Resolve a registered template into (instructions, user_input).
Raises KeyError if the template name is unknown so callers fail loudly
instead of silently sending an empty prompt.
"""
template = PROMPT_TEMPLATES.get(name)
if template is None:
raise KeyError(f"unknown one-shot template: {name}")
return template(variables or {})
def run_oneshot(
*,
instructions: str = "",
user_input: str = "",
template: Optional[str] = None,
variables: Optional[Dict[str, Any]] = None,
task: str = "title_generation",
max_tokens: int = 1024,
temperature: Optional[float] = 0.3,
timeout: float = 60.0,
main_runtime: Optional[Dict[str, Any]] = None,
) -> str:
"""Run a single stateless LLM request and return its text.
Provide either a registered ``template`` (+ ``variables``) or an explicit
``instructions`` / ``user_input`` pair. Returns the model's text answer,
stripped of surrounding whitespace and any wrapping code fence.
Raises RuntimeError when no LLM provider is configured (surfaced from
:func:`call_llm`) and KeyError for an unknown template name.
"""
if template:
instructions, user_input = render_template(template, variables)
if not (instructions or "").strip() and not (user_input or "").strip():
raise ValueError("run_oneshot requires a template or instructions/user_input")
messages = []
if (instructions or "").strip():
messages.append({"role": "system", "content": instructions})
messages.append({"role": "user", "content": user_input or ""})
response = call_llm(
task=task,
messages=messages,
max_tokens=max_tokens,
temperature=temperature,
timeout=timeout,
main_runtime=main_runtime,
)
text = (extract_content_or_reasoning(response) or "").strip()
return _strip_code_fence(text)
def _strip_code_fence(text: str) -> str:
"""Drop a single wrapping ``` fence the model may have added."""
if not text.startswith("```"):
return text
lines = text.splitlines()
if len(lines) >= 2 and lines[0].startswith("```") and lines[-1].strip() == "```":
return "\n".join(lines[1:-1]).strip()
return text

View File

@@ -1,51 +0,0 @@
"""Petdex pet engine — shared core for the CLI, TUI, and desktop surfaces.
Petdex (https://github.com/crafter-station/petdex) is a public gallery of
animated sprite "pets" for coding agents. Each pet is a ``pet.json`` plus a
``spritesheet.{webp,png}`` of 192×208 px cells. Current Codex/petdex sheets use
an 8-column × 9-row atlas; older Hermes/petdex sheets used an 8-row atlas.
Hermes infers the row taxonomy from the sheet and maps agent activity onto
idle/run/review/failed/wave/jump.
This package is the **single source of truth** for the feature so the base
CLI (Python) and TUI (Ink, via ``tui_gateway``) never duplicate the hard
parts:
- :mod:`agent.pet.constants` — frame geometry + the :class:`PetState` enum.
- :mod:`agent.pet.state` — map agent activity → a :class:`PetState`.
- :mod:`agent.pet.manifest` — fetch the public petdex manifest.
- :mod:`agent.pet.store` — install / list / resolve pets on disk
(profile-aware via ``get_hermes_home()``).
- :mod:`agent.pet.render` — decode a spritesheet and encode frames for a
terminal (kitty / iTerm2 / sixel graphics
protocols, with a Unicode half-block
fallback).
Rendering in the Electron desktop is necessarily TypeScript (canvas), but it
reuses the same on-disk store and the same state semantics.
The whole feature is a *display* concern: it adds no model tool, mutates no
system prompt or toolset, and therefore has zero effect on prompt caching.
"""
from agent.pet.constants import (
DEFAULT_SCALE,
FRAME_H,
FRAME_W,
FRAMES_PER_STATE,
LOOP_MS,
STATE_ROWS,
PetState,
)
from agent.pet.state import derive_pet_state
__all__ = [
"DEFAULT_SCALE",
"FRAME_H",
"FRAME_W",
"FRAMES_PER_STATE",
"LOOP_MS",
"STATE_ROWS",
"PetState",
"derive_pet_state",
]

View File

@@ -1,167 +0,0 @@
"""Pet sprite geometry + animation-state taxonomy.
These values are the common petdex/Codex pet geometry. The real ``pet.json``
usually only carries ``id``/``displayName``/``description``/``spritesheetPath``;
row taxonomy is inferred from the atlas shape so Hermes can render both legacy
8-row sheets and current 9-row Codex sheets.
"""
from __future__ import annotations
from enum import Enum
# Frame geometry (pixels). Current Codex/petdex spritesheets are 8 columns x 9
# rows (1536x1872), while older Hermes/petdex sheets used 9 columns x 8 rows
# (1728x1664). Renderers derive both row taxonomy and real column count from the
# concrete sheet, so either shape works.
FRAME_W = 192
FRAME_H = 208
# Frames consumed per animation state (the petdex web app uses CSS
# ``steps(6)``). A sheet may physically contain more columns; we only step
# through the first ``FRAMES_PER_STATE``.
FRAMES_PER_STATE = 6
# Full-loop duration for one state, milliseconds (petdex default).
LOOP_MS = 1100
# Default on-screen scale relative to native frame size. ``display.pet.scale``
# is the single master scalar: the desktop canvas multiplies its native pixels
# by it and every terminal surface derives its half-block/kitty column width
# from it (see :func:`cols_for_scale`), so one number shrinks all three
# interfaces together. (petdex's own clients render at 0.7; we default smaller
# so the kitty/GUI mascot stays a glanceable corner sprite. The half-block
# fallback can't shrink as far — see ``UNICODE_MIN_COLS`` — and clamps to its
# legibility floor instead.)
DEFAULT_SCALE = 0.33
# User-settable scale bounds (``/pet scale``, desktop slider). Floor keeps the
# pet clickable/visible; ceiling stops a fat-fingered value from filling the
# screen. The unicode fallback additionally clamps to ``UNICODE_MIN_COLS``.
MIN_SCALE = 0.1
MAX_SCALE = 3.0
def clamp_scale(scale: float) -> float:
"""Clamp *scale* to ``[MIN_SCALE, MAX_SCALE]`` (the single validation point)."""
return max(MIN_SCALE, min(MAX_SCALE, scale))
# Terminal cells one native frame spans at ``scale == 1.0``. A cell is ~8px
# wide, a frame is ``FRAME_W`` (192) px → 24 cells. This mirrors the kitty
# graphics placement (``scaled_px // 8``) so at full scale every renderer agrees.
BASE_UNICODE_COLS = FRAME_W // 8
# Legibility floor for the half-block fallback. A half-block cell samples the
# sprite at only 1 horizontal + 2 vertical taps, so below this width a 192×208
# pet collapses into an unreadable blob *regardless* of scale. kitty/GUI draw
# true pixels and have no such floor — that's why the same ``scale: 0.33`` is
# crisp there but mush in half-blocks. ``scale`` shrinks the unicode pet down
# TO this floor (and grows it above), instead of past it into noise.
UNICODE_MIN_COLS = 16
def cols_for_scale(scale: float) -> int:
"""Half-block width implied by *scale*, clamped to the legibility floor.
Above the floor it tracks the kitty cell box (``scaled_px // 8``) so the two
renderers converge at larger sizes; below it the floor keeps the sprite
readable rather than letting it devolve into a blob.
"""
return max(UNICODE_MIN_COLS, round(BASE_UNICODE_COLS * (scale or DEFAULT_SCALE)))
def resolve_cols(scale: float, unicode_cols: int = 0) -> int:
"""Resolve terminal width: explicit *unicode_cols* override, else from *scale*."""
return int(unicode_cols) if unicode_cols and int(unicode_cols) > 0 else cols_for_scale(scale)
class PetState(str, Enum):
"""Animation state a pet can be shown in.
These are Hermes' activity state names. They are not always identical to the
source atlas row names: Codex-format pets use rows like ``jumping`` /
``running`` while the UI keeps the shorter ``jump`` / ``run`` names.
"""
IDLE = "idle"
WAVE = "wave"
RUN = "run"
FAILED = "failed"
REVIEW = "review"
JUMP = "jump"
WAITING = "waiting"
# Legacy Hermes/petdex row order (top -> bottom) used by the older 8-row,
# 9-column atlas shape.
LEGACY_STATE_ROWS: list[str] = [
PetState.IDLE.value,
PetState.WAVE.value,
PetState.RUN.value,
PetState.FAILED.value,
PetState.REVIEW.value,
PetState.JUMP.value,
"extra1",
"extra2",
]
# Current Petdex row order (top -> bottom) used by 1536x1872 atlases:
# 8 columns x 9 rows of 192x208 cells.
CODEX_STATE_ROWS: list[str] = [
PetState.IDLE.value,
"running-right",
"running-left",
"waving",
"jumping",
PetState.FAILED.value,
PetState.WAITING.value,
"running",
PetState.REVIEW.value,
]
# Default/fallback for callers without a sheet. Prefer the current 9-row Codex
# format because generated pets and the public Codex pet contract use it.
STATE_ROWS: list[str] = CODEX_STATE_ROWS
# Canonical Hermes activity names -> accepted row-name aliases in descending
# preference. This keeps our internal state names stable (`wave`/`jump`/`run`)
# while matching Petdex's current `waving`/`jumping`/`running` taxonomy.
STATE_ALIASES: dict[str, tuple[str, ...]] = {
PetState.IDLE.value: (PetState.IDLE.value,),
PetState.WAVE.value: (PetState.WAVE.value, "waving"),
PetState.JUMP.value: (PetState.JUMP.value, "jumping"),
PetState.RUN.value: (PetState.RUN.value, "running"),
PetState.FAILED.value: (PetState.FAILED.value,),
PetState.REVIEW.value: (PetState.REVIEW.value,),
PetState.WAITING.value: (PetState.WAITING.value,),
}
def state_aliases_for(state: "PetState | str") -> tuple[str, ...]:
"""Return accepted row-name aliases for *state* (always non-empty)."""
value = state.value if isinstance(state, PetState) else str(state)
aliases = STATE_ALIASES.get(value)
return aliases if aliases else (value,)
def state_rows_for_grid(row_count: int | None) -> list[str]:
"""Return the row taxonomy for a spritesheet with *row_count* rows."""
try:
rows = int(row_count or 0)
except (TypeError, ValueError):
rows = 0
if rows >= len(CODEX_STATE_ROWS):
return CODEX_STATE_ROWS
return LEGACY_STATE_ROWS
def state_row_index(state: "PetState | str", row_count: int | None = None) -> int:
"""Return the spritesheet row index for *state* (clamped, never raises)."""
rows = state_rows_for_grid(row_count)
for name in state_aliases_for(state):
try:
return rows.index(name)
except ValueError:
continue
return 0 # fall back to the idle row

View File

@@ -1,29 +0,0 @@
"""Pet generation — base-draft → hatch pipeline.
Public surface used by the gateway RPCs, the CLI ``hermes pets generate``
command, and tests:
- :func:`generate_base_drafts` / :func:`hatch_pet` — the two-step flow.
- :class:`HatchResult`, :class:`GenerationError`.
- :mod:`atlas` — deterministic frame extraction + atlas composition/validation.
Image generation is delegated to the active reference-capable
:class:`~agent.image_gen_provider.ImageGenProvider` (OpenAI gpt-image-2 or Krea);
atlas assembly is fully deterministic so it's testable without any API calls.
"""
from __future__ import annotations
from agent.pet.generate.imagegen import GenerationError
from agent.pet.generate.orchestrate import (
HatchResult,
generate_base_drafts,
hatch_pet,
)
__all__ = [
"GenerationError",
"HatchResult",
"generate_base_drafts",
"hatch_pet",
]

File diff suppressed because it is too large Load Diff

View File

@@ -1,251 +0,0 @@
"""Thin image-generation layer for pet sprites.
Wraps the active :class:`~agent.image_gen_provider.ImageGenProvider` with the
two things sprite generation needs that the agent-facing ``image_generate`` tool
doesn't expose: **N variants** (loop) and **reference-image grounding** (so each
animation row stays the same character as the chosen base).
Reference grounding only works on providers that support it — currently OpenAI
``gpt-image-2`` (image edits) and Krea (style references). We resolve to one of
those and surface a clear, actionable error otherwise rather than silently
producing an ungrounded, drifting pet.
"""
from __future__ import annotations
import logging
import os
from dataclasses import dataclass
from pathlib import Path
logger = logging.getLogger(__name__)
# Providers that can ground generation on a reference image, in preference order
# (Nous Portal → OpenAI → OpenRouter → …). OpenRouter/Nous run a quality-first
# model chain and may fall back depending on account access and endpoint behavior,
# so fidelity can vary by configured backend + model availability.
_REF_CAPABLE = ("nous", "openai", "openai-codex", "openrouter", "krea")
# Friendly display label per reference-capable provider, surfaced in the desktop
# pet-gen picker.
_PROVIDER_LABELS: dict[str, str] = {
"nous": "Nous Portal",
"openrouter": "OpenRouter",
"openai": "OpenAI",
"openai-codex": "OpenAI (Codex)",
"krea": "Krea",
}
def _forced_provider_from_env() -> str | None:
"""Optional QA override to force a pet-gen backend.
`HERMES_PET_IMAGE_PROVIDER=<name>` (e.g. `openrouter`) bypasses the normal
active/default provider resolution for pet generation only. Unknown values are
ignored so existing users are unaffected.
"""
forced = os.environ.get("HERMES_PET_IMAGE_PROVIDER", "").strip().lower()
return forced if forced in _REF_CAPABLE else None
class GenerationError(RuntimeError):
"""Raised on any image-generation failure (no provider, API error, IO)."""
@dataclass(frozen=True)
class SpriteProvider:
"""Resolved provider plus whether it can take reference images."""
name: str
provider: object
supports_references: bool
def _discover() -> None:
try:
from hermes_cli.plugins import _ensure_plugins_discovered
_ensure_plugins_discovered()
except Exception as exc: # noqa: BLE001 - discovery is best-effort
logger.debug("image-gen plugin discovery failed: %s", exc)
def resolve_provider(*, require_references: bool = True, prefer: str | None = None) -> SpriteProvider:
"""Pick the image provider to use for sprite work.
Preference: an explicit *prefer* choice (the desktop pet-gen picker) when it's
reference-capable and configured, then the configured/active provider when
it's reference-capable, else the first available reference-capable provider.
With *require_references* off we fall back to any available provider (used for
prompt-only base drafts).
"""
_discover()
from agent.image_gen_registry import get_active_provider, get_provider
# QA override: force one provider for pet-gen iteration regardless of the
# globally active image_gen backend.
forced = _forced_provider_from_env()
if forced:
chosen = get_provider(forced)
if chosen is not None and chosen.is_available():
return SpriteProvider(name=forced, provider=chosen, supports_references=True)
# An explicit user pick wins when it's reference-capable and has credentials;
# otherwise we ignore it and fall through to the normal resolution.
if prefer:
chosen = get_provider(prefer)
if prefer in _REF_CAPABLE and chosen is not None and chosen.is_available():
return SpriteProvider(name=prefer, provider=chosen, supports_references=True)
# Configured / active provider first.
active = None
try:
active = get_active_provider()
except Exception: # noqa: BLE001
active = None
if active is not None:
name = getattr(active, "name", "")
if name in _REF_CAPABLE and active.is_available():
return SpriteProvider(name=name, provider=active, supports_references=True)
# Any available reference-capable provider.
for name in _REF_CAPABLE:
provider = get_provider(name)
if provider is not None and provider.is_available():
return SpriteProvider(name=name, provider=provider, supports_references=True)
if not require_references and active is not None and active.is_available():
return SpriteProvider(
name=getattr(active, "name", "unknown"), provider=active, supports_references=False
)
raise GenerationError(
"Pet generation needs an image backend that supports reference images. "
"Open `hermes tools` → Image Generation and configure Nous Portal, "
"OpenRouter, or OpenAI (gpt-image-2) with an API key."
)
def list_sprite_providers() -> list[dict]:
"""The reference-capable providers available to pick for pet generation.
Returns ``[{name, label, default}]`` for every ref-capable provider the user
actually has credentials for, in preference order, marking the one
:func:`resolve_provider` would choose with no explicit preference. Empty when
none is configured (the picker hides itself). Best-effort: discovery hiccups
yield an empty list.
"""
_discover()
from agent.image_gen_registry import get_provider
try:
default_name = resolve_provider(require_references=True).name
except GenerationError:
default_name = ""
out: list[dict] = []
for name in _REF_CAPABLE:
provider = get_provider(name)
if provider is None or not provider.is_available():
continue
out.append(
{
"name": name,
"label": _PROVIDER_LABELS.get(name, name),
"default": name == default_name,
}
)
return out
def _save_local(image_ref: str, *, prefix: str) -> Path:
"""Return a local path for *image_ref*, downloading it if it's a URL."""
if image_ref.startswith(("http://", "https://")):
from agent.image_gen_provider import save_url_image
return Path(save_url_image(image_ref, prefix=prefix))
return Path(image_ref)
def _rejected_background(error: str) -> bool:
"""True when a provider error is specifically about the ``background`` param.
Transparent backgrounds are a per-model capability (e.g. some gpt-image tiers
reject ``background=transparent`` outright). We detect that one rejection so
we can retry without the flag rather than failing the whole pet — our chroma
key pass makes the result transparent regardless.
"""
lowered = (error or "").lower()
return "background" in lowered and ("not supported" in lowered or "transparent" in lowered)
def generate(
prompt: str,
*,
n: int = 1,
reference_images: list[Path] | None = None,
provider: SpriteProvider | None = None,
prefix: str = "pet_gen",
aspect_ratio: str = "square",
) -> list[Path]:
"""Generate *n* sprite images and return their local paths.
*reference_images* grounds the output on a base image (required for rows).
*aspect_ratio* picks the canvas: ``"square"`` for single-character base
drafts, ``"landscape"`` for multi-frame row strips (the wider 1536px canvas
gives every frame real horizontal room so winged poses don't have to be
shrunk to avoid touching their neighbors).
We *ask* for a transparent background, but fall back to an opaque generation
(cleaned up downstream by the chroma-key pass) on models that reject the
flag. Raises :class:`GenerationError` if nothing usable comes back.
"""
sprite = provider or resolve_provider(require_references=bool(reference_images))
if reference_images and not sprite.supports_references:
raise GenerationError(
f"image backend '{sprite.name}' cannot use reference images; "
"configure OpenAI gpt-image-2 or Krea for pet generation"
)
refs = [str(p) for p in (reference_images or [])]
def _run(extra: dict) -> tuple[Path | None, str]:
kwargs: dict = {"aspect_ratio": aspect_ratio, **extra}
if refs:
# Providers disagree on the ref kwarg name: our OpenRouter/Nous
# backends read ``reference_images``, OpenAI's gpt-image-2 reads
# ``reference_image_urls``. Send both; each ignores the other.
kwargs["reference_images"] = refs
kwargs["reference_image_urls"] = refs
try:
result = sprite.provider.generate(prompt, **kwargs)
except Exception as exc: # noqa: BLE001 - normalize provider crashes
logger.debug("provider.generate crashed: %s", exc)
return None, str(exc)
if not isinstance(result, dict) or not result.get("success"):
return None, (result or {}).get("error", "unknown error") if isinstance(result, dict) else "no result"
image_ref = result.get("image")
if not image_ref:
return None, "provider returned no image"
try:
return _save_local(str(image_ref), prefix=prefix), ""
except Exception as exc: # noqa: BLE001
return None, f"could not save generated image: {exc}"
out: list[Path] = []
last_error = ""
allow_transparent = True
for _ in range(max(1, n)):
path, err = _run({"background": "transparent"} if allow_transparent else {})
# Model doesn't support the transparent flag → drop it for this and every
# remaining variant (no point re-probing a capability we just disproved).
if path is None and allow_transparent and _rejected_background(err):
allow_transparent = False
path, err = _run({})
if path is not None:
out.append(path)
else:
last_error = err
if not out:
raise GenerationError(last_error or "image generation produced no output")
return out

View File

@@ -1,358 +0,0 @@
"""Pet generation orchestration — the base-draft → hatch flow.
Two steps, mirroring the UX across every surface:
1. :func:`generate_base_drafts` — a handful of prompt-only "what should this pet
look like" variants. Cheap; the user picks one (or retries for a fresh set).
2. :func:`hatch_pet` — takes the chosen base and generates one grounded row
strip per Hermes state, slices each into frames, composes the atlas, validates
it, and writes the pet into the store.
Splitting it this way bounds cost (4 cheap base calls per round; the ~6 row
calls happen once, on the pet you actually keep) and gives each UI a natural
preview/loading point.
"""
from __future__ import annotations
import logging
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass
from pathlib import Path
from typing import Callable
from agent.pet.generate import atlas, imagegen, prompts
from agent.pet.generate.imagegen import GenerationError, SpriteProvider
logger = logging.getLogger(__name__)
# (event, detail) — e.g. ("row", "idle"), ("compose", ""), ("save", "<slug>").
ProgressFn = Callable[[str, str], None]
# Image generations are independent network calls, so we fan them out instead of
# blocking on each in turn — a hatch is ~8 row calls that would otherwise run
# back-to-back and routinely blow past the client's RPC timeout. Capped so we
# don't hammer the provider's rate limit (one cold call can still be slow).
_MAX_PARALLEL_GENERATIONS = 4
# How many times to (re)generate a single row before accepting a best-effort
# slice. Early attempts demand clean per-pose gutters; the last is lenient so a
# stubborn row still yields frames instead of dropping out entirely.
_ROW_GEN_ATTEMPTS = 3
_MIN_FILLED_STATES = 6
_REQUIRED_STATES = frozenset({"idle", "running-right", "waving"})
@dataclass(frozen=True)
class HatchResult:
"""Outcome of a successful :func:`hatch_pet`."""
slug: str
display_name: str
spritesheet: Path
states: list[str]
validation: dict
def _harden_transparency(path: Path) -> Path:
"""Key out any solid backdrop the provider painted; save as an RGBA PNG.
``background=transparent`` is requested on every call, but image models honor
it inconsistently — some still paint a flat (often near-white) backdrop. We
run the same chroma-key pass the row extractor uses so every base draft the
user picks between (and the reference the rows are grounded on) is a clean
cutout. Best-effort: a decode failure leaves the original untouched.
"""
from PIL import Image
try:
with Image.open(path) as opened:
keyed = atlas.remove_background(opened.convert("RGBA"))
# Zero the RGB of any leftover semi-transparent edge pixels so a keyed
# draft has no colored halo when composited on the dark UI.
keyed = atlas._clear_transparent_rgb(keyed)
out = path.with_suffix(".png")
keyed.save(out, format="PNG")
return out
except Exception as exc: # noqa: BLE001 - cosmetic; fall back to the raw image
logger.debug("base draft transparency hardening failed for %s: %s", path, exc)
return path
def generate_base_drafts(
concept: str,
*,
n: int = 4,
style: str = "auto",
reference_images: list[Path] | None = None,
provider: SpriteProvider | None = None,
on_draft: Callable[[int, Path], None] | None = None,
is_cancelled: Callable[[], bool] | None = None,
) -> list[Path]:
"""Generate *n* candidate base looks for *concept*; returns image paths.
Each draft is hardened to a transparent cutout (see :func:`_harden_transparency`).
Drafts are generated concurrently and *on_draft(index, path)* fires as each
one finishes (not at the end) so callers can stream previews to the UI
instead of leaving it blank until the whole batch is done.
*is_cancelled*, when supplied, is polled cooperatively: a draft that hasn't
started yet is skipped, and once it trips we stop staging/streaming further
drafts and cancel any queued work (already-in-flight provider calls can't be
hard-killed, but their results are dropped).
"""
# A user reference image (e.g. their own pet) grounds every draft, so it
# needs a reference-capable provider — same requirement as the row passes.
refs = reference_images or None
sprite = provider or imagegen.resolve_provider(require_references=bool(refs))
cancelled = is_cancelled or (lambda: False)
# Each draft is its own one-shot generation, run concurrently so the user
# waits for one image, not N. A single draft failing must not sink the set.
# Each gets a distinct variation nudge so the options aren't near-duplicates.
logger.info("pet generate: drafting %d base looks for %r (style=%s)", n, concept, style)
def _one(index: int) -> tuple[int, Path | None, str | None]:
if cancelled():
return index, None, None
t0 = time.monotonic()
variation = prompts.BASE_VARIATIONS[index % len(prompts.BASE_VARIATIONS)]
prompt = prompts.build_base_prompt(concept, style=style, variation=variation)
try:
out = imagegen.generate(prompt, n=1, reference_images=refs, provider=sprite, prefix="pet_base")
except Exception as exc: # noqa: BLE001 - tolerate a single failed draft
logger.warning("pet generate: draft %d failed after %.1fs: %s", index, time.monotonic() - t0, exc)
return index, None, str(exc)
if not out:
logger.warning("pet generate: draft %d produced no image", index)
return index, None, "the image provider returned no image"
logger.info("pet generate: draft %d ready in %.1fs", index, time.monotonic() - t0)
return index, _harden_transparency(out[0]), None
workers = max(1, min(n, _MAX_PARALLEL_GENERATIONS))
results: dict[int, Path] = {}
errors: list[str] = []
with ThreadPoolExecutor(max_workers=workers) as pool:
futures = [pool.submit(_one, i) for i in range(n)]
# as_completed runs in *this* (the caller's) thread, so on_draft — and any
# gateway event it emits — inherits the request's bound transport, unlike
# the worker threads above.
for fut in as_completed(futures):
if cancelled():
logger.info("pet generate: cancelled — dropping remaining drafts")
for pending in futures:
pending.cancel()
break
index, path, err = fut.result()
if path is None:
if err:
errors.append(err)
continue
results[index] = path
if on_draft is not None:
try:
on_draft(index, path)
except Exception as exc: # noqa: BLE001 - progress is best-effort
logger.debug("on_draft callback failed: %s", exc)
drafts = [results[i] for i in sorted(results)]
if not drafts and not cancelled():
# Surface *why* — every draft failed for a reason (a content-policy refusal
# on a name like "minion", a provider/auth error, …); the most common one
# is the representative cause. Far more useful than "no usable drafts".
raise GenerationError(_drafts_failed_reason(errors))
return drafts
def _drafts_failed_reason(errors: list[str]) -> str:
"""The representative reason a draft round produced nothing, humanized."""
if not errors:
return "image generation produced no usable drafts"
from collections import Counter
return _humanize_image_error(Counter(errors).most_common(1)[0][0])
def _humanize_image_error(error: str) -> str:
"""Turn a raw provider error into a friendly, actionable sentence.
The big one is moderation: image models refuse trademarked characters and
real people (e.g. "minion"), which reads as an opaque 400 otherwise.
"""
low = error.lower()
if any(s in low for s in ("moderation_blocked", "safety system", "content policy", "content_policy")):
return (
"The image provider blocked this prompt — its safety filter rejects "
"trademarked characters and real people. Try an original description."
)
if any(s in low for s in ("api key", "unauthorized", "401", "auth")):
return "The image provider rejected the request — check your API key in Settings → Providers."
if "rate limit" in low or "429" in low:
return "The image provider is rate-limiting — wait a moment and try again."
# Otherwise the first line, trimmed of the noisy provider envelope.
return error.splitlines()[0].strip()[:200]
def hatch_pet(
*,
base_image: str | Path,
slug: str,
display_name: str = "",
description: str = "",
concept: str = "",
style: str = "auto",
on_progress: ProgressFn | None = None,
provider: SpriteProvider | None = None,
is_cancelled: Callable[[], bool] | None = None,
) -> HatchResult:
"""Turn an approved base image into a full, installed Hermes pet.
Generates a grounded row strip per state, extracts frames, composes +
validates the atlas, and registers it. The idle row falls back to the base
look so the pet always renders. Raises :class:`GenerationError` on failure.
*is_cancelled*, when supplied, is polled cooperatively: rows that haven't
started are skipped, queued rows are cancelled, and once every row is done we
abort (raising :class:`GenerationError`) before composing/saving so a stopped
hatch never writes a half-built pet.
"""
base = Path(base_image)
if not base.is_file():
raise GenerationError(f"base image not found: {base}")
sprite = provider or imagegen.resolve_provider(require_references=True)
progress = on_progress or (lambda *_: None)
cancelled = is_cancelled or (lambda: False)
label = concept or display_name or slug
frames_by_state: dict[str, list] = {}
total_rows = len(atlas.ROW_SPECS)
logger.info("pet hatch %r: generating %d animation rows", slug, total_rows)
# Generate every state's row strip concurrently — they're independent
# grounded calls, so the hatch waits for the slowest row, not their sum. A
# single row failing is tolerated (idle is guaranteed below).
def _gen_row(spec: tuple[str, int, int]) -> tuple[str, list | None]:
state, _row, count = spec
if cancelled():
return state, None
t0 = time.monotonic()
last_exc: Exception | None = None
# Self-healing: a model occasionally returns a row whose poses are touching
# (no clean gutters), which slices badly. We retry such rolls; only the
# final attempt falls back to lenient ``auto`` slicing so a stubborn row
# still yields *something* rather than dropping the whole row.
for attempt in range(_ROW_GEN_ATTEMPTS):
if cancelled():
return state, None
strict = attempt < _ROW_GEN_ATTEMPTS - 1
try:
strips = imagegen.generate(
prompts.build_row_prompt(state, count, label, style=style),
n=1,
reference_images=[base],
provider=sprite,
prefix=f"pet_row_{state}",
# Wider canvas → each frame gets real horizontal room, so winged
# poses keep a full, healthy size and still leave clean gutters.
aspect_ratio="landscape",
)
# ``components`` requires clean per-pose gutters (raises otherwise),
# so a touching roll is rejected and regenerated; the last attempt
# uses ``auto`` (equal-slot fallback, never raises). Raw (fit=False)
# so normalize_cells registers the whole pet at once.
method = "components" if strict else "auto"
frames = atlas.extract_strip_frames(strips[0], count, method=method, fit=False)
logger.info(
"pet hatch %r: row %r ready in %.1fs (attempt %d)",
slug, state, time.monotonic() - t0, attempt + 1,
)
return state, frames
except Exception as exc: # noqa: BLE001 - retried; one bad row is tolerated
last_exc = exc
logger.warning(
"pet hatch %r: row %r attempt %d/%d failed: %s",
slug, state, attempt + 1, _ROW_GEN_ATTEMPTS, exc,
)
logger.warning(
"pet hatch %r: row %r gave up after %.1fs: %s",
slug, state, time.monotonic() - t0, last_exc,
)
return state, None
# running-left is derived by mirroring running-right (guaranteed-consistent
# and one fewer generation), so we don't generate it directly.
generated_specs = [spec for spec in atlas.ROW_SPECS if spec[0] != "running-left"]
workers = max(1, min(len(generated_specs), _MAX_PARALLEL_GENERATIONS))
done = 0
with ThreadPoolExecutor(max_workers=workers) as pool:
futures = [pool.submit(_gen_row, spec) for spec in generated_specs]
# as_completed runs on the caller (request) thread, so progress events
# emitted here inherit the request transport — unlike the worker threads.
for fut in as_completed(futures):
if cancelled():
logger.info("pet hatch %r: cancelled — dropping remaining rows", slug)
for pending in futures:
pending.cancel()
break
state, frames = fut.result()
done += 1
progress("row", f"{state}:{done}:{total_rows}")
if frames:
frames_by_state[state] = frames
if cancelled():
raise GenerationError("hatch cancelled")
# Derive running-left from the approved running-right row (per-frame mirror,
# preserving order/timing). Missing running-right is rejected below; a pet
# without its canonical walk cycle is a failed hatch, not a shippable mascot.
right = frames_by_state.get("running-right")
if right:
done += 1
progress("row", f"running-left:{done}:{total_rows}")
frames_by_state["running-left"] = atlas.mirror_frames(right)
logger.info("pet hatch %r: row 'running-left' mirrored from running-right", slug)
else:
logger.warning("pet hatch %r: no running-right to mirror; left walk left empty", slug)
# Idle is the resting state the renderer falls back to — guarantee it.
if not frames_by_state.get("idle"):
progress("row", "idle-fallback")
frames_by_state["idle"] = [atlas.single_frame(base, fit=False)]
progress("compose", "")
logger.info("pet hatch %r: composing atlas from %d states", slug, len(frames_by_state))
# One shared scale + baseline across every state so the pet never slides or
# pulses size between frames; compose just packs the normalized cells.
sheet = atlas.compose_atlas(atlas.normalize_cells(frames_by_state))
validation = atlas.validate_atlas(sheet)
if not validation["ok"]:
raise GenerationError("; ".join(validation["errors"]) or "atlas validation failed")
filled_states = set(validation["filled_states"])
missing_required = sorted(_REQUIRED_STATES - filled_states)
if missing_required:
raise GenerationError(f"missing required animation row(s): {', '.join(missing_required)}")
if len(filled_states) < _MIN_FILLED_STATES:
raise GenerationError(
f"only {len(filled_states)}/{len(atlas.ROW_SPECS)} animation rows were usable; regenerate"
)
from agent.pet import store
progress("save", slug)
logger.info("pet hatch %r: saving pet", slug)
pet = store.register_local_pet(
sheet,
slug=slug,
display_name=display_name or slug,
description=description,
)
return HatchResult(
slug=pet.slug,
display_name=pet.display_name,
spritesheet=pet.spritesheet,
states=validation["filled_states"],
validation=validation,
)

View File

@@ -1,183 +0,0 @@
"""Prompt builders for pet generation.
Two prompt shapes: a *base* prompt (prompt-only, produces the canonical look the
user picks between) and per-*state* *row* prompts (grounded on the chosen base,
produce one horizontal strip of N poses). Prompts stay concise and
sprite-production oriented; the identity lock and "one transparent row" framing
matter more than flowery description.
We generate the full petdex/Codex nine-state set (see
:data:`agent.pet.generate.atlas.ROW_SPECS`) so a hatched pet is a valid
``petdex submit`` spritesheet.
"""
from __future__ import annotations
# What each petdex/Codex state should depict (kept short — these go straight into
# the row prompt). Phrased to avoid the common sprite-gen failure modes (detached
# effects, motion lines, shadows). Critical distinction: ``running`` is the
# *working* state (in place), while ``running-right`` / ``running-left`` are the
# actual directional walk/run cycles.
STATE_ACTIONS: dict[str, str] = {
"idle": "a calm idle loop: subtle breathing, a tiny blink or gentle bob, no big gestures",
"running-right": (
"a sideways walk/run locomotion cycle moving to the RIGHT: the character "
"faces and travels right with clear directional steps, a smooth gait loop"
),
"running-left": (
"a sideways walk/run locomotion cycle moving to the LEFT: the character "
"faces and travels left with clear directional steps (the mirror of the "
"right-facing run)"
),
"waving": "a friendly greeting: raising a paw/hand/limb to wave, clear up-and-down gesture",
"jumping": "a happy celebration jump: anticipation, lift off the ground, peak, and land",
"failed": "a sad or deflated reaction: slumped, dejected, small frown — readable but not noisy",
"waiting": (
"an expectant 'waiting on you' pose: looking up/out as if asking for input "
"or approval — distinct from idle and review"
),
"running": (
"focused active work, staying IN PLACE (NOT walking or foot-running): "
"leaning in, concentrating, busy 'thinking / processing / typing' energy"
),
"review": "careful inspection: a focused lean, head tilt, studying something intently",
}
_STYLE_HINTS: dict[str, str] = {
# Default to the popular petdex look: crisp 16-bit PIXEL ART, not the smooth
# 2D illustration (let alone 3D render) gpt-image reaches for by default.
"auto": (
" Style: crisp 16-bit PIXEL-ART game sprite — visible square pixels, a small "
"limited palette, clean dark outline, flat cel shading, chunky chibi "
"proportions, like a classic SNES/JRPG party member or a petdex.dev mascot. "
"Absolutely NOT 3D-rendered, NOT a smooth painted or vector illustration, "
"NOT photorealistic — no soft gradients, no realistic lighting, no figurine look."
),
"pixel": " Render in clean 16-bit pixel-art style with visible square pixels and a limited palette.",
"plush": " Render as a soft plush toy.",
"clay": " Render as a claymation / soft 3D clay figure.",
"sticker": " Render as a glossy die-cut sticker.",
"flat-vector": " Render in flat vector mascot style.",
"3d-toy": " Render as a glossy 3D toy.",
"painterly": " Render in a soft painterly style.",
}
_BACKGROUND = (
"Center the character on a SINGLE flat, uniform, high-contrast chroma-key "
"background — pure hot magenta #FF00FF (only if magenta appears on the "
"character, use pure green #00FF00 instead). The background is ONE continuous "
"even color that completely surrounds the character with NO gradient, "
"vignette, texture, pattern, scenery, shadow, ground line, frame, border, "
"panel, comic cell, gutter line, grid, or divider of any kind, so it keys out "
"cleanly. The background color must not appear anywhere on the character. "
"No text, no labels, no speech bubbles, no UI."
)
def style_hint(style: str | None) -> str:
return _STYLE_HINTS.get((style or "auto").strip().lower(), "")
# Row strips are generated on the wider landscape canvas (see imagegen.generate /
# orchestrate). The extra width is what lets each pose stay a healthy size AND
# leave a real gutter — used here only to cite concrete pixel numbers.
_ASSUMED_STRIP_WIDTH = 1536
def _spacing_spec(frame_count: int) -> tuple[int, int]:
"""(per-pose width px, gap px) for a row of *frame_count* poses.
Pixel counts alone don't hold — the model fills each slot edge-to-edge with
the full wingspan, so neighbors touch even when bodies are spaced. The lever
that works is proportional containment on a wide canvas: give each pose its
own equal cell and keep the ENTIRE silhouette (wings/tail/halo included)
inside it. On the 1536px landscape strip ~70% occupancy still leaves a
generous gutter, so the pet stays a normal, good-looking size — no shrinking.
"""
slots = max(1, frame_count)
slot_w = _ASSUMED_STRIP_WIDTH / slots
pose_px = round(slot_w * 0.7)
gap_px = max(48, round(slot_w * 0.3))
return pose_px, gap_px
# Per-draft nudges so the 4 base options are actually distinct — gpt-image returns
# near-duplicates for a single prompt. We vary the *look* (palette, build,
# expression, accents), NOT the pose, so the chosen base still grounds clean,
# consistent animation rows.
BASE_VARIATIONS: tuple[str, ...] = (
"",
"a distinctly different colour palette and markings",
"a heavier, broader silhouette with sturdier proportions",
"a different facial structure and expression matching the concept tone, with unique accent/accessory details",
"a leaner, taller build and an alternate colour scheme",
"bolder, more saturated colours and a stronger expression matching the concept tone",
)
def build_base_prompt(concept: str, *, style: str | None = "auto", variation: str = "") -> str:
"""The base look: a single, clean, centered full-body mascot.
*variation* differentiates one draft from the next (see :data:`BASE_VARIATIONS`).
"""
concept = (concept or "a distinctive mascot creature").strip()
nudge = f" Make this design distinct: {variation}." if variation else ""
return (
f"A stylized mascot pet character: {concept}. "
"Honor the requested tone and mood exactly (cute, eerie, scary, menacing, whimsical, etc.) "
"while staying non-graphic. "
"Compact, whole-body silhouette that reads clearly at small size, "
"clear readable facial features, simple consistent palette. "
# A neutral, symmetric, at-rest stance makes the cleanest identity anchor
"Neutral front-facing standing pose, upright and symmetric, arms/limbs "
"relaxed at the sides, feet together on the ground, any cape/accessories "
"hanging straight and still."
f"{nudge} "
f"{_BACKGROUND}{style_hint(style)}"
)
def build_row_prompt(state: str, frame_count: int, concept: str, *, style: str | None = "auto") -> str:
"""A row strip: *frame_count* poses of the SAME character, left→right.
The attached base image is the identity source of truth; the prompt locks
species, palette, face, and props to it.
"""
action = STATE_ACTIONS.get(state, "a simple idle pose")
concept = (concept or "the mascot").strip()
pose_px, gap_px = _spacing_spec(frame_count)
return (
f"Using the attached reference image as the exact same character "
f"(same species, face, colors, markings, proportions, and props), "
"preserving the same emotional tone/mood (e.g., scary stays scary, cute stays cute), "
f"draw a single WIDE horizontal strip of {frame_count} animation frames showing {action}. "
f"LAYOUT: arrange {frame_count} poses in ONE horizontal row at equal spacing, "
"each pose centered in its own imaginary equal region. Draw NO panel borders, "
"NO comic cells, NO boxes, NO vertical divider/gutter lines, NO grid, NO frame "
"outlines between poses — the backdrop is one unbroken flat field behind all of them. "
"Fill the WHOLE strip with the SAME single flat chroma-key color as the attached "
"reference image's background (identical hue in every frame, no per-pose color shifts). "
f"SPACING (critical): draw each pose at a consistent, healthy, clearly "
f"visible size (roughly {pose_px}px wide on a {_ASSUMED_STRIP_WIDTH}px "
f"strip) — do NOT shrink it tiny — but keep its ENTIRE silhouette "
f"(wings, tail, halo, horns, cape, every appendage) fully INSIDE its own "
f"cell. Leave at least {gap_px}px of empty chroma-key background between "
f"neighboring silhouettes at their closest point (wingtip to wingtip), and "
f"the same empty margin before the first pose and after the last. If a wing, "
f"cape, or tail would reach into a neighbor, FOLD or angle it inward rather "
f"than letting it cross the gap. Silhouettes must NEVER touch, overlap, "
f"share a shadow, share a ground line, share motion trails, or merge into "
f"one connected shape. "
# Registration: a clean sprite sheet keeps the character locked in place
# so only the action moves — this is what stops the loop sliding/pulsing.
"REGISTRATION (critical): the character is the SAME height and SAME width "
"in every frame, drawn at the SAME scale, centered over the SAME point, "
"with all feet aligned to the SAME invisible horizontal baseline across the "
"whole strip — this baseline is conceptual ONLY: draw NO ground line, floor, "
"platform, horizon, or contact shadow beneath the feet. Keep the body's center, size, and stance fixed frame to "
"frame — ONLY the limbs/features the action needs may move. Capes, cloaks, "
"bags, and scarves stay in the SAME place and shape every frame (no "
"swinging, flowing, or drifting) unless the action itself requires it. No "
"pose is cropped at the strip edges. "
f"{_BACKGROUND}{style_hint(style)}"
)

View File

@@ -1,165 +0,0 @@
"""Fetch the public petdex manifest.
``https://petdex.dev/api/manifest`` 307-redirects to a JSON document on R2:
{
"generatedAt": "...",
"total": 2926,
"pets": [
{"slug": "boba", "displayName": "Boba", "kind": "creature",
"submittedBy": "railly",
"spritesheetUrl": "https://assets.petdex.dev/.../spritesheet.webp",
"petJsonUrl": "https://assets.petdex.dev/.../pet.json",
"zipUrl": "https://assets.petdex.dev/.../boba.zip"},
...
]
}
Read-only and unauthenticated; no credentials involved.
"""
from __future__ import annotations
import logging
import threading
import time
from dataclasses import dataclass
logger = logging.getLogger(__name__)
MANIFEST_URL = "https://petdex.dev/api/manifest"
_DEFAULT_TIMEOUT = 10.0
# In-process cache for the (large, slow, identical-per-call) manifest. The list
# is a static CDN object that barely changes, yet a single session can ask for
# it many times — every gallery open, plus a full re-fetch per install/select
# (``find_entry``). A short TTL collapses those into one network hit without
# going stale for long. Cleared by :func:`clear_cache` (tests).
_MANIFEST_TTL = 300.0
_cache: tuple[float, list[ManifestEntry]] | None = None
_prefetch_lock = threading.Lock()
_prefetching = False
def clear_cache() -> None:
"""Drop the cached manifest (forces the next fetch to hit the network)."""
global _cache
_cache = None
def _cache_is_warm() -> bool:
return _cache is not None and time.monotonic() - _cache[0] < _MANIFEST_TTL
def prefetch(*, timeout: float = _DEFAULT_TIMEOUT) -> None:
"""Warm the manifest cache in a daemon thread — idempotent, never blocks.
The desktop picker calls this when it loads the (instant) local-only gallery
so the full petdex catalog is usually cached by the time it's requested,
without ever holding up the user's own pets on a network round-trip.
"""
global _prefetching
if _cache_is_warm():
return
with _prefetch_lock:
if _prefetching:
return
_prefetching = True
def _run() -> None:
global _prefetching
try:
fetch_manifest(timeout=timeout)
except Exception as exc: # noqa: BLE001 - best-effort warm
logger.debug("petdex manifest prefetch failed: %s", exc)
finally:
_prefetching = False
threading.Thread(target=_run, name="petdex-prefetch", daemon=True).start()
@dataclass(frozen=True)
class ManifestEntry:
"""A single pet's row in the manifest."""
slug: str
display_name: str
kind: str
submitted_by: str
spritesheet_url: str
pet_json_url: str
zip_url: str
@classmethod
def from_dict(cls, data: dict) -> "ManifestEntry":
return cls(
slug=str(data.get("slug", "")).strip(),
display_name=str(data.get("displayName", "") or data.get("slug", "")),
kind=str(data.get("kind", "") or "pet"),
submitted_by=str(data.get("submittedBy", "") or ""),
spritesheet_url=str(data.get("spritesheetUrl", "") or ""),
pet_json_url=str(data.get("petJsonUrl", "") or ""),
zip_url=str(data.get("zipUrl", "") or ""),
)
class ManifestError(RuntimeError):
"""Raised when the manifest can't be fetched or parsed."""
def fetch_manifest(*, timeout: float = _DEFAULT_TIMEOUT, force: bool = False) -> list[ManifestEntry]:
"""Return every approved pet from the public manifest.
Cached in-process for ``_MANIFEST_TTL`` seconds (pass ``force=True`` to
bypass). Follows the 307 redirect to R2. Raises :class:`ManifestError` on
any network/parse failure so callers can surface a clean message.
"""
global _cache
if not force and _cache is not None and time.monotonic() - _cache[0] < _MANIFEST_TTL:
return _cache[1]
try:
import httpx
except ImportError as exc: # pragma: no cover - httpx is a core dep
raise ManifestError("httpx is required to fetch the petdex manifest") from exc
try:
resp = httpx.get(
MANIFEST_URL,
timeout=timeout,
follow_redirects=True,
headers={"User-Agent": "hermes-agent-petdex"},
)
resp.raise_for_status()
payload = resp.json()
except Exception as exc: # noqa: BLE001 - normalize to one error type
raise ManifestError(f"could not fetch petdex manifest: {exc}") from exc
pets = payload.get("pets") if isinstance(payload, dict) else None
if not isinstance(pets, list):
raise ManifestError("petdex manifest had no 'pets' array")
entries: list[ManifestEntry] = []
for raw in pets:
if not isinstance(raw, dict):
continue
entry = ManifestEntry.from_dict(raw)
if entry.slug and entry.spritesheet_url:
entries.append(entry)
_cache = (time.monotonic(), entries)
return entries
def find_entry(slug: str, *, timeout: float = _DEFAULT_TIMEOUT) -> ManifestEntry | None:
"""Return the manifest entry for *slug*, or ``None`` if not listed."""
slug = slug.strip().lower()
for entry in fetch_manifest(timeout=timeout):
if entry.slug.lower() == slug:
return entry
return None

View File

@@ -1,618 +0,0 @@
"""Decode a pet spritesheet and encode frames for a terminal.
Shared by the base CLI (writes the escape bytes to its own stdout) and the
TUI (``tui_gateway`` ships the encoded bytes to Ink, which writes them) so the
decode + capability-detection + protocol-encoding logic exists exactly once.
Supported output modes, in fidelity order:
- ``kitty`` — the kitty graphics protocol (kitty, Ghostty, WezTerm).
- ``iterm`` — iTerm2 inline images (iTerm2, WezTerm).
- ``sixel`` — DEC sixel (xterm -ti vt340, foot, mlterm, WezTerm, …).
- ``unicode`` — 24-bit half-block downscale; works in any truecolor terminal.
Frame decoding requires Pillow (a core Hermes dependency). If Pillow or the
spritesheet is unavailable the renderer degrades to ``unicode`` text or an
empty string rather than raising.
"""
from __future__ import annotations
import base64
import io
import logging
import os
import sys
from functools import lru_cache
from pathlib import Path
from agent.pet.constants import (
DEFAULT_SCALE,
FRAME_H,
FRAME_W,
FRAMES_PER_STATE,
PetState,
state_row_index,
)
logger = logging.getLogger(__name__)
# Public render-mode names accepted by ``display.pet.render_mode``.
RENDER_MODES = ("auto", "kitty", "iterm", "sixel", "unicode", "off")
# ─────────────────────────────────────────────────────────────────────────
# Terminal capability detection
# ─────────────────────────────────────────────────────────────────────────
def detect_terminal_graphics() -> str:
"""Best-effort detection of the richest graphics protocol available.
Env-based (non-blocking — we never issue a DA1/terminal query that could
hang a pipe). Returns one of ``kitty`` / ``iterm`` / ``sixel`` /
``unicode``. Conservative: unknown terminals get ``unicode``, which works
anywhere with truecolor.
"""
term = os.environ.get("TERM", "").lower()
term_program = os.environ.get("TERM_PROGRAM", "").lower()
# The VS Code / Cursor integrated terminal sets TERM_PROGRAM=vscode
# authoritatively but does NOT scrub the terminal env vars it inherits when
# launched from another emulator (ITERM_SESSION_ID, KITTY_WINDOW_ID, …).
# Trusting those leaks emits an image protocol the embedded xterm.js can't
# display — you get a blank frame. Inline images there are opt-in
# (terminal.integrated.enableImages), so default to half-blocks, which
# always render in its truecolor grid. Users who enabled images can pin
# display.pet.render_mode explicitly.
if term_program == "vscode":
return "unicode"
# kitty graphics protocol
if os.environ.get("KITTY_WINDOW_ID") or "kitty" in term or "ghostty" in term:
return "kitty"
if term_program in {"ghostty"}:
return "kitty"
# WezTerm speaks both kitty and iterm; prefer kitty (richer placement).
if term_program == "wezterm" or os.environ.get("WEZTERM_PANE"):
return "kitty"
# iTerm2 inline images
if term_program == "iterm.app" or os.environ.get("ITERM_SESSION_ID"):
return "iterm"
# sixel-capable terminals (env heuristics only)
if term_program in {"mintty"} or "foot" in term or "mlterm" in term:
return "sixel"
if "sixel" in term:
return "sixel"
return "unicode"
def resolve_mode(configured: str | None, *, stream=None) -> str:
"""Resolve the effective render mode from config + the environment.
``configured`` is ``display.pet.render_mode`` (``auto`` → detect). Returns
``off`` when not attached to a TTY (no point emitting graphics into a pipe
or logfile).
"""
mode = (configured or "auto").strip().lower()
if mode not in RENDER_MODES:
mode = "auto"
if mode == "off":
return "off"
stream = stream or sys.stdout
try:
if not (hasattr(stream, "isatty") and stream.isatty()):
return "off"
except (ValueError, OSError):
return "off"
if mode == "auto":
return detect_terminal_graphics()
return mode
# ─────────────────────────────────────────────────────────────────────────
# Frame decoding
# ─────────────────────────────────────────────────────────────────────────
def _open_sheet(path: Path):
from PIL import Image
img = Image.open(path)
return img.convert("RGBA")
# Max alpha at/below which a frame counts as blank padding. petdex sheets are
# left-packed: a state with fewer real frames than ``FRAMES_PER_STATE`` fills
# the trailing columns with fully transparent cells. Animating into one flashes
# the pet blank, so we stop the row at the first such gap.
_BLANK_ALPHA = 8
def _frame_is_blank(frame) -> bool:
"""True if *frame* has no meaningfully opaque pixel (transparent padding)."""
return frame.getchannel("A").getextrema()[1] <= _BLANK_ALPHA
@lru_cache(maxsize=16)
def _raw_frames(
sheet_path: str,
state_value: str,
frame_w: int,
frame_h: int,
frames_per_state: int,
) -> tuple:
"""Cropped, padding-trimmed RGBA frames for one state row (unscaled).
Steps across the row until the first blank column so pets with ragged
per-state frame counts never animate into empty padding. Cached; returns
``()`` on any decode failure.
"""
try:
sheet = _open_sheet(Path(sheet_path))
cols = max(1, sheet.width // frame_w)
rows = max(1, sheet.height // frame_h)
row = state_row_index(state_value, rows)
top = row * frame_h
# Clamp the row to the sheet (some pets ship fewer rows than the 8 the
# taxonomy reserves).
if top + frame_h > sheet.height:
top = max(0, sheet.height - frame_h)
frames = []
for i in range(min(frames_per_state, cols)):
left = i * frame_w
frame = sheet.crop((left, top, left + frame_w, top + frame_h))
if _frame_is_blank(frame):
break # trailing transparent padding — real frames end here
frames.append(frame)
return tuple(frames)
except Exception as exc: # noqa: BLE001 - cosmetic feature, never fatal
logger.debug("pet frame decode failed (%s, %s): %s", sheet_path, state_value, exc)
return ()
@lru_cache(maxsize=8)
def _frames_for(
sheet_path: str,
state_value: str,
frame_w: int,
frame_h: int,
frames_per_state: int,
scale_w: int,
scale_h: int,
):
"""Return padding-trimmed RGBA frames for one state row, scaled.
Thin scaling layer over :func:`_raw_frames`; both are cached so repeated
frame requests during animation are free.
"""
raw = _raw_frames(sheet_path, state_value, frame_w, frame_h, frames_per_state)
if not raw or (scale_w, scale_h) == (frame_w, frame_h):
return list(raw)
from PIL import Image
return [f.resize((scale_w, scale_h), Image.LANCZOS) for f in raw]
def state_frame_counts(
sheet_path: str | Path,
*,
frame_w: int = FRAME_W,
frame_h: int = FRAME_H,
frames_per_state: int = FRAMES_PER_STATE,
) -> dict[str, int]:
"""Map each driven :class:`PetState` → its real (padding-trimmed) frame count.
The single source of truth for "how many frames does this state actually
have?". The CLI/TUI consume the trimmed frame lists directly; the gateway
ships this map to the desktop canvas, which steps its own loop.
"""
return {
state.value: len(
_raw_frames(str(sheet_path), state.value, frame_w, frame_h, frames_per_state)
)
for state in PetState
}
# ─────────────────────────────────────────────────────────────────────────
# Encoders
# ─────────────────────────────────────────────────────────────────────────
def _png_bytes(frame) -> bytes:
buf = io.BytesIO()
frame.save(buf, format="PNG")
return buf.getvalue()
def _kitty_apc(ctrl: str, data: str) -> str:
"""Emit a kitty APC escape for *data*, chunked into ≤4096-byte ``m`` pieces."""
chunk = 4096
if len(data) <= chunk:
return f"\x1b_G{ctrl},m=0;{data}\x1b\\"
out = [f"\x1b_G{ctrl},m=1;{data[:chunk]}\x1b\\"]
rest = data[chunk:]
while rest:
piece, rest = rest[:chunk], rest[chunk:]
out.append(f"\x1b_Gm={1 if rest else 0};{piece}\x1b\\")
return "".join(out)
def _encode_kitty(frame, *, cell_cols: int | None = None, cell_rows: int | None = None) -> str:
"""Encode one frame via the kitty graphics protocol (transmit + display).
``a=T`` transmits & displays at the cursor; ``c``/``r`` request a display
box in terminal cells so successive frames overwrite the same area.
"""
ctrl = "f=100,a=T,q=2"
if cell_cols:
ctrl += f",c={cell_cols}"
if cell_rows:
ctrl += f",r={cell_rows}"
return _kitty_apc(ctrl, base64.standard_b64encode(_png_bytes(frame)).decode("ascii"))
# ─────────────────────────────────────────────────────────────────────────
# kitty Unicode placeholders
#
# Ink (the TUI's React-for-terminal layer) owns the screen and measures every
# cell's width, so it can't host raw kitty image escapes (no width to count,
# clobbered on the next repaint). kitty's *Unicode placeholder* protocol is the
# grid-safe path: transmit the image once (q=2, virtual placement U=1), then the
# host app prints ordinary-width placeholder cells (U+10EEEE + diacritics) whose
# foreground color encodes the image id. Ink counts those as width-1 text, so
# layout stays correct and the terminal paints the image underneath.
# https://sw.kovidgoyal.net/kitty/graphics-protocol/#unicode-placeholders
# ─────────────────────────────────────────────────────────────────────────
_KITTY_PLACEHOLDER = "\U0010eeee"
# Row/column diacritics, in order (index → diacritic). Verbatim from kitty's
# gen/rowcolumn-diacritics.txt (Unicode 6.0.0, combining class 230). Index i is
# the diacritic that encodes the number i; we only ever need the row index.
_ROWCOL_DIACRITICS: tuple[int, ...] = (
0x0305, 0x030D, 0x030E, 0x0310, 0x0312, 0x033D, 0x033E, 0x033F, 0x0346, 0x034A,
0x034B, 0x034C, 0x0350, 0x0351, 0x0352, 0x0357, 0x035B, 0x0363, 0x0364, 0x0365,
0x0366, 0x0367, 0x0368, 0x0369, 0x036A, 0x036B, 0x036C, 0x036D, 0x036E, 0x036F,
0x0483, 0x0484, 0x0485, 0x0486, 0x0487, 0x0592, 0x0593, 0x0594, 0x0595, 0x0597,
0x0598, 0x0599, 0x059C, 0x059D, 0x059E, 0x059F, 0x05A0, 0x05A1, 0x05A8, 0x05A9,
0x05AB, 0x05AC, 0x05AF, 0x05C4, 0x0610, 0x0611, 0x0612, 0x0613, 0x0614, 0x0615,
0x0616, 0x0617, 0x0657, 0x0658, 0x0659, 0x065A, 0x065B, 0x065D, 0x065E, 0x06D6,
0x06D7, 0x06D8, 0x06D9, 0x06DA, 0x06DB, 0x06DC, 0x06DF, 0x06E0, 0x06E1, 0x06E2,
0x06E4, 0x06E7, 0x06E8, 0x06EB, 0x06EC, 0x0730, 0x0732, 0x0733, 0x0735, 0x0736,
0x073A, 0x073D, 0x073F, 0x0740, 0x0741, 0x0743, 0x0745, 0x0747, 0x0749, 0x074A,
0x07EB, 0x07EC, 0x07ED, 0x07EE, 0x07EF, 0x07F0, 0x07F1, 0x07F3, 0x0816, 0x0817,
0x0818, 0x0819, 0x081B, 0x081C, 0x081D, 0x081E, 0x081F, 0x0820, 0x0821, 0x0822,
0x0823, 0x0825, 0x0826, 0x0827, 0x0829, 0x082A, 0x082B, 0x082C, 0x082D, 0x0951,
0x0953, 0x0954, 0x0F82, 0x0F83, 0x0F86, 0x0F87, 0x135D, 0x135E, 0x135F, 0x17DD,
0x193A, 0x1A17, 0x1A75, 0x1A76, 0x1A77, 0x1A78, 0x1A79, 0x1A7A, 0x1A7B, 0x1A7C,
0x1B6B, 0x1B6D, 0x1B6E, 0x1B6F, 0x1B70, 0x1B71, 0x1B72, 0x1B73, 0x1CD0, 0x1CD1,
0x1CD2, 0x1CDA, 0x1CDB, 0x1CE0, 0x1DC0, 0x1DC1, 0x1DC3, 0x1DC4, 0x1DC5, 0x1DC6,
0x1DC7, 0x1DC8, 0x1DC9, 0x1DCB, 0x1DCC, 0x1DD1, 0x1DD2, 0x1DD3, 0x1DD4, 0x1DD5,
0x1DD6, 0x1DD7, 0x1DD8, 0x1DD9, 0x1DDA, 0x1DDB, 0x1DDC, 0x1DDD, 0x1DDE, 0x1DDF,
0x1DE0, 0x1DE1, 0x1DE2, 0x1DE3, 0x1DE4, 0x1DE5, 0x1DE6, 0x1DFE, 0x20D0, 0x20D1,
0x20D4, 0x20D5, 0x20D6, 0x20D7, 0x20DB, 0x20DC, 0x20E1, 0x20E7, 0x20E9, 0x20F0,
0x2CEF, 0x2CF0, 0x2CF1, 0x2DE0, 0x2DE1, 0x2DE2, 0x2DE3, 0x2DE4, 0x2DE5, 0x2DE6,
0x2DE7, 0x2DE8, 0x2DE9, 0x2DEA, 0x2DEB, 0x2DEC, 0x2DED, 0x2DEE, 0x2DEF, 0x2DF0,
0x2DF1, 0x2DF2, 0x2DF3, 0x2DF4, 0x2DF5, 0x2DF6, 0x2DF7, 0x2DF8, 0x2DF9, 0x2DFA,
0x2DFB, 0x2DFC, 0x2DFD, 0x2DFE, 0x2DFF, 0xA66F, 0xA67C, 0xA67D, 0xA6F0, 0xA6F1,
0xA8E0, 0xA8E1, 0xA8E2, 0xA8E3, 0xA8E4, 0xA8E5, 0xA8E6, 0xA8E7, 0xA8E8, 0xA8E9,
0xA8EA, 0xA8EB, 0xA8EC, 0xA8ED, 0xA8EE, 0xA8EF, 0xA8F0, 0xA8F1, 0xAAB0, 0xAAB2,
0xAAB3, 0xAAB7, 0xAAB8, 0xAABE, 0xAABF, 0xAAC1, 0xFE20, 0xFE21, 0xFE22, 0xFE23,
0xFE24, 0xFE25, 0xFE26, 0x10A0F, 0x10A38, 0x1D185, 0x1D186, 0x1D187, 0x1D188,
0x1D189, 0x1D1AA, 0x1D1AB, 0x1D1AC, 0x1D1AD, 0x1D242, 0x1D243, 0x1D244,
)
def kitty_image_id(slug: str) -> int:
"""Stable per-pet image id in ``[1, 0x7FFF]``.
The id is encoded in the placeholder's 24-bit foreground color, so it must
be non-zero and fit comfortably under ``0xFFFFFF``. A small CRC keeps it
deterministic per slug (so re-renders reuse the same terminal-side image)
while making collisions between two different pets unlikely.
"""
import zlib
return (zlib.crc32(slug.encode("utf-8")) % 0x7FFE) + 1
def kitty_color_hex(image_id: int) -> str:
"""Hex foreground color (``#rrggbb``) that encodes *image_id* for kitty."""
return "#%06x" % (image_id & 0xFFFFFF)
def kitty_placeholder_rows(cols: int, rows: int) -> list[str]:
"""Build the placeholder text grid for an *rows*×*cols* image.
Each line is one row of the grid: the first cell carries the row diacritic
(column defaults to 0), and the remaining ``cols-1`` bare placeholders let
the terminal auto-increment the column. The foreground color (the image id)
is applied by the caller / Ink, not embedded here.
"""
cols = max(1, cols)
out: list[str] = []
for r in range(max(1, rows)):
idx = min(r, len(_ROWCOL_DIACRITICS) - 1)
first = _KITTY_PLACEHOLDER + chr(_ROWCOL_DIACRITICS[idx])
out.append(first + _KITTY_PLACEHOLDER * (cols - 1))
return out
def _encode_kitty_virtual(frame, *, image_id: int, cols: int, rows: int) -> str:
"""Transmit a frame as a kitty *virtual* placement for Unicode placeholders.
``a=T`` transmits and creates the placement in one shot; ``U=1`` marks it
virtual (no on-screen output, cursor untouched); ``q=2`` suppresses the
terminal's OK/error replies that would otherwise corrupt the host app's
output. Re-sending with the same ``i`` replaces the image, so the static
placeholder cells animate underneath.
"""
ctrl = f"a=T,U=1,i={image_id},c={cols},r={rows},f=100,q=2"
return _kitty_apc(ctrl, base64.standard_b64encode(_png_bytes(frame)).decode("ascii"))
def _encode_iterm(frame, *, cell_cols: int | None = None, cell_rows: int | None = None) -> str:
"""Encode one frame as an iTerm2 inline image (OSC 1337 File)."""
payload = base64.standard_b64encode(_png_bytes(frame)).decode("ascii")
size = len(payload)
args = [f"inline=1", f"size={size}", "preserveAspectRatio=1"]
if cell_cols:
args.append(f"width={cell_cols}")
if cell_rows:
args.append(f"height={cell_rows}")
return f"\x1b]1337;File={';'.join(args)}:{payload}\x07"
def _encode_sixel(frame) -> str:
"""Encode one frame as DEC sixel.
Quantizes to an adaptive palette (≤255 colors) and emits the sixel band
stream. Pillow has no sixel writer, so this is a compact hand-rolled
encoder. Transparent pixels render as background (color register skipped).
"""
from PIL import Image
rgba = frame
# Composite onto transparent-as-skip: track alpha to decide background.
pal = rgba.convert("RGB").quantize(colors=255, method=Image.MEDIANCUT)
palette = pal.getpalette() or []
px = pal.load()
alpha = rgba.getchannel("A").load()
w, h = pal.size
out = ["\x1bP0;1;0q", '"1;1;%d;%d' % (w, h)]
# Color register definitions (sixel uses 0..100 scale).
used = sorted({px[x, y] for y in range(h) for x in range(w)})
for idx in used:
r = palette[idx * 3] if idx * 3 < len(palette) else 0
g = palette[idx * 3 + 1] if idx * 3 + 1 < len(palette) else 0
b = palette[idx * 3 + 2] if idx * 3 + 2 < len(palette) else 0
out.append("#%d;2;%d;%d;%d" % (idx, r * 100 // 255, g * 100 // 255, b * 100 // 255))
# Emit in 6-row bands.
for band in range(0, h, 6):
for color_idx in used:
line = ["#%d" % color_idx]
run_char = None
run_len = 0
def flush():
nonlocal run_char, run_len
if run_char is None:
return
if run_len > 3:
line.append("!%d%s" % (run_len, run_char))
else:
line.append(run_char * run_len)
run_char, run_len = None, 0
for x in range(w):
bits = 0
for bit in range(6):
y = band + bit
if y < h and alpha[x, y] > 32 and px[x, y] == color_idx:
bits |= 1 << bit
ch = chr(63 + bits)
if ch == run_char:
run_len += 1
else:
flush()
run_char, run_len = ch, 1
flush()
out.append("".join(line) + "$") # carriage return within band
out.append("-") # next band
out.append("\x1b\\")
return "".join(out)
_HALF_BLOCK = ""
# A single half-block cell: top pixel + bottom pixel as (r, g, b, a) tuples.
Cell = tuple[tuple[int, int, int, int], tuple[int, int, int, int]]
def _downscale_cells(frame, *, target_cols: int) -> list[list[Cell]]:
"""Downscale a frame to a grid of half-block cells.
Each cell pairs a top and bottom pixel so one terminal row encodes two
pixel rows. Returns rows of ``((tr,tg,tb,ta),(br,bg,bb,ba))`` — the
framework-neutral representation shared by the ANSI encoder (CLI) and the
structured ``cells`` API (Ink).
"""
from PIL import Image
target_cols = max(4, target_cols)
aspect = frame.height / max(1, frame.width)
target_rows = max(2, int(round(target_cols * aspect * 0.5)) * 2)
small = frame.resize((target_cols, target_rows), Image.LANCZOS).convert("RGBA")
px = small.load()
grid: list[list[Cell]] = []
for y in range(0, target_rows, 2):
row: list[Cell] = []
for x in range(target_cols):
top = px[x, y]
bottom = px[x, y + 1] if y + 1 < target_rows else (0, 0, 0, 0)
row.append((top, bottom))
grid.append(row)
return grid
def _encode_unicode(frame, *, target_cols: int) -> str:
"""Downscale to truecolor ANSI half-blocks (one char = 2 vertical pixels)."""
lines: list[str] = []
for row in _downscale_cells(frame, target_cols=target_cols):
cells: list[str] = []
for (tr, tg, tb, ta), (br, bg, bb, ba) in row:
if ta < 32 and ba < 32:
cells.append("\x1b[0m ") # fully transparent → blank
continue
cells.append(f"\x1b[38;2;{tr};{tg};{tb}m\x1b[48;2;{br};{bg};{bb}m{_HALF_BLOCK}")
lines.append("".join(cells) + "\x1b[0m")
return "\n".join(lines)
# ─────────────────────────────────────────────────────────────────────────
# Public renderer
# ─────────────────────────────────────────────────────────────────────────
class PetRenderer:
"""Holds a pet's spritesheet and yields encoded frames per (state, index).
Construct once per pet, then call :meth:`frame` on an animation timer.
Cheap to call repeatedly — decoded frames are cached.
"""
def __init__(
self,
spritesheet: str | Path,
*,
mode: str = "unicode",
scale: float = DEFAULT_SCALE,
unicode_cols: int = 20,
frame_w: int = FRAME_W,
frame_h: int = FRAME_H,
frames_per_state: int = FRAMES_PER_STATE,
) -> None:
self.spritesheet = str(spritesheet)
self.mode = mode if mode in RENDER_MODES else "unicode"
self.scale = scale
self.unicode_cols = unicode_cols
self.frame_w = frame_w
self.frame_h = frame_h
self.frames_per_state = frames_per_state
@property
def available(self) -> bool:
return self.mode != "off" and Path(self.spritesheet).is_file()
def frame_count(self, state: PetState | str) -> int:
return len(self._frames(state))
def _frames(self, state: PetState | str):
value = state.value if isinstance(state, PetState) else str(state)
scale_w = max(1, int(self.frame_w * self.scale))
scale_h = max(1, int(self.frame_h * self.scale))
return _frames_for(
self.spritesheet,
value,
self.frame_w,
self.frame_h,
self.frames_per_state,
scale_w,
scale_h,
)
def cells(self, state: PetState | str, index: int, *, cols: int | None = None) -> list[list[Cell]]:
"""Return one frame as a half-block cell grid (framework-neutral).
Used by the TUI, which renders the grid with native Ink color props
instead of raw ANSI. Returns ``[]`` when no frame is available.
"""
frames = self._frames(state)
if not frames:
return []
frame = frames[index % len(frames)]
return _downscale_cells(frame, target_cols=cols or self.unicode_cols)
def _cell_box(self, frame) -> tuple[int, int]:
"""Terminal cell box for a scaled frame (~8×16 px per cell).
Must match :meth:`frame` graphics sizing — kitty stretches the image to
fill ``c``×``r`` cells, so these must reflect the scaled pixel
dimensions, not a native-aspect column count (that upscales small pets).
"""
return max(1, frame.width // 8), max(1, frame.height // 16)
def kitty_payload(self, state: PetState | str, *, image_id: int) -> dict | None:
"""Build the kitty Unicode-placeholder payload for one state.
Returns ``{cols, rows, placeholder, frames}`` where ``frames`` is a
list of transmit escapes (one per animation frame, all reusing
``image_id``) and ``placeholder`` is the static text grid Ink paints.
Placement geometry is derived from the scaled frame pixels (via
:meth:`_cell_box`), not ``unicode_cols`` — kitty upscales to fill
``c``×``r`` cells. ``None`` when no frame is available.
"""
frames = self._frames(state)
if not frames:
return None
cols, rows = self._cell_box(frames[0])
return {
"cols": cols,
"rows": rows,
"placeholder": kitty_placeholder_rows(cols, rows),
"frames": [
_encode_kitty_virtual(f, image_id=image_id, cols=cols, rows=rows) for f in frames
],
}
def frame(self, state: PetState | str, index: int) -> str:
"""Return the encoded escape string for one frame, or ``""``.
``index`` is taken modulo the available frame count so callers can pass
a free-running counter.
"""
if self.mode == "off":
return ""
frames = self._frames(state)
if not frames:
return ""
frame = frames[index % len(frames)]
cell_cols, cell_rows = self._cell_box(frame)
try:
if self.mode == "kitty":
return _encode_kitty(frame, cell_cols=cell_cols, cell_rows=cell_rows)
if self.mode == "iterm":
return _encode_iterm(frame, cell_cols=cell_cols, cell_rows=cell_rows)
if self.mode == "sixel":
return _encode_sixel(frame)
return _encode_unicode(frame, target_cols=self.unicode_cols)
except Exception as exc: # noqa: BLE001 - degrade silently
logger.debug("pet frame encode failed (mode=%s): %s", self.mode, exc)
return ""
def build_renderer(
spritesheet: str | Path,
*,
configured_mode: str | None = None,
scale: float = DEFAULT_SCALE,
unicode_cols: int = 20,
stream=None,
) -> PetRenderer:
"""Convenience factory: resolve the mode from config+env, then construct."""
mode = resolve_mode(configured_mode, stream=stream)
return PetRenderer(
spritesheet,
mode=mode,
scale=scale,
unicode_cols=unicode_cols,
)

View File

@@ -1,81 +0,0 @@
"""Map agent activity → a :class:`PetState`.
This is the one place the "what is the agent doing right now?""which
animation row?" decision lives. Each surface feeds it the signals it already
tracks:
- CLI — ``KawaiiSpinner`` waiting/thinking state + tool outcomes.
- TUI — gateway ``tool.start/complete`` + ``message.delta/complete`` events.
- Desktop — the ``$busy``/``$awaitingResponse``/tool-event nanostores
(re-implemented in TS, but mirroring this priority order).
Keeping the priority order here (and documenting it) lets the TypeScript
mirror stay faithful without a second design.
"""
from __future__ import annotations
from collections.abc import Iterable
from typing import Any
from agent.pet.constants import PetState
def todos_all_done(todos: Iterable[Any] | None) -> bool:
"""True iff there's ≥1 todo and every one is completed/cancelled.
The "celebrate" beat (``JUMP``) fires when a plan finishes; this mirrors
the TUI's ``isTodoDone`` so the trigger is defined once across surfaces.
Accepts dicts (``{"status": ...}``) or objects with a ``status`` attr.
"""
items = list(todos or [])
if not items:
return False
def _status(t: Any) -> Any:
return t.get("status") if isinstance(t, dict) else getattr(t, "status", None)
return all(_status(t) in ("completed", "cancelled") for t in items)
def derive_pet_state(
*,
busy: bool = False,
awaiting_input: bool = False,
error: bool = False,
celebrate: bool = False,
just_completed: bool = False,
tool_running: bool = False,
reasoning: bool = False,
) -> PetState:
"""Resolve the animation state from coarse activity signals.
Priority (highest first) — only one row can show at a time, so the most
salient signal wins:
1. ``error`` → ``FAILED`` (a tool/turn just failed)
2. ``celebrate`` → ``JUMP`` (explicit success beat, e.g. todos done)
3. ``just_completed`` → ``WAVE`` (turn finished cleanly / greeting)
4. ``awaiting_input`` → ``WAITING`` (blocked on the user — a clarify/approval
prompt is open; this outranks the in-flight signals below because the turn
is paused on *you*, even though a tool is technically mid-call)
5. ``tool_running`` → ``RUN`` (a tool is executing)
6. ``reasoning`` → ``REVIEW`` (model is thinking / reading)
7. ``busy`` → ``RUN`` (turn in flight, unspecified work)
8. otherwise → ``IDLE``
"""
if error:
return PetState.FAILED
if celebrate:
return PetState.JUMP
if just_completed:
return PetState.WAVE
if awaiting_input:
return PetState.WAITING
if tool_running:
return PetState.RUN
if reasoning:
return PetState.REVIEW
if busy:
return PetState.RUN
return PetState.IDLE

View File

@@ -1,503 +0,0 @@
"""On-disk pet store — install / list / resolve pets.
Pets live under ``get_hermes_home()/pets/<slug>/`` so every profile gets its
own set (we deliberately do **not** reuse petdex's ``~/.codex/pets`` default —
that's owned by the petdex npm CLI and isn't profile-aware). Each installed
pet directory holds:
pets/<slug>/
pet.json # {id, displayName, description, spritesheetPath}
spritesheet.webp # (or .png)
The active pet is resolved from the caller-supplied ``display.pet.slug`` config
value (falling back to the first installed pet), so this module stays free of
the config loader.
"""
from __future__ import annotations
import json
import logging
import re
from dataclasses import dataclass
from pathlib import Path
from hermes_constants import get_hermes_home
logger = logging.getLogger(__name__)
_DOWNLOAD_TIMEOUT = 60.0
class PetStoreError(RuntimeError):
"""Raised on install/IO failures."""
@dataclass(frozen=True)
class InstalledPet:
"""A pet present on disk."""
slug: str
display_name: str
description: str
directory: Path
spritesheet: Path
created_by: str = "" # "generator" for pets hatched locally; "" for petdex installs
@property
def exists(self) -> bool:
return self.spritesheet.is_file()
@property
def generated(self) -> bool:
return self.created_by == "generator"
def pets_dir() -> Path:
"""Return the profile-scoped pets directory (created on demand)."""
path = get_hermes_home() / "pets"
path.mkdir(parents=True, exist_ok=True)
return path
def _read_pet_json(directory: Path) -> dict:
pet_json = directory / "pet.json"
if not pet_json.is_file():
return {}
try:
return json.loads(pet_json.read_text(encoding="utf-8"))
except (OSError, ValueError) as exc:
logger.debug("unreadable pet.json in %s: %s", directory, exc)
return {}
def _resolve_spritesheet(directory: Path, meta: dict) -> Path:
"""Find the spritesheet for a pet dir.
Honors ``spritesheetPath`` from pet.json, else probes the conventional
filenames (``spritesheet.{webp,png}`` and petdex R2's ``sprite.webp``).
"""
declared = str(meta.get("spritesheetPath", "") or "").strip()
if declared:
candidate = directory / declared
if candidate.is_file():
return candidate
for name in ("spritesheet.webp", "spritesheet.png", "sprite.webp", "sprite.png"):
candidate = directory / name
if candidate.is_file():
return candidate
# Default expectation even if missing, so callers get a stable path.
return directory / "spritesheet.webp"
def _safe_slug(slug: str) -> str:
"""Normalize a slug to a single bare path segment.
Pet slugs index into ``pets_dir()/<slug>/`` for load/remove, so a value
carrying path separators (``../``, absolute paths) could escape the pets
directory. Strip every separator and reject ``.``/``..`` so callers can
only ever name a direct child of the pets directory.
"""
segment = Path(str(slug).strip()).name
if segment in ("", ".", ".."):
return ""
return segment
def load_pet(slug: str) -> InstalledPet | None:
"""Return the :class:`InstalledPet` for *slug*, or ``None`` if absent."""
slug = _safe_slug(slug)
if not slug:
return None
directory = pets_dir() / slug
if not directory.is_dir():
return None
meta = _read_pet_json(directory)
return InstalledPet(
slug=slug,
display_name=str(meta.get("displayName", "") or slug),
description=str(meta.get("description", "") or ""),
directory=directory,
spritesheet=_resolve_spritesheet(directory, meta),
created_by=str(meta.get("createdBy", "") or ""),
)
def installed_pets() -> list[InstalledPet]:
"""Return every installed pet (dirs containing a usable spritesheet)."""
out: list[InstalledPet] = []
for child in sorted(pets_dir().iterdir()):
if not child.is_dir():
continue
pet = load_pet(child.name)
if pet and pet.exists:
out.append(pet)
return out
def resolve_active_pet(configured_slug: str | None = None) -> InstalledPet | None:
"""Resolve which pet to display.
Precedence: the configured slug (``display.pet.slug``) if it's installed,
otherwise the first installed pet alphabetically, otherwise ``None``.
"""
if configured_slug:
pet = load_pet(configured_slug.strip())
if pet and pet.exists:
return pet
pets = installed_pets()
return pets[0] if pets else None
def install_pet(slug: str, *, force: bool = False, timeout: float = _DOWNLOAD_TIMEOUT) -> InstalledPet:
"""Download *slug* from the manifest into the pets directory.
Idempotent: a fully-installed pet is returned as-is unless *force*. Raises
:class:`PetStoreError` / :class:`~agent.pet.manifest.ManifestError` on
failure.
"""
from agent.pet.manifest import find_entry
slug = _safe_slug(slug)
if not slug:
raise PetStoreError("invalid pet slug")
existing = load_pet(slug)
if existing and existing.exists and not force:
return existing
entry = find_entry(slug, timeout=timeout)
if entry is None:
raise PetStoreError(f"pet '{slug}' is not in the petdex manifest")
# Host-pin every asset URL to petdex. The manifest is trusted (HTTPS from
# petdex.dev), but pin the asset hosts too so a compromised/spoofed manifest
# can't redirect the download at an arbitrary host. Matches thumbnail_png.
if not _is_petdex_host(entry.spritesheet_url):
raise PetStoreError(f"refusing non-petdex spritesheet host for '{slug}'")
directory = pets_dir() / slug
directory.mkdir(parents=True, exist_ok=True)
sprite_ext = ".png" if entry.spritesheet_url.lower().split("?")[0].endswith(".png") else ".webp"
sprite_path = directory / f"spritesheet{sprite_ext}"
_download(entry.spritesheet_url, sprite_path, timeout=timeout)
# Fetch the upstream pet.json if present; otherwise synthesize a minimal
# one so the local layout is self-describing.
meta: dict = {}
if entry.pet_json_url and _is_petdex_host(entry.pet_json_url):
try:
meta = _download_json(entry.pet_json_url, timeout=timeout)
except Exception as exc: # noqa: BLE001 - non-fatal, fall back below
logger.debug("pet.json fetch failed for %s: %s", slug, exc)
if not isinstance(meta, dict) or not meta:
meta = {"id": slug, "displayName": entry.display_name, "description": ""}
meta["spritesheetPath"] = sprite_path.name
meta.setdefault("id", slug)
meta.setdefault("displayName", entry.display_name)
(directory / "pet.json").write_text(json.dumps(meta, indent=2), encoding="utf-8")
pet = load_pet(slug)
if pet is None or not pet.exists:
raise PetStoreError(f"install of '{slug}' did not produce a spritesheet")
return pet
def slugify(name: str) -> str:
"""Lowercase, hyphenate, and strip a display name into a filesystem slug."""
slug = re.sub(r"[^a-z0-9]+", "-", (name or "").strip().lower()).strip("-")
return slug or "pet"
def unique_slug(name: str) -> str:
"""A :func:`slugify` result that doesn't collide with an existing pet dir."""
base = slugify(name)
slug = base
counter = 2
while (pets_dir() / slug).exists():
slug = f"{base}-{counter}"
counter += 1
return slug
def _write_spritesheet(source, dest: Path) -> None:
"""Write *source* (PIL image, bytes, or path) as a lossless WebP at *dest*."""
if isinstance(source, (bytes, bytearray)):
dest.write_bytes(bytes(source))
return
from PIL import Image
if isinstance(source, (str, Path)):
with Image.open(source) as opened:
image = opened.convert("RGBA")
else:
image = source.convert("RGBA")
image.save(dest, format="WEBP", lossless=True, quality=100, method=6, exact=True)
def register_local_pet(
spritesheet,
*,
slug: str,
display_name: str = "",
description: str = "",
) -> InstalledPet:
"""Write a locally-generated pet into the store and return it.
*spritesheet* may be a PIL image, raw WebP/PNG bytes, or a path. The pet
appears in :func:`installed_pets` immediately, and because :func:`install_pet`
returns an already-on-disk pet before consulting the manifest, it can be
adopted (``pet.select`` / ``/pet <slug>``) without a manifest entry.
"""
slug = slugify(slug)
directory = pets_dir() / slug
directory.mkdir(parents=True, exist_ok=True)
sprite_path = directory / "spritesheet.webp"
try:
_write_spritesheet(spritesheet, sprite_path)
except Exception as exc: # noqa: BLE001 - normalize to one error type
raise PetStoreError(f"could not write spritesheet for '{slug}': {exc}") from exc
meta = {
"id": slug,
"displayName": display_name or slug,
"description": description or "",
"spritesheetPath": sprite_path.name,
"createdBy": "generator",
}
(directory / "pet.json").write_text(json.dumps(meta, indent=2), encoding="utf-8")
pet = load_pet(slug)
if pet is None or not pet.exists:
raise PetStoreError(f"register of generated pet '{slug}' did not produce a spritesheet")
return pet
def export_pet(slug: str) -> tuple[str, bytes]:
"""Zip an installed pet's folder (pet.json + spritesheet) → (filename, bytes).
Dotfiles (cached thumbs, backups) are skipped so the archive is a clean,
re-importable pet package. Raises :class:`PetStoreError` if not installed.
"""
import io
import zipfile
root = pets_dir()
directory = root / slug.strip()
# Guard against traversal: the target must be a direct child of pets_dir.
if directory.resolve().parent != root.resolve() or not directory.is_dir():
raise PetStoreError(f"pet '{slug}' is not installed")
name = directory.name
buf = io.BytesIO()
with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as archive:
for path in sorted(directory.iterdir()):
if path.is_file() and not path.name.startswith("."):
archive.write(path, f"{name}/{path.name}")
return f"{name}.zip", buf.getvalue()
_THUMB_FRAME_W = 192
_THUMB_FRAME_H = 208
_THUMB_W = 96 # rendered ~40px; 2x+ keeps it crisp on HiDPI
def _thumbs_dir() -> Path:
path = pets_dir() / ".thumbs"
path.mkdir(parents=True, exist_ok=True)
return path
def _is_petdex_host(url: str) -> bool:
"""True only for petdex.dev hosts — bounds server-side fetch (anti-SSRF)."""
from urllib.parse import urlparse
try:
host = (urlparse(url).hostname or "").lower()
except ValueError:
return False
return host == "petdex.dev" or host.endswith(".petdex.dev")
def thumbnail_png(slug: str, *, source_url: str = "", timeout: float = 30.0) -> bytes | None:
"""Return a small idle-frame PNG for *slug*, cached on disk.
Crops the top-left (idle, frame 0) cell of the spritesheet and downsamples
it to a thumbnail. Source preference: an installed spritesheet on disk, else
*source_url* — but only when it points at petdex (so the gateway never
fetches an arbitrary client-supplied URL). Returns ``None`` when there's no
usable source or Pillow/network fails; callers render a placeholder.
Doing this server-side sidesteps the renderer's CSP / R2 hotlink limits that
break a direct ``<img src=cdn>`` and lets the result ride the authenticated
gateway as a same-origin data URL.
"""
slug = slug.strip()
if not slug:
return None
cache = _thumbs_dir() / f"{slug}.png"
if cache.is_file():
try:
return cache.read_bytes()
except OSError:
pass
sheet_bytes: bytes | None = None
pet = load_pet(slug)
if pet and pet.exists:
try:
sheet_bytes = pet.spritesheet.read_bytes()
except OSError:
sheet_bytes = None
if sheet_bytes is None and source_url and _is_petdex_host(source_url):
try:
import httpx
resp = httpx.get(
source_url,
timeout=timeout,
follow_redirects=True,
headers={"User-Agent": "hermes-agent-petdex"},
)
resp.raise_for_status()
sheet_bytes = resp.content
except Exception as exc: # noqa: BLE001 - cosmetic, degrade to placeholder
logger.debug("thumb fetch failed for %s: %s", slug, exc)
if not sheet_bytes:
return None
try:
import io
from PIL import Image
with Image.open(io.BytesIO(sheet_bytes)) as im:
frame = im.convert("RGBA").crop(
(0, 0, min(_THUMB_FRAME_W, im.width), min(_THUMB_FRAME_H, im.height))
)
height = round(_THUMB_W * _THUMB_FRAME_H / _THUMB_FRAME_W)
frame = frame.resize((_THUMB_W, height), Image.NEAREST)
buf = io.BytesIO()
frame.save(buf, format="PNG")
data = buf.getvalue()
except Exception as exc: # noqa: BLE001
logger.debug("thumb crop failed for %s: %s", slug, exc)
return None
try:
cache.write_bytes(data)
except OSError:
pass
return data
def remove_pet(slug: str) -> bool:
"""Delete an installed pet directory. Returns True if anything was removed."""
import shutil
slug = _safe_slug(slug)
if not slug:
return False
# The cached thumbnail lives in pets/.thumbs/<slug>.png — OUTSIDE the pet
# dir, so rmtree won't catch it. Drop it too, or a later pet that reuses this
# slug renders this one's stale thumbnail.
try:
(_thumbs_dir() / f"{slug}.png").unlink(missing_ok=True)
except OSError:
pass
directory = pets_dir() / slug
if not directory.is_dir():
return False
shutil.rmtree(directory, ignore_errors=True)
return not directory.exists()
def rename_pet(slug: str, display_name: str) -> str | None:
"""Rename a pet's ``displayName`` AND realign its slug/dir to match.
Generated pets are hatched under a provisional, prompt-derived slug; when
the user names the pet on the reveal screen we make that name the real
identity so lists/subtitles show what they typed, not the prompt. The dir is
renamed to ``slugify(name)`` (and the cached thumbnail moved alongside it)
whenever that yields a free, different slug — otherwise the slug is left as
is. Returns the resulting slug on success, or ``None`` on failure.
"""
slug = _safe_slug(slug)
display_name = (display_name or "").strip()
if not slug or not display_name:
return None
directory = pets_dir() / slug
pet_json = directory / "pet.json"
if not pet_json.is_file():
return None
try:
meta = json.loads(pet_json.read_text(encoding="utf-8"))
except (OSError, ValueError):
meta = {}
if not isinstance(meta, dict):
meta = {}
meta["displayName"] = display_name
new_slug = slug
desired = slugify(display_name)
if desired and desired != slug and not (pets_dir() / desired).exists():
try:
directory.rename(pets_dir() / desired)
try:
(_thumbs_dir() / f"{slug}.png").rename(_thumbs_dir() / f"{desired}.png")
except OSError:
pass
directory = pets_dir() / desired
pet_json = directory / "pet.json"
new_slug = desired
meta["id"] = new_slug
except OSError:
new_slug = slug # keep the provisional slug if the move fails
try:
pet_json.write_text(json.dumps(meta, indent=2), encoding="utf-8")
except OSError:
return None
return new_slug
def _download(url: str, dest: Path, *, timeout: float) -> None:
import httpx
try:
with httpx.stream(
"GET",
url,
timeout=timeout,
follow_redirects=True,
headers={"User-Agent": "hermes-agent-petdex"},
) as resp:
resp.raise_for_status()
tmp = dest.with_suffix(dest.suffix + ".part")
with tmp.open("wb") as fh:
for chunk in resp.iter_bytes():
fh.write(chunk)
tmp.replace(dest)
except Exception as exc: # noqa: BLE001
raise PetStoreError(f"download failed for {url}: {exc}") from exc
def _download_json(url: str, *, timeout: float) -> dict:
import httpx
resp = httpx.get(
url,
timeout=timeout,
follow_redirects=True,
headers={"User-Agent": "hermes-agent-petdex"},
)
resp.raise_for_status()
data = resp.json()
return data if isinstance(data, dict) else {}

View File

@@ -26,7 +26,7 @@ from __future__ import annotations
import os
import sys
import urllib.request
from typing import Any, Optional
from typing import Optional
from utils import base_url_hostname, normalize_proxy_url
@@ -142,46 +142,6 @@ def _get_proxy_for_base_url(base_url: Optional[str]) -> Optional[str]:
return proxy
def build_keepalive_http_client(
base_url: str = "",
*,
async_mode: bool = False,
) -> Optional[Any]:
"""Build an httpx client for OpenAI SDK calls with env-only proxy policy.
Uses explicit ``HTTPS_PROXY`` / ``NO_PROXY`` env vars via
``_get_proxy_for_base_url``. A custom transport disables httpx's default
``trust_env`` path, so macOS system proxy settings from
``urllib.request.getproxies()`` (which omit the ExceptionsList) are not
applied. Mirrors ``AIAgent._build_keepalive_http_client``.
"""
try:
import httpx
import socket
if "api.githubcopilot.com" in str(base_url or "").lower():
client_cls = httpx.AsyncClient if async_mode else httpx.Client
return client_cls()
sock_opts = [(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)]
if hasattr(socket, "TCP_KEEPIDLE"):
sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 30))
sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 10))
sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 3))
elif hasattr(socket, "TCP_KEEPALIVE"):
sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPALIVE, 30))
proxy = _get_proxy_for_base_url(base_url)
transport_cls = httpx.AsyncHTTPTransport if async_mode else httpx.HTTPTransport
client_cls = httpx.AsyncClient if async_mode else httpx.Client
return client_cls(
transport=transport_cls(socket_options=sock_opts),
proxy=proxy,
)
except Exception:
return None
def _install_safe_stdio() -> None:
"""Wrap stdout/stderr so best-effort console output cannot crash the agent."""
for stream_name in ("stdout", "stderr"):
@@ -204,5 +164,4 @@ __all__ = [
"_install_safe_stdio",
"_get_proxy_from_env",
"_get_proxy_for_base_url",
"build_keepalive_http_client",
]

View File

@@ -243,10 +243,7 @@ KANBAN_GUIDANCE = (
"- **Workspace.** `cd $HERMES_KANBAN_WORKSPACE` first. For a `worktree` kind "
"with no `.git`, `git worktree add <path> "
"${HERMES_KANBAN_BRANCH:-wt/$HERMES_KANBAN_TASK}` from the main repo, then "
"cd there. For a project-linked task the workspace is a fresh "
"`<repo>/.worktrees/<task-id>` and `$HERMES_KANBAN_BRANCH` a deterministic "
"`<project-slug>/<task-id>` — the main repo is two levels up, so run "
"`git worktree add` from there.\n"
"cd there.\n"
"- **Deliverables.** Files a human wants go in "
"`kanban_complete(artifacts=[<absolute paths>])` (top-level param; paths in "
"`metadata` are NOT uploaded). Files must exist at completion.\n"
@@ -617,12 +614,7 @@ DEVELOPER_ROLE_MODELS = ("gpt-5", "codex")
PLATFORM_HINTS = {
"whatsapp": (
"You are on a text messaging communication platform, WhatsApp. "
"Standard markdown (**bold**, *italic*, ~~strike~~, # headers, "
"`code`, ```code blocks```, [links](url)) is auto-converted to "
"WhatsApp's native syntax (*bold*, _italic_, ~strike~, monospace) — "
"feel free to write in markdown, and use bullet lists ('- item') "
"freely. Tables are NOT supported — prefer bullet lists or labeled "
"key:value pairs. "
"Please do not use markdown as it does not render. "
"You can send media files natively: to deliver a file to the user, "
"include MEDIA:/absolute/path/to/file in your response. The file "
"will be sent as a native WhatsApp attachment — images (.jpg, .png, "
@@ -687,11 +679,7 @@ PLATFORM_HINTS = {
),
"signal": (
"You are on a text messaging communication platform, Signal. "
"Standard markdown (**bold**, *italic*, ~~strike~~, # headers, "
"`code`, ```code blocks```) is auto-converted to Signal's native "
"rich formatting — feel free to write in markdown, and use bullet "
"lists ('- item') freely (they render as • bullets). Tables are NOT "
"supported — prefer bullet lists or labeled key:value pairs. "
"Please do not use markdown as it does not render. "
"You can send media files natively: to deliver a file to the user, "
"include MEDIA:/absolute/path/to/file in your response. Images "
"(.png, .jpg, .webp) appear as photos, audio as attachments, and other "
@@ -721,24 +709,7 @@ PLATFORM_HINTS = {
"(those are only intercepted on messaging platforms like Telegram, "
"Discord, Slack, etc.; on the CLI they render as literal text). "
"When referring to a file you created or changed, just state its "
"absolute path in plain text; the user can open it from there. "
"Cron jobs scheduled from this session are LOCAL-ONLY: their output is "
"saved (viewable via cronjob action='list') but is NOT delivered back "
"into this terminal — there is no live-delivery channel here. If the "
"user wants to be notified when a job runs, the job's `deliver` must "
"target a gateway-connected messaging platform (e.g. deliver='telegram' "
"or 'all'). Do not promise the user that a deliver='origin' or "
"default-deliver cron job will message them in this session."
),
"tui": (
"You are running in the Hermes terminal UI (TUI). "
"Cron jobs scheduled from this session are LOCAL-ONLY: their output is "
"saved (viewable via cronjob action='list') but is NOT delivered back "
"into this TUI session — there is no live-delivery channel here. If the "
"user wants to be notified when a job runs, the job's `deliver` must "
"target a gateway-connected messaging platform (e.g. deliver='telegram' "
"or 'all'). Do not promise the user that a deliver='origin' or "
"default-deliver cron job will message them in this session."
"absolute path in plain text; the user can open it from there."
),
"sms": (
"You are communicating via SMS. Keep responses concise and use plain text "
@@ -926,7 +897,8 @@ def _probe_remote_backend(env_type: str) -> str | None:
try:
# Import locally: tools/ imports are heavy and only relevant when a
# non-local backend is actually configured.
from tools.terminal_tool import _create_environment, _get_env_config # type: ignore
from tools.terminal_tool import _get_env_config # type: ignore
from tools.environments import get_environment # type: ignore
except Exception as e:
logger.debug("Backend probe unavailable (import failed): %s", e)
_BACKEND_PROBE_CACHE[cache_key] = ""
@@ -934,59 +906,7 @@ def _probe_remote_backend(env_type: str) -> str | None:
try:
config = _get_env_config()
# Build the environment the same way tools/terminal_tool.py does for a
# live command: select the backend image, then assemble ssh/container
# config from the env-derived dict. (There is no `get_environment`
# factory — the real entry point is `_create_environment`.)
if env_type == "docker":
image = config.get("docker_image", "")
elif env_type == "singularity":
image = config.get("singularity_image", "")
elif env_type == "modal":
image = config.get("modal_image", "")
elif env_type == "daytona":
image = config.get("daytona_image", "")
else:
image = ""
ssh_config = None
if env_type == "ssh":
ssh_config = {
"host": config.get("ssh_host", ""),
"user": config.get("ssh_user", ""),
"port": config.get("ssh_port", 22),
"key": config.get("ssh_key", ""),
"persistent": config.get("ssh_persistent", False),
}
container_config = None
if env_type in {"docker", "singularity", "modal", "daytona"}:
container_config = {
"container_cpu": config.get("container_cpu", 1),
"container_memory": config.get("container_memory", 5120),
"container_disk": config.get("container_disk", 51200),
"container_persistent": config.get("container_persistent", True),
"modal_mode": config.get("modal_mode", "auto"),
"docker_volumes": config.get("docker_volumes", []),
"docker_mount_cwd_to_workspace": config.get("docker_mount_cwd_to_workspace", False),
"docker_forward_env": config.get("docker_forward_env", []),
"docker_env": config.get("docker_env", {}),
"docker_run_as_host_user": config.get("docker_run_as_host_user", False),
"docker_extra_args": config.get("docker_extra_args", []),
"docker_persist_across_processes": config.get("docker_persist_across_processes", True),
"docker_orphan_reaper": config.get("docker_orphan_reaper", True),
}
env = _create_environment(
env_type=env_type,
image=image,
cwd=config.get("cwd", ""),
timeout=config.get("timeout", 180),
ssh_config=ssh_config,
container_config=container_config,
task_id="prompt-backend-probe",
host_cwd=config.get("host_cwd"),
)
env = get_environment(config)
# Single-line POSIX probe — works on any Unixy backend. Wrapped in
# `2>/dev/null` so a missing binary doesn't pollute the output.
probe_cmd = (

View File

@@ -1,216 +0,0 @@
"""Per-reasoning-model stale-timeout floor for known reasoning models.
Reasoning models (those that emit extended thinking blocks before their
first content token) routinely exceed Hermes's default chat-model
stale detectors:
* Stream stale detector: ``HERMES_STREAM_STALE_TIMEOUT`` default 180s
``agent/chat_completion_helpers.py:2544``
* Non-stream stale detector: ``HERMES_API_CALL_STALE_TIMEOUT`` default 90s
``run_agent.py:1140``
For NVIDIA Nemotron 3 Ultra on the hosted NIM gateway the empirical
upstream idle kill is ~120s (first-party reproduction at
NVIDIA/NemoClaw#4846 — TTFB ~31s, stream dies at 120s). The same
failure mode exists on OpenAI o1/o3, Anthropic Opus 4.x thinking,
DeepSeek R1, Qwen QwQ, xAI Grok reasoning — every cloud reasoning
model hits upstream-proxies / load-balancers with idle timeouts
shorter than the model's thinking phase. Result: the stale detector
kills the connection mid-think, surfacing as
``BrokenPipeError``/``RemoteProtocolError`` on the next read.
This module provides a floor that the existing stale-detector scaling
blocks consult via :func:`get_reasoning_stale_timeout_floor` and
apply as ``max(default, floor)``. It is a FLOOR:
* Never overrides explicit user config (``providers.<id>.models.<model>.stale_timeout_seconds``
or ``request_timeout_seconds`` already wins — this code never runs
in that branch).
* Never lowers an existing threshold.
* Has zero effect on non-reasoning models — they are not in the
allowlist and the resolver returns ``None``.
Matching uses start-anchored regex on the slug-only component of
the model name (after stripping any aggregator prefix like
``openai/``, ``x-ai/``, ``anthropic/``). The right-anchor matches
end-of-string or a ``-``/``.``/``_`` slug separator, so ``qwen3-235b``
matches the ``qwen3`` family entry (a future model slug would be
``qwen3-235b-instruct`` and would also match) but ``some-other-qwen3``
does NOT match ``qwen3`` (the ``-qwen3`` is not at start of slug).
The ``o1`` case is the most delicate: a model named
``llama-4-70b-o1-preview`` is a hypothetical community derivative that
should NOT trigger the reasoning-model floor for the user (the user
chose a non-OpenAI model, not a reasoning model). The start-of-slug
anchor naturally excludes this — the matched ``o1-preview`` is at
position 11 of the slug, not at position 0. The previous substring-
with-trailing-hyphen design would have over-matched here, which is
why start-of-slug anchoring is the right shape.
Fixes #52217.
"""
from __future__ import annotations
import re
from typing import Optional
# (slug, floor_seconds). Each slug is matched as a discrete
# word-boundary component via the wrapper regex in ``_match_any``
# below. Order is irrelevant — the first regex match wins.
_REASONING_STALE_TIMEOUT_FLOORS: tuple[tuple[str, int], ...] = (
# NVIDIA Nemotron — reasoning models behind hosted NIM with
# documented 60-180s upstream idle kill (NVIDIA/NemoClaw#4846:
# 120s measured).
("nemotron-3-ultra", 600),
("nemotron-3-super", 600),
("nemotron-3-nano", 300),
# DeepSeek — R1 reasoning model on hosted NIM / DeepSeek direct.
("deepseek-r1", 600),
("deepseek-reasoner", 600),
# Qwen — QwQ reasoning + Qwen3 thinking variants. QwQ-32B
# preview is the stable slug; ``qwen3`` covers the family of
# thinking-mode Qwen3 models (qwen3-235b-a22b, qwen3-32b, etc.)
# without over-matching every Qwen3 instruct variant — the
# right-anchor requires the slug to be at the start of the
# remaining model name, so ``qwen3-235b-instruct`` (instruct is
# NOT a thinking variant) would still match. Acceptable
# trade-off: instruct variants of qwen3 get the 180s floor
# even though they don't reason. The cost is a slightly longer
# wait on a hung provider; the alternative (matching only
# ``qwen3-.*-thinking``) breaks the moment NVIDIA or Alibaba
# ships a slightly different naming shape.
("qwq-32b", 300),
("qwen3", 180),
# OpenAI o-series — known multi-minute TTFB. Each variant
# enumerated explicitly so bare ``o1`` doesn't over-match
# ``olmo-1`` or hypothetical future community derivatives.
("o1", 600),
("o1-mini", 600),
("o1-pro", 600),
("o1-preview", 600),
("o3", 600),
("o3-pro", 600),
("o3-mini", 300),
("o4-mini", 300),
# Anthropic Claude 4.x thinking variants. Anchored at
# ``claude-opus-4`` so non-thinking Claude 3.x or future
# non-reasoning Claude variants don't match.
("claude-opus-4", 240),
("claude-sonnet-4.5", 180),
("claude-sonnet-4.6", 180),
# xAI Grok reasoning variants. Explicit reasoning-only keys
# plus one for the ``non-reasoning`` variant so users picking
# the fast variant don't get the 300s floor. Bare ``grok-3``,
# ``grok-4`` etc. don't match — only the explicit reasoning /
# non-reasoning pairs.
("grok-4-fast-reasoning", 300),
("grok-4.20-reasoning", 300),
("grok-4-fast-non-reasoning", 180),
)
# Pre-compile each pattern. Wrapper = start-of-slug + slug + end-or-
# separator, where ``start-of-slug`` means start-of-string OR
# immediately after the last ``/`` (aggregator separator) and
# ``end-or-separator`` means end-of-string OR a ``-``/``.``/``_``.
#
# Why start-of-slug and not start-of-string: aggregator prefixes
# like ``openai/`` should not affect matching — the slug identity is
# the part after the last ``/``. Stripping the aggregator prefix in
# :func:`get_reasoning_stale_timeout_floor` before regex matching
# gives the wrapper a clean start-of-string anchor.
#
# Why end-or-separator on the right: ``openai/o3-mini`` must match
# the ``o3-mini`` slug (the right anchor is end-of-string). And
# ``openai/o3-mini-2025-01-31`` must also match ``o3-mini`` (the right
# anchor is the ``-`` separator). But ``openai/o3-mini-fork`` should
# NOT match ``o3-mini`` if we wanted to exclude forks — though the
# pattern ``o3-mini-fork`` would be matched as a derivative anyway,
# so we accept that community forks inheriting the same prefix are
# treated as reasoning models (a reasonable default — the upstream
# gateway timing is the same).
_PATTERN_CACHE: dict[str, re.Pattern[str]] = {}
def _get_pattern(slug: str) -> re.Pattern[str]:
compiled = _PATTERN_CACHE.get(slug)
if compiled is None:
compiled = re.compile(
r"^"
+ re.escape(slug)
+ r"(?:$|[\-._])"
)
_PATTERN_CACHE[slug] = compiled
return compiled
def _match_any(model_lower: str) -> Optional[float]:
"""Return the floor for the first matching slug, else None.
Each table entry is matched as a start-of-slug prefix with the
slug-separator-or-end-of-string right-anchor. Table iteration
order is irrelevant: longest slug wins (so ``o3-mini`` beats
``o3`` on a model like ``openai/o3-mini``).
"""
# Sort by slug length descending so longer / more-specific slugs
# win on shared prefixes (o3-mini beats o3).
sorted_floors = sorted(
_REASONING_STALE_TIMEOUT_FLOORS, key=lambda kv: -len(kv[0])
)
for slug, floor in sorted_floors:
if _get_pattern(slug).search(model_lower):
return float(floor)
return None
def get_reasoning_stale_timeout_floor(model: object) -> Optional[float]:
"""Return the stale-timeout floor (seconds) for a known reasoning model.
Returns ``None`` when the model is not in the allowlist or the
argument is empty / not a string. Matching uses
word-boundary-anchored regex on the lowercased model name, so
``openai/o3-mini`` matches the ``o3-mini`` slug but
``olmo-1`` does NOT match ``o1`` (the ``o1`` substring is not
at a word boundary inside ``olmo-1``).
Aggregator prefixes (``openai/``, ``x-ai/``, ``anthropic/`` etc.)
are preserved through matching — the ``/`` is itself a word
boundary, so ``openai/o3-mini`` matches ``o3-mini`` because the
``/`` before ``o3-mini`` satisfies the left-anchor alternation.
This is a FLOOR — callers must apply it as ``max(default, floor)``
and only when no explicit user-configured per-model
``stale_timeout_seconds`` exists.
>>> get_reasoning_stale_timeout_floor("nvidia/nemotron-3-ultra-550b-a55b")
600.0
>>> get_reasoning_stale_timeout_floor("openai/o3-mini")
300.0
>>> get_reasoning_stale_timeout_floor("deepseek/deepseek-r1")
600.0
>>> get_reasoning_stale_timeout_floor("qwen/qwen3-235b-a22b-thinking")
180.0
>>> get_reasoning_stale_timeout_floor("x-ai/grok-4-fast-reasoning")
300.0
>>> get_reasoning_stale_timeout_floor("anthropic/claude-opus-4-6")
240.0
>>> get_reasoning_stale_timeout_floor("gpt-4o") is None
True
>>> get_reasoning_stale_timeout_floor("olmo-1") is None
True
>>> get_reasoning_stale_timeout_floor(None) is None
True
"""
if not model or not isinstance(model, str):
return None
name = model.strip().lower()
if not name:
return None
# Strip aggregator prefix (everything before and including the
# last ``/``). The wrapper regex anchors at start-of-string, so
# the slug identity is the bare model name.
if "/" in name:
name = name.rsplit("/", 1)[1]
return _match_any(name)

View File

@@ -10,7 +10,6 @@ the first 6 and last 4 characters for debuggability.
import logging
import os
import re
import shlex
logger = logging.getLogger(__name__)
@@ -108,60 +107,12 @@ _PREFIX_PATTERNS = [
r"ntn_[A-Za-z0-9]{10,}", # Notion internal integration token
]
# ENV assignment patterns: KEY=value where KEY contains a secret-like name.
# Uppercase keys tolerate spaces around "=" (e.g. ``FOO_SECRET = bar``) because
# an all-caps key is almost never prose/code.
# ENV assignment patterns: KEY=value where KEY contains a secret-like name
_SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)"
_ENV_ASSIGN_RE = re.compile(
rf"([A-Z0-9_]{{0,50}}{_SECRET_ENV_NAMES}[A-Z0-9_]{{0,50}})\s*=\s*(['\"]?)(\S+)\2",
)
# Lowercase / dotted / hyphenated config keys from config files
# (application.properties, .env, YAML-ish dumps): ``spring.datasource.password=secret``,
# ``app.api.key=xyz``, ``password=secret``. The uppercase _ENV_ASSIGN_RE above
# never matched these, so config-file passwords leaked verbatim (issue #16413).
#
# These run only in a config-file context, NOT in prose, code, or URLs — three
# carve-outs preserved from the original design (#4367 + the documented
# web-URL passthrough below):
# 1. The value is bounded by ``[^\s&]`` (stops at whitespace AND ``&``) so
# form-urlencoded bodies are handled pair-by-pair (by _redact_form_body),
# not greedily swallowed.
# 2. _CFG_DOTTED_RE only matches when the key is NAMESPACED (contains a dot),
# which is unambiguously a config key — never a prose word.
# 3. _CFG_ANCHORED_RE matches a bare secret-word key only at line start
# (optionally after ``export``), so conversational ``I have password=foo``
# mid-sentence is left alone.
# The colon-form URL guard (skip when ``://`` present) lives at the call site.
_SECRET_CFG_NAMES = r"(?:api[ _.\-]?key|token|secret|passwd|password|credential|auth)"
_CFG_VALUE = r"(['\"]?)([^\s&]+?)\2(?=[\s&]|$)"
# Namespaced (dotted) key: the secret word may sit anywhere in a dotted path.
_CFG_DOTTED_RE = re.compile(
rf"((?:[A-Za-z0-9_\-]+\.)+[A-Za-z0-9_.\-]*{_SECRET_CFG_NAMES}[A-Za-z0-9_.\-]*"
rf"|[A-Za-z0-9_.\-]*{_SECRET_CFG_NAMES}[A-Za-z0-9_.\-]*\.[A-Za-z0-9_.\-]+)"
rf"={_CFG_VALUE}",
re.IGNORECASE,
)
# Line-anchored bare key: ``password=…`` / ``export api_key=…`` at start of line.
_CFG_ANCHORED_RE = re.compile(
rf"(^[ \t]*(?:export[ \t]+)?[A-Za-z0-9_\-]*{_SECRET_CFG_NAMES}[A-Za-z0-9_\-]*)={_CFG_VALUE}",
re.IGNORECASE | re.MULTILINE,
)
# Unquoted YAML / colon config (e.g. ``password: secret``,
# ``spring.datasource.password: hunter2``). The secret keyword must be part of
# the KEY (anchored to the start of the line/indent), and the value is a single
# whitespace-free token — so prose like ``note: secret meeting`` (keyword in the
# value) and ``error: token expired`` are left alone. Bare ``auth`` is excluded
# from the key set so ``Authorization:`` / ``author:`` don't match (the former
# is masked by _AUTH_HEADER_RE); ``auth_token``/``auth-token`` still match via
# the ``token`` keyword. Quoted values defer to _JSON_FIELD_RE via the lookahead.
_YAML_CFG_NAMES = r"(?:api[ _.\-]?key|token|secret|passwd|password|credential)"
_YAML_ASSIGN_RE = re.compile(
rf"(^[ \t]*[A-Za-z0-9_.\-]*{_YAML_CFG_NAMES}[A-Za-z0-9_.\-]*)(:[ \t]*)(?!['\"])([^\s&]+)",
re.IGNORECASE | re.MULTILINE,
)
# JSON field patterns: "apiKey": "value", "token": "value", etc.
_JSON_KEY_NAMES = r"(?:api_?[Kk]ey|token|secret|password|access_token|refresh_token|auth_token|bearer|secret_value|raw_secret|secret_input|key_material)"
_JSON_FIELD_RE = re.compile(
@@ -174,15 +125,8 @@ _JSON_FIELD_RE = re.compile(
# while the header name and scheme word are preserved for debuggability. The
# previous rule only matched ``Bearer``, so ``Basic <base64 user:pass>`` and
# ``token <pat>`` leaked verbatim into logs/transcripts.
#
# The credential class excludes quote characters (``"`` / ``'``): a token sitting
# flush against a closing quote (``"Authorization: Bearer sk-..."``) must not pull
# that quote into the match, or masking turns value corruption into *syntax*
# corruption — the closing quote vanishes and the command/string no longer parses
# (unterminated quote → shell EOF / Python SyntaxError). Real credentials never
# contain ``"`` or ``'``, so excluding them is safe. See #43083.
_AUTH_HEADER_RE = re.compile(
r"((?:Proxy-)?Authorization:\s*)([A-Za-z][\w.+-]*\s+)?([^\s\"']+)",
r"((?:Proxy-)?Authorization:\s*)([A-Za-z][\w.+-]*\s+)?(\S+)",
re.IGNORECASE,
)
@@ -210,15 +154,9 @@ _PRIVATE_KEY_RE = re.compile(
)
# Database connection strings: protocol://user:PASSWORD@host
# Catches postgres, mysql, mongodb, redis, amqp URLs and redacts the password.
# The userinfo and password groups forbid whitespace ([^:\s]+ / [^@\s]+) so the
# match can never span a line break. A real DSN password never contains
# whitespace; without this bound the greedy [^@]+ would scan past the end of a
# code line to the next stray "@" (e.g. a Python decorator), swallowing
# intervening lines and corrupting tool OUTPUT for any source containing a
# postgresql:// f-string template. See issue #33801.
# Catches postgres, mysql, mongodb, redis, amqp URLs and redacts the password
_DB_CONNSTR_RE = re.compile(
r"((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:\s]+:)([^@\s]+)(@)",
r"((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:]+:)([^@]+)(@)",
re.IGNORECASE,
)
@@ -402,40 +340,7 @@ def _redact_form_body(text: str) -> str:
return _redact_query_string(text.strip())
def _mask_token_nonreusable(token: str) -> str:
"""Redact a prefix-matched credential to a NON-REUSABLE sentinel.
Unlike :func:`_mask_token` (which keeps head/tail chars — fine for logs
that are never fed back into a config), this emits a marker that:
* cannot be mistaken for a usable-but-truncated key, so an agent that
reads it from a config file and writes it back does NOT corrupt the
stored credential into a dead 13-char string (issue #35519); and
* still does not leak the secret material (no head/tail chars).
The vendor prefix label is preserved for debuggability so the agent can
still tell *which* credential is present (e.g. a GitHub PAT vs an OpenAI
key) without seeing any of its bytes.
"""
if not token:
return "«redacted-secret»"
# Preserve only the recognizable vendor prefix label (e.g. "ghp_", "sk-"),
# never any of the random secret body.
label = ""
for sub in _PREFIX_SUBSTRINGS:
if token.startswith(sub):
label = sub
break
return f"«redacted:{label}…»" if label else "«redacted-secret»"
def redact_sensitive_text(
text: str,
*,
force: bool = False,
code_file: bool = False,
file_read: bool = False,
) -> str:
def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = False) -> str:
"""Apply all redaction patterns to a block of text.
Safe to call on any string -- non-matching text passes through unchanged.
@@ -448,17 +353,6 @@ def redact_sensitive_text(
constants, "apiKey": "test" fixtures). Prefix patterns, auth headers,
private keys, DB connstrings, JWTs, and URL secrets are still redacted.
Set file_read=True for file *content* returned to the agent (read_file /
search_files / cat). Secrets are STILL redacted — they are never exposed —
but prefix-matched credentials are replaced with a non-reusable sentinel
(``«redacted:ghp_…»``) instead of a head/tail-preserving mask
(``ghp_S1...Pn2T``). The old mask looked like a real-but-truncated key, so
an agent reading it from config.yaml and writing it back silently corrupted
the stored credential into a dead 13-char value → 401 (issue #35519). The
sentinel is syntactically invalid as a token, so it can't be mistaken for a
usable key or written back as one. Implies code_file=True (config/data
files shouldn't trigger the source-code ENV/JSON false-positive paths).
Performance: each regex pattern is gated behind a cheap substring
pre-check (e.g. ``"=" in text`` for ENV assignments, ``"://" in text``
for URLs, ``"eyJ" in text`` for JWTs). On a typical hermes log line
@@ -477,15 +371,9 @@ def redact_sensitive_text(
if not (force or _REDACT_ENABLED):
return text
# file_read content shouldn't hit the source-code ENV/JSON false-positive
# paths either (it's config/data, not log lines).
if file_read:
code_file = True
# Known prefixes (sk-, ghp_, etc.) — gate on substring presence
if _has_known_prefix_substring(text):
_prefix_sub = _mask_token_nonreusable if file_read else _mask_token
text = _PREFIX_RE.sub(lambda m: _prefix_sub(m.group(1)), text)
text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text)
# ENV assignments: OPENAI_API_KEY=*** (skip for code files — false positives)
if not code_file:
@@ -494,13 +382,6 @@ def redact_sensitive_text(
name, quote, value = m.group(1), m.group(2), m.group(3)
return f"{name}={quote}{_mask_token(value)}{quote}"
text = _ENV_ASSIGN_RE.sub(_redact_env, text)
# Lowercase/dotted config keys (issue #16413). Skip URLs entirely —
# web-URL query params are intentionally passed through (see note
# near the bottom of this function); _DB_CONNSTR_RE still guards
# connection-string passwords.
if "://" not in text:
text = _CFG_DOTTED_RE.sub(_redact_env, text)
text = _CFG_ANCHORED_RE.sub(_redact_env, text)
# JSON fields: "apiKey": "***" (skip for code files — false positives)
if ":" in text and '"' in text:
@@ -509,15 +390,6 @@ def redact_sensitive_text(
return f'{key}: "{_mask_token(value)}"'
text = _JSON_FIELD_RE.sub(_redact_json, text)
# Unquoted YAML / colon config: password: *** (after JSON so quoted
# values are handled there; the lookahead in _YAML_ASSIGN_RE skips
# quotes). Skip URLs — web-URL query params pass through by design.
if ":" in text and "://" not in text:
def _redact_yaml(m):
key, sep, value = m.group(1), m.group(2), m.group(3)
return f"{key}{sep}{_mask_token(value)}"
text = _YAML_ASSIGN_RE.sub(_redact_yaml, text)
# Authorization headers — _AUTH_HEADER_RE matches any scheme after
# "[Proxy-]Authorization:" case-insensitively, so "uthorization" is the
# cheapest substring gate that covers every casing without a casefold().
@@ -547,22 +419,9 @@ def redact_sensitive_text(
if "BEGIN" in text and "-----" in text:
text = _PRIVATE_KEY_RE.sub("[REDACTED PRIVATE KEY]", text)
# Database connection string passwords. With code_file=True, a password
# group that is a pure ``{...}`` brace expression is an f-string template
# reference (e.g. f"postgresql://{user}:{pass}@{host}"), not a literal
# credential — preserve it. Literal passwords are still redacted. The regex
# forbids whitespace in the password group, so a single-line template's
# group(2) is exactly the brace expression. See issue #33801.
# Database connection string passwords
if "://" in text:
if code_file:
def _redact_db(m):
pw = m.group(2)
if pw.startswith("{") and pw.endswith("}"):
return m.group(0)
return f"{m.group(1)}***{m.group(3)}"
text = _DB_CONNSTR_RE.sub(_redact_db, text)
else:
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
# JWT tokens (eyJ... — base64-encoded JSON headers)
if "eyJ" in text:
@@ -593,66 +452,6 @@ def redact_sensitive_text(
return text
# Commands whose stdout is an environment-variable dump (KEY=value lines),
# NOT source code. For these, terminal-output redaction must run the
# ENV-assignment pass (code_file=False) so opaque tokens with no recognized
# vendor prefix (e.g. ``MY_SERVICE_TOKEN=abc123randomstring``) are still
# masked. For all other commands, code_file=True is used to avoid mangling
# legitimate source/config dumps (``MAX_TOKENS=100``, ``"apiKey": "x"``
# fixtures, ``postgresql://{user}`` f-string templates). See issue #43025.
_ENV_DUMP_COMMANDS = frozenset({"env", "printenv", "set", "export", "declare"})
def is_env_dump_command(command: str | None) -> bool:
"""Return True if ``command`` dumps environment variables to stdout.
Detects ``env`` / ``printenv`` / ``set`` / ``export`` / ``declare`` as the
first token of any segment in a pipeline or sequence (``;`` / ``&&`` /
``||`` / ``|``). Conservative: a parse failure or anything unrecognized
returns False (callers then fall back to the safer code_file=True path,
which still masks prefix-shaped keys).
"""
if not command or not isinstance(command, str):
return False
# Split on shell separators, then inspect the first token of each segment.
segments = re.split(r"[|;&]+", command)
for seg in segments:
seg = seg.strip()
if not seg:
continue
try:
tokens = shlex.split(seg)
except ValueError:
tokens = seg.split()
if tokens and tokens[0] in _ENV_DUMP_COMMANDS:
return True
return False
def redact_terminal_output(
output: str, command: str | None = None, *, force: bool = False
) -> str:
"""Redact secrets from terminal/process stdout.
Single redaction policy for ALL terminal-output surfaces — foreground
``terminal`` results AND background ``process(action=poll/log/wait)``
output — so they can't diverge. Picks ``code_file`` based on whether
``command`` is an environment dump:
- env-dump command (``env``/``printenv``/``set``/``export``/``declare``)
→ ``code_file=False`` so the ENV-assignment pass masks opaque tokens.
- anything else (or unknown command) → ``code_file=True`` to avoid
false positives on source/config dumps.
``force=True`` bypasses the global ``security.redact_secrets`` preference
for safety boundaries that must never emit raw credentials.
"""
if not output:
return output
code_file = not is_env_dump_command(command or "")
return redact_sensitive_text(output, force=force, code_file=code_file)
# Substrings used to gate ``_PREFIX_RE`` execution. If none of these appear in
# the input string, the prefix regex cannot match anything, so we skip it.
# False positives are fine (they just run the regex, which then matches

View File

@@ -1,140 +0,0 @@
"""Replay-history sanitization shared across resume code paths.
When a session's last turn dies mid-tool-loop — the process is killed by a
restart/shutdown command, a stale-timeout fires, or an interrupt lands before
the tool result is written — the persisted transcript can end with a dangling
``assistant(tool_calls)`` (no matching ``tool`` answer) or an interrupted
``assistant→tool`` block. On resume the model sees that broken tail and
re-issues the unanswered call, producing an endless "thinking"/reboot loop
(#49201, #29086).
These pure helpers strip those tails before the history is replayed to the
model. They were originally local to ``gateway/run.py`` (which fixed the
messaging-gateway path) and are extracted here so every resume surface — the
messaging gateway AND the TUI/WebUI gateway — shares the same cleanup instead
of the WebUI path silently skipping it.
"""
from __future__ import annotations
import logging
from typing import Any, Dict, List
logger = logging.getLogger(__name__)
def is_interrupted_tool_result(content: Any) -> bool:
"""Return True if a tool result indicates the tool was interrupted."""
if not isinstance(content, str):
return False
lowered = content.lower()
if "[command interrupted]" in lowered:
return True
if "exit_code" in lowered and ("130" in lowered or "-1" in lowered):
return "interrupt" in lowered
return False
def strip_interrupted_tool_tails(
agent_history: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
"""Strip interrupted assistant→tool sequences from replay history.
Older interrupted gateway turns can be followed by a queued real user
message, so the interrupted assistant/tool block is not necessarily the
final tail by the time we rebuild replay history. Remove any contiguous
assistant(tool_calls) + tool-result block that contains an interrupted tool
result, while preserving successful tool-call sequences intact.
"""
if not agent_history:
return agent_history
cleaned: List[Dict[str, Any]] = []
i = 0
n = len(agent_history)
while i < n:
msg = agent_history[i]
if msg.get("role") == "assistant" and "tool_calls" in msg:
j = i + 1
tool_results: List[Dict[str, Any]] = []
while j < n and agent_history[j].get("role") == "tool":
tool_results.append(agent_history[j])
j += 1
if tool_results and any(
is_interrupted_tool_result(m.get("content", ""))
for m in tool_results
):
logger.debug(
"Stripping interrupted assistant→tool replay block "
"(indices %d%d, tool_results=%d)",
i, j - 1, len(tool_results),
)
i = j
continue
if msg.get("role") == "tool" and is_interrupted_tool_result(msg.get("content", "")):
logger.debug("Stripping orphan interrupted tool result from replay history")
i += 1
continue
cleaned.append(msg)
i += 1
return cleaned
def strip_dangling_tool_call_tail(
agent_history: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
"""Strip a trailing ``assistant(tool_calls)`` block left with NO answers.
When a tool call itself kills the gateway process (``docker restart``,
``systemctl restart``, ``kill``, ``hermes gateway restart``), the process
is terminated by SIGKILL *mid-call* — before the tool result is ever
written and before the orderly shutdown rewind
(``_drop_trailing_empty_response_scaffolding``) can run. The last thing
persisted is the ``assistant`` message that issued the ``tool_calls``,
with zero matching ``tool`` rows.
On resume the model sees an unanswered tool call at the tail and naturally
re-issues it — which restarts the gateway again, producing the infinite
reboot loop in #49201. ``strip_interrupted_tool_tails`` does not catch
this because there is no tool result to inspect for an interrupt marker.
This strips that dangling tail at the source so there is nothing for the
model to re-execute. It only acts when the tail is an
``assistant(tool_calls)`` whose calls have NO corresponding ``tool``
results — a completed assistant→tool pair (any tool answers present) is
left untouched so genuine mid-progress tool loops still resume.
"""
if not agent_history:
return agent_history
last = agent_history[-1]
if not (
isinstance(last, dict)
and last.get("role") == "assistant"
and last.get("tool_calls")
):
return agent_history
logger.debug(
"Stripping dangling unanswered assistant(tool_calls) tail "
"(%d call(s)) — process likely killed mid-tool-call by a "
"restart/shutdown command (#49201)",
len(last.get("tool_calls") or []),
)
return agent_history[:-1]
def sanitize_replay_history(
agent_history: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
"""Apply both replay-tail strippers in the canonical order.
Convenience entry point for resume code paths: removes interrupted
assistant→tool blocks anywhere in the history, then removes a dangling
unanswered ``assistant(tool_calls)`` tail. Returns the same list object
when there is nothing to strip.
"""
if not agent_history:
return agent_history
return strip_dangling_tool_call_tail(strip_interrupted_tool_tails(agent_history))

View File

@@ -8,7 +8,6 @@ rate-limited provider concurrently.
import random
import threading
import time
from typing import Any
# Monotonic counter for jitter seed uniqueness within the same process.
# Protected by a lock to avoid race conditions in concurrent retry paths
@@ -16,14 +15,6 @@ from typing import Any
_jitter_counter = 0
_jitter_lock = threading.Lock()
# Z.AI Coding Plan's GLM-5.2 endpoint often returns HTTP 429 code 1305
# ("The service may be temporarily overloaded...") for otherwise valid
# Hermes requests. Short retries tend to hammer the same overloaded window;
# after a few normal retries, progressively widen the wait window. Keep the
# cap interactive-friendly: a simple TUI message should fail visibly in minutes,
# not sit silent for 20+ minutes.
_ZAI_CODING_OVERLOAD_LONG_BACKOFF = (30.0, 60.0, 90.0, 120.0)
def jittered_backoff(
attempt: int,
@@ -64,66 +55,3 @@ def jittered_backoff(
jitter = rng.uniform(0, jitter_ratio * delay)
return delay + jitter
def _error_text(error: Any) -> str:
"""Best-effort flattened provider error text for retry classification."""
parts = [
error,
getattr(error, "message", None),
getattr(error, "body", None),
getattr(error, "response", None),
]
return " ".join(str(part) for part in parts if part is not None).lower()
def is_zai_coding_overload_error(*, base_url: str | None, model: str | None, error: Any) -> bool:
"""Return True for Z.AI Coding Plan transient overload 429s.
The coding-plan endpoint reports overload as HTTP 429 with body code 1305
and message "The service may be temporarily overloaded...". Treat only
that narrow shape specially so ordinary quota/billing 429s still fail fast
through the existing classifier.
"""
base = (base_url or "").lower()
model_name = (model or "").lower()
status = getattr(error, "status_code", None)
text = _error_text(error)
return (
status == 429
and "api.z.ai/api/coding/paas/v4" in base
and "glm-5.2" in model_name
and ("1305" in text or "temporarily overloaded" in text)
)
def adaptive_rate_limit_backoff(
attempt: int,
*,
base_url: str | None,
model: str | None,
error: Any,
default_wait: float,
short_attempts: int = 3,
) -> tuple[float, str | None]:
"""Provider-aware rate-limit backoff.
For most providers this returns ``default_wait`` unchanged. For Z.AI
Coding Plan GLM-5.2 overloads, keep the first ``short_attempts`` retries on
the normal short exponential schedule, then switch to progressively longer
waits (30s → 60s → 90s → 120s, capped) plus light jitter.
``attempt`` is 1-based, matching the retry loop's logged attempt number.
Returns ``(wait_seconds, reason_label)`` where ``reason_label`` is suitable
for status/log decoration when a provider-specific policy fired.
"""
if not is_zai_coding_overload_error(base_url=base_url, model=model, error=error):
return default_wait, None
if attempt <= short_attempts:
return default_wait, "zai_coding_overload_short"
idx = min(attempt - short_attempts - 1, len(_ZAI_CODING_OVERLOAD_LONG_BACKOFF) - 1)
base_delay = _ZAI_CODING_OVERLOAD_LONG_BACKOFF[idx]
# A smaller jitter ratio keeps long waits readable while still avoiding
# synchronized retry storms across concurrent Hermes sessions.
return jittered_backoff(1, base_delay=base_delay, max_delay=base_delay, jitter_ratio=0.2), "zai_coding_overload_long"

View File

@@ -122,8 +122,6 @@ from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple
from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags
try:
import fcntl # POSIX only; Windows falls back to best-effort without flock.
except ImportError: # pragma: no cover
@@ -443,7 +441,6 @@ def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
return result
t0 = time.monotonic()
_popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
try:
proc = subprocess.run(
argv,
@@ -452,7 +449,6 @@ def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
timeout=spec.timeout,
text=True,
shell=False,
**_popen_kwargs,
)
except subprocess.TimeoutExpired:
result["timed_out"] = True

View File

@@ -5,8 +5,6 @@ import re
import subprocess
from pathlib import Path
from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags
logger = logging.getLogger(__name__)
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
@@ -68,7 +66,6 @@ def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
Failures return a short ``[inline-shell error: ...]`` marker instead of
raising, so one bad snippet can't wreck the whole skill message.
"""
_popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
try:
completed = subprocess.run(
["bash", "-c", command],
@@ -78,7 +75,6 @@ def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
timeout=max(1, int(timeout)),
check=False,
stdin=subprocess.DEVNULL,
**_popen_kwargs,
)
except subprocess.TimeoutExpired:
return f"[inline-shell timeout after {timeout}s: {command}]"

View File

@@ -507,34 +507,6 @@ def get_all_skills_dirs() -> List[Path]:
return dirs
def _resolve_for_skill_ownership(path) -> Path:
path_obj = path if isinstance(path, Path) else Path(str(path))
try:
return path_obj.expanduser().resolve()
except (OSError, RuntimeError):
return path_obj.expanduser().absolute()
def is_external_skill_path(path) -> bool:
"""Return True when ``path`` lives under a configured external skills dir.
``skills.external_dirs`` are externally owned: Hermes can discover and view
their skills, and foreground user-directed tool calls may still edit them,
but autonomous lifecycle maintenance must treat them as read-only. This
helper centralizes the ownership boundary so curator/reporting/tool paths do
not each need to re-interpret the config.
"""
candidate = _resolve_for_skill_ownership(path)
for root in get_external_skills_dirs():
resolved_root = _resolve_for_skill_ownership(root)
try:
candidate.relative_to(resolved_root)
return True
except ValueError:
continue
return False
# ── Condition extraction ──────────────────────────────────────────────────

View File

@@ -1,30 +0,0 @@
"""Hermes telemetry & observability.
Local-first observability, on by default. The ``telemetry`` plugin registers Hermes
lifecycle hooks and hands typed events to the fire-and-forget ``emitter`` (queue ->
background writer -> JSONL + state.db ``tel_*`` index). The emitter never blocks or
raises into a model/tool call (the hot-path invariant).
Events record the observed model ids, provider names, and tool names. ``metrics``
derives rollups for /usage and /insights; ``rollup`` builds the per-run summaries shown
by ``hermes telemetry preview``. ``redaction`` + ``exporter_bulk`` + ``otlp_exporter``
handle export to an operator-chosen destination. ``policy`` holds the consent
constants and the aggregate upload gate (no uploader ships).
"""
from __future__ import annotations
from . import emitter, events, metrics, policy, spans
emit = emitter.emit
get_emitter = emitter.get_emitter
__all__ = [
"emitter",
"events",
"metrics",
"policy",
"spans",
"emit",
"get_emitter",
]

View File

@@ -1,318 +0,0 @@
"""Local telemetry emitter: fire-and-forget queue + background writer.
The emitter is the single seam between instrumentation (the telemetry plugin's hook
callbacks) and durable storage. Its contract is the hot-path invariant:
``emit()`` MUST return in O(microseconds), MUST NOT block on disk/network, and
MUST NEVER raise into the caller. A telemetry failure is logged locally and
dropped — it can never affect a model call, a tool call, or a session.
Mechanism:
* ``emit(event)`` does a non-blocking ``queue.put_nowait`` wrapped in a bare except.
On a full queue it drops the *oldest* event and counts the drop.
* A daemon thread drains the queue and writes each event to two places:
1. the append-only JSONL log (source of truth)
2. the ``tel_*`` SQLite tables in state.db (rebuildable index)
* The writer uses its own sqlite connection to state.db, separate from SessionDB,
so telemetry writes never contend with or corrupt session writes.
Local telemetry only. Nothing here uploads anywhere.
"""
from __future__ import annotations
import json
import logging
import queue
import sqlite3
import threading
import time
from pathlib import Path
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)
_MAX_QUEUE = 10_000 # ring-buffer depth; oldest dropped when full
_DRAIN_BATCH = 256
def _default_dir() -> Path:
"""Resolve the telemetry dir under the active HERMES_HOME (profile-safe)."""
from hermes_constants import get_hermes_home
return get_hermes_home() / "telemetry"
def _default_db_path() -> Path:
"""Resolve state.db under the active HERMES_HOME (profile-safe)."""
from hermes_constants import get_hermes_home
return get_hermes_home() / "state.db"
# Map a telemetry event dict (its "event" tag) to (table, column-ordered insert).
# Only the columns the indexer knows about are written; unknown keys are ignored,
# so an event carrying extra fields never breaks the insert.
_TABLE_COLUMNS: Dict[str, tuple] = {
"run": (
"tel_runs",
("run_id", "trace_id", "session_id", "entrypoint",
"platform", "start_ns", "end_ns", "end_reason",
"model_call_count", "tool_call_count", "error_count"),
),
"span": (
"tel_spans",
("span_id", "trace_id", "run_id", "parent_span_id", "name", "kind",
"start_ns", "end_ns", "status"),
),
"model_call": (
"tel_model_calls",
("span_id", "run_id", "provider", "model", "base_url",
"input_tokens", "output_tokens", "cache_read_tokens",
"cache_write_tokens", "reasoning_tokens", "latency_ms"),
),
"tool_call": (
"tel_tool_calls",
("span_id", "run_id", "tool_name", "duration_ms", "result_class"),
),
"error": (
"tel_error_events",
("run_id", "error_class", "subsystem", "recovery", "ts_ns"),
),
}
class TelemetryEmitter:
"""Owns the queue, the writer thread, and the telemetry sqlite connection."""
def __init__(
self,
*,
events_path: Optional[Path] = None,
db_path: Optional[Path] = None,
enabled: bool = True,
) -> None:
self._dir = (events_path.parent if events_path else _default_dir())
self._events_path = events_path or (self._dir / "events.jsonl")
self._db_path = db_path or _default_db_path()
self._enabled = enabled
self._q: "queue.Queue[Dict[str, Any]]" = queue.Queue(maxsize=_MAX_QUEUE)
self._dropped = 0
self._written = 0
self._stop = threading.Event()
self._started = False
self._lock = threading.Lock()
self._conn: Optional[sqlite3.Connection] = None
self._thread: Optional[threading.Thread] = None
# Optional live subscribers (e.g. OTLP exporter). Called from the writer
# thread AFTER durable writes, fully fail-isolated — a subscriber that
# raises or blocks can never affect the JSONL/SQLite source of truth or
# the hot path. Each subscriber is callable(batch: list[dict]).
self._subscribers: list = []
# ── public API (hot path) ───────────────────────────────────────────────
def emit(self, event: Any) -> None:
"""Enqueue an event. Never blocks, never raises.
``event`` may be a dataclass with ``to_dict()`` or a plain dict.
"""
if not self._enabled:
return
try:
payload = event.to_dict() if hasattr(event, "to_dict") else dict(event)
payload.setdefault("ts_ns", time.time_ns())
self._ensure_started()
try:
self._q.put_nowait(payload)
except queue.Full:
# Drop oldest to make room — bounded memory, newest-wins.
try:
self._q.get_nowait()
self._dropped += 1
self._q.put_nowait(payload)
except Exception:
self._dropped += 1
except Exception: # the hot-path invariant: never propagate
logger.debug("telemetry emit failed", exc_info=True)
# ── lifecycle ───────────────────────────────────────────────────────────
def _ensure_started(self) -> None:
if self._started:
return
with self._lock:
if self._started:
return
try:
self._dir.mkdir(parents=True, exist_ok=True)
except Exception:
logger.debug("telemetry dir create failed", exc_info=True)
self._thread = threading.Thread(
target=self._run, name="hermes-telemetry-writer", daemon=True
)
self._thread.start()
self._started = True
def _open_conn(self) -> Optional[sqlite3.Connection]:
if self._conn is not None:
return self._conn
try:
conn = sqlite3.connect(str(self._db_path), isolation_level=None, timeout=5.0)
conn.execute("PRAGMA journal_mode=WAL")
conn.execute("PRAGMA busy_timeout=5000")
self._conn = conn
except Exception:
logger.debug("telemetry db open failed", exc_info=True)
self._conn = None
return self._conn
def _run(self) -> None:
while not self._stop.is_set():
try:
first = self._q.get(timeout=0.5)
except queue.Empty:
continue
batch = [first]
while len(batch) < _DRAIN_BATCH:
try:
batch.append(self._q.get_nowait())
except queue.Empty:
break
self._write_batch(batch)
def _write_batch(self, batch) -> None:
# JSONL append (source of truth) — best effort.
try:
with open(self._events_path, "a", encoding="utf-8") as fh:
for ev in batch:
fh.write(json.dumps(ev, ensure_ascii=False) + "\n")
except Exception:
logger.debug("telemetry jsonl append failed", exc_info=True)
# SQLite index — best effort, per-event so one bad row can't lose the batch.
conn = self._open_conn()
if conn is None:
return
for ev in batch:
try:
self._index_one(conn, ev)
self._written += 1
except Exception:
logger.debug("telemetry index row failed", exc_info=True)
# Live fan-out (e.g. OTLP) — AFTER durable writes, fully fail-isolated.
# A slow/raising subscriber never affects JSONL/SQLite or the hot path.
for sub in self._subscribers:
try:
sub(batch)
except Exception:
logger.debug("telemetry subscriber failed", exc_info=True)
def subscribe(self, callback) -> None:
"""Register a live batch subscriber (callable(batch: list[dict])).
Called from the writer thread after durable writes. Used by the OTLP
exporter for continuous streaming. Fail-isolated; never on the hot path.
"""
if callback not in self._subscribers:
self._subscribers.append(callback)
def unsubscribe(self, callback) -> None:
try:
self._subscribers.remove(callback)
except ValueError:
pass
def _index_one(self, conn: sqlite3.Connection, ev: Dict[str, Any]) -> None:
kind = ev.get("event")
spec = _TABLE_COLUMNS.get(kind)
if spec is None:
return
table, cols = spec
values = [ev.get(c) for c in cols]
placeholders = ", ".join("?" for _ in cols)
collist = ", ".join(cols)
conn.execute(
f"INSERT OR REPLACE INTO {table} ({collist}) VALUES ({placeholders})",
values,
)
# ── introspection / shutdown (tests, CLI) ───────────────────────────────
def flush(self, timeout: float = 2.0) -> None:
"""Block until the queue drains (test/CLI helper, NOT the hot path)."""
deadline = time.monotonic() + timeout
while time.monotonic() < deadline:
if self._q.empty():
# give the writer a tick to finish the in-flight batch
time.sleep(0.05)
if self._q.empty():
return
time.sleep(0.02)
def stats(self) -> Dict[str, int]:
return {
"queued": self._q.qsize(),
"written": self._written,
"dropped": self._dropped,
}
def close(self) -> None:
self._stop.set()
if self._thread is not None:
self._thread.join(timeout=2.0)
if self._conn is not None:
try:
self._conn.close()
except Exception:
pass
self._conn = None
self._started = False
# ── process-wide singleton ──────────────────────────────────────────────────
_EMITTER: Optional[TelemetryEmitter] = None
_EMITTER_LOCK = threading.Lock()
def get_emitter() -> TelemetryEmitter:
"""Return the process-wide emitter, honoring telemetry.local config."""
global _EMITTER
if _EMITTER is not None:
return _EMITTER
with _EMITTER_LOCK:
if _EMITTER is None:
enabled = _local_enabled()
_EMITTER = TelemetryEmitter(enabled=enabled)
return _EMITTER
def _local_enabled() -> bool:
try:
from hermes_cli.config import load_config
cfg = load_config()
tel = cfg.get("telemetry") if isinstance(cfg, dict) else {}
return bool((tel or {}).get("local", True))
except Exception:
return True
def emit(event: Any) -> None:
"""Module-level convenience: emit via the singleton."""
get_emitter().emit(event)
def reset_emitter_for_tests(emitter: Optional[TelemetryEmitter] = None) -> None:
"""Swap the singleton (tests only)."""
global _EMITTER
with _EMITTER_LOCK:
if _EMITTER is not None and emitter is not _EMITTER:
try:
_EMITTER.close()
except Exception:
pass
_EMITTER = emitter
__all__ = [
"TelemetryEmitter",
"get_emitter",
"emit",
"reset_emitter_for_tests",
]

View File

@@ -1,111 +0,0 @@
"""Typed local telemetry events.
These dataclasses are the rows written to the local JSONL log and the ``tel_*``
SQLite tables. They record the values observed for each run — model id, provider, tool
name, token counts, durations — and stay on the machine unless explicitly exported.
"""
from __future__ import annotations
import time
from dataclasses import dataclass, field, asdict
from typing import Any, Dict, Optional
# ── local telemetry events (real values) ────────────────────────────────────
def _now_ns() -> int:
return time.time_ns()
@dataclass(slots=True)
class RunEvent:
"""One top-level workflow execution (a trace root). A run spans one session."""
run_id: str
trace_id: str
entrypoint: str
session_id: Optional[str] = None
platform: Optional[str] = None
start_ns: int = field(default_factory=_now_ns)
end_ns: Optional[int] = None
end_reason: Optional[str] = None
model_call_count: int = 0
tool_call_count: int = 0
error_count: int = 0
def to_dict(self) -> Dict[str, Any]:
return {"event": "run", **asdict(self)}
@dataclass(slots=True)
class ModelCallEvent:
span_id: str
run_id: str
provider: Optional[str] = None # raw provider, e.g. "anthropic"
model: Optional[str] = None # raw model id, e.g. "claude-opus-4"
base_url: Optional[str] = None
input_tokens: int = 0
output_tokens: int = 0
cache_read_tokens: int = 0
cache_write_tokens: int = 0
reasoning_tokens: int = 0
latency_ms: Optional[int] = None
def to_dict(self) -> Dict[str, Any]:
return {"event": "model_call", **asdict(self)}
@dataclass(slots=True)
class ToolCallEvent:
span_id: str
run_id: str
tool_name: Optional[str] = None # raw tool name, e.g. "web_search"
duration_ms: Optional[int] = None
result_class: Optional[str] = None
def to_dict(self) -> Dict[str, Any]:
return {"event": "tool_call", **asdict(self)}
@dataclass(slots=True)
class SpanEvent:
"""A timed span — the timing/lineage backbone of a trace.
One row per run (the root, ``parent_span_id=None``) and one per model/tool call
(``parent_span_id`` = the run's root span). Detail rows in ``tel_model_calls`` /
``tel_tool_calls`` share the ``span_id`` and are joined here for ordering and
placement on a timeline.
"""
span_id: str
trace_id: str
run_id: str
name: str
kind: str # "run" | "model" | "tool"
start_ns: int
end_ns: Optional[int] = None
parent_span_id: Optional[str] = None
status: Optional[str] = None
def to_dict(self) -> Dict[str, Any]:
return {"event": "span", **asdict(self)}
@dataclass(slots=True)
class ErrorEvent:
run_id: Optional[str]
error_class: str
subsystem: str
recovery: Optional[str] = None
ts_ns: int = field(default_factory=_now_ns)
def to_dict(self) -> Dict[str, Any]:
return {"event": "error", **asdict(self)}
__all__ = [
"RunEvent",
"ModelCallEvent",
"ToolCallEvent",
"SpanEvent",
"ErrorEvent",
]

View File

@@ -1,139 +0,0 @@
"""Export telemetry (and optionally session content) to a file or stream.
Two data domains, both written to an operator-chosen destination:
* Telemetry: the tel_* rows + events.jsonl (structural observability).
* Content (opt-in via telemetry.trajectories): sessions + messages, with every
content field (message body, reasoning, raw tool-call args) passed through the
redaction pipeline (secrets always stripped; PII per content_redaction).
Formats: ndjson (default) and json. OTLP streaming export lives in otlp_exporter.py.
Content export is gated by ``redaction.content_export_enabled``.
"""
from __future__ import annotations
import json
import sqlite3
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, TextIO
from . import redaction
_TEL_TABLES = (
"tel_runs", "tel_model_calls", "tel_tool_calls", "tel_error_events",
)
def _open(db_path: Optional[Path]) -> sqlite3.Connection:
if db_path is None:
from hermes_constants import get_hermes_home
db_path = get_hermes_home() / "state.db"
c = sqlite3.connect(str(db_path), timeout=5.0)
c.row_factory = sqlite3.Row
return c
def _iter_telemetry(conn: sqlite3.Connection, since_ns: Optional[int]) -> Iterator[Dict[str, Any]]:
for table in _TEL_TABLES:
# only tel_runs has start_ns; window the rest by run join when needed.
if table == "tel_runs" and since_ns:
rows = conn.execute(
f"SELECT * FROM {table} WHERE start_ns >= ?", (int(since_ns),)
).fetchall()
else:
rows = conn.execute(f"SELECT * FROM {table}").fetchall()
for r in rows:
d = dict(r)
d["_kind"] = table
yield d
def _iter_content(
db_path: Optional[Path],
*,
config: Optional[Dict[str, Any]],
include_content: bool,
) -> Iterator[Dict[str, Any]]:
"""Yield session records. Message bodies included only when trajectories on."""
from hermes_state import SessionDB
content_mode = redaction.content_mode_for(config)
db = SessionDB(db_path=db_path) if db_path else SessionDB()
try:
for session in db.export_all():
msgs = session.get("messages", []) or []
red_msgs = [
redaction.redact_message(
m, content_mode=content_mode, include_content=include_content
)
for m in msgs
]
# Session-level metadata is structural; keep ids/model/counts, drop
# any free-text title only when content is excluded.
out = {
"_kind": "session",
"id": session.get("id"),
"source": session.get("source"),
"model": session.get("model"),
"started_at": session.get("started_at"),
"ended_at": session.get("ended_at"),
"message_count": session.get("message_count"),
"tool_call_count": session.get("tool_call_count"),
"messages": red_msgs,
}
if include_content and session.get("title"):
out["title"] = redaction.redact_for_export(
session["title"], content_mode=content_mode
)
yield out
finally:
db.close()
def export(
out: TextIO,
*,
fmt: str = "ndjson",
since_ns: Optional[int] = None,
include_content: bool = False,
config: Optional[Dict[str, Any]] = None,
db_path: Optional[Path] = None,
) -> Dict[str, int]:
"""Write telemetry (+ optional content) to ``out``. Returns counts.
``include_content`` is honored only when telemetry.trajectories is enabled in
``config``; otherwise content is forced off and only structural data is written.
"""
# Trajectories gate: a flag cannot override the config setting.
content_allowed = include_content and redaction.content_export_enabled(config)
counts = {"telemetry": 0, "sessions": 0, "content_included": int(content_allowed)}
conn = _open(db_path)
records: List[Dict[str, Any]] = []
try:
for rec in _iter_telemetry(conn, since_ns):
counts["telemetry"] += 1
if fmt == "ndjson":
out.write(json.dumps(rec, ensure_ascii=False) + "\n")
else:
records.append(rec)
finally:
conn.close()
# Content/session domain (separate connection via SessionDB).
for rec in _iter_content(db_path, config=config, include_content=content_allowed):
counts["sessions"] += 1
if fmt == "ndjson":
out.write(json.dumps(rec, ensure_ascii=False) + "\n")
else:
records.append(rec)
if fmt != "ndjson":
json.dump({"records": records}, out, ensure_ascii=False, indent=2)
return counts
__all__ = ["export"]

View File

@@ -1,219 +0,0 @@
"""Derive metric rollups from the local telemetry tables.
Reads the ``tel_*`` tables in state.db and returns aggregates for /usage, /insights,
and local dashboards. Metrics are computed by querying the event log rather than being
emitted on the hot path.
Each function accepts either an open caller-owned ``conn`` (reused, not closed) or a
``db_path`` (opened and closed internally). InsightsEngine passes its existing
connection; a standalone dashboard passes a path.
"""
from __future__ import annotations
import sqlite3
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional
@contextmanager
def _cursor(
conn: Optional[sqlite3.Connection], db_path: Optional[Path]
) -> Iterator[sqlite3.Connection]:
"""Yield a Row-factory connection. Closes it only if we opened it."""
if conn is not None:
prev_factory = conn.row_factory
conn.row_factory = sqlite3.Row
try:
yield conn
finally:
conn.row_factory = prev_factory
return
if db_path is None:
from hermes_constants import get_hermes_home
db_path = get_hermes_home() / "state.db"
c = sqlite3.connect(str(db_path), timeout=5.0)
c.row_factory = sqlite3.Row
try:
yield c
finally:
c.close()
def _since_clause(since_ns: Optional[int], col: str = "start_ns") -> str:
return f" WHERE {col} >= {int(since_ns)}" if since_ns else ""
def workflow_summary(
db_path: Optional[Path] = None,
since_ns: Optional[int] = None,
*,
conn: Optional[sqlite3.Connection] = None,
) -> Dict[str, Any]:
"""Run-level counters + duration percentiles (local telemetry, exact)."""
with _cursor(conn, db_path) as c:
where = _since_clause(since_ns)
total = c.execute(f"SELECT COUNT(*) n FROM tel_runs{where}").fetchone()["n"]
by_reason = {
r["end_reason"] or "unknown": r["n"]
for r in c.execute(
f"SELECT end_reason, COUNT(*) n FROM tel_runs{where} GROUP BY end_reason"
).fetchall()
}
by_entry = {
r["entrypoint"] or "unknown": r["n"]
for r in c.execute(
f"SELECT entrypoint, COUNT(*) n FROM tel_runs{where} GROUP BY entrypoint"
).fetchall()
}
dur_where = (where + " AND end_ns IS NOT NULL") if where else " WHERE end_ns IS NOT NULL"
durations = [
(r["end_ns"] - r["start_ns"]) / 1e6
for r in c.execute(
f"SELECT start_ns, end_ns FROM tel_runs{dur_where}"
).fetchall()
]
return {
"total_runs": total,
"by_end_reason": by_reason,
"by_entrypoint": by_entry,
"duration_ms_p50": _pct(durations, 50),
"duration_ms_p95": _pct(durations, 95),
"success_rate": round(by_reason.get("completed", 0) / total, 4) if total else 0.0,
}
def model_call_summary(
db_path: Optional[Path] = None,
since_ns: Optional[int] = None,
*,
conn: Optional[sqlite3.Connection] = None,
) -> Dict[str, Any]:
with _cursor(conn, db_path) as c:
rows = c.execute(
"SELECT provider, model, COUNT(*) n, "
"SUM(input_tokens) inp, SUM(output_tokens) outp, "
"SUM(cache_read_tokens) cache, AVG(latency_ms) avg_latency "
"FROM tel_model_calls GROUP BY provider, model"
).fetchall()
by_provider: Dict[str, int] = {}
by_model: Dict[str, int] = {}
tokens = {"input": 0, "output": 0, "cache_read": 0}
breakdown: List[Dict[str, Any]] = []
for r in rows:
prov = r["provider"] or "unknown"
mdl = r["model"] or "unknown"
by_provider[prov] = by_provider.get(prov, 0) + r["n"]
by_model[mdl] = by_model.get(mdl, 0) + r["n"]
tokens["input"] += r["inp"] or 0
tokens["output"] += r["outp"] or 0
tokens["cache_read"] += r["cache"] or 0
breakdown.append({
"provider": r["provider"],
"model": r["model"],
"calls": r["n"],
"avg_latency_ms": round(r["avg_latency"] or 0, 1),
})
cache_total = tokens["cache_read"] + tokens["input"]
return {
"by_provider": by_provider,
"by_model": by_model,
"tokens": tokens,
"cache_hit_rate": round(tokens["cache_read"] / cache_total, 4) if cache_total else 0.0,
"breakdown": breakdown,
}
def tool_call_summary(
db_path: Optional[Path] = None,
*,
conn: Optional[sqlite3.Connection] = None,
) -> Dict[str, Any]:
with _cursor(conn, db_path) as c:
by_tool = {
r["tool_name"] or "unknown": r["n"]
for r in c.execute(
"SELECT tool_name, COUNT(*) n FROM tel_tool_calls GROUP BY tool_name"
).fetchall()
}
fails = {
r["tool_name"] or "unknown": r["n"]
for r in c.execute(
"SELECT tool_name, COUNT(*) n FROM tel_tool_calls "
"WHERE result_class IN ('error','timeout','blocked') GROUP BY tool_name"
).fetchall()
}
total = sum(by_tool.values())
total_fail = sum(fails.values())
return {
"by_tool": by_tool,
"failures_by_tool": fails,
"total": total,
"failure_rate": round(total_fail / total, 4) if total else 0.0,
}
def error_summary(
db_path: Optional[Path] = None,
*,
conn: Optional[sqlite3.Connection] = None,
) -> Dict[str, Any]:
with _cursor(conn, db_path) as c:
return {
"by_class": {
r["error_class"] or "unknown": r["n"]
for r in c.execute(
"SELECT error_class, COUNT(*) n FROM tel_error_events GROUP BY error_class"
).fetchall()
},
}
def _pct(values: List[float], p: int) -> float:
if not values:
return 0.0
s = sorted(values)
k = (len(s) - 1) * (p / 100)
lo = int(k)
hi = min(lo + 1, len(s) - 1)
frac = k - lo
return round(s[lo] + (s[hi] - s[lo]) * frac, 2)
def overview(
db_path: Optional[Path] = None,
since_ns: Optional[int] = None,
*,
conn: Optional[sqlite3.Connection] = None,
) -> Dict[str, Any]:
"""One call for a dashboard: all the rollups."""
return {
"workflows": workflow_summary(db_path, since_ns, conn=conn),
"model_calls": model_call_summary(db_path, since_ns, conn=conn),
"tool_calls": tool_call_summary(db_path, conn=conn),
"errors": error_summary(db_path, conn=conn),
}
def has_data(
db_path: Optional[Path] = None,
*,
conn: Optional[sqlite3.Connection] = None,
) -> bool:
"""True when any telemetry runs exist (cheap guard for /insights rendering)."""
try:
with _cursor(conn, db_path) as c:
return c.execute("SELECT 1 FROM tel_runs LIMIT 1").fetchone() is not None
except Exception:
return False
__all__ = [
"workflow_summary",
"model_call_summary",
"tool_call_summary",
"error_summary",
"overview",
"has_data",
]

View File

@@ -1,289 +0,0 @@
"""Export telemetry to an OpenTelemetry Collector over OTLP/HTTP.
Maps the local tel_* events to OTel spans and sends them to the endpoint configured
under ``telemetry.export.otlp``. Lets an operator stream Hermes telemetry into their
own observability stack.
Notes:
* The destination is operator-configured; this module only sends to that endpoint.
It does not import or interact with any aggregate-metrics path.
* ``opentelemetry-sdk`` + ``opentelemetry-exporter-otlp-proto-http`` are an optional
extra (``pip install hermes-agent[otlp]``), imported lazily so the dependency is
only required when OTLP export is actually used.
* ``headers_env`` maps a header name to an environment variable name; values are read
from the environment at export time and never logged or stored.
* The continuous subscriber runs in the emitter's writer thread after durable writes
and is fail-isolated, so an export error cannot affect a run.
Each event is exported as a span carrying its recorded attributes (provider, model,
tokens, duration, etc.). The timing/parent linkage captured in tel_spans
(trace_id/span_id/parent_span_id/start_ns/end_ns) is not yet reconstructed into OTel
SpanContexts here, so spans currently arrive as independent records rather than a
connected trace tree; building the connected-trace projection is tracked separately.
Spans carry structural telemetry by default. Message content is included only when
trajectories is enabled, and always passes through the export redaction pipeline.
"""
from __future__ import annotations
import logging
import os
import sqlite3
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
class OTLPUnavailable(RuntimeError):
"""Raised when the optional OpenTelemetry SDK isn't installed."""
def _require_sdk(*, auto_install: bool = True, prompt: bool = True):
"""Import the OTel SDK, lazily installing it on first use if needed.
Routes through tools.lazy_deps (feature 'export.otlp') so a missing SDK
triggers the standard venv install flow — same as every other optional
backend — gated by security.allow_lazy_installs and TTY-prompted. Falls back
to OTLPUnavailable (with a manual install hint) when the SDK can't be made
importable (lazy installs disabled, install failed, or auto_install=False).
``auto_install``: attempt the lazy install when missing (default True).
``prompt``: ask before installing when interactive (default True); pass
False from non-interactive contexts like the continuous streamer.
"""
if auto_install:
try:
from tools.lazy_deps import ensure as _lazy_ensure
_lazy_ensure("export.otlp", prompt=prompt)
except ImportError:
pass # lazy_deps unavailable — fall through to the import attempt
except Exception:
# FeatureUnavailable (lazy installs disabled / declined / failed) —
# fall through; the import below raises OTLPUnavailable with the hint.
pass
try:
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.sdk.resources import Resource
from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
OTLPSpanExporter,
)
from opentelemetry.trace import SpanKind
return {
"TracerProvider": TracerProvider,
"BatchSpanProcessor": BatchSpanProcessor,
"Resource": Resource,
"OTLPSpanExporter": OTLPSpanExporter,
"SpanKind": SpanKind,
}
except Exception as e: # ImportError or partial install
raise OTLPUnavailable(
"OTLP export requires the optional dependency. Install with:\n"
" pip install 'hermes-agent[otlp]'\n"
f"(import error: {e})"
)
def _resolve_headers(headers_env: Optional[Dict[str, str]]) -> Dict[str, str]:
"""Resolve {header_name: ENV_VAR_NAME} -> {header_name: value} from env.
The config stores environment variable names, not secret values; values are read
from the environment here. Missing variables are skipped (and noted at debug level
without the value).
"""
resolved: Dict[str, str] = {}
for header_name, env_name in (headers_env or {}).items():
val = os.environ.get(str(env_name))
if val:
resolved[str(header_name)] = val
else:
logger.debug("OTLP header %s: env var %s not set; skipping",
header_name, env_name)
return resolved
def _otlp_config(config: Dict[str, Any]) -> Dict[str, Any]:
tel = (config or {}).get("telemetry") or {}
export = tel.get("export") or {}
return export.get("otlp") or {}
def build_exporter(config: Dict[str, Any]):
"""Construct an OTLP span exporter from config. Raises OTLPUnavailable if no SDK."""
sdk = _require_sdk()
otlp = _otlp_config(config)
endpoint = otlp.get("endpoint")
if not endpoint:
raise ValueError("telemetry.export.otlp.endpoint is not set")
headers = _resolve_headers(otlp.get("headers_env"))
return sdk["OTLPSpanExporter"](endpoint=endpoint, headers=headers or None)
def _make_provider(config: Dict[str, Any]):
sdk = _require_sdk()
resource = sdk["Resource"].create({
"service.name": "hermes-agent",
"telemetry.scope": "local", # never aggregate metrics
})
provider = sdk["TracerProvider"](resource=resource)
processor = sdk["BatchSpanProcessor"](build_exporter(config))
provider.add_span_processor(processor)
return provider, processor
# ── event -> span attribute mapping (real values) ───────────────────────────
def _span_attrs(ev: Dict[str, Any]) -> Dict[str, Any]:
"""Span attributes for an event — the real recorded values (local telemetry)."""
kind = ev.get("event")
attrs: Dict[str, Any] = {"hermes.event": kind or "unknown"}
keep_by_kind = {
"run": ("entrypoint", "platform", "end_reason",
"model_call_count", "tool_call_count", "error_count"),
"span": ("trace_id", "run_id", "parent_span_id", "name", "kind",
"start_ns", "end_ns", "status"),
"model_call": ("provider", "model", "base_url",
"input_tokens", "output_tokens", "cache_read_tokens",
"cache_write_tokens", "reasoning_tokens", "latency_ms"),
"tool_call": ("tool_name", "duration_ms", "result_class"),
"error": ("error_class", "subsystem", "recovery"),
}
for col in keep_by_kind.get(kind, ()): # type: ignore[arg-type]
v = ev.get(col)
if v is not None:
attrs[f"hermes.{col}"] = v
return attrs
def export_batch(provider, batch: List[Dict[str, Any]]) -> int:
"""Map a batch of events to OTel spans. Returns spans created."""
tracer = provider.get_tracer("hermes.telemetry")
n = 0
for ev in batch:
try:
name = f"hermes.{ev.get('event', 'event')}"
span = tracer.start_span(name, attributes=_span_attrs(ev))
span.end()
n += 1
except Exception:
logger.debug("OTLP span map failed", exc_info=True)
return n
# ── one-shot drain (export current local rows) ──────────────────────────────
def export_once(
config: Dict[str, Any],
*,
db_path: Optional[Path] = None,
since_ns: Optional[int] = None,
) -> int:
"""Drain the local tel_* tables to the configured OTLP endpoint once."""
provider, processor = _make_provider(config)
try:
rows = _read_events(db_path, since_ns)
total = export_batch(provider, rows)
processor.force_flush()
return total
finally:
try:
provider.shutdown()
except Exception:
pass
def _read_events(db_path: Optional[Path], since_ns: Optional[int]) -> List[Dict[str, Any]]:
if db_path is None:
from hermes_constants import get_hermes_home
db_path = get_hermes_home() / "state.db"
c = sqlite3.connect(str(db_path), timeout=5.0)
c.row_factory = sqlite3.Row
out: List[Dict[str, Any]] = []
try:
table_event = {
"tel_runs": "run", "tel_spans": "span",
"tel_model_calls": "model_call",
"tel_tool_calls": "tool_call", "tel_error_events": "error",
}
for table, evkind in table_event.items():
where = ""
if table == "tel_runs" and since_ns:
where = f" WHERE start_ns >= {int(since_ns)}"
for r in c.execute(f"SELECT * FROM {table}{where}").fetchall():
d = dict(r)
d["event"] = evkind
out.append(d)
finally:
c.close()
return out
# ── continuous streaming subscriber ─────────────────────────────────────────
class OTLPStreamer:
"""A live subscriber that pushes each emitter batch to OTLP as it lands.
Register with ``emitter.subscribe(streamer)``. Fail-isolated by the emitter.
"""
def __init__(self, config: Dict[str, Any]):
self._provider, self._processor = _make_provider(config)
self.exported = 0
def __call__(self, batch: List[Dict[str, Any]]) -> None:
self.exported += export_batch(self._provider, batch)
def shutdown(self) -> None:
try:
self._processor.force_flush()
self._provider.shutdown()
except Exception:
pass
def is_available() -> bool:
"""True when the OTel SDK is already importable. Does NOT auto-install —
this is a pure check (e.g. for status display)."""
try:
_require_sdk(auto_install=False)
return True
except OTLPUnavailable:
return False
def is_enabled(config: Dict[str, Any]) -> bool:
otlp = _otlp_config(config)
return bool(otlp.get("enabled") and otlp.get("endpoint"))
def start_streaming(config: Dict[str, Any]) -> Optional[OTLPStreamer]:
"""If OTLP is enabled, attach a streamer to the singleton emitter.
Non-interactive context (startup): attempts a lazy install with prompt=False
so a configured-but-missing SDK is installed once (gated by
security.allow_lazy_installs), then streams. If it still can't load, logs and
no-ops — never blocks or raises into startup.
"""
if not is_enabled(config):
return None
try:
_require_sdk(prompt=False)
except OTLPUnavailable:
logger.warning("telemetry.export.otlp.enabled but the OTel SDK could not "
"be installed/imported; install 'hermes-agent[otlp]'")
return None
from agent.telemetry.emitter import get_emitter
streamer = OTLPStreamer(config)
get_emitter().subscribe(streamer)
return streamer
__all__ = [
"OTLPUnavailable",
"OTLPStreamer",
"build_exporter",
"export_once",
"export_batch",
"is_available",
"is_enabled",
"start_streaming",
]

View File

@@ -1,72 +0,0 @@
"""Telemetry consent posture and the aggregate-metrics gate.
Consent is a single config field, ``telemetry.consent_state``:
* "unknown" — no choice recorded; never uploads (the default).
* "local" — declined aggregate metrics; local telemetry only.
* "aggregate" — opted in to aggregate metrics.
The config file is the source of truth: set ``telemetry.consent_state`` with
``hermes config set`` (or a managed-scope pin). Callers that gate behavior read
``telemetry.*`` directly from config; this module only provides the consent
constants, the install-id helper, and the upload gate a future uploader must
consult.
``allow_aggregate`` is the hard gate. An administrator pins
``telemetry.allow_aggregate: false`` through the managed-scope layer
(``/etc/hermes/config.yaml``), which takes precedence over the user's config; when
it is false, aggregate metrics are off regardless of ``consent_state``.
"""
from __future__ import annotations
import uuid
from typing import Any, Dict
CONSENT_UNKNOWN = "unknown"
CONSENT_LOCAL = "local"
CONSENT_AGGREGATE = "aggregate"
VALID_CONSENT_STATES = {CONSENT_UNKNOWN, CONSENT_LOCAL, CONSENT_AGGREGATE}
def _telemetry_cfg(config: Dict[str, Any]) -> Dict[str, Any]:
cfg = config.get("telemetry") if isinstance(config, dict) else None
return cfg if isinstance(cfg, dict) else {}
def ensure_install_id(config: Dict[str, Any]) -> str:
"""Return a stable install id, minting one if the config slot is empty.
Does not persist — the caller writes the returned value back to config.yaml. A
fresh uuid4 is used; clearing ``telemetry.install_id`` (e.g. with
``hermes config set telemetry.install_id ""``) causes the next call to mint anew.
"""
tel = _telemetry_cfg(config)
existing = tel.get("install_id")
if isinstance(existing, str) and existing.strip():
return existing
return str(uuid.uuid4())
def may_upload_aggregate(config: Dict[str, Any]) -> bool:
"""Whether aggregate metrics may upload — the gate a future uploader consults.
Aggregate metrics are derived from the local telemetry tables, so they require
local telemetry to be on. True only when local telemetry is enabled, the admin
hard gate allows it, and the user has opted in via ``telemetry.consent_state``.
"""
tel = _telemetry_cfg(config)
local_enabled = bool(tel.get("local", True))
allow_aggregate = bool(tel.get("allow_aggregate", True))
state = tel.get("consent_state", CONSENT_UNKNOWN)
return local_enabled and allow_aggregate and state == CONSENT_AGGREGATE
__all__ = [
"CONSENT_UNKNOWN",
"CONSENT_LOCAL",
"CONSENT_AGGREGATE",
"VALID_CONSENT_STATES",
"may_upload_aggregate",
"ensure_install_id",
]

View File

@@ -1,187 +0,0 @@
"""Redaction applied to telemetry data on export.
Two independent controls:
* Secrets are always redacted, on every export and in every mode; no setting
disables this. Wraps ``agent/redact.py::redact_sensitive_text(force=True)``.
* Whether message bodies, reasoning, and raw tool arguments are exportable at all is
governed by the trajectories setting (``telemetry.trajectories.enabled``, default
off, admin-pinnable), not by a redaction mode. With trajectories off, content is
dropped. With it on, content is exportable and ``content_redaction`` (none|pii)
controls how much is scrubbed; secrets are still always stripped.
This applies to the local and trajectory export paths. It is unrelated to any
aggregate-metrics path.
"""
from __future__ import annotations
import re
from typing import Any, Dict, List, Optional
# Content-redaction strengths for any content that IS exported.
CONTENT_NONE = "none" # drop content entirely (structural telemetry only)
CONTENT_PII = "pii" # codec-aware PII redaction on exported content
CONTENT_MODES = {CONTENT_NONE, CONTENT_PII}
# ── PII patterns (applied only in CONTENT_PII mode, on content that is exported) ──
_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}")
# E.164-ish and common separators; conservative to avoid nuking code/IDs.
_PHONE_RE = re.compile(
r"(?<!\w)(?:\+?\d{1,3}[\s.\-]?)?(?:\(\d{2,4}\)[\s.\-]?)?\d{3}[\s.\-]?\d{3,4}(?:[\s.\-]?\d{2,4})?(?!\w)"
)
# Long opaque hex/uuid-ish user identifiers.
_UUID_RE = re.compile(r"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b")
def _secret_redact(text: Optional[str]) -> Optional[str]:
"""Always-on secret redaction. force=True so user config can't disable it."""
if text is None:
return None
try:
from agent.redact import redact_sensitive_text
return redact_sensitive_text(str(text), force=True)
except Exception:
# Fail CLOSED: if the redactor can't run, do not emit the raw string.
return "[redaction-unavailable]"
def _pii_redact(text: str) -> str:
text = _EMAIL_RE.sub("[email]", text)
text = _UUID_RE.sub("[id]", text)
text = _PHONE_RE.sub("[phone]", text)
return text
def redact_for_export(
text: Optional[str],
*,
content_mode: str = CONTENT_NONE,
) -> Optional[str]:
"""Redact a single content string for export.
Secrets are ALWAYS stripped. Then PII is stripped when content_mode is 'pii'.
Callers gate *whether content is exported at all* via telemetry.trajectories
(see ``content_export_enabled``); this function only scrubs content that the
caller has already decided to export.
"""
redacted = _secret_redact(text)
if redacted is None:
return None
if content_mode == CONTENT_PII:
redacted = _pii_redact(redacted)
return redacted
def content_export_enabled(config: Optional[Dict[str, Any]]) -> bool:
"""True only when telemetry.trajectories is explicitly enabled.
This is the consent gate for exporting message bodies / reasoning / raw tool
args. Default off. Admin-pinnable via managed scope (telemetry.trajectories.enabled).
"""
try:
tel = (config or {}).get("telemetry") or {}
traj = tel.get("trajectories") or {}
return bool(traj.get("enabled", False))
except Exception:
return False
def content_mode_for(config: Optional[Dict[str, Any]]) -> str:
try:
tel = (config or {}).get("telemetry") or {}
mode = tel.get("content_redaction", CONTENT_NONE)
return mode if mode in CONTENT_MODES else CONTENT_NONE
except Exception:
return CONTENT_NONE
# ── Codec-aware message redaction (NeMo pattern) ─────────────────────────────
# Redact the right fields of a provider message shape rather than regex-blasting
# the whole blob. Structure (roles, names, counts) is preserved; only the
# free-text content fields are scrubbed.
def redact_message(
msg: Dict[str, Any],
*,
content_mode: str = CONTENT_NONE,
include_content: bool = False,
) -> Dict[str, Any]:
"""Redact one chat message dict for export.
When include_content is False (trajectories off), content/reasoning/tool-arg
fields are dropped — only structural fields (role, tool name, counts) remain.
When True, those fields are kept but passed through redact_for_export.
"""
role = msg.get("role")
out: Dict[str, Any] = {"role": role}
# Always-structural fields.
if msg.get("tool_name") is not None:
out["tool_name"] = msg.get("tool_name")
if msg.get("name") is not None:
out["name"] = msg.get("name")
if not include_content:
# Structural only: record presence/size, not bytes.
c = msg.get("content")
if c is not None:
out["content_chars"] = len(str(c))
if msg.get("reasoning_content"):
out["reasoning_chars"] = len(str(msg["reasoning_content"]))
if msg.get("tool_calls"):
out["tool_call_count"] = _count_tool_calls(msg["tool_calls"])
return out
# Content included (trajectories enabled): scrub then keep.
if msg.get("content") is not None:
out["content"] = redact_for_export(msg["content"], content_mode=content_mode)
if msg.get("reasoning_content"):
out["reasoning_content"] = redact_for_export(
msg["reasoning_content"], content_mode=content_mode
)
if msg.get("tool_calls"):
out["tool_calls"] = _redact_tool_calls(msg["tool_calls"], content_mode=content_mode)
return out
def _count_tool_calls(tool_calls: Any) -> int:
try:
import json
tc = json.loads(tool_calls) if isinstance(tool_calls, str) else tool_calls
return len(tc) if isinstance(tc, list) else (1 if tc else 0)
except Exception:
return 0
def _redact_tool_calls(tool_calls: Any, *, content_mode: str) -> Any:
"""Redact raw tool-call arguments (free text) while keeping function names."""
import json
try:
tc = json.loads(tool_calls) if isinstance(tool_calls, str) else tool_calls
except Exception:
return "[unparseable-tool-calls]"
if not isinstance(tc, list):
return []
out: List[Dict[str, Any]] = []
for call in tc:
if not isinstance(call, dict):
continue
fn = (call.get("function") or {}) if isinstance(call.get("function"), dict) else {}
name = fn.get("name") or call.get("name")
args = fn.get("arguments")
red_args = redact_for_export(args, content_mode=content_mode) if args is not None else None
out.append({"name": name, "arguments": red_args})
return out
__all__ = [
"CONTENT_NONE",
"CONTENT_PII",
"CONTENT_MODES",
"redact_for_export",
"content_export_enabled",
"content_mode_for",
"redact_message",
]

View File

@@ -1,144 +0,0 @@
"""Build per-run summary events from the local telemetry tables.
Reads the ``tel_*`` tables and projects each completed run into a summary dict holding
the recorded values: provider, models used, tool names, token totals, duration, and
cost. Powers ``hermes telemetry preview``. No aggregation or bucketing is applied here.
"""
from __future__ import annotations
import platform
import sqlite3
from pathlib import Path
from typing import Any, Dict, List, Optional
def _os_family() -> str:
s = platform.system().lower()
if s.startswith("lin"):
return "linux"
if s == "darwin":
return "macos"
if s.startswith("win"):
return "windows"
return "other"
def _hermes_version() -> str:
try:
from hermes_cli import __version__
return str(__version__)
except Exception:
return "0.0.0"
def _open(db_path: Optional[Path], conn: Optional[sqlite3.Connection]):
if conn is not None:
prev = conn.row_factory
conn.row_factory = sqlite3.Row
return conn, prev, False
if db_path is None:
from hermes_constants import get_hermes_home
db_path = get_hermes_home() / "state.db"
c = sqlite3.connect(str(db_path), timeout=5.0)
c.row_factory = sqlite3.Row
return c, None, True
def _run_events(c: sqlite3.Connection, since_ns: Optional[int]) -> List[Dict[str, Any]]:
"""Project completed runs into per-run summary dicts."""
where = " WHERE end_ns IS NOT NULL"
if since_ns:
where += f" AND start_ns >= {int(since_ns)}"
rows = c.execute(
"SELECT run_id, entrypoint, platform, end_reason, start_ns, end_ns, "
"model_call_count, tool_call_count, error_count "
"FROM tel_runs" + where
).fetchall()
events: List[Dict[str, Any]] = []
for r in rows:
# Models actually used in this run (real ids), with token totals.
models = [
{"provider": m["provider"], "model": m["model"],
"calls": m["n"], "input_tokens": int(m["inp"] or 0),
"output_tokens": int(m["outp"] or 0)}
for m in c.execute(
"SELECT provider, model, COUNT(*) n, SUM(input_tokens) inp, "
"SUM(output_tokens) outp FROM tel_model_calls WHERE run_id = ? "
"GROUP BY provider, model ORDER BY n DESC",
(r["run_id"],),
).fetchall()
]
tools = [
row["tool_name"]
for row in c.execute(
"SELECT DISTINCT tool_name FROM tel_tool_calls WHERE run_id = ?",
(r["run_id"],),
).fetchall()
if row["tool_name"]
]
trow = c.execute(
"SELECT SUM(input_tokens) inp, SUM(output_tokens) outp "
"FROM tel_model_calls WHERE run_id = ?",
(r["run_id"],),
).fetchone()
duration_ms = (r["end_ns"] - r["start_ns"]) / 1e6 if r["end_ns"] else None
events.append({
"event_name": "workflow_completed",
"run_id": r["run_id"],
"entrypoint": r["entrypoint"] or "cli",
"platform": r["platform"],
"end_reason": r["end_reason"] or "completed",
"models_used": models,
"tools_used": tools,
"model_call_count": r["model_call_count"] or 0,
"tool_call_count": r["tool_call_count"] or 0,
"error_count": r["error_count"] or 0,
"duration_ms": round(duration_ms, 1) if duration_ms is not None else None,
"input_tokens": int((trow["inp"] if trow else 0) or 0),
"output_tokens": int((trow["outp"] if trow else 0) or 0),
})
return events
def build_aggregate_events(
*,
install_id: str,
db_path: Optional[Path] = None,
since_ns: Optional[int] = None,
conn: Optional[sqlite3.Connection] = None,
include_heartbeat: bool = True,
) -> List[Dict[str, Any]]:
"""Return per-run summary events plus an optional heartbeat."""
c, prev_factory, owned = _open(db_path, conn)
try:
events = _run_events(c, since_ns)
if include_heartbeat:
events.append({
"event_name": "heartbeat",
"install_id": install_id,
"hermes_version": _hermes_version(),
"os_family": _os_family(),
"entrypoint": "cli",
})
return events
finally:
if owned:
c.close()
elif prev_factory is not None:
c.row_factory = prev_factory
def summarize(events: List[Dict[str, Any]]) -> Dict[str, Any]:
"""Counts by event_name + field coverage, for status/preview output."""
by_name: Dict[str, int] = {}
fields = set()
for e in events:
name = e.get("event_name", "?")
by_name[name] = by_name.get(name, 0) + 1
fields.update(e.keys())
return {"total": len(events), "by_event_name": by_name, "fields_present": sorted(fields)}
__all__ = ["build_aggregate_events", "summarize"]

View File

@@ -1,83 +0,0 @@
"""Trace / run / span id propagation via contextvars.
Telemetry events share IDs so a workflow can be reconstructed: one ``trace_id`` per
workflow, one ``run_id`` per top-level execution, ``span_id`` per timed operation, and
``parent_span_id`` for nesting. These live in contextvars so async tool calls and
spawned subagents inherit the lineage automatically.
Provides helpers to start/clear a run context and mint child span ids. The telemetry
plugin sets the run context on session start and reads it in each hook callback.
Nothing here writes to storage — it only carries ids.
"""
from __future__ import annotations
import contextvars
import uuid
from dataclasses import dataclass
from typing import Optional
_trace_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
"hermes_tel_trace_id", default=None
)
_run_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
"hermes_tel_run_id", default=None
)
_parent_span_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
"hermes_tel_parent_span_id", default=None
)
def new_id() -> str:
return uuid.uuid4().hex
@dataclass(slots=True)
class RunContext:
trace_id: str
run_id: str
def start_run(trace_id: Optional[str] = None, run_id: Optional[str] = None) -> RunContext:
"""Begin a run context, minting ids when not supplied. Sets contextvars."""
tid = trace_id or new_id()
rid = run_id or new_id()
_trace_id.set(tid)
_run_id.set(rid)
_parent_span_id.set(None)
return RunContext(trace_id=tid, run_id=rid)
def current_trace_id() -> Optional[str]:
return _trace_id.get()
def current_run_id() -> Optional[str]:
return _run_id.get()
def current_parent_span_id() -> Optional[str]:
return _parent_span_id.get()
def new_span_id() -> str:
"""Mint a span id (does not alter the parent pointer)."""
return new_id()
def clear_run() -> None:
_trace_id.set(None)
_run_id.set(None)
_parent_span_id.set(None)
__all__ = [
"RunContext",
"new_id",
"start_run",
"current_trace_id",
"current_run_id",
"current_parent_span_id",
"new_span_id",
"clear_run",
]

View File

@@ -1,136 +0,0 @@
"""Thinking-timeout detection and user-facing guidance for reasoning models.
When a known reasoning model (NVIDIA Nemotron 3 Ultra, OpenAI o1/o3,
Anthropic Opus 4.x thinking, DeepSeek R1, Qwen QwQ, xAI Grok reasoning)
hits a transport-layer error before the first content token arrives, the
upstream proxy has almost certainly idle-killed a long thinking stream —
not a true context overflow or a configuration error. The user needs
distinct guidance for this case:
"The model's thinking phase exceeded the upstream proxy's idle
timeout before the first content token arrived. This is a known
issue with reasoning models behind cloud gateways (NVIDIA NIM,
OpenAI, Anthropic, DeepSeek). Workarounds in priority order:
1. Set `providers.<provider>.models.<model>.stale_timeout_seconds: 900`
in `~/.hermes/config.yaml` to extend the per-call timeout...
2. Lower `reasoning_budget` or set `reasoning_effort: medium`...
3. Use a smaller / faster reasoning model..."
The existing `_is_stream_drop` guidance at
``agent/conversation_loop.py:3464-3486`` fires for large-file-write
stream drops ("try execute_code with Python's open() for large files")
which is the WRONG advice for the thinking-timeout case. This module
provides the detection and the message as standalone helpers so the
detection logic is unit-testable without driving the full retry loop,
and the message text can be regression-tested for spelling and accuracy.
Part 2 of Fixes #52310.
"""
from __future__ import annotations
from typing import Optional
# Substring set that identifies a transport-layer failure on the
# response stream. Same shape as the existing
# ``_SERVER_DISCONNECT_PATTERNS`` in ``agent/error_classifier.py:394``
# but extended to also catch the OSS-level error signature
# (``broken pipe`` / ``errno 32``) that the upstream kill surfaces
# to the OpenAI SDK wrapper.
_THINKING_TIMEOUT_SUBSTRINGS: tuple[str, ...] = (
"broken pipe",
"errno 32",
"remote protocol",
"connection reset",
"connection lost",
"peer closed",
"server disconnected",
)
def is_thinking_timeout(classified: object, model: str, error_msg: str) -> bool:
"""Return True when a reasoning model's thinking phase hit a transport kill.
Args:
classified: a :class:`agent.error_classifier.ClassifiedError` instance
(duck-typed here to avoid an import cycle in unit tests).
model: the model slug at failure time (e.g.
``"nvidia/nemotron-3-ultra-550b-a55b"``).
error_msg: lowercased string representation of the underlying
exception (typically ``str(api_error).lower()``).
Returns True when ALL conditions hold:
1. ``classified.reason == FailoverReason.timeout`` (the classifier
override at ``agent/error_classifier.py:720-738`` ensures this
is the case for reasoning models even on large sessions).
2. ``api_error`` has no ``.status_code`` attribute set (transport
disconnect, not an HTTP error).
3. ``model`` is in the reasoning-model allowlist (reuses
``agent.reasoning_timeouts.get_reasoning_stale_timeout_floor``).
4. ``error_msg`` contains one of the transport-kill substrings.
Non-reasoning models always return False. Non-transport errors
(billing / rate_limit / auth / context_overflow / format_error)
always return False. HTTP-status errors always return False.
"""
# Import here (not at module top) to keep this helper cheap to
# import even from callers that don't need it. ``agent.reasoning_timeouts``
# is small and dependency-free.
from agent.reasoning_timeouts import get_reasoning_stale_timeout_floor
# Condition 1: classifier says timeout. Use a string/value check
# rather than importing FailoverReason so this module has zero
# import cycles from the error_classifier package.
reason = getattr(classified, "reason", None)
reason_value = getattr(reason, "value", None)
if reason_value != "timeout":
return False
# Condition 2: no HTTP status code (transport, not API error).
# Caller is expected to gate on ``getattr(api_error, "status_code", None) is None``
# before calling this helper; the surface here is just the post-gate
# boolean so the caller can pass an already-prepped error_msg.
# Condition 3: reasoning model allowlist.
if get_reasoning_stale_timeout_floor(model) is None:
return False
# Condition 4: transport-kill substring in the error message.
error_msg_lower = (error_msg or "").lower()
return any(p in error_msg_lower for p in _THINKING_TIMEOUT_SUBSTRINGS)
def build_thinking_timeout_guidance(
provider: str, model: str, model_label: Optional[str] = None,
) -> str:
"""Return the user-facing guidance string appended to ``_final_response``.
Args:
provider: provider slug (e.g. ``"nvidia"``, ``"openai"``).
model: bare model slug the user would put in their config
(e.g. ``"nemotron-3-ultra-550b-a55b"`` if the user uses
NVIDIA direct, or the full ``"nvidia/nemotron-3-ultra-550b-a55b"``
if they go through an aggregator). Used verbatim in the
config snippet so the user can copy-paste.
model_label: optional short label for the model name in the
prose (e.g. ``"Nemotron 3 Ultra"``). Falls back to the
slug if not provided.
"""
label = model_label or model
return (
"\n\nThe model's thinking phase exceeded the upstream proxy's "
"idle timeout before the first content token arrived. This is a "
f"known issue with reasoning models (like {label}) behind cloud "
"gateways (NVIDIA NIM, OpenAI, Anthropic, DeepSeek). Workarounds "
"in priority order:\n"
f"1. Set `providers.{provider}.models.{model}.stale_timeout_seconds: 900` "
"in `~/.hermes/config.yaml` to extend the per-call timeout. "
"(Hermes's built-in floor is 600s for known reasoning models — "
"if you still see this after raising, the upstream cap is even "
"shorter.)\n"
"2. Lower `reasoning_budget` or set `reasoning_effort: medium` on this "
"model if the provider supports it.\n"
"3. Use a smaller / faster reasoning model if the task doesn't "
"require deep thinking."
)

View File

@@ -11,8 +11,7 @@ Pure module-level utilities extracted from ``run_agent.py``:
``_append_subdir_hint_to_multimodal`` — envelope helpers for the
``{"_multimodal": True, "content": [...], "text_summary": ...}`` dict
shape returned by tools like ``computer_use``.
* ``_extract_file_mutation_targets`` / ``_extract_landed_file_mutation_paths`` /
``_extract_error_preview`` —
* ``_extract_file_mutation_targets`` / ``_extract_error_preview``
per-turn file-mutation verifier inputs.
* ``_trajectory_normalize_msg`` — strip image blobs from a message for
trajectory saving.
@@ -270,35 +269,6 @@ def _extract_file_mutation_targets(tool_name: str, args: Dict[str, Any]) -> List
return []
def _extract_landed_file_mutation_paths(
tool_name: str,
args: Dict[str, Any],
result: Any,
) -> List[str]:
"""Return the concrete file paths a successful mutation reports."""
targets = _extract_file_mutation_targets(tool_name, args)
if tool_name not in _FILE_MUTATING_TOOLS or not isinstance(result, str):
return targets
try:
data = json.loads(result.strip())
except Exception:
return targets
if not isinstance(data, dict):
return targets
files = data.get("files_modified")
if isinstance(files, list):
landed = [str(p) for p in files if p]
if landed:
return landed
resolved = data.get("resolved_path")
if resolved:
return [str(resolved)]
return targets
def _extract_error_preview(result: Any, max_len: int = 180) -> str:
"""Pull a one-line error summary out of a tool result for footer display."""
text = _multimodal_text_summary(result) if result is not None else ""
@@ -441,7 +411,6 @@ __all__ = [
"_multimodal_text_summary",
"_append_subdir_hint_to_multimodal",
"_extract_file_mutation_targets",
"_extract_landed_file_mutation_paths",
"_extract_error_preview",
"_trajectory_normalize_msg",
"make_tool_result_message",

View File

@@ -26,7 +26,6 @@ from agent.display import (
build_tool_preview as _build_tool_preview,
get_cute_tool_message as _get_cute_tool_message_impl,
get_tool_emoji as _get_tool_emoji,
redact_tool_args_for_display as _redact_tool_args_for_display,
_detect_tool_failure,
)
from agent.tool_guardrails import ToolGuardrailDecision
@@ -70,35 +69,12 @@ def _budget_for_agent(agent) -> BudgetConfig:
_MAX_TOOL_WORKERS = 8
def _flush_session_db_after_tool_progress(
agent,
messages: list,
*,
stage: str,
) -> None:
"""Best-effort incremental SessionDB flush for tool-call progress.
Tool execution can perform side effects that terminate or restart the
current Hermes process before the normal turn-end persistence path runs.
Flush the already-appended assistant/tool messages immediately so the
transcript survives destructive-but-valid tool calls.
"""
try:
agent._flush_messages_to_session_db(messages)
except Exception as exc:
logger.warning("Incremental tool-call persistence failed after %s: %s", stage, exc)
def _ra():
"""Lazy reference to ``run_agent`` so patches like ``run_agent._set_interrupt`` work."""
import run_agent
return run_agent
def _is_interpreter_shutdown_submit_error(exc: RuntimeError) -> bool:
return "cannot schedule new futures after interpreter shutdown" in str(exc)
def _emit_terminal_post_tool_call(
agent,
*,
@@ -303,11 +279,6 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
f"[Tool execution cancelled — {tc.function.name} was skipped due to user interrupt]",
tc.id,
))
_flush_session_db_after_tool_progress(
agent,
messages,
stage=f"cancelled tool result {tc.function.name}",
)
return
# ── Parse args + pre-execution bookkeeping ───────────────────────
@@ -470,11 +441,10 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off":
print(f" ⚡ Concurrent: {num_tools} tool calls — {tool_names_str}")
for i, (tc, name, args, middleware_trace, block_result, blocked_by_guardrail) in enumerate(parsed_calls, 1):
display_args = _redact_tool_args_for_display(name, args) or args
args_str = json.dumps(display_args, ensure_ascii=False)
args_str = json.dumps(args, ensure_ascii=False)
if agent.verbose_logging:
print(f" 📞 Tool {i}: {name}({list(display_args.keys())})")
print(agent._wrap_verbose("Args: ", json.dumps(display_args, indent=2, ensure_ascii=False)))
print(f" 📞 Tool {i}: {name}({list(args.keys())})")
print(agent._wrap_verbose("Args: ", json.dumps(args, indent=2, ensure_ascii=False)))
else:
args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
print(f" 📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}")
@@ -484,9 +454,8 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
continue
if agent.tool_progress_callback:
try:
display_args = _redact_tool_args_for_display(name, args) or args
preview = _build_tool_preview(name, display_args)
agent.tool_progress_callback("tool.started", name, preview, display_args)
preview = _build_tool_preview(name, args)
agent.tool_progress_callback("tool.started", name, preview, args)
except Exception as cb_err:
logging.debug(f"Tool progress callback error: {cb_err}")
@@ -495,8 +464,7 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
continue
if agent.tool_start_callback:
try:
display_args = _redact_tool_args_for_display(name, args) or args
agent.tool_start_callback(tc.id, name, display_args)
agent.tool_start_callback(tc.id, name, args)
except Exception as cb_err:
logging.debug(f"Tool start callback error: {cb_err}")
@@ -613,40 +581,13 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
if runnable_calls:
max_workers = min(len(runnable_calls), _MAX_TOOL_WORKERS)
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
for submit_index, (i, tc, name, args) in enumerate(runnable_calls):
for i, tc, name, args in runnable_calls:
# Propagate the agent turn's ContextVars (e.g.
# _approval_session_key) AND thread-local approval/sudo
# callbacks into the worker thread; clears callbacks on exit.
try:
f = executor.submit(
propagate_context_to_thread(_run_tool), i, tc, name, args, parsed_calls[i][3]
)
except RuntimeError as submit_error:
if not _is_interpreter_shutdown_submit_error(submit_error):
raise
skipped_calls = runnable_calls[submit_index:]
logger.warning(
"interpreter shutdown while scheduling concurrent tools; "
"skipping %d unsubmitted tool(s)",
len(skipped_calls),
)
for skipped_i, _tc, skipped_name, skipped_args in skipped_calls:
if results[skipped_i] is None:
middleware_trace = parsed_calls[skipped_i][3]
result = (
f"Error executing tool '{skipped_name}': "
"Python interpreter is shutting down; tool was not started"
)
results[skipped_i] = (
skipped_name,
skipped_args,
result,
0.0,
True,
False,
middleware_trace,
)
break
f = executor.submit(
propagate_context_to_thread(_run_tool), i, tc, name, args, parsed_calls[i][3]
)
futures.append(f)
# Wait for all to complete with periodic heartbeats so the
@@ -796,8 +737,7 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
if not blocked and agent.tool_complete_callback:
try:
display_args = _redact_tool_args_for_display(name, args) or args
agent.tool_complete_callback(tc.id, name, display_args, function_result)
agent.tool_complete_callback(tc.id, name, args, function_result)
except Exception as cb_err:
logging.debug(f"Tool complete callback error: {cb_err}")
@@ -828,11 +768,6 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
# String results pass through unchanged.
_tool_content = agent._tool_result_content_for_active_model(name, function_result)
messages.append(make_tool_result_message(name, _tool_content, tc.id))
_flush_session_db_after_tool_progress(
agent,
messages,
stage=f"tool result {name}",
)
# ── Per-tool /steer drain ───────────────────────────────────
# Same as the sequential path: drain between each collected
@@ -868,16 +803,13 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
agent._vprint(f"{agent.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)", force=True)
for skipped_tc in remaining_calls:
skipped_name = skipped_tc.function.name
messages.append(make_tool_result_message(
skipped_name,
f"[Tool execution cancelled — {skipped_name} was skipped due to user interrupt]",
skipped_tc.id,
))
_flush_session_db_after_tool_progress(
agent,
messages,
stage=f"cancelled tool result {skipped_name}",
)
skip_msg = {
"role": "tool",
"name": skipped_name,
"content": f"[Tool execution cancelled — {skipped_name} was skipped due to user interrupt]",
"tool_call_id": skipped_tc.id,
}
messages.append(skip_msg)
break
function_name = tool_call.function.name
@@ -959,11 +891,10 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
agent._iters_since_skill = 0
if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off":
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
args_str = json.dumps(display_args, ensure_ascii=False)
args_str = json.dumps(function_args, ensure_ascii=False)
if agent.verbose_logging:
print(f" 📞 Tool {i}: {function_name}({list(display_args.keys())})")
print(agent._wrap_verbose("Args: ", json.dumps(display_args, indent=2, ensure_ascii=False)))
print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())})")
print(agent._wrap_verbose("Args: ", json.dumps(function_args, indent=2, ensure_ascii=False)))
else:
args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")
@@ -984,16 +915,14 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
if not _execution_blocked and agent.tool_progress_callback:
try:
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
preview = _build_tool_preview(function_name, display_args)
agent.tool_progress_callback("tool.started", function_name, preview, display_args)
preview = _build_tool_preview(function_name, function_args)
agent.tool_progress_callback("tool.started", function_name, preview, function_args)
except Exception as cb_err:
logging.debug(f"Tool progress callback error: {cb_err}")
if not _execution_blocked and agent.tool_start_callback:
try:
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
agent.tool_start_callback(tool_call.id, function_name, display_args)
agent.tool_start_callback(tool_call.id, function_name, function_args)
except Exception as cb_err:
logging.debug(f"Tool start callback error: {cb_err}")
@@ -1223,8 +1152,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
if agent._should_emit_quiet_tool_messages():
face = random.choice(KawaiiSpinner.get_waiting_faces())
emoji = _get_tool_emoji(function_name)
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
preview = _build_tool_preview(function_name, display_args) or function_name
preview = _build_tool_preview(function_name, function_args) or function_name
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
spinner.start()
_ce_result = None
@@ -1257,8 +1185,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
face = random.choice(KawaiiSpinner.get_waiting_faces())
emoji = _get_tool_emoji(function_name)
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
preview = _build_tool_preview(function_name, display_args) or function_name
preview = _build_tool_preview(function_name, function_args) or function_name
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
spinner.start()
_mem_result = None
@@ -1289,8 +1216,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
face = random.choice(KawaiiSpinner.get_waiting_faces())
emoji = _get_tool_emoji(function_name)
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
preview = _build_tool_preview(function_name, display_args) or function_name
preview = _build_tool_preview(function_name, function_args) or function_name
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
spinner.start()
_spinner_result = None
@@ -1452,8 +1378,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
if not _execution_blocked and agent.tool_complete_callback:
try:
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
agent.tool_complete_callback(tool_call.id, function_name, display_args, function_result)
agent.tool_complete_callback(tool_call.id, function_name, function_args, function_result)
except Exception as cb_err:
logging.debug(f"Tool complete callback error: {cb_err}")
@@ -1477,11 +1402,6 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
# (see parallel path for rationale). String results pass through.
_tool_content = agent._tool_result_content_for_active_model(function_name, function_result)
messages.append(make_tool_result_message(function_name, _tool_content, tool_call.id))
_flush_session_db_after_tool_progress(
agent,
messages,
stage=f"tool result {function_name}",
)
# ── Per-tool /steer drain ───────────────────────────────────
# Drain pending steer BETWEEN individual tool calls so the
@@ -1508,11 +1428,6 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
f"[Tool execution skipped — {skipped_name} was not started. User sent a new message]",
skipped_tc.id,
))
_flush_session_db_after_tool_progress(
agent,
messages,
stage=f"skipped tool result {skipped_name}",
)
break
if agent.tool_delay > 0 and i < len(assistant_message.tool_calls):

View File

@@ -5,47 +5,12 @@ This transport owns format conversion and normalization — NOT client lifecycle
streaming, or the _run_codex_stream() call path.
"""
import hashlib
import json
from typing import Any, Dict, List, Optional
from agent.transports.base import ProviderTransport
from agent.transports.types import NormalizedResponse, ToolCall
def _content_cache_key(instructions: str, tools: Optional[List[Dict[str, Any]]]) -> Optional[str]:
"""Content-address the prompt cache key from the static request prefix.
Returns ``pck_<sha256[:24]>`` of (instructions + sorted tool schemas), or
None when there is nothing static to key on. The cache key is a routing
hint only — never a correctness boundary — so two requests sharing a system
prompt and tool set intentionally resolve to the same warm prefix bucket.
The fix this exists for: recurring cron jobs build session_id as
``cron_<id>_<timestamp>``, so using session_id as the cache key made every
fire cache-cold. The static prefix (identity + tools) is identical across
fires, so hashing it gives a stable key that stays warm within the
provider's cache TTL. Sorting tools by name keeps the hash insertion-order
independent.
"""
if not instructions and not tools:
return None
tools_part = ""
if tools:
sorted_tools = sorted(
(t for t in tools if isinstance(t, dict)),
key=lambda t: str(t.get("name") or t.get("type") or ""),
)
tools_part = json.dumps(
sorted_tools, sort_keys=True, ensure_ascii=False, separators=(",", ":")
)
# \x00 separator so instructions ending in the tool JSON can't collide with
# a request whose instructions contain that JSON and whose tools are empty.
content = f"{instructions or ''}\x00{tools_part}"
digest = hashlib.sha256(content.encode("utf-8", errors="replace")).hexdigest()[:24]
return f"pck_{digest}"
class ResponsesApiTransport(ProviderTransport):
"""Transport for api_mode='codex_responses'.
@@ -106,10 +71,7 @@ class ResponsesApiTransport(ProviderTransport):
params:
instructions: str — system prompt (extracted from messages[0] if not given)
reasoning_config: dict | None — {effort, enabled}
session_id: str | None — transcript/session id; drives the xAI
x-grok-conv-id header and the Codex cache-scope headers, and is
the fallback prompt_cache_key when there is no static prefix to
content-address
session_id: str | None — used for prompt_cache_key + xAI conv header
max_tokens: int | None — max_output_tokens
timeout: float | None — per-request timeout forwarded to the SDK
request_overrides: dict | None — extra kwargs merged in
@@ -250,17 +212,10 @@ class ResponsesApiTransport(ProviderTransport):
kwargs["parallel_tool_calls"] = True
session_id = params.get("session_id")
# prompt_cache_key is content-addressed from the static prefix
# (instructions + tools), NOT session_id — recurring cron jobs carry a
# per-fire timestamp in session_id (cron_<id>_<ts>) that made every run
# cache-cold. session_id is left untouched for transcript isolation and
# the cache-scope routing headers below. Falls back to session_id when
# there is no static content to hash.
cache_key = _content_cache_key(instructions, response_tools) or session_id
# xAI Responses takes prompt_cache_key in extra_body (set further
# down); GitHub Models opts out of cache-key routing entirely.
if not is_github_responses and not is_xai_responses and cache_key:
kwargs["prompt_cache_key"] = cache_key
if not is_github_responses and not is_xai_responses and session_id:
kwargs["prompt_cache_key"] = session_id
if reasoning_enabled and is_xai_responses:
from agent.model_metadata import grok_supports_reasoning_effort
@@ -371,7 +326,7 @@ class ResponsesApiTransport(ProviderTransport):
merged_extra_body: Dict[str, Any] = {}
if isinstance(existing_extra_body, dict):
merged_extra_body.update(existing_extra_body)
merged_extra_body.setdefault("prompt_cache_key", cache_key)
merged_extra_body.setdefault("prompt_cache_key", session_id)
kwargs["extra_body"] = merged_extra_body
return kwargs

View File

@@ -28,12 +28,8 @@ import uuid
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
from agent.conversation_compression import conversation_history_after_compression
from agent.iteration_budget import IterationBudget
from agent.model_metadata import (
estimate_messages_tokens_rough,
estimate_request_tokens_rough,
)
from agent.model_metadata import estimate_request_tokens_rough
logger = logging.getLogger(__name__)
@@ -61,34 +57,6 @@ def _compression_made_progress(
return orig_tokens > 0 and new_tokens < orig_tokens * 0.95
def _should_run_preflight_estimate(
messages: List[Dict[str, Any]],
protect_first_n: int,
protect_last_n: int,
threshold_tokens: int,
) -> bool:
"""Cheap gate for the (expensive) full preflight token estimate.
Returns ``True`` when either:
(a) message count exceeds the protected ranges (the historical gate), or
(b) a cheap char-based estimate already crosses the configured threshold
— the few-but-huge case from issue #27405 that the count-only gate
would silently skip (a handful of very large messages never trips
the count condition, so compression was never attempted and the
turn hit a hard context-overflow error).
Branch (b) uses ``estimate_messages_tokens_rough`` (the shared char-based
estimator) so a single large base64 image isn't mistaken for ~250K tokens.
It intentionally undercounts vs. the full request estimate — it omits the
system prompt and tool schemas — because it is only a *hint* deciding
whether to pay for the authoritative ``estimate_request_tokens_rough``,
which (together with ``should_compress``) makes the real decision.
"""
if len(messages) > protect_first_n + protect_last_n + 1:
return True
return estimate_messages_tokens_rough(messages) >= threshold_tokens
@dataclass
class TurnContext:
"""Values produced by the turn prologue and consumed by the turn loop."""
@@ -143,13 +111,7 @@ def build_turn_context(
# Guard stdio against OSError from broken pipes (systemd/headless/daemon).
install_safe_stdio()
# NOTE: the DB session row is created later, AFTER the system prompt is
# restored/built (see _ensure_db_session() below the system-prompt block).
# Creating it here — before _cached_system_prompt is populated — inserts a
# row with system_prompt=NULL on a fresh API/gateway agent that carries
# client-managed history, which then trips the "stored system prompt is
# null; rebuilding from scratch" warning and a needless first-turn prefix
# cache miss. (Issue #45499.)
agent._ensure_db_session()
# Tell auxiliary_client what the live main provider/model are for this turn.
try:
@@ -316,11 +278,6 @@ def build_turn_context(
active_system_prompt = agent._cached_system_prompt
# Create the DB session row now that _cached_system_prompt is populated, so
# the persisted snapshot is written non-NULL on the first turn (Issue
# #45499). Idempotent: _ensure_db_session() no-ops once the row exists.
agent._ensure_db_session()
# Crash-resilience: persist the inbound user turn as soon as the session row exists.
try:
agent._persist_session(messages, conversation_history)
@@ -332,14 +289,10 @@ def build_turn_context(
)
# ── Preflight context compression ──
# Gate the (expensive) full token estimate behind a cheap pre-check.
# See ``_should_run_preflight_estimate`` for the OR semantics that fix
# issue #27405 (a few very large messages slipping past the count gate).
if agent.compression_enabled and _should_run_preflight_estimate(
messages,
agent.context_compressor.protect_first_n,
agent.context_compressor.protect_last_n,
agent.context_compressor.threshold_tokens,
if (
agent.compression_enabled
and len(messages) > agent.context_compressor.protect_first_n
+ agent.context_compressor.protect_last_n + 1
):
_preflight_tokens = estimate_request_tokens_rough(
messages,
@@ -401,9 +354,7 @@ def build_turn_context(
_orig_len, len(messages), _orig_tokens, _preflight_tokens
):
break # Cannot compress further: neither rows nor tokens moved
conversation_history = conversation_history_after_compression(
agent, messages
)
conversation_history = None
agent._empty_content_retries = 0
agent._thinking_prefill_retries = 0
agent._last_content_with_tools = None
@@ -441,8 +392,6 @@ def build_turn_context(
# Per-turn file-mutation verifier state.
agent._turn_failed_file_mutations = {}
agent._turn_file_mutation_paths = set()
agent._verification_stop_nudges = 0
# Record the execution thread so interrupt()/clear_interrupt() can scope
# the tool-level interrupt signal to THIS agent's thread only.

View File

@@ -166,25 +166,6 @@ def finalize_turn(
# same empty-response loop again.
try:
agent._drop_trailing_empty_response_scaffolding(messages)
# When the turn was interrupted and the last message is a tool
# result, append a synthetic assistant message to close the
# tool-call sequence. Without this, the session persists a
# ``tool → user`` alternation that strict providers (Gemini,
# Claude) reject, causing them to hallucinate a continuation of
# the user's message on the next turn (#48879).
#
# ``_drop_trailing_empty_response_scaffolding`` only rewinds the
# tool tail when an empty-response scaffolding flag is present; a
# clean ``/stop`` interrupt after a successful tool sets no such
# flag, so the tool result survives as the tail and we close it
# here instead. On an interrupt ``final_response`` is typically
# empty, so fall back to an explicit placeholder rather than
# persisting an empty-content assistant turn.
if interrupted:
from agent.message_sanitization import close_interrupted_tool_sequence
close_interrupted_tool_sequence(messages, final_response)
agent._persist_session(messages, conversation_history)
except Exception as _persist_err:
_cleanup_errors.append(f"persist_session: {_persist_err}")
@@ -289,14 +270,7 @@ def finalize_turn(
and len(_stripped) <= 24
and _stripped[-1:] not in {".", "!", "?", "", "", "", "`", ")"}
)
_is_partial_stream_recovery = (
str(_turn_exit_reason) == "partial_stream_recovery"
)
if (
_is_empty_terminal
or _is_partial_fragment
or _is_partial_stream_recovery
):
if _is_empty_terminal or _is_partial_fragment:
_explanation = agent._format_turn_completion_explanation(
_turn_exit_reason
)

View File

@@ -67,11 +67,6 @@ class TurnRetryState:
# ── Restart signals (read by the outer loop after the attempt) ───────
restart_with_compressed_messages: bool = False
restart_with_length_continuation: bool = False
# Set when a content-filter stream stall (e.g. MiniMax "new_sensitive")
# has been escalated to the fallback chain: the partial-stream content
# was rolled back off ``messages`` and the loop should re-issue the API
# call against the newly-activated provider (#32421).
restart_with_rebuilt_messages: bool = False
def __iter__(self):
# Convenience for debugging / tests: iterate (name, value) pairs.

View File

@@ -1,618 +0,0 @@
"""Coding verification evidence ledger.
This module records what the agent actually proved while working in a code
workspace. It is deliberately passive: it never decides to run a suite, never
blocks completion, and never upgrades targeted checks into "repo green".
"""
from __future__ import annotations
import json
import re
import shlex
import sqlite3
import tempfile
import threading
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Optional
from hermes_constants import get_hermes_home
_DB_LOCK = threading.Lock()
_MAX_OUTPUT_SUMMARY_CHARS = 2000
_MAX_EVIDENCE_AGE_DAYS = 30
_MAX_EVENTS_PER_SESSION_ROOT = 100
_MAX_TOTAL_UNREFERENCED_EVENTS = 10_000
_AD_HOC_SCRIPT_NAME_PREFIXES = ("hermes-verify-", "hermes-ad-hoc-")
_VERIFY_SCHEMA_VERSION = 1
_SHELL_SPLIT_RE = re.compile(r"\s*(?:&&|\|\||;)\s*")
@dataclass(frozen=True)
class VerificationEvidence:
"""A classified command result worth recording."""
command: str
canonical_command: str
kind: str
scope: str
status: str
exit_code: int
cwd: str
root: str
session_id: str
output_summary: str = ""
def _utc_now() -> str:
return datetime.now(timezone.utc).isoformat()
def _retention_cutoff() -> str:
return (datetime.now(timezone.utc) - timedelta(days=_MAX_EVIDENCE_AGE_DAYS)).isoformat()
def _db_path() -> Path:
return get_hermes_home() / "verification_evidence.db"
def _connect() -> sqlite3.Connection:
path = _db_path()
path.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(path)
conn.execute("PRAGMA journal_mode=WAL")
conn.execute("PRAGMA busy_timeout=5000")
conn.row_factory = sqlite3.Row
_ensure_schema(conn)
return conn
def _ensure_schema(conn: sqlite3.Connection) -> None:
conn.execute(
"""
CREATE TABLE IF NOT EXISTS meta (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
)
"""
)
conn.execute(
"""
CREATE TABLE IF NOT EXISTS verification_events (
id INTEGER PRIMARY KEY AUTOINCREMENT,
created_at TEXT NOT NULL,
session_id TEXT NOT NULL,
cwd TEXT NOT NULL,
root TEXT NOT NULL,
command TEXT NOT NULL,
canonical_command TEXT NOT NULL,
kind TEXT NOT NULL,
scope TEXT NOT NULL,
status TEXT NOT NULL,
exit_code INTEGER NOT NULL,
output_summary TEXT NOT NULL
)
"""
)
conn.execute(
"""
CREATE TABLE IF NOT EXISTS verification_state (
session_id TEXT NOT NULL,
root TEXT NOT NULL,
last_event_id INTEGER,
last_edit_at TEXT,
changed_paths_json TEXT NOT NULL DEFAULT '[]',
PRIMARY KEY (session_id, root)
)
"""
)
conn.execute(
"""
CREATE INDEX IF NOT EXISTS idx_verification_events_session_root
ON verification_events(session_id, root, id DESC)
"""
)
conn.execute(
"INSERT OR REPLACE INTO meta(key, value) VALUES ('schema_version', ?)",
(str(_VERIFY_SCHEMA_VERSION),),
)
conn.commit()
def _split_segment_tokens(command: str) -> list[list[str]]:
segments: list[list[str]] = []
for segment in _SHELL_SPLIT_RE.split(command.strip()):
if not segment:
continue
try:
tokens = shlex.split(segment)
except ValueError:
continue
if tokens:
segments.append(tokens)
return segments
def _clean_token(token: str) -> str:
token = token.strip()
while token.startswith("./"):
token = token[2:]
return token
def _canonical_tokens(canonical: str) -> list[str]:
try:
return [_clean_token(t) for t in shlex.split(canonical) if t]
except ValueError:
return []
def _find_subsequence(tokens: list[str], needle: list[str]) -> Optional[int]:
if not tokens or not needle or len(needle) > len(tokens):
return None
cleaned = [_clean_token(t) for t in tokens]
for idx in range(0, len(cleaned) - len(needle) + 1):
if cleaned[idx:idx + len(needle)] == needle:
return idx
return None
def _strip_command_prefix(tokens: list[str]) -> list[str]:
"""Remove harmless command prefixes before matching canonical commands."""
remaining = list(tokens)
if remaining and remaining[0] == "env":
remaining = remaining[1:]
while remaining and "=" in remaining[0] and not remaining[0].startswith("-"):
remaining = remaining[1:]
while remaining and remaining[0] in {"command", "time", "noglob"}:
remaining = remaining[1:]
return remaining
def _equivalent_needles(needle: list[str]) -> list[list[str]]:
"""Return command spellings equivalent to the detected canonical command."""
candidates = [needle]
if len(needle) >= 3 and needle[1] == "run":
package_manager = needle[0]
script_name = needle[2]
if package_manager in {"npm", "pnpm", "yarn", "bun"}:
candidates.append([package_manager, script_name])
if len(needle) == 1 and "/" in needle[0]:
candidates.extend([["bash", needle[0]], ["sh", needle[0]]])
if needle == ["pytest"]:
candidates.extend(
[
["python", "-m", "pytest"],
["python3", "-m", "pytest"],
["uv", "run", "pytest"],
["poetry", "run", "pytest"],
["pipenv", "run", "pytest"],
]
)
return candidates
def _find_canonical_match(command: str, canonical_commands: list[str]) -> Optional[tuple[str, list[str]]]:
"""Return ``(canonical, trailing_args)`` for the first detected command."""
segments = _split_segment_tokens(command)
for canonical in canonical_commands:
needle = _canonical_tokens(canonical)
if not needle:
continue
for tokens in segments:
candidate_tokens = _strip_command_prefix(tokens)
for candidate in _equivalent_needles(needle):
if candidate_tokens[:len(candidate)] == candidate:
return canonical, candidate_tokens[len(candidate):]
return None
def _kind_for_command(canonical: str) -> str:
lowered = canonical.lower()
if any(word in lowered for word in ("lint", "eslint", "ruff")):
return "lint"
if any(word in lowered for word in ("typecheck", "tsc", "mypy", "pyright", "ty")):
return "typecheck"
if "build" in lowered:
return "build"
if "fmt" in lowered or "format" in lowered:
return "format"
if "check" in lowered and "test" not in lowered:
return "check"
return "test"
def _looks_like_target(arg: str) -> bool:
if not arg or arg.startswith("-") or "=" in arg:
return False
return (
"/" in arg
or "\\" in arg
or "::" in arg
or arg.endswith((".py", ".js", ".jsx", ".ts", ".tsx", ".rs", ".go", ".java"))
or arg.startswith(("test_", "tests", "spec", "__tests__"))
)
def _scope_for_args(args: list[str]) -> str:
return "targeted" if any(_looks_like_target(arg) for arg in args) else "full"
def _is_under_temp_dir(token: str) -> bool:
if not token or token.startswith("-"):
return False
try:
path = Path(token).expanduser()
if not path.is_absolute():
return False
resolved = path.resolve()
temp_root = Path(tempfile.gettempdir()).resolve()
return resolved == temp_root or temp_root in resolved.parents
except Exception:
return False
def _is_under_root(token: str, root: str | Path | None) -> bool:
if not root:
return False
try:
path = Path(token).expanduser().resolve()
root_path = Path(root).expanduser().resolve()
return path == root_path or root_path in path.parents
except Exception:
return False
def _is_temp_script_path(token: str, root: str | Path | None) -> bool:
try:
name = Path(token).expanduser().name
except Exception:
return False
return (
name.startswith(_AD_HOC_SCRIPT_NAME_PREFIXES)
and _is_under_temp_dir(token)
and not _is_under_root(token, root)
)
def _ad_hoc_script_args(tokens: list[str], root: str | Path | None) -> Optional[list[str]]:
candidate_tokens = _strip_command_prefix(tokens)
if not candidate_tokens:
return None
command = candidate_tokens[0]
if _is_temp_script_path(command, root):
return candidate_tokens[1:]
if command in {"python", "python3", "node", "bash", "sh", "ruby", "perl"}:
for idx, token in enumerate(candidate_tokens[1:], start=1):
if token == "--":
continue
if _is_temp_script_path(token, root):
return candidate_tokens[idx + 1:]
if not token.startswith("-"):
return None
return None
def _find_ad_hoc_match(command: str, root: str | Path | None) -> Optional[list[str]]:
for tokens in _split_segment_tokens(command):
trailing_args = _ad_hoc_script_args(tokens, root)
if trailing_args is not None:
return trailing_args
return None
def _summarize_output(output: str) -> str:
text = (output or "").strip()
if len(text) <= _MAX_OUTPUT_SUMMARY_CHARS:
return text
head = _MAX_OUTPUT_SUMMARY_CHARS // 3
tail = _MAX_OUTPUT_SUMMARY_CHARS - head
return (
text[:head]
+ f"\n... [{len(text) - _MAX_OUTPUT_SUMMARY_CHARS} chars omitted] ...\n"
+ text[-tail:]
)
def _prune_old_events(conn: sqlite3.Connection, *, session_id: str, root: str) -> None:
"""Bound ledger growth without deleting the current state pointer."""
cutoff = _retention_cutoff()
conn.execute(
"""
DELETE FROM verification_events
WHERE session_id = ?
AND root = ?
AND id NOT IN (
SELECT id FROM verification_events
WHERE session_id = ? AND root = ?
ORDER BY id DESC
LIMIT ?
)
""",
(session_id, root, session_id, root, _MAX_EVENTS_PER_SESSION_ROOT),
)
conn.execute(
"""
DELETE FROM verification_state
WHERE (
last_edit_at IS NOT NULL
AND last_edit_at < ?
)
OR (
last_edit_at IS NULL
AND last_event_id IN (
SELECT id FROM verification_events
WHERE created_at < ?
)
)
""",
(cutoff, cutoff),
)
conn.execute(
"""
DELETE FROM verification_events
WHERE created_at < ?
AND id NOT IN (
SELECT last_event_id FROM verification_state
WHERE last_event_id IS NOT NULL
)
""",
(cutoff,),
)
conn.execute(
"""
DELETE FROM verification_events
WHERE id NOT IN (
SELECT id FROM verification_events
ORDER BY id DESC
LIMIT ?
)
AND id NOT IN (
SELECT last_event_id FROM verification_state
WHERE last_event_id IS NOT NULL
)
""",
(_MAX_TOTAL_UNREFERENCED_EVENTS,),
)
def classify_verification_command(
command: str,
*,
cwd: str | Path | None = None,
session_id: str | None = None,
exit_code: int = 0,
output: str = "",
) -> Optional[VerificationEvidence]:
"""Classify a terminal command as verification evidence, if applicable."""
if not command or not isinstance(command, str):
return None
try:
from agent.coding_context import project_facts_for
facts = project_facts_for(cwd)
except Exception:
facts = None
if not facts:
return None
verify_commands = list(facts.get("verifyCommands") or [])
match = _find_canonical_match(command, verify_commands)
is_ad_hoc = False
if match is None and not verify_commands:
ad_hoc_args = _find_ad_hoc_match(command, facts.get("root"))
if ad_hoc_args is not None:
match = ("ad-hoc verification script", ad_hoc_args)
is_ad_hoc = True
if match is None:
return None
canonical, trailing_args = match
return VerificationEvidence(
command=command,
canonical_command=canonical,
kind="ad_hoc" if is_ad_hoc else _kind_for_command(canonical),
scope="targeted" if is_ad_hoc else _scope_for_args(trailing_args),
status="passed" if int(exit_code) == 0 else "failed",
exit_code=int(exit_code),
cwd=str(Path(cwd or ".").resolve()),
root=str(facts.get("root") or Path(cwd or ".").resolve()),
session_id=str(session_id or "default"),
output_summary=_summarize_output(output),
)
def record_terminal_result(
*,
command: str,
cwd: str | Path | None,
session_id: str | None,
exit_code: int,
output: str = "",
) -> Optional[dict[str, Any]]:
"""Record a foreground terminal result when it is verification evidence."""
evidence = classify_verification_command(
command,
cwd=cwd,
session_id=session_id,
exit_code=exit_code,
output=output,
)
if evidence is None:
return None
created_at = _utc_now()
with _DB_LOCK:
with _connect() as conn:
cur = conn.execute(
"""
INSERT INTO verification_events(
created_at, session_id, cwd, root, command, canonical_command,
kind, scope, status, exit_code, output_summary
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
created_at,
evidence.session_id,
evidence.cwd,
evidence.root,
evidence.command,
evidence.canonical_command,
evidence.kind,
evidence.scope,
evidence.status,
evidence.exit_code,
evidence.output_summary,
),
)
if cur.lastrowid is None:
raise RuntimeError("verification event insert did not return an id")
event_id = int(cur.lastrowid)
conn.execute(
"""
INSERT INTO verification_state(
session_id, root, last_event_id, last_edit_at, changed_paths_json
) VALUES (?, ?, ?, NULL, '[]')
ON CONFLICT(session_id, root) DO UPDATE SET
last_event_id = excluded.last_event_id,
last_edit_at = NULL,
changed_paths_json = '[]'
""",
(evidence.session_id, evidence.root, event_id),
)
_prune_old_events(conn, session_id=evidence.session_id, root=evidence.root)
conn.commit()
return {"id": event_id, **evidence.__dict__, "created_at": created_at}
def mark_workspace_edited(
*,
session_id: str | None,
cwd: str | Path | None,
paths: list[str] | tuple[str, ...] | None = None,
) -> Optional[dict[str, Any]]:
"""Mark verification evidence stale after a successful file edit."""
try:
from agent.coding_context import project_facts_for
facts = project_facts_for(cwd)
except Exception:
facts = None
if not facts:
return None
sid = str(session_id or "default")
root = str(facts.get("root") or Path(cwd or ".").resolve())
changed_paths = sorted({str(p) for p in (paths or []) if p})
edited_at = _utc_now()
with _DB_LOCK:
with _connect() as conn:
row = conn.execute(
"""
SELECT changed_paths_json FROM verification_state
WHERE session_id = ? AND root = ?
""",
(sid, root),
).fetchone()
existing: set[str] = set()
if row is not None:
try:
existing = set(json.loads(row["changed_paths_json"] or "[]"))
except (TypeError, ValueError):
existing = set()
merged = sorted((existing | set(changed_paths)))[-200:]
conn.execute(
"""
INSERT INTO verification_state(
session_id, root, last_event_id, last_edit_at, changed_paths_json
) VALUES (?, ?, NULL, ?, ?)
ON CONFLICT(session_id, root) DO UPDATE SET
last_edit_at = excluded.last_edit_at,
changed_paths_json = excluded.changed_paths_json
""",
(sid, root, edited_at, json.dumps(merged)),
)
conn.commit()
return {"session_id": sid, "root": root, "last_edit_at": edited_at, "changed_paths": changed_paths}
def verification_status(
*,
session_id: str | None,
cwd: str | Path | None,
) -> dict[str, Any]:
"""Return the best known verification state for a session/workspace."""
try:
from agent.coding_context import project_facts_for
facts = project_facts_for(cwd)
except Exception:
facts = None
if not facts:
return {"status": "not_applicable", "evidence": None}
sid = str(session_id or "default")
root = str(facts.get("root") or Path(cwd or ".").resolve())
with _DB_LOCK:
with _connect() as conn:
state = conn.execute(
"""
SELECT last_event_id, last_edit_at, changed_paths_json
FROM verification_state
WHERE session_id = ? AND root = ?
""",
(sid, root),
).fetchone()
if state is None:
return {
"status": "unverified",
"evidence": None,
"root": root,
"session_id": sid,
"changed_paths": [],
}
event = None
if state["last_event_id"] is not None:
event = conn.execute(
"SELECT * FROM verification_events WHERE id = ?",
(state["last_event_id"],),
).fetchone()
changed_paths: list[str] = []
try:
changed_paths = json.loads(state["changed_paths_json"] or "[]")
except (TypeError, ValueError):
changed_paths = []
if event is None:
return {
"status": "unverified",
"evidence": None,
"root": root,
"session_id": sid,
"changed_paths": changed_paths,
}
evidence = dict(event)
if state["last_edit_at"] and state["last_edit_at"] > evidence["created_at"]:
status = "stale"
else:
status = evidence["status"]
return {
"status": status,
"evidence": evidence,
"root": root,
"session_id": sid,
"changed_paths": changed_paths,
}

View File

@@ -1,304 +0,0 @@
"""Turn-end verification guard for coding edits.
This module is intentionally policy-only. It never runs checks itself; it turns
the passive verification ledger into a bounded follow-up when the model tries to
finish immediately after editing code without fresh evidence.
"""
from __future__ import annotations
import os
import tempfile
from pathlib import Path
from typing import Any, Iterable
_MAX_CHANGED_PATHS_IN_NUDGE = 8
# Non-code file extensions whose edits carry no verifiable runtime behavior:
# documentation, prose, and data/markup that no test/build exercises. When a
# turn touches ONLY these, verify-on-stop has nothing to check, so the nudge is
# suppressed (this is fix "C" for the doc/markdown/skill false-positive — a
# SKILL.md or README edit must never demand a /tmp verification script). A turn
# that edits any non-listed path (a real source/code/config file) still nudges.
_NON_CODE_VERIFY_EXTENSIONS = frozenset(
{
".md",
".markdown",
".mdx",
".rst",
".txt",
".text",
".adoc",
".asciidoc",
".org",
".log",
".csv",
".tsv",
}
)
# Filenames (case-insensitive, extension-less or otherwise) that are pure prose
# even without a recognized doc extension.
_NON_CODE_VERIFY_FILENAMES = frozenset(
{
"license",
"licence",
"notice",
"authors",
"contributors",
"changelog",
"codeowners",
}
)
def _is_non_code_path(raw: str) -> bool:
"""Return True when a changed path is documentation/prose with nothing to verify."""
try:
p = Path(str(raw))
except Exception:
return False
suffix = p.suffix.lower()
if suffix in _NON_CODE_VERIFY_EXTENSIONS:
return True
if not suffix and p.name.lower() in _NON_CODE_VERIFY_FILENAMES:
return True
return False
def _filter_verifiable_paths(paths: Iterable[str]) -> list[str]:
"""Drop documentation/prose paths; keep paths that could have verifiable behavior."""
return [p for p in paths if p and not _is_non_code_path(p)]
# Session identities (platform or source) that are NOT human conversational
# messaging surfaces: interactive coding surfaces (CLI, TUI, desktop, codex,
# local, gateway) and programmatic callers (API server, webhooks, tools).
# Verify-on-stop stays ON by default for these. Any other resolved gateway
# platform is a conversational messaging surface (Telegram, Discord, WhatsApp,
# Signal, Slack, etc.) where the verification narrative would reach a human as
# chat noise, so it defaults OFF. Mirrors LOCAL_SESSION_SOURCE_IDS in
# apps/desktop/src/lib/session-source.ts; keep roughly in sync when adding a
# local or programmatic surface. Default-deny by design: an unrecognized
# identity is treated as messaging (OFF) so a new chat platform never leaks the
# verification receipt before this set is updated.
_NON_MESSAGING_SESSION_SURFACES = frozenset(
{
"",
"cli",
"codex",
"desktop",
"gateway",
"local",
"tui",
"tool",
"api_server",
"webhook",
"msgraph_webhook",
}
)
def _session_is_messaging_surface() -> bool:
"""Return whether this turn is delivered over a human messaging channel.
The gateway binds the platform value (e.g. ``telegram``) to
``HERMES_SESSION_PLATFORM``; the CLI and TUI set ``HERMES_SESSION_SOURCE``
(e.g. ``cli``, ``tui``) instead. Both are consulted via the session-context
helper (with an ``os.environ`` fallback), alongside the ``HERMES_PLATFORM``
override, matching the sibling platform resolution in
``agent/skill_commands.py`` and ``agent/prompt_builder.py``. A turn is a
messaging surface when a resolved identity is present and is not a known
non-messaging surface.
"""
try:
from gateway.session_context import get_session_env
platform = (
os.getenv("HERMES_PLATFORM")
or get_session_env("HERMES_SESSION_PLATFORM", "")
)
source = get_session_env("HERMES_SESSION_SOURCE", "")
except Exception:
platform = os.getenv("HERMES_PLATFORM", "") or os.environ.get(
"HERMES_SESSION_PLATFORM", ""
)
source = os.environ.get("HERMES_SESSION_SOURCE", "")
for identity in (platform, source):
identity = str(identity or "").strip().lower()
if identity and identity not in _NON_MESSAGING_SESSION_SURFACES:
return True
return False
def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool:
"""Return whether edit -> verify-before-finish behavior is enabled.
Precedence: an explicit ``HERMES_VERIFY_ON_STOP`` env var wins, then an
explicit ``agent.verify_on_stop`` config value. The config default is
``False`` (see ``DEFAULT_CONFIG``) — verify-on-stop is OFF unless the user
opts in. The legacy ``"auto"`` sentinel is still honored for anyone who
sets it explicitly: it resolves to ON for interactive coding surfaces
(CLI, TUI, desktop) and programmatic callers, and OFF for conversational
messaging surfaces (Telegram, Discord, etc.). A missing/unknown value
falls back to OFF.
"""
env = os.environ.get("HERMES_VERIFY_ON_STOP")
if env is not None:
return env.strip().lower() not in {"0", "false", "no", "off"}
if config is None:
try:
from hermes_cli.config import load_config
config = load_config()
except Exception:
config = {}
agent_cfg = (config or {}).get("agent") if isinstance(config, dict) else None
cfg_val = agent_cfg.get("verify_on_stop") if isinstance(agent_cfg, dict) else None
if isinstance(cfg_val, bool):
return cfg_val
if isinstance(cfg_val, str):
token = cfg_val.strip().lower()
if token in {"1", "true", "yes", "on"}:
return True
if token in {"0", "false", "no", "off"}:
return False
if token == "auto":
# Explicit opt-in to the legacy surface-aware behavior.
return not _session_is_messaging_surface()
# Missing or unknown value -> OFF (the new default).
return False
def _candidate_cwds(paths: Iterable[str]) -> list[Path]:
candidates: list[Path] = []
seen: set[str] = set()
for raw in paths:
if not raw:
continue
try:
path = Path(raw).expanduser()
candidate = path if path.is_dir() else path.parent
resolved = str(candidate.resolve())
except Exception:
continue
if resolved not in seen:
seen.add(resolved)
candidates.append(Path(resolved))
return candidates
def _verification_snapshot(
*,
session_id: str | None,
changed_paths: list[str],
) -> tuple[dict[str, Any], dict[str, Any]] | None:
"""Return ``(status, facts)`` for the first edited workspace needing proof."""
try:
from agent.coding_context import project_facts_for
from agent.verification_evidence import verification_status
except Exception:
return None
first_snapshot: tuple[dict[str, Any], dict[str, Any]] | None = None
for cwd in _candidate_cwds(changed_paths):
facts = project_facts_for(cwd)
if not facts:
continue
status = verification_status(session_id=session_id, cwd=cwd)
snapshot = (status, facts)
if first_snapshot is None:
first_snapshot = snapshot
if str(status.get("status") or "unverified") != "passed":
return snapshot
return first_snapshot
def _format_changed_paths(paths: list[str]) -> str:
shown = paths[:_MAX_CHANGED_PATHS_IN_NUDGE]
lines = [f"- `{path}`" for path in shown]
remaining = len(paths) - len(shown)
if remaining > 0:
lines.append(f"- ... and {remaining} more")
return "\n".join(lines)
def _status_detail(status: dict[str, Any]) -> str:
state = str(status.get("status") or "unverified")
evidence = status.get("evidence") if isinstance(status.get("evidence"), dict) else None
if not evidence:
return state
command = evidence.get("canonical_command") or evidence.get("command")
summary = str(evidence.get("output_summary") or "").strip()
parts = [state]
if command:
parts.append(f"last command `{command}`")
if summary:
max_summary = 1200
if len(summary) > max_summary:
summary = summary[:max_summary].rstrip() + "\n... [truncated]"
parts.append(f"last output:\n{summary}")
return "\n".join(parts)
def build_verify_on_stop_nudge(
*,
session_id: str | None,
changed_paths: Iterable[str],
attempts: int = 0,
max_attempts: int = 2,
) -> str | None:
"""Return a synthetic follow-up when edited code lacks fresh verification."""
# Drop documentation/prose paths (markdown, skills, README, LICENSE, ...) —
# they carry no verifiable behavior, so a turn that touched only those has
# nothing to verify and must not nudge.
paths = sorted({str(p) for p in _filter_verifiable_paths(changed_paths)})
if not paths or attempts >= max_attempts:
return None
snapshot = _verification_snapshot(session_id=session_id, changed_paths=paths)
if snapshot is None:
return None
status, facts = snapshot
verify_commands = [
str(cmd).strip()
for cmd in (facts.get("verifyCommands") or [])
if str(cmd).strip()
]
state = str(status.get("status") or "unverified")
if state == "passed":
return None
if verify_commands:
command_instruction = (
"Run the relevant verification command now ("
+ ", ".join(f"`{cmd}`" for cmd in verify_commands[:3])
+ (", ..." if len(verify_commands) > 3 else "")
+ "), read any failure, repair the code, and summarize what passed."
)
else:
temp_dir = tempfile.gettempdir()
command_instruction = (
"No canonical test/lint/build command was detected. Create a focused "
f"temporary verification script under `{temp_dir}` using an OS-safe "
"`tempfile` path with a `hermes-verify-` filename prefix, run it "
"against the changed behavior, clean it up when possible, and "
"summarize it explicitly as ad-hoc verification rather than suite "
"green."
)
return (
"[System: You edited code in this turn, but the workspace does not have "
"fresh passing verification evidence yet.\n\n"
f"Verification status: {_status_detail(status)}\n\n"
f"Changed paths:\n{_format_changed_paths(paths)}\n\n"
f"{command_instruction} If verification is not possible, explain the "
"concrete blocker instead of claiming the work is fully verified.]"
)
__all__ = ["build_verify_on_stop_nudge", "verify_on_stop_enabled"]

View File

@@ -17,5 +17,5 @@
"lib": "@/lib",
"hooks": "@/hooks"
},
"iconLibrary": "tabler"
"iconLibrary": "lucide"
}

View File

@@ -61,7 +61,10 @@ function buildDesktopBackendPath({
const venvBin = venvRoot ? pathModule.join(venvRoot, platform === 'win32' ? 'Scripts' : 'bin') : null
const saneEntries = platform === 'win32' ? [] : POSIX_SANE_PATH_ENTRIES
return appendUniquePathEntries([hermesNodeBin, venvBin, currentPath, saneEntries], { delimiter })
return appendUniquePathEntries(
[hermesNodeBin, venvBin, currentPath, saneEntries],
{ delimiter }
)
}
function normalizeHermesHomeRoot(hermesHome, { pathModule = pathModuleForPlatform(process.platform) } = {}) {

View File

@@ -76,7 +76,10 @@ test('normalizeHermesHomeRoot maps profile homes back to the global Hermes root'
normalizeHermesHomeRoot('C:\\Users\\test\\AppData\\Local\\hermes\\profiles\\oracle', { pathModule: path.win32 }),
'C:\\Users\\test\\AppData\\Local\\hermes'
)
assert.equal(normalizeHermesHomeRoot('/Users/test/.hermes', { pathModule: path.posix }), '/Users/test/.hermes')
assert.equal(
normalizeHermesHomeRoot('/Users/test/.hermes', { pathModule: path.posix }),
'/Users/test/.hermes'
)
})
test('Windows PATH casing and delimiter are preserved without POSIX sane entries', () => {
@@ -101,5 +104,8 @@ test('Windows PATH casing and delimiter are preserved without POSIX sane entries
})
test('appendUniquePathEntries drops empty entries and keeps first occurrence', () => {
assert.equal(appendUniquePathEntries([':/a::/b', ['/a', '/c']], { delimiter: ':' }), '/a:/b:/c')
assert.equal(
appendUniquePathEntries([':/a::/b', ['/a', '/c']], { delimiter: ':' }),
'/a:/b:/c'
)
})

View File

@@ -37,18 +37,7 @@ const { execFileSync } = require('node:child_process')
const PROBE_TIMEOUT_MS = 5000
/**
* Return the Python snippet used to verify Hermes can import far enough to
* launch the CLI. Kept exported for tests so dependency regressions are
* caught without needing a real broken venv fixture.
*
* @returns {string}
*/
function hermesRuntimeImportProbe() {
return 'import yaml; import hermes_cli.config'
}
/**
* Return true iff the Hermes runtime import probe exits 0.
* Return true iff `python -c "import hermes_cli"` exits 0.
*
* Used to gate the "fallback to system Python with hermes_cli installed"
* rung of resolveHermesBackend. Without this, a system Python 3.11-3.13
@@ -57,20 +46,13 @@ function hermesRuntimeImportProbe() {
* site-packages -- and the resolver returns a backend that immediately
* dies on spawn.
*
* The probe intentionally imports hermes_cli.config, not just the top-level
* package: a broken/empty Windows launcher venv can still see the source tree
* through PYTHONPATH but lack PyYAML, then die on the first real CLI import.
*
* @param {string} pythonPath - Absolute path to a python.exe / python.
* @param {object} [opts]
* @param {object} [opts.env] - Additional environment for the probe.
* @returns {boolean}
*/
function canImportHermesCli(pythonPath, opts = {}) {
function canImportHermesCli(pythonPath) {
if (!pythonPath) return false
try {
execFileSync(pythonPath, ['-c', hermesRuntimeImportProbe()], {
env: { ...process.env, ...(opts.env || {}) },
execFileSync(pythonPath, ['-c', 'import hermes_cli'], {
stdio: 'ignore',
timeout: PROBE_TIMEOUT_MS,
windowsHide: true
@@ -119,7 +101,6 @@ function verifyHermesCli(hermesCommand, opts = {}) {
module.exports = {
canImportHermesCli,
hermesRuntimeImportProbe,
verifyHermesCli,
PROBE_TIMEOUT_MS
}

View File

@@ -11,7 +11,7 @@ const fs = require('node:fs')
const os = require('node:os')
const path = require('node:path')
const { canImportHermesCli, hermesRuntimeImportProbe, verifyHermesCli } = require('./backend-probes.cjs')
const { canImportHermesCli, verifyHermesCli } = require('./backend-probes.cjs')
// Resolve the host's own Node binary -- guaranteed to be on disk and
// runnable. We use it as both a stand-in for "a python that doesn't
@@ -40,12 +40,6 @@ test('canImportHermesCli returns false when binary does not exist', () => {
assert.equal(canImportHermesCli(ghost), false)
})
test('hermes runtime import probe checks config dependencies', () => {
const probe = hermesRuntimeImportProbe()
assert.match(probe, /\bimport yaml\b/)
assert.match(probe, /\bimport hermes_cli\.config\b/)
})
test('verifyHermesCli returns false when command is falsy', () => {
assert.equal(verifyHermesCli(''), false)
assert.equal(verifyHermesCli(null), false)

View File

@@ -1,5 +1,3 @@
const fs = require('node:fs')
const _READY_RE = /^HERMES_DASHBOARD_READY port=(\d+)/m
// The announcement clock starts the instant the backend process is spawned —
@@ -96,76 +94,9 @@ function waitForDashboardPort(child, timeoutMs = resolvePortAnnounceTimeoutMs())
})
}
function readDashboardReadyFile(readyFile) {
if (!readyFile) return null
try {
const parsed = JSON.parse(fs.readFileSync(readyFile, 'utf8'))
const port = Number(parsed?.port)
return Number.isInteger(port) && port > 0 ? port : null
} catch {
return null
}
}
function waitForDashboardReadyFile(readyFile, child, timeoutMs = resolvePortAnnounceTimeoutMs()) {
return new Promise((resolve, reject) => {
let done = false
let interval = null
function cleanup() {
if (done) return
done = true
clearTimeout(timer)
if (interval) clearInterval(interval)
child.off('exit', onExit)
child.off('error', onError)
}
function check() {
const port = readDashboardReadyFile(readyFile)
if (port) {
cleanup()
resolve(port)
}
}
function onExit(code, signal) {
cleanup()
reject(new Error(`Hermes backend: exited before port announcement (${signal || code})`))
}
function onError(err) {
cleanup()
reject(err)
}
const timer = setTimeout(() => {
cleanup()
reject(new Error(`Timed out waiting for Hermes backend port announcement (${timeoutMs}ms)`))
}, timeoutMs)
child.on('exit', onExit)
child.on('error', onError)
interval = setInterval(check, 50)
if (typeof interval.unref === 'function') interval.unref()
check()
})
}
function waitForDashboardPortAnnouncement(child, options = {}) {
const timeoutMs = options.timeoutMs ?? resolvePortAnnounceTimeoutMs()
if (options.readyFile) {
return waitForDashboardReadyFile(options.readyFile, child, timeoutMs)
}
return waitForDashboardPort(child, timeoutMs)
}
module.exports = {
waitForDashboardPort,
waitForDashboardPortAnnouncement,
waitForDashboardReadyFile,
readDashboardReadyFile,
resolvePortAnnounceTimeoutMs,
DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS,
MIN_PORT_ANNOUNCE_TIMEOUT_MS
MIN_PORT_ANNOUNCE_TIMEOUT_MS,
}

View File

@@ -14,18 +14,12 @@
const test = require('node:test')
const assert = require('node:assert/strict')
const { EventEmitter } = require('node:events')
const fs = require('node:fs')
const os = require('node:os')
const path = require('node:path')
const {
readDashboardReadyFile,
waitForDashboardPort,
waitForDashboardPortAnnouncement,
waitForDashboardReadyFile,
resolvePortAnnounceTimeoutMs,
DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS,
MIN_PORT_ANNOUNCE_TIMEOUT_MS
MIN_PORT_ANNOUNCE_TIMEOUT_MS,
} = require('./backend-ready.cjs')
// A minimal stand-in for a spawned child process: an EventEmitter with a
@@ -125,75 +119,3 @@ test('a late announcement after timeout does not throw (listeners torn down)', a
child.stdout.emit('data', 'HERMES_DASHBOARD_READY port=9999\n')
})
})
// ---------------------------------------------------------------------------
// ready-file port announcement
// ---------------------------------------------------------------------------
function mkTmpReadyFile() {
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-ready-test-'))
return {
dir,
file: path.join(dir, 'ready.json'),
cleanup: () => fs.rmSync(dir, { recursive: true, force: true })
}
}
test('readDashboardReadyFile returns a valid port from JSON', () => {
const tmp = mkTmpReadyFile()
try {
fs.writeFileSync(tmp.file, JSON.stringify({ port: 4567 }))
assert.equal(readDashboardReadyFile(tmp.file), 4567)
} finally {
tmp.cleanup()
}
})
test('readDashboardReadyFile ignores missing, malformed, or invalid files', () => {
const tmp = mkTmpReadyFile()
try {
assert.equal(readDashboardReadyFile(tmp.file), null)
fs.writeFileSync(tmp.file, '{')
assert.equal(readDashboardReadyFile(tmp.file), null)
fs.writeFileSync(tmp.file, JSON.stringify({ port: 0 }))
assert.equal(readDashboardReadyFile(tmp.file), null)
} finally {
tmp.cleanup()
}
})
test('waitForDashboardReadyFile resolves when the ready file appears', async () => {
const tmp = mkTmpReadyFile()
const child = makeFakeChild()
try {
const p = waitForDashboardReadyFile(tmp.file, child, 1000)
setTimeout(() => fs.writeFileSync(tmp.file, JSON.stringify({ port: 8765 })), 20)
assert.equal(await p, 8765)
} finally {
tmp.cleanup()
}
})
test('waitForDashboardPortAnnouncement uses ready file when provided', async () => {
const tmp = mkTmpReadyFile()
const child = makeFakeChild()
try {
const p = waitForDashboardPortAnnouncement(child, { readyFile: tmp.file, timeoutMs: 1000 })
setTimeout(() => fs.writeFileSync(tmp.file, JSON.stringify({ port: 9876 })), 20)
assert.equal(await p, 9876)
} finally {
tmp.cleanup()
}
})
test('waitForDashboardReadyFile rejects when the child exits before file readiness', async () => {
const tmp = mkTmpReadyFile()
const child = makeFakeChild()
try {
const p = waitForDashboardReadyFile(tmp.file, child, 1000)
child.emit('exit', 1, null)
await assert.rejects(p, /exited before port announcement/)
} finally {
tmp.cleanup()
}
})

View File

@@ -179,13 +179,7 @@ function downloadInstallScript(commit, destPath) {
})
}
async function resolveInstallScript({
installStamp,
sourceRepoRoot,
hermesHome,
emit,
_download = downloadInstallScript
}) {
async function resolveInstallScript({ installStamp, sourceRepoRoot, hermesHome, emit, _download = downloadInstallScript }) {
// 1. Dev shortcut: prefer a local checkout's installer so we can iterate
// without pushing. SOURCE_REPO_ROOT comes from main.cjs (path.resolve
// of APP_ROOT/../..).
@@ -299,19 +293,15 @@ function spawnPowerShell(scriptPath, args, { emit, stageName, abortSignal, herme
const ps = process.platform === 'win32' ? resolveWindowsPowerShell() : 'pwsh'
const fullArgs = ['-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', scriptPath, ...args]
const child = spawn(
ps,
fullArgs,
hiddenWindowsChildOptions({
stdio: ['ignore', 'pipe', 'pipe'],
env: {
...process.env,
// Pass HERMES_HOME through so install.ps1 respects the caller's
// choice rather than re-computing the default.
HERMES_HOME: hermesHome || process.env.HERMES_HOME || ''
}
})
)
const child = spawn(ps, fullArgs, hiddenWindowsChildOptions({
stdio: ['ignore', 'pipe', 'pipe'],
env: {
...process.env,
// Pass HERMES_HOME through so install.ps1 respects the caller's
// choice rather than re-computing the default.
HERMES_HOME: hermesHome || process.env.HERMES_HOME || ''
}
}))
let stdout = ''
let stderr = ''

View File

@@ -261,7 +261,12 @@ function cookiesHaveSession(cookies) {
*/
function cookiesHaveLiveSession(cookies) {
if (!Array.isArray(cookies)) return false
return cookies.some(c => c && c.value && (AT_COOKIE_VARIANTS.includes(c.name) || RT_COOKIE_VARIANTS.includes(c.name)))
return cookies.some(
c =>
c &&
c.value &&
(AT_COOKIE_VARIANTS.includes(c.name) || RT_COOKIE_VARIANTS.includes(c.name))
)
}
module.exports = {

View File

@@ -138,7 +138,10 @@ function buildPosixCleanupScript({ desktopPid, pythonExe, pythonPath, agentRoot,
if (pythonPath) {
lines.push(`export PYTHONPATH=${q(pythonPath)}\${PYTHONPATH:+:$PYTHONPATH}`)
}
lines.push(`cd ${q(agentRoot)} 2>/dev/null || true`, `${q(pythonExe)} ${uninstallArgs.map(q).join(' ')} || true`)
lines.push(
`cd ${q(agentRoot)} 2>/dev/null || true`,
`${q(pythonExe)} ${uninstallArgs.map(q).join(' ')} || true`
)
if (appPath) {
lines.push(`rm -rf ${q(appPath)} || true`)
}
@@ -166,15 +169,7 @@ function buildPosixCleanupScript({ desktopPid, pythonExe, pythonPath, agentRoot,
* Removal: even after the desktop PID is gone, Windows releases directory
* handles lazily, so a single `rmdir /s /q` can half-fail — retry up to 10x.
*/
function buildWindowsCleanupScript({
desktopPid,
pythonExe,
pythonPath,
agentRoot,
uninstallArgs,
appPath,
hermesHome
}) {
function buildWindowsCleanupScript({ desktopPid, pythonExe, pythonPath, agentRoot, uninstallArgs, appPath, hermesHome }) {
const pid = Number(desktopPid) || 0
// cmd.exe has no string escaping inside quotes; strip embedded quotes (paths
// under %LOCALAPPDATA% never contain them). `&`/`^` in a path would still be

View File

@@ -101,7 +101,10 @@ test('resolveRemovableAppPath uses APPIMAGE on Linux when set', () => {
})
test('resolveRemovableAppPath finds the unpacked dir on Linux', () => {
assert.equal(resolveRemovableAppPath('/opt/hermes/linux-unpacked/hermes', 'linux', {}), '/opt/hermes/linux-unpacked')
assert.equal(
resolveRemovableAppPath('/opt/hermes/linux-unpacked/hermes', 'linux', {}),
'/opt/hermes/linux-unpacked'
)
// A system-package install (/usr/bin) → null, left to apt/dnf.
assert.equal(resolveRemovableAppPath('/usr/bin/hermes', 'linux', {}), null)
})

View File

@@ -1,48 +0,0 @@
'use strict'
const { session } = require('electron')
const EMBED_SESSION_PARTITION = 'persist:hermes-embed'
const EMBED_REFERER = 'https://www.youtube.com/'
const YOUTUBE_REFERER_HOST_RE =
/(^|\.)(youtube\.com|youtube-nocookie\.com|googlevideo\.com|ytimg\.com|youtubei\.googleapis\.com)$/i
function installEmbedRefererForSession(embedSession) {
if (!embedSession) {
return
}
embedSession.webRequest.onBeforeSendHeaders((details, callback) => {
let host = ''
try {
host = new URL(details.url).hostname
} catch {
host = ''
}
if (!YOUTUBE_REFERER_HOST_RE.test(host)) {
callback({ requestHeaders: details.requestHeaders })
return
}
const headers = { ...details.requestHeaders }
if (!headers.Referer && !headers.referer) {
headers.Referer = EMBED_REFERER
}
callback({ requestHeaders: headers })
})
}
/** Stamp Referer on YouTube requests in the embed webview partition only. */
function installEmbedReferer() {
try {
installEmbedRefererForSession(session.fromPartition(EMBED_SESSION_PARTITION))
} catch {
// Non-fatal: embeds still render; YouTube may show referer errors.
}
}
module.exports = { installEmbedReferer }

Some files were not shown because too many files have changed in this diff Show More