mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-04 09:07:20 +08:00
Compare commits
1 Commits
feat/deskt
...
opencode-p
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fffbef0ec4 |
@@ -66,12 +66,8 @@ runtime/
|
||||
|
||||
# ---------- Not needed inside the Docker image ----------
|
||||
|
||||
# Desktop app source (Tauri/Electron); never installed in the container.
|
||||
# apps/shared is the dashboard↔desktop websocket helper and is linked from
|
||||
# web/package.json as a file: workspace dep — keep it in the build context.
|
||||
# Desktop app source (Tauri/Electron); never installed in the container
|
||||
apps/
|
||||
!apps/shared/
|
||||
!apps/shared/**
|
||||
|
||||
# Test suite — not shipped in production images
|
||||
tests/
|
||||
|
||||
2
.envrc
2
.envrc
@@ -1,5 +1,5 @@
|
||||
watch_file pyproject.toml uv.lock
|
||||
watch_file package-lock.json package.json web/package.json ui-tui/package.json website/package.json apps/shared/package.json apps/desktop/package.json ui-tui/packages/hermes-ink/package.json
|
||||
watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix nix/hermes-agent.nix nix/desktop.nix
|
||||
watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix
|
||||
|
||||
use flake
|
||||
|
||||
50
.github/actions/hermes-smoke-test/action.yml
vendored
Normal file
50
.github/actions/hermes-smoke-test/action.yml
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
name: Hermes smoke test
|
||||
description: >
|
||||
Run the image's built-in entrypoint against `--help` and `dashboard --help`
|
||||
to catch basic runtime regressions before publishing. Requires the image
|
||||
to already be loaded into the local Docker daemon under `image`.
|
||||
|
||||
Works identically on amd64 and arm64 runners.
|
||||
|
||||
inputs:
|
||||
image:
|
||||
description: Fully-qualified image tag (e.g. nousresearch/hermes-agent:test)
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- name: Ensure /tmp/hermes-test is hermes-writable
|
||||
shell: bash
|
||||
run: |
|
||||
# The image runs as the hermes user (UID 10000). GitHub Actions
|
||||
# creates /tmp/hermes-test root-owned by default, which hermes
|
||||
# can't write to — chown it to match the in-container UID before
|
||||
# bind-mounting. Real users doing `docker run -v ~/.hermes:...`
|
||||
# with their own UID hit the same issue and have their own
|
||||
# remediations (HERMES_UID env var, or chown locally).
|
||||
mkdir -p /tmp/hermes-test
|
||||
sudo chown -R 10000:10000 /tmp/hermes-test
|
||||
|
||||
- name: hermes --help
|
||||
shell: bash
|
||||
run: |
|
||||
# Use the image's real ENTRYPOINT (/init + main-wrapper.sh) so
|
||||
# this exercises the actual production startup path. PR #30136
|
||||
# review caught that an --entrypoint override here had been
|
||||
# silently neutered by the s6-overlay migration — stage2-hook
|
||||
# ignores its CMD args, so the smoke test was a no-op.
|
||||
docker run --rm \
|
||||
-v /tmp/hermes-test:/opt/data \
|
||||
"${{ inputs.image }}" --help
|
||||
|
||||
- name: hermes dashboard --help
|
||||
shell: bash
|
||||
run: |
|
||||
# Regression guard for #9153: dashboard was present in source but
|
||||
# missing from the published image. If this fails, something in
|
||||
# the Dockerfile is excluding the dashboard subcommand from the
|
||||
# installed package.
|
||||
docker run --rm \
|
||||
-v /tmp/hermes-test:/opt/data \
|
||||
"${{ inputs.image }}" dashboard --help
|
||||
24
.github/workflows/ci.yml
vendored
24
.github/workflows/ci.yml
vendored
@@ -20,7 +20,6 @@ permissions:
|
||||
pull-requests: write # needed by lint (PR comment) + supply-chain (PR comment)
|
||||
actions: read # needed by osv-scanner (SARIF upload)
|
||||
security-events: write # needed by osv-scanner (SARIF upload)
|
||||
packages: write # needed by docker build
|
||||
|
||||
concurrency:
|
||||
group: ci-${{ github.ref }}
|
||||
@@ -33,7 +32,6 @@ jobs:
|
||||
# (all lanes true) so post-merge validation is never weakened.
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
detect:
|
||||
name: Detect affected areas
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
python: ${{ steps.classify.outputs.python }}
|
||||
@@ -55,15 +53,11 @@ jobs:
|
||||
# Skipped workflows (if condition is false) don't spin up runners.
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
tests:
|
||||
name: Python tests
|
||||
needs: detect
|
||||
if: needs.detect.outputs.python == 'true'
|
||||
uses: ./.github/workflows/tests.yml
|
||||
with:
|
||||
slice_count: 8
|
||||
|
||||
lint:
|
||||
name: Python lints
|
||||
needs: detect
|
||||
if: needs.detect.outputs.python == 'true'
|
||||
uses: ./.github/workflows/lint.yml
|
||||
@@ -71,49 +65,35 @@ jobs:
|
||||
event_name: ${{ needs.detect.outputs.event_name }}
|
||||
|
||||
typecheck:
|
||||
name: TypeScript
|
||||
needs: detect
|
||||
if: needs.detect.outputs.frontend == 'true'
|
||||
uses: ./.github/workflows/typecheck.yml
|
||||
|
||||
docs-site:
|
||||
name: Docs Site
|
||||
needs: detect
|
||||
if: needs.detect.outputs.site == 'true'
|
||||
uses: ./.github/workflows/docs-site-checks.yml
|
||||
|
||||
history-check:
|
||||
name: Deny unrelated histories
|
||||
needs: detect
|
||||
if: needs.detect.outputs.event_name == 'pull_request'
|
||||
uses: ./.github/workflows/history-check.yml
|
||||
|
||||
contributor-check:
|
||||
name: Check contributors
|
||||
needs: detect
|
||||
if: needs.detect.outputs.python == 'true'
|
||||
uses: ./.github/workflows/contributor-check.yml
|
||||
|
||||
uv-lockfile:
|
||||
name: Check uv.lock
|
||||
needs: detect
|
||||
uses: ./.github/workflows/uv-lockfile-check.yml
|
||||
|
||||
docker-lint:
|
||||
name: Lint Docker scripts
|
||||
needs: detect
|
||||
if: needs.detect.outputs.docker_meta == 'true'
|
||||
uses: ./.github/workflows/docker-lint.yml
|
||||
|
||||
docker:
|
||||
name: Build&Test Docker image
|
||||
needs: detect
|
||||
if: needs.detect.outputs.python == 'true' || needs.detect.outputs.frontend == 'true' || needs.detect.outputs.docker_meta == 'true'
|
||||
uses: ./.github/workflows/docker.yml
|
||||
secrets: inherit
|
||||
|
||||
supply-chain:
|
||||
name: Supply-chain scan
|
||||
needs: detect
|
||||
if: needs.detect.outputs.event_name == 'pull_request' && (needs.detect.outputs.scan == 'true' || needs.detect.outputs.deps == 'true' || needs.detect.outputs.mcp_catalog == 'true')
|
||||
uses: ./.github/workflows/supply-chain-audit.yml
|
||||
@@ -124,7 +104,7 @@ jobs:
|
||||
mcp_catalog: ${{ needs.detect.outputs.mcp_catalog == 'true' }}
|
||||
|
||||
osv-scanner:
|
||||
name: OSV scan
|
||||
needs: detect
|
||||
uses: ./.github/workflows/osv-scanner.yml
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
@@ -147,8 +127,6 @@ jobs:
|
||||
- docker-lint
|
||||
- supply-chain
|
||||
- osv-scanner
|
||||
# We don't require docker to pass right now because it is too slow.
|
||||
# - docker
|
||||
if: always()
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
|
||||
2
.github/workflows/docker-lint.yml
vendored
2
.github/workflows/docker-lint.yml
vendored
@@ -2,7 +2,7 @@ name: Docker / shell lint
|
||||
|
||||
# Lints the container build inputs: Dockerfile (via hadolint) and any shell
|
||||
# scripts under docker/ (via shellcheck). These catch the class of regression
|
||||
# the behavioral docker smoke test can't — unquoted variable
|
||||
# the behavioral docker-publish smoke test can't — unquoted variable
|
||||
# expansions, silently-failing RUN commands, etc.
|
||||
#
|
||||
# Rules and ignores are documented in .hadolint.yaml at the repo root.
|
||||
|
||||
@@ -1,9 +1,24 @@
|
||||
name: Docker Build, Test, and Publish
|
||||
name: Docker Build and Publish
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- '**/*.py'
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
- 'Dockerfile'
|
||||
- 'docker/**'
|
||||
- '.github/workflows/docker-publish.yml'
|
||||
- '.github/actions/hermes-smoke-test/**'
|
||||
|
||||
# No paths filter — the job must always run so the required check
|
||||
# reports a status (path-gated workflows leave checks "pending" forever
|
||||
# when no matching files change, which blocks merge).
|
||||
pull_request:
|
||||
|
||||
release:
|
||||
types: [published]
|
||||
workflow_call:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -24,7 +39,11 @@ env:
|
||||
IMAGE_NAME: nousresearch/hermes-agent
|
||||
|
||||
jobs:
|
||||
# Build, test, and optionally push the amd64 image.
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build amd64 natively. This job also runs the smoke tests (basic --help
|
||||
# and the dashboard subcommand regression guard from #9153), because amd64
|
||||
# is the only arch we can `load` into the local daemon on an amd64 runner.
|
||||
# ---------------------------------------------------------------------------
|
||||
build-amd64:
|
||||
# Only run on the upstream repository, not on forks
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
@@ -34,19 +53,24 @@ jobs:
|
||||
digest: ${{ steps.push.outputs.digest }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
# The image build + integration tests run on every event
|
||||
# (PRs, push-to-main, release). Publish steps below are gated to
|
||||
# push-to-main / release only.
|
||||
# The image build + smoke test + integration tests run ONLY on
|
||||
# push-to-main and release — never on PRs. They are the heaviest jobs
|
||||
# in CI (~15-45 min) and a broken build surfaces on the main push (and
|
||||
# is gated pre-merge by docker-lint + uv-lockfile-check). Every step
|
||||
# below is skipped on PRs, so the job still reports green and the
|
||||
# required check never hangs.
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
# Build once, load into the local daemon for testing. Cached
|
||||
# Build once, load into the local daemon for smoke testing. Cached
|
||||
# to gha with a per-arch scope; the push step below reuses every
|
||||
# layer from this build.
|
||||
- name: Build image (amd64)
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
- name: Build image (amd64, smoke test)
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
@@ -58,12 +82,25 @@ jobs:
|
||||
cache-from: type=gha,scope=docker-amd64
|
||||
cache-to: type=gha,mode=max,scope=docker-amd64
|
||||
|
||||
- name: Smoke test image
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: ./.github/actions/hermes-smoke-test
|
||||
with:
|
||||
image: ${{ env.IMAGE_NAME }}:test
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Run the docker-integration test suite against the freshly-built
|
||||
# image already loaded into the local daemon (`:test`).
|
||||
# image already loaded into the local daemon (`:test`). These tests
|
||||
# are excluded from the sharded `tests.yml :: test` matrix on purpose
|
||||
# (see `_SKIP_PARTS` in scripts/run_tests_parallel.py) because each
|
||||
# shard would otherwise reach the session-scoped ``built_image``
|
||||
# fixture in ``tests/docker/conftest.py`` and start a 3-7min
|
||||
# ``docker build`` — guaranteed to
|
||||
# die in fixture setup.
|
||||
#
|
||||
# Piggybacking here avoids a second image build: the build step
|
||||
# already loaded the image into the daemon under
|
||||
# `${IMAGE_NAME}:test`, so we just point ``HERMES_TEST_IMAGE`` at
|
||||
# Piggybacking here avoids a second image build: the smoke test
|
||||
# already proved the image loads + runs, so the daemon has it under
|
||||
# `${IMAGE_NAME}:test` and we just point ``HERMES_TEST_IMAGE`` at
|
||||
# that. The fixture's ``HERMES_TEST_IMAGE`` branch (see
|
||||
# tests/docker/conftest.py:62-63) short-circuits the rebuild.
|
||||
#
|
||||
@@ -73,20 +110,26 @@ jobs:
|
||||
# cheapest path to coverage on every PR that touches docker code.
|
||||
# ---------------------------------------------------------------------
|
||||
- name: Install uv (for docker tests)
|
||||
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
- name: Set up Python 3.11 (for docker tests)
|
||||
if: github.event_name != 'pull_request'
|
||||
run: uv python install 3.11
|
||||
|
||||
- name: Install Python dependencies (for docker tests)
|
||||
if: github.event_name != 'pull_request'
|
||||
run: |
|
||||
uv venv .venv --python 3.11
|
||||
source .venv/bin/activate
|
||||
# ``dev`` extra pulls in pytest, pytest-asyncio —
|
||||
# everything tests/docker/ needs. We deliberately avoid ``all``
|
||||
# here because the docker tests only drive the container via
|
||||
# subprocess and don't import hermes_agent's optional deps.
|
||||
uv sync --locked --python 3.11 --extra dev
|
||||
uv pip install -e ".[dev]"
|
||||
|
||||
- name: Run docker integration tests
|
||||
if: github.event_name != 'pull_request'
|
||||
env:
|
||||
# Skip rebuild; use the image already loaded by the build step.
|
||||
HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
|
||||
@@ -96,11 +139,12 @@ jobs:
|
||||
OPENAI_API_KEY: ""
|
||||
NOUS_API_KEY: ""
|
||||
run: |
|
||||
scripts/run_tests.sh tests/docker/ --file-timeout 600
|
||||
source .venv/bin/activate
|
||||
python -m pytest tests/docker/ -v --tb=short
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
@@ -111,7 +155,7 @@ jobs:
|
||||
- name: Push amd64 by digest
|
||||
id: push
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
@@ -135,7 +179,7 @@ jobs:
|
||||
|
||||
- name: Upload digest artifact
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: digest-amd64
|
||||
path: /tmp/digests/*
|
||||
@@ -143,7 +187,10 @@ jobs:
|
||||
retention-days: 1
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build, test, and optionally push the arm64 image.
|
||||
# Build arm64 natively on GitHub's free arm64 runner. This replaces the
|
||||
# previous QEMU-emulated arm64 build, which was ~5-10x slower and shared
|
||||
# a cache scope with amd64. Matches the amd64 job's shape: build+load,
|
||||
# smoke test, then on push/release push by digest.
|
||||
# ---------------------------------------------------------------------------
|
||||
build-arm64:
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
@@ -153,26 +200,29 @@ jobs:
|
||||
digest: ${{ steps.push.outputs.digest }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
# arm64 build runs only on push-to-main and release (see build-amd64).
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
# Log in to ghcr.io so the registry-backed build cache below can be
|
||||
# read (cache-from) on every event and written (cache-to) on
|
||||
# push/release. Uses the workflow's GITHUB_TOKEN, which is valid for
|
||||
# the whole job — unlike the gha cache backend's short-lived Azure SAS
|
||||
# token, which expired mid-build on slow cold-cache arm64 runs and
|
||||
# crashed the build before the tests ran (the reason the gha cache
|
||||
# crashed the build before the smoke test (the reason the gha cache
|
||||
# was removed from arm64 PRs in the first place).
|
||||
- name: Log in to ghcr.io (build cache)
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# Build once, load into the local daemon for testing, then push
|
||||
# Build once, load into the local daemon for smoke testing, then push
|
||||
# by digest below. Reads AND writes the registry-backed cache so the
|
||||
# push reuses layers from this build and the next build starts warm.
|
||||
#
|
||||
@@ -180,8 +230,9 @@ jobs:
|
||||
# cache that previously broke here: its credential is the job-lifetime
|
||||
# GITHUB_TOKEN, not a short-lived SAS token, so the cold-build-outlives-
|
||||
# token failure mode cannot recur.
|
||||
- name: Build image (arm64, cached publish)
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
- name: Build image (arm64, smoke test, cached publish)
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
@@ -193,29 +244,15 @@ jobs:
|
||||
cache-from: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64
|
||||
cache-to: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64,mode=max
|
||||
|
||||
- name: Install uv for docker tests
|
||||
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
|
||||
|
||||
- name: Set up Python 3.11 for docker tests
|
||||
run: uv python install 3.11
|
||||
|
||||
- name: Install Python dependencies for docker tests
|
||||
run: |
|
||||
uv sync --locked --python 3.11 --extra dev
|
||||
|
||||
- name: Run docker tests
|
||||
env:
|
||||
# Skip rebuild; use the image already loaded by the build step.
|
||||
HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
|
||||
OPENROUTER_API_KEY: ""
|
||||
OPENAI_API_KEY: ""
|
||||
NOUS_API_KEY: ""
|
||||
run: |
|
||||
scripts/run_tests.sh tests/docker/ --file-timeout 600
|
||||
- name: Smoke test image
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: ./.github/actions/hermes-smoke-test
|
||||
with:
|
||||
image: ${{ env.IMAGE_NAME }}:test
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
@@ -223,7 +260,7 @@ jobs:
|
||||
- name: Push arm64 by digest
|
||||
id: push
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
@@ -245,7 +282,7 @@ jobs:
|
||||
|
||||
- name: Upload digest artifact
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: digest-arm64
|
||||
path: /tmp/digests/*
|
||||
@@ -267,17 +304,17 @@ jobs:
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
path: /tmp/digests
|
||||
pattern: digest-*
|
||||
merge-multiple: true
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
6
.github/workflows/lint.yml
vendored
6
.github/workflows/lint.yml
vendored
@@ -37,7 +37,7 @@ jobs:
|
||||
fetch-depth: 0 # need full history for merge-base + worktree
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
- name: Install ruff + ty
|
||||
uses: ./.github/actions/retry
|
||||
@@ -110,7 +110,7 @@ jobs:
|
||||
cat .lint-reports/summary.md >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
- name: Upload reports as artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: lint-reports
|
||||
path: .lint-reports/
|
||||
@@ -164,7 +164,7 @@ jobs:
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
- name: Install ruff
|
||||
uses: ./.github/actions/retry
|
||||
|
||||
18
.github/workflows/skills-index.yml
vendored
18
.github/workflows/skills-index.yml
vendored
@@ -3,17 +3,17 @@ name: Build Skills Index
|
||||
on:
|
||||
schedule:
|
||||
# Run twice daily: 6 AM and 6 PM UTC
|
||||
- cron: "0 6,18 * * *"
|
||||
workflow_dispatch: # Manual trigger
|
||||
- cron: '0 6,18 * * *'
|
||||
workflow_dispatch: # Manual trigger
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- "scripts/build_skills_index.py"
|
||||
- ".github/workflows/skills-index.yml"
|
||||
- 'scripts/build_skills_index.py'
|
||||
- '.github/workflows/skills-index.yml'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
actions: write # to trigger deploy-site.yml on schedule
|
||||
actions: write # to trigger deploy-site.yml on schedule
|
||||
|
||||
jobs:
|
||||
build-index:
|
||||
@@ -21,11 +21,11 @@ jobs:
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: "3.11"
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install dependencies
|
||||
run: pip install httpx==0.28.1 pyyaml==6.0.2
|
||||
@@ -36,7 +36,7 @@ jobs:
|
||||
run: python scripts/build_skills_index.py
|
||||
|
||||
- name: Upload index artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: skills-index
|
||||
path: website/static/api/skills-index.json
|
||||
|
||||
74
.github/workflows/tests.yml
vendored
74
.github/workflows/tests.yml
vendored
@@ -2,11 +2,6 @@ name: Tests
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
slice_count:
|
||||
description: Number of parallel test slices
|
||||
type: number
|
||||
default: 8
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -17,11 +12,13 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
generate:
|
||||
name: "Generate slices"
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.matrix.outputs.matrix }}
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
slice: [1, 2, 3, 4, 5, 6]
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
@@ -30,26 +27,13 @@ jobs:
|
||||
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: test_durations.json
|
||||
# main always writes a new suffix, but jobs pick the latest one with the same prefix
|
||||
# quote from https://docs.github.com/en/actions/reference/workflows-and-actions/dependency-caching#cache-hits-and-misses
|
||||
# If you provide restore-keys, the cache action sequentially searches for any caches that match the list of restore-keys.
|
||||
# If there are no exact matches, the action searches for partial matches of the restore keys.
|
||||
# When the action finds a partial match, the most recent cache is restored to the path directory.
|
||||
key: test-durations
|
||||
|
||||
- name: Generate test slices
|
||||
id: matrix
|
||||
run: |
|
||||
MATRIX=$(python3 scripts/run_tests_parallel.py --generate-slices ${{ inputs.slice_count }})
|
||||
echo "matrix=$MATRIX" >> "$GITHUB_OUTPUT"
|
||||
|
||||
test:
|
||||
name: Run tests slice ${{ matrix.slice.index }}/${{ inputs.slice_count }}
|
||||
needs: generate
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix: ${{ fromJSON(needs.generate.outputs.matrix) }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install ripgrep (prebuilt binary)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
@@ -65,7 +49,7 @@ jobs:
|
||||
rg --version
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
with:
|
||||
# Persist uv's download/wheel cache (~/.cache/uv) across runs.
|
||||
# Keyed on the dependency manifests, so the cache is reused until
|
||||
@@ -94,19 +78,33 @@ jobs:
|
||||
# re-download, keeping the persisted cache small and fast to restore.
|
||||
run: uv cache prune --ci
|
||||
|
||||
- name: Run tests (slice ${{ matrix.slice.index }}/${{ inputs.slice_count }})
|
||||
# Per-file isolation via scripts/run_tests.sh: each test file runs
|
||||
# in its own freshly-spawned `python -m pytest <file>` subprocess
|
||||
- name: Run tests (slice ${{ matrix.slice }}/6)
|
||||
# Per-file isolation via scripts/run_tests_parallel.py: discovers
|
||||
# every test_*.py file under tests/ (excluding integration/ + e2e/),
|
||||
# then runs `python -m pytest <file>` in a freshly-spawned subprocess
|
||||
# with bounded parallelism. No xdist, no shared workers, no
|
||||
# module-level state leakage between files.
|
||||
#
|
||||
# File list is pre-computed by the generate job (--generate-slices)
|
||||
# which runs LPT distribution once and passes the file list to each
|
||||
# matrix job via --files. Previously each job re-discovered files and
|
||||
# re-ran LPT independently — redundant N times.
|
||||
# Why per-file (not per-test): per-test spawn cost (~250ms × 17k
|
||||
# tests = 70min CPU minimum) blew the wall-clock budget. Per-file
|
||||
# spawn (~250ms × ~850 files = ~3.5min) fits while still giving
|
||||
# every file a fresh interpreter — the only isolation boundary
|
||||
# that matters in practice (cross-file leakage was the original
|
||||
# flake source; intra-file is the test author's responsibility).
|
||||
#
|
||||
# Why drop xdist entirely: xdist's persistent workers accumulate
|
||||
# state across files, which is exactly the leakage we wanted to
|
||||
# fix. ThreadPoolExecutor + subprocess.run is ~60 lines and does
|
||||
# the job with cleaner semantics.
|
||||
#
|
||||
# Matrix slicing (--slice I/N): files are distributed across 6
|
||||
# jobs by cached duration (LPT algorithm) so each job gets
|
||||
# roughly equal wall time. Without a cache, files default to 2s
|
||||
# estimate and get split roughly evenly by count — still correct,
|
||||
# just not perfectly balanced.
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
scripts/run_tests.sh --files '${{ matrix.slice.files }}'
|
||||
python scripts/run_tests_parallel.py --slice ${{ matrix.slice }}/6
|
||||
env:
|
||||
# Ensure tests don't accidentally call real APIs
|
||||
OPENROUTER_API_KEY: ""
|
||||
@@ -116,7 +114,7 @@ jobs:
|
||||
- name: Upload per-slice durations
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: test-durations-slice-${{ matrix.slice.index }}
|
||||
name: test-durations-slice-${{ matrix.slice }}
|
||||
path: test_durations.json
|
||||
retention-days: 1
|
||||
|
||||
@@ -175,7 +173,7 @@ jobs:
|
||||
rg --version
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
with:
|
||||
# Persist uv's download/wheel cache (~/.cache/uv) across runs.
|
||||
# Keyed on the dependency manifests, so the cache is reused until
|
||||
|
||||
8
.github/workflows/typecheck.yml
vendored
8
.github/workflows/typecheck.yml
vendored
@@ -6,7 +6,6 @@ on:
|
||||
|
||||
jobs:
|
||||
typecheck:
|
||||
name: Check TypeScript
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -23,7 +22,8 @@ jobs:
|
||||
# native builds. Skipping install scripts drops node-pty's node-gyp
|
||||
# header fetch — the transient flake that killed this job pre-`tsc` — and
|
||||
# is faster. retry covers the remaining registry blips.
|
||||
- uses: ./.github/actions/retry
|
||||
-
|
||||
uses: ./.github/actions/retry
|
||||
with:
|
||||
command: npm ci --ignore-scripts
|
||||
- run: npm run --prefix ${{ matrix.package }} typecheck
|
||||
@@ -35,7 +35,6 @@ jobs:
|
||||
# users build apps/desktop from source on install/update. Run the real
|
||||
# `vite build` here so that class of break fails in CI instead.
|
||||
desktop-build:
|
||||
name: Build desktop app
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
@@ -45,7 +44,8 @@ jobs:
|
||||
cache: npm
|
||||
# Keep install scripts here: the production build may need node-pty's
|
||||
# native binary. retry handles the transient install-time fetch flakes.
|
||||
- uses: ./.github/actions/retry
|
||||
-
|
||||
uses: ./.github/actions/retry
|
||||
with:
|
||||
command: npm ci
|
||||
- run: npm run --prefix apps/desktop build
|
||||
|
||||
32
.github/workflows/upload_to_pypi.yml
vendored
32
.github/workflows/upload_to_pypi.yml
vendored
@@ -5,11 +5,11 @@ name: Publish to PyPI
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "v20*" # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
|
||||
- 'v20*' # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
confirm_tag:
|
||||
description: "Tag to publish (e.g. v2026.5.15). Must already exist."
|
||||
description: 'Tag to publish (e.g. v2026.5.15). Must already exist.'
|
||||
required: true
|
||||
type: string
|
||||
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
name: Build distribution 📦
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
# On workflow_dispatch, check out the confirmed tag.
|
||||
@@ -43,17 +43,17 @@ jobs:
|
||||
fi
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: "3.13"
|
||||
python-version: '3.13'
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
|
||||
uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6
|
||||
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
with:
|
||||
node-version: "22"
|
||||
node-version: '22'
|
||||
|
||||
- name: Build web dashboard
|
||||
run: cd web && npm ci && npm run build
|
||||
@@ -81,7 +81,7 @@ jobs:
|
||||
run: uv build --sdist --wheel
|
||||
|
||||
- name: Upload distribution artifacts
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
@@ -94,17 +94,17 @@ jobs:
|
||||
name: pypi
|
||||
url: https://pypi.org/p/hermes-agent
|
||||
permissions:
|
||||
id-token: write # OIDC trusted publishing
|
||||
id-token: write # OIDC trusted publishing
|
||||
|
||||
steps:
|
||||
- name: Download distribution artifacts
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
- name: Publish to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
|
||||
uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
|
||||
with:
|
||||
skip-existing: true
|
||||
|
||||
@@ -116,12 +116,12 @@ jobs:
|
||||
needs: publish
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write # attach assets to the existing release
|
||||
id-token: write # sigstore signing
|
||||
contents: write # attach assets to the existing release
|
||||
id-token: write # sigstore signing
|
||||
|
||||
steps:
|
||||
- name: Download distribution artifacts
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
@@ -145,7 +145,7 @@ jobs:
|
||||
|
||||
- name: Sign with Sigstore
|
||||
if: env.skip_sign != 'true'
|
||||
uses: sigstore/gh-action-sigstore-python@04cffa1d795717b140764e8b640de88853c92acc # v3.3.0
|
||||
uses: sigstore/gh-action-sigstore-python@04cffa1d795717b140764e8b640de88853c92acc # v3.3.0
|
||||
with:
|
||||
inputs: >-
|
||||
./dist/*.tar.gz
|
||||
|
||||
6
.github/workflows/uv-lockfile-check.yml
vendored
6
.github/workflows/uv-lockfile-check.yml
vendored
@@ -4,7 +4,7 @@ name: uv.lock check
|
||||
# that modify pyproject.toml without regenerating uv.lock (or vice versa)
|
||||
# must not merge, because the Docker build's `uv sync --frozen` step will
|
||||
# fail on a stale lockfile and we'd rather catch it here than in the
|
||||
# docker workflow on main.
|
||||
# docker-publish workflow on main.
|
||||
#
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
# IMPORTANT: this check runs against the MERGED state, not just your branch
|
||||
@@ -63,7 +63,7 @@ jobs:
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # 8.2.0
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
# `uv lock --check` re-resolves the project from pyproject.toml and
|
||||
# compares the result to uv.lock, exiting non-zero if they disagree.
|
||||
@@ -100,7 +100,7 @@ jobs:
|
||||
|
||||
This check is blocking because the Docker image build uses
|
||||
`uv sync --frozen --extra all`, which rejects stale lockfiles
|
||||
— catching it here avoids a ~15 min failed docker run
|
||||
— catching it here avoids a ~15 min failed docker-publish run
|
||||
on `main` post-merge.
|
||||
EOF
|
||||
echo "::error title=uv.lock out of sync::Run \`uv lock\` locally and commit the result. If on a PR, sync with main first."
|
||||
|
||||
6
.gitignore
vendored
6
.gitignore
vendored
@@ -137,9 +137,3 @@ RELEASE_v*.md
|
||||
# Desktop demo-run scratch output (hermes writes demo/*.txt during recorded
|
||||
# walkthroughs). Throwaway artifacts, never part of the app.
|
||||
apps/desktop/demo/
|
||||
|
||||
# PR infographics are rendered locally and embedded in PR descriptions via the
|
||||
# image-provider (fal.media) URL — they are NEVER committed to the repo. The
|
||||
# PR body is the archive. See the hermes-agent-dev skill's
|
||||
# pr-infographic-workflow reference (storage rule + lapse #8 / #COMMIT-1).
|
||||
infographic/
|
||||
|
||||
31
AGENTS.md
31
AGENTS.md
@@ -123,17 +123,6 @@ conservative at the waist.
|
||||
without E2E proof, and plugins that touch core files.** Plugins live in their
|
||||
own directory and work within the ABCs/hooks we provide; if a plugin needs
|
||||
more, widen the generic plugin surface, don't special-case it in core.
|
||||
- **Third-party products / other people's projects integrated into the core
|
||||
tree.** Observability backends, vendor SaaS integrations, analytics dashboards,
|
||||
and similar "someone else's product" plugins do NOT land under `plugins/` in
|
||||
this repo. They place an ongoing maintenance burden on us to keep them working
|
||||
against a fast-moving core, for a backend we don't own. Ship them as a
|
||||
**standalone plugin repo** users install into `~/.hermes/plugins/` (or via a
|
||||
pip entry point), and promote them in the Nous Research Discord
|
||||
(`#plugins-skills-and-skins`). This is a coupling-and-maintenance decision, not
|
||||
a quality bar — the plugin can be excellent and still be a close. PRs that add
|
||||
such a directory to the tree are closed with a pointer to publish it as its own
|
||||
repo.
|
||||
|
||||
### Before you call it a bug — verify the premise (and when NOT to close)
|
||||
|
||||
@@ -491,7 +480,7 @@ The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes
|
||||
|
||||
### Electron Desktop Chat App (`apps/desktop/`)
|
||||
|
||||
A **separate** chat surface from both the classic CLI and the dashboard's embedded TUI. It is an Electron + React + nanostore renderer (`@assistant-ui/react`) that talks to a `tui_gateway` backend over JSON-RPC (`requestGateway(method, params)`). The WebSocket/JSON-RPC transport lives in the framework-agnostic `apps/shared` package (`@hermes/shared` — `JsonRpcGatewayClient` + WS URL helpers), which the web dashboard (`web/`) also consumes; **desktop has no build/runtime dependency on the dashboard frontend** — it spawns a headless `hermes serve` backend server (the same gateway `dashboard` serves, minus the browser UI). `dashboard` and `serve` share `cmd_dashboard`/`start_server` but are independent surfaces — neither launches the other. The one exception is a backward-compat *fallback*: `serve` is newer, so the desktop spawn (`electron/backend-command.cjs` + `backendSupportsServe()` in `main.cjs`) detects whether the resolved runtime registers `serve` and, only when it does not (an older managed install / PATH `hermes` the app hasn't updated yet), rewrites the argv to the legacy `dashboard --no-open`. Without that, a new app against an un-upgraded runtime would crash on an unknown subcommand and brick every mid-upgrade user. It does NOT embed `hermes --tui` — it has its own composer, transcript, and slash-command pipeline. Route desktop bugs to the `hermes-desktop-app-work` skill, not `hermes-dashboard-work`.
|
||||
A **separate** chat surface from both the classic CLI and the dashboard's embedded TUI. It is an Electron + React + nanostore renderer (`@assistant-ui/react`) that talks to a `tui_gateway` backend over JSON-RPC (`requestGateway(method, params)`). It does NOT embed `hermes --tui` — it has its own composer, transcript, and slash-command pipeline. Route desktop bugs to the `hermes-desktop-app-work` skill, not `hermes-dashboard-work`.
|
||||
|
||||
**Slash commands in the desktop app are curated client-side, then dispatched to the backend.** The pipeline:
|
||||
|
||||
@@ -794,24 +783,6 @@ landing in this tree. PRs that add a new directory under
|
||||
provider as its own repo. Existing in-tree providers stay; bug fixes
|
||||
to them are welcome.
|
||||
|
||||
**No new third-party-product plugins in-tree (policy, June 2026):** the
|
||||
same rule applies beyond memory providers. Plugins that integrate
|
||||
someone else's product or project — observability/metrics backends,
|
||||
vendor SaaS connectors, analytics dashboards, paid-service tie-ins —
|
||||
must ship as **standalone plugin repos** that users install into
|
||||
`~/.hermes/plugins/` (or via pip entry points). They register through
|
||||
the existing plugin discovery path and use the ABCs/hooks/ctx surface
|
||||
we expose; nothing special is needed in core. The reason is
|
||||
maintenance load: every product we absorb into the tree becomes our
|
||||
burden to keep working against a fast-moving core, for a backend we
|
||||
don't own. Promote standalone plugins in the Nous Research Discord
|
||||
(`#plugins-skills-and-skins`). PRs that add such a directory under
|
||||
`plugins/` are closed with a pointer to publish it as its own repo —
|
||||
this is a coupling decision, not a quality judgment. (The
|
||||
`observability/`, `kanban/`, `disk-cleanup/`, etc. directories already
|
||||
in the tree are existing precedent, not an invitation to add more
|
||||
third-party-product plugins alongside them.)
|
||||
|
||||
### Model-provider plugins (`plugins/model-providers/<name>/`)
|
||||
|
||||
Every inference backend (openrouter, anthropic, gmi, deepseek, nvidia, …)
|
||||
|
||||
@@ -85,23 +85,6 @@ This isn't a quality bar — it's a coupling-and-maintenance decision. Memory pr
|
||||
|
||||
---
|
||||
|
||||
## Third-Party Product Integrations: Ship as a Standalone Plugin
|
||||
|
||||
The same rule extends to **any plugin that integrates someone else's product or project** — observability/metrics backends, vendor SaaS connectors, analytics dashboards, paid-service tie-ins, and similar third-party integrations. **These do not land in this repo.**
|
||||
|
||||
The reason is maintenance load, not quality. Every external product absorbed into the core tree becomes ours to keep working against a fast-moving codebase, for a backend we don't own and can't control. Hermes ships a lot and the core moves quickly; coupling third-party products into it creates an open-ended burden on the maintainers.
|
||||
|
||||
Publish these as a **standalone plugin repo** instead:
|
||||
|
||||
- Implement the relevant ABC and use the existing plugin discovery path (`~/.hermes/plugins/`, project `.hermes/plugins/`, or a pip entry point) — see [Build a Hermes Plugin](https://hermes-agent.nousresearch.com/docs/guides/build-a-hermes-plugin)
|
||||
- Register lifecycle hooks (`pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`, `on_session_start`, `on_session_end`), tools (`ctx.register_tool`), and CLI subcommands (`ctx.register_cli_command`) through the surface we already expose — no core changes needed
|
||||
- If your plugin needs a capability the framework doesn't expose, that's a feature request to **widen the generic plugin surface** (a new hook or `ctx` method) — never special-case your plugin in core
|
||||
- Promote it in the [Nous Research Discord](https://discord.gg/NousResearch) `#plugins-skills-and-skins` channel so users can find and install it
|
||||
|
||||
A well-built third-party-product plugin can clear automated review and still be closed for this reason — it's a placement decision, not a verdict on the code. PRs that add such a directory under `plugins/` will be closed with a pointer to publish it as its own repo.
|
||||
|
||||
---
|
||||
|
||||
## Development Setup
|
||||
|
||||
### Prerequisites
|
||||
@@ -149,20 +132,13 @@ this way, make sure you run the `hermes` entrypoint from this venv; running the
|
||||
system `python3 -m hermes_cli.main` can pick up unrelated system Python
|
||||
packages.
|
||||
|
||||
Create the venv **outside** the cloned source tree. A venv that lives inside
|
||||
the directory the agent operates from can be wiped by a relative-path command
|
||||
the agent runs against its own checkout (`rm -rf venv`, `uv venv venv`, etc.),
|
||||
which silently destroys the running runtime mid-session. Keeping it outside the
|
||||
tree means no relative path from the workspace resolves to it.
|
||||
|
||||
```bash
|
||||
git clone https://github.com/NousResearch/hermes-agent.git
|
||||
cd hermes-agent
|
||||
|
||||
# Create venv with Python 3.11, OUTSIDE the source tree
|
||||
uv venv ~/.hermes/venvs/hermes-dev --python 3.11
|
||||
export VIRTUAL_ENV="$HOME/.hermes/venvs/hermes-dev"
|
||||
export PATH="$VIRTUAL_ENV/bin:$PATH"
|
||||
# Create venv with Python 3.11
|
||||
uv venv venv --python 3.11
|
||||
export VIRTUAL_ENV="$(pwd)/venv"
|
||||
|
||||
# Install with all extras (messaging, cron, CLI menus, dev tools)
|
||||
uv pip install -e ".[all,dev]"
|
||||
|
||||
32
Dockerfile
32
Dockerfile
@@ -119,9 +119,6 @@ COPY package.json package-lock.json ./
|
||||
COPY web/package.json web/
|
||||
COPY ui-tui/package.json ui-tui/
|
||||
COPY ui-tui/packages/hermes-ink/ ui-tui/packages/hermes-ink/
|
||||
# apps/shared/ is copied IN FULL because web/package.json references it as a
|
||||
# `file:` workspace dependency (same pattern as hermes-ink above).
|
||||
COPY apps/shared/ apps/shared/
|
||||
|
||||
# `npm_config_install_links=false` forces npm to install `file:` deps as
|
||||
# symlinks instead of copies. This is the default since npm 10+, which is
|
||||
@@ -187,19 +184,12 @@ RUN uv sync --frozen --no-install-project --extra all --extra messaging --extra
|
||||
# invalidate the (relatively slow) web + ui-tui build layer.
|
||||
COPY web/ web/
|
||||
COPY ui-tui/ ui-tui/
|
||||
COPY apps/shared/ apps/shared/
|
||||
RUN cd web && npm run build && \
|
||||
cd ../ui-tui && npm run build
|
||||
|
||||
# ---------- Source code ----------
|
||||
# .dockerignore excludes node_modules, so the installs above survive.
|
||||
# --link decouples this layer from parents for cache purposes; --chmod bakes
|
||||
# the final read-only permissions at copy time so we skip the separate
|
||||
# `chmod -R` pass that previously walked ~30k files across the venv +
|
||||
# node_modules + source (21s amd64 / 222s arm64 — #49113). `a+rX,go-w`
|
||||
# gives the non-root hermes user read + traverse but no write; root retains
|
||||
# write so the build steps below don't need chmod u+w dances.
|
||||
COPY --link --chmod=a+rX,go-w . .
|
||||
COPY . .
|
||||
|
||||
# ---------- Permissions ----------
|
||||
# Link hermes-agent itself (editable). Deps are already installed in the
|
||||
@@ -207,15 +197,19 @@ COPY --link --chmod=a+rX,go-w . .
|
||||
# resolution or downloads.
|
||||
RUN uv pip install --no-cache-dir --no-deps -e "."
|
||||
|
||||
# Wire the exec shim and install-method stamp. Files under /opt/hermes are
|
||||
# already root-owned (COPY, uv sync, npm install all run as root) and
|
||||
# read-only for the hermes user (go-w from the --chmod above).
|
||||
|
||||
# Keep /opt/hermes immutable for the runtime hermes user. Hosted/container
|
||||
# instances must not be able to self-edit the installed source or venv; user
|
||||
# data, skills, plugins, config, logs, and dashboard uploads live under
|
||||
# /opt/data instead. Root can still repair the image during build/boot, but
|
||||
# supervised Hermes processes drop to the non-root hermes user.
|
||||
USER root
|
||||
RUN mkdir -p /opt/hermes/bin && \
|
||||
cp /opt/hermes/docker/hermes-exec-shim.sh /opt/hermes/bin/hermes && \
|
||||
chmod 0755 /opt/hermes/bin/hermes && \
|
||||
printf 'docker\n' > /opt/hermes/.install_method
|
||||
printf 'docker\n' > /opt/hermes/.install_method && \
|
||||
chown -R root:root /opt/hermes && \
|
||||
chmod -R a+rX /opt/hermes && \
|
||||
chmod -R a-w /opt/hermes
|
||||
# The ``.install_method`` stamp is baked next to the running code (the install
|
||||
# tree), NOT into $HERMES_HOME. $HERMES_HOME (/opt/data) is a shared data
|
||||
# volume that is commonly bind-mounted from the host and even shared with a
|
||||
@@ -242,11 +236,13 @@ RUN mkdir -p /opt/hermes/bin && \
|
||||
#
|
||||
# The arg is optional — local `docker build` without --build-arg simply
|
||||
# omits the file, and the runtime falls back to live-git lookup. CI
|
||||
# (.github/workflows/docker.yml) passes ${{ github.sha }} so
|
||||
# (.github/workflows/docker-publish.yml) passes ${{ github.sha }} so
|
||||
# every published image has it.
|
||||
ARG HERMES_GIT_SHA=
|
||||
RUN if [ -n "${HERMES_GIT_SHA}" ]; then \
|
||||
printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha; \
|
||||
chmod u+w /opt/hermes && \
|
||||
printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha && \
|
||||
chmod a-w /opt/hermes /opt/hermes/.hermes_build_sha; \
|
||||
fi
|
||||
|
||||
# ---------- s6-overlay service wiring ----------
|
||||
|
||||
10
README.md
10
README.md
@@ -18,7 +18,7 @@
|
||||
|
||||
**The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
|
||||
|
||||
Use any model you want — [Nous Portal](https://portal.nousresearch.com), OpenRouter, OpenAI, your own endpoint, and [many others](https://hermes-agent.nousresearch.com/docs/integrations/providers). Switch with `hermes model` — no code changes, no lock-in.
|
||||
Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NovitaAI](https://novita.ai) (AI-native cloud for Model API, Agent Sandbox, and GPU Cloud), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
|
||||
|
||||
<table>
|
||||
<tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>
|
||||
@@ -232,14 +232,10 @@ scripts/run_tests.sh
|
||||
Manual clone fallback (for throwaway clones/CI where you intentionally do not
|
||||
want the managed install layout):
|
||||
|
||||
Create the venv outside the cloned source tree — a venv inside the directory
|
||||
the agent operates from can be wiped by a relative-path command the agent runs
|
||||
against its own checkout, destroying the running runtime mid-session.
|
||||
|
||||
```bash
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uv venv ~/.hermes/venvs/hermes-dev --python 3.11
|
||||
source ~/.hermes/venvs/hermes-dev/bin/activate
|
||||
uv venv .venv --python 3.11
|
||||
source .venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
scripts/run_tests.sh
|
||||
```
|
||||
|
||||
@@ -722,50 +722,10 @@ def init_agent(
|
||||
elif agent.provider == "moa":
|
||||
from agent.moa_loop import MoAClient
|
||||
agent.api_mode = "chat_completions"
|
||||
|
||||
# Route reference-model outputs to the agent's tool_progress_callback so
|
||||
# every surface that already consumes it (CLI spinner/scrollback, TUI,
|
||||
# desktop, gateway) can show each reference's answer as a labelled block
|
||||
# before the aggregator acts. The facade emits "moa.reference" and
|
||||
# "moa.aggregating" events; we forward them through the same callback
|
||||
# the tool lifecycle uses. Best-effort and cache-safe — these are
|
||||
# display-only events, they never touch the message history.
|
||||
def _moa_reference_relay(event: str, **kwargs: Any) -> None:
|
||||
cb = getattr(agent, "tool_progress_callback", None)
|
||||
if cb is None:
|
||||
return
|
||||
try:
|
||||
if event == "moa.reference":
|
||||
label = str(kwargs.get("label") or "")
|
||||
text = str(kwargs.get("text") or "")
|
||||
idx = kwargs.get("index")
|
||||
count = kwargs.get("count")
|
||||
cb(
|
||||
"moa.reference",
|
||||
label,
|
||||
text,
|
||||
None,
|
||||
moa_index=idx,
|
||||
moa_count=count,
|
||||
)
|
||||
elif event == "moa.aggregating":
|
||||
cb(
|
||||
"moa.aggregating",
|
||||
str(kwargs.get("aggregator") or ""),
|
||||
None,
|
||||
None,
|
||||
moa_ref_count=kwargs.get("ref_count"),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
agent.client = MoAClient(
|
||||
agent.model or "default",
|
||||
reference_callback=_moa_reference_relay,
|
||||
)
|
||||
agent.client = MoAClient(agent.model or "default")
|
||||
agent._client_kwargs = {}
|
||||
agent.api_key = api_key or "moa-virtual-provider"
|
||||
agent.base_url = "moa://local"
|
||||
agent.base_url = base_url or "moa://local"
|
||||
if not agent.quiet_mode:
|
||||
print(f"🤖 AI Agent initialized with MoA preset: {agent.model}")
|
||||
elif agent.api_mode == "bedrock_converse":
|
||||
@@ -1307,12 +1267,6 @@ def init_agent(
|
||||
_agent_section = {}
|
||||
agent._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")
|
||||
|
||||
# Intent-ack continuation config: "auto" (default — codex_responses only,
|
||||
# the historical gate), true (all api_modes), false (never), or a list of
|
||||
# model-name substrings. Resolved against the active api_mode/model in the
|
||||
# conversation loop's intent-ack block.
|
||||
agent._intent_ack_continuation = _agent_section.get("intent_ack_continuation", "auto")
|
||||
|
||||
# Universal task-completion guidance toggle. Default True. Surfaced
|
||||
# as a separate flag from tool_use_enforcement because the guidance
|
||||
# applies to ALL models, not just the model families enforcement
|
||||
@@ -1676,10 +1630,8 @@ def init_agent(
|
||||
f"Model {agent.model} has a context window of {_ctx:,} tokens, "
|
||||
f"which is below the minimum {MINIMUM_CONTEXT_LENGTH:,} required "
|
||||
f"by Hermes Agent. Choose a model with at least "
|
||||
f"{MINIMUM_CONTEXT_LENGTH // 1000}K context. If your server "
|
||||
f"reports a window smaller than the model's true window, set "
|
||||
f"model.context_length in config.yaml to the real value "
|
||||
f"(this must be at least {MINIMUM_CONTEXT_LENGTH // 1000}K)."
|
||||
f"{MINIMUM_CONTEXT_LENGTH // 1000}K context, or set "
|
||||
f"model.context_length in config.yaml to override."
|
||||
)
|
||||
|
||||
# Inject context engine tool schemas (e.g. lcm_grep, lcm_describe, lcm_expand).
|
||||
|
||||
@@ -42,14 +42,6 @@ from utils import base_url_host_matches, base_url_hostname, env_var_enabled, ato
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Max consecutive successful credential-pool token refreshes of the SAME entry
|
||||
# on a persistent auth failure before we give up and let the fallback chain
|
||||
# activate. A single-entry OAuth pool can re-mint a fresh token indefinitely
|
||||
# even when the upstream keeps rejecting it, so without this cap the retry loop
|
||||
# spins forever and never reaches ``_try_activate_fallback``. See #26080.
|
||||
_MAX_AUTH_REFRESH_ATTEMPTS = 2
|
||||
|
||||
|
||||
def _ra():
|
||||
"""Lazy ``run_agent`` reference for test-patch routing."""
|
||||
import run_agent
|
||||
@@ -783,30 +775,6 @@ def recover_with_credential_pool(
|
||||
return False, has_retried_429
|
||||
refreshed = pool.try_refresh_current()
|
||||
if refreshed is not None:
|
||||
# ``try_refresh_current()`` re-mints a fresh OAuth token and reports
|
||||
# success even when the upstream keeps rejecting it — a single-entry
|
||||
# pool (common for OAuth/Max subscribers) has nothing to rotate to,
|
||||
# so a bare "refreshed → retry" loop spins forever on the same dead
|
||||
# token and the configured fallback never activates. Cap consecutive
|
||||
# same-entry refreshes and fall through to fallback once exceeded.
|
||||
# See #26080.
|
||||
refreshed_id = getattr(refreshed, "id", None)
|
||||
if refreshed_id is not None:
|
||||
refresh_counts = getattr(agent, "_auth_pool_refresh_counts", None)
|
||||
if refresh_counts is None:
|
||||
refresh_counts = {}
|
||||
agent._auth_pool_refresh_counts = refresh_counts
|
||||
refresh_key = (agent.provider, refreshed_id)
|
||||
refresh_counts[refresh_key] = refresh_counts.get(refresh_key, 0) + 1
|
||||
if refresh_counts[refresh_key] > _MAX_AUTH_REFRESH_ATTEMPTS:
|
||||
_ra().logger.warning(
|
||||
"Credential auth failure persists after %s refreshes for "
|
||||
"pool entry %s — treating as unrecoverable and allowing "
|
||||
"fallback to activate.",
|
||||
refresh_counts[refresh_key] - 1,
|
||||
refreshed_id,
|
||||
)
|
||||
return False, has_retried_429
|
||||
_ra().logger.info(f"Credential auth failure — refreshed pool entry {getattr(refreshed, 'id', '?')}")
|
||||
agent._swap_credential(refreshed)
|
||||
return True, has_retried_429
|
||||
@@ -1078,34 +1046,6 @@ def restore_primary_runtime(agent) -> bool:
|
||||
api_mode=rt.get("compressor_api_mode", ""),
|
||||
)
|
||||
|
||||
# ── Re-select from the credential pool if one is available ──
|
||||
# The snapshot's api_key was captured at construction time. Across
|
||||
# turns the pool may have rotated (token revocation, billing/rate-limit
|
||||
# exhaustion, cooldown), leaving the snapshot key stale. Restoring it
|
||||
# blindly re-fails on the first request and burns through the remaining
|
||||
# pool entries before cross-provider fallback even gets a chance. Ask
|
||||
# the pool for its current best entry and swap the live credential in.
|
||||
# When the pool is absent, empty, or the entry has no usable key, we
|
||||
# keep the snapshot key (the existing behavior). Fixes #25205.
|
||||
pool = getattr(agent, "_credential_pool", None)
|
||||
if pool is not None and pool.has_available():
|
||||
entry = pool.select()
|
||||
if entry is not None:
|
||||
entry_key = (
|
||||
getattr(entry, "runtime_api_key", None)
|
||||
or getattr(entry, "access_token", "")
|
||||
)
|
||||
if entry_key:
|
||||
# ``_swap_credential`` rebuilds the OpenAI/Anthropic client,
|
||||
# reapplies base-url-scoped headers, and carries the
|
||||
# accumulated base_url / OAuth-detection fixes (#33163).
|
||||
agent._swap_credential(entry)
|
||||
logger.info(
|
||||
"Restore re-selected pool entry %s (%s)",
|
||||
getattr(entry, "id", "?"),
|
||||
getattr(entry, "label", "?"),
|
||||
)
|
||||
|
||||
# ── Reset fallback chain for the new turn ──
|
||||
agent._fallback_activated = False
|
||||
agent._fallback_index = 0
|
||||
@@ -1281,11 +1221,7 @@ def dump_api_request_debug(
|
||||
dump_payload["error"] = error_info
|
||||
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
|
||||
# Sanitize the session ID into a traversal-free path segment — it can
|
||||
# originate from untrusted input (X-Hermes-Session-Id header), and an
|
||||
# unsanitized "../"-shaped ID would write the dump outside logs_dir.
|
||||
safe_sid = _ra()._safe_session_filename_component(agent.session_id)
|
||||
dump_file = agent.logs_dir / f"request_dump_{safe_sid}_{timestamp}.json"
|
||||
dump_file = agent.logs_dir / f"request_dump_{agent.session_id}_{timestamp}.json"
|
||||
|
||||
# Redact secrets before persisting/printing. This dump captures the
|
||||
# full request body (system prompt, tool defs, context-embedded
|
||||
@@ -1484,15 +1420,6 @@ def create_openai_client(agent, client_kwargs: dict, *, reason: str, shared: boo
|
||||
keepalive_http = agent._build_keepalive_http_client(client_kwargs.get("base_url", ""))
|
||||
if keepalive_http is not None:
|
||||
client_kwargs["http_client"] = keepalive_http
|
||||
# Delegate all rate-limit / 5xx retry to hermes's outer conversation loop,
|
||||
# which honors Retry-After and applies adaptive/jittered backoff. The OpenAI
|
||||
# SDK default (max_retries=2) uses its own 1-2s backoff that ignores
|
||||
# Retry-After and double-retries inside our loop — the same deadlock the
|
||||
# Anthropic clients hit (#26293). This is the single chokepoint every primary
|
||||
# OpenAI/aggregator client passes through (init, switch_model, recovery,
|
||||
# restore, request-scoped); auxiliary_client builds its own clients and keeps
|
||||
# SDK retries because it is NOT wrapped by the conversation loop.
|
||||
client_kwargs.setdefault("max_retries", 0)
|
||||
# Uses the module-level `OpenAI` name, resolved lazily on first
|
||||
# access via __getattr__ below. Tests patch via `run_agent.OpenAI`.
|
||||
client = _ra().OpenAI(**client_kwargs)
|
||||
@@ -1572,10 +1499,6 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
|
||||
# _client_kwargs is a dict — snapshot a shallow copy so mutating the
|
||||
# live dict doesn't poison the rollback target.
|
||||
_snapshot["_client_kwargs"] = dict(getattr(agent, "_client_kwargs", {}) or {})
|
||||
# Snapshot the credential pool reference so a failed client rebuild can
|
||||
# restore the original pool (issue #52727: pool reload is part of this
|
||||
# switch and must be reversible on rollback).
|
||||
_snapshot["_credential_pool"] = getattr(agent, "_credential_pool", _MISSING)
|
||||
|
||||
try:
|
||||
# Clear the per-config context_length override so the new model's
|
||||
@@ -1600,36 +1523,8 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
|
||||
if api_key:
|
||||
agent.api_key = api_key
|
||||
|
||||
# ── Reload credential pool for the new provider (issue #52727) ──
|
||||
# Without this, ``recover_with_credential_pool`` sees a
|
||||
# ``pool.provider != agent.provider`` mismatch and short-circuits,
|
||||
# leaving the new provider with no rotation/recovery on 401/429 and
|
||||
# burning the original pool's entries. Only reload when the provider
|
||||
# actually changed (or the pool was missing) — re-selecting the same
|
||||
# provider must not churn the pool reference. A reload failure is
|
||||
# logged + swallowed: the switch itself must still complete.
|
||||
old_norm = (old_provider or "").strip().lower()
|
||||
new_norm = (new_provider or "").strip().lower()
|
||||
if old_norm != new_norm or getattr(agent, "_credential_pool", None) is None:
|
||||
try:
|
||||
from agent.credential_pool import load_pool
|
||||
agent._credential_pool = load_pool(new_provider)
|
||||
except Exception as _pool_exc: # noqa: BLE001
|
||||
logger.warning(
|
||||
"switch_model: credential pool reload failed for %s (%s); "
|
||||
"continuing without pool rotation this turn",
|
||||
new_provider, _pool_exc,
|
||||
)
|
||||
|
||||
# ── Build new client ──
|
||||
if (new_provider or "").strip().lower() == "moa":
|
||||
from agent.moa_loop import MoAClient
|
||||
|
||||
agent.api_key = api_key or "moa-virtual-provider"
|
||||
agent.base_url = "moa://local"
|
||||
agent._client_kwargs = {}
|
||||
agent.client = MoAClient(agent.model or "default")
|
||||
elif api_mode == "anthropic_messages":
|
||||
if api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import (
|
||||
build_anthropic_client,
|
||||
resolve_anthropic_token,
|
||||
@@ -2209,21 +2104,8 @@ def looks_like_codex_intermediate_ack(
|
||||
user_message: str,
|
||||
assistant_content: str,
|
||||
messages: List[Dict[str, Any]],
|
||||
require_workspace: bool = True,
|
||||
) -> bool:
|
||||
"""Detect a planning/ack message that should continue instead of ending the turn.
|
||||
|
||||
``require_workspace`` (default True) keeps the original codex-coding scope:
|
||||
the ack must reference a filesystem/repo workspace. The conversation loop
|
||||
passes ``require_workspace=False`` when the user has explicitly opted into
|
||||
intent-ack continuation for all api_modes (``agent.intent_ack_continuation``
|
||||
is ``true`` or a model-list), so general autonomous workflows ("I'll run a
|
||||
health check on the server", "I'll start the deployment") — which carry a
|
||||
future-ack and an action verb but no filesystem reference — are caught too.
|
||||
The future-ack + short-content + no-prior-tools + action-verb requirements
|
||||
always apply, which is what keeps conversational "I'll help you brainstorm"
|
||||
replies from tripping it.
|
||||
"""
|
||||
"""Detect a planning/ack message that should continue instead of ending the turn."""
|
||||
if any(isinstance(msg, dict) and msg.get("role") == "tool" for msg in messages):
|
||||
return False
|
||||
|
||||
@@ -2276,67 +2158,17 @@ def looks_like_codex_intermediate_ack(
|
||||
"path",
|
||||
)
|
||||
|
||||
assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
|
||||
if not assistant_mentions_action:
|
||||
return False
|
||||
|
||||
# Opted-in (all-api_mode) path: a future-ack + action verb + no prior tool
|
||||
# call is enough — the user asked us to keep going when the model only
|
||||
# announces intent, regardless of whether a filesystem is involved.
|
||||
if not require_workspace:
|
||||
return True
|
||||
|
||||
user_text = (user_message or "").strip().lower()
|
||||
user_targets_workspace = (
|
||||
any(marker in user_text for marker in workspace_markers)
|
||||
or "~/" in user_text
|
||||
or "/" in user_text
|
||||
)
|
||||
assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
|
||||
assistant_targets_workspace = any(
|
||||
marker in assistant_text for marker in workspace_markers
|
||||
)
|
||||
return user_targets_workspace or assistant_targets_workspace
|
||||
|
||||
|
||||
def intent_ack_continuation_mode(agent) -> str:
|
||||
"""Classify the resolved intent-ack continuation mode for this turn.
|
||||
|
||||
Returns one of:
|
||||
* ``"off"`` — never continue.
|
||||
* ``"codex_only"`` — historical scope: continue only on the
|
||||
``codex_responses`` api_mode, and only for codebase/workspace acks
|
||||
(``require_workspace=True``).
|
||||
* ``"all"`` — user opted in for every api_mode; continue on any
|
||||
future-ack + action verb (``require_workspace=False``).
|
||||
|
||||
Mirrors the four-mode shape of ``agent.tool_use_enforcement``: ``"auto"``
|
||||
(default) → codex_only; ``True``/"true"/"always"/"yes"/"on" → all;
|
||||
``False``/"false"/"never"/"no"/"off" → off; ``list`` → all when a substring
|
||||
matches the active model name, else off.
|
||||
"""
|
||||
mode = getattr(agent, "_intent_ack_continuation", "auto")
|
||||
|
||||
if mode is True or (isinstance(mode, str) and mode.lower() in {"true", "always", "yes", "on"}):
|
||||
return "all"
|
||||
if mode is False or (isinstance(mode, str) and mode.lower() in {"false", "never", "no", "off"}):
|
||||
return "off"
|
||||
if isinstance(mode, list):
|
||||
model_lower = (agent.model or "").lower()
|
||||
return "all" if any(p.lower() in model_lower for p in mode if isinstance(p, str)) else "off"
|
||||
# "auto" or any unrecognised value — historical codex-only behavior.
|
||||
return "codex_only" if agent.api_mode == "codex_responses" else "off"
|
||||
|
||||
|
||||
def intent_ack_continuation_enabled(agent) -> bool:
|
||||
"""Whether intent-ack continuation should fire at all for this turn.
|
||||
|
||||
The ``codex_ack_continuations < 2`` per-turn cap and the
|
||||
``looks_like_codex_intermediate_ack`` detector are applied by the caller;
|
||||
this only decides the on/off gate. Callers that also need to know whether
|
||||
the workspace requirement applies should use ``intent_ack_continuation_mode``
|
||||
directly (``"codex_only"`` ⇒ require_workspace=True, ``"all"`` ⇒ False).
|
||||
"""
|
||||
return intent_ack_continuation_mode(agent) != "off"
|
||||
return (user_targets_workspace or assistant_targets_workspace) and assistant_mentions_action
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -673,9 +673,6 @@ def _build_anthropic_client_with_bearer_hook(
|
||||
kwargs = {
|
||||
"timeout": timeout_obj,
|
||||
"http_client": http_client,
|
||||
# Delegate retry to hermes's outer loop (honors Retry-After); the SDK
|
||||
# default max_retries=2 ignores it and double-retries. (#26293)
|
||||
"max_retries": 0,
|
||||
# The SDK requires *something* for api_key/auth_token. Our
|
||||
# event hook overrides Authorization per request so this value
|
||||
# is never sent. The sentinel string makes accidental leaks
|
||||
@@ -760,12 +757,6 @@ def build_anthropic_client(
|
||||
_read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
|
||||
kwargs = {
|
||||
"timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
|
||||
# Delegate all rate-limit / 5xx retry to hermes's outer conversation
|
||||
# loop, which honors Retry-After. The SDK default (max_retries=2) uses
|
||||
# its own 1-2s backoff that ignores Retry-After and double-retries
|
||||
# inside our loop — burning request slots against a bucket that won't
|
||||
# refill for minutes. (#26293)
|
||||
"max_retries": 0,
|
||||
}
|
||||
if normalized_base_url:
|
||||
# Azure Anthropic endpoints require an ``api-version`` query parameter.
|
||||
@@ -861,9 +852,6 @@ def build_anthropic_bedrock_client(region: str):
|
||||
return _anthropic_sdk.AnthropicBedrock(
|
||||
aws_region=region,
|
||||
timeout=Timeout(timeout=900.0, connect=10.0),
|
||||
# Delegate retry to hermes's outer loop (honors Retry-After); the SDK
|
||||
# default max_retries=2 ignores it and double-retries. (#26293)
|
||||
max_retries=0,
|
||||
default_headers={"anthropic-beta": ",".join([*_COMMON_BETAS, _CONTEXT_1M_BETA])},
|
||||
)
|
||||
|
||||
@@ -926,72 +914,44 @@ def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
|
||||
return None
|
||||
|
||||
|
||||
def _read_claude_code_credentials_from_file() -> Optional[Dict[str, Any]]:
|
||||
"""Read Claude Code OAuth credentials from ~/.claude/.credentials.json.
|
||||
|
||||
Returns dict with {accessToken, refreshToken?, expiresAt?, source} or None.
|
||||
"""
|
||||
cred_path = Path.home() / ".claude" / ".credentials.json"
|
||||
if not cred_path.exists():
|
||||
return None
|
||||
try:
|
||||
data = json.loads(cred_path.read_text(encoding="utf-8"))
|
||||
except (json.JSONDecodeError, OSError, IOError) as e:
|
||||
logger.debug("Failed to read ~/.claude/.credentials.json: %s", e)
|
||||
return None
|
||||
|
||||
oauth_data = data.get("claudeAiOauth")
|
||||
if not (oauth_data and isinstance(oauth_data, dict)):
|
||||
return None
|
||||
access_token = oauth_data.get("accessToken", "")
|
||||
if not access_token:
|
||||
return None
|
||||
return {
|
||||
"accessToken": access_token,
|
||||
"refreshToken": oauth_data.get("refreshToken", ""),
|
||||
"expiresAt": oauth_data.get("expiresAt", 0),
|
||||
"source": "claude_code_credentials_file",
|
||||
}
|
||||
|
||||
|
||||
def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
|
||||
"""Read refreshable Claude Code OAuth credentials.
|
||||
|
||||
Reads from two possible sources and reconciles them:
|
||||
Checks two sources in order:
|
||||
1. macOS Keychain (Darwin only) — "Claude Code-credentials" entry
|
||||
2. ~/.claude/.credentials.json file
|
||||
|
||||
Selection rules when both are present:
|
||||
- If exactly one is non-expired, prefer that one. (Handles the case
|
||||
where Claude Code refreshes one source but not the other — observed
|
||||
in the wild on Claude Code 2.1.x.)
|
||||
- Otherwise, prefer the source with the later ``expiresAt`` so that
|
||||
any subsequent refresh uses the most recent ``refreshToken``.
|
||||
|
||||
This intentionally excludes ~/.claude.json primaryApiKey. Opencode's
|
||||
subscription flow is OAuth/setup-token based with refreshable credentials,
|
||||
and native direct Anthropic provider usage should follow that path rather
|
||||
than auto-detecting Claude's first-party managed key.
|
||||
|
||||
Returns dict with {accessToken, refreshToken?, expiresAt?, source} or None.
|
||||
Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
|
||||
"""
|
||||
# Try macOS Keychain first (covers Claude Code >=2.1.114)
|
||||
kc_creds = _read_claude_code_credentials_from_keychain()
|
||||
file_creds = _read_claude_code_credentials_from_file()
|
||||
if kc_creds:
|
||||
return kc_creds
|
||||
|
||||
if kc_creds and file_creds:
|
||||
kc_valid = is_claude_code_token_valid(kc_creds)
|
||||
file_valid = is_claude_code_token_valid(file_creds)
|
||||
if kc_valid and not file_valid:
|
||||
return kc_creds
|
||||
if file_valid and not kc_valid:
|
||||
return file_creds
|
||||
# Both valid or both expired: prefer the later expiresAt so the
|
||||
# downstream refresh path uses the freshest refresh_token.
|
||||
kc_exp = kc_creds.get("expiresAt", 0) or 0
|
||||
file_exp = file_creds.get("expiresAt", 0) or 0
|
||||
return kc_creds if kc_exp >= file_exp else file_creds
|
||||
# Fall back to JSON file
|
||||
cred_path = Path.home() / ".claude" / ".credentials.json"
|
||||
if cred_path.exists():
|
||||
try:
|
||||
data = json.loads(cred_path.read_text(encoding="utf-8"))
|
||||
oauth_data = data.get("claudeAiOauth")
|
||||
if oauth_data and isinstance(oauth_data, dict):
|
||||
access_token = oauth_data.get("accessToken", "")
|
||||
if access_token:
|
||||
return {
|
||||
"accessToken": access_token,
|
||||
"refreshToken": oauth_data.get("refreshToken", ""),
|
||||
"expiresAt": oauth_data.get("expiresAt", 0),
|
||||
"source": "claude_code_credentials_file",
|
||||
}
|
||||
except (json.JSONDecodeError, OSError, IOError) as e:
|
||||
logger.debug("Failed to read ~/.claude/.credentials.json: %s", e)
|
||||
|
||||
return kc_creds or file_creds
|
||||
return None
|
||||
|
||||
|
||||
def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool:
|
||||
@@ -1074,40 +1034,8 @@ def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False)
|
||||
|
||||
|
||||
def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]:
|
||||
"""Attempt to refresh an expired Claude Code OAuth token.
|
||||
|
||||
Claude Code's OAuth refresh tokens are single-use: a successful refresh
|
||||
rotates the pair and invalidates the old refresh token. Claude Code itself
|
||||
also refreshes on its own schedule (IDE/CLI activity), so by the time
|
||||
Hermes notices an expired token, Claude Code may have already rotated it.
|
||||
POSTing our now-stale refresh token in that window races Claude Code and
|
||||
fails with ``invalid_grant``.
|
||||
|
||||
So before refreshing, re-read the live credential sources. If Claude Code
|
||||
has already produced a valid token, adopt it and skip the POST entirely.
|
||||
Only fall back to refreshing ourselves when no fresh credential is found.
|
||||
"""
|
||||
# Claude Code may have already refreshed — adopt its token rather than
|
||||
# racing it with our (possibly already-rotated) refresh token. Only adopt
|
||||
# when the live re-read produced a DIFFERENT token with a real future
|
||||
# expiry: re-adopting the same credential we were just handed would be a
|
||||
# no-op, and a 0/absent ``expiresAt`` means "managed key / unknown expiry"
|
||||
# (see is_claude_code_token_valid) which must NOT be treated as a fresh
|
||||
# refresh here.
|
||||
current = read_claude_code_credentials()
|
||||
if current:
|
||||
current_token = current.get("accessToken", "")
|
||||
current_exp = current.get("expiresAt", 0) or 0
|
||||
if (
|
||||
current_token
|
||||
and current_token != creds.get("accessToken", "")
|
||||
and current_exp > 0
|
||||
and is_claude_code_token_valid(current)
|
||||
):
|
||||
logger.debug("Adopted Claude Code's already-refreshed OAuth token")
|
||||
return current_token
|
||||
|
||||
refresh_token = (current or {}).get("refreshToken", "") or creds.get("refreshToken", "")
|
||||
"""Attempt to refresh an expired Claude Code OAuth token."""
|
||||
refresh_token = creds.get("refreshToken", "")
|
||||
if not refresh_token:
|
||||
logger.debug("No refresh token available — cannot refresh")
|
||||
return None
|
||||
|
||||
@@ -102,7 +102,6 @@ OpenAI = _OpenAIProxy() # module-level name, resolves lazily on call/isinstance
|
||||
|
||||
from agent.credential_pool import load_pool
|
||||
from agent.model_metadata import MINIMUM_CONTEXT_LENGTH, get_model_context_length
|
||||
from agent.process_bootstrap import build_keepalive_http_client
|
||||
from hermes_cli.config import get_hermes_home
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from utils import base_url_host_matches, base_url_hostname, env_float, model_forces_max_completion_tokens, normalize_proxy_env_vars
|
||||
@@ -110,23 +109,6 @@ from utils import base_url_host_matches, base_url_hostname, env_float, model_for
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _openai_http_client_kwargs(
|
||||
base_url: Optional[str],
|
||||
*,
|
||||
async_mode: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""Inject keepalive httpx client with env-only proxy (not macOS system proxy)."""
|
||||
client = build_keepalive_http_client(str(base_url or ""), async_mode=async_mode)
|
||||
if client is None:
|
||||
return {}
|
||||
return {"http_client": client}
|
||||
|
||||
|
||||
def _create_openai_client(*, api_key: str, base_url: str, **kwargs: Any) -> Any:
|
||||
kwargs = {**_openai_http_client_kwargs(base_url), **kwargs}
|
||||
return OpenAI(api_key=api_key, base_url=base_url, **kwargs)
|
||||
|
||||
|
||||
# ── Interrupt protection for atomic auxiliary tasks ──────────────────────
|
||||
# Some auxiliary tasks must NOT be aborted mid-flight by a gateway interrupt
|
||||
# (e.g. an incoming user message while the agent is busy). Context
|
||||
@@ -684,28 +666,6 @@ def _pool_runtime_base_url(entry: Any, fallback: str = "") -> str:
|
||||
return str(url or "").strip().rstrip("/")
|
||||
|
||||
|
||||
# Hostnames (lowercase, exact) that the auxiliary Anthropic path is allowed to
|
||||
# be pointed at via config.yaml model.base_url. Anything else falls back to the
|
||||
# Anthropic default — operators routing main-session traffic through a
|
||||
# non-Anthropic host (e.g. OpenRouter, OpenAI) with provider=anthropic in config
|
||||
# must NOT have that foreign host leak into the auxiliary client. See #52608.
|
||||
_ANTHROPIC_COMPATIBLE_HOSTS = frozenset({
|
||||
"api.anthropic.com",
|
||||
})
|
||||
|
||||
|
||||
def _is_anthropic_compatible_host(url: str) -> bool:
|
||||
"""Return True if ``url``'s hostname is an Anthropic endpoint we trust for aux calls."""
|
||||
if not url:
|
||||
return False
|
||||
try:
|
||||
from urllib.parse import urlparse
|
||||
host = (urlparse(url).hostname or "").strip().lower().rstrip(".")
|
||||
return host in _ANTHROPIC_COMPATIBLE_HOSTS
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _nous_min_key_ttl_seconds() -> int:
|
||||
try:
|
||||
return max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800")))
|
||||
@@ -1632,7 +1592,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
_merged_aux = _apply_user_default_headers(extra.get("default_headers"))
|
||||
if _merged_aux:
|
||||
extra["default_headers"] = _merged_aux
|
||||
_client = _create_openai_client(api_key=api_key, base_url=base_url, **extra)
|
||||
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
|
||||
_client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
|
||||
return _client, model
|
||||
|
||||
@@ -1672,7 +1632,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
_merged_aux2 = _apply_user_default_headers(extra.get("default_headers"))
|
||||
if _merged_aux2:
|
||||
extra["default_headers"] = _merged_aux2
|
||||
_client = _create_openai_client(api_key=api_key, base_url=base_url, **extra)
|
||||
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
|
||||
_client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
|
||||
return _client, model
|
||||
|
||||
@@ -1687,21 +1647,20 @@ def _try_openrouter(explicit_api_key: str = None, model: str = None) -> Tuple[Op
|
||||
pool_present, entry = _select_pool_entry("openrouter")
|
||||
if pool_present:
|
||||
or_key = explicit_api_key or _pool_runtime_api_key(entry)
|
||||
if or_key:
|
||||
base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
|
||||
logger.debug("Auxiliary client: OpenRouter via pool")
|
||||
return _create_openai_client(api_key=or_key, base_url=base_url,
|
||||
default_headers=build_or_headers()), model or _OPENROUTER_MODEL
|
||||
# Pool exists but is exhausted (no usable runtime key) — fall through to
|
||||
# the OPENROUTER_API_KEY env-var path rather than failing outright.
|
||||
logger.debug("Auxiliary client: OpenRouter pool exhausted, trying OPENROUTER_API_KEY")
|
||||
if not or_key:
|
||||
_mark_provider_unhealthy("openrouter", ttl=60)
|
||||
return None, None
|
||||
base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
|
||||
logger.debug("Auxiliary client: OpenRouter via pool")
|
||||
return OpenAI(api_key=or_key, base_url=base_url,
|
||||
default_headers=build_or_headers()), model or _OPENROUTER_MODEL
|
||||
|
||||
or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY")
|
||||
if not or_key:
|
||||
_mark_provider_unhealthy("openrouter", ttl=60)
|
||||
return None, None
|
||||
logger.debug("Auxiliary client: OpenRouter")
|
||||
return _create_openai_client(api_key=or_key, base_url=OPENROUTER_BASE_URL,
|
||||
return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
|
||||
default_headers=build_or_headers()), model or _OPENROUTER_MODEL
|
||||
|
||||
|
||||
@@ -1794,7 +1753,7 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
return None, None
|
||||
base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/")
|
||||
return (
|
||||
_create_openai_client(
|
||||
OpenAI(
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
),
|
||||
@@ -2071,7 +2030,7 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
|
||||
if _custom_headers:
|
||||
_extra["default_headers"] = _custom_headers
|
||||
if custom_mode == "codex_responses":
|
||||
real_client = _create_openai_client(api_key=custom_key, base_url=_clean_base, **_extra)
|
||||
real_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
|
||||
return CodexAuxiliaryClient(real_client, model), model
|
||||
if custom_mode == "anthropic_messages":
|
||||
# Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
|
||||
@@ -2085,14 +2044,14 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
|
||||
"Custom endpoint declares api_mode=anthropic_messages but the "
|
||||
"anthropic SDK is not installed — falling back to OpenAI-wire."
|
||||
)
|
||||
return _create_openai_client(api_key=custom_key, base_url=_clean_base, **_extra), model
|
||||
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
|
||||
return (
|
||||
AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
|
||||
model,
|
||||
)
|
||||
# URL-based anthropic detection for custom endpoints that didn't set
|
||||
# api_mode explicitly (e.g. kimi.com/coding reached via custom config).
|
||||
_fallback_client = _create_openai_client(api_key=custom_key, base_url=_clean_base, **_extra)
|
||||
_fallback_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
|
||||
_fallback_client = _maybe_wrap_anthropic(
|
||||
_fallback_client, model, custom_key, custom_base, custom_mode,
|
||||
)
|
||||
@@ -2121,7 +2080,7 @@ def _build_xai_oauth_aux_client(model: str) -> Tuple[Optional[Any], Optional[str
|
||||
return None, None
|
||||
api_key, base_url = resolved
|
||||
logger.debug("Auxiliary client: xAI OAuth (%s via Responses API)", model)
|
||||
real_client = _create_openai_client(api_key=api_key, base_url=base_url)
|
||||
real_client = OpenAI(api_key=api_key, base_url=base_url)
|
||||
return CodexAuxiliaryClient(real_client, model), model
|
||||
|
||||
|
||||
@@ -2158,7 +2117,7 @@ def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
|
||||
return None, None
|
||||
base_url = _CODEX_AUX_BASE_URL
|
||||
logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", model)
|
||||
real_client = _create_openai_client(
|
||||
real_client = OpenAI(
|
||||
api_key=codex_token,
|
||||
base_url=base_url,
|
||||
default_headers=_codex_cloudflare_headers(codex_token),
|
||||
@@ -2258,7 +2217,7 @@ def _try_azure_foundry(
|
||||
if _dq:
|
||||
extra["default_query"] = _dq
|
||||
|
||||
client = _create_openai_client(api_key=api_key, base_url=_clean_base, **extra)
|
||||
client = OpenAI(api_key=api_key, base_url=_clean_base, **extra)
|
||||
|
||||
if runtime_api_mode == "codex_responses":
|
||||
# GPT-5.x / o-series / codex models on Azure Foundry are
|
||||
@@ -2297,16 +2256,9 @@ def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optiona
|
||||
if not token:
|
||||
return None, None
|
||||
|
||||
# Allow base URL override from config.yaml model.base_url, but only when:
|
||||
# 1. the configured provider is anthropic (otherwise a non-Anthropic
|
||||
# base_url, e.g. Codex endpoint, would leak into Anthropic requests), AND
|
||||
# 2. the override URL actually points at an Anthropic-compatible endpoint.
|
||||
# Without gate (2), operators who route main-session traffic through a
|
||||
# non-Anthropic provider that accepts Anthropic-format requests (e.g.
|
||||
# OpenRouter at openrouter.ai/api/v1, with provider=anthropic in config.yaml)
|
||||
# would have every auxiliary side-channel call (memory extractors,
|
||||
# reflection, vision, title generation) 401 from the foreign host —
|
||||
# see issue #52608.
|
||||
# Allow base URL override from config.yaml model.base_url, but only
|
||||
# when the configured provider is anthropic — otherwise a non-Anthropic
|
||||
# base_url (e.g. Codex endpoint) would leak into Anthropic requests.
|
||||
base_url = _pool_runtime_base_url(entry, _ANTHROPIC_DEFAULT_BASE_URL) if pool_present else _ANTHROPIC_DEFAULT_BASE_URL
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
@@ -2316,7 +2268,7 @@ def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optiona
|
||||
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
if cfg_provider == "anthropic":
|
||||
cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
if cfg_base_url and _is_anthropic_compatible_host(cfg_base_url):
|
||||
if cfg_base_url:
|
||||
base_url = cfg_base_url
|
||||
except Exception:
|
||||
pass
|
||||
@@ -2802,25 +2754,6 @@ def _is_model_incompatible_error(exc: Exception) -> bool:
|
||||
))
|
||||
|
||||
|
||||
def _is_invalid_aux_response_error(exc: Exception) -> bool:
|
||||
"""Detect provider responses that authenticated but cannot serve aux shape.
|
||||
|
||||
Some OpenAI-compatible routes return HTTP 200 with an empty/malformed
|
||||
ChatCompletion instead of a normal provider error. That is still a
|
||||
provider/model capability failure for auxiliary tasks: downstream callers
|
||||
need ``choices[0].message`` and should be able to continue through the
|
||||
same fallback path as explicit model-incompatibility errors.
|
||||
"""
|
||||
if not isinstance(exc, RuntimeError):
|
||||
return False
|
||||
msg = str(exc).lower()
|
||||
return (
|
||||
"auxiliary " in msg
|
||||
and "llm returned invalid response" in msg
|
||||
and "choices[0].message" in msg
|
||||
)
|
||||
|
||||
|
||||
def _evict_cached_clients(provider: str) -> None:
|
||||
"""Drop cached auxiliary clients for a provider so fresh creds are used."""
|
||||
normalized = _normalize_aux_provider(provider)
|
||||
@@ -3643,37 +3576,6 @@ def _resolve_auto(
|
||||
# config.yaml (auxiliary.<task>.provider) still win over this.
|
||||
main_provider = str(runtime_provider or _read_main_provider() or "")
|
||||
main_model = str(runtime_model or _read_main_model() or "")
|
||||
|
||||
# MoA virtual provider: the "model" is a preset name (e.g. "opus-gpt") and
|
||||
# there is no real "moa" HTTP endpoint, so resolving an aux client against
|
||||
# provider="moa"/model=<preset> sends the preset name as the model id and
|
||||
# the provider 400s ("opus-gpt is not a valid model ID"). Auxiliary tasks
|
||||
# (title generation, compression, vision, …) don't need the reference
|
||||
# fan-out — they should run on the aggregator, which is the preset's acting
|
||||
# model. Resolve the MoA preset to its aggregator slot and continue Step 1
|
||||
# with that real provider+model. Mirrors the MoA context-length resolution.
|
||||
if main_provider == "moa":
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from hermes_cli.moa_config import resolve_moa_preset
|
||||
|
||||
_preset = resolve_moa_preset(load_config().get("moa") or {}, main_model)
|
||||
_agg = _preset.get("aggregator") or {}
|
||||
_agg_provider = str(_agg.get("provider") or "").strip()
|
||||
_agg_model = str(_agg.get("model") or "").strip()
|
||||
if _agg_provider and _agg_model and _agg_provider.lower() != "moa":
|
||||
main_provider = _agg_provider
|
||||
main_model = _agg_model
|
||||
# The MoA virtual runtime carries a non-HTTP base_url
|
||||
# ("moa://local") and a placeholder api_key; they belong to the
|
||||
# facade, not the aggregator's real provider. Drop them so the
|
||||
# aggregator resolves through its own provider credentials.
|
||||
runtime_base_url = ""
|
||||
runtime_api_key = ""
|
||||
runtime_api_mode = ""
|
||||
except Exception:
|
||||
logger.debug("MoA aux resolution to aggregator failed", exc_info=True)
|
||||
|
||||
if (main_provider and main_model
|
||||
and main_provider not in {"auto", ""}):
|
||||
resolved_provider = main_provider
|
||||
@@ -3820,10 +3722,6 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
|
||||
_merged_async = _apply_user_default_headers(async_kwargs.get("default_headers"))
|
||||
if _merged_async:
|
||||
async_kwargs["default_headers"] = _merged_async
|
||||
async_kwargs = {
|
||||
**_openai_http_client_kwargs(sync_base_url, async_mode=True),
|
||||
**async_kwargs,
|
||||
}
|
||||
return AsyncOpenAI(**async_kwargs), model
|
||||
|
||||
|
||||
@@ -4034,7 +3932,7 @@ def resolve_provider_client(
|
||||
"but no Codex OAuth token found (run: hermes model)")
|
||||
return None, None
|
||||
final_model = _normalize_resolved_model(model, provider)
|
||||
raw_client = _create_openai_client(
|
||||
raw_client = OpenAI(
|
||||
api_key=codex_token,
|
||||
base_url=_CODEX_AUX_BASE_URL,
|
||||
default_headers=_codex_cloudflare_headers(codex_token),
|
||||
@@ -4115,7 +4013,7 @@ def resolve_provider_client(
|
||||
_merged_custom = _apply_user_default_headers(extra.get("default_headers"))
|
||||
if _merged_custom:
|
||||
extra["default_headers"] = _merged_custom
|
||||
client = _create_openai_client(api_key=custom_key, base_url=_clean_base, **extra)
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
|
||||
client = _wrap_if_needed(client, final_model, custom_base, custom_key)
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
@@ -4219,7 +4117,7 @@ def resolve_provider_client(
|
||||
_fb_headers = _apply_user_default_headers(_fb_extra.get("default_headers"))
|
||||
if _fb_headers:
|
||||
_fb_extra["default_headers"] = _fb_headers
|
||||
client = _create_openai_client(api_key=custom_key, base_url=_fb_clean, **_fb_extra)
|
||||
client = OpenAI(api_key=custom_key, base_url=_fb_clean, **_fb_extra)
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
sync_anthropic = AnthropicAuxiliaryClient(
|
||||
@@ -4228,7 +4126,7 @@ def resolve_provider_client(
|
||||
if async_mode:
|
||||
return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model
|
||||
return sync_anthropic, final_model
|
||||
client = _create_openai_client(api_key=custom_key, base_url=_clean_base2, **_extra2)
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
|
||||
# codex_responses or inherited auto-detect (via _wrap_if_needed).
|
||||
# _wrap_if_needed reads the closed-over `api_mode` (the task-level
|
||||
# override). Named-provider entry api_mode=codex_responses also
|
||||
@@ -4370,7 +4268,7 @@ def resolve_provider_client(
|
||||
_merged_main = _apply_user_default_headers(headers)
|
||||
if _merged_main:
|
||||
headers = _merged_main
|
||||
client = _create_openai_client(api_key=api_key, base_url=base_url,
|
||||
client = OpenAI(api_key=api_key, base_url=base_url,
|
||||
**({"default_headers": headers} if headers else {}))
|
||||
|
||||
# Copilot GPT-5+ models (except gpt-5-mini) require the Responses
|
||||
@@ -4906,7 +4804,7 @@ def _refresh_nous_auxiliary_client(
|
||||
return None, model
|
||||
|
||||
fresh_key, fresh_base_url = runtime
|
||||
sync_client = _create_openai_client(api_key=fresh_key, base_url=fresh_base_url)
|
||||
sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url)
|
||||
final_model = model
|
||||
|
||||
current_loop = None
|
||||
@@ -5489,24 +5387,10 @@ def _build_call_kwargs(
|
||||
# ``/anthropic`` endpoint reached through the OpenAI SDK wrapper), where
|
||||
# max_tokens is a MANDATORY field — omitting it is a hard 400. Keep it only
|
||||
# there.
|
||||
#
|
||||
# NVIDIA NIM (integrate.api.nvidia.com and local NIM endpoints) is a
|
||||
# second exception: some models—notably minimaxai/minimax-m3—return HTTP
|
||||
# 200 with an empty choices[] payload when max_tokens is omitted. The main
|
||||
# NVIDIA chat path already sends an output cap via the provider profile;
|
||||
# preserve it on the auxiliary path too.
|
||||
_effective_base = base_url or (
|
||||
_current_custom_base_url() if provider == "custom" else ""
|
||||
)
|
||||
_provider_norm = str(provider or "").strip().lower()
|
||||
_is_nvidia_nim = (
|
||||
_provider_norm in {"nvidia", "nvidia-nim", "nim", "build-nvidia", "nemotron"}
|
||||
or base_url_host_matches(_effective_base, "integrate.api.nvidia.com")
|
||||
)
|
||||
if (
|
||||
_is_anthropic_compat_endpoint(provider, _effective_base)
|
||||
or _is_nvidia_nim
|
||||
):
|
||||
if _is_anthropic_compat_endpoint(provider, _effective_base):
|
||||
kwargs["max_tokens"] = max_tokens
|
||||
|
||||
if tools:
|
||||
@@ -5561,9 +5445,6 @@ def _validate_llm_response(response: Any, task: str = None) -> Any:
|
||||
if not choices or not hasattr(choices[0], "message"):
|
||||
raise AttributeError("missing choices[0].message")
|
||||
except (AttributeError, TypeError, IndexError) as exc:
|
||||
recovered = _recover_aux_response_message(response)
|
||||
if recovered is not None:
|
||||
return recovered
|
||||
response_type = type(response).__name__
|
||||
response_preview = str(response)[:120]
|
||||
raise RuntimeError(
|
||||
@@ -5575,64 +5456,6 @@ def _validate_llm_response(response: Any, task: str = None) -> Any:
|
||||
return response
|
||||
|
||||
|
||||
def _recover_aux_response_message(response: Any) -> Optional[Any]:
|
||||
"""Synthesize chat-completions shape from Responses-style text fields.
|
||||
|
||||
Auxiliary callers consume ``choices[0].message``. Some compatible
|
||||
endpoints return text outside ``choices`` (for example ``output_text`` or
|
||||
``output`` items). Preserve that response before declaring it malformed.
|
||||
"""
|
||||
text = _extract_aux_response_text(response)
|
||||
if not text:
|
||||
return None
|
||||
|
||||
choice = SimpleNamespace(
|
||||
message=SimpleNamespace(content=text),
|
||||
finish_reason=getattr(response, "finish_reason", None) or "stop",
|
||||
)
|
||||
try:
|
||||
response.choices = [choice]
|
||||
return response
|
||||
except Exception:
|
||||
return SimpleNamespace(
|
||||
id=getattr(response, "id", ""),
|
||||
model=getattr(response, "model", ""),
|
||||
object=getattr(response, "object", "chat.completion"),
|
||||
choices=[choice],
|
||||
usage=getattr(response, "usage", None),
|
||||
)
|
||||
|
||||
|
||||
def _extract_aux_response_text(response: Any) -> str:
|
||||
output_text = _obj_get(response, "output_text")
|
||||
if isinstance(output_text, str) and output_text.strip():
|
||||
return output_text.strip()
|
||||
|
||||
output = _obj_get(response, "output")
|
||||
if not isinstance(output, list):
|
||||
return ""
|
||||
|
||||
parts: List[str] = []
|
||||
for item in output:
|
||||
item_type = _obj_get(item, "type")
|
||||
if item_type and item_type != "message":
|
||||
continue
|
||||
for part in (_obj_get(item, "content") or []):
|
||||
part_type = _obj_get(part, "type")
|
||||
if part_type in {"output_text", "text", None}:
|
||||
text = _obj_get(part, "text")
|
||||
if isinstance(text, str) and text.strip():
|
||||
parts.append(text.strip())
|
||||
return "\n".join(parts).strip()
|
||||
|
||||
|
||||
def _obj_get(obj: Any, key: str, default: Any = None) -> Any:
|
||||
value = getattr(obj, key, default)
|
||||
if value is default and isinstance(obj, dict):
|
||||
value = obj.get(key, default)
|
||||
return value
|
||||
|
||||
|
||||
def call_llm(
|
||||
task: str = None,
|
||||
*,
|
||||
@@ -6030,21 +5853,11 @@ def call_llm(
|
||||
# When the provider returns a 429 rate-limit (not billing), fall
|
||||
# back to an alternative provider instead of exhausting retries
|
||||
# against the same rate-limited endpoint.
|
||||
#
|
||||
# ── Auth error fallback (#21165) ─────────────────────────────
|
||||
# When the resolved provider returns 401 and neither the Nous
|
||||
# refresh path nor explicit provider credential refresh applies,
|
||||
# fall back to an alternative provider instead of dropping the
|
||||
# auxiliary task on the floor (silent compression failure /
|
||||
# message loss). Auth is NOT a capacity error: it only bypasses
|
||||
# the explicit-provider gate when the user is in auto mode.
|
||||
should_fallback = (
|
||||
_is_auth_error(first_err)
|
||||
or _is_payment_error(first_err)
|
||||
_is_payment_error(first_err)
|
||||
or _is_connection_error(first_err)
|
||||
or _is_rate_limit_error(first_err)
|
||||
or _is_model_incompatible_error(first_err)
|
||||
or _is_invalid_aux_response_error(first_err)
|
||||
)
|
||||
# Respect explicit provider choice for transient errors (auth, request
|
||||
# validation, etc.) but allow fallback when the provider clearly cannot
|
||||
@@ -6067,12 +5880,9 @@ def call_llm(
|
||||
or _is_connection_error(first_err)
|
||||
or _is_rate_limit_error(first_err)
|
||||
or _is_model_incompatible_error(first_err)
|
||||
or _is_invalid_aux_response_error(first_err)
|
||||
)
|
||||
if should_fallback and (is_auto or is_capacity_error):
|
||||
if _is_auth_error(first_err):
|
||||
reason = "auth error"
|
||||
elif _is_payment_error(first_err):
|
||||
if _is_payment_error(first_err):
|
||||
reason = "payment error"
|
||||
# Resolve the actual provider label (resolved_provider may be
|
||||
# "auto"; the client's base_url tells us which backend got the
|
||||
@@ -6085,8 +5895,6 @@ def call_llm(
|
||||
reason = "rate limit"
|
||||
elif _is_model_incompatible_error(first_err):
|
||||
reason = "model incompatible with route"
|
||||
elif _is_invalid_aux_response_error(first_err):
|
||||
reason = "invalid provider response"
|
||||
else:
|
||||
reason = "connection error"
|
||||
logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
|
||||
@@ -6521,17 +6329,11 @@ async def async_call_llm(
|
||||
raise
|
||||
|
||||
# ── Payment / connection / rate-limit fallback (mirrors sync call_llm) ──
|
||||
# Auth error fallback (#21165): a 401 that survived the refresh path
|
||||
# falls back in auto mode just like the sync call_llm() path. Auth is
|
||||
# NOT a capacity error, so on an explicit provider it still respects
|
||||
# the user's choice (handled by the is_auto/is_capacity_error gate).
|
||||
should_fallback = (
|
||||
_is_auth_error(first_err)
|
||||
or _is_payment_error(first_err)
|
||||
_is_payment_error(first_err)
|
||||
or _is_connection_error(first_err)
|
||||
or _is_rate_limit_error(first_err)
|
||||
or _is_model_incompatible_error(first_err)
|
||||
or _is_invalid_aux_response_error(first_err)
|
||||
)
|
||||
# Capacity errors (payment/quota/connection/rate-limit) bypass the
|
||||
# explicit-provider gate — the provider cannot serve the request
|
||||
@@ -6546,12 +6348,9 @@ async def async_call_llm(
|
||||
or _is_connection_error(first_err)
|
||||
or _is_rate_limit_error(first_err)
|
||||
or _is_model_incompatible_error(first_err)
|
||||
or _is_invalid_aux_response_error(first_err)
|
||||
)
|
||||
if should_fallback and (is_auto or is_capacity_error):
|
||||
if _is_auth_error(first_err):
|
||||
reason = "auth error"
|
||||
elif _is_payment_error(first_err):
|
||||
if _is_payment_error(first_err):
|
||||
reason = "payment error"
|
||||
_mark_provider_unhealthy(
|
||||
_recoverable_pool_provider(resolved_provider, client) or resolved_provider
|
||||
@@ -6560,8 +6359,6 @@ async def async_call_llm(
|
||||
reason = "rate limit"
|
||||
elif _is_model_incompatible_error(first_err):
|
||||
reason = "model incompatible with route"
|
||||
elif _is_invalid_aux_response_error(first_err):
|
||||
reason = "invalid provider response"
|
||||
else:
|
||||
reason = "connection error"
|
||||
logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",
|
||||
|
||||
@@ -28,7 +28,6 @@ from typing import Any, Dict, Optional
|
||||
from hermes_cli.timeouts import get_provider_request_timeout, get_provider_stale_timeout
|
||||
from hermes_constants import PARTIAL_STREAM_STUB_ID, FINISH_REASON_LENGTH
|
||||
from agent.error_classifier import FailoverReason
|
||||
from agent.gemini_native_adapter import is_native_gemini_base_url
|
||||
from agent.model_metadata import is_local_endpoint
|
||||
from agent.message_sanitization import (
|
||||
_sanitize_surrogates,
|
||||
@@ -38,18 +37,6 @@ from tools.terminal_tool import is_persistent_env
|
||||
from utils import base_url_host_matches, base_url_hostname, env_float, env_int
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
_OPENROUTER_PROVIDER_SORT_VALUES = {"throughput", "latency", "price"}
|
||||
|
||||
# When the fallback chain is fully exhausted on a non-rate-limit failure
|
||||
# (e.g. every provider returns a non-retryable client error like HTTP 400),
|
||||
# arm a short cooldown so the NEXT turn's restore_primary_runtime stays gated
|
||||
# and does not reset _fallback_index=0 to replay the entire chain again.
|
||||
# Without this, a client/gateway that re-submits immediately would re-marshal
|
||||
# the full (potentially 80k-token) context once per provider every turn and
|
||||
# can drive a constrained host into memory/swap exhaustion. Rate-limit /
|
||||
# billing reasons keep their own 60s cooldown (set above); this is the
|
||||
# narrower non-rate-limit case. See issue #24996.
|
||||
_FALLBACK_EXHAUSTED_COOLDOWN_S = 5.0
|
||||
|
||||
|
||||
def _ra():
|
||||
@@ -128,23 +115,6 @@ def _is_openai_codex_backend(agent) -> bool:
|
||||
)
|
||||
|
||||
|
||||
def _validated_openrouter_provider_sort(raw_sort: Any) -> Optional[str]:
|
||||
"""Return a normalized OpenRouter provider.sort value or None."""
|
||||
if not isinstance(raw_sort, str):
|
||||
return None
|
||||
sort_value = raw_sort.strip().lower()
|
||||
if not sort_value:
|
||||
return None
|
||||
if sort_value in _OPENROUTER_PROVIDER_SORT_VALUES:
|
||||
return sort_value
|
||||
logger.warning(
|
||||
"Ignoring invalid OpenRouter provider.sort value %r (allowed: %s)",
|
||||
raw_sort,
|
||||
", ".join(sorted(_OPENROUTER_PROVIDER_SORT_VALUES)),
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
def _env_float(name: str, default: float) -> float:
|
||||
try:
|
||||
return float(os.getenv(name, str(default)))
|
||||
@@ -259,11 +229,6 @@ def interruptible_api_call(agent, api_kwargs: dict):
|
||||
invalidate_runtime_client(region)
|
||||
raise
|
||||
result["response"] = normalize_converse_response(raw_response)
|
||||
elif agent.provider == "moa":
|
||||
# MoA is a virtual chat-completions provider backed by the
|
||||
# in-process MoAClient facade. Do not rebuild a request-local
|
||||
# OpenAI client from the virtual runtime metadata.
|
||||
result["response"] = agent.client.chat.completions.create(**api_kwargs)
|
||||
else:
|
||||
request_client = _set_request_client(
|
||||
agent._create_request_openai_client(
|
||||
@@ -733,9 +698,8 @@ def build_api_kwargs(agent, api_messages: list) -> dict:
|
||||
_prefs["ignore"] = agent.providers_ignored
|
||||
if agent.providers_order:
|
||||
_prefs["order"] = agent.providers_order
|
||||
_provider_sort = _validated_openrouter_provider_sort(agent.provider_sort)
|
||||
if _provider_sort:
|
||||
_prefs["sort"] = _provider_sort
|
||||
if agent.provider_sort:
|
||||
_prefs["sort"] = agent.provider_sort
|
||||
if agent.provider_require_parameters:
|
||||
_prefs["require_parameters"] = True
|
||||
if agent.provider_data_collection:
|
||||
@@ -1051,23 +1015,18 @@ def build_assistant_message(agent, assistant_message, finish_reason: str) -> dic
|
||||
"arguments": tool_call.function.arguments
|
||||
},
|
||||
}
|
||||
# Tool-call arguments are intentionally NOT redacted here. This
|
||||
# dict enters the in-memory conversation history that is replayed
|
||||
# to the model on every subsequent turn AND persisted to state.db,
|
||||
# which is itself replayed verbatim on session resume
|
||||
# (get_messages_as_conversation). Masking a credential to `***`
|
||||
# here poisons that replay: the model reads back its own
|
||||
# `PGPASSWORD='***' psql ...` call and copies the placeholder into
|
||||
# the next tool call, breaking every credential-dependent command
|
||||
# on the second turn (#43083). The masking also provided no real
|
||||
# protection — the same secret still leaks verbatim through tool
|
||||
# OUTPUT (file contents, command output, diffs, the compaction
|
||||
# block), none of which this pass ever touched. Keeping secrets
|
||||
# out of the replayable store is a separate tokenization/vault
|
||||
# concern, not something arg-redaction can deliver without
|
||||
# breaking replay. Storage-time redaction remains governed by the
|
||||
# `security.redact_secrets` toggle. (#19798 introduced this;
|
||||
# #43083 removed it.)
|
||||
# Defence-in-depth: redact credentials from tool call arguments
|
||||
# before they enter conversation history. Tool execution uses the
|
||||
# raw API response object, not this dict, so redacting the
|
||||
# persisted shape is safe and only affects storage. Catches the
|
||||
# case where a model accidentally inlines a secret into a tool
|
||||
# call (e.g. `terminal(command="curl -H 'Authorization: Bearer
|
||||
# sk-...'")`). (#19798)
|
||||
if isinstance(tc_dict["function"]["arguments"], str):
|
||||
from agent.redact import redact_sensitive_text
|
||||
tc_dict["function"]["arguments"] = redact_sensitive_text(
|
||||
tc_dict["function"]["arguments"]
|
||||
)
|
||||
# Preserve extra_content (e.g. Gemini thought_signature) so it
|
||||
# is sent back on subsequent API calls. Without this, Gemini 3
|
||||
# thinking models reject the request with a 400 error.
|
||||
@@ -1134,22 +1093,8 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
|
||||
if (not fallback_already_active) or (primary_provider and current_provider == primary_provider):
|
||||
agent._rate_limited_until = time.monotonic() + 60
|
||||
if agent._fallback_index >= len(agent._fallback_chain):
|
||||
# Chain exhausted. If we actually walked a non-empty chain and the
|
||||
# failure was NOT a rate-limit/billing event (those already armed
|
||||
# their own 60s cooldown above), arm a short cooldown so the next
|
||||
# turn's restore_primary_runtime stays gated instead of resetting
|
||||
# _fallback_index=0 and re-marshaling the whole context across every
|
||||
# provider again. Guards the cross-turn replay storm in #24996.
|
||||
if (
|
||||
len(agent._fallback_chain) > 0
|
||||
and reason not in {FailoverReason.rate_limit, FailoverReason.billing}
|
||||
):
|
||||
_existing_cooldown = getattr(agent, "_rate_limited_until", 0) or 0
|
||||
agent._rate_limited_until = max(
|
||||
_existing_cooldown,
|
||||
time.monotonic() + _FALLBACK_EXHAUSTED_COOLDOWN_S,
|
||||
)
|
||||
return False
|
||||
|
||||
fb = agent._fallback_chain[agent._fallback_index]
|
||||
agent._fallback_index += 1
|
||||
fb_provider = (fb.get("provider") or "").strip().lower()
|
||||
@@ -1265,16 +1210,14 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
|
||||
agent._transport_cache.clear()
|
||||
agent._fallback_activated = True
|
||||
|
||||
# Rebind the credential pool to the fallback provider when the provider
|
||||
# changes. Keeping the primary pool attached would make downstream
|
||||
# recovery (rate_limit / billing / auth) mutate the wrong credential
|
||||
# set and can overwrite the fallback's base_url back to the primary
|
||||
# endpoint. See #33163.
|
||||
#
|
||||
# Clear the credential pool when the fallback provider doesn't match
|
||||
# the pool's provider. The pool was seeded for the primary provider;
|
||||
# leaving it attached means downstream recovery (rate_limit / billing /
|
||||
# auth) calls ``_swap_credential`` with a primary entry which overwrites
|
||||
# the agent's ``base_url`` back to the primary's endpoint — every
|
||||
# fallback request then 404s against the wrong host. See #33163.
|
||||
# When the fallback shares the pool's provider (e.g. both openrouter
|
||||
# entries with different routing) the pool is preserved. When the
|
||||
# providers differ, load the fallback provider's own pool if one exists
|
||||
# so provider-specific rotation continues to work after the switch.
|
||||
# entries with different routing) the pool is preserved.
|
||||
_existing_pool = getattr(agent, "_credential_pool", None)
|
||||
if _existing_pool is not None:
|
||||
_pool_provider = (getattr(_existing_pool, "provider", "") or "").strip().lower()
|
||||
@@ -1285,22 +1228,6 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
|
||||
fb_provider, fb_model, _pool_provider,
|
||||
)
|
||||
agent._credential_pool = None
|
||||
if getattr(agent, "_credential_pool", None) is None:
|
||||
try:
|
||||
from agent.credential_pool import load_pool
|
||||
|
||||
fallback_pool = load_pool(fb_provider)
|
||||
if fallback_pool and fallback_pool.has_credentials():
|
||||
agent._credential_pool = fallback_pool
|
||||
logger.info(
|
||||
"Fallback to %s/%s: attached fallback credential pool",
|
||||
fb_provider, fb_model,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
"Fallback to %s/%s: could not attach credential pool: %s",
|
||||
fb_provider, fb_model, exc,
|
||||
)
|
||||
|
||||
# Honor per-provider / per-model request_timeout_seconds for the
|
||||
# fallback target (same knob the primary client uses). None = use
|
||||
@@ -1531,9 +1458,8 @@ def handle_max_iterations(agent, messages: list, api_call_count: int) -> str:
|
||||
provider_preferences["ignore"] = agent.providers_ignored
|
||||
if agent.providers_order:
|
||||
provider_preferences["order"] = agent.providers_order
|
||||
_provider_sort = _validated_openrouter_provider_sort(agent.provider_sort)
|
||||
if _provider_sort:
|
||||
provider_preferences["sort"] = _provider_sort
|
||||
if agent.provider_sort:
|
||||
provider_preferences["sort"] = agent.provider_sort
|
||||
if provider_preferences and (
|
||||
(agent.provider or "").strip().lower() == "openrouter"
|
||||
or agent._is_openrouter_url()
|
||||
@@ -1912,6 +1838,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
stream_kwargs = {
|
||||
**api_kwargs,
|
||||
"stream": True,
|
||||
"stream_options": {"include_usage": True},
|
||||
"timeout": _httpx.Timeout(
|
||||
connect=_conn_cap,
|
||||
read=_stream_read_timeout,
|
||||
@@ -1919,14 +1846,6 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
pool=_conn_cap,
|
||||
),
|
||||
}
|
||||
# OpenAI's `stream_options={"include_usage": True}` drives usage
|
||||
# accounting on OpenAI-compatible endpoints (incl. the Gemini OpenAI
|
||||
# compat shim and aggregators like OpenRouter). Google's *native*
|
||||
# Gemini REST endpoint rejects the keyword outright
|
||||
# (`Completions.create() got an unexpected keyword argument
|
||||
# 'stream_options'`), so omit it only for that endpoint.
|
||||
if not is_native_gemini_base_url(agent.base_url):
|
||||
stream_kwargs["stream_options"] = {"include_usage": True}
|
||||
request_client = _set_request_client(
|
||||
agent._create_request_openai_client(
|
||||
reason="chat_completion_stream_request",
|
||||
@@ -2327,15 +2246,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
_fire_first_delta()
|
||||
agent._fire_reasoning_delta(thinking_text)
|
||||
|
||||
# Return the native Anthropic Message for downstream processing.
|
||||
# If the stream was interrupted (the event loop broke out above on
|
||||
# agent._interrupt_requested), do NOT call get_final_message() — on
|
||||
# a partially-consumed stream the SDK may hang draining remaining
|
||||
# events or return a Message with incomplete tool_use blocks (partial
|
||||
# JSON in `input`). The outer poll loop raises InterruptedError, so
|
||||
# this return value is discarded anyway.
|
||||
if agent._interrupt_requested:
|
||||
return None
|
||||
# Return the native Anthropic Message for downstream processing
|
||||
return stream.get_final_message()
|
||||
|
||||
def _call():
|
||||
@@ -2480,19 +2391,12 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
diag=request_client_holder.get("diag"),
|
||||
)
|
||||
_close_request_client_once("stream_mid_tool_retry_cleanup")
|
||||
if agent.api_mode == "anthropic_messages":
|
||||
try:
|
||||
agent._anthropic_client.close()
|
||||
agent._rebuild_anthropic_client()
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
try:
|
||||
agent._replace_primary_openai_client(
|
||||
reason="stream_mid_tool_retry_pool_cleanup"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
agent._replace_primary_openai_client(
|
||||
reason="stream_mid_tool_retry_pool_cleanup"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
continue
|
||||
|
||||
# SSE error events from proxies (e.g. OpenRouter sends
|
||||
@@ -2540,19 +2444,12 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
_close_request_client_once("stream_retry_cleanup")
|
||||
# Also rebuild the primary client to purge
|
||||
# any dead connections from the pool.
|
||||
if agent.api_mode == "anthropic_messages":
|
||||
try:
|
||||
agent._anthropic_client.close()
|
||||
agent._rebuild_anthropic_client()
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
try:
|
||||
agent._replace_primary_openai_client(
|
||||
reason="stream_retry_pool_cleanup"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
agent._replace_primary_openai_client(
|
||||
reason="stream_retry_pool_cleanup"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
continue
|
||||
# Retries exhausted. Log the final failure with
|
||||
# full diagnostic detail (chain, headers,
|
||||
@@ -2664,17 +2561,6 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
_stream_stale_timeout = max(_stream_stale_timeout_base, 240.0)
|
||||
else:
|
||||
_stream_stale_timeout = _stream_stale_timeout_base
|
||||
# Reasoning-model floor: known reasoning models (Nemotron 3 Ultra,
|
||||
# OpenAI o1/o3, Anthropic Opus 4.x thinking, DeepSeek R1, Qwen QwQ,
|
||||
# xAI Grok reasoning, etc.) routinely exceed the default 180s chat-
|
||||
# model threshold during their thinking phase. The cloud gateway
|
||||
# upstream kills the socket first, surfacing as BrokenPipeError.
|
||||
# Raises the floor only — never overrides explicit user config
|
||||
# (handled by get_provider_stale_timeout above).
|
||||
from agent.reasoning_timeouts import get_reasoning_stale_timeout_floor
|
||||
_reasoning_floor = get_reasoning_stale_timeout_floor(api_kwargs.get("model"))
|
||||
if _reasoning_floor is not None:
|
||||
_stream_stale_timeout = max(_stream_stale_timeout, _reasoning_floor)
|
||||
|
||||
t = threading.Thread(target=_call, daemon=True)
|
||||
t.start()
|
||||
@@ -2723,17 +2609,10 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
pass
|
||||
# Rebuild the primary client too — its connection pool
|
||||
# may hold dead sockets from the same provider outage.
|
||||
if agent.api_mode == "anthropic_messages":
|
||||
try:
|
||||
agent._anthropic_client.close()
|
||||
agent._rebuild_anthropic_client()
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
try:
|
||||
agent._replace_primary_openai_client(reason="stale_stream_pool_cleanup")
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
agent._replace_primary_openai_client(reason="stale_stream_pool_cleanup")
|
||||
except Exception:
|
||||
pass
|
||||
# Reset the timer so we don't kill repeatedly while
|
||||
# the inner thread processes the closure.
|
||||
last_chunk_time["t"] = time.time()
|
||||
@@ -2809,30 +2688,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
role="assistant", content=_partial_text, tool_calls=None,
|
||||
reasoning_content=None,
|
||||
)
|
||||
# Detect provider output-layer content filtering (e.g. MiniMax
|
||||
# "output new_sensitive (1027)", Azure/OpenAI content_filter,
|
||||
# Anthropic safety refusal). The raw error is about to be
|
||||
# swallowed into a finish_reason=length stub, so classify it HERE
|
||||
# while we still have it and stamp the stub. Retrying such a
|
||||
# content-deterministic filter on the same primary just re-hits
|
||||
# the filter — the conversation loop reads this tag and activates
|
||||
# the fallback chain instead of burning continuation retries.
|
||||
# error_classifier is the single source of truth for "what counts
|
||||
# as a content filter" (#32421).
|
||||
_content_filter_terminated = False
|
||||
try:
|
||||
from agent.error_classifier import classify_api_error, FailoverReason
|
||||
_cls = classify_api_error(
|
||||
result["error"],
|
||||
provider=str(getattr(agent, "provider", "") or ""),
|
||||
model=str(getattr(agent, "model", "") or ""),
|
||||
)
|
||||
_content_filter_terminated = (
|
||||
_cls.reason == FailoverReason.content_policy_blocked
|
||||
)
|
||||
except Exception:
|
||||
_content_filter_terminated = False
|
||||
_stub = SimpleNamespace(
|
||||
return SimpleNamespace(
|
||||
id=PARTIAL_STREAM_STUB_ID,
|
||||
model=getattr(agent, "model", "unknown"),
|
||||
choices=[SimpleNamespace(
|
||||
@@ -2841,9 +2697,6 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
||||
usage=None,
|
||||
_dropped_tool_names=_partial_names or None,
|
||||
)
|
||||
if _content_filter_terminated:
|
||||
_stub._content_filter_terminated = True
|
||||
return _stub
|
||||
raise result["error"]
|
||||
return result["response"]
|
||||
|
||||
|
||||
@@ -60,8 +60,6 @@ from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags
|
||||
|
||||
logger = logging.getLogger("hermes.coding_context")
|
||||
|
||||
CODING_TOOLSET = "coding"
|
||||
@@ -649,14 +647,12 @@ def _enabled_mcp_servers(config: Optional[dict[str, Any]]) -> list[str]:
|
||||
|
||||
|
||||
def _git(cwd: Path, *args: str) -> str:
|
||||
_popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
|
||||
try:
|
||||
out = subprocess.run(
|
||||
["git", "-C", str(cwd), *args],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=_GIT_TIMEOUT,
|
||||
**_popen_kwargs,
|
||||
)
|
||||
except (OSError, subprocess.SubprocessError):
|
||||
return ""
|
||||
|
||||
@@ -1,156 +0,0 @@
|
||||
"""Live session context-window breakdown for UI surfaces.
|
||||
|
||||
Estimates how the next provider request is composed: system prompt tiers,
|
||||
tool schemas, and conversation history. Uses the same rough char/4 heuristic
|
||||
as ``agent.model_metadata.estimate_request_tokens_rough`` so numbers align
|
||||
with compression thresholds — not exact tokenizer counts.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
||||
|
||||
_SKILLS_BLOCK_RE = re.compile(r"<available_skills>.*?</available_skills>", re.DOTALL)
|
||||
|
||||
_SUBAGENT_TOOL_NAMES = frozenset({"delegate_task"})
|
||||
|
||||
_CATEGORY_COLORS = {
|
||||
"system_prompt": "var(--context-usage-system)",
|
||||
"tool_definitions": "var(--context-usage-tools)",
|
||||
"rules": "var(--context-usage-rules)",
|
||||
"skills": "var(--context-usage-skills)",
|
||||
"mcp": "var(--context-usage-mcp)",
|
||||
"subagent_definitions": "var(--context-usage-subagents)",
|
||||
"memory": "var(--context-usage-memory)",
|
||||
"conversation": "var(--context-usage-conversation)",
|
||||
}
|
||||
|
||||
|
||||
def _chars_to_tokens(text: str) -> int:
|
||||
if not text:
|
||||
return 0
|
||||
return (len(text) + 3) // 4
|
||||
|
||||
|
||||
def _json_tokens(value: Any) -> int:
|
||||
if not value:
|
||||
return 0
|
||||
return _chars_to_tokens(json.dumps(value, ensure_ascii=False))
|
||||
|
||||
|
||||
def _tool_name(tool: dict) -> str:
|
||||
fn = tool.get("function") if isinstance(tool, dict) else None
|
||||
if isinstance(fn, dict):
|
||||
return str(fn.get("name") or "")
|
||||
return str(tool.get("name") or "")
|
||||
|
||||
|
||||
def _split_tools(tools: Sequence[dict]) -> Tuple[List[dict], List[dict], List[dict]]:
|
||||
builtin: List[dict] = []
|
||||
mcp: List[dict] = []
|
||||
subagent: List[dict] = []
|
||||
for tool in tools:
|
||||
name = _tool_name(tool)
|
||||
if name.startswith("mcp_"):
|
||||
mcp.append(tool)
|
||||
elif name in _SUBAGENT_TOOL_NAMES:
|
||||
subagent.append(tool)
|
||||
else:
|
||||
builtin.append(tool)
|
||||
return builtin, mcp, subagent
|
||||
|
||||
|
||||
def _memory_blocks(agent: Any) -> Tuple[str, str]:
|
||||
memory_block = ""
|
||||
user_block = ""
|
||||
store = getattr(agent, "_memory_store", None)
|
||||
if store is None:
|
||||
return memory_block, user_block
|
||||
try:
|
||||
if getattr(agent, "_memory_enabled", True):
|
||||
memory_block = store.format_for_system_prompt("memory") or ""
|
||||
if getattr(agent, "_user_profile_enabled", True):
|
||||
user_block = store.format_for_system_prompt("user") or ""
|
||||
except Exception:
|
||||
pass
|
||||
return memory_block, user_block
|
||||
|
||||
|
||||
def _strip_blocks(text: str, *blocks: str) -> str:
|
||||
out = text
|
||||
for block in blocks:
|
||||
if block:
|
||||
out = out.replace(block, "")
|
||||
return out.strip()
|
||||
|
||||
|
||||
def compute_session_context_breakdown(
|
||||
agent: Any,
|
||||
messages: Optional[List[dict]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Return a Cursor-style context usage breakdown for one live agent."""
|
||||
from agent.model_metadata import estimate_messages_tokens_rough
|
||||
from agent.system_prompt import build_system_prompt_parts
|
||||
|
||||
parts = build_system_prompt_parts(agent)
|
||||
stable = parts.get("stable", "") or ""
|
||||
context = parts.get("context", "") or ""
|
||||
volatile = parts.get("volatile", "") or ""
|
||||
|
||||
skills_match = _SKILLS_BLOCK_RE.search(stable)
|
||||
skills_index = skills_match.group(0) if skills_match else ""
|
||||
|
||||
memory_block, user_block = _memory_blocks(agent)
|
||||
memory_text = "\n\n".join(part for part in (memory_block, user_block) if part).strip()
|
||||
|
||||
system_core = _strip_blocks(stable, skills_index)
|
||||
system_tail = _strip_blocks(volatile, memory_block, user_block)
|
||||
system_prompt_text = "\n\n".join(part for part in (system_core, system_tail) if part).strip()
|
||||
|
||||
tools = list(getattr(agent, "tools", None) or [])
|
||||
builtin_tools, mcp_tools, subagent_tools = _split_tools(tools)
|
||||
|
||||
conversation_tokens = estimate_messages_tokens_rough(messages or [])
|
||||
|
||||
categories = [
|
||||
("system_prompt", "System prompt", _chars_to_tokens(system_prompt_text)),
|
||||
("tool_definitions", "Tool definitions", _json_tokens(builtin_tools)),
|
||||
("rules", "Rules", _chars_to_tokens(context)),
|
||||
("skills", "Skills", _chars_to_tokens(skills_index)),
|
||||
("mcp", "MCP", _json_tokens(mcp_tools)),
|
||||
("subagent_definitions", "Subagent definitions", _json_tokens(subagent_tools)),
|
||||
("memory", "Memory", _chars_to_tokens(memory_text)),
|
||||
("conversation", "Conversation", conversation_tokens),
|
||||
]
|
||||
|
||||
estimated_total = sum(tokens for _, _, tokens in categories)
|
||||
|
||||
comp = getattr(agent, "context_compressor", None)
|
||||
context_max = int(getattr(comp, "context_length", 0) or 0) if comp else 0
|
||||
measured_used = int(getattr(comp, "last_prompt_tokens", 0) or 0) if comp else 0
|
||||
context_used = measured_used if measured_used > 0 else estimated_total
|
||||
context_percent = (
|
||||
max(0, min(100, round(context_used / context_max * 100)))
|
||||
if context_max
|
||||
else 0
|
||||
)
|
||||
|
||||
return {
|
||||
"categories": [
|
||||
{
|
||||
"color": _CATEGORY_COLORS.get(category_id, "var(--ui-text-tertiary)"),
|
||||
"id": category_id,
|
||||
"label": label,
|
||||
"tokens": tokens,
|
||||
}
|
||||
for category_id, label, tokens in categories
|
||||
if tokens > 0
|
||||
],
|
||||
"context_max": context_max,
|
||||
"context_percent": context_percent,
|
||||
"context_used": context_used,
|
||||
"estimated_total": estimated_total,
|
||||
"model": getattr(agent, "model", "") or "",
|
||||
}
|
||||
@@ -12,7 +12,6 @@ from pathlib import Path
|
||||
from typing import Awaitable, Callable
|
||||
|
||||
from agent.model_metadata import estimate_tokens_rough
|
||||
from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags
|
||||
|
||||
_QUOTED_REFERENCE_VALUE = r'(?:`[^`\n]+`|"[^"\n]+"|\'[^\'\n]+\')'
|
||||
REFERENCE_PATTERN = re.compile(
|
||||
@@ -291,7 +290,6 @@ def _expand_git_reference(
|
||||
args: list[str],
|
||||
label: str,
|
||||
) -> tuple[str | None, str | None]:
|
||||
_popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", *args],
|
||||
@@ -300,7 +298,6 @@ def _expand_git_reference(
|
||||
text=True,
|
||||
timeout=30,
|
||||
stdin=subprocess.DEVNULL,
|
||||
**_popen_kwargs,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
return f"{ref.raw}: git command timed out (30s)", None
|
||||
@@ -328,9 +325,9 @@ async def _fetch_url_content(
|
||||
async def _default_url_fetcher(url: str) -> str:
|
||||
from tools.web_tools import web_extract_tool
|
||||
|
||||
raw = await web_extract_tool([url], format="markdown")
|
||||
raw = await web_extract_tool([url], format="markdown", use_llm_processing=True)
|
||||
payload = json.loads(raw)
|
||||
docs = payload.get("results", [])
|
||||
docs = payload.get("data", {}).get("documents", [])
|
||||
if not docs:
|
||||
return ""
|
||||
doc = docs[0]
|
||||
@@ -486,7 +483,6 @@ def _iter_visible_entries(path: Path, cwd: Path, limit: int) -> list[Path]:
|
||||
|
||||
|
||||
def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
|
||||
_popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["rg", "--files", str(path.relative_to(cwd))],
|
||||
@@ -495,7 +491,6 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
|
||||
text=True,
|
||||
timeout=10,
|
||||
stdin=subprocess.DEVNULL,
|
||||
**_popen_kwargs,
|
||||
)
|
||||
except (FileNotFoundError, OSError, subprocess.TimeoutExpired):
|
||||
return None
|
||||
|
||||
@@ -288,29 +288,6 @@ def replay_compression_warning(agent: Any) -> None:
|
||||
pass
|
||||
|
||||
|
||||
def conversation_history_after_compression(agent: Any, messages: list) -> Optional[list]:
    """Return the correct flush baseline after a compression boundary.

    Legacy compression rotates to a fresh child session. That child has not
    seen the compacted transcript through the normal same-turn flush path yet,
    so callers must clear ``conversation_history`` to ``None`` and let the next
    persistence call write the whole compacted list.

    In-place compaction is different: ``archive_and_compact()`` has already
    soft-archived the previous active rows and inserted ``messages`` as the new
    active live transcript under the same session id. If the same agent turn
    continues with ``conversation_history=None``, the identity-based flush path
    treats those already-persisted compacted dicts as new and appends them a
    second time, doubling the active context and retriggering compression.

    Args:
        agent: Object whose ``_last_compaction_in_place`` attribute (read with
            a ``False`` default) says whether the last compaction was in place.
        messages: The compacted message list for the current turn.

    Returns:
        A shallow copy of ``messages`` when the last compaction was in place,
        otherwise ``None``.
    """
    if getattr(agent, "_last_compaction_in_place", False):
        # A shallow copy is intentional: it captures the current compacted dict
        # identities as history while allowing later same-turn appends to the
        # caller's list to remain "new".
        return list(messages)
    return None
|
||||
|
||||
|
||||
def compress_context(
|
||||
agent: Any,
|
||||
messages: list,
|
||||
|
||||
@@ -28,7 +28,6 @@ import uuid
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.codex_responses_adapter import _summarize_user_message_for_log
|
||||
from agent.conversation_compression import conversation_history_after_compression
|
||||
from agent.display import KawaiiSpinner
|
||||
from agent.error_classifier import FailoverReason, classify_api_error
|
||||
from agent.iteration_budget import IterationBudget
|
||||
@@ -588,13 +587,6 @@ def run_conversation(
|
||||
compression_attempts = 0
|
||||
_turn_exit_reason = "unknown" # Diagnostic: why the loop ended
|
||||
|
||||
# Per-turn tally of consecutive successful credential-pool token refreshes,
|
||||
# keyed by (provider, pool-entry-id). A persistent upstream 401 lets
|
||||
# ``try_refresh_current()`` "succeed" forever on a single-entry OAuth pool,
|
||||
# so this tally caps same-entry refreshes and lets the fallback chain take
|
||||
# over instead of spinning. Reset here so each turn starts fresh. See #26080.
|
||||
agent._auth_pool_refresh_counts = {}
|
||||
|
||||
# Optional opt-in runtime: if api_mode == codex_app_server, hand the
|
||||
# turn to the codex app-server subprocess (terminal/file ops/patching
|
||||
# all run inside Codex). Default Hermes path is bypassed entirely.
|
||||
@@ -835,6 +827,7 @@ def run_conversation(
|
||||
aggregator=moa_config.get("aggregator") or {},
|
||||
temperature=float(moa_config.get("reference_temperature", 0.6) or 0.6),
|
||||
aggregator_temperature=float(moa_config.get("aggregator_temperature", 0.4) or 0.4),
|
||||
max_tokens=int(moa_config.get("max_tokens", 4096) or 4096),
|
||||
)
|
||||
if _moa_context:
|
||||
for _msg in reversed(api_messages):
|
||||
@@ -1699,56 +1692,6 @@ def run_conversation(
|
||||
|
||||
if agent.api_mode in {"chat_completions", "bedrock_converse", "anthropic_messages"}:
|
||||
assistant_message = _trunc_msg
|
||||
# ── Content-filter stream stall → fallback (#32421) ──
|
||||
# When the provider's output-layer safety filter (e.g.
|
||||
# MiniMax "output new_sensitive (1027)", Azure
|
||||
# content_filter) kills the stream mid-delivery, the
|
||||
# raw error was classified at the swallow point and the
|
||||
# stub tagged ``_content_filter_terminated``. This
|
||||
# filter is content-deterministic — continuation
|
||||
# retries against the SAME primary just re-hit it and
|
||||
# burn paid attempts (the loop used to give up with
|
||||
# "Response remained truncated after 3 continuation
|
||||
# attempts" and never consult the fallback chain).
|
||||
# Escalate to the configured fallback BEFORE retrying.
|
||||
_cf_terminated = getattr(
|
||||
response, "_content_filter_terminated", False
|
||||
)
|
||||
if (
|
||||
_cf_terminated
|
||||
and agent._fallback_index < len(agent._fallback_chain)
|
||||
):
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}🛡️ Content filter terminated "
|
||||
f"stream — activating fallback provider...",
|
||||
force=True,
|
||||
)
|
||||
agent._emit_status(
|
||||
"Content filter terminated stream; switching to fallback..."
|
||||
)
|
||||
if agent._try_activate_fallback():
|
||||
# Roll the partial content (if any was already
|
||||
# appended in a prior continuation pass) back to
|
||||
# the last clean turn so the fallback provider
|
||||
# gets a coherent continuation point.
|
||||
if truncated_response_parts:
|
||||
messages = agent._get_messages_up_to_last_assistant(messages)
|
||||
agent._session_messages = messages
|
||||
length_continue_retries = 0
|
||||
truncated_response_parts = []
|
||||
retry_count = 0
|
||||
compression_attempts = 0
|
||||
_retry.primary_recovery_attempted = False
|
||||
_retry.restart_with_rebuilt_messages = True
|
||||
break
|
||||
# No fallback available — fall through to normal
|
||||
# continuation (best-effort, may loop).
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}⚠️ No fallback provider "
|
||||
f"configured — retrying with same provider "
|
||||
f"(may re-hit filter)...",
|
||||
force=True,
|
||||
)
|
||||
if assistant_message is not None and not _trunc_has_tool_calls:
|
||||
length_continue_retries += 1
|
||||
interim_msg = agent._build_assistant_message(assistant_message, finish_reason)
|
||||
@@ -2068,21 +2011,9 @@ def run_conversation(
|
||||
agent.thinking_callback("")
|
||||
api_elapsed = time.time() - api_start_time
|
||||
agent._vprint(f"{agent.log_prefix}⚡ Interrupted during API call.", force=True)
|
||||
interrupted = True
|
||||
# Preserve any assistant text already streamed to the user
|
||||
# before the stop landed. Dropping it leaves history with no
|
||||
# record of the half-finished reply on screen, so the next turn
|
||||
# the model "forgets" what it just said — exactly what users hit
|
||||
# when they stop to redirect mid-response.
|
||||
_partial = agent._strip_think_blocks(
|
||||
getattr(agent, "_current_streamed_assistant_text", "") or ""
|
||||
).strip()
|
||||
if _partial:
|
||||
messages.append({"role": "assistant", "content": _partial})
|
||||
final_response = _partial
|
||||
else:
|
||||
final_response = f"{INTERRUPT_WAITING_FOR_MODEL_PREFIX}{api_elapsed:.1f}s elapsed)."
|
||||
agent._persist_session(messages, conversation_history)
|
||||
interrupted = True
|
||||
final_response = f"{INTERRUPT_WAITING_FOR_MODEL_PREFIX}{api_elapsed:.1f}s elapsed)."
|
||||
break
|
||||
|
||||
except Exception as api_error:
|
||||
@@ -2316,15 +2247,6 @@ def run_conversation(
|
||||
# "unknown variant `image_url`, expected `text`".
|
||||
"unknown variant `image_url`, expected `text`",
|
||||
"unknown variant image_url, expected text",
|
||||
# OpenRouter routes a request to upstream endpoints and,
|
||||
# when none of the candidate endpoints for the model accept
|
||||
# image input, returns HTTP 404 "No endpoints found that
|
||||
# support image input". Without this phrase the agent never
|
||||
# strips the images, the retry loop re-sends the same
|
||||
# rejected request until exhaustion, and the gateway leaves
|
||||
# every subsequent message queued behind the stuck turn —
|
||||
# the P1 in issue #21160. The 404 passes the 4xx gate below.
|
||||
"no endpoints found that support image input",
|
||||
)
|
||||
_err_lower = _err_body.lower()
|
||||
_looks_like_image_rejection = any(
|
||||
@@ -2896,9 +2818,10 @@ def run_conversation(
|
||||
approx_tokens=approx_tokens,
|
||||
task_id=effective_task_id,
|
||||
)
|
||||
conversation_history = conversation_history_after_compression(
|
||||
agent, messages
|
||||
)
|
||||
# Compression created a new session — clear history
|
||||
# so _flush_messages_to_session_db writes compressed
|
||||
# messages to the new session, not skipping them.
|
||||
conversation_history = None
|
||||
if len(messages) < original_len or old_ctx > _reduced_ctx:
|
||||
agent._buffer_status(
|
||||
f"🗜️ Context reduced to {_reduced_ctx:,} tokens "
|
||||
@@ -2910,25 +2833,15 @@ def run_conversation(
|
||||
# Fall through to normal error handling if compression
|
||||
# is exhausted or didn't help.
|
||||
|
||||
# Eager fallback for rate-limit errors (429 or quota exhaustion)
|
||||
# and transport errors (connection failure / timeout / provider
|
||||
# overloaded). Rate limits and billing: switch immediately —
|
||||
# the primary provider won't recover within the retry window.
|
||||
# Transport errors: allow 1 retry first (transient hiccups
|
||||
# recover), then fall back if the provider is truly unreachable.
|
||||
# Eager fallback for rate-limit errors (429 or quota exhaustion).
|
||||
# When a fallback model is configured, switch immediately instead
|
||||
# of burning through retries with exponential backoff -- the
|
||||
# primary provider won't recover within the retry window.
|
||||
is_rate_limited = classified.reason in {
|
||||
FailoverReason.rate_limit,
|
||||
FailoverReason.billing,
|
||||
}
|
||||
_is_transport_failure = classified.reason in {
|
||||
FailoverReason.timeout,
|
||||
FailoverReason.overloaded,
|
||||
}
|
||||
_should_fallback = (
|
||||
is_rate_limited
|
||||
or (_is_transport_failure and retry_count >= 2)
|
||||
)
|
||||
if _should_fallback and agent._fallback_index < len(agent._fallback_chain):
|
||||
if is_rate_limited and agent._fallback_index < len(agent._fallback_chain):
|
||||
# Don't eagerly fallback if credential pool rotation may
|
||||
# still recover. See _pool_may_recover_from_rate_limit
|
||||
# for the single-credential-pool and CloudCode-quota
|
||||
@@ -2943,10 +2856,6 @@ def run_conversation(
|
||||
agent._buffer_status(
|
||||
"⚠️ Billing or credits exhausted — switching to fallback provider..."
|
||||
)
|
||||
elif _is_transport_failure:
|
||||
agent._buffer_status(
|
||||
"⚠️ Provider unreachable — switching to fallback provider..."
|
||||
)
|
||||
else:
|
||||
agent._buffer_status("⚠️ Rate limited — switching to fallback provider...")
|
||||
if agent._try_activate_fallback(reason=classified.reason):
|
||||
@@ -3121,9 +3030,10 @@ def run_conversation(
|
||||
messages, system_message, approx_tokens=approx_tokens,
|
||||
task_id=effective_task_id,
|
||||
)
|
||||
conversation_history = conversation_history_after_compression(
|
||||
agent, messages
|
||||
)
|
||||
# Compression created a new session — clear history
|
||||
# so _flush_messages_to_session_db writes compressed
|
||||
# messages to the new session, not skipping them.
|
||||
conversation_history = None
|
||||
|
||||
# Re-estimate tokens after compression. Same-message-count
|
||||
# compression (tool-result pruning, in-place summarization)
|
||||
@@ -3287,9 +3197,10 @@ def run_conversation(
|
||||
messages, system_message, approx_tokens=approx_tokens,
|
||||
task_id=effective_task_id,
|
||||
)
|
||||
conversation_history = conversation_history_after_compression(
|
||||
agent, messages
|
||||
)
|
||||
# Compression created a new session — clear history
|
||||
# so _flush_messages_to_session_db writes compressed
|
||||
# messages to the new session, not skipping them.
|
||||
conversation_history = None
|
||||
|
||||
# Re-estimate tokens after compression. Same-message-count
|
||||
# compression (tool-result pruning, in-place summarization)
|
||||
@@ -3551,13 +3462,6 @@ def run_conversation(
|
||||
):
|
||||
_retry.primary_recovery_attempted = True
|
||||
retry_count = 0
|
||||
# Primary transport recovery starts a fresh attempt
|
||||
# cycle. Re-open fallback state so a follow-on 429 can
|
||||
# still activate fallback_providers after stale
|
||||
# pre-recovery fallback/credential-pool bookkeeping.
|
||||
_retry.has_retried_429 = False
|
||||
agent._fallback_index = 0
|
||||
agent._fallback_activated = False
|
||||
continue
|
||||
# Try fallback before giving up entirely
|
||||
if agent._has_pending_fallback():
|
||||
@@ -3623,65 +3527,6 @@ def run_conversation(
|
||||
force=True,
|
||||
)
|
||||
|
||||
# Detect thinking-timeout pattern: a known reasoning model
|
||||
# hit a transport-layer error before the first content
|
||||
# token arrived. Distinct from _is_stream_drop above
|
||||
# (which fires for large file-write stream drops) and
|
||||
# from any classifier reason that's not a transport
|
||||
# timeout. Reuses the reasoning-model allowlist from
|
||||
# agent/reasoning_timeouts.py (Fixes #52217) so the
|
||||
# trigger is consistent with what the per-model
|
||||
# stale-timeout floor covers. After the classifier
|
||||
# override at agent/error_classifier.py:720-738 (this
|
||||
# PR), transport disconnects on reasoning models route
|
||||
# to FailoverReason.timeout rather than
|
||||
# context_overflow, so this branch actually fires.
|
||||
# Detection and message text live in
|
||||
# agent.thinking_timeout_guidance so they're
|
||||
# unit-testable without driving the full retry loop.
|
||||
# (Part 2 of Fixes #52310.)
|
||||
from agent.thinking_timeout_guidance import (
|
||||
is_thinking_timeout,
|
||||
)
|
||||
_is_thinking_timeout = is_thinking_timeout(
|
||||
classified,
|
||||
_model,
|
||||
error_msg,
|
||||
)
|
||||
if _is_thinking_timeout:
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix} 💡 The model's thinking "
|
||||
f"phase exceeded the upstream proxy's idle "
|
||||
f"timeout before the first content token "
|
||||
f"arrived. This is a known issue with "
|
||||
f"reasoning models behind cloud gateways "
|
||||
f"(NVIDIA NIM, OpenAI, Anthropic, DeepSeek).",
|
||||
force=True,
|
||||
)
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix} Workarounds in priority order:",
|
||||
force=True,
|
||||
)
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix} 1. Set "
|
||||
f"`providers.{_provider}.models.{_model}.stale_timeout_seconds: 900` "
|
||||
f"in `~/.hermes/config.yaml` to extend the per-call "
|
||||
f"timeout. (Hermes's built-in floor is 600s for "
|
||||
f"known reasoning models — if you still see this "
|
||||
f"after raising, the upstream cap is even shorter.)",
|
||||
force=True,
|
||||
)
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix} 2. Lower `reasoning_budget` or set "
|
||||
f"`reasoning_effort: medium` on this model if the provider supports it.",
|
||||
force=True,
|
||||
)
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix} 3. Use a smaller / faster reasoning "
|
||||
f"model if the task doesn't require deep thinking.",
|
||||
force=True,
|
||||
)
|
||||
|
||||
logger.error(
|
||||
"%sAPI call failed after %s retries. %s | provider=%s model=%s msgs=%s tokens=~%s",
|
||||
agent.log_prefix, max_retries, _final_summary,
|
||||
@@ -3698,22 +3543,7 @@ def run_conversation(
|
||||
_final_response += f"\n\n{_billing_guidance}"
|
||||
else:
|
||||
_final_response = f"API call failed after {max_retries} retries: {_final_summary}"
|
||||
if _is_thinking_timeout:
|
||||
# Thinking-timeout guidance overrides the generic
|
||||
# stream-drop guidance — the latter is wrong for
|
||||
# this case (it suggests splitting large file
|
||||
# writes, which isn't what happened). See the
|
||||
# reasoning-model override at
|
||||
# agent/error_classifier.py:720-738 and the
|
||||
# detection block above for context.
|
||||
from agent.thinking_timeout_guidance import (
|
||||
build_thinking_timeout_guidance,
|
||||
)
|
||||
_final_response += build_thinking_timeout_guidance(
|
||||
provider=_provider,
|
||||
model=_model,
|
||||
)
|
||||
elif _is_stream_drop:
|
||||
if _is_stream_drop:
|
||||
_final_response += (
|
||||
"\n\nThe provider's stream connection keeps "
|
||||
"dropping — this often happens when generating "
|
||||
@@ -3745,12 +3575,7 @@ def run_conversation(
|
||||
_ra_raw = _resp_headers.get("retry-after") or _resp_headers.get("Retry-After")
|
||||
if _ra_raw:
|
||||
try:
|
||||
# Cap at 10 minutes. Anthropic Tier 1 input-token
|
||||
# buckets reset in ~171s, so a 120s cap caused us to
|
||||
# retry before the actual reset window and re-trip the
|
||||
# limit. 600s covers all realistic provider reset
|
||||
# windows while still rejecting pathological values. (#26293)
|
||||
_retry_after = min(float(_ra_raw), 600)
|
||||
_retry_after = min(float(_ra_raw), 120) # Cap at 2 minutes
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
wait_time = _retry_after if _retry_after else jittered_backoff(retry_count, base_delay=2.0, max_delay=60.0)
|
||||
@@ -3831,17 +3656,6 @@ def run_conversation(
|
||||
_retry.restart_with_compressed_messages = False
|
||||
continue
|
||||
|
||||
if _retry.restart_with_rebuilt_messages:
|
||||
# A content-filter stream stall (#32421) was escalated to the
|
||||
# fallback chain and the partial content rolled back. Re-issue
|
||||
# the API call against the now-active fallback provider. Refund
|
||||
# the budget/count for the stalled attempt so the fallback gets a
|
||||
# fair turn.
|
||||
api_call_count -= 1
|
||||
agent.iteration_budget.refund()
|
||||
_retry.restart_with_rebuilt_messages = False
|
||||
continue
|
||||
|
||||
if _retry.restart_with_length_continuation:
|
||||
# Progressively boost the output token budget on each retry.
|
||||
# Retry 1 → 2× base, retry 2 → 3× base, capped at 32 768.
|
||||
@@ -4416,9 +4230,10 @@ def run_conversation(
|
||||
approx_tokens=agent.context_compressor.last_prompt_tokens,
|
||||
task_id=effective_task_id,
|
||||
)
|
||||
conversation_history = conversation_history_after_compression(
|
||||
agent, messages
|
||||
)
|
||||
# Compression created a new session — clear history so
|
||||
# _flush_messages_to_session_db writes compressed messages
|
||||
# to the new session (see preflight compression comment).
|
||||
conversation_history = None
|
||||
|
||||
# Save session log incrementally (so progress is visible even if interrupted)
|
||||
agent._session_messages = messages
|
||||
@@ -4460,11 +4275,7 @@ def run_conversation(
|
||||
"as final response"
|
||||
)
|
||||
final_response = _recovered
|
||||
# Streaming delivered a fragment, not a confirmed
|
||||
# final preview. Leave response_previewed false so
|
||||
# gateway fallback delivery can send the recovered
|
||||
# text plus the abnormal-turn explanation.
|
||||
agent._response_was_previewed = False
|
||||
agent._response_was_previewed = True
|
||||
break
|
||||
|
||||
# If the previous turn already delivered real content alongside
|
||||
@@ -4709,20 +4520,14 @@ def run_conversation(
|
||||
# status from earlier failed attempts in this turn.
|
||||
agent._clear_status_buffer()
|
||||
|
||||
from agent.agent_runtime_helpers import (
|
||||
intent_ack_continuation_mode,
|
||||
)
|
||||
|
||||
_ack_mode = intent_ack_continuation_mode(agent)
|
||||
if (
|
||||
_ack_mode != "off"
|
||||
agent.api_mode == "codex_responses"
|
||||
and agent.valid_tool_names
|
||||
and codex_ack_continuations < 2
|
||||
and agent._looks_like_codex_intermediate_ack(
|
||||
user_message=user_message,
|
||||
assistant_content=final_response,
|
||||
messages=messages,
|
||||
require_workspace=(_ack_mode == "codex_only"),
|
||||
)
|
||||
):
|
||||
codex_ack_continuations += 1
|
||||
@@ -4803,11 +4608,7 @@ def run_conversation(
|
||||
"_verification_stop_synthetic": True,
|
||||
})
|
||||
agent._session_messages = messages
|
||||
# Run the verification-stop loop silently — the nudge is an
|
||||
# internal turn that should not add noise to the user's
|
||||
# terminal. Keep a debug breadcrumb in agent.log for tracing.
|
||||
logger.debug("verification stop-loop nudge issued (attempt %d)",
|
||||
agent._verification_stop_nudges)
|
||||
agent._emit_status("↻ Verification required before finishing")
|
||||
continue
|
||||
|
||||
messages.append(final_msg)
|
||||
|
||||
@@ -21,14 +21,8 @@ from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
from openai.types.chat.chat_completion_message_tool_call import (
|
||||
ChatCompletionMessageToolCall,
|
||||
Function,
|
||||
)
|
||||
|
||||
from agent.file_safety import get_read_block_error, is_write_denied
|
||||
from agent.redact import redact_sensitive_text
|
||||
from tools.environments.local import hermes_subprocess_env
|
||||
|
||||
ACP_MARKER_BASE_URL = "acp://copilot"
|
||||
_DEFAULT_TIMEOUT_SECONDS = 900.0
|
||||
@@ -100,10 +94,7 @@ def _resolve_home_dir() -> str:
|
||||
|
||||
|
||||
def _build_subprocess_env() -> dict[str, str]:
|
||||
# Copilot ACP is a model-driving CLI executor: it legitimately needs LLM
|
||||
# provider credentials. Route through the central helper so Tier-1 secrets
|
||||
# (gateway bot tokens, GitHub auth, infra) are still stripped (#29157).
|
||||
env = hermes_subprocess_env(inherit_credentials=True)
|
||||
env = os.environ.copy()
|
||||
home = _resolve_home_dir()
|
||||
env["HOME"] = home
|
||||
from hermes_constants import apply_subprocess_home_env
|
||||
@@ -233,73 +224,11 @@ def _render_message_content(content: Any) -> str:
|
||||
return str(content).strip()
|
||||
|
||||
|
||||
def _build_openai_tool_call(
    *,
    call_id: str,
    name: str,
    arguments: str,
) -> ChatCompletionMessageToolCall:
    """Build an OpenAI-compatible tool-call object for downstream handling.

    Args:
        call_id: Identifier stored as both ``id`` and ``call_id`` on the result.
        name: Name of the function/tool being called.
        arguments: Arguments payload, passed through unchanged as the
            function's ``arguments``.

    Returns:
        A ``ChatCompletionMessageToolCall`` of type ``"function"`` wrapping a
        ``Function(name=..., arguments=...)``.
    """
    return ChatCompletionMessageToolCall(
        id=call_id,
        # NOTE(review): ``call_id`` and ``response_item_id`` are passed in
        # addition to ``id``; presumably downstream handling reads them and the
        # model class tolerates extra fields -- confirm against the SDK version.
        call_id=call_id,
        response_item_id=None,
        type="function",
        function=Function(name=name, arguments=arguments),
    )
|
||||
|
||||
|
||||
def _completion_to_stream_chunks(completion: SimpleNamespace) -> list[SimpleNamespace]:
|
||||
"""Convert a one-shot ACP response into OpenAI-style stream chunks."""
|
||||
choice = completion.choices[0]
|
||||
message = choice.message
|
||||
tool_call_deltas = None
|
||||
if message.tool_calls:
|
||||
tool_call_deltas = []
|
||||
for index, tool_call in enumerate(message.tool_calls):
|
||||
tool_call_deltas.append(
|
||||
SimpleNamespace(
|
||||
index=index,
|
||||
id=getattr(tool_call, "id", None),
|
||||
type=getattr(tool_call, "type", "function"),
|
||||
function=SimpleNamespace(
|
||||
name=getattr(tool_call.function, "name", None),
|
||||
arguments=getattr(tool_call.function, "arguments", None),
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
delta = SimpleNamespace(
|
||||
role="assistant",
|
||||
content=message.content or None,
|
||||
tool_calls=tool_call_deltas,
|
||||
reasoning_content=message.reasoning_content,
|
||||
reasoning=message.reasoning,
|
||||
)
|
||||
data_chunk = SimpleNamespace(
|
||||
choices=[
|
||||
SimpleNamespace(
|
||||
index=0,
|
||||
delta=delta,
|
||||
finish_reason=choice.finish_reason,
|
||||
)
|
||||
],
|
||||
model=completion.model,
|
||||
usage=None,
|
||||
)
|
||||
usage_chunk = SimpleNamespace(
|
||||
choices=[],
|
||||
model=completion.model,
|
||||
usage=completion.usage,
|
||||
)
|
||||
return [data_chunk, usage_chunk]
|
||||
|
||||
|
||||
def _extract_tool_calls_from_text(text: str) -> tuple[list[ChatCompletionMessageToolCall], str]:
|
||||
def _extract_tool_calls_from_text(text: str) -> tuple[list[SimpleNamespace], str]:
|
||||
if not isinstance(text, str) or not text.strip():
|
||||
return [], ""
|
||||
|
||||
extracted: list[ChatCompletionMessageToolCall] = []
|
||||
extracted: list[SimpleNamespace] = []
|
||||
consumed_spans: list[tuple[int, int]] = []
|
||||
|
||||
def _try_add_tool_call(raw_json: str) -> None:
|
||||
@@ -323,10 +252,12 @@ def _extract_tool_calls_from_text(text: str) -> tuple[list[ChatCompletionMessage
|
||||
call_id = f"acp_call_{len(extracted)+1}"
|
||||
|
||||
extracted.append(
|
||||
_build_openai_tool_call(
|
||||
SimpleNamespace(
|
||||
id=call_id,
|
||||
call_id=call_id,
|
||||
name=fn_name.strip(),
|
||||
arguments=fn_args,
|
||||
response_item_id=None,
|
||||
type="function",
|
||||
function=SimpleNamespace(name=fn_name.strip(), arguments=fn_args),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -445,7 +376,6 @@ class CopilotACPClient:
|
||||
timeout: float | None = None,
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
tool_choice: Any = None,
|
||||
stream: bool = False,
|
||||
**_: Any,
|
||||
) -> Any:
|
||||
prompt_text = _format_messages_as_prompt(
|
||||
@@ -492,14 +422,11 @@ class CopilotACPClient:
|
||||
)
|
||||
finish_reason = "tool_calls" if tool_calls else "stop"
|
||||
choice = SimpleNamespace(message=assistant_message, finish_reason=finish_reason)
|
||||
completion = SimpleNamespace(
|
||||
return SimpleNamespace(
|
||||
choices=[choice],
|
||||
usage=usage,
|
||||
model=model or "copilot-acp",
|
||||
)
|
||||
if stream:
|
||||
return _completion_to_stream_chunks(completion)
|
||||
return completion
|
||||
|
||||
def _run_prompt(self, prompt_text: str, *, timeout_seconds: float) -> tuple[str, str]:
|
||||
try:
|
||||
|
||||
@@ -11,7 +11,6 @@ import uuid
|
||||
import re
|
||||
from dataclasses import dataclass, fields, replace
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
@@ -448,63 +447,6 @@ def get_pool_strategy(provider: str) -> str:
|
||||
DEFAULT_MAX_CONCURRENT_PER_CREDENTIAL = 1
|
||||
|
||||
|
||||
def _write_through_provider_state_to_global_root(
    provider_id: str, state: Dict[str, Any]
) -> None:
    """Persist a rotated OAuth ``state`` into the global-root auth.json.

    Best-effort write-through for the multi-profile rotation hazard
    (#48415 / #43589): nous, openai-codex, and xai-oauth rotate the
    refresh_token on refresh, so when a profile pool refresh rotates a grant
    it resolved from the root fallback, the rotated chain must land back in
    root. Otherwise root keeps a now-revoked refresh token and every other
    profile reading the stale root grant dies with ``refresh_token_reused`` /
    ``invalid_grant`` once its access token expires.

    Only updates ``providers.<provider_id>`` in the root store; never touches
    the profile store (the caller already saved that). Swallows all errors -- a
    failed write-through degrades to the pre-existing behavior (root stale), it
    must never break the profile's own successful save. Mirrors
    ``hermes_cli.auth._write_through_xai_oauth_to_global_root`` (which covers
    the non-pool xAI refresh path) for the credential-pool refresh path.

    Args:
        provider_id: Key under ``providers`` in the root store to update.
        state: Provider state to store; a shallow ``dict`` copy is written.

    Returns:
        None. Every failure path returns silently or logs at debug level.
    """
    try:
        global_path = auth_mod._global_auth_file_path()
    except Exception:
        return
    if global_path is None:
        # Classic mode (profile == root); the profile save already hit root.
        return

    # Seat belt: under pytest, refuse to write the real user's
    # ~/.hermes/auth.json even when HERMES_HOME points at a profile path
    # (mirrors the read-side guard in _load_global_auth_store). Uses the
    # unmodified HOME env, not Path.home() which fixtures may monkeypatch.
    if os.environ.get("PYTEST_CURRENT_TEST"):
        real_home_env = os.environ.get("HOME", "")
        if real_home_env:
            real_root = Path(real_home_env) / ".hermes" / "auth.json"
            try:
                if global_path.resolve(strict=False) == real_root.resolve(strict=False):
                    return
            except Exception:
                # If the paths cannot be compared, err on the side of not writing.
                return

    try:
        if global_path.exists():
            global_store = _load_auth_store(global_path)
        else:
            global_store = {}
        if not isinstance(global_store, dict):
            return
        # set_active=False: only the provider's state block is stored here.
        _store_provider_state(global_store, provider_id, dict(state), set_active=False)
        auth_mod._save_auth_store(global_store, global_path)
    except Exception as exc:  # pragma: no cover - best effort
        logger.debug(
            "%s pool refresh: write-through to global root failed: %s",
            provider_id,
            exc,
        )
|
||||
|
||||
|
||||
class CredentialPool:
|
||||
def __init__(self, provider: str, entries: List[PooledCredential]):
|
||||
self.provider = provider
|
||||
@@ -537,11 +479,10 @@ class CredentialPool:
|
||||
self._entries[idx] = new
|
||||
return
|
||||
|
||||
def _persist(self, *, removed_ids: Optional[List[str]] = None) -> None:
|
||||
def _persist(self) -> None:
|
||||
write_credential_pool(
|
||||
self.provider,
|
||||
[entry.to_dict() for entry in self._entries],
|
||||
removed_ids=removed_ids,
|
||||
)
|
||||
|
||||
def _is_terminal_auth_failure(
|
||||
@@ -859,28 +800,6 @@ class CredentialPool:
|
||||
try:
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
# Decide BEFORE writing whether this profile is reading the
|
||||
# grant from the global root (no own providers.<id> block) vs.
|
||||
# genuinely shadowing it. A pool refresh rotates single-use
|
||||
# OAuth refresh tokens, so a profile that resolved the grant
|
||||
# from root MUST write the rotated chain back to root too —
|
||||
# otherwise root keeps a revoked refresh token and every other
|
||||
# profile reading the stale root grant dies with
|
||||
# refresh_token_reused / invalid_grant once its access token
|
||||
# expires. This mirrors the xAI write-through in
|
||||
# hermes_cli.auth._save_xai_oauth_tokens (#43589); the pool
|
||||
# refresh path is the Codex/xAI analog reported in #48415.
|
||||
_wt_provider_id = {
|
||||
"nous": "nous",
|
||||
"openai-codex": "openai-codex",
|
||||
"xai-oauth": "xai-oauth",
|
||||
}.get(self.provider)
|
||||
write_through_to_root = bool(_wt_provider_id) and not (
|
||||
isinstance(auth_store.get("providers"), dict)
|
||||
and isinstance(
|
||||
auth_store["providers"].get(_wt_provider_id), dict
|
||||
)
|
||||
)
|
||||
if self.provider == "nous":
|
||||
state = _load_provider_state(auth_store, "nous")
|
||||
if state is None:
|
||||
@@ -936,10 +855,6 @@ class CredentialPool:
|
||||
return
|
||||
|
||||
_save_auth_store(auth_store)
|
||||
if write_through_to_root and _wt_provider_id:
|
||||
_write_through_provider_state_to_global_root(
|
||||
_wt_provider_id, state
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to sync %s pool entry back to auth store: %s", self.provider, exc)
|
||||
|
||||
@@ -1125,17 +1040,13 @@ class CredentialPool:
|
||||
logger.debug(
|
||||
"Failed to clear terminal xAI OAuth state: %s", clear_exc
|
||||
)
|
||||
removed_ids = [
|
||||
item.id for item in self._entries
|
||||
if item.source == "loopback_pkce"
|
||||
]
|
||||
self._entries = [
|
||||
item for item in self._entries
|
||||
if item.source != "loopback_pkce"
|
||||
]
|
||||
if self._current_id == entry.id:
|
||||
self._current_id = None
|
||||
self._persist(removed_ids=removed_ids)
|
||||
self._persist()
|
||||
return None
|
||||
# For openai-codex: same race as xAI/nous — another Hermes process
|
||||
# may have consumed the refresh token between our proactive sync
|
||||
@@ -1195,17 +1106,13 @@ class CredentialPool:
|
||||
logger.debug(
|
||||
"Failed to clear terminal Codex OAuth state: %s", clear_exc
|
||||
)
|
||||
removed_ids = [
|
||||
item.id for item in self._entries
|
||||
if item.source == "device_code"
|
||||
]
|
||||
self._entries = [
|
||||
item for item in self._entries
|
||||
if item.source != "device_code"
|
||||
]
|
||||
if self._current_id == entry.id:
|
||||
self._current_id = None
|
||||
self._persist(removed_ids=removed_ids)
|
||||
self._persist()
|
||||
return None
|
||||
# For nous: another process may have consumed the refresh token
|
||||
# between our proactive sync and the HTTP call. Re-sync from
|
||||
@@ -1262,17 +1169,13 @@ class CredentialPool:
|
||||
auth_mod.NOUS_DEVICE_CODE_SOURCE,
|
||||
f"manual:{auth_mod.NOUS_DEVICE_CODE_SOURCE}",
|
||||
}
|
||||
removed_ids = [
|
||||
item.id for item in self._entries
|
||||
if item.source in singleton_sources
|
||||
]
|
||||
self._entries = [
|
||||
item for item in self._entries
|
||||
if item.source not in singleton_sources
|
||||
]
|
||||
if self._current_id == entry.id:
|
||||
self._current_id = None
|
||||
self._persist(removed_ids=removed_ids)
|
||||
self._persist()
|
||||
return None
|
||||
self._mark_exhausted(entry, None)
|
||||
return None
|
||||
@@ -1434,7 +1337,7 @@ class CredentialPool:
|
||||
pruned_ids = set(entries_to_prune)
|
||||
self._entries = [e for e in self._entries if e.id not in pruned_ids]
|
||||
if cleared_any:
|
||||
self._persist(removed_ids=entries_to_prune)
|
||||
self._persist()
|
||||
return available
|
||||
|
||||
def _select_unlocked(self) -> Optional[PooledCredential]:
|
||||
@@ -1608,11 +1511,7 @@ class CredentialPool:
|
||||
replace(entry, priority=new_priority)
|
||||
for new_priority, entry in enumerate(self._entries)
|
||||
]
|
||||
write_credential_pool(
|
||||
self.provider,
|
||||
[entry.to_dict() for entry in self._entries],
|
||||
removed_ids=[removed.id],
|
||||
)
|
||||
self._persist()
|
||||
if self._current_id == removed.id:
|
||||
self._current_id = None
|
||||
return removed
|
||||
@@ -2274,11 +2173,6 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
|
||||
def load_pool(provider: str) -> CredentialPool:
|
||||
provider = (provider or "").strip().lower()
|
||||
raw_entries = read_credential_pool(provider)
|
||||
disk_ids = {
|
||||
entry.get("id")
|
||||
for entry in raw_entries
|
||||
if isinstance(entry, dict) and entry.get("id")
|
||||
}
|
||||
raw_needs_sanitization = any(
|
||||
isinstance(payload, dict)
|
||||
and sanitize_borrowed_credential_payload(payload, provider) != payload
|
||||
@@ -2307,10 +2201,8 @@ def load_pool(provider: str) -> CredentialPool:
|
||||
changed |= _normalize_pool_priorities(provider, entries)
|
||||
|
||||
if changed:
|
||||
new_ids = {entry.id for entry in entries}
|
||||
write_credential_pool(
|
||||
provider,
|
||||
[entry.to_dict() for entry in sorted(entries, key=lambda item: item.priority)],
|
||||
removed_ids=disk_ids - new_ids,
|
||||
)
|
||||
return CredentialPool(provider, entries)
|
||||
|
||||
@@ -273,21 +273,6 @@ def should_run_now(now: Optional[datetime] = None) -> bool:
|
||||
# Automatic state transitions (pure function, no LLM)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _cron_referenced_skills() -> Set[str]:
|
||||
"""Skill names referenced by any cron job (incl. paused/disabled).
|
||||
|
||||
Best-effort: a cron-module import error or corrupt jobs store must never
|
||||
break the curator, so any failure yields an empty set (no protection,
|
||||
but no crash).
|
||||
"""
|
||||
try:
|
||||
from cron.jobs import referenced_skill_names as _refs
|
||||
return _refs()
|
||||
except Exception as e:
|
||||
logger.debug("Curator could not read cron skill references: %s", e, exc_info=True)
|
||||
return set()
|
||||
|
||||
|
||||
def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int]:
|
||||
"""Walk every curator-managed skill and move active/stale/archived based on
|
||||
the latest real activity timestamp. Pinned skills are never touched.
|
||||
@@ -307,8 +292,6 @@ def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int
|
||||
stale_cutoff = now - timedelta(days=get_stale_after_days())
|
||||
archive_cutoff = now - timedelta(days=get_archive_after_days())
|
||||
|
||||
cron_referenced = _cron_referenced_skills()
|
||||
|
||||
counts = {"marked_stale": 0, "archived": 0, "reactivated": 0, "checked": 0, "seeded": 0}
|
||||
|
||||
for row in _u.agent_created_report():
|
||||
@@ -317,15 +300,6 @@ def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int
|
||||
if row.get("pinned"):
|
||||
continue
|
||||
|
||||
# A skill referenced by any cron job (incl. paused/disabled) is in
|
||||
# use by definition — resuming or the next fire must find it. The
|
||||
# scheduler only bumps usage when a job actually fires, so jobs that
|
||||
# fire less often than archive_after_days, paused jobs, and far-future
|
||||
# one-shots would otherwise have their skills aged out from under
|
||||
# them. Treat referenced skills like pinned: never auto-transition.
|
||||
if name in cron_referenced:
|
||||
continue
|
||||
|
||||
# First sight of a curation-eligible skill with no persisted record
|
||||
# (e.g. a newly-eligible built-in): anchor its clock to now and defer.
|
||||
if not row.get("_persisted", True):
|
||||
@@ -342,18 +316,6 @@ def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int
|
||||
|
||||
current = row.get("state", _u.STATE_ACTIVE)
|
||||
|
||||
# Never-used skills (use_count == 0) get a grace floor: don't archive
|
||||
# one until it is at least stale_after_days old. A use=0 skill is
|
||||
# absence of evidence, not evidence of staleness — a skill created
|
||||
# recently may simply not have had its trigger come up yet.
|
||||
never_used = int(row.get("use_count", 0) or 0) == 0
|
||||
if never_used and anchor > stale_cutoff:
|
||||
# Younger than the stale window — leave it alone entirely.
|
||||
if current == _u.STATE_STALE:
|
||||
_u.set_state(name, _u.STATE_ACTIVE)
|
||||
counts["reactivated"] += 1
|
||||
continue
|
||||
|
||||
if anchor <= archive_cutoff and current != _u.STATE_ARCHIVED:
|
||||
ok, _msg = _u.archive_skill(name)
|
||||
if ok:
|
||||
@@ -415,10 +377,8 @@ CURATOR_REVIEW_PROMPT = (
|
||||
"bodies + `references/`, `templates/`, and `scripts/` subfiles for "
|
||||
"session-specific detail — not one-session-one-skill micro-entries.\n\n"
|
||||
"Hard rules — do not violate:\n"
|
||||
"1. DO NOT touch bundled, hub-installed, or external-dir skills "
|
||||
"(`skills.external_dirs`). The candidate list below is already filtered "
|
||||
"to local curator-managed skills only; external skills are externally "
|
||||
"owned and read-only to this background curator.\n"
|
||||
"1. DO NOT touch bundled or hub-installed skills. The candidate list "
|
||||
"below is already filtered to agent-created skills only.\n"
|
||||
"2. DO NOT delete any skill. Archiving (moving the skill's directory "
|
||||
"into ~/.hermes/skills/.archive/) is the maximum destructive action. "
|
||||
"Archives are recoverable; deletion is not.\n"
|
||||
@@ -428,19 +388,10 @@ CURATOR_REVIEW_PROMPT = (
|
||||
"back load-bearing UX (slash-command entry points referenced in docs and "
|
||||
"tips) and are filtered out of the candidate list below — never resurrect "
|
||||
"one as an archive or absorb target.\n"
|
||||
"3c. DO NOT archive or prune any skill marked `cron=yes` in the candidate "
|
||||
"list. A cron job depends on it and will fail to load it on its next "
|
||||
"run. You MAY still consolidate it into an umbrella — but only because "
|
||||
"the curator rewrites cron job skill references to follow consolidations; "
|
||||
"never simply prune it.\n"
|
||||
"4. DO NOT use usage counters as a reason to skip consolidation. The "
|
||||
"counters are new and often mostly zero. Judge overlap on CONTENT, "
|
||||
"not on use_count. 'use=0' is not evidence a skill is valuable; it's "
|
||||
"absence of evidence either way. Corollary: 'use=0' is ALSO not a "
|
||||
"reason to PRUNE a skill. Never archive a never-used skill (use=0) "
|
||||
"unless it is at least 30 days old (check last_activity / created date) "
|
||||
"AND its content is genuinely obsolete or fully absorbed elsewhere — a "
|
||||
"recently-created skill simply may not have had its trigger come up yet.\n"
|
||||
"absence of evidence either way.\n"
|
||||
"5. DO NOT reject consolidation on the grounds that 'each skill has "
|
||||
"a distinct trigger'. Pairwise distinctness is the wrong bar. The "
|
||||
"right bar is: 'would a human maintainer write this as N separate "
|
||||
@@ -518,9 +469,8 @@ CURATOR_REVIEW_PROMPT = (
|
||||
"skill, or `absorbed_into=\"\"` when you're truly pruning with no "
|
||||
"forwarding target. This drives cron-job skill-reference migration — "
|
||||
"guessing from your YAML summary after the fact is fragile.\n"
|
||||
" - terminal — move LOCAL candidate content into "
|
||||
"a support subfile when package integrity requires it; never mv, cp, rm, "
|
||||
"patch, or rewrite bundled, hub-installed, or external-dir skills\n\n"
|
||||
" - terminal — mv a sibling into the archive "
|
||||
"OR move its content into a support subfile\n\n"
|
||||
"'keep' is a legitimate decision ONLY when the skill is already a "
|
||||
"class-level umbrella and none of the proposed merges would improve "
|
||||
"discoverability. 'This is narrow but distinct from its siblings' "
|
||||
@@ -1460,14 +1410,12 @@ def _render_candidate_list() -> str:
|
||||
rows = skill_usage.agent_created_report()
|
||||
if not rows:
|
||||
return "No agent-created skills to review."
|
||||
cron_referenced = _cron_referenced_skills()
|
||||
lines = [f"Agent-created skills ({len(rows)}):\n"]
|
||||
for r in rows:
|
||||
lines.append(
|
||||
f"- {r['name']} "
|
||||
f"state={r['state']} "
|
||||
f"pinned={'yes' if r.get('pinned') else 'no'} "
|
||||
f"cron={'yes' if r['name'] in cron_referenced else 'no'} "
|
||||
f"activity={r.get('activity_count', 0)} "
|
||||
f"use={r.get('use_count', 0)} "
|
||||
f"view={r.get('view_count', 0)} "
|
||||
@@ -1895,14 +1843,6 @@ def _run_llm_review(prompt: str) -> Dict[str, Any]:
|
||||
# Disable recursive nudges — the curator must never spawn its own review.
|
||||
review_agent._memory_nudge_interval = 0
|
||||
review_agent._skill_nudge_interval = 0
|
||||
# Tag this fork as autonomous background curation so skill_manage's
|
||||
# background-review write guard fires. Without this the fork inherits
|
||||
# the default "assistant_tool" origin, is_background_review() is False,
|
||||
# and the external/bundled/hub-installed skill_manage guards never
|
||||
# trigger during the curation pass they exist to protect against.
|
||||
# turn_context.py binds this onto the write-origin ContextVar at turn
|
||||
# start (see agent/turn_context.py).
|
||||
review_agent._memory_write_origin = "background_review"
|
||||
|
||||
# Redirect the forked agent's stdout/stderr to /dev/null while it
|
||||
# runs so its tool-call chatter doesn't pollute the foreground
|
||||
|
||||
@@ -16,7 +16,6 @@ from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from utils import safe_json_loads
|
||||
from agent.redact import redact_sensitive_text
|
||||
from agent.tool_result_classification import file_mutation_result_landed
|
||||
|
||||
# ANSI escape codes for coloring tool failure indicators
|
||||
@@ -340,62 +339,6 @@ def _read_file_line_label(args: dict) -> str:
|
||||
return f"L{offset}-{offset + limit - 1}"
|
||||
|
||||
|
||||
def redact_browser_typed_text_for_display(value: Any, typed_text: Any) -> Any:
|
||||
"""Apply secret redaction to browser_type text in display-facing payloads.
|
||||
|
||||
Backends sometimes echo the attempted input in error strings or fallback
|
||||
metadata. When the raw typed value contains a recognizable secret (API
|
||||
key, token, JWT, etc.) the redacted form differs from the raw value, so we
|
||||
replace every occurrence of the raw value with its redacted form before a
|
||||
browser_type result reaches logs, callbacks, the model, or chat history.
|
||||
|
||||
Normal typed text (search queries, addresses, form fields) matches no
|
||||
secret pattern, so it passes through unchanged and stays readable.
|
||||
|
||||
Redaction is forced here regardless of the global ``security.redact_secrets``
|
||||
preference: a typed credential leaking into chat history is a security
|
||||
boundary, not mere log hygiene.
|
||||
"""
|
||||
if typed_text is None:
|
||||
return value
|
||||
needle = str(typed_text)
|
||||
if needle == "":
|
||||
return value
|
||||
redacted = redact_sensitive_text(needle, force=True)
|
||||
if redacted == needle:
|
||||
# Nothing secret-looking in the typed text; leave payload untouched.
|
||||
return value
|
||||
if isinstance(value, str):
|
||||
return value.replace(needle, redacted)
|
||||
if isinstance(value, dict):
|
||||
return {
|
||||
key: redact_browser_typed_text_for_display(item, typed_text)
|
||||
for key, item in value.items()
|
||||
}
|
||||
if isinstance(value, list):
|
||||
return [redact_browser_typed_text_for_display(item, typed_text) for item in value]
|
||||
if isinstance(value, tuple):
|
||||
return tuple(redact_browser_typed_text_for_display(item, typed_text) for item in value)
|
||||
return value
|
||||
|
||||
|
||||
def redact_tool_args_for_display(tool_name: str, args: dict | None) -> dict | None:
|
||||
"""Return a copy of tool args safe for logs/progress UI.
|
||||
|
||||
For ``browser_type`` the ``text`` argument is run through the same
|
||||
secret-pattern redactor used for logs. Recognizable credentials (API
|
||||
keys, tokens) are masked before the value reaches tool progress
|
||||
notifications; normal typed text is left intact for debuggability.
|
||||
"""
|
||||
if not isinstance(args, dict):
|
||||
return args
|
||||
if tool_name == "browser_type" and isinstance(args.get("text"), str):
|
||||
safe_args = dict(args)
|
||||
safe_args["text"] = redact_sensitive_text(args["text"], force=True)
|
||||
return safe_args
|
||||
return args
|
||||
|
||||
|
||||
def _delegate_task_goal_parts(tasks: Any, *, per_goal_len: int) -> tuple[int, list[str]]:
|
||||
if not isinstance(tasks, list):
|
||||
return 0, []
|
||||
@@ -419,7 +362,6 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
|
||||
max_len = _tool_preview_max_len
|
||||
if not args:
|
||||
return None
|
||||
args = redact_tool_args_for_display(tool_name, args) or args
|
||||
primary_args = {
|
||||
"terminal": "command", "web_search": "query", "web_extract": "urls",
|
||||
"read_file": "path", "write_file": "path", "patch": "path",
|
||||
@@ -1143,7 +1085,6 @@ def get_cute_tool_message(
|
||||
When *result* is provided the line is checked for failure indicators.
|
||||
Failed tool calls get a red prefix and an informational suffix.
|
||||
"""
|
||||
args = redact_tool_args_for_display(tool_name, args) or args
|
||||
dur = f"{duration:.1f}s"
|
||||
is_failure, failure_suffix = _detect_tool_failure(tool_name, result)
|
||||
skin_prefix = get_skin_tool_prefix()
|
||||
|
||||
@@ -133,31 +133,6 @@ _RATE_LIMIT_PATTERNS = [
|
||||
"servicequotaexceededexception",
|
||||
]
|
||||
|
||||
# Patterns that indicate provider-side overload, NOT a per-credential rate
|
||||
# limit or billing problem. The credential is valid — the server is just
|
||||
# busy — so the correct recovery is "back off and retry the same key", never
|
||||
# "rotate the credential" (rotating exhausts the pool while the endpoint is
|
||||
# still busy; a single-key user has nothing to rotate to). Some providers
|
||||
# (notably Z.AI / Zhipu) reuse HTTP 429 for server-wide overload, so the 429
|
||||
# status path matches the body against this list before falling through to
|
||||
# the rate_limit default. Phrases are kept narrow and overload-flavoured so a
|
||||
# normal rate-limit message ("you have been rate-limited") doesn't hit this
|
||||
# bucket. (#14038, #15297)
|
||||
_OVERLOADED_PATTERNS = [
|
||||
"overloaded",
|
||||
"temporarily overloaded",
|
||||
"service is temporarily overloaded",
|
||||
"service may be temporarily overloaded",
|
||||
"server is overloaded",
|
||||
"server overloaded",
|
||||
"service overloaded",
|
||||
"service is overloaded",
|
||||
"upstream overloaded",
|
||||
"currently overloaded",
|
||||
"at capacity",
|
||||
"over capacity",
|
||||
]
|
||||
|
||||
# Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
|
||||
_USAGE_LIMIT_PATTERNS = [
|
||||
"usage limit",
|
||||
@@ -355,14 +330,6 @@ _CONTENT_POLICY_BLOCKED_PATTERNS = [
|
||||
# echo back; the underscore form is provider-specific enough.
|
||||
"content_filter",
|
||||
"responsibleaipolicyviolation",
|
||||
# MiniMax output-layer safety filter. The error string is surfaced
|
||||
# verbatim by MiniMax SDK / OpenAI-compatible endpoints, usually in the
|
||||
# form "output new_sensitive (1027)" when the model's *output* (often a
|
||||
# large tool-call argument block) trips the upstream safety filter and
|
||||
# the SSE stream is truncated mid-flight. ``new_sensitive`` is the
|
||||
# filter name and is narrow enough that billing / format / auth error
|
||||
# strings will not collide. See #32421.
|
||||
"new_sensitive",
|
||||
]
|
||||
|
||||
# Auth patterns (non-status-code signals)
|
||||
@@ -750,26 +717,6 @@ def classify_api_error(
|
||||
|
||||
is_disconnect = any(p in error_msg for p in _SERVER_DISCONNECT_PATTERNS)
|
||||
if is_disconnect and not status_code:
|
||||
# Reasoning-model override: a transport disconnect on a reasoning
|
||||
# model is much more likely the upstream proxy idle-killing a
|
||||
# long thinking stream than a true context overflow — even on
|
||||
# large sessions. The default disconnect+large-session routing
|
||||
# below would otherwise send the user into the compression
|
||||
# branch (should_compress=True) and silently delete
|
||||
# conversation history on a phantom context-length error.
|
||||
# Reasoning models have multi-minute thinking phases that
|
||||
# routinely exceed the cloud gateway's idle window (NVIDIA
|
||||
# NIM ~120s — first-party repro at NVIDIA/NemoClaw#4846;
|
||||
# OpenAI worker / Anthropic stream-idle similar). The
|
||||
# per-reasoning-model stale-timeout floor in
|
||||
# agent/reasoning_timeouts.py raises the stale-detector
|
||||
# threshold to tolerate long thinking, so a true
|
||||
# transport-layer failure here is recoverable via the retry
|
||||
# path — not via context compression. Reclassify as timeout.
|
||||
# (Part 1 of Fixes #52310.)
|
||||
from agent.reasoning_timeouts import get_reasoning_stale_timeout_floor
|
||||
if get_reasoning_stale_timeout_floor(model) is not None:
|
||||
return _result(FailoverReason.timeout, retryable=True)
|
||||
# Absolute token/message-count thresholds are only a proxy for smaller
|
||||
# context windows. Large-context sessions can have hundreds of
|
||||
# messages while still being far below their actual token budget.
|
||||
@@ -896,19 +843,7 @@ def _classify_by_status(
|
||||
)
|
||||
|
||||
if status_code == 429:
|
||||
# Already checked long_context_tier above. Some providers (notably
|
||||
# Z.AI / Zhipu) reuse HTTP 429 for server-wide overload — same status
|
||||
# code as a true per-credential rate limit, but the credential is
|
||||
# valid and the correct recovery is "back off and retry the same key",
|
||||
# NOT "rotate the credential" (which exhausts the pool while the
|
||||
# endpoint is still busy, and does nothing for a single-key user).
|
||||
# Disambiguate on the error body so an overload 429 takes the
|
||||
# transient-overload path instead of burning the pool. (#14038)
|
||||
if any(p in error_msg for p in _OVERLOADED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.overloaded,
|
||||
retryable=True,
|
||||
)
|
||||
# Already checked long_context_tier above; this is a normal rate limit
|
||||
return result_fn(
|
||||
FailoverReason.rate_limit,
|
||||
retryable=True,
|
||||
@@ -1259,17 +1194,6 @@ def _classify_by_message(
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Overloaded / server-busy patterns — must come BEFORE the rate_limit and
|
||||
# billing checks so that a message-only "overloaded" (no 503/529 status,
|
||||
# e.g. some Anthropic-compatible proxies) classifies as a transient
|
||||
# overload (backoff + retry) instead of falling through to `unknown` or
|
||||
# incorrectly triggering credential rotation.
|
||||
if any(p in error_msg for p in _OVERLOADED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.overloaded,
|
||||
retryable=True,
|
||||
)
|
||||
|
||||
# Billing patterns
|
||||
if any(p in error_msg for p in _BILLING_PATTERNS):
|
||||
return result_fn(
|
||||
@@ -1359,25 +1283,19 @@ def _extract_status_code(error: Exception) -> Optional[int]:
|
||||
|
||||
|
||||
def _extract_error_body(error: Exception) -> dict:
|
||||
"""Extract the structured error body from an SDK exception or its cause chain."""
|
||||
current = error
|
||||
for _ in range(5): # Match _extract_status_code() traversal depth.
|
||||
body = getattr(current, "body", None)
|
||||
if isinstance(body, dict):
|
||||
return body
|
||||
# Some errors have .response.json()
|
||||
response = getattr(current, "response", None)
|
||||
if response is not None:
|
||||
try:
|
||||
json_body = response.json()
|
||||
if isinstance(json_body, dict):
|
||||
return json_body
|
||||
except Exception:
|
||||
pass
|
||||
cause = getattr(current, "__cause__", None) or getattr(current, "__context__", None)
|
||||
if cause is None or cause is current:
|
||||
break
|
||||
current = cause
|
||||
"""Extract the structured error body from an SDK exception."""
|
||||
body = getattr(error, "body", None)
|
||||
if isinstance(body, dict):
|
||||
return body
|
||||
# Some errors have .response.json()
|
||||
response = getattr(error, "response", None)
|
||||
if response is not None:
|
||||
try:
|
||||
json_body = response.json()
|
||||
if isinstance(json_body, dict):
|
||||
return json_body
|
||||
except Exception:
|
||||
pass
|
||||
return {}
|
||||
|
||||
|
||||
|
||||
@@ -77,22 +77,15 @@ def build_write_denied_prefixes(home: str) -> list[str]:
|
||||
]
|
||||
|
||||
|
||||
def get_safe_write_roots() -> set[str]:
|
||||
"""Return resolved HERMES_WRITE_SAFE_ROOT paths. Supports multiple directories
|
||||
separated by ``os.pathsep`` (``:`` on Unix, ``;`` on Windows).
|
||||
E.g., ``/opt/data:/var/www/html`` on Unix, ``C:\\data;D:\\www`` on Windows."""
|
||||
env = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
|
||||
if not env:
|
||||
return set()
|
||||
roots: set[str] = set()
|
||||
for path in env.split(os.pathsep):
|
||||
if path:
|
||||
try:
|
||||
resolved = os.path.realpath(os.path.expanduser(path))
|
||||
roots.add(resolved)
|
||||
except (OSError, ValueError):
|
||||
continue
|
||||
return roots
|
||||
def get_safe_write_root() -> Optional[str]:
|
||||
"""Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset."""
|
||||
root = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
|
||||
if not root:
|
||||
return None
|
||||
try:
|
||||
return os.path.realpath(os.path.expanduser(root))
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def is_write_denied(path: str) -> bool:
|
||||
@@ -131,15 +124,9 @@ def is_write_denied(path: str) -> bool:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
safe_roots = get_safe_write_roots()
|
||||
if safe_roots:
|
||||
allowed = False
|
||||
for safe_root in safe_roots:
|
||||
if resolved == safe_root or resolved.startswith(safe_root + os.sep):
|
||||
allowed = True
|
||||
break
|
||||
if not allowed:
|
||||
return True
|
||||
safe_root = get_safe_write_root()
|
||||
if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
@@ -251,78 +251,6 @@ def _supports_vision_override(
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_inference_base_url(
|
||||
cfg: Optional[Dict[str, Any]],
|
||||
provider: str,
|
||||
) -> str:
|
||||
"""Best-effort base URL for the active inference provider."""
|
||||
try:
|
||||
from agent.auxiliary_client import _RUNTIME_MAIN_BASE_URL
|
||||
|
||||
runtime = str(_RUNTIME_MAIN_BASE_URL or "").strip()
|
||||
if runtime:
|
||||
return runtime
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not isinstance(cfg, dict):
|
||||
return ""
|
||||
|
||||
model_cfg_raw = cfg.get("model")
|
||||
model_cfg: Dict[str, Any] = model_cfg_raw if isinstance(model_cfg_raw, dict) else {}
|
||||
base_url = str(model_cfg.get("base_url") or "").strip()
|
||||
if base_url:
|
||||
return base_url
|
||||
|
||||
config_provider = str(model_cfg.get("provider") or "").strip()
|
||||
candidate_names: set[str] = set()
|
||||
for p in filter(None, (provider, config_provider)):
|
||||
candidate_names.add(p)
|
||||
if p.lower().startswith("custom:"):
|
||||
candidate_names.add(p.split(":", 1)[1])
|
||||
else:
|
||||
candidate_names.add(f"custom:{p}")
|
||||
|
||||
providers_cfg = cfg.get("providers")
|
||||
if isinstance(providers_cfg, dict):
|
||||
for name in candidate_names:
|
||||
entry = providers_cfg.get(name)
|
||||
if isinstance(entry, dict):
|
||||
bu = str(entry.get("base_url") or "").strip()
|
||||
if bu:
|
||||
return bu
|
||||
|
||||
custom_providers = cfg.get("custom_providers")
|
||||
if isinstance(custom_providers, list):
|
||||
lowered = {n.lower() for n in candidate_names}
|
||||
for entry_raw in custom_providers:
|
||||
if not isinstance(entry_raw, dict):
|
||||
continue
|
||||
entry_name = str(entry_raw.get("name") or "").strip()
|
||||
if entry_name not in candidate_names and entry_name.lower() not in lowered:
|
||||
continue
|
||||
bu = str(entry_raw.get("base_url") or "").strip()
|
||||
if bu:
|
||||
return bu
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
def _should_probe_ollama_vision(provider: str, base_url: str) -> bool:
|
||||
"""True when the active provider likely fronts a local Ollama server."""
|
||||
p = (provider or "").strip().lower()
|
||||
if p == "ollama":
|
||||
return True
|
||||
if not base_url:
|
||||
return False
|
||||
try:
|
||||
from agent.model_metadata import detect_local_server_type
|
||||
|
||||
return detect_local_server_type(base_url) == "ollama"
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _coerce_mode(raw: Any) -> str:
|
||||
"""Normalize a config value into one of the valid modes."""
|
||||
if not isinstance(raw, str):
|
||||
@@ -374,33 +302,15 @@ def _lookup_supports_vision(
|
||||
return override
|
||||
if not provider or not model:
|
||||
return None
|
||||
caps = None
|
||||
try:
|
||||
from agent.models_dev import get_model_capabilities
|
||||
caps = get_model_capabilities(provider, model)
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
logger.debug("image_routing: caps lookup failed for %s:%s — %s", provider, model, exc)
|
||||
if caps is not None:
|
||||
return bool(caps.supports_vision)
|
||||
|
||||
base_url = _resolve_inference_base_url(cfg, provider)
|
||||
if not base_url and (provider or "").strip().lower() == "ollama":
|
||||
base_url = "http://localhost:11434/v1"
|
||||
if _should_probe_ollama_vision(provider, base_url):
|
||||
try:
|
||||
from agent.model_metadata import query_ollama_supports_vision
|
||||
|
||||
ollama_vision = query_ollama_supports_vision(model, base_url)
|
||||
if ollama_vision is not None:
|
||||
return ollama_vision
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
logger.debug(
|
||||
"image_routing: ollama vision probe failed for %s:%s — %s",
|
||||
provider,
|
||||
model,
|
||||
exc,
|
||||
)
|
||||
return None
|
||||
return None
|
||||
if caps is None:
|
||||
return None
|
||||
return bool(caps.supports_vision)
|
||||
|
||||
|
||||
def decide_image_input_mode(
|
||||
@@ -478,98 +388,14 @@ def _sniff_mime_from_bytes(raw: bytes) -> Optional[str]:
|
||||
# BMP: "BM"
|
||||
if raw.startswith(b"BM"):
|
||||
return "image/bmp"
|
||||
# ISO-BMFF family (HEIC/HEIF/AVIF): bytes 4..8 == 'ftyp', major brand at 8..12
|
||||
if len(raw) >= 12 and raw[4:8] == b"ftyp":
|
||||
brand = raw[8:12]
|
||||
if brand in {b"avif", b"avis"}:
|
||||
return "image/avif"
|
||||
if brand in {
|
||||
b"heic", b"heix", b"hevc", b"hevx",
|
||||
b"mif1", b"msf1", b"heim", b"heis",
|
||||
}:
|
||||
return "image/heic"
|
||||
# TIFF: II*\0 (little-endian) or MM\0* (big-endian)
|
||||
if raw[:4] in {b"II*\x00", b"MM\x00*"}:
|
||||
return "image/tiff"
|
||||
# ICO: 00 00 01 00 (reserved=0, type=1=icon)
|
||||
if raw[:4] == b"\x00\x00\x01\x00":
|
||||
return "image/x-icon"
|
||||
# SVG: text-based, look for an <svg tag near the start (skip BOM/whitespace)
|
||||
head = raw[:512].lstrip().lower()
|
||||
if head.startswith(b"<?xml") or head.startswith(b"<svg"):
|
||||
if b"<svg" in head:
|
||||
return "image/svg+xml"
|
||||
# HEIC/HEIF: ftypheic / ftypheix / ftypmif1 / ftypmsf1 etc.
|
||||
if len(raw) >= 12 and raw[4:8] == b"ftyp" and raw[8:12] in {
|
||||
b"heic", b"heix", b"hevc", b"hevx", b"mif1", b"msf1", b"heim", b"heis",
|
||||
}:
|
||||
return "image/heic"
|
||||
return None
|
||||
|
||||
|
||||
# Formats every major vision provider (Anthropic, OpenAI, Gemini, Bedrock)
|
||||
# accepts natively. Anything outside this set has to be transcoded to PNG
|
||||
# before we declare media_type, otherwise the provider returns HTTP 400
|
||||
# ("Could not process image" / "Unsupported image media type") and the
|
||||
# whole turn fails with no salvage path.
|
||||
#
|
||||
# Discord (and a few other chat platforms) freely accept attachments in
|
||||
# formats outside this set -- AVIF screenshots from Chromium, HEIC from
|
||||
# iPhones, TIFF from scanners, BMP from old Windows tools, ICO -- so users
|
||||
# do hit this in practice. SVG is vector and Pillow cannot rasterize it;
|
||||
# it is skipped (logged) rather than transcoded.
|
||||
_UNIVERSALLY_SUPPORTED_MIMES = frozenset({
|
||||
"image/png", "image/jpeg", "image/gif", "image/webp",
|
||||
})
|
||||
|
||||
|
||||
def _transcode_to_png(raw: bytes) -> Optional[bytes]:
    """Re-encode arbitrary image bytes as PNG using Pillow.

    Args:
        raw: The encoded bytes of the source image, in whatever format
            Pillow (plus any optional plugins that import successfully)
            is able to open.

    Returns:
        The PNG-encoded bytes, or ``None`` when Pillow cannot be imported
        or when opening / converting / saving the image raises. Both
        failure paths are logged at INFO level and never propagate, so the
        caller can simply skip the image.
    """
    try:
        from PIL import Image
    except ImportError:
        logger.info(
            "image_routing: Pillow not installed; cannot transcode "
            "non-standard image format to PNG. Install with `pip install Pillow` "
            "(and `pillow-heif` / `pillow-avif-plugin` for those formats)."
        )
        return None

    # Best-effort registration of the optional HEIF opener. Any failure is
    # deliberately ignored: the open() call below will raise for a format
    # nobody registered, and that is handled there.
    try:
        import pillow_heif  # type: ignore

        pillow_heif.register_heif_opener()
    except Exception:
        pass

    # Same best-effort approach for AVIF; importing the module is the
    # registration step.
    try:
        import pillow_avif  # type: ignore  # noqa: F401 -- registers AVIF on import
    except Exception:
        pass

    # Modes written out unchanged; every other mode is normalised to RGBA.
    passthrough_modes = {"RGB", "RGBA", "L", "LA", "P"}

    try:
        from io import BytesIO

        with Image.open(BytesIO(raw)) as source:
            if source.mode in passthrough_modes:
                frame = source
            else:
                frame = source.convert("RGBA")
            sink = BytesIO()
            frame.save(sink, format="PNG", optimize=False)
            return sink.getvalue()
    except Exception as exc:
        logger.info(
            "image_routing: Pillow could not transcode image to PNG -- %s", exc
        )
        return None
|
||||
|
||||
|
||||
def _guess_mime(path: Path, raw: Optional[bytes] = None) -> str:
|
||||
"""Return image MIME type for *path*.
|
||||
|
||||
@@ -605,18 +431,8 @@ def _file_to_data_url(path: Path) -> Optional[str]:
|
||||
accept large images (OpenAI 49 MB+, Gemini 100 MB) don't pay a silent
|
||||
quality tax just because one other provider is stricter.
|
||||
|
||||
Format compatibility IS handled here: if the sniffed MIME isn't one
|
||||
of ``_UNIVERSALLY_SUPPORTED_MIMES`` (i.e. it's something like AVIF,
|
||||
HEIC, BMP, TIFF, or ICO that some providers reject outright), we
|
||||
transcode to PNG with Pillow before declaring media_type. This fixes
|
||||
the user-visible "Could not process image" HTTP 400 from Anthropic on
|
||||
Discord-attached AVIF/HEIC/BMP files.
|
||||
|
||||
Returns None if the file can't be read OR if the format isn't
|
||||
universally supported AND Pillow can't transcode it (Pillow missing,
|
||||
HEIC/AVIF plugin missing, vector format like SVG, corrupt bytes). The
|
||||
caller reports those paths in ``skipped`` and the rest of the turn
|
||||
proceeds.
|
||||
Returns None only if the file can't be read (missing, permission
|
||||
denied, etc.); the caller reports those paths in ``skipped``.
|
||||
"""
|
||||
try:
|
||||
raw = path.read_bytes()
|
||||
@@ -624,22 +440,6 @@ def _file_to_data_url(path: Path) -> Optional[str]:
|
||||
logger.warning("image_routing: failed to read %s — %s", path, exc)
|
||||
return None
|
||||
mime = _guess_mime(path, raw=raw)
|
||||
if mime not in _UNIVERSALLY_SUPPORTED_MIMES:
|
||||
transcoded = _transcode_to_png(raw)
|
||||
if transcoded is None:
|
||||
logger.warning(
|
||||
"image_routing: %s is %s which is not accepted by all major "
|
||||
"vision providers and could not be transcoded to PNG; "
|
||||
"skipping this attachment.",
|
||||
path, mime,
|
||||
)
|
||||
return None
|
||||
logger.info(
|
||||
"image_routing: transcoded %s (%s) -> image/png for provider compatibility",
|
||||
path.name, mime,
|
||||
)
|
||||
raw = transcoded
|
||||
mime = "image/png"
|
||||
b64 = base64.b64encode(raw).decode("ascii")
|
||||
return f"data:{mime};base64,{b64}"
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@ iteration.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import Any
|
||||
@@ -26,112 +25,20 @@ logger = logging.getLogger(__name__)
|
||||
# opening dozens of sockets at once.
|
||||
_MAX_REFERENCE_WORKERS = 8
|
||||
|
||||
# Per-tool-result character budget for the advisory reference view. Tool
|
||||
# results can be huge (a full diff, a 5000-line file dump); replaying them
|
||||
# verbatim per reference per tool-loop step would blow the reference model's
|
||||
# context window and cost. We keep the agent's *actions* (tool calls) in full —
|
||||
# they are cheap, high-signal, and tell the reference what the agent did — but
|
||||
# preview each tool *result* head+tail so the reference still sees what came
|
||||
# back without replaying megabytes. The acting aggregator always gets the full,
|
||||
# untrimmed transcript; this budget only shapes the advisory copy.
|
||||
_REFERENCE_TOOL_RESULT_BUDGET = 4000
|
||||
|
||||
# System prompt prepended to every reference-model call. References are
|
||||
# advisory — they do NOT act, call tools, or own the task. Without this
|
||||
# framing a reference receives the bare trimmed conversation and assumes it is
|
||||
# the acting agent: it then refuses ("I can't access repositories / URLs from
|
||||
# here") or tries to call tools it doesn't have. The prompt reframes the model
|
||||
# as an analyst whose job is to reason about the presented state and hand its
|
||||
# best thinking to the aggregator/orchestrator that will actually act.
|
||||
_REFERENCE_SYSTEM_PROMPT = (
|
||||
"You are a reference advisor in a Mixture of Agents (MoA) process. You are "
|
||||
"NOT the acting agent and you do NOT execute anything: you cannot call "
|
||||
"tools, run commands, browse, or access files, repositories, or URLs, and "
|
||||
"you should not try to or apologize for being unable to. A separate "
|
||||
"aggregator/orchestrator model holds those capabilities and will take the "
|
||||
"actual actions.\n\n"
|
||||
"The conversation below is the current state of a task handled by that "
|
||||
"acting agent. Your job is to give your most intelligent analysis of that "
|
||||
"state: understand the goal, reason about the problem, and advise on what "
|
||||
"to do next. Surface the best approach, concrete next steps and tool-use "
|
||||
"strategy, likely pitfalls and risks, and anything the acting agent may "
|
||||
"have missed or gotten wrong. Assume any referenced files, URLs, or "
|
||||
"systems exist and reason about them from the context given rather than "
|
||||
"asking for access.\n\n"
|
||||
"Respond with your advice directly — no preamble, no disclaimers about "
|
||||
"tools or access. Your response is private guidance handed to the "
|
||||
"aggregator, not an answer shown to the user."
|
||||
)
|
||||
|
||||
|
||||
|
||||
def _slot_label(slot: dict[str, str]) -> str:
|
||||
return f"{slot.get('provider', '').strip()}:{slot.get('model', '').strip()}"
|
||||
|
||||
|
||||
def _slot_runtime(slot: dict[str, str]) -> dict[str, Any]:
    """Build the ``call_llm`` keyword arguments for a reference/aggregator slot.

    The slot's provider and model are passed to
    ``hermes_cli.runtime_provider.resolve_runtime_provider``; when that
    succeeds, any truthy ``base_url`` / ``api_key`` it reports are added to
    the result so the call targets the resolved endpoint.

    Args:
        slot: Mapping with ``provider`` and ``model`` entries (missing or
            falsy values become empty strings).

    Returns:
        A dict that always contains ``provider`` and ``model`` and may also
        contain ``base_url`` and/or ``api_key``. If resolution raises for
        any reason the failure is logged at DEBUG level and whatever was
        collected so far (at minimum the bare provider/model) is returned,
        so the caller still attempts the call.
    """
    provider_name = str(slot.get("provider") or "").strip()
    model_name = str(slot.get("model") or "").strip()
    call_kwargs: dict[str, Any] = {"provider": provider_name, "model": model_name}

    try:
        from hermes_cli.runtime_provider import resolve_runtime_provider

        runtime = resolve_runtime_provider(requested=provider_name, target_model=model_name)
        canonical = str(runtime.get("provider") or provider_name).strip().lower()

        # These providers must stay identified by name only: per the original
        # author's note, an explicit base_url makes call_llm treat the target
        # as a custom endpoint, bypassing the provider-specific branch (auth
        # refresh, request metadata, request-shape adapters).
        if canonical not in {"nous", "openai-codex", "xai-oauth"}:
            # Forward the resolved endpoint and credential when present.
            for field in ("base_url", "api_key"):
                resolved = runtime.get(field)
                if resolved:
                    call_kwargs[field] = resolved
    except Exception as exc:  # pragma: no cover - defensive
        logger.debug("MoA slot runtime resolution failed for %s: %s", _slot_label(slot), exc)

    return call_kwargs
|
||||
|
||||
|
||||
def _run_reference(
|
||||
slot: dict[str, str],
|
||||
ref_messages: list[dict[str, Any]],
|
||||
*,
|
||||
temperature: float | None = None,
|
||||
max_tokens: int | None = None,
|
||||
temperature: float,
|
||||
max_tokens: int,
|
||||
) -> tuple[str, str]:
|
||||
"""Call one reference model and return ``(label, text)``.
|
||||
|
||||
The slot is resolved to its provider's real runtime (via ``_slot_runtime``)
|
||||
and called through the same ``call_llm`` request-building path any model
|
||||
uses, so per-model wire-format handling (anthropic_messages,
|
||||
max_completion_tokens, fixed/forbidden temperature) applies identically to
|
||||
a reference as it would if that model were the acting model. MoA imposes no
|
||||
cap of its own (``max_tokens`` defaults to ``None`` → omitted → the model's
|
||||
real maximum); ``temperature`` is only the user's configured preset value,
|
||||
which call_llm may still override per model.
|
||||
|
||||
Never raises: a failed reference becomes a labelled note so the aggregator
|
||||
can still act with partial context. Designed to run inside a thread pool —
|
||||
``call_llm`` is synchronous/blocking, so threads (not asyncio) are the right
|
||||
@@ -139,17 +46,13 @@ def _run_reference(
|
||||
"""
|
||||
label = _slot_label(slot)
|
||||
try:
|
||||
# Prepend the advisory-role system prompt so the reference understands
|
||||
# it is analyzing state for an aggregator, not acting on the task. The
|
||||
# trimmed view (_reference_messages) already strips the agent's own
|
||||
# system prompt, so this is the only system message the reference sees.
|
||||
messages = [{"role": "system", "content": _REFERENCE_SYSTEM_PROMPT}, *ref_messages]
|
||||
response = call_llm(
|
||||
task="moa_reference",
|
||||
messages=messages,
|
||||
provider=slot["provider"],
|
||||
model=slot["model"],
|
||||
messages=ref_messages,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
**_slot_runtime(slot),
|
||||
)
|
||||
return label, _extract_text(response) or "(empty response)"
|
||||
except Exception as exc:
|
||||
@@ -161,8 +64,8 @@ def _run_references_parallel(
|
||||
reference_models: list[dict[str, str]],
|
||||
ref_messages: list[dict[str, Any]],
|
||||
*,
|
||||
temperature: float | None = None,
|
||||
max_tokens: int | None = None,
|
||||
temperature: float,
|
||||
max_tokens: int,
|
||||
) -> list[tuple[str, str]]:
|
||||
"""Fan out all reference models in parallel, returning outputs in order.
|
||||
|
||||
@@ -203,140 +106,40 @@ def _run_references_parallel(
|
||||
return [r for r in results if r is not None]
|
||||
|
||||
|
||||
def _truncate_tool_result(text: str, budget: int = _REFERENCE_TOOL_RESULT_BUDGET) -> str:
    """Return a head+tail preview of a tool result.

    Text no longer than ``budget`` (and any empty/falsy text) is returned
    unchanged. Longer text is reduced to its first and last ``budget // 2``
    characters with a ``[... N chars omitted ...]`` marker between them,
    where ``N`` is the number of characters dropped.

    Args:
        text: The tool result to preview.
        budget: Maximum number of original characters to keep. Values below
            2 keep no original characters at all (marker only); negative
            values are treated as 0.

    Returns:
        The original text, or the head/marker/tail preview.
    """
    if not text or len(text) <= budget:
        return text

    # Clamp so a negative budget cannot produce negative slice bounds.
    half = max(budget, 0) // 2
    omitted = len(text) - 2 * half

    head = text[:half]
    # Index from the front: `text[-half:]` would return the ENTIRE text
    # when half == 0, defeating the truncation for budgets of 0 or 1.
    tail = text[len(text) - half:]
    return f"{head}\n[... {omitted} chars omitted ...]\n{tail}"
|
||||
|
||||
|
||||
def _render_tool_calls(tool_calls: Any) -> str:
|
||||
"""Render an assistant turn's tool_calls as readable text lines.
|
||||
|
||||
The advisory view cannot carry real ``tool_calls`` payloads (strict
|
||||
providers reject tool_calls the reference never produced), so the agent's
|
||||
actions are flattened to text the reference can read and reason about.
|
||||
"""
|
||||
lines: list[str] = []
|
||||
for tc in tool_calls or []:
|
||||
fn = (tc.get("function") or {}) if isinstance(tc, dict) else {}
|
||||
name = fn.get("name") or (tc.get("name") if isinstance(tc, dict) else "") or "tool"
|
||||
args = fn.get("arguments")
|
||||
if isinstance(args, str):
|
||||
args_text = args
|
||||
elif args is not None:
|
||||
try:
|
||||
import json
|
||||
|
||||
args_text = json.dumps(args, ensure_ascii=False)
|
||||
except Exception:
|
||||
args_text = str(args)
|
||||
else:
|
||||
args_text = ""
|
||||
lines.append(f"[called tool: {name}({args_text})]" if args_text else f"[called tool: {name}]")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _reference_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
"""Build an advisory view of the conversation for reference models.
|
||||
"""Build an advisory-safe view of the conversation for reference models.
|
||||
|
||||
A reference gives an INFORMED judgement on the current state, so it must
|
||||
see what the agent actually did — its tool calls AND the tool results that
|
||||
came back — not just the agent's narration. We therefore preserve the whole
|
||||
conversation flow, but flatten it into clean user/assistant *text* turns:
|
||||
|
||||
- system prompt: dropped (8K of Hermes boilerplate, not advisory signal).
|
||||
- assistant turns: kept; any ``tool_calls`` are rendered inline as
|
||||
``[called tool: name(args)]`` text lines appended to the turn's text.
|
||||
- ``tool``-role results: NOT dropped. Each is folded (head+tail preview,
|
||||
see ``_truncate_tool_result``) into the *preceding* assistant turn as a
|
||||
``[tool result: ...]`` block, so the reference sees what came back.
|
||||
|
||||
This emits ZERO ``tool``-role messages and ZERO ``tool_calls`` arrays — only
|
||||
plain user/assistant text — so strict providers (Mistral, Fireworks) that
|
||||
reject orphan tool messages / unproduced tool_calls don't 400, while the
|
||||
reference still has the full picture.
|
||||
|
||||
The view MUST end with a ``user`` turn. Anthropic (and OpenRouter→Anthropic)
|
||||
interpret a trailing assistant turn as an assistant *prefill* to continue,
|
||||
and no-prefill models (e.g. Claude Opus 4.8) reject it with
|
||||
``400 ... must end with a user message``. Rather than DELETE the agent's
|
||||
latest context to satisfy that (which would blind the reference to the
|
||||
current state), we APPEND a synthetic user turn asking the reference to
|
||||
judge the state above. End-on-user is satisfied and no context is lost.
|
||||
|
||||
The acting aggregator always receives the full, untrimmed transcript; this
|
||||
function only shapes the disposable advisory copy.
|
||||
Reference calls are advisory: they never call tools and never emit the
|
||||
``tool_calls`` the main model did. Replaying the full transcript verbatim
|
||||
(a) re-bills the ~8K-token Hermes system prompt per reference per
|
||||
iteration and (b) risks 400s from strict providers (Mistral, Fireworks)
|
||||
that reject orphan ``tool`` messages or ``tool_calls`` the reference never
|
||||
produced. We keep only the user/assistant *text* turns, dropping the
|
||||
system prompt, any ``tool``-role messages, and any ``tool_calls`` payloads.
|
||||
"""
|
||||
advisory_instruction = (
|
||||
"[The conversation above is the current state of the task. Give your "
|
||||
"most intelligent judgement: what is going on, what should happen next, "
|
||||
"what risks or mistakes you see, and how the acting agent should "
|
||||
"proceed.]"
|
||||
)
|
||||
|
||||
rendered: list[dict[str, Any]] = []
|
||||
last_user_content: str | None = None
|
||||
trimmed: list[dict[str, Any]] = []
|
||||
for msg in messages:
|
||||
role = msg.get("role")
|
||||
content = msg.get("content")
|
||||
text = content if isinstance(content, str) else ""
|
||||
|
||||
if role == "system":
|
||||
if role not in ("user", "assistant"):
|
||||
# Drop system prompt and tool-result messages.
|
||||
continue
|
||||
if role == "user":
|
||||
if text.strip():
|
||||
last_user_content = text
|
||||
rendered.append({"role": "user", "content": text})
|
||||
elif role == "assistant":
|
||||
parts: list[str] = []
|
||||
if text.strip():
|
||||
parts.append(text.strip())
|
||||
calls_text = _render_tool_calls(msg.get("tool_calls"))
|
||||
if calls_text:
|
||||
parts.append(calls_text)
|
||||
# Empty assistant turns (no text, no calls) carry nothing advisory.
|
||||
if parts:
|
||||
rendered.append({"role": "assistant", "content": "\n".join(parts)})
|
||||
elif role == "tool":
|
||||
# Fold the tool result into the preceding assistant turn as text so
|
||||
# the reference sees what came back, without emitting a tool-role
|
||||
# message a reference never produced.
|
||||
result_text = _truncate_tool_result(text)
|
||||
block = f"[tool result: {result_text}]"
|
||||
if rendered and rendered[-1].get("role") == "assistant":
|
||||
rendered[-1]["content"] = rendered[-1]["content"] + "\n" + block
|
||||
else:
|
||||
# No assistant turn to attach to (e.g. a leading tool result);
|
||||
# keep it as advisory context on its own assistant-role line.
|
||||
rendered.append({"role": "assistant", "content": block})
|
||||
# Any other role is ignored.
|
||||
|
||||
# End on a user turn: append a synthetic advisory request rather than
|
||||
# deleting the agent's latest assistant context. This satisfies Anthropic's
|
||||
# no-trailing-assistant-prefill rule while preserving full state.
|
||||
if rendered and rendered[-1].get("role") == "assistant":
|
||||
rendered.append({"role": "user", "content": advisory_instruction})
|
||||
elif rendered and rendered[-1].get("role") == "user":
|
||||
# Already ends on a user turn (fresh user prompt, no agent action yet).
|
||||
# Leave it — the reference answers that prompt directly.
|
||||
pass
|
||||
|
||||
if not rendered:
|
||||
# Degenerate case: nothing rendered. Fall back to the latest user turn.
|
||||
if last_user_content is not None:
|
||||
return [{"role": "user", "content": last_user_content}]
|
||||
content = msg.get("content")
|
||||
if not isinstance(content, str):
|
||||
# Skip non-text (multimodal/tool-call-only) assistant turns.
|
||||
if not content:
|
||||
continue
|
||||
text = content if isinstance(content, str) else ""
|
||||
if role == "assistant" and not text.strip():
|
||||
# Assistant turn that was purely tool calls — nothing advisory.
|
||||
continue
|
||||
trimmed.append({"role": role, "content": text})
|
||||
if not trimmed:
|
||||
# Degenerate case (e.g. first turn was stripped): fall back to a
|
||||
# minimal user turn so the reference still has something to answer.
|
||||
for msg in reversed(messages):
|
||||
if msg.get("role") == "user" and isinstance(msg.get("content"), str):
|
||||
return [{"role": "user", "content": msg["content"]}]
|
||||
return rendered
|
||||
return trimmed
|
||||
|
||||
|
||||
|
||||
@@ -366,18 +169,12 @@ def aggregate_moa_context(
|
||||
aggregator: dict[str, str],
|
||||
temperature: float = 0.6,
|
||||
aggregator_temperature: float = 0.4,
|
||||
max_tokens: int | None = None,
|
||||
max_tokens: int = 4096,
|
||||
) -> str:
|
||||
"""Run configured reference models and synthesize their advice.
|
||||
|
||||
Failures are returned as model-specific notes instead of aborting the normal
|
||||
agent loop; the main model can still act with partial context.
|
||||
|
||||
``max_tokens`` is ``None`` by default: MoA does not cap reference or
|
||||
aggregator output, so each model uses its own maximum. ``call_llm`` omits
|
||||
the parameter entirely when it is ``None`` (see its docstring), which also
|
||||
sidesteps providers that reject ``max_tokens`` outright. A hardcoded cap
|
||||
here previously truncated long aggregator syntheses.
|
||||
"""
|
||||
reference_outputs: list[tuple[str, str]] = []
|
||||
ref_messages = _reference_messages(api_messages)
|
||||
@@ -406,10 +203,11 @@ def aggregate_moa_context(
|
||||
try:
|
||||
response = call_llm(
|
||||
task="moa_aggregator",
|
||||
provider=aggregator["provider"],
|
||||
model=aggregator["model"],
|
||||
messages=[{"role": "user", "content": synth_prompt}],
|
||||
temperature=aggregator_temperature,
|
||||
max_tokens=max_tokens,
|
||||
**_slot_runtime(aggregator),
|
||||
)
|
||||
synthesis = _extract_text(response)
|
||||
except Exception as exc:
|
||||
@@ -432,38 +230,8 @@ def aggregate_moa_context(
|
||||
class MoAChatCompletions:
|
||||
"""OpenAI-chat-compatible facade where the aggregator is the acting model."""
|
||||
|
||||
def __init__(self, preset_name: str, reference_callback: Any = None):
|
||||
def __init__(self, preset_name: str):
|
||||
self.preset_name = preset_name or "default"
|
||||
# Optional display hook. Called as reference outputs become available so
|
||||
# frontends can show each reference model's answer as a labelled block
|
||||
# before the aggregator acts. Signature:
|
||||
# reference_callback(event, **kwargs)
|
||||
# where event is one of:
|
||||
# "moa.reference" kwargs: index, count, label, text
|
||||
# "moa.aggregating" kwargs: aggregator (label), ref_count
|
||||
# Never raises into the model call — display is best-effort.
|
||||
self.reference_callback = reference_callback
|
||||
# State-scoped reference cache. The agent loop calls create() once per
|
||||
# tool-loop iteration; references should re-run whenever the task STATE
|
||||
# advances — i.e. on every new user message AND every new tool result —
|
||||
# so each reference judges the latest state. The advisory view
|
||||
# (_reference_messages) now renders tool calls + results as text, so its
|
||||
# signature changes on every new tool response; the cache key is that
|
||||
# signature, so a new tool result is a cache MISS (references re-run)
|
||||
# while a redundant create() call with identical state is a HIT (no
|
||||
# re-run, no re-emit). This gives "fire on every user/tool response"
|
||||
# for free, without re-firing on a pure no-op re-call.
|
||||
self._ref_cache_key: tuple | None = None
|
||||
self._ref_cache_outputs: list[tuple[str, str]] = []
|
||||
|
||||
def _emit(self, event: str, **kwargs: Any) -> None:
|
||||
cb = self.reference_callback
|
||||
if cb is None:
|
||||
return
|
||||
try:
|
||||
cb(event, **kwargs)
|
||||
except Exception as exc: # pragma: no cover - display must never break the turn
|
||||
logger.debug("MoA reference_callback failed for %s: %s", event, exc)
|
||||
|
||||
def create(self, **api_kwargs: Any) -> Any:
|
||||
from hermes_cli.config import load_config
|
||||
@@ -473,10 +241,7 @@ class MoAChatCompletions:
|
||||
messages = list(api_kwargs.get("messages") or [])
|
||||
reference_models = preset.get("reference_models") or []
|
||||
aggregator = preset.get("aggregator") or {}
|
||||
# MoA does not cap reference or aggregator output: each model uses its
|
||||
# own maximum. Passing max_tokens=None makes call_llm omit the parameter
|
||||
# (it never caps by default), so a long aggregator synthesis is never
|
||||
# truncated and providers that reject max_tokens don't 400.
|
||||
max_tokens = int(preset.get("max_tokens", api_kwargs.get("max_tokens") or 4096) or 4096)
|
||||
temperature = float(preset.get("reference_temperature", 0.6) or 0.6)
|
||||
aggregator_temperature = float(preset.get("aggregator_temperature", api_kwargs.get("temperature") or 0.4) or 0.4)
|
||||
|
||||
@@ -488,52 +253,12 @@ class MoAChatCompletions:
|
||||
|
||||
reference_outputs: list[tuple[str, str]] = []
|
||||
ref_messages = _reference_messages(messages)
|
||||
|
||||
# Turn-scoped cache: only run + display references when the advisory
|
||||
# view changed (i.e. a new user turn). Within one turn the agent loop
|
||||
# calls create() once per tool iteration with the same advisory view;
|
||||
# reuse the cached outputs and skip both the re-run and the re-emit.
|
||||
_sig = hashlib.sha256(
|
||||
"\u0000".join(
|
||||
f"{m.get('role')}:{m.get('content')}" for m in ref_messages
|
||||
).encode("utf-8", "replace")
|
||||
).hexdigest()
|
||||
_cache_key = (self.preset_name, _sig, tuple(_slot_label(s) for s in reference_models))
|
||||
_refs_from_cache = _cache_key == self._ref_cache_key and bool(self._ref_cache_outputs)
|
||||
|
||||
if _refs_from_cache:
|
||||
reference_outputs = list(self._ref_cache_outputs)
|
||||
else:
|
||||
reference_outputs = _run_references_parallel(
|
||||
reference_models,
|
||||
ref_messages,
|
||||
temperature=temperature,
|
||||
max_tokens=None,
|
||||
)
|
||||
self._ref_cache_key = _cache_key
|
||||
self._ref_cache_outputs = list(reference_outputs)
|
||||
|
||||
# Surface each reference model's answer to the display BEFORE the
|
||||
# aggregator acts — once per turn (only on the iteration that
|
||||
# actually ran them). The user sees one labelled block per
|
||||
# reference (rendered like a thinking block) so the MoA process is
|
||||
# visible rather than a silent pause. Best-effort: never blocks the
|
||||
# turn.
|
||||
_ref_count = len(reference_outputs)
|
||||
for _idx, (_label, _text) in enumerate(reference_outputs, start=1):
|
||||
self._emit(
|
||||
"moa.reference",
|
||||
index=_idx,
|
||||
count=_ref_count,
|
||||
label=_label,
|
||||
text=_text,
|
||||
)
|
||||
if _ref_count:
|
||||
self._emit(
|
||||
"moa.aggregating",
|
||||
aggregator=_slot_label(aggregator),
|
||||
ref_count=_ref_count,
|
||||
)
|
||||
reference_outputs = _run_references_parallel(
|
||||
reference_models,
|
||||
ref_messages,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
|
||||
agg_messages = [dict(m) for m in messages]
|
||||
if reference_outputs:
|
||||
@@ -561,26 +286,21 @@ class MoAChatCompletions:
|
||||
raise RuntimeError("MoA aggregator cannot be another MoA preset")
|
||||
agg_kwargs = dict(api_kwargs)
|
||||
agg_kwargs["messages"] = agg_messages
|
||||
# The aggregator is the acting model. Resolve its slot to the provider's
|
||||
# real runtime (base_url/api_key/api_mode) and call it through the same
|
||||
# request-building path any model uses — so per-model wire-format
|
||||
# handling (anthropic_messages, max_completion_tokens, fixed/forbidden
|
||||
# temperature) applies identically to it. MoA imposes no output cap:
|
||||
# max_tokens is passed through from the caller (normally None → omitted
|
||||
# → the model's real maximum). The preset's old hardcoded 4096 default
|
||||
# is gone — it truncated long syntheses.
|
||||
agg_kwargs["model"] = aggregator.get("model")
|
||||
agg_kwargs["temperature"] = aggregator_temperature
|
||||
return call_llm(
|
||||
task="moa_aggregator",
|
||||
provider=aggregator.get("provider"),
|
||||
model=aggregator.get("model"),
|
||||
messages=agg_messages,
|
||||
temperature=aggregator_temperature,
|
||||
max_tokens=agg_kwargs.get("max_tokens"),
|
||||
tools=agg_kwargs.get("tools"),
|
||||
extra_body=agg_kwargs.get("extra_body"),
|
||||
**_slot_runtime(aggregator),
|
||||
)
|
||||
|
||||
|
||||
class MoAClient:
|
||||
def __init__(self, preset_name: str, reference_callback: Any = None):
|
||||
def __init__(self, preset_name: str):
|
||||
self.chat = type("_MoAChat", (), {})()
|
||||
self.chat.completions = MoAChatCompletions(preset_name, reference_callback=reference_callback)
|
||||
self.chat.completions = MoAChatCompletions(preset_name)
|
||||
|
||||
@@ -478,16 +478,6 @@ def _infer_provider_from_url(base_url: str) -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
def _lmstudio_server_root(base_url: str) -> str:
    """Derive the LM Studio server root used for native ``/api/v1`` endpoints.

    The URL is normalised with ``_normalize_base_url`` and stripped of
    trailing slashes; then the first matching suffix among ``/api/v1``,
    ``/api`` and ``/v1`` (checked in that order) is removed, along with any
    trailing slashes that removal exposes. At most one suffix is removed.

    Args:
        base_url: The configured endpoint URL.

    Returns:
        The URL with the API path suffix removed, or the normalised URL
        unchanged when none of the suffixes match.
    """
    root = _normalize_base_url(base_url).rstrip("/")
    # Order matters: the longest suffix is tested first so "/api/v1" is not
    # mistaken for a bare "/v1".
    matched = next(
        (candidate for candidate in ("/api/v1", "/api", "/v1") if root.endswith(candidate)),
        None,
    )
    if matched is None:
        return root
    return root[: -len(matched)].rstrip("/")
|
||||
|
||||
|
||||
def _is_known_provider_base_url(base_url: str) -> bool:
    """Return True when ``_infer_provider_from_url`` recognises *base_url*."""
    inferred = _infer_provider_from_url(base_url)
    return inferred is not None
|
||||
|
||||
@@ -559,7 +549,6 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
|
||||
server_url = normalized
|
||||
if server_url.endswith("/v1"):
|
||||
server_url = server_url[:-3]
|
||||
lmstudio_url = _lmstudio_server_root(base_url)
|
||||
|
||||
headers = _auth_headers(api_key)
|
||||
|
||||
@@ -567,7 +556,7 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
|
||||
with httpx.Client(timeout=2.0, headers=headers) as client:
|
||||
# LM Studio exposes /api/v1/models — check first (most specific)
|
||||
try:
|
||||
r = client.get(f"{lmstudio_url}/api/v1/models")
|
||||
r = client.get(f"{server_url}/api/v1/models")
|
||||
if r.status_code == 200:
|
||||
return "lm-studio"
|
||||
except Exception:
|
||||
@@ -785,7 +774,7 @@ def fetch_endpoint_model_metadata(
|
||||
if is_local_endpoint(normalized):
|
||||
try:
|
||||
if detect_local_server_type(normalized, api_key=api_key) == "lm-studio":
|
||||
server_url = _lmstudio_server_root(normalized)
|
||||
server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized
|
||||
response = requests.get(
|
||||
server_url.rstrip("/") + "/api/v1/models",
|
||||
headers=headers,
|
||||
@@ -1199,56 +1188,6 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
|
||||
return None
|
||||
|
||||
|
||||
def query_ollama_supports_vision(model: str, base_url: str, api_key: str = "") -> Optional[bool]:
    """Report whether an Ollama server says *model* supports vision.

    The server's ``/api/show`` endpoint is queried. A non-empty
    ``capabilities`` list is authoritative: ``True`` if it contains
    ``"vision"`` (case-insensitive), ``False`` otherwise. When that list is
    absent or empty, any ``model_info`` key containing
    ``vision.block_count`` is taken as evidence of vision support.

    Args:
        model: Model identifier; any provider prefix is removed with
            ``_strip_provider_prefix`` before querying.
        base_url: Server URL; a trailing ``/v1`` is dropped to reach the
            native API.
        api_key: Optional credential passed to ``_auth_headers``.

    Returns:
        ``True`` / ``False`` when the server gives a definite answer, or
        ``None`` when the model or URL is empty, the server is not detected
        as Ollama, the request fails or returns a non-200 status, the
        response is not a JSON object, or the response carries no usable
        signal.
    """
    import httpx

    bare_model = _strip_provider_prefix(model)
    if not bare_model or not base_url:
        return None

    # Only talk to servers positively identified as Ollama; a detection
    # error is treated the same as "not Ollama".
    try:
        if detect_local_server_type(base_url, api_key=api_key) != "ollama":
            return None
    except Exception:
        return None

    server_url = base_url.rstrip("/")
    if server_url.endswith("/v1"):
        server_url = server_url[:-3]

    headers = _auth_headers(api_key)

    try:
        with httpx.Client(timeout=3.0, headers=headers) as client:
            resp = client.post(f"{server_url}/api/show", json={"name": bare_model})
            if resp.status_code != 200:
                return None
            data = resp.json()
    except Exception:
        return None

    # A syntactically valid JSON body need not be an object (it could be a
    # list, string, or null); without this guard `.get` below would raise
    # instead of yielding the documented None.
    if not isinstance(data, dict):
        return None

    caps = data.get("capabilities")
    if isinstance(caps, list):
        if any(str(cap).lower() == "vision" for cap in caps):
            return True
        if caps:
            # A populated list without "vision" is a definite negative; an
            # empty list falls through to the model_info heuristic.
            return False

    model_info = data.get("model_info")
    if isinstance(model_info, dict):
        for key in model_info:
            if "vision.block_count" in str(key).lower():
                return True

    return None
|
||||
|
||||
|
||||
def _query_ollama_api_show(model: str, base_url: str, api_key: str = "") -> Optional[int]:
|
||||
"""Query an Ollama server's native ``/api/show`` for context length.
|
||||
|
||||
@@ -1358,7 +1297,6 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
|
||||
server_url = base_url.rstrip("/")
|
||||
if server_url.endswith("/v1"):
|
||||
server_url = server_url[:-3]
|
||||
lmstudio_url = _lmstudio_server_root(base_url)
|
||||
|
||||
headers = _auth_headers(api_key)
|
||||
|
||||
@@ -1402,7 +1340,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
|
||||
# Use _model_id_matches for fuzzy matching: LM Studio stores models as
|
||||
# "publisher/slug" but users configure only "slug" after "local:" prefix.
|
||||
if server_type == "lm-studio":
|
||||
resp = client.get(f"{lmstudio_url}/api/v1/models")
|
||||
resp = client.get(f"{server_url}/api/v1/models")
|
||||
if resp.status_code == 200:
|
||||
data = resp.json()
|
||||
for m in data.get("models", []):
|
||||
@@ -1708,34 +1646,6 @@ def get_model_context_length(
|
||||
if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
|
||||
return config_context_length
|
||||
|
||||
# 0a. MoA virtual provider — ``model`` is a preset name, not a real model,
|
||||
# and ``base_url`` is the local virtual endpoint, so every probe below would
|
||||
# miss and fall through to the 256K default. The aggregator is the acting
|
||||
# model, so resolve the context window from the aggregator slot's real
|
||||
# provider+model instead. References are advisory-only and never bound the
|
||||
# acting context, so they're ignored here.
|
||||
if (provider or "").strip().lower() == "moa":
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from hermes_cli.moa_config import resolve_moa_preset
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
|
||||
preset = resolve_moa_preset(load_config().get("moa") or {}, model)
|
||||
agg = preset.get("aggregator") or {}
|
||||
agg_provider = str(agg.get("provider") or "").strip()
|
||||
agg_model = str(agg.get("model") or "").strip()
|
||||
if agg_model and agg_provider and agg_provider.lower() != "moa":
|
||||
rt = resolve_runtime_provider(requested=agg_provider, target_model=agg_model)
|
||||
return get_model_context_length(
|
||||
agg_model,
|
||||
base_url=rt.get("base_url", "") or "",
|
||||
api_key=rt.get("api_key", "") or "",
|
||||
provider=agg_provider,
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("MoA aggregator context-length resolution failed", exc_info=True)
|
||||
# Fall through to the generic default if aggregator resolution failed.
|
||||
|
||||
# 0b. custom_providers per-model override — check before any probe.
|
||||
# This closes the gap where /model switch and display paths used to fall
|
||||
# back to 128K despite the user having a per-model context_length set.
|
||||
|
||||
@@ -26,7 +26,7 @@ from __future__ import annotations
|
||||
import os
|
||||
import sys
|
||||
import urllib.request
|
||||
from typing import Any, Optional
|
||||
from typing import Optional
|
||||
|
||||
from utils import base_url_hostname, normalize_proxy_url
|
||||
|
||||
@@ -142,46 +142,6 @@ def _get_proxy_for_base_url(base_url: Optional[str]) -> Optional[str]:
|
||||
return proxy
|
||||
|
||||
|
||||
def build_keepalive_http_client(
    base_url: str = "",
    *,
    async_mode: bool = False,
) -> Optional[Any]:
    """Create an httpx client for OpenAI SDK calls with env-only proxy policy.

    The proxy is resolved from explicit ``HTTPS_PROXY`` / ``NO_PROXY`` env
    vars via ``_get_proxy_for_base_url``. Supplying a custom transport
    disables httpx's default ``trust_env`` path, so macOS system proxy
    settings from ``urllib.request.getproxies()`` (which omit the
    ExceptionsList) are not applied. Mirrors
    ``AIAgent._build_keepalive_http_client``.

    Args:
        base_url: Endpoint the client will talk to; used for the GitHub
            Copilot special case and for proxy selection.
        async_mode: When true, build ``httpx.AsyncClient`` instead of
            ``httpx.Client``.

    Returns:
        The configured client, or ``None`` if anything goes wrong while
        building it (best-effort: the caller decides the fallback).
    """
    try:
        import httpx
        import socket

        # GitHub Copilot endpoints get a plain default client with no custom
        # transport or keepalive socket options.
        if "api.githubcopilot.com" in str(base_url or "").lower():
            plain_factory = httpx.AsyncClient if async_mode else httpx.Client
            return plain_factory()

        keepalive_opts = [(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)]
        if hasattr(socket, "TCP_KEEPIDLE"):
            # Platforms exposing TCP_KEEPIDLE: idle 30s, probe every 10s,
            # give up after 3 failed probes.
            keepalive_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 30))
            keepalive_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 10))
            keepalive_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 3))
        elif hasattr(socket, "TCP_KEEPALIVE"):
            # Platforms that only expose TCP_KEEPALIVE for the idle time.
            keepalive_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPALIVE, 30))

        proxy = _get_proxy_for_base_url(base_url)
        if async_mode:
            transport = httpx.AsyncHTTPTransport(socket_options=keepalive_opts)
            return httpx.AsyncClient(transport=transport, proxy=proxy)
        transport = httpx.HTTPTransport(socket_options=keepalive_opts)
        return httpx.Client(transport=transport, proxy=proxy)
    except Exception:
        return None
|
||||
|
||||
|
||||
def _install_safe_stdio() -> None:
|
||||
"""Wrap stdout/stderr so best-effort console output cannot crash the agent."""
|
||||
for stream_name in ("stdout", "stderr"):
|
||||
@@ -204,5 +164,4 @@ __all__ = [
|
||||
"_install_safe_stdio",
|
||||
"_get_proxy_from_env",
|
||||
"_get_proxy_for_base_url",
|
||||
"build_keepalive_http_client",
|
||||
]
|
||||
|
||||
@@ -88,15 +88,12 @@ def _find_hermes_md(cwd: Path) -> Optional[Path]:
|
||||
stop_at = _find_git_root(cwd)
|
||||
current = cwd.resolve()
|
||||
|
||||
# When there is no git root, only check cwd itself – walking parents
|
||||
# could pick up a .hermes.md planted in /tmp, /home, etc.
|
||||
search_dirs = [current, *current.parents] if stop_at else [current]
|
||||
|
||||
for directory in search_dirs:
|
||||
for directory in [current, *current.parents]:
|
||||
for name in _HERMES_MD_NAMES:
|
||||
candidate = directory / name
|
||||
if candidate.is_file():
|
||||
return candidate
|
||||
# Stop walking at the git root (or filesystem root).
|
||||
if stop_at and directory == stop_at:
|
||||
break
|
||||
return None
|
||||
@@ -620,12 +617,7 @@ DEVELOPER_ROLE_MODELS = ("gpt-5", "codex")
|
||||
PLATFORM_HINTS = {
|
||||
"whatsapp": (
|
||||
"You are on a text messaging communication platform, WhatsApp. "
|
||||
"Standard markdown (**bold**, *italic*, ~~strike~~, # headers, "
|
||||
"`code`, ```code blocks```, [links](url)) is auto-converted to "
|
||||
"WhatsApp's native syntax (*bold*, _italic_, ~strike~, monospace) — "
|
||||
"feel free to write in markdown, and use bullet lists ('- item') "
|
||||
"freely. Tables are NOT supported — prefer bullet lists or labeled "
|
||||
"key:value pairs. "
|
||||
"Please do not use markdown as it does not render. "
|
||||
"You can send media files natively: to deliver a file to the user, "
|
||||
"include MEDIA:/absolute/path/to/file in your response. The file "
|
||||
"will be sent as a native WhatsApp attachment — images (.jpg, .png, "
|
||||
@@ -690,11 +682,7 @@ PLATFORM_HINTS = {
|
||||
),
|
||||
"signal": (
|
||||
"You are on a text messaging communication platform, Signal. "
|
||||
"Standard markdown (**bold**, *italic*, ~~strike~~, # headers, "
|
||||
"`code`, ```code blocks```) is auto-converted to Signal's native "
|
||||
"rich formatting — feel free to write in markdown, and use bullet "
|
||||
"lists ('- item') freely (they render as • bullets). Tables are NOT "
|
||||
"supported — prefer bullet lists or labeled key:value pairs. "
|
||||
"Please do not use markdown as it does not render. "
|
||||
"You can send media files natively: to deliver a file to the user, "
|
||||
"include MEDIA:/absolute/path/to/file in your response. Images "
|
||||
"(.png, .jpg, .webp) appear as photos, audio as attachments, and other "
|
||||
@@ -929,7 +917,8 @@ def _probe_remote_backend(env_type: str) -> str | None:
|
||||
try:
|
||||
# Import locally: tools/ imports are heavy and only relevant when a
|
||||
# non-local backend is actually configured.
|
||||
from tools.terminal_tool import _create_environment, _get_env_config # type: ignore
|
||||
from tools.terminal_tool import _get_env_config # type: ignore
|
||||
from tools.environments import get_environment # type: ignore
|
||||
except Exception as e:
|
||||
logger.debug("Backend probe unavailable (import failed): %s", e)
|
||||
_BACKEND_PROBE_CACHE[cache_key] = ""
|
||||
@@ -937,59 +926,7 @@ def _probe_remote_backend(env_type: str) -> str | None:
|
||||
|
||||
try:
|
||||
config = _get_env_config()
|
||||
# Build the environment the same way tools/terminal_tool.py does for a
|
||||
# live command: select the backend image, then assemble ssh/container
|
||||
# config from the env-derived dict. (There is no `get_environment`
|
||||
# factory — the real entry point is `_create_environment`.)
|
||||
if env_type == "docker":
|
||||
image = config.get("docker_image", "")
|
||||
elif env_type == "singularity":
|
||||
image = config.get("singularity_image", "")
|
||||
elif env_type == "modal":
|
||||
image = config.get("modal_image", "")
|
||||
elif env_type == "daytona":
|
||||
image = config.get("daytona_image", "")
|
||||
else:
|
||||
image = ""
|
||||
|
||||
ssh_config = None
|
||||
if env_type == "ssh":
|
||||
ssh_config = {
|
||||
"host": config.get("ssh_host", ""),
|
||||
"user": config.get("ssh_user", ""),
|
||||
"port": config.get("ssh_port", 22),
|
||||
"key": config.get("ssh_key", ""),
|
||||
"persistent": config.get("ssh_persistent", False),
|
||||
}
|
||||
|
||||
container_config = None
|
||||
if env_type in {"docker", "singularity", "modal", "daytona"}:
|
||||
container_config = {
|
||||
"container_cpu": config.get("container_cpu", 1),
|
||||
"container_memory": config.get("container_memory", 5120),
|
||||
"container_disk": config.get("container_disk", 51200),
|
||||
"container_persistent": config.get("container_persistent", True),
|
||||
"modal_mode": config.get("modal_mode", "auto"),
|
||||
"docker_volumes": config.get("docker_volumes", []),
|
||||
"docker_mount_cwd_to_workspace": config.get("docker_mount_cwd_to_workspace", False),
|
||||
"docker_forward_env": config.get("docker_forward_env", []),
|
||||
"docker_env": config.get("docker_env", {}),
|
||||
"docker_run_as_host_user": config.get("docker_run_as_host_user", False),
|
||||
"docker_extra_args": config.get("docker_extra_args", []),
|
||||
"docker_persist_across_processes": config.get("docker_persist_across_processes", True),
|
||||
"docker_orphan_reaper": config.get("docker_orphan_reaper", True),
|
||||
}
|
||||
|
||||
env = _create_environment(
|
||||
env_type=env_type,
|
||||
image=image,
|
||||
cwd=config.get("cwd", ""),
|
||||
timeout=config.get("timeout", 180),
|
||||
ssh_config=ssh_config,
|
||||
container_config=container_config,
|
||||
task_id="prompt-backend-probe",
|
||||
host_cwd=config.get("host_cwd"),
|
||||
)
|
||||
env = get_environment(config)
|
||||
# Single-line POSIX probe — works on any Unixy backend. Wrapped in
|
||||
# `2>/dev/null` so a missing binary doesn't pollute the output.
|
||||
probe_cmd = (
|
||||
|
||||
@@ -1,216 +0,0 @@
|
||||
"""Per-reasoning-model stale-timeout floor for known reasoning models.
|
||||
|
||||
Reasoning models (those that emit extended thinking blocks before their
|
||||
first content token) routinely exceed Hermes's default chat-model
|
||||
stale detectors:
|
||||
|
||||
* Stream stale detector: ``HERMES_STREAM_STALE_TIMEOUT`` default 180s
|
||||
``agent/chat_completion_helpers.py:2544``
|
||||
* Non-stream stale detector: ``HERMES_API_CALL_STALE_TIMEOUT`` default 90s
|
||||
``run_agent.py:1140``
|
||||
|
||||
For NVIDIA Nemotron 3 Ultra on the hosted NIM gateway the empirical
|
||||
upstream idle kill is ~120s (first-party reproduction at
|
||||
NVIDIA/NemoClaw#4846 — TTFB ~31s, stream dies at 120s). The same
|
||||
failure mode exists on OpenAI o1/o3, Anthropic Opus 4.x thinking,
|
||||
DeepSeek R1, Qwen QwQ, xAI Grok reasoning — every cloud reasoning
|
||||
model hits upstream-proxies / load-balancers with idle timeouts
|
||||
shorter than the model's thinking phase. Result: the stale detector
|
||||
kills the connection mid-think, surfacing as
|
||||
``BrokenPipeError``/``RemoteProtocolError`` on the next read.
|
||||
|
||||
This module provides a floor that the existing stale-detector scaling
|
||||
blocks consult via :func:`get_reasoning_stale_timeout_floor` and
|
||||
apply as ``max(default, floor)``. It is a FLOOR:
|
||||
|
||||
* Never overrides explicit user config (``providers.<id>.models.<model>.stale_timeout_seconds``
|
||||
or ``request_timeout_seconds`` already wins — this code never runs
|
||||
in that branch).
|
||||
* Never lowers an existing threshold.
|
||||
* Has zero effect on non-reasoning models — they are not in the
|
||||
allowlist and the resolver returns ``None``.
|
||||
|
||||
Matching uses start-anchored regex on the slug-only component of
|
||||
the model name (after stripping any aggregator prefix like
|
||||
``openai/``, ``x-ai/``, ``anthropic/``). The right-anchor matches
|
||||
end-of-string or a ``-``/``.``/``_`` slug separator, so ``qwen3-235b``
|
||||
matches the ``qwen3`` family entry (a future model slug would be
|
||||
``qwen3-235b-instruct`` and would also match) but ``some-other-qwen3``
|
||||
does NOT match ``qwen3`` (the ``-qwen3`` is not at start of slug).
|
||||
|
||||
The ``o1`` case is the most delicate: a model named
|
||||
``llama-4-70b-o1-preview`` is a hypothetical community derivative that
|
||||
should NOT trigger the reasoning-model floor for the user (the user
|
||||
chose a non-OpenAI model, not a reasoning model). The start-of-slug
|
||||
anchor naturally excludes this — the matched ``o1-preview`` is at
|
||||
position 12 of the slug, not at position 0. The previous substring-
|
||||
with-trailing-hyphen design would have over-matched here, which is
|
||||
why start-of-slug anchoring is the right shape.
|
||||
|
||||
Fixes #52217.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# (slug, floor_seconds). Each slug is matched as a discrete
|
||||
# word-boundary component via the wrapper regex in ``_match_any``
|
||||
# below. Order is irrelevant — the first regex match wins.
|
||||
_REASONING_STALE_TIMEOUT_FLOORS: tuple[tuple[str, int], ...] = (
|
||||
# NVIDIA Nemotron — reasoning models behind hosted NIM with
|
||||
# documented 60-180s upstream idle kill (NVIDIA/NemoClaw#4846:
|
||||
# 120s measured).
|
||||
("nemotron-3-ultra", 600),
|
||||
("nemotron-3-super", 600),
|
||||
("nemotron-3-nano", 300),
|
||||
# DeepSeek — R1 reasoning model on hosted NIM / DeepSeek direct.
|
||||
("deepseek-r1", 600),
|
||||
("deepseek-reasoner", 600),
|
||||
# Qwen — QwQ reasoning + Qwen3 thinking variants. QwQ-32B
|
||||
# preview is the stable slug; ``qwen3`` covers the family of
|
||||
# thinking-mode Qwen3 models (qwen3-235b-a22b, qwen3-32b, etc.)
|
||||
# without over-matching every Qwen3 instruct variant — the
|
||||
# right-anchor requires the slug to be at the start of the
|
||||
# remaining model name, so ``qwen3-235b-instruct`` (instruct is
|
||||
# NOT a thinking variant) would still match. Acceptable
|
||||
# trade-off: instruct variants of qwen3 get the 180s floor
|
||||
# even though they don't reason. The cost is a slightly longer
|
||||
# wait on a hung provider; the alternative (matching only
|
||||
# ``qwen3-.*-thinking``) breaks the moment NVIDIA or Alibaba
|
||||
# ships a slightly different naming shape.
|
||||
("qwq-32b", 300),
|
||||
("qwen3", 180),
|
||||
# OpenAI o-series — known multi-minute TTFB. Each variant
|
||||
# enumerated explicitly so bare ``o1`` doesn't over-match
|
||||
# ``olmo-1`` or hypothetical future community derivatives.
|
||||
("o1", 600),
|
||||
("o1-mini", 600),
|
||||
("o1-pro", 600),
|
||||
("o1-preview", 600),
|
||||
("o3", 600),
|
||||
("o3-pro", 600),
|
||||
("o3-mini", 300),
|
||||
("o4-mini", 300),
|
||||
# Anthropic Claude 4.x thinking variants. Anchored at
|
||||
# ``claude-opus-4`` so non-thinking Claude 3.x or future
|
||||
# non-reasoning Claude variants don't match.
|
||||
("claude-opus-4", 240),
|
||||
("claude-sonnet-4.5", 180),
|
||||
("claude-sonnet-4.6", 180),
|
||||
# xAI Grok reasoning variants. Explicit reasoning-only keys
|
||||
# plus one for the ``non-reasoning`` variant so users picking
|
||||
# the fast variant don't get the 300s floor. Bare ``grok-3``,
|
||||
# ``grok-4`` etc. don't match — only the explicit reasoning /
|
||||
# non-reasoning pairs.
|
||||
("grok-4-fast-reasoning", 300),
|
||||
("grok-4.20-reasoning", 300),
|
||||
("grok-4-fast-non-reasoning", 180),
|
||||
)
|
||||
|
||||
|
||||
# Pre-compile each pattern. Wrapper = start-of-slug + slug + end-or-
|
||||
# separator, where ``start-of-slug`` means start-of-string OR
|
||||
# immediately after the last ``/`` (aggregator separator) and
|
||||
# ``end-or-separator`` means end-of-string OR a ``-``/``.``/``_``.
|
||||
#
|
||||
# Why start-of-slug and not start-of-string: aggregator prefixes
|
||||
# like ``openai/`` should not affect matching — the slug identity is
|
||||
# the part after the last ``/``. Stripping the aggregator prefix in
|
||||
# :func:`get_reasoning_stale_timeout_floor` before regex matching
|
||||
# gives the wrapper a clean start-of-string anchor.
|
||||
#
|
||||
# Why end-or-separator on the right: ``openai/o3-mini`` must match
|
||||
# the ``o3-mini`` slug (the right anchor is end-of-string). And
|
||||
# ``openai/o3-mini-2025-01-31`` must also match ``o3-mini`` (the right
|
||||
# anchor is the ``-`` separator). But ``openai/o3-mini-fork`` should
|
||||
# NOT match ``o3-mini`` if we wanted to exclude forks — though the
|
||||
# pattern ``o3-mini-fork`` would be matched as a derivative anyway,
|
||||
# so we accept that community forks inheriting the same prefix are
|
||||
# treated as reasoning models (a reasonable default — the upstream
|
||||
# gateway timing is the same).
|
||||
_PATTERN_CACHE: dict[str, re.Pattern[str]] = {}
|
||||
|
||||
|
||||
def _get_pattern(slug: str) -> re.Pattern[str]:
|
||||
compiled = _PATTERN_CACHE.get(slug)
|
||||
if compiled is None:
|
||||
compiled = re.compile(
|
||||
r"^"
|
||||
+ re.escape(slug)
|
||||
+ r"(?:$|[\-._])"
|
||||
)
|
||||
_PATTERN_CACHE[slug] = compiled
|
||||
return compiled
|
||||
|
||||
|
||||
def _match_any(model_lower: str) -> Optional[float]:
|
||||
"""Return the floor for the first matching slug, else None.
|
||||
|
||||
Each table entry is matched as a start-of-slug prefix with the
|
||||
slug-separator-or-end-of-string right-anchor. Table iteration
|
||||
order is irrelevant: longest slug wins (so ``o3-mini`` beats
|
||||
``o3`` on a model like ``openai/o3-mini``).
|
||||
"""
|
||||
# Sort by slug length descending so longer / more-specific slugs
|
||||
# win on shared prefixes (o3-mini beats o3).
|
||||
sorted_floors = sorted(
|
||||
_REASONING_STALE_TIMEOUT_FLOORS, key=lambda kv: -len(kv[0])
|
||||
)
|
||||
for slug, floor in sorted_floors:
|
||||
if _get_pattern(slug).search(model_lower):
|
||||
return float(floor)
|
||||
return None
|
||||
|
||||
|
||||
def get_reasoning_stale_timeout_floor(model: object) -> Optional[float]:
|
||||
"""Return the stale-timeout floor (seconds) for a known reasoning model.
|
||||
|
||||
Returns ``None`` when the model is not in the allowlist or the
|
||||
argument is empty / not a string. Matching uses
|
||||
word-boundary-anchored regex on the lowercased model name, so
|
||||
``openai/o3-mini`` matches the ``o3-mini`` slug but
|
||||
``olmo-1`` does NOT match ``o1`` (the ``o1`` substring is not
|
||||
at a word boundary inside ``olmo-1``).
|
||||
|
||||
Aggregator prefixes (``openai/``, ``x-ai/``, ``anthropic/`` etc.)
|
||||
are preserved through matching — the ``/`` is itself a word
|
||||
boundary, so ``openai/o3-mini`` matches ``o3-mini`` because the
|
||||
``/`` before ``o3-mini`` satisfies the left-anchor alternation.
|
||||
|
||||
This is a FLOOR — callers must apply it as ``max(default, floor)``
|
||||
and only when no explicit user-configured per-model
|
||||
``stale_timeout_seconds`` exists.
|
||||
|
||||
>>> get_reasoning_stale_timeout_floor("nvidia/nemotron-3-ultra-550b-a55b")
|
||||
600.0
|
||||
>>> get_reasoning_stale_timeout_floor("openai/o3-mini")
|
||||
300.0
|
||||
>>> get_reasoning_stale_timeout_floor("deepseek/deepseek-r1")
|
||||
600.0
|
||||
>>> get_reasoning_stale_timeout_floor("qwen/qwen3-235b-a22b-thinking")
|
||||
180.0
|
||||
>>> get_reasoning_stale_timeout_floor("x-ai/grok-4-fast-reasoning")
|
||||
300.0
|
||||
>>> get_reasoning_stale_timeout_floor("anthropic/claude-opus-4-6")
|
||||
240.0
|
||||
>>> get_reasoning_stale_timeout_floor("gpt-4o") is None
|
||||
True
|
||||
>>> get_reasoning_stale_timeout_floor("olmo-1") is None
|
||||
True
|
||||
>>> get_reasoning_stale_timeout_floor(None) is None
|
||||
True
|
||||
"""
|
||||
if not model or not isinstance(model, str):
|
||||
return None
|
||||
name = model.strip().lower()
|
||||
if not name:
|
||||
return None
|
||||
# Strip aggregator prefix (everything before and including the
|
||||
# last ``/``). The wrapper regex anchors at start-of-string, so
|
||||
# the slug identity is the bare model name.
|
||||
if "/" in name:
|
||||
name = name.rsplit("/", 1)[1]
|
||||
return _match_any(name)
|
||||
256
agent/redact.py
256
agent/redact.py
@@ -10,7 +10,6 @@ the first 6 and last 4 characters for debuggability.
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shlex
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -108,60 +107,12 @@ _PREFIX_PATTERNS = [
|
||||
r"ntn_[A-Za-z0-9]{10,}", # Notion internal integration token
|
||||
]
|
||||
|
||||
# ENV assignment patterns: KEY=value where KEY contains a secret-like name.
|
||||
# Uppercase keys tolerate spaces around "=" (e.g. ``FOO_SECRET = bar``) because
|
||||
# an all-caps key is almost never prose/code.
|
||||
# ENV assignment patterns: KEY=value where KEY contains a secret-like name
|
||||
_SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)"
|
||||
_ENV_ASSIGN_RE = re.compile(
|
||||
rf"([A-Z0-9_]{{0,50}}{_SECRET_ENV_NAMES}[A-Z0-9_]{{0,50}})\s*=\s*(['\"]?)(\S+)\2",
|
||||
)
|
||||
|
||||
# Lowercase / dotted / hyphenated config keys from config files
|
||||
# (application.properties, .env, YAML-ish dumps): ``spring.datasource.password=secret``,
|
||||
# ``app.api.key=xyz``, ``password=secret``. The uppercase _ENV_ASSIGN_RE above
|
||||
# never matched these, so config-file passwords leaked verbatim (issue #16413).
|
||||
#
|
||||
# These run only in a config-file context, NOT in prose, code, or URLs — three
|
||||
# carve-outs preserved from the original design (#4367 + the documented
|
||||
# web-URL passthrough below):
|
||||
# 1. The value is bounded by ``[^\s&]`` (stops at whitespace AND ``&``) so
|
||||
# form-urlencoded bodies are handled pair-by-pair (by _redact_form_body),
|
||||
# not greedily swallowed.
|
||||
# 2. _CFG_DOTTED_RE only matches when the key is NAMESPACED (contains a dot),
|
||||
# which is unambiguously a config key — never a prose word.
|
||||
# 3. _CFG_ANCHORED_RE matches a bare secret-word key only at line start
|
||||
# (optionally after ``export``), so conversational ``I have password=foo``
|
||||
# mid-sentence is left alone.
|
||||
# The colon-form URL guard (skip when ``://`` present) lives at the call site.
|
||||
_SECRET_CFG_NAMES = r"(?:api[ _.\-]?key|token|secret|passwd|password|credential|auth)"
|
||||
_CFG_VALUE = r"(['\"]?)([^\s&]+?)\2(?=[\s&]|$)"
|
||||
# Namespaced (dotted) key: the secret word may sit anywhere in a dotted path.
|
||||
_CFG_DOTTED_RE = re.compile(
|
||||
rf"((?:[A-Za-z0-9_\-]+\.)+[A-Za-z0-9_.\-]*{_SECRET_CFG_NAMES}[A-Za-z0-9_.\-]*"
|
||||
rf"|[A-Za-z0-9_.\-]*{_SECRET_CFG_NAMES}[A-Za-z0-9_.\-]*\.[A-Za-z0-9_.\-]+)"
|
||||
rf"={_CFG_VALUE}",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
# Line-anchored bare key: ``password=…`` / ``export api_key=…`` at start of line.
|
||||
_CFG_ANCHORED_RE = re.compile(
|
||||
rf"(^[ \t]*(?:export[ \t]+)?[A-Za-z0-9_\-]*{_SECRET_CFG_NAMES}[A-Za-z0-9_\-]*)={_CFG_VALUE}",
|
||||
re.IGNORECASE | re.MULTILINE,
|
||||
)
|
||||
|
||||
# Unquoted YAML / colon config (e.g. ``password: secret``,
|
||||
# ``spring.datasource.password: hunter2``). The secret keyword must be part of
|
||||
# the KEY (anchored to the start of the line/indent), and the value is a single
|
||||
# whitespace-free token — so prose like ``note: secret meeting`` (keyword in the
|
||||
# value) and ``error: token expired`` are left alone. Bare ``auth`` is excluded
|
||||
# from the key set so ``Authorization:`` / ``author:`` don't match (the former
|
||||
# is masked by _AUTH_HEADER_RE); ``auth_token``/``auth-token`` still match via
|
||||
# the ``token`` keyword. Quoted values defer to _JSON_FIELD_RE via the lookahead.
|
||||
_YAML_CFG_NAMES = r"(?:api[ _.\-]?key|token|secret|passwd|password|credential)"
|
||||
_YAML_ASSIGN_RE = re.compile(
|
||||
rf"(^[ \t]*[A-Za-z0-9_.\-]*{_YAML_CFG_NAMES}[A-Za-z0-9_.\-]*)(:[ \t]*)(?!['\"])([^\s&]+)",
|
||||
re.IGNORECASE | re.MULTILINE,
|
||||
)
|
||||
|
||||
# JSON field patterns: "apiKey": "value", "token": "value", etc.
|
||||
_JSON_KEY_NAMES = r"(?:api_?[Kk]ey|token|secret|password|access_token|refresh_token|auth_token|bearer|secret_value|raw_secret|secret_input|key_material)"
|
||||
_JSON_FIELD_RE = re.compile(
|
||||
@@ -174,15 +125,8 @@ _JSON_FIELD_RE = re.compile(
|
||||
# while the header name and scheme word are preserved for debuggability. The
|
||||
# previous rule only matched ``Bearer``, so ``Basic <base64 user:pass>`` and
|
||||
# ``token <pat>`` leaked verbatim into logs/transcripts.
|
||||
#
|
||||
# The credential class excludes quote characters (``"`` / ``'``): a token sitting
|
||||
# flush against a closing quote (``"Authorization: Bearer sk-..."``) must not pull
|
||||
# that quote into the match, or masking turns value corruption into *syntax*
|
||||
# corruption — the closing quote vanishes and the command/string no longer parses
|
||||
# (unterminated quote → shell EOF / Python SyntaxError). Real credentials never
|
||||
# contain ``"`` or ``'``, so excluding them is safe. See #43083.
|
||||
_AUTH_HEADER_RE = re.compile(
|
||||
r"((?:Proxy-)?Authorization:\s*)([A-Za-z][\w.+-]*\s+)?([^\s\"']+)",
|
||||
r"((?:Proxy-)?Authorization:\s*)([A-Za-z][\w.+-]*\s+)?(\S+)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
@@ -210,37 +154,9 @@ _PRIVATE_KEY_RE = re.compile(
|
||||
)
|
||||
|
||||
# Database connection strings: protocol://user:PASSWORD@host
|
||||
# Catches postgres, mysql, mongodb, redis, amqp URLs and redacts the password.
|
||||
# The userinfo and password groups forbid whitespace ([^:\s]+ / [^@\s]+) so the
|
||||
# match can never span a line break. A real DSN password never contains
|
||||
# whitespace; without this bound the greedy [^@]+ would scan past the end of a
|
||||
# code line to the next stray "@" (e.g. a Python decorator), swallowing
|
||||
# intervening lines and corrupting tool OUTPUT for any source containing a
|
||||
# postgresql:// f-string template. See issue #33801.
|
||||
# Catches postgres, mysql, mongodb, redis, amqp URLs and redacts the password
|
||||
_DB_CONNSTR_RE = re.compile(
|
||||
r"((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:\s]+:)([^@\s]+)(@)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# Bare-token credential in a web/transport URL: ``scheme://TOKEN@host``.
|
||||
# This is the ``git remote set-url origin https://PASSWORD@github.com/...``
|
||||
# shape from issue #6396 — a single opaque credential in the userinfo position
|
||||
# with NO ``user:pass`` colon. It is unambiguously a secret: legitimate
|
||||
# round-trip URLs (OAuth callbacks, magic links, pre-signed shares — see the
|
||||
# "Web-URL redaction is intentionally OFF" note in redact_sensitive_text) carry
|
||||
# their tokens in the QUERY STRING, never in bare userinfo. The colon form
|
||||
# ``user:pass@`` is deliberately left to pass through (commit "pass web URLs
|
||||
# through unchanged", #34029) and is NOT matched here — the token class forbids
|
||||
# ``:``. DB schemes are handled by _DB_CONNSTR_RE above and excluded here.
|
||||
#
|
||||
# Guards against false positives:
|
||||
# - 8+ char floor skips short usernames (git, admin, root, deploy, ubuntu).
|
||||
# - The token class ``[^\s:@/]`` cannot cross ``/``, so an ``@`` sitting in a
|
||||
# path or query (e.g. ``?q=user@example.com``) is never treated as userinfo.
|
||||
_URL_BARE_TOKEN_RE = re.compile(
|
||||
r"((?:https?|wss?|git|ssh|ftp|ftps|sftp)://)" # scheme
|
||||
r"([^\s:@/]{8,})" # bare token (no colon/slash/@), 8+ chars
|
||||
r"(@[^\s]+)", # @host...
|
||||
r"((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:]+:)([^@]+)(@)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
@@ -424,40 +340,7 @@ def _redact_form_body(text: str) -> str:
|
||||
return _redact_query_string(text.strip())
|
||||
|
||||
|
||||
def _mask_token_nonreusable(token: str) -> str:
    """Replace a prefix-matched credential with a NON-REUSABLE sentinel.

    :func:`_mask_token` keeps head/tail characters, which is fine for logs
    that are never fed back into a config. This variant instead emits a
    marker that:

    * cannot be mistaken for a usable-but-truncated key, so an agent that
      reads it from a config file and writes it back does NOT corrupt the
      stored credential into a dead 13-char string (issue #35519); and
    * carries none of the secret material (no head/tail chars).

    Only the recognizable vendor prefix label (e.g. "ghp_", "sk-") is kept,
    so the reader can still tell *which* kind of credential is present.

    Returns ``«redacted:<prefix>…»`` when the token starts with a known
    prefix, otherwise the generic ``«redacted-secret»``.
    """
    generic_marker = "«redacted-secret»"
    if not token:
        return generic_marker
    # First known prefix the token starts with; never any of the secret body.
    vendor_prefix = next(
        (candidate for candidate in _PREFIX_SUBSTRINGS if token.startswith(candidate)),
        "",
    )
    if not vendor_prefix:
        return generic_marker
    return f"«redacted:{vendor_prefix}…»"
|
||||
|
||||
|
||||
def redact_sensitive_text(
|
||||
text: str,
|
||||
*,
|
||||
force: bool = False,
|
||||
code_file: bool = False,
|
||||
file_read: bool = False,
|
||||
) -> str:
|
||||
def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = False) -> str:
|
||||
"""Apply all redaction patterns to a block of text.
|
||||
|
||||
Safe to call on any string -- non-matching text passes through unchanged.
|
||||
@@ -470,17 +353,6 @@ def redact_sensitive_text(
|
||||
constants, "apiKey": "test" fixtures). Prefix patterns, auth headers,
|
||||
private keys, DB connstrings, JWTs, and URL secrets are still redacted.
|
||||
|
||||
Set file_read=True for file *content* returned to the agent (read_file /
|
||||
search_files / cat). Secrets are STILL redacted — they are never exposed —
|
||||
but prefix-matched credentials are replaced with a non-reusable sentinel
|
||||
(``«redacted:ghp_…»``) instead of a head/tail-preserving mask
|
||||
(``ghp_S1...Pn2T``). The old mask looked like a real-but-truncated key, so
|
||||
an agent reading it from config.yaml and writing it back silently corrupted
|
||||
the stored credential into a dead 13-char value → 401 (issue #35519). The
|
||||
sentinel is syntactically invalid as a token, so it can't be mistaken for a
|
||||
usable key or written back as one. Implies code_file=True (config/data
|
||||
files shouldn't trigger the source-code ENV/JSON false-positive paths).
|
||||
|
||||
Performance: each regex pattern is gated behind a cheap substring
|
||||
pre-check (e.g. ``"=" in text`` for ENV assignments, ``"://" in text``
|
||||
for URLs, ``"eyJ" in text`` for JWTs). On a typical hermes log line
|
||||
@@ -499,15 +371,9 @@ def redact_sensitive_text(
|
||||
if not (force or _REDACT_ENABLED):
|
||||
return text
|
||||
|
||||
# file_read content shouldn't hit the source-code ENV/JSON false-positive
|
||||
# paths either (it's config/data, not log lines).
|
||||
if file_read:
|
||||
code_file = True
|
||||
|
||||
# Known prefixes (sk-, ghp_, etc.) — gate on substring presence
|
||||
if _has_known_prefix_substring(text):
|
||||
_prefix_sub = _mask_token_nonreusable if file_read else _mask_token
|
||||
text = _PREFIX_RE.sub(lambda m: _prefix_sub(m.group(1)), text)
|
||||
text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text)
|
||||
|
||||
# ENV assignments: OPENAI_API_KEY=*** (skip for code files — false positives)
|
||||
if not code_file:
|
||||
@@ -516,13 +382,6 @@ def redact_sensitive_text(
|
||||
name, quote, value = m.group(1), m.group(2), m.group(3)
|
||||
return f"{name}={quote}{_mask_token(value)}{quote}"
|
||||
text = _ENV_ASSIGN_RE.sub(_redact_env, text)
|
||||
# Lowercase/dotted config keys (issue #16413). Skip URLs entirely —
|
||||
# web-URL query params are intentionally passed through (see note
|
||||
# near the bottom of this function); _DB_CONNSTR_RE still guards
|
||||
# connection-string passwords.
|
||||
if "://" not in text:
|
||||
text = _CFG_DOTTED_RE.sub(_redact_env, text)
|
||||
text = _CFG_ANCHORED_RE.sub(_redact_env, text)
|
||||
|
||||
# JSON fields: "apiKey": "***" (skip for code files — false positives)
|
||||
if ":" in text and '"' in text:
|
||||
@@ -531,15 +390,6 @@ def redact_sensitive_text(
|
||||
return f'{key}: "{_mask_token(value)}"'
|
||||
text = _JSON_FIELD_RE.sub(_redact_json, text)
|
||||
|
||||
# Unquoted YAML / colon config: password: *** (after JSON so quoted
|
||||
# values are handled there; the lookahead in _YAML_ASSIGN_RE skips
|
||||
# quotes). Skip URLs — web-URL query params pass through by design.
|
||||
if ":" in text and "://" not in text:
|
||||
def _redact_yaml(m):
|
||||
key, sep, value = m.group(1), m.group(2), m.group(3)
|
||||
return f"{key}{sep}{_mask_token(value)}"
|
||||
text = _YAML_ASSIGN_RE.sub(_redact_yaml, text)
|
||||
|
||||
# Authorization headers — _AUTH_HEADER_RE matches any scheme after
|
||||
# "[Proxy-]Authorization:" case-insensitively, so "uthorization" is the
|
||||
# cheapest substring gate that covers every casing without a casefold().
|
||||
@@ -569,32 +419,9 @@ def redact_sensitive_text(
|
||||
if "BEGIN" in text and "-----" in text:
|
||||
text = _PRIVATE_KEY_RE.sub("[REDACTED PRIVATE KEY]", text)
|
||||
|
||||
# Database connection string passwords. With code_file=True, a password
|
||||
# group that is a pure ``{...}`` brace expression is an f-string template
|
||||
# reference (e.g. f"postgresql://{user}:{pass}@{host}"), not a literal
|
||||
# credential — preserve it. Literal passwords are still redacted. The regex
|
||||
# forbids whitespace in the password group, so a single-line template's
|
||||
# group(2) is exactly the brace expression. See issue #33801.
|
||||
# Database connection string passwords
|
||||
if "://" in text:
|
||||
if code_file:
|
||||
def _redact_db(m):
|
||||
pw = m.group(2)
|
||||
if pw.startswith("{") and pw.endswith("}"):
|
||||
return m.group(0)
|
||||
return f"{m.group(1)}***{m.group(3)}"
|
||||
text = _DB_CONNSTR_RE.sub(_redact_db, text)
|
||||
else:
|
||||
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
|
||||
|
||||
# Bare-token userinfo in web/transport URLs: ``scheme://TOKEN@host``.
|
||||
# The git-remote-with-embedded-password shape from #6396. Only the
|
||||
# colon-less bare-token form is redacted — ``user:pass@`` and
|
||||
# query-string tokens are left to pass through (see the web-URL note
|
||||
# below). See _URL_BARE_TOKEN_RE for the false-positive guards.
|
||||
text = _URL_BARE_TOKEN_RE.sub(
|
||||
lambda m: f"{m.group(1)}{_mask_token(m.group(2))}{m.group(3)}",
|
||||
text,
|
||||
)
|
||||
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
|
||||
|
||||
# JWT tokens (eyJ... — base64-encoded JSON headers)
|
||||
if "eyJ" in text:
|
||||
@@ -607,12 +434,7 @@ def redact_sensitive_text(
|
||||
# blanket-redacting param values by name breaks those skills mid-flow.
|
||||
# Known credential shapes (sk-, ghp_, JWTs, etc.) inside URLs are still
|
||||
# caught by _PREFIX_RE and _JWT_RE above. DB connection-string passwords
|
||||
# are still caught by _DB_CONNSTR_RE. The ONE userinfo case still redacted
|
||||
# is the colon-less bare-token form ``scheme://TOKEN@host`` (#6396, handled
|
||||
# by _URL_BARE_TOKEN_RE in the ``://`` block above): a bare credential in
|
||||
# userinfo is never a round-trip workflow token (those live in the query
|
||||
# string), so masking it can't break a skill. The ``user:pass@`` form is
|
||||
# left to pass through per #34029.
|
||||
# are still caught by _DB_CONNSTR_RE.
|
||||
|
||||
# Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
|
||||
if "&" in text and "=" in text:
|
||||
@@ -630,66 +452,6 @@ def redact_sensitive_text(
|
||||
return text
|
||||
|
||||
|
||||
# Commands whose stdout is an environment-variable dump (KEY=value lines),
|
||||
# NOT source code. For these, terminal-output redaction must run the
|
||||
# ENV-assignment pass (code_file=False) so opaque tokens with no recognized
|
||||
# vendor prefix (e.g. ``MY_SERVICE_TOKEN=abc123randomstring``) are still
|
||||
# masked. For all other commands, code_file=True is used to avoid mangling
|
||||
# legitimate source/config dumps (``MAX_TOKENS=100``, ``"apiKey": "x"``
|
||||
# fixtures, ``postgresql://{user}`` f-string templates). See issue #43025.
|
||||
_ENV_DUMP_COMMANDS = frozenset({"env", "printenv", "set", "export", "declare"})
|
||||
|
||||
|
||||
def is_env_dump_command(command: str | None) -> bool:
|
||||
"""Return True if ``command`` dumps environment variables to stdout.
|
||||
|
||||
Detects ``env`` / ``printenv`` / ``set`` / ``export`` / ``declare`` as the
|
||||
first token of any segment in a pipeline or sequence (``;`` / ``&&`` /
|
||||
``||`` / ``|``). Conservative: an empty/non-string command or anything unrecognized
|
||||
returns False (callers then fall back to the safer code_file=True path,
|
||||
which still masks prefix-shaped keys).
|
||||
"""
|
||||
if not command or not isinstance(command, str):
|
||||
return False
|
||||
# Split on shell separators, then inspect the first token of each segment.
|
||||
segments = re.split(r"[|;&]+", command)
|
||||
for seg in segments:
|
||||
seg = seg.strip()
|
||||
if not seg:
|
||||
continue
|
||||
try:
|
||||
tokens = shlex.split(seg)
|
||||
except ValueError:
|
||||
tokens = seg.split()
|
||||
if tokens and tokens[0] in _ENV_DUMP_COMMANDS:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def redact_terminal_output(
|
||||
output: str, command: str | None = None, *, force: bool = False
|
||||
) -> str:
|
||||
"""Redact secrets from terminal/process stdout.
|
||||
|
||||
Single redaction policy for ALL terminal-output surfaces — foreground
|
||||
``terminal`` results AND background ``process(action=poll/log/wait)``
|
||||
output — so they can't diverge. Picks ``code_file`` based on whether
|
||||
``command`` is an environment dump:
|
||||
|
||||
- env-dump command (``env``/``printenv``/``set``/``export``/``declare``)
|
||||
→ ``code_file=False`` so the ENV-assignment pass masks opaque tokens.
|
||||
- anything else (or unknown command) → ``code_file=True`` to avoid
|
||||
false positives on source/config dumps.
|
||||
|
||||
``force=True`` bypasses the global ``security.redact_secrets`` preference
|
||||
for safety boundaries that must never emit raw credentials.
|
||||
"""
|
||||
if not output:
|
||||
return output
|
||||
code_file = not is_env_dump_command(command or "")
|
||||
return redact_sensitive_text(output, force=force, code_file=code_file)
|
||||
|
||||
|
||||
# Substrings used to gate ``_PREFIX_RE`` execution. If none of these appear in
|
||||
# the input string, the prefix regex cannot match anything, so we skip it.
|
||||
# False positives are fine (they just run the regex, which then matches
|
||||
|
||||
@@ -1,140 +0,0 @@
|
||||
"""Replay-history sanitization shared across resume code paths.
|
||||
|
||||
When a session's last turn dies mid-tool-loop — the process is killed by a
|
||||
restart/shutdown command, a stale-timeout fires, or an interrupt lands before
|
||||
the tool result is written — the persisted transcript can end with a dangling
|
||||
``assistant(tool_calls)`` (no matching ``tool`` answer) or an interrupted
|
||||
``assistant→tool`` block. On resume the model sees that broken tail and
|
||||
re-issues the unanswered call, producing an endless "thinking"/reboot loop
|
||||
(#49201, #29086).
|
||||
|
||||
These pure helpers strip those tails before the history is replayed to the
|
||||
model. They were originally local to ``gateway/run.py`` (which fixed the
|
||||
messaging-gateway path) and are extracted here so every resume surface — the
|
||||
messaging gateway AND the TUI/WebUI gateway — shares the same cleanup instead
|
||||
of the WebUI path silently skipping it.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def is_interrupted_tool_result(content: Any) -> bool:
|
||||
"""Return True if a tool result indicates the tool was interrupted."""
|
||||
if not isinstance(content, str):
|
||||
return False
|
||||
lowered = content.lower()
|
||||
if "[command interrupted]" in lowered:
|
||||
return True
|
||||
if "exit_code" in lowered and ("130" in lowered or "-1" in lowered):
|
||||
return "interrupt" in lowered
|
||||
return False
|
||||
|
||||
|
||||
def strip_interrupted_tool_tails(
|
||||
agent_history: List[Dict[str, Any]],
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Strip interrupted assistant→tool sequences from replay history.
|
||||
|
||||
Older interrupted gateway turns can be followed by a queued real user
|
||||
message, so the interrupted assistant/tool block is not necessarily the
|
||||
final tail by the time we rebuild replay history. Remove any contiguous
|
||||
assistant(tool_calls) + tool-result block that contains an interrupted tool
|
||||
result, while preserving successful tool-call sequences intact.
|
||||
"""
|
||||
if not agent_history:
|
||||
return agent_history
|
||||
|
||||
cleaned: List[Dict[str, Any]] = []
|
||||
i = 0
|
||||
n = len(agent_history)
|
||||
while i < n:
|
||||
msg = agent_history[i]
|
||||
if msg.get("role") == "assistant" and "tool_calls" in msg:
|
||||
j = i + 1
|
||||
tool_results: List[Dict[str, Any]] = []
|
||||
while j < n and agent_history[j].get("role") == "tool":
|
||||
tool_results.append(agent_history[j])
|
||||
j += 1
|
||||
if tool_results and any(
|
||||
is_interrupted_tool_result(m.get("content", ""))
|
||||
for m in tool_results
|
||||
):
|
||||
logger.debug(
|
||||
"Stripping interrupted assistant→tool replay block "
|
||||
"(indices %d–%d, tool_results=%d)",
|
||||
i, j - 1, len(tool_results),
|
||||
)
|
||||
i = j
|
||||
continue
|
||||
if msg.get("role") == "tool" and is_interrupted_tool_result(msg.get("content", "")):
|
||||
logger.debug("Stripping orphan interrupted tool result from replay history")
|
||||
i += 1
|
||||
continue
|
||||
cleaned.append(msg)
|
||||
i += 1
|
||||
|
||||
return cleaned
|
||||
|
||||
|
||||
def strip_dangling_tool_call_tail(
|
||||
agent_history: List[Dict[str, Any]],
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Strip a trailing ``assistant(tool_calls)`` block left with NO answers.
|
||||
|
||||
When a tool call itself kills the gateway process (``docker restart``,
|
||||
``systemctl restart``, ``kill``, ``hermes gateway restart``), the process
|
||||
is terminated by SIGKILL *mid-call* — before the tool result is ever
|
||||
written and before the orderly shutdown rewind
|
||||
(``_drop_trailing_empty_response_scaffolding``) can run. The last thing
|
||||
persisted is the ``assistant`` message that issued the ``tool_calls``,
|
||||
with zero matching ``tool`` rows.
|
||||
|
||||
On resume the model sees an unanswered tool call at the tail and naturally
|
||||
re-issues it — which restarts the gateway again, producing the infinite
|
||||
reboot loop in #49201. ``strip_interrupted_tool_tails`` does not catch
|
||||
this because there is no tool result to inspect for an interrupt marker.
|
||||
|
||||
This strips that dangling tail at the source so there is nothing for the
|
||||
model to re-execute. It only acts when the tail is an
|
||||
``assistant(tool_calls)`` whose calls have NO corresponding ``tool``
|
||||
results — a completed assistant→tool pair (any tool answers present) is
|
||||
left untouched so genuine mid-progress tool loops still resume.
|
||||
"""
|
||||
if not agent_history:
|
||||
return agent_history
|
||||
|
||||
last = agent_history[-1]
|
||||
if not (
|
||||
isinstance(last, dict)
|
||||
and last.get("role") == "assistant"
|
||||
and last.get("tool_calls")
|
||||
):
|
||||
return agent_history
|
||||
|
||||
logger.debug(
|
||||
"Stripping dangling unanswered assistant(tool_calls) tail "
|
||||
"(%d call(s)) — process likely killed mid-tool-call by a "
|
||||
"restart/shutdown command (#49201)",
|
||||
len(last.get("tool_calls") or []),
|
||||
)
|
||||
return agent_history[:-1]
|
||||
|
||||
|
||||
def sanitize_replay_history(
|
||||
agent_history: List[Dict[str, Any]],
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Apply both replay-tail strippers in the canonical order.
|
||||
|
||||
Convenience entry point for resume code paths: removes interrupted
|
||||
assistant→tool blocks anywhere in the history, then removes a dangling
|
||||
unanswered ``assistant(tool_calls)`` tail. Returns the same list object
|
||||
only when the history is empty; otherwise a new list is returned.
|
||||
"""
|
||||
if not agent_history:
|
||||
return agent_history
|
||||
return strip_dangling_tool_call_tail(strip_interrupted_tool_tails(agent_history))
|
||||
@@ -122,8 +122,6 @@ from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple
|
||||
|
||||
from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags
|
||||
|
||||
try:
|
||||
import fcntl # POSIX only; Windows falls back to best-effort without flock.
|
||||
except ImportError: # pragma: no cover
|
||||
@@ -443,7 +441,6 @@ def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
|
||||
return result
|
||||
|
||||
t0 = time.monotonic()
|
||||
_popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
|
||||
try:
|
||||
proc = subprocess.run(
|
||||
argv,
|
||||
@@ -452,7 +449,6 @@ def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
|
||||
timeout=spec.timeout,
|
||||
text=True,
|
||||
shell=False,
|
||||
**_popen_kwargs,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
result["timed_out"] = True
|
||||
|
||||
@@ -5,8 +5,6 @@ import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from hermes_cli._subprocess_compat import IS_WINDOWS, windows_hide_flags
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
|
||||
@@ -68,7 +66,6 @@ def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
|
||||
Failures return a short ``[inline-shell error: ...]`` marker instead of
|
||||
raising, so one bad snippet can't wreck the whole skill message.
|
||||
"""
|
||||
_popen_kwargs = {"creationflags": windows_hide_flags()} if IS_WINDOWS else {}
|
||||
try:
|
||||
completed = subprocess.run(
|
||||
["bash", "-c", command],
|
||||
@@ -78,7 +75,6 @@ def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
|
||||
timeout=max(1, int(timeout)),
|
||||
check=False,
|
||||
stdin=subprocess.DEVNULL,
|
||||
**_popen_kwargs,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
return f"[inline-shell timeout after {timeout}s: {command}]"
|
||||
|
||||
@@ -507,34 +507,6 @@ def get_all_skills_dirs() -> List[Path]:
|
||||
return dirs
|
||||
|
||||
|
||||
def _resolve_for_skill_ownership(path) -> Path:
|
||||
path_obj = path if isinstance(path, Path) else Path(str(path))
|
||||
try:
|
||||
return path_obj.expanduser().resolve()
|
||||
except (OSError, RuntimeError):
|
||||
return path_obj.expanduser().absolute()
|
||||
|
||||
|
||||
def is_external_skill_path(path) -> bool:
|
||||
"""Return True when ``path`` lives under a configured external skills dir.
|
||||
|
||||
``skills.external_dirs`` are externally owned: Hermes can discover and view
|
||||
their skills, and foreground user-directed tool calls may still edit them,
|
||||
but autonomous lifecycle maintenance must treat them as read-only. This
|
||||
helper centralizes the ownership boundary so curator/reporting/tool paths do
|
||||
not each need to re-interpret the config.
|
||||
"""
|
||||
candidate = _resolve_for_skill_ownership(path)
|
||||
for root in get_external_skills_dirs():
|
||||
resolved_root = _resolve_for_skill_ownership(root)
|
||||
try:
|
||||
candidate.relative_to(resolved_root)
|
||||
return True
|
||||
except ValueError:
|
||||
continue
|
||||
return False
|
||||
|
||||
|
||||
# ── Condition extraction ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
@@ -1,136 +0,0 @@
|
||||
"""Thinking-timeout detection and user-facing guidance for reasoning models.
|
||||
|
||||
When a known reasoning model (NVIDIA Nemotron 3 Ultra, OpenAI o1/o3,
|
||||
Anthropic Opus 4.x thinking, DeepSeek R1, Qwen QwQ, xAI Grok reasoning)
|
||||
hits a transport-layer error before the first content token arrives, the
|
||||
upstream proxy has almost certainly idle-killed a long thinking stream —
|
||||
not a true context overflow or a configuration error. The user needs
|
||||
distinct guidance for this case:
|
||||
|
||||
"The model's thinking phase exceeded the upstream proxy's idle
|
||||
timeout before the first content token arrived. This is a known
|
||||
issue with reasoning models behind cloud gateways (NVIDIA NIM,
|
||||
OpenAI, Anthropic, DeepSeek). Workarounds in priority order:
|
||||
1. Set `providers.<provider>.models.<model>.stale_timeout_seconds: 900`
|
||||
in `~/.hermes/config.yaml` to extend the per-call timeout...
|
||||
2. Lower `reasoning_budget` or set `reasoning_effort: medium`...
|
||||
3. Use a smaller / faster reasoning model..."
|
||||
|
||||
The existing `_is_stream_drop` guidance at
|
||||
``agent/conversation_loop.py:3464-3486`` fires for large-file-write
|
||||
stream drops ("try execute_code with Python's open() for large files")
|
||||
which is the WRONG advice for the thinking-timeout case. This module
|
||||
provides the detection and the message as standalone helpers so the
|
||||
detection logic is unit-testable without driving the full retry loop,
|
||||
and the message text can be regression-tested for spelling and accuracy.
|
||||
|
||||
Part 2 of Fixes #52310.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# Substring set that identifies a transport-layer failure on the
|
||||
# response stream. Same shape as the existing
|
||||
# ``_SERVER_DISCONNECT_PATTERNS`` in ``agent/error_classifier.py:394``
|
||||
# but extended to also catch the OS-level error signature
|
||||
# (``broken pipe`` / ``errno 32``) that the upstream kill surfaces
|
||||
# to the OpenAI SDK wrapper.
|
||||
_THINKING_TIMEOUT_SUBSTRINGS: tuple[str, ...] = (
|
||||
"broken pipe",
|
||||
"errno 32",
|
||||
"remote protocol",
|
||||
"connection reset",
|
||||
"connection lost",
|
||||
"peer closed",
|
||||
"server disconnected",
|
||||
)
|
||||
|
||||
|
||||
def is_thinking_timeout(classified: object, model: str, error_msg: str) -> bool:
|
||||
"""Return True when a reasoning model's thinking phase hit a transport kill.
|
||||
|
||||
Args:
|
||||
classified: a :class:`agent.error_classifier.ClassifiedError` instance
|
||||
(duck-typed here to avoid an import cycle in unit tests).
|
||||
model: the model slug at failure time (e.g.
|
||||
``"nvidia/nemotron-3-ultra-550b-a55b"``).
|
||||
error_msg: lowercased string representation of the underlying
|
||||
exception (typically ``str(api_error).lower()``).
|
||||
|
||||
Returns True when ALL conditions hold:
|
||||
1. ``classified.reason == FailoverReason.timeout`` (the classifier
|
||||
override at ``agent/error_classifier.py:720-738`` ensures this
|
||||
is the case for reasoning models even on large sessions).
|
||||
2. The underlying exception has no ``.status_code`` set — the caller must gate on this; it is not checked here (transport
|
||||
disconnect, not an HTTP error).
|
||||
3. ``model`` is in the reasoning-model allowlist (reuses
|
||||
``agent.reasoning_timeouts.get_reasoning_stale_timeout_floor``).
|
||||
4. ``error_msg`` contains one of the transport-kill substrings.
|
||||
|
||||
Non-reasoning models always return False. Non-transport errors
|
||||
(billing / rate_limit / auth / context_overflow / format_error)
|
||||
always return False. HTTP-status errors are excluded by the caller's gate, not by this function.
|
||||
"""
|
||||
# Import here (not at module top) to keep this helper cheap to
|
||||
# import even from callers that don't need it. ``agent.reasoning_timeouts``
|
||||
# is small and dependency-free.
|
||||
from agent.reasoning_timeouts import get_reasoning_stale_timeout_floor
|
||||
|
||||
# Condition 1: classifier says timeout. Use a string/value check
|
||||
# rather than importing FailoverReason so this module has zero
|
||||
# import cycles from the error_classifier package.
|
||||
reason = getattr(classified, "reason", None)
|
||||
reason_value = getattr(reason, "value", None)
|
||||
if reason_value != "timeout":
|
||||
return False
|
||||
|
||||
# Condition 2: no HTTP status code (transport, not API error).
|
||||
# Caller is expected to gate on ``getattr(api_error, "status_code", None) is None``
|
||||
# before calling this helper; the surface here is just the post-gate
|
||||
# boolean so the caller can pass an already-prepped error_msg.
|
||||
|
||||
# Condition 3: reasoning model allowlist.
|
||||
if get_reasoning_stale_timeout_floor(model) is None:
|
||||
return False
|
||||
|
||||
# Condition 4: transport-kill substring in the error message.
|
||||
error_msg_lower = (error_msg or "").lower()
|
||||
return any(p in error_msg_lower for p in _THINKING_TIMEOUT_SUBSTRINGS)
|
||||
|
||||
|
||||
def build_thinking_timeout_guidance(
|
||||
provider: str, model: str, model_label: Optional[str] = None,
|
||||
) -> str:
|
||||
"""Return the user-facing guidance string appended to ``_final_response``.
|
||||
|
||||
Args:
|
||||
provider: provider slug (e.g. ``"nvidia"``, ``"openai"``).
|
||||
model: bare model slug the user would put in their config
|
||||
(e.g. ``"nemotron-3-ultra-550b-a55b"`` if the user uses
|
||||
NVIDIA direct, or the full ``"nvidia/nemotron-3-ultra-550b-a55b"``
|
||||
if they go through an aggregator). Used verbatim in the
|
||||
config snippet so the user can copy-paste.
|
||||
model_label: optional short label for the model name in the
|
||||
prose (e.g. ``"Nemotron 3 Ultra"``). Falls back to the
|
||||
slug if not provided.
|
||||
"""
|
||||
label = model_label or model
|
||||
return (
|
||||
"\n\nThe model's thinking phase exceeded the upstream proxy's "
|
||||
"idle timeout before the first content token arrived. This is a "
|
||||
f"known issue with reasoning models (like {label}) behind cloud "
|
||||
"gateways (NVIDIA NIM, OpenAI, Anthropic, DeepSeek). Workarounds "
|
||||
"in priority order:\n"
|
||||
f"1. Set `providers.{provider}.models.{model}.stale_timeout_seconds: 900` "
|
||||
"in `~/.hermes/config.yaml` to extend the per-call timeout. "
|
||||
"(Hermes's built-in floor is 600s for known reasoning models — "
|
||||
"if you still see this after raising, the upstream cap is even "
|
||||
"shorter.)\n"
|
||||
"2. Lower `reasoning_budget` or set `reasoning_effort: medium` on this "
|
||||
"model if the provider supports it.\n"
|
||||
"3. Use a smaller / faster reasoning model if the task doesn't "
|
||||
"require deep thinking."
|
||||
)
|
||||
@@ -26,7 +26,6 @@ from agent.display import (
|
||||
build_tool_preview as _build_tool_preview,
|
||||
get_cute_tool_message as _get_cute_tool_message_impl,
|
||||
get_tool_emoji as _get_tool_emoji,
|
||||
redact_tool_args_for_display as _redact_tool_args_for_display,
|
||||
_detect_tool_failure,
|
||||
)
|
||||
from agent.tool_guardrails import ToolGuardrailDecision
|
||||
@@ -470,11 +469,10 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
|
||||
if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off":
|
||||
print(f" ⚡ Concurrent: {num_tools} tool calls — {tool_names_str}")
|
||||
for i, (tc, name, args, middleware_trace, block_result, blocked_by_guardrail) in enumerate(parsed_calls, 1):
|
||||
display_args = _redact_tool_args_for_display(name, args) or args
|
||||
args_str = json.dumps(display_args, ensure_ascii=False)
|
||||
args_str = json.dumps(args, ensure_ascii=False)
|
||||
if agent.verbose_logging:
|
||||
print(f" 📞 Tool {i}: {name}({list(display_args.keys())})")
|
||||
print(agent._wrap_verbose("Args: ", json.dumps(display_args, indent=2, ensure_ascii=False)))
|
||||
print(f" 📞 Tool {i}: {name}({list(args.keys())})")
|
||||
print(agent._wrap_verbose("Args: ", json.dumps(args, indent=2, ensure_ascii=False)))
|
||||
else:
|
||||
args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
|
||||
print(f" 📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}")
|
||||
@@ -484,9 +482,8 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
|
||||
continue
|
||||
if agent.tool_progress_callback:
|
||||
try:
|
||||
display_args = _redact_tool_args_for_display(name, args) or args
|
||||
preview = _build_tool_preview(name, display_args)
|
||||
agent.tool_progress_callback("tool.started", name, preview, display_args)
|
||||
preview = _build_tool_preview(name, args)
|
||||
agent.tool_progress_callback("tool.started", name, preview, args)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool progress callback error: {cb_err}")
|
||||
|
||||
@@ -495,8 +492,7 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
|
||||
continue
|
||||
if agent.tool_start_callback:
|
||||
try:
|
||||
display_args = _redact_tool_args_for_display(name, args) or args
|
||||
agent.tool_start_callback(tc.id, name, display_args)
|
||||
agent.tool_start_callback(tc.id, name, args)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool start callback error: {cb_err}")
|
||||
|
||||
@@ -796,8 +792,7 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
|
||||
|
||||
if not blocked and agent.tool_complete_callback:
|
||||
try:
|
||||
display_args = _redact_tool_args_for_display(name, args) or args
|
||||
agent.tool_complete_callback(tc.id, name, display_args, function_result)
|
||||
agent.tool_complete_callback(tc.id, name, args, function_result)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool complete callback error: {cb_err}")
|
||||
|
||||
@@ -959,11 +954,10 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
|
||||
agent._iters_since_skill = 0
|
||||
|
||||
if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off":
|
||||
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
|
||||
args_str = json.dumps(display_args, ensure_ascii=False)
|
||||
args_str = json.dumps(function_args, ensure_ascii=False)
|
||||
if agent.verbose_logging:
|
||||
print(f" 📞 Tool {i}: {function_name}({list(display_args.keys())})")
|
||||
print(agent._wrap_verbose("Args: ", json.dumps(display_args, indent=2, ensure_ascii=False)))
|
||||
print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())})")
|
||||
print(agent._wrap_verbose("Args: ", json.dumps(function_args, indent=2, ensure_ascii=False)))
|
||||
else:
|
||||
args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
|
||||
print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")
|
||||
@@ -984,16 +978,14 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
|
||||
|
||||
if not _execution_blocked and agent.tool_progress_callback:
|
||||
try:
|
||||
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
|
||||
preview = _build_tool_preview(function_name, display_args)
|
||||
agent.tool_progress_callback("tool.started", function_name, preview, display_args)
|
||||
preview = _build_tool_preview(function_name, function_args)
|
||||
agent.tool_progress_callback("tool.started", function_name, preview, function_args)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool progress callback error: {cb_err}")
|
||||
|
||||
if not _execution_blocked and agent.tool_start_callback:
|
||||
try:
|
||||
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
|
||||
agent.tool_start_callback(tool_call.id, function_name, display_args)
|
||||
agent.tool_start_callback(tool_call.id, function_name, function_args)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool start callback error: {cb_err}")
|
||||
|
||||
@@ -1223,8 +1215,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
|
||||
if agent._should_emit_quiet_tool_messages():
|
||||
face = random.choice(KawaiiSpinner.get_waiting_faces())
|
||||
emoji = _get_tool_emoji(function_name)
|
||||
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
|
||||
preview = _build_tool_preview(function_name, display_args) or function_name
|
||||
preview = _build_tool_preview(function_name, function_args) or function_name
|
||||
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
|
||||
spinner.start()
|
||||
_ce_result = None
|
||||
@@ -1257,8 +1248,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
|
||||
if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
|
||||
face = random.choice(KawaiiSpinner.get_waiting_faces())
|
||||
emoji = _get_tool_emoji(function_name)
|
||||
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
|
||||
preview = _build_tool_preview(function_name, display_args) or function_name
|
||||
preview = _build_tool_preview(function_name, function_args) or function_name
|
||||
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
|
||||
spinner.start()
|
||||
_mem_result = None
|
||||
@@ -1289,8 +1279,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
|
||||
if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
|
||||
face = random.choice(KawaiiSpinner.get_waiting_faces())
|
||||
emoji = _get_tool_emoji(function_name)
|
||||
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
|
||||
preview = _build_tool_preview(function_name, display_args) or function_name
|
||||
preview = _build_tool_preview(function_name, function_args) or function_name
|
||||
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
|
||||
spinner.start()
|
||||
_spinner_result = None
|
||||
@@ -1452,8 +1441,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
|
||||
|
||||
if not _execution_blocked and agent.tool_complete_callback:
|
||||
try:
|
||||
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
|
||||
agent.tool_complete_callback(tool_call.id, function_name, display_args, function_result)
|
||||
agent.tool_complete_callback(tool_call.id, function_name, function_args, function_result)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool complete callback error: {cb_err}")
|
||||
|
||||
|
||||
@@ -217,7 +217,9 @@ class CodexEventProjector:
|
||||
def _project_mcp_tool_call(self, item: dict, item_id: str) -> ProjectionResult:
|
||||
server = item.get("server") or "mcp"
|
||||
tool = item.get("tool") or "unknown"
|
||||
call_id = _deterministic_call_id(f"mcp_{server}_{tool}", item_id)
|
||||
# Mirror the native MCP tool-name convention (mcp__server__tool) so the
|
||||
# deterministic call_id input stays consistent with registration names.
|
||||
call_id = _deterministic_call_id(f"mcp__{server}__{tool}", item_id)
|
||||
args = item.get("arguments") or {}
|
||||
if not isinstance(args, dict):
|
||||
args = {"arguments": args}
|
||||
|
||||
@@ -28,7 +28,6 @@ import uuid
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.conversation_compression import conversation_history_after_compression
|
||||
from agent.iteration_budget import IterationBudget
|
||||
from agent.model_metadata import (
|
||||
estimate_messages_tokens_rough,
|
||||
@@ -401,9 +400,7 @@ def build_turn_context(
|
||||
_orig_len, len(messages), _orig_tokens, _preflight_tokens
|
||||
):
|
||||
break # Cannot compress further: neither rows nor tokens moved
|
||||
conversation_history = conversation_history_after_compression(
|
||||
agent, messages
|
||||
)
|
||||
conversation_history = None
|
||||
agent._empty_content_retries = 0
|
||||
agent._thinking_prefill_retries = 0
|
||||
agent._last_content_with_tools = None
|
||||
|
||||
@@ -289,14 +289,7 @@ def finalize_turn(
|
||||
and len(_stripped) <= 24
|
||||
and _stripped[-1:] not in {".", "!", "?", "。", "!", "?", "`", ")"}
|
||||
)
|
||||
_is_partial_stream_recovery = (
|
||||
str(_turn_exit_reason) == "partial_stream_recovery"
|
||||
)
|
||||
if (
|
||||
_is_empty_terminal
|
||||
or _is_partial_fragment
|
||||
or _is_partial_stream_recovery
|
||||
):
|
||||
if _is_empty_terminal or _is_partial_fragment:
|
||||
_explanation = agent._format_turn_completion_explanation(
|
||||
_turn_exit_reason
|
||||
)
|
||||
|
||||
@@ -67,11 +67,6 @@ class TurnRetryState:
|
||||
# ── Restart signals (read by the outer loop after the attempt) ───────
|
||||
restart_with_compressed_messages: bool = False
|
||||
restart_with_length_continuation: bool = False
|
||||
# Set when a content-filter stream stall (e.g. MiniMax "new_sensitive")
|
||||
# has been escalated to the fallback chain: the partial-stream content
|
||||
# was rolled back off ``messages`` and the loop should re-issue the API
|
||||
# call against the newly-activated provider (#32421).
|
||||
restart_with_rebuilt_messages: bool = False
|
||||
|
||||
def __iter__(self):
|
||||
# Convenience for debugging / tests: iterate (name, value) pairs.
|
||||
|
||||
@@ -15,135 +15,9 @@ from typing import Any, Iterable
|
||||
|
||||
_MAX_CHANGED_PATHS_IN_NUDGE = 8
|
||||
|
||||
# Non-code file extensions whose edits carry no verifiable runtime behavior:
|
||||
# documentation, prose, and data/markup that no test/build exercises. When a
|
||||
# turn touches ONLY these, verify-on-stop has nothing to check, so the nudge is
|
||||
# suppressed (this is fix "C" for the doc/markdown/skill false-positive — a
|
||||
# SKILL.md or README edit must never demand a /tmp verification script). A turn
|
||||
# that edits any non-listed path (a real source/code/config file) still nudges.
|
||||
_NON_CODE_VERIFY_EXTENSIONS = frozenset(
|
||||
{
|
||||
".md",
|
||||
".markdown",
|
||||
".mdx",
|
||||
".rst",
|
||||
".txt",
|
||||
".text",
|
||||
".adoc",
|
||||
".asciidoc",
|
||||
".org",
|
||||
".log",
|
||||
".csv",
|
||||
".tsv",
|
||||
}
|
||||
)
|
||||
|
||||
# Filenames (case-insensitive, extension-less or otherwise) that are pure prose
|
||||
# even without a recognized doc extension.
|
||||
_NON_CODE_VERIFY_FILENAMES = frozenset(
|
||||
{
|
||||
"license",
|
||||
"licence",
|
||||
"notice",
|
||||
"authors",
|
||||
"contributors",
|
||||
"changelog",
|
||||
"codeowners",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _is_non_code_path(raw: str) -> bool:
    """Tell whether a changed path is documentation/prose with nothing to verify.

    The path's extension is compared (lower-cased) against
    ``_NON_CODE_VERIFY_EXTENSIONS``. Only when the path has no extension at
    all is its bare file name compared (lower-cased) against
    ``_NON_CODE_VERIFY_FILENAMES``.

    Args:
        raw: The changed path; it is coerced with ``str()`` before parsing.

    Returns:
        ``True`` for a recognised non-code path, ``False`` otherwise
        (including when the value cannot be turned into a ``Path``).
    """
    try:
        candidate = Path(str(raw))
    except Exception:
        # A value we cannot parse is never classified as prose.
        return False
    extension = candidate.suffix.lower()
    if extension:
        # A path with an extension is judged by the extension alone.
        return extension in _NON_CODE_VERIFY_EXTENSIONS
    return candidate.name.lower() in _NON_CODE_VERIFY_FILENAMES
|
||||
|
||||
|
||||
def _filter_verifiable_paths(paths: Iterable[str]) -> list[str]:
    """Return the paths that could carry verifiable behavior, in input order.

    Falsy entries (``None``, empty strings) and entries that
    ``_is_non_code_path`` classifies as documentation/prose are dropped.
    """
    kept: list[str] = []
    for entry in paths:
        if not entry:
            continue
        if _is_non_code_path(entry):
            continue
        kept.append(entry)
    return kept
|
||||
|
||||
|
||||
# Session identities (platform or source) that are NOT human conversational
|
||||
# messaging surfaces: interactive coding surfaces (CLI, TUI, desktop, codex,
|
||||
# local, gateway) and programmatic callers (API server, webhooks, tools).
|
||||
# Verify-on-stop stays ON by default for these. Any other resolved gateway
|
||||
# platform is a conversational messaging surface (Telegram, Discord, WhatsApp,
|
||||
# Signal, Slack, etc.) where the verification narrative would reach a human as
|
||||
# chat noise, so it defaults OFF. Mirrors LOCAL_SESSION_SOURCE_IDS in
|
||||
# apps/desktop/src/lib/session-source.ts; keep roughly in sync when adding a
|
||||
# local or programmatic surface. Default-deny by design: an unrecognized
|
||||
# identity is treated as messaging (OFF) so a new chat platform never leaks the
|
||||
# verification receipt before this set is updated.
|
||||
_NON_MESSAGING_SESSION_SURFACES = frozenset(
|
||||
{
|
||||
"",
|
||||
"cli",
|
||||
"codex",
|
||||
"desktop",
|
||||
"gateway",
|
||||
"local",
|
||||
"tui",
|
||||
"tool",
|
||||
"api_server",
|
||||
"webhook",
|
||||
"msgraph_webhook",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _session_is_messaging_surface() -> bool:
    """Report whether this turn is delivered over a human messaging channel.

    Two identities are resolved:

    * platform: the ``HERMES_PLATFORM`` environment variable, falling back to
      the session value ``HERMES_SESSION_PLATFORM``;
    * source: the session value ``HERMES_SESSION_SOURCE``.

    Session values are read through ``gateway.session_context.get_session_env``.
    If that import or lookup raises, both are read straight from
    ``os.environ`` instead.

    Returns:
        ``True`` when either identity, after stripping and lower-casing, is
        non-empty and not listed in ``_NON_MESSAGING_SESSION_SURFACES``;
        ``False`` otherwise.
    """
    try:
        from gateway.session_context import get_session_env

        platform = os.getenv("HERMES_PLATFORM") or get_session_env(
            "HERMES_SESSION_PLATFORM", ""
        )
        source = get_session_env("HERMES_SESSION_SOURCE", "")
    except Exception:
        # Session-context helper unavailable: fall back to the process env.
        platform = os.getenv("HERMES_PLATFORM", "") or os.environ.get(
            "HERMES_SESSION_PLATFORM", ""
        )
        source = os.environ.get("HERMES_SESSION_SOURCE", "")
    normalized = [str(value or "").strip().lower() for value in (platform, source)]
    # Any resolved identity outside the known non-messaging set counts as
    # a messaging surface.
    return any(
        bool(name) and name not in _NON_MESSAGING_SESSION_SURFACES
        for name in normalized
    )
|
||||
|
||||
|
||||
def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool:
|
||||
"""Return whether edit -> verify-before-finish behavior is enabled.
|
||||
|
||||
Precedence: an explicit ``HERMES_VERIFY_ON_STOP`` env var wins, then an
|
||||
explicit ``agent.verify_on_stop`` config value. The config default is
|
||||
``False`` (see ``DEFAULT_CONFIG``) — verify-on-stop is OFF unless the user
|
||||
opts in. The legacy ``"auto"`` sentinel is still honored for anyone who
|
||||
sets it explicitly: it resolves to ON for interactive coding surfaces
|
||||
(CLI, TUI, desktop) and programmatic callers, and OFF for conversational
|
||||
messaging surfaces (Telegram, Discord, etc.). A missing/unknown value
|
||||
falls back to OFF.
|
||||
"""
|
||||
"""Return whether edit -> verify-before-finish behavior is enabled."""
|
||||
env = os.environ.get("HERMES_VERIFY_ON_STOP")
|
||||
if env is not None:
|
||||
return env.strip().lower() not in {"0", "false", "no", "off"}
|
||||
@@ -155,20 +29,9 @@ def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool:
|
||||
except Exception:
|
||||
config = {}
|
||||
agent_cfg = (config or {}).get("agent") if isinstance(config, dict) else None
|
||||
cfg_val = agent_cfg.get("verify_on_stop") if isinstance(agent_cfg, dict) else None
|
||||
if isinstance(cfg_val, bool):
|
||||
return cfg_val
|
||||
if isinstance(cfg_val, str):
|
||||
token = cfg_val.strip().lower()
|
||||
if token in {"1", "true", "yes", "on"}:
|
||||
return True
|
||||
if token in {"0", "false", "no", "off"}:
|
||||
return False
|
||||
if token == "auto":
|
||||
# Explicit opt-in to the legacy surface-aware behavior.
|
||||
return not _session_is_messaging_surface()
|
||||
# Missing or unknown value -> OFF (the new default).
|
||||
return False
|
||||
if isinstance(agent_cfg, dict) and "verify_on_stop" in agent_cfg:
|
||||
return bool(agent_cfg.get("verify_on_stop"))
|
||||
return True
|
||||
|
||||
|
||||
def _candidate_cwds(paths: Iterable[str]) -> list[Path]:
|
||||
@@ -251,10 +114,7 @@ def build_verify_on_stop_nudge(
|
||||
max_attempts: int = 2,
|
||||
) -> str | None:
|
||||
"""Return a synthetic follow-up when edited code lacks fresh verification."""
|
||||
# Drop documentation/prose paths (markdown, skills, README, LICENSE, ...) —
|
||||
# they carry no verifiable behavior, so a turn that touched only those has
|
||||
# nothing to verify and must not nudge.
|
||||
paths = sorted({str(p) for p in _filter_verifiable_paths(changed_paths)})
|
||||
paths = sorted({str(p) for p in changed_paths if p})
|
||||
if not paths or attempts >= max_attempts:
|
||||
return None
|
||||
|
||||
|
||||
@@ -85,7 +85,7 @@ Installers are built and uploaded to GitHub Releases manually. macOS/Windows sig
|
||||
|
||||
### How it works
|
||||
|
||||
The packaged app ships the Electron shell and a native React chat surface. On first launch it can install the Hermes Agent runtime into `HERMES_HOME` (`~/.hermes`, or `%LOCALAPPDATA%\hermes` on Windows) — the **same layout a CLI install uses**, so the two are interchangeable. Backend resolution first honours `HERMES_DESKTOP_HERMES_ROOT`, then a completed managed install, then a probed `hermes` on `PATH` (unless `HERMES_DESKTOP_IGNORE_EXISTING=1` is set), and finally an explicit `HERMES_DESKTOP_HERMES` command override for packagers/troubleshooting. The renderer (React, in `src/`) talks to a headless backend the app launches for you — a `hermes serve` process that serves the `tui_gateway` JSON-RPC/WebSocket API — through the framework-agnostic client in [`apps/shared`](../shared/) (the same client the web dashboard consumes), and reuses the agent runtime rather than embedding `hermes --tui`. The app is **self-contained**: it runs its own `hermes serve` backend and never opens or requires the web dashboard UI. (For backward compatibility, a runtime that predates the `serve` command automatically falls back to a headless `dashboard --no-open` — see `electron/backend-command.cjs` — so mid-upgrade installs never break.) The install, backend-resolution, and self-update logic all live in `electron/main.cjs`.
|
||||
The packaged app ships the Electron shell and a native React chat surface. On first launch it can install the Hermes Agent runtime into `HERMES_HOME` (`~/.hermes`, or `%LOCALAPPDATA%\hermes` on Windows) — the **same layout a CLI install uses**, so the two are interchangeable. Backend resolution first honours `HERMES_DESKTOP_HERMES_ROOT`, then a completed managed install, then a probed `hermes` on `PATH` (unless `HERMES_DESKTOP_IGNORE_EXISTING=1` is set), and finally an explicit `HERMES_DESKTOP_HERMES` command override for packagers/troubleshooting. The renderer (React, in `src/`) talks to a `hermes dashboard` backend over the `tui_gateway`/dashboard APIs and reuses the agent runtime rather than embedding `hermes --tui`. The install, backend-resolution, and self-update logic all live in `electron/main.cjs`.
|
||||
|
||||
### Verification
|
||||
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
'use strict'
|
||||
|
||||
// Backend subcommand routing for the desktop-managed Hermes process.
|
||||
//
|
||||
// The desktop app launches its own headless backend via `hermes serve` — it
|
||||
// must NEVER depend on or launch the browser `dashboard`. But `serve` is a
|
||||
// newer subcommand: a runtime that predates it (an older managed install the
|
||||
// app hasn't updated yet, or an older `hermes` resolved from PATH) only knows
|
||||
// `dashboard --no-open`. To avoid bricking those users mid-upgrade we detect
|
||||
// whether the resolved runtime understands `serve` and, only when it does not,
|
||||
// fall back to the legacy `dashboard --no-open` invocation. Both produce the
|
||||
// exact same headless gateway; `serve` is just the decoupled name.
|
||||
//
|
||||
// These helpers are pure so they can be unit-tested without Electron.
|
||||
|
||||
/**
 * Assemble the argv for the headless backend, which always uses the `serve`
 * subcommand bound to 127.0.0.1 on port 0.
 *
 * @param {string} [profile] Optional Hermes profile; when truthy it is pinned
 *   with a leading `--profile <profile>` pair.
 * @returns {string[]} A freshly built argv array.
 */
function serveBackendArgs(profile) {
  const argv = ['serve', '--host', '127.0.0.1', '--port', '0']
  if (profile) {
    // The profile flag goes ahead of the subcommand.
    argv.unshift('--profile', profile)
  }
  return argv
}
|
||||
|
||||
/**
 * Rewrite a resolved backend argv from `serve` to the legacy
 * `dashboard --no-open` form, preserving every other argument (including a
 * leading `-m hermes_cli.main` and any `--profile <name>`).
 *
 * Only the `serve` SUBCOMMAND token is rewritten. A `serve` token that
 * directly follows `--profile` is that flag's value (a profile literally
 * named "serve") and is left untouched.
 *
 * @param {string[]} args Resolved backend argv.
 * @returns {string[]} A new array; when there is no `serve` subcommand token
 *   it is an unchanged copy of `args`.
 */
function dashboardFallbackArgs(args) {
  const serveIndex = args.findIndex(
    (token, position) => token === 'serve' && args[position - 1] !== '--profile'
  )
  if (serveIndex === -1) return args.slice()
  return [...args.slice(0, serveIndex), 'dashboard', '--no-open', ...args.slice(serveIndex + 1)]
}
|
||||
|
||||
/**
 * Decide whether the given Python source text registers a `serve` subparser.
 *
 * The match is on `add_parser("serve"` / `add_parser('serve'` (whitespace,
 * including newlines, is allowed after the opening parenthesis), so the bare
 * substring "server" — e.g. "start_server" or "web server" — never matches.
 *
 * @param {string} dashboardPySource Source text to inspect; falsy values are
 *   treated as an empty string.
 * @returns {boolean} True when the `serve` registration is present.
 */
function sourceDeclaresServe(dashboardPySource) {
  const sourceText = String(dashboardPySource || '')
  const serveParserPattern = /add_parser\(\s*["']serve["']/
  return serveParserPattern.test(sourceText)
}
|
||||
|
||||
module.exports = {
|
||||
serveBackendArgs,
|
||||
dashboardFallbackArgs,
|
||||
sourceDeclaresServe,
|
||||
}
|
||||
@@ -1,83 +0,0 @@
|
||||
'use strict'
|
||||
|
||||
const test = require('node:test')
|
||||
const assert = require('node:assert/strict')
|
||||
|
||||
const {
|
||||
serveBackendArgs,
|
||||
dashboardFallbackArgs,
|
||||
sourceDeclaresServe,
|
||||
} = require('./backend-command.cjs')
|
||||
|
||||
test('serveBackendArgs builds a headless serve invocation', () => {
|
||||
assert.deepEqual(serveBackendArgs(), [
|
||||
'serve',
|
||||
'--host',
|
||||
'127.0.0.1',
|
||||
'--port',
|
||||
'0',
|
||||
])
|
||||
})
|
||||
|
||||
test('serveBackendArgs pins a profile when provided', () => {
|
||||
assert.deepEqual(serveBackendArgs('worker'), [
|
||||
'--profile',
|
||||
'worker',
|
||||
'serve',
|
||||
'--host',
|
||||
'127.0.0.1',
|
||||
'--port',
|
||||
'0',
|
||||
])
|
||||
})
|
||||
|
||||
test('dashboardFallbackArgs rewrites serve -> dashboard --no-open, keeping the -m prefix', () => {
|
||||
const serve = ['-m', 'hermes_cli.main', 'serve', '--host', '127.0.0.1', '--port', '0']
|
||||
assert.deepEqual(dashboardFallbackArgs(serve), [
|
||||
'-m',
|
||||
'hermes_cli.main',
|
||||
'dashboard',
|
||||
'--no-open',
|
||||
'--host',
|
||||
'127.0.0.1',
|
||||
'--port',
|
||||
'0',
|
||||
])
|
||||
})
|
||||
|
||||
test('dashboardFallbackArgs preserves a --profile flag ahead of serve', () => {
|
||||
const serve = ['-m', 'hermes_cli.main', '--profile', 'worker', 'serve', '--host', '127.0.0.1', '--port', '0']
|
||||
assert.deepEqual(dashboardFallbackArgs(serve), [
|
||||
'-m',
|
||||
'hermes_cli.main',
|
||||
'--profile',
|
||||
'worker',
|
||||
'dashboard',
|
||||
'--no-open',
|
||||
'--host',
|
||||
'127.0.0.1',
|
||||
'--port',
|
||||
'0',
|
||||
])
|
||||
})
|
||||
|
||||
test('dashboardFallbackArgs is a no-op (copy) when there is no serve token', () => {
|
||||
const args = ['-m', 'hermes_cli.main', 'dashboard', '--no-open']
|
||||
const out = dashboardFallbackArgs(args)
|
||||
assert.deepEqual(out, args)
|
||||
assert.notEqual(out, args, 'should return a copy, not the same reference')
|
||||
})
|
||||
|
||||
test('sourceDeclaresServe detects the serve subparser registration', () => {
|
||||
assert.equal(sourceDeclaresServe('subparsers.add_parser("serve", help="...")'), true)
|
||||
assert.equal(sourceDeclaresServe("subparsers.add_parser('serve')"), true)
|
||||
assert.equal(sourceDeclaresServe('subparsers.add_parser(\n "serve",\n)'), true)
|
||||
})
|
||||
|
||||
test('sourceDeclaresServe does not false-positive on the substring "server"', () => {
|
||||
const oldSource = `
|
||||
dashboard_parser = subparsers.add_parser("dashboard", help="Start the web UI dashboard")
|
||||
from hermes_cli.web_server import start_server # web server
|
||||
`
|
||||
assert.equal(sourceDeclaresServe(oldSource), false)
|
||||
})
|
||||
@@ -61,7 +61,10 @@ function buildDesktopBackendPath({
|
||||
const venvBin = venvRoot ? pathModule.join(venvRoot, platform === 'win32' ? 'Scripts' : 'bin') : null
|
||||
const saneEntries = platform === 'win32' ? [] : POSIX_SANE_PATH_ENTRIES
|
||||
|
||||
return appendUniquePathEntries([hermesNodeBin, venvBin, currentPath, saneEntries], { delimiter })
|
||||
return appendUniquePathEntries(
|
||||
[hermesNodeBin, venvBin, currentPath, saneEntries],
|
||||
{ delimiter }
|
||||
)
|
||||
}
|
||||
|
||||
function normalizeHermesHomeRoot(hermesHome, { pathModule = pathModuleForPlatform(process.platform) } = {}) {
|
||||
|
||||
@@ -76,7 +76,10 @@ test('normalizeHermesHomeRoot maps profile homes back to the global Hermes root'
|
||||
normalizeHermesHomeRoot('C:\\Users\\test\\AppData\\Local\\hermes\\profiles\\oracle', { pathModule: path.win32 }),
|
||||
'C:\\Users\\test\\AppData\\Local\\hermes'
|
||||
)
|
||||
assert.equal(normalizeHermesHomeRoot('/Users/test/.hermes', { pathModule: path.posix }), '/Users/test/.hermes')
|
||||
assert.equal(
|
||||
normalizeHermesHomeRoot('/Users/test/.hermes', { pathModule: path.posix }),
|
||||
'/Users/test/.hermes'
|
||||
)
|
||||
})
|
||||
|
||||
test('Windows PATH casing and delimiter are preserved without POSIX sane entries', () => {
|
||||
@@ -101,5 +104,8 @@ test('Windows PATH casing and delimiter are preserved without POSIX sane entries
|
||||
})
|
||||
|
||||
test('appendUniquePathEntries drops empty entries and keeps first occurrence', () => {
|
||||
assert.equal(appendUniquePathEntries([':/a::/b', ['/a', '/c']], { delimiter: ':' }), '/a:/b:/c')
|
||||
assert.equal(
|
||||
appendUniquePathEntries([':/a::/b', ['/a', '/c']], { delimiter: ':' }),
|
||||
'/a:/b:/c'
|
||||
)
|
||||
})
|
||||
|
||||
@@ -37,18 +37,7 @@ const { execFileSync } = require('node:child_process')
|
||||
const PROBE_TIMEOUT_MS = 5000
|
||||
|
||||
/**
 * Produce the Python one-liner that checks whether Hermes can import far
 * enough to launch the CLI: it imports `yaml` and then `hermes_cli.config`.
 * Exposed as a function so tests can inspect the probe text without needing a
 * real broken venv fixture.
 *
 * @returns {string} Python source suitable for `python -c`.
 */
function hermesRuntimeImportProbe() {
  const probeSource = 'import yaml; import hermes_cli.config'
  return probeSource
}
|
||||
|
||||
/**
|
||||
* Return true iff the Hermes runtime import probe exits 0.
|
||||
* Return true iff `python -c "import hermes_cli"` exits 0.
|
||||
*
|
||||
* Used to gate the "fallback to system Python with hermes_cli installed"
|
||||
* rung of resolveHermesBackend. Without this, a system Python 3.11-3.13
|
||||
@@ -57,20 +46,13 @@ function hermesRuntimeImportProbe() {
|
||||
* site-packages -- and the resolver returns a backend that immediately
|
||||
* dies on spawn.
|
||||
*
|
||||
* The probe intentionally imports hermes_cli.config, not just the top-level
|
||||
* package: a broken/empty Windows launcher venv can still see the source tree
|
||||
* through PYTHONPATH but lack PyYAML, then die on the first real CLI import.
|
||||
*
|
||||
* @param {string} pythonPath - Absolute path to a python.exe / python.
|
||||
* @param {object} [opts]
|
||||
* @param {object} [opts.env] - Additional environment for the probe.
|
||||
* @returns {boolean}
|
||||
*/
|
||||
function canImportHermesCli(pythonPath, opts = {}) {
|
||||
function canImportHermesCli(pythonPath) {
|
||||
if (!pythonPath) return false
|
||||
try {
|
||||
execFileSync(pythonPath, ['-c', hermesRuntimeImportProbe()], {
|
||||
env: { ...process.env, ...(opts.env || {}) },
|
||||
execFileSync(pythonPath, ['-c', 'import hermes_cli'], {
|
||||
stdio: 'ignore',
|
||||
timeout: PROBE_TIMEOUT_MS,
|
||||
windowsHide: true
|
||||
@@ -119,7 +101,6 @@ function verifyHermesCli(hermesCommand, opts = {}) {
|
||||
|
||||
module.exports = {
|
||||
canImportHermesCli,
|
||||
hermesRuntimeImportProbe,
|
||||
verifyHermesCli,
|
||||
PROBE_TIMEOUT_MS
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ const fs = require('node:fs')
|
||||
const os = require('node:os')
|
||||
const path = require('node:path')
|
||||
|
||||
const { canImportHermesCli, hermesRuntimeImportProbe, verifyHermesCli } = require('./backend-probes.cjs')
|
||||
const { canImportHermesCli, verifyHermesCli } = require('./backend-probes.cjs')
|
||||
|
||||
// Resolve the host's own Node binary -- guaranteed to be on disk and
|
||||
// runnable. We use it as both a stand-in for "a python that doesn't
|
||||
@@ -40,12 +40,6 @@ test('canImportHermesCli returns false when binary does not exist', () => {
|
||||
assert.equal(canImportHermesCli(ghost), false)
|
||||
})
|
||||
|
||||
test('hermes runtime import probe checks config dependencies', () => {
|
||||
const probe = hermesRuntimeImportProbe()
|
||||
assert.match(probe, /\bimport yaml\b/)
|
||||
assert.match(probe, /\bimport hermes_cli\.config\b/)
|
||||
})
|
||||
|
||||
test('verifyHermesCli returns false when command is falsy', () => {
|
||||
assert.equal(verifyHermesCli(''), false)
|
||||
assert.equal(verifyHermesCli(null), false)
|
||||
|
||||
@@ -167,5 +167,5 @@ module.exports = {
|
||||
readDashboardReadyFile,
|
||||
resolvePortAnnounceTimeoutMs,
|
||||
DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS,
|
||||
MIN_PORT_ANNOUNCE_TIMEOUT_MS
|
||||
MIN_PORT_ANNOUNCE_TIMEOUT_MS,
|
||||
}
|
||||
|
||||
@@ -25,7 +25,7 @@ const {
|
||||
waitForDashboardReadyFile,
|
||||
resolvePortAnnounceTimeoutMs,
|
||||
DEFAULT_PORT_ANNOUNCE_TIMEOUT_MS,
|
||||
MIN_PORT_ANNOUNCE_TIMEOUT_MS
|
||||
MIN_PORT_ANNOUNCE_TIMEOUT_MS,
|
||||
} = require('./backend-ready.cjs')
|
||||
|
||||
// A minimal stand-in for a spawned child process: an EventEmitter with a
|
||||
|
||||
@@ -179,13 +179,7 @@ function downloadInstallScript(commit, destPath) {
|
||||
})
|
||||
}
|
||||
|
||||
async function resolveInstallScript({
|
||||
installStamp,
|
||||
sourceRepoRoot,
|
||||
hermesHome,
|
||||
emit,
|
||||
_download = downloadInstallScript
|
||||
}) {
|
||||
async function resolveInstallScript({ installStamp, sourceRepoRoot, hermesHome, emit, _download = downloadInstallScript }) {
|
||||
// 1. Dev shortcut: prefer a local checkout's installer so we can iterate
|
||||
// without pushing. SOURCE_REPO_ROOT comes from main.cjs (path.resolve
|
||||
// of APP_ROOT/../..).
|
||||
@@ -299,19 +293,15 @@ function spawnPowerShell(scriptPath, args, { emit, stageName, abortSignal, herme
|
||||
const ps = process.platform === 'win32' ? resolveWindowsPowerShell() : 'pwsh'
|
||||
const fullArgs = ['-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', scriptPath, ...args]
|
||||
|
||||
const child = spawn(
|
||||
ps,
|
||||
fullArgs,
|
||||
hiddenWindowsChildOptions({
|
||||
stdio: ['ignore', 'pipe', 'pipe'],
|
||||
env: {
|
||||
...process.env,
|
||||
// Pass HERMES_HOME through so install.ps1 respects the caller's
|
||||
// choice rather than re-computing the default.
|
||||
HERMES_HOME: hermesHome || process.env.HERMES_HOME || ''
|
||||
}
|
||||
})
|
||||
)
|
||||
const child = spawn(ps, fullArgs, hiddenWindowsChildOptions({
|
||||
stdio: ['ignore', 'pipe', 'pipe'],
|
||||
env: {
|
||||
...process.env,
|
||||
// Pass HERMES_HOME through so install.ps1 respects the caller's
|
||||
// choice rather than re-computing the default.
|
||||
HERMES_HOME: hermesHome || process.env.HERMES_HOME || ''
|
||||
}
|
||||
}))
|
||||
|
||||
let stdout = ''
|
||||
let stderr = ''
|
||||
|
||||
@@ -37,20 +37,6 @@
|
||||
const AT_COOKIE_VARIANTS = ['__Host-hermes_session_at', '__Secure-hermes_session_at', 'hermes_session_at']
|
||||
const RT_COOKIE_VARIANTS = ['__Host-hermes_session_rt', '__Secure-hermes_session_rt', 'hermes_session_rt']
|
||||
|
||||
// The Nous portal (NAS) does NOT use Hermes gateway session cookies — it is a
|
||||
// Privy-authed Next.js app. NAS `auth()` (src/server/auth/session.ts) reads the
|
||||
// `privy-token` access-token cookie (with `privy-id-token` alongside), which is
|
||||
// also exactly what the `/api/agents` cookie-auth path validates. So portal
|
||||
// sign-in / discovery liveness must look for the Privy cookie, NOT the gateway
|
||||
// cookies above. `privy-token` is the access token (the required signal);
|
||||
// variants cover the secured-prefix forms and the older `privy-session` name.
|
||||
const PRIVY_SESSION_COOKIE_VARIANTS = [
|
||||
'__Host-privy-token',
|
||||
'__Secure-privy-token',
|
||||
'privy-token',
|
||||
'privy-session'
|
||||
]
|
||||
|
||||
function normalizeRemoteBaseUrl(rawUrl) {
|
||||
const value = String(rawUrl || '').trim()
|
||||
|
||||
@@ -156,30 +142,19 @@ function normAuthMode(mode) {
|
||||
return mode === 'oauth' ? 'oauth' : 'token'
|
||||
}
|
||||
|
||||
// Reports whether a connection mode resolves to a REMOTE backend. Besides
// 'remote' itself this covers 'cloud' (a Hermes Cloud connection,
// cloud-auto-discovery Q3/Q6): it carries a remote-shaped block and reuses the
// whole remote connect/probe/reconnect path, so resolution sites handle it
// exactly like 'remote'. Cloud and remote are told apart only by the settings
// UI (which card to show) and by config persistence (remembering provenance).
// Keeping the check in one place means no resolution site forgets the cloud arm.
function modeIsRemoteLike(mode) {
  switch (mode) {
    case 'remote':
    case 'cloud':
      return true
    default:
      return false
  }
}
|
||||
|
||||
/**
|
||||
* Select a profile's explicit remote override from a connection config, or null
|
||||
* when it has none (so the caller falls back to env → global remote → local).
|
||||
*
|
||||
* The config may carry a `profiles` map keyed by name; an entry counts as an
|
||||
* override only with a remote-like `mode` (remote or cloud) and a non-empty
|
||||
* `url`. Pure: `token` is the raw stored secret; main.cjs decrypts it. Returns
|
||||
* override only with `mode === 'remote'` and a non-empty `url`. Pure: `token`
|
||||
* is the raw stored secret; main.cjs decrypts it. Returns
|
||||
* `{ url, authMode, token } | null`.
|
||||
*/
|
||||
function profileRemoteOverride(config, profile) {
|
||||
const key = connectionScopeKey(profile)
|
||||
const entry = key ? config?.profiles?.[key] : null
|
||||
if (!entry || typeof entry !== 'object' || !modeIsRemoteLike(entry.mode)) {
|
||||
if (!entry || typeof entry !== 'object' || entry.mode !== 'remote') {
|
||||
return null
|
||||
}
|
||||
|
||||
@@ -286,34 +261,23 @@ function cookiesHaveSession(cookies) {
|
||||
*/
|
||||
function cookiesHaveLiveSession(cookies) {
|
||||
if (!Array.isArray(cookies)) return false
|
||||
return cookies.some(c => c && c.value && (AT_COOKIE_VARIANTS.includes(c.name) || RT_COOKIE_VARIANTS.includes(c.name)))
|
||||
}
|
||||
|
||||
/**
|
||||
* True if the cookie jar holds a live Nous PORTAL (Privy) session — a non-empty
|
||||
* `privy-token` (access-token) cookie, or a variant. This is the portal
|
||||
* analogue of `cookiesHaveLiveSession`: the portal authenticates via Privy, not
|
||||
* the Hermes gateway session cookies, so cloud sign-in / discovery liveness
|
||||
* must check THIS, not the gateway helpers. (NAS `auth()` and the `/api/agents`
|
||||
* cookie path both key off `privy-token`.)
|
||||
*/
|
||||
function cookiesHavePrivySession(cookies) {
|
||||
if (!Array.isArray(cookies)) return false
|
||||
return cookies.some(c => c && c.value && PRIVY_SESSION_COOKIE_VARIANTS.includes(c.name))
|
||||
return cookies.some(
|
||||
c =>
|
||||
c &&
|
||||
c.value &&
|
||||
(AT_COOKIE_VARIANTS.includes(c.name) || RT_COOKIE_VARIANTS.includes(c.name))
|
||||
)
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
AT_COOKIE_VARIANTS,
|
||||
RT_COOKIE_VARIANTS,
|
||||
PRIVY_SESSION_COOKIE_VARIANTS,
|
||||
authModeFromStatus,
|
||||
buildGatewayWsUrl,
|
||||
buildGatewayWsUrlWithTicket,
|
||||
connectionScopeKey,
|
||||
cookiesHaveSession,
|
||||
cookiesHaveLiveSession,
|
||||
cookiesHavePrivySession,
|
||||
modeIsRemoteLike,
|
||||
normAuthMode,
|
||||
normalizeRemoteBaseUrl,
|
||||
pathWithGlobalRemoteProfile,
|
||||
|
||||
@@ -22,8 +22,6 @@ const {
|
||||
connectionScopeKey,
|
||||
cookiesHaveSession,
|
||||
cookiesHaveLiveSession,
|
||||
cookiesHavePrivySession,
|
||||
modeIsRemoteLike,
|
||||
normAuthMode,
|
||||
normalizeRemoteBaseUrl,
|
||||
pathWithGlobalRemoteProfile,
|
||||
@@ -49,19 +47,6 @@ test('normAuthMode coerces to token unless explicitly oauth', () => {
|
||||
assert.equal(normAuthMode('weird'), 'token')
|
||||
})
|
||||
|
||||
// --- modeIsRemoteLike ---
|
||||
|
||||
test('modeIsRemoteLike is true for remote and cloud, false otherwise', () => {
|
||||
// cloud resolves to a remote backend under the hood (Q6), so every resolution
|
||||
// site treats it like remote.
|
||||
assert.equal(modeIsRemoteLike('remote'), true)
|
||||
assert.equal(modeIsRemoteLike('cloud'), true)
|
||||
assert.equal(modeIsRemoteLike('local'), false)
|
||||
assert.equal(modeIsRemoteLike(undefined), false)
|
||||
assert.equal(modeIsRemoteLike(null), false)
|
||||
assert.equal(modeIsRemoteLike('weird'), false)
|
||||
})
|
||||
|
||||
// --- profileRemoteOverride ---
|
||||
|
||||
test('profileRemoteOverride returns null when no profile is given', () => {
|
||||
@@ -100,21 +85,6 @@ test('profileRemoteOverride preserves an explicit oauth auth mode', () => {
|
||||
assert.equal(profileRemoteOverride(config, 'coder').authMode, 'oauth')
|
||||
})
|
||||
|
||||
test('profileRemoteOverride treats a cloud entry as a remote override', () => {
|
||||
// A 'cloud' per-profile entry resolves to the same remote backend a 'remote'
|
||||
// entry would (Q6) — the override must be returned, not dropped.
|
||||
const config = {
|
||||
profiles: {
|
||||
coder: { mode: 'cloud', url: 'https://agent-1.agents.nousresearch.com', authMode: 'oauth' }
|
||||
}
|
||||
}
|
||||
assert.deepEqual(profileRemoteOverride(config, 'coder'), {
|
||||
url: 'https://agent-1.agents.nousresearch.com',
|
||||
authMode: 'oauth',
|
||||
token: undefined
|
||||
})
|
||||
})
|
||||
|
||||
test('profileRemoteOverride tolerates a missing/!object profiles map', () => {
|
||||
assert.equal(profileRemoteOverride({}, 'coder'), null)
|
||||
assert.equal(profileRemoteOverride({ profiles: null }, 'coder'), null)
|
||||
@@ -361,35 +331,6 @@ test('cookiesHaveLiveSession is false for unrelated cookies and non-arrays', ()
|
||||
assert.equal(cookiesHaveLiveSession([]), false)
|
||||
})
|
||||
|
||||
// --- cookiesHavePrivySession (Nous portal / Privy auth, NOT gateway cookies) ---
|
||||
|
||||
test('cookiesHavePrivySession detects the privy-token access cookie', () => {
|
||||
assert.equal(cookiesHavePrivySession([{ name: 'privy-token', value: 'jwt' }]), true)
|
||||
})
|
||||
|
||||
test('cookiesHavePrivySession detects __Host-/__Secure- prefixes and the legacy privy-session name', () => {
|
||||
assert.equal(cookiesHavePrivySession([{ name: '__Host-privy-token', value: 'x' }]), true)
|
||||
assert.equal(cookiesHavePrivySession([{ name: '__Secure-privy-token', value: 'x' }]), true)
|
||||
assert.equal(cookiesHavePrivySession([{ name: 'privy-session', value: 'x' }]), true)
|
||||
})
|
||||
|
||||
test('cookiesHavePrivySession is false for an empty value', () => {
|
||||
assert.equal(cookiesHavePrivySession([{ name: 'privy-token', value: '' }]), false)
|
||||
})
|
||||
|
||||
test('cookiesHavePrivySession does NOT treat hermes gateway cookies as a portal session', () => {
|
||||
// The whole point of Q7: a gateway session cookie is NOT a portal sign-in.
|
||||
assert.equal(cookiesHavePrivySession([{ name: 'hermes_session_at', value: 'x' }]), false)
|
||||
assert.equal(cookiesHavePrivySession([{ name: '__Host-hermes_session_rt', value: 'x' }]), false)
|
||||
})
|
||||
|
||||
test('cookiesHavePrivySession is false for unrelated cookies and non-arrays', () => {
|
||||
assert.equal(cookiesHavePrivySession([{ name: 'other', value: 'x' }]), false)
|
||||
assert.equal(cookiesHavePrivySession(null), false)
|
||||
assert.equal(cookiesHavePrivySession(undefined), false)
|
||||
assert.equal(cookiesHavePrivySession([]), false)
|
||||
})
|
||||
|
||||
// --- tokenPreview ---
|
||||
|
||||
test('tokenPreview returns null for empty', () => {
|
||||
|
||||
@@ -138,7 +138,10 @@ function buildPosixCleanupScript({ desktopPid, pythonExe, pythonPath, agentRoot,
|
||||
if (pythonPath) {
|
||||
lines.push(`export PYTHONPATH=${q(pythonPath)}\${PYTHONPATH:+:$PYTHONPATH}`)
|
||||
}
|
||||
lines.push(`cd ${q(agentRoot)} 2>/dev/null || true`, `${q(pythonExe)} ${uninstallArgs.map(q).join(' ')} || true`)
|
||||
lines.push(
|
||||
`cd ${q(agentRoot)} 2>/dev/null || true`,
|
||||
`${q(pythonExe)} ${uninstallArgs.map(q).join(' ')} || true`
|
||||
)
|
||||
if (appPath) {
|
||||
lines.push(`rm -rf ${q(appPath)} || true`)
|
||||
}
|
||||
@@ -166,15 +169,7 @@ function buildPosixCleanupScript({ desktopPid, pythonExe, pythonPath, agentRoot,
|
||||
* Removal: even after the desktop PID is gone, Windows releases directory
|
||||
* handles lazily, so a single `rmdir /s /q` can half-fail — retry up to 10x.
|
||||
*/
|
||||
function buildWindowsCleanupScript({
|
||||
desktopPid,
|
||||
pythonExe,
|
||||
pythonPath,
|
||||
agentRoot,
|
||||
uninstallArgs,
|
||||
appPath,
|
||||
hermesHome
|
||||
}) {
|
||||
function buildWindowsCleanupScript({ desktopPid, pythonExe, pythonPath, agentRoot, uninstallArgs, appPath, hermesHome }) {
|
||||
const pid = Number(desktopPid) || 0
|
||||
// cmd.exe has no string escaping inside quotes; strip embedded quotes (paths
|
||||
// under %LOCALAPPDATA% never contain them). `&`/`^` in a path would still be
|
||||
|
||||
@@ -101,7 +101,10 @@ test('resolveRemovableAppPath uses APPIMAGE on Linux when set', () => {
|
||||
})
|
||||
|
||||
test('resolveRemovableAppPath finds the unpacked dir on Linux', () => {
|
||||
assert.equal(resolveRemovableAppPath('/opt/hermes/linux-unpacked/hermes', 'linux', {}), '/opt/hermes/linux-unpacked')
|
||||
assert.equal(
|
||||
resolveRemovableAppPath('/opt/hermes/linux-unpacked/hermes', 'linux', {}),
|
||||
'/opt/hermes/linux-unpacked'
|
||||
)
|
||||
// A system-package install (/usr/bin) → null, left to apt/dnf.
|
||||
assert.equal(resolveRemovableAppPath('/usr/bin/hermes', 'linux', {}), null)
|
||||
})
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
'use strict'
|
||||
|
||||
const { session } = require('electron')
|
||||
|
||||
// Session partition name used for the embed webview.
const EMBED_SESSION_PARTITION = 'persist:hermes-embed'
// Referer value stamped onto YouTube-family requests that carry none.
const EMBED_REFERER = 'https://www.youtube.com/'
// Matches a hostname that is, or is a subdomain of, one of the YouTube-related hosts.
const YOUTUBE_REFERER_HOST_RE =
  /(^|\.)(youtube\.com|youtube-nocookie\.com|googlevideo\.com|ytimg\.com|youtubei\.googleapis\.com)$/i

/**
 * Register an onBeforeSendHeaders hook on `embedSession` that adds a Referer
 * header to requests whose hostname matches YOUTUBE_REFERER_HOST_RE.
 *
 * Requests to any other host (or with an unparseable URL) are passed through
 * with their original header object. A Referer that is already present
 * (`Referer` or `referer`) is never overwritten.
 *
 * @param {object} embedSession - object exposing `webRequest.onBeforeSendHeaders`;
 *   a falsy value makes this a no-op.
 * @returns {void}
 */
function installEmbedRefererForSession(embedSession) {
  if (!embedSession) {
    return
  }

  // Unparseable URLs resolve to an empty hostname, which never matches the regex.
  const hostnameOf = rawUrl => {
    try {
      return new URL(rawUrl).hostname
    } catch {
      return ''
    }
  }

  embedSession.webRequest.onBeforeSendHeaders((details, callback) => {
    const targetHost = hostnameOf(details.url)

    if (!YOUTUBE_REFERER_HOST_RE.test(targetHost)) {
      // Not a YouTube-family host: hand back the untouched header object.
      callback({ requestHeaders: details.requestHeaders })

      return
    }

    // Work on a shallow copy so the incoming header object is not mutated.
    const outgoing = { ...details.requestHeaders }
    const alreadyHasReferer = Boolean(outgoing.Referer) || Boolean(outgoing.referer)

    if (!alreadyHasReferer) {
      outgoing.Referer = EMBED_REFERER
    }

    callback({ requestHeaders: outgoing })
  })
}
|
||||
|
||||
/**
 * Install the YouTube Referer hook on the embed webview partition only.
 * Best-effort: any failure while resolving or hooking the session is ignored.
 */
function installEmbedReferer() {
  try {
    const embedSession = session.fromPartition(EMBED_SESSION_PARTITION)

    installEmbedRefererForSession(embedSession)
  } catch {
    // Non-fatal: embeds still render; YouTube may show referer errors.
  }
}
|
||||
|
||||
module.exports = { installEmbedReferer }
|
||||
@@ -92,7 +92,9 @@ async function readDirForIpc(dirPath, options = {}) {
|
||||
try {
|
||||
const dirents = await fsImpl.promises.readdir(resolved, { withFileTypes: true })
|
||||
const visibleDirents = dirents.filter(dirent => !FS_READDIR_HIDDEN.has(dirent.name))
|
||||
const entries = await mapWithStatConcurrency(visibleDirents, dirent => entryForDirent(dirent, resolved, fsImpl))
|
||||
const entries = await mapWithStatConcurrency(visibleDirents, dirent =>
|
||||
entryForDirent(dirent, resolved, fsImpl)
|
||||
)
|
||||
|
||||
entries.sort((a, b) => Number(b.isDirectory) - Number(a.isDirectory) || a.name.localeCompare(b.name))
|
||||
|
||||
|
||||
@@ -349,10 +349,7 @@ test('readDirForIpc bounds concurrent stats while preserving complete sorted out
|
||||
assert.equal(result.error, undefined)
|
||||
assert.equal(result.entries.length, names.length)
|
||||
assert.equal(statCalls.length, names.length)
|
||||
assert.equal(
|
||||
statCalls.some(fullPath => fullPath.endsWith(`${path.sep}node_modules`)),
|
||||
false
|
||||
)
|
||||
assert.equal(statCalls.some(fullPath => fullPath.endsWith(`${path.sep}node_modules`)), false)
|
||||
assert.ok(peak > 1, `expected concurrent stats, observed peak ${peak}`)
|
||||
assert.ok(peak <= 16, `expected at most 16 concurrent stats, observed peak ${peak}`)
|
||||
assert.deepEqual(
|
||||
@@ -360,5 +357,8 @@ test('readDirForIpc bounds concurrent stats while preserving complete sorted out
|
||||
expectedNames
|
||||
)
|
||||
assert.equal(result.entries.find(entry => entry.name === failedName)?.isDirectory, false)
|
||||
assert.equal(result.entries.filter(entry => entry.isDirectory).length, successfulDirectoryNames.size)
|
||||
assert.equal(
|
||||
result.entries.filter(entry => entry.isDirectory).length,
|
||||
successfulDirectoryNames.size
|
||||
)
|
||||
})
|
||||
|
||||
@@ -86,8 +86,10 @@ async function scanGitRepos(roots, options = {}) {
|
||||
await mapLimit(subdirs, MAX_CONCURRENCY, sub => walk(sub, depth + 1))
|
||||
}
|
||||
|
||||
await mapLimit(searchRoots.map(root => String(root || '').trim()).filter(Boolean), MAX_CONCURRENCY, root =>
|
||||
walk(root, 0)
|
||||
await mapLimit(
|
||||
searchRoots.map(root => String(root || '').trim()).filter(Boolean),
|
||||
MAX_CONCURRENCY,
|
||||
root => walk(root, 0)
|
||||
)
|
||||
|
||||
return [...found.entries()].map(([root, label]) => ({ label, root }))
|
||||
|
||||
@@ -10,26 +10,7 @@ const { execFile } = require('node:child_process')
|
||||
const fs = require('node:fs/promises')
|
||||
const path = require('node:path')
|
||||
|
||||
// `simple-git` is a pure-JS runtime dep that workspace dedup hoists into the
|
||||
// repo-root node_modules. Packaged builds set `files:` in package.json, which
|
||||
// excludes node_modules from the asar, so the normal require() fails at launch
|
||||
// (issue #52735: "Cannot find module 'simple-git'"). We ship the dep's
|
||||
// closure under resources/native-deps/vendor/node_modules/ via extraResources
|
||||
// + scripts/stage-native-deps.cjs, and resolve from there when the hoisted
|
||||
// require() isn't reachable. The `vendor/` nesting matters: electron-builder
|
||||
// drops a node_modules dir at the root of an extraResources copy but keeps a
|
||||
// nested one. Dev mode never hits the fallback -- Node's normal lookup finds
|
||||
// the hoisted copy.
|
||||
let simpleGit
|
||||
try {
|
||||
simpleGit = require('simple-git')
|
||||
} catch {
|
||||
const resourcesPath = process.resourcesPath
|
||||
if (!resourcesPath) {
|
||||
throw new Error("git-review IPC: 'simple-git' not found and no resourcesPath to fall back to")
|
||||
}
|
||||
simpleGit = require(path.join(resourcesPath, 'native-deps', 'vendor', 'node_modules', 'simple-git'))
|
||||
}
|
||||
const simpleGit = require('simple-git')
|
||||
|
||||
const { resolveRequestedPathForIpc } = require('./hardening.cjs')
|
||||
|
||||
@@ -207,12 +188,7 @@ async function defaultBranchName(git) {
|
||||
|
||||
// Prefer a local trunk, then a remote-only one (returns the clean name either
|
||||
// way) so "branch off main" works even before main is checked out locally.
|
||||
for (const ref of [
|
||||
'refs/heads/main',
|
||||
'refs/heads/master',
|
||||
'refs/remotes/origin/main',
|
||||
'refs/remotes/origin/master'
|
||||
]) {
|
||||
for (const ref of ['refs/heads/main', 'refs/heads/master', 'refs/remotes/origin/main', 'refs/remotes/origin/master']) {
|
||||
try {
|
||||
await git.raw(['rev-parse', '--verify', '--quiet', ref])
|
||||
|
||||
|
||||
@@ -45,10 +45,7 @@ function parseWorktrees(out) {
|
||||
} else if (!cur) {
|
||||
continue
|
||||
} else if (line.startsWith('branch ')) {
|
||||
cur.branch = line
|
||||
.slice(7)
|
||||
.trim()
|
||||
.replace(/^refs\/heads\//, '')
|
||||
cur.branch = line.slice(7).trim().replace(/^refs\/heads\//, '')
|
||||
} else if (line === 'detached') {
|
||||
cur.detached = true
|
||||
} else if (line === 'bare') {
|
||||
@@ -125,9 +122,10 @@ async function gitLine(gitBin, args, cwd) {
|
||||
}
|
||||
|
||||
async function defaultBranch(gitBin, cwd) {
|
||||
const remote = (
|
||||
await gitLine(gitBin, ['symbolic-ref', '--quiet', '--short', 'refs/remotes/origin/HEAD'], cwd)
|
||||
).replace(/^origin\//, '')
|
||||
const remote = (await gitLine(gitBin, ['symbolic-ref', '--quiet', '--short', 'refs/remotes/origin/HEAD'], cwd)).replace(
|
||||
/^origin\//,
|
||||
''
|
||||
)
|
||||
|
||||
if (remote) {
|
||||
return remote
|
||||
@@ -179,16 +177,7 @@ async function ensureGitRepo(gitBin, dir) {
|
||||
// Inline identity so the seed commit lands even with no global git config.
|
||||
await runGit(
|
||||
gitBin,
|
||||
[
|
||||
'-c',
|
||||
'user.email=hermes@localhost',
|
||||
'-c',
|
||||
'user.name=Hermes',
|
||||
'commit',
|
||||
'--allow-empty',
|
||||
'-m',
|
||||
'Initial commit'
|
||||
],
|
||||
['-c', 'user.email=hermes@localhost', '-c', 'user.name=Hermes', 'commit', '--allow-empty', '-m', 'Initial commit'],
|
||||
dir
|
||||
)
|
||||
}
|
||||
|
||||
@@ -186,10 +186,7 @@ async function statForIpc(fsImpl, resolvedPath, purpose, typeLabel) {
|
||||
if (code === 'ENOENT' || code === 'ENOTDIR') {
|
||||
throw ipcPathError(code || 'ENOENT', `${purpose} failed: ${typeLabel} does not exist.`)
|
||||
}
|
||||
throw ipcPathError(
|
||||
code || 'read-error',
|
||||
`${purpose} failed: ${error instanceof Error ? error.message : String(error)}`
|
||||
)
|
||||
throw ipcPathError(code || 'read-error', `${purpose} failed: ${error instanceof Error ? error.message : String(error)}`)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -204,10 +201,7 @@ async function realpathForIpc(fsImpl, resolvedPath, purpose) {
|
||||
return realPath
|
||||
} catch (error) {
|
||||
const code = error && typeof error === 'object' ? error.code : ''
|
||||
throw ipcPathError(
|
||||
code || 'read-error',
|
||||
`${purpose} failed: ${error instanceof Error ? error.message : String(error)}`
|
||||
)
|
||||
throw ipcPathError(code || 'read-error', `${purpose} failed: ${error instanceof Error ? error.message : String(error)}`)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -30,8 +30,5 @@ test('setJsonRequestHeaders does not set Electron-restricted Content-Length', ()
|
||||
setJsonRequestHeaders(request)
|
||||
|
||||
assert.deepEqual(headers, [['Content-Type', 'application/json']])
|
||||
assert.equal(
|
||||
headers.some(([name]) => name.toLowerCase() === 'content-length'),
|
||||
false
|
||||
)
|
||||
assert.equal(headers.some(([name]) => name.toLowerCase() === 'content-length'), false)
|
||||
})
|
||||
|
||||
@@ -41,16 +41,6 @@ contextBridge.exposeInMainWorld('hermesDesktop', {
|
||||
probeConnectionConfig: remoteUrl => ipcRenderer.invoke('hermes:connection-config:probe', remoteUrl),
|
||||
oauthLoginConnectionConfig: remoteUrl => ipcRenderer.invoke('hermes:connection-config:oauth-login', remoteUrl),
|
||||
oauthLogoutConnectionConfig: remoteUrl => ipcRenderer.invoke('hermes:connection-config:oauth-logout', remoteUrl),
|
||||
// Hermes Cloud: one portal login powers discovery + silent per-agent sign-in
|
||||
// (cloud-auto-discovery Phase 3).
|
||||
cloud: {
|
||||
status: () => ipcRenderer.invoke('hermes:cloud:status'),
|
||||
betaEnabled: () => ipcRenderer.invoke('hermes:cloud:beta-enabled'),
|
||||
login: () => ipcRenderer.invoke('hermes:cloud:login'),
|
||||
logout: () => ipcRenderer.invoke('hermes:cloud:logout'),
|
||||
discover: org => ipcRenderer.invoke('hermes:cloud:discover', org),
|
||||
agentSignIn: dashboardUrl => ipcRenderer.invoke('hermes:cloud:agent-sign-in', dashboardUrl)
|
||||
},
|
||||
profile: {
|
||||
get: () => ipcRenderer.invoke('hermes:profile:get'),
|
||||
set: name => ipcRenderer.invoke('hermes:profile:set', name)
|
||||
|
||||
@@ -1,24 +0,0 @@
|
||||
'use strict'
|
||||
|
||||
// Pixel width reserved for the native window-controls overlay before layout.
const OVERLAY_FALLBACK_WIDTH = 144

/**
 * Pre-layout reservation (px) for the right-hand native window-controls
 * overlay (min/max/close).
 *
 * This is a FALLBACK only: after layout the renderer takes the exact width from
 * navigator.windowControlsOverlay (use-window-controls-overlay-width.ts) and
 * falls back to this value only when the WCO API is unavailable.
 *
 * macOS positions traffic lights via trafficLightPosition rather than painting
 * a WCO overlay, so nothing is reserved there. Every other desktop platform
 * (Windows, WSLg, plain Linux KDE/GNOME) paints the Electron overlay and
 * reserves the fallback width.
 *
 * @param {{ isWindows?: boolean, isWsl?: boolean, isMac?: boolean }} opts
 * @returns {number} 0 on macOS, otherwise OVERLAY_FALLBACK_WIDTH
 */
function nativeOverlayWidth({ isWindows = false, isWsl = false, isMac = false } = {}) {
  // Only the macOS flag changes the result; the other flags are accepted for callers.
  return isMac ? 0 : OVERLAY_FALLBACK_WIDTH
}
|
||||
|
||||
module.exports = { OVERLAY_FALLBACK_WIDTH, nativeOverlayWidth }
|
||||
@@ -1,36 +0,0 @@
|
||||
const assert = require('node:assert/strict')
|
||||
const test = require('node:test')
|
||||
|
||||
const { OVERLAY_FALLBACK_WIDTH, nativeOverlayWidth } = require('./titlebar-overlay-width.cjs')
|
||||
|
||||
// The value under test is only the pre-layout FALLBACK reservation. After
// layout the renderer reads the exact width from navigator.windowControlsOverlay
// (use-window-controls-overlay-width.ts); these values apply only when the WCO
// API is unavailable.

test('Windows reserves the overlay fallback width', () => {
  const reserved = nativeOverlayWidth({ isWindows: true })

  assert.equal(reserved, OVERLAY_FALLBACK_WIDTH)
})

test('WSLg paints the same WCO, so it reserves the same fallback width', () => {
  // Original bug: WSL fell through to 0, leaving the right tools under the
  // controls and letting the title overrun into them.
  const reserved = nativeOverlayWidth({ isWsl: true })

  assert.equal(reserved, OVERLAY_FALLBACK_WIDTH)
})

test('plain Linux paints the WCO too, so it reserves the fallback width', () => {
  // Regression #53185: the overlay was re-enabled on plain Linux (KDE/GNOME)
  // without reserving its width, so the native min/max/close buttons painted
  // over the app's right-edge titlebar tools.
  const plainLinuxInputs = [{ isWindows: false, isWsl: false }, undefined, {}]

  for (const opts of plainLinuxInputs) {
    assert.equal(nativeOverlayWidth(opts), OVERLAY_FALLBACK_WIDTH)
  }
})

test('macOS uses traffic lights, not a WCO overlay, so it reserves nothing', () => {
  const reserved = nativeOverlayWidth({ isMac: true })

  assert.equal(reserved, 0)
})

test('the fallback width is a sane positive pixel value', () => {
  const isPositiveInteger = Number.isInteger(OVERLAY_FALLBACK_WIDTH) && OVERLAY_FALLBACK_WIDTH > 0

  assert.ok(isPositiveInteger)
})
|
||||
@@ -7,81 +7,45 @@ const { resolveBehindCount, shouldCountCommits } = require('./update-count.cjs')
|
||||
// unconditionally, so a shallow checkout with no merge-base surfaced the bogus
|
||||
// rev-list count (e.g. 12104). This asserts the new shallow/no-merge-base branch.
|
||||
test('shallow checkout with no merge-base does NOT trust the bogus rev-list count', () => {
|
||||
assert.equal(
|
||||
resolveBehindCount({
|
||||
countStr: '12104',
|
||||
currentSha: 'aaa',
|
||||
targetSha: 'bbb',
|
||||
isShallow: true,
|
||||
hasMergeBase: false
|
||||
}),
|
||||
1
|
||||
)
|
||||
assert.equal(resolveBehindCount({
|
||||
countStr: '12104', currentSha: 'aaa', targetSha: 'bbb',
|
||||
isShallow: true, hasMergeBase: false,
|
||||
}), 1)
|
||||
})
|
||||
|
||||
test('shallow checkout with no merge-base but identical SHA reports up-to-date', () => {
|
||||
assert.equal(
|
||||
resolveBehindCount({
|
||||
countStr: '12104',
|
||||
currentSha: 'abc',
|
||||
targetSha: 'abc',
|
||||
isShallow: true,
|
||||
hasMergeBase: false
|
||||
}),
|
||||
0
|
||||
)
|
||||
assert.equal(resolveBehindCount({
|
||||
countStr: '12104', currentSha: 'abc', targetSha: 'abc',
|
||||
isShallow: true, hasMergeBase: false,
|
||||
}), 0)
|
||||
})
|
||||
|
||||
test('shallow checkout WITH a merge-base keeps the exact count (reliable)', () => {
|
||||
assert.equal(
|
||||
resolveBehindCount({
|
||||
countStr: '3',
|
||||
currentSha: 'aaa',
|
||||
targetSha: 'bbb',
|
||||
isShallow: true,
|
||||
hasMergeBase: true
|
||||
}),
|
||||
3
|
||||
)
|
||||
assert.equal(resolveBehindCount({
|
||||
countStr: '3', currentSha: 'aaa', targetSha: 'bbb',
|
||||
isShallow: true, hasMergeBase: true,
|
||||
}), 3)
|
||||
})
|
||||
|
||||
test('full (non-shallow) clone keeps the exact count path unchanged', () => {
|
||||
assert.equal(
|
||||
resolveBehindCount({
|
||||
countStr: '7',
|
||||
currentSha: 'aaa',
|
||||
targetSha: 'bbb',
|
||||
isShallow: false,
|
||||
hasMergeBase: true
|
||||
}),
|
||||
7
|
||||
)
|
||||
assert.equal(resolveBehindCount({
|
||||
countStr: '7', currentSha: 'aaa', targetSha: 'bbb',
|
||||
isShallow: false, hasMergeBase: true,
|
||||
}), 7)
|
||||
})
|
||||
|
||||
test('up-to-date full clone reports 0', () => {
|
||||
assert.equal(
|
||||
resolveBehindCount({
|
||||
countStr: '0',
|
||||
currentSha: 'x',
|
||||
targetSha: 'x',
|
||||
isShallow: false,
|
||||
hasMergeBase: true
|
||||
}),
|
||||
0
|
||||
)
|
||||
assert.equal(resolveBehindCount({
|
||||
countStr: '0', currentSha: 'x', targetSha: 'x',
|
||||
isShallow: false, hasMergeBase: true,
|
||||
}), 0)
|
||||
})
|
||||
|
||||
test('non-numeric count falls back to 0 (defensive, unchanged behaviour)', () => {
|
||||
assert.equal(
|
||||
resolveBehindCount({
|
||||
countStr: '',
|
||||
currentSha: 'aaa',
|
||||
targetSha: 'bbb',
|
||||
isShallow: false,
|
||||
hasMergeBase: true
|
||||
}),
|
||||
0
|
||||
)
|
||||
assert.equal(resolveBehindCount({
|
||||
countStr: '', currentSha: 'aaa', targetSha: 'bbb',
|
||||
isShallow: false, hasMergeBase: true,
|
||||
}), 0)
|
||||
})
|
||||
|
||||
// shouldCountCommits gates the expensive `rev-list --count` in checkUpdates().
|
||||
@@ -104,24 +68,12 @@ test('full (non-shallow) clone always runs the count', () => {
|
||||
// The skip path produces an empty countStr; resolveBehindCount must NOT trust
|
||||
// it and must fall through to the SHA compare (mirrors the live call site).
|
||||
test('skipped-count path resolves via SHA compare, never via empty countStr', () => {
|
||||
assert.equal(
|
||||
resolveBehindCount({
|
||||
countStr: '',
|
||||
currentSha: 'aaa',
|
||||
targetSha: 'bbb',
|
||||
isShallow: true,
|
||||
hasMergeBase: false
|
||||
}),
|
||||
1
|
||||
)
|
||||
assert.equal(
|
||||
resolveBehindCount({
|
||||
countStr: '',
|
||||
currentSha: 'same',
|
||||
targetSha: 'same',
|
||||
isShallow: true,
|
||||
hasMergeBase: false
|
||||
}),
|
||||
0
|
||||
)
|
||||
assert.equal(resolveBehindCount({
|
||||
countStr: '', currentSha: 'aaa', targetSha: 'bbb',
|
||||
isShallow: true, hasMergeBase: false,
|
||||
}), 1)
|
||||
assert.equal(resolveBehindCount({
|
||||
countStr: '', currentSha: 'same', targetSha: 'same',
|
||||
isShallow: true, hasMergeBase: false,
|
||||
}), 0)
|
||||
})
|
||||
|
||||
@@ -62,10 +62,7 @@ test('resolveUnpackedRelease is null for AppImage / .deb / .rpm / dev / unresolv
|
||||
assert.equal(resolveUnpackedRelease('/usr/lib/hermes/hermes', ROOT, 'linux'), null)
|
||||
assert.equal(resolveUnpackedRelease('/opt/Hermes/hermes', ROOT, 'linux'), null)
|
||||
// dev electron
|
||||
assert.equal(
|
||||
resolveUnpackedRelease('/home/u/.hermes/hermes-agent/node_modules/electron/dist/electron', ROOT, 'linux'),
|
||||
null
|
||||
)
|
||||
assert.equal(resolveUnpackedRelease('/home/u/.hermes/hermes-agent/node_modules/electron/dist/electron', ROOT, 'linux'), null)
|
||||
// empty / missing
|
||||
assert.equal(resolveUnpackedRelease('', ROOT, 'linux'), null)
|
||||
assert.equal(resolveUnpackedRelease(path.join(UNPACKED, 'hermes'), '', 'linux'), null)
|
||||
|
||||
@@ -39,9 +39,7 @@ function canonicalGitHubRemote(url) {
|
||||
}
|
||||
|
||||
function isSshRemote(url) {
|
||||
const value = String(url || '')
|
||||
.trim()
|
||||
.toLowerCase()
|
||||
const value = String(url || '').trim().toLowerCase()
|
||||
return value.startsWith('git@') || value.startsWith('ssh://')
|
||||
}
|
||||
|
||||
|
||||
@@ -26,11 +26,7 @@ const REQUEST_TIMEOUT_MS = 20_000
|
||||
const ID_RE = /^[\w-]+\.[\w-]+$/
|
||||
|
||||
/** Minimal HTTPS helper with redirect-following, timeout, and a size cap. */
|
||||
function request(
|
||||
url,
|
||||
{ method = 'GET', headers = {}, body = null, maxBytes = MAX_VSIX_BYTES } = {},
|
||||
redirectsLeft = MAX_REDIRECTS
|
||||
) {
|
||||
function request(url, { method = 'GET', headers = {}, body = null, maxBytes = MAX_VSIX_BYTES } = {}, redirectsLeft = MAX_REDIRECTS) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const req = https.request(url, { method, headers }, res => {
|
||||
const status = res.statusCode ?? 0
|
||||
@@ -46,13 +42,7 @@ function request(
|
||||
const next = new URL(res.headers.location, url).toString()
|
||||
res.resume()
|
||||
// Redirects to the CDN are plain GETs (drop the POST body).
|
||||
resolve(
|
||||
request(
|
||||
next,
|
||||
{ method: 'GET', headers: { 'User-Agent': headers['User-Agent'] }, maxBytes },
|
||||
redirectsLeft - 1
|
||||
)
|
||||
)
|
||||
resolve(request(next, { method: 'GET', headers: { 'User-Agent': headers['User-Agent'] }, maxBytes }, redirectsLeft - 1))
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@@ -26,16 +26,7 @@ const LAPTOP = [{ workArea: { x: 0, y: 0, width: 1366, height: 728 } }]
|
||||
// ─── sanitizeWindowState ───────────────────────────────────────────────────
|
||||
|
||||
test('sanitizeWindowState rejects missing/garbage input', () => {
|
||||
for (const bad of [
|
||||
null,
|
||||
undefined,
|
||||
'nope',
|
||||
42,
|
||||
{},
|
||||
{ width: 'x', height: 800 },
|
||||
{ width: NaN, height: 800 },
|
||||
{ width: 1000 }
|
||||
]) {
|
||||
for (const bad of [null, undefined, 'nope', 42, {}, { width: 'x', height: 800 }, { width: NaN, height: 800 }, { width: 1000 }]) {
|
||||
assert.equal(sanitizeWindowState(bad), null)
|
||||
}
|
||||
})
|
||||
@@ -121,13 +112,9 @@ test('computeWindowOptions does not clamp when displays are unknown', () => {
|
||||
test('debounce coalesces a burst into one trailing run', t => {
|
||||
t.mock.timers.enable({ apis: ['setTimeout'] })
|
||||
let calls = 0
|
||||
const d = debounce(() => {
|
||||
calls += 1
|
||||
}, 250)
|
||||
const d = debounce(() => { calls += 1 }, 250)
|
||||
|
||||
d()
|
||||
d()
|
||||
d()
|
||||
d(); d(); d()
|
||||
assert.equal(calls, 0)
|
||||
t.mock.timers.tick(249)
|
||||
assert.equal(calls, 0)
|
||||
@@ -138,9 +125,7 @@ test('debounce coalesces a burst into one trailing run', t => {
|
||||
test('debounce.flush runs now and cancels the pending timer', t => {
|
||||
t.mock.timers.enable({ apis: ['setTimeout'] })
|
||||
let calls = 0
|
||||
const d = debounce(() => {
|
||||
calls += 1
|
||||
}, 250)
|
||||
const d = debounce(() => { calls += 1 }, 250)
|
||||
|
||||
d()
|
||||
d.flush()
|
||||
|
||||
@@ -13,7 +13,7 @@ function readElectronFile(name) {
|
||||
|
||||
function requireHiddenChildOptions(source, needle) {
|
||||
const match = needle instanceof RegExp ? needle.exec(source) : null
|
||||
const index = needle instanceof RegExp ? (match?.index ?? -1) : source.indexOf(needle)
|
||||
const index = needle instanceof RegExp ? match?.index ?? -1 : source.indexOf(needle)
|
||||
assert.notEqual(index, -1, `missing call site: ${needle}`)
|
||||
const snippet = source.slice(index, index + 700)
|
||||
assert.match(
|
||||
@@ -38,40 +38,19 @@ test('desktop background child processes opt into hidden Windows consoles', () =
|
||||
requireHiddenChildOptions(source, /hermesProcess = spawn\(\s*backend\.command,\s*backend\.args/)
|
||||
requireHiddenChildOptions(source, /spawn\(\s*py,\s*\['-m', 'hermes_cli\.main', 'uninstall', '--gui-summary'\]/)
|
||||
|
||||
assert.match(source, /function unwrapWindowsVenvHermesCommand\(command, backendArgs\)/)
|
||||
assert.match(source, /function unwrapWindowsVenvHermesCommand\(command, dashboardArgs\)/)
|
||||
assert.match(source, /existing Hermes no-console Python at/)
|
||||
assert.match(source, /function getNoConsoleVenvPython\(venvRoot\)/)
|
||||
assert.match(source, /function toNoConsolePython\(pythonPath\)/)
|
||||
assert.match(source, /function applyWindowsNoConsoleSpawnHints\(backend\)/)
|
||||
assert.match(source, /function readVenvHome\(venvRoot\)/)
|
||||
assert.match(source, /path\.join\(venvRoot, 'Scripts', 'pythonw\.exe'\)/)
|
||||
assert.match(source, /backendStartFailure/)
|
||||
assert.match(source, /HERMES_DESKTOP_READY_FILE/)
|
||||
assert.match(source, /readyFile: true/)
|
||||
assert.match(source, /function getVenvSitePackagesEntries\(venvRoot\)/)
|
||||
assert.match(source, /path\.join\(venvRoot, 'Lib', 'site-packages'\)/)
|
||||
assert.match(source, /args: \['-m', 'hermes_cli\.main', \.\.\.backendArgs\]/)
|
||||
})
|
||||
|
||||
test('desktop backend launches console python so child consoles are inherited, not pythonw', () => {
|
||||
const source = readElectronFile('main.cjs')
|
||||
|
||||
// The flash fix is structural: the backend runs as a console-subsystem
|
||||
// python.exe under hiddenWindowsChildOptions() (-> CREATE_NO_WINDOW), so it
|
||||
// owns ONE windowless console that every descendant spawn inherits. Launching
|
||||
// it as GUI-subsystem pythonw.exe is what made each child allocate (and flash)
|
||||
// its own console, so the backend command must never be pythonw.
|
||||
assert.doesNotMatch(source, /pythonw\.exe'\)/, 'backend must not be launched via pythonw.exe')
|
||||
assert.doesNotMatch(
|
||||
source,
|
||||
/function getNoConsoleVenvPython\b/,
|
||||
'pythonw-conversion helper should be gone; console python is launched directly'
|
||||
)
|
||||
assert.doesNotMatch(
|
||||
source,
|
||||
/function applyWindowsNoConsoleSpawnHints\b/,
|
||||
'pythonw spawn-hint rewriter should be gone'
|
||||
)
|
||||
|
||||
// Console python restores stdout, so the port is announced on the normal
|
||||
// HERMES_DASHBOARD_READY stdout line — no ready-file side channel is set.
|
||||
assert.doesNotMatch(source, /readyFile: true/, 'no backend should opt into the pythonw ready-file path')
|
||||
|
||||
// Both desktop backend launches must still go through hiddenWindowsChildOptions
|
||||
// so the single backend console is created windowless.
|
||||
requireHiddenChildOptions(source, /spawn\(\s*backend\.command,\s*backend\.args/)
|
||||
requireHiddenChildOptions(source, /hermesProcess = spawn\(\s*backend\.command,\s*backend\.args/)
|
||||
assert.match(source, /args: \['-m', 'hermes_cli\.main', \.\.\.dashboardArgs\]/)
|
||||
})
|
||||
|
||||
test('intentional or interactive desktop child processes stay documented', () => {
|
||||
@@ -89,5 +68,5 @@ test('bootstrap PowerShell runner hides Windows console children', () => {
|
||||
const source = readElectronFile('bootstrap-runner.cjs')
|
||||
|
||||
assert.match(source, /function hiddenWindowsChildOptions\(options = \{\}\)/)
|
||||
requireHiddenChildOptions(source, /spawn\(\s*ps,\s*fullArgs/)
|
||||
requireHiddenChildOptions(source, 'spawn(ps, fullArgs')
|
||||
})
|
||||
|
||||
@@ -1,67 +0,0 @@
|
||||
'use strict'
|
||||
|
||||
// Regression guards for Windows `hermes` resolution in main.cjs.
|
||||
//
|
||||
// main.cjs has no module.exports, so these follow the repo's source-assertion
|
||||
// test pattern (see windows-child-process.test.cjs). They pin the two Windows
|
||||
// resolution bugs that caused desktop reinstall loops:
|
||||
// 1. findOnPath() tried the empty extension FIRST, so an extensionless
|
||||
// Git-Bash `hermes` shim shadowed the real hermes.cmd/hermes.exe; the
|
||||
// shim then failed the --version probe and the desktop fell through to a
|
||||
// spurious bootstrap/repair.
|
||||
// 2. handOffWindowsBootstrapRecovery() chose --update vs the destructive
|
||||
// --repair by checking ONLY venv\Scripts\hermes.exe (the console-script
|
||||
// shim, written at the END of venv setup and absent in interrupted
|
||||
// states), so it escalated to a full venv recreate even on healthy
|
||||
// installs.
|
||||
|
||||
const test = require('node:test')
|
||||
const assert = require('node:assert/strict')
|
||||
const fs = require('node:fs')
|
||||
const path = require('node:path')
|
||||
|
||||
/**
 * Load main.cjs (sibling of this test file) as UTF-8 text, with CRLF line
 * endings converted to LF so the source-pattern assertions see one format.
 *
 * @returns {string} the normalized source text
 */
function readMain() {
  const mainPath = path.join(__dirname, 'main.cjs')
  const rawSource = fs.readFileSync(mainPath, 'utf8')

  return rawSource.replace(/\r\n/g, '\n')
}
|
||||
|
||||
test('findOnPath tries PATHEXT extensions before the bare (empty) name on Windows', () => {
  const mainSource = readMain()

  // Expected order: PATHEXT entries first, the empty extension LAST.
  const pathextFirstPattern =
    /\(process\.env\.PATHEXT \|\| '\.COM;\.EXE;\.BAT;\.CMD'\)\.split\(';'\)\.filter\(Boolean\), ''\]/
  // The buggy order that put the empty extension first.
  const emptyFirstPattern = /\['', \.\.\.\(process\.env\.PATHEXT/

  assert.match(
    mainSource,
    pathextFirstPattern,
    'extensions array must end with the empty string, not start with it'
  )
  assert.doesNotMatch(
    mainSource,
    emptyFirstPattern,
    'empty-extension-first order regressed: an extensionless shim can shadow hermes.cmd/.exe'
  )
})
|
||||
|
||||
test('Windows bootstrap recovery chooses --update when any real-install signal is present', () => {
  const mainSource = readMain()

  // Source patterns that must be present, checked in order, with their failure messages.
  const requiredPatterns = [
    [/const haveRealInstall =/, 'recovery must compute haveRealInstall'],
    [/fileExists\(venvPython\)/, 'recovery must accept the venv interpreter as a real-install signal'],
    [/\.hermes-bootstrap-complete/, 'recovery must accept the bootstrap-complete marker as a real-install signal'],
    [/updaterArgs = haveRealInstall \? \['--update'/, 'updaterArgs must gate on haveRealInstall']
  ]

  for (const [pattern, failureMessage] of requiredPatterns) {
    assert.match(mainSource, pattern, failureMessage)
  }

  // The earlier, too-narrow gate (only venv\Scripts\hermes.exe) must stay gone.
  assert.doesNotMatch(
    mainSource,
    /updaterArgs = fileExists\(venvHermes\) \?/,
    'recovery regressed to gating only on the hermes.exe shim, which forces destructive --repair'
  )
})
|
||||
@@ -21,7 +21,8 @@ const { execFileSync } = require('node:child_process')
|
||||
// the requested value line isn't present.
|
||||
function parseRegQueryValue(stdout, name) {
|
||||
if (!stdout || !name) return null
|
||||
const typePattern = /^(\S+)\s+(?:REG_SZ|REG_EXPAND_SZ|REG_MULTI_SZ|REG_DWORD|REG_QWORD|REG_BINARY|REG_NONE)\s+(.*)$/
|
||||
const typePattern =
|
||||
/^(\S+)\s+(?:REG_SZ|REG_EXPAND_SZ|REG_MULTI_SZ|REG_DWORD|REG_QWORD|REG_BINARY|REG_NONE)\s+(.*)$/
|
||||
for (const rawLine of String(stdout).split(/\r?\n/)) {
|
||||
const line = rawLine.trim()
|
||||
const match = line.match(typePattern)
|
||||
@@ -46,7 +47,10 @@ function expandWindowsEnvRefs(value, env = process.env) {
|
||||
// Read a User-scoped env var from HKCU\Environment. Windows-only: returns null
|
||||
// off-Windows (without spawning), on any spawn error, when `reg` exits non-zero
|
||||
// (the value doesn't exist), or when the value is empty.
|
||||
function readWindowsUserEnvVar(name, { platform = process.platform, env = process.env, exec = execFileSync } = {}) {
|
||||
function readWindowsUserEnvVar(
|
||||
name,
|
||||
{ platform = process.platform, env = process.env, exec = execFileSync } = {}
|
||||
) {
|
||||
if (platform !== 'win32' || !name) return null
|
||||
let stdout
|
||||
try {
|
||||
|
||||
@@ -1,12 +1,21 @@
|
||||
const assert = require('node:assert/strict')
|
||||
const { test } = require('node:test')
|
||||
|
||||
const { expandWindowsEnvRefs, parseRegQueryValue, readWindowsUserEnvVar } = require('./windows-user-env.cjs')
|
||||
const {
|
||||
expandWindowsEnvRefs,
|
||||
parseRegQueryValue,
|
||||
readWindowsUserEnvVar
|
||||
} = require('./windows-user-env.cjs')
|
||||
|
||||
// ── parseRegQueryValue ─────────────────────────────────────────────────────
|
||||
|
||||
test('parseRegQueryValue extracts a REG_SZ value', () => {
|
||||
const out = ['', 'HKEY_CURRENT_USER\\Environment', ' HERMES_HOME REG_SZ F:\\Hermes\\data', ''].join('\r\n')
|
||||
const out = [
|
||||
'',
|
||||
'HKEY_CURRENT_USER\\Environment',
|
||||
' HERMES_HOME REG_SZ F:\\Hermes\\data',
|
||||
''
|
||||
].join('\r\n')
|
||||
assert.equal(parseRegQueryValue(out, 'HERMES_HOME'), 'F:\\Hermes\\data')
|
||||
})
|
||||
|
||||
@@ -30,7 +39,10 @@ test('parseRegQueryValue returns null when the value line is absent', () => {
|
||||
// ── expandWindowsEnvRefs ───────────────────────────────────────────────────
|
||||
|
||||
test('expandWindowsEnvRefs expands %VAR% case-insensitively', () => {
|
||||
assert.equal(expandWindowsEnvRefs('%UserProfile%\\h', { USERPROFILE: 'C:\\Users\\jeff' }), 'C:\\Users\\jeff\\h')
|
||||
assert.equal(
|
||||
expandWindowsEnvRefs('%UserProfile%\\h', { USERPROFILE: 'C:\\Users\\jeff' }),
|
||||
'C:\\Users\\jeff\\h'
|
||||
)
|
||||
})
|
||||
|
||||
test('expandWindowsEnvRefs leaves literal paths and unknown refs intact', () => {
|
||||
|
||||
@@ -14,7 +14,11 @@ function isPackagedInstallPath(dir, { installRoots, isPackaged }) {
|
||||
return false
|
||||
}
|
||||
|
||||
const roots = new Set((installRoots ?? []).filter(Boolean).map(candidate => path.resolve(String(candidate))))
|
||||
const roots = new Set(
|
||||
(installRoots ?? [])
|
||||
.filter(Boolean)
|
||||
.map(candidate => path.resolve(String(candidate)))
|
||||
)
|
||||
|
||||
for (const root of roots) {
|
||||
if (resolved === root) {
|
||||
|
||||
@@ -13,21 +13,33 @@ const { isPackagedInstallPath } = require('./workspace-cwd.cjs')
|
||||
const installRoot = path.resolve('/opt/Hermes')
|
||||
|
||||
test('isPackagedInstallPath returns false when not packaged', () => {
|
||||
assert.equal(isPackagedInstallPath(installRoot, { isPackaged: false, installRoots: [installRoot] }), false)
|
||||
assert.equal(
|
||||
isPackagedInstallPath(installRoot, { isPackaged: false, installRoots: [installRoot] }),
|
||||
false
|
||||
)
|
||||
})
|
||||
|
||||
test('isPackagedInstallPath flags the install root itself', () => {
|
||||
assert.equal(isPackagedInstallPath(installRoot, { isPackaged: true, installRoots: [installRoot] }), true)
|
||||
assert.equal(
|
||||
isPackagedInstallPath(installRoot, { isPackaged: true, installRoots: [installRoot] }),
|
||||
true
|
||||
)
|
||||
})
|
||||
|
||||
test('isPackagedInstallPath flags paths nested under the install root', () => {
|
||||
const nested = path.join(installRoot, 'resources', 'app.asar')
|
||||
|
||||
assert.equal(isPackagedInstallPath(nested, { isPackaged: true, installRoots: [installRoot] }), true)
|
||||
assert.equal(
|
||||
isPackagedInstallPath(nested, { isPackaged: true, installRoots: [installRoot] }),
|
||||
true
|
||||
)
|
||||
})
|
||||
|
||||
test('isPackagedInstallPath ignores paths outside the install root', () => {
|
||||
const homeProject = path.resolve('/home/user/projects/demo')
|
||||
|
||||
assert.equal(isPackagedInstallPath(homeProject, { isPackaged: true, installRoots: [installRoot] }), false)
|
||||
assert.equal(
|
||||
isPackagedInstallPath(homeProject, { isPackaged: true, installRoots: [installRoot] }),
|
||||
false
|
||||
)
|
||||
})
|
||||
|
||||
@@ -1,92 +0,0 @@
|
||||
// Pull a Windows-host clipboard image from inside WSL2 via PowerShell (WSLg
|
||||
// bridges text but not images). Returns PNG bytes or null; exec injectable.
|
||||
|
||||
const { execFileSync } = require('node:child_process')
|
||||
|
||||
// STA is mandatory: System.Windows.Forms.Clipboard throws ThreadStateException
|
||||
// off a single-threaded apartment. We emit base64 (not raw bytes) so the PNG
|
||||
// survives stdout's text decoding intact, and write with [Console]::Out.Write
|
||||
// to avoid a trailing newline.
|
||||
const PS_SCRIPT = [
|
||||
'Add-Type -AssemblyName System.Windows.Forms,System.Drawing',
|
||||
'$img = [System.Windows.Forms.Clipboard]::GetImage()',
|
||||
'if ($null -eq $img) { exit 0 }',
|
||||
'$ms = New-Object System.IO.MemoryStream',
|
||||
'$img.Save($ms, [System.Drawing.Imaging.ImageFormat]::Png)',
|
||||
'[Console]::Out.Write([System.Convert]::ToBase64String($ms.ToArray()))'
|
||||
].join('\n')
|
||||
|
||||
// Encode a script for PowerShell's -EncodedCommand switch: base64 over the
// script's UTF-16LE bytes. Passing the script this way keeps it clear of the
// WSL→Windows quoting layers (spaces, quotes, brackets, newlines) that would
// mangle a plain -Command argument. The input is coerced with String() first.
function encodePowerShellCommand(script) {
  const utf16Bytes = Buffer.from(String(script), 'utf16le')

  return utf16Bytes.toString('base64')
}
|
||||
|
||||
// Ordered list of powershell.exe locations to try. The bare name comes first:
// it resolves through WSL's Windows-interop PATH on a standard WSL2 setup. The
// absolute /mnt/c path follows as a fallback for a stripped PATH. Callers walk
// the list and move on when a candidate cannot be executed.
function powershellCandidates() {
  const viaInteropPath = 'powershell.exe'
  const absoluteFallback = '/mnt/c/Windows/System32/WindowsPowerShell/v1.0/powershell.exe'

  return [viaInteropPath, absoluteFallback]
}
|
||||
|
||||
// Decode PowerShell's stdout (expected to be the base64 of a PNG) into a Buffer.
//
// stdout: the captured output; null/undefined/blank yields null.
// Returns the decoded Buffer when its first 8 bytes are the PNG signature,
// otherwise null.
//
// Buffer.from(text, 'base64') does not throw on malformed input; it decodes
// whatever it can. A try/catch around it is therefore dead code, and the
// signature check below is what actually rejects partial or garbage output.
function decodeClipboardImageBase64(stdout) {
  const b64 = String(stdout || '').trim()
  if (!b64) return null

  const buffer = Buffer.from(b64, 'base64')

  // Guard against partial / garbage output: require a real PNG signature.
  const PNG_SIGNATURE = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])
  const hasPngSignature =
    buffer.length >= PNG_SIGNATURE.length && buffer.subarray(0, PNG_SIGNATURE.length).equals(PNG_SIGNATURE)

  return hasPngSignature ? buffer : null
}
|
||||
|
||||
// Fetch the Windows clipboard image from inside WSL by running PS_SCRIPT
// through each powershell.exe candidate in turn.
//
// Returns a PNG Buffer, or null when the clipboard holds no image, no candidate
// could be run, or nothing that came back decoded to a PNG. Failures while
// running a candidate are swallowed and the next candidate is tried.
// Linux-only by contract: the caller gates on IS_WSL.
// `exec` and `candidates` are injectable so tests can avoid spawning PowerShell.
function readWslWindowsClipboardImage({ exec = execFileSync, candidates = powershellCandidates() } = {}) {
  const encodedScript = encodePowerShellCommand(PS_SCRIPT)

  for (const powershellPath of candidates) {
    try {
      const output = exec(
        powershellPath,
        ['-NoProfile', '-NonInteractive', '-STA', '-ExecutionPolicy', 'Bypass', '-EncodedCommand', encodedScript],
        {
          encoding: 'utf8',
          windowsHide: true,
          timeout: 8000,
          // A 4K screenshot is a few MB once base64-encoded, so stdout gets 64 MiB.
          maxBuffer: 64 * 1024 * 1024,
          // stderr carries PowerShell progress/CLIXML noise and is discarded.
          stdio: ['ignore', 'pipe', 'ignore']
        }
      )

      // Blank stdout means PowerShell ran and found no image: stop here
      // instead of probing the remaining candidates.
      if (String(output || '').trim() === '') {
        return null
      }

      const image = decodeClipboardImageBase64(output)

      if (image) {
        return image
      }
      // Non-empty output that is not a PNG: let the next candidate try.
    } catch {
      // This powershell.exe candidate is missing or failed; move on.
    }
  }

  return null
}
|
||||
|
||||
module.exports = {
|
||||
decodeClipboardImageBase64,
|
||||
encodePowerShellCommand,
|
||||
powershellCandidates,
|
||||
readWslWindowsClipboardImage
|
||||
}
|
||||
@@ -1,114 +0,0 @@
|
||||
const assert = require('node:assert/strict')
|
||||
const test = require('node:test')
|
||||
|
||||
const {
|
||||
decodeClipboardImageBase64,
|
||||
encodePowerShellCommand,
|
||||
powershellCandidates,
|
||||
readWslWindowsClipboardImage
|
||||
} = require('./wsl-clipboard-image.cjs')
|
||||
|
||||
const PNG_SIGNATURE = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])
|
||||
|
||||
// Build a stand-in PNG: the PNG signature followed by `extraBytes` bytes of
// 0x42 filler. It is only meant to satisfy a signature check.
function fakePngBuffer(extraBytes = 16) {
  const filler = Buffer.alloc(extraBytes, 0x42)

  return Buffer.concat([PNG_SIGNATURE, filler])
}
|
||||
|
||||
test('encodePowerShellCommand produces UTF-16LE base64 PowerShell can decode', () => {
  const script = 'Write-Output "hi"'

  // Undo the encoding the way -EncodedCommand would: base64 -> UTF-16LE text.
  const decodedBack = Buffer.from(encodePowerShellCommand(script), 'base64').toString('utf16le')

  assert.equal(decodedBack, script)
})
|
||||
|
||||
test('decodeClipboardImageBase64 returns a Buffer for valid PNG base64', () => {
  const expected = fakePngBuffer()
  const actual = decodeClipboardImageBase64(expected.toString('base64'))

  // The result must be a Buffer carrying exactly the bytes that were encoded.
  assert.ok(Buffer.isBuffer(actual))
  assert.ok(actual.equals(expected))
})
|
||||
|
||||
test('decodeClipboardImageBase64 trims surrounding whitespace before decoding', () => {
  const expected = fakePngBuffer()
  // Pad the payload with leading and trailing whitespace, including a CRLF.
  const padded = `\n ${expected.toString('base64')} \r\n`

  const actual = decodeClipboardImageBase64(padded)

  assert.ok(actual && actual.equals(expected))
})
|
||||
|
||||
test('decodeClipboardImageBase64 returns null for empty / whitespace input', () => {
  // Every flavour of "nothing on stdout" must decode to null.
  for (const blank of ['', ' \n ', null, undefined]) {
    assert.equal(decodeClipboardImageBase64(blank), null)
  }
})
|
||||
|
||||
test('decodeClipboardImageBase64 rejects base64 without a PNG signature', () => {
  // Well-formed base64 whose decoded bytes are plain text, not a PNG.
  const plainText = Buffer.from('this is not a png at all')
  const encoded = plainText.toString('base64')

  assert.equal(decodeClipboardImageBase64(encoded), null)
})
|
||||
|
||||
test('readWslWindowsClipboardImage decodes the first candidate that returns a PNG', () => {
  const png = fakePngBuffer()
  const invocations = []

  // Fake exec: record the call and answer with the PNG as base64.
  const exec = (cmd, args) => {
    invocations.push({ cmd, args })

    return png.toString('base64')
  }

  const result = readWslWindowsClipboardImage({ exec, candidates: ['powershell.exe'] })

  assert.ok(result && result.equals(png))
  assert.equal(invocations.length, 1)

  const [{ cmd, args }] = invocations

  assert.equal(cmd, 'powershell.exe')
  // -STA is mandatory for System.Windows.Forms.Clipboard.
  assert.ok(args.includes('-STA'))
  assert.ok(args.includes('-EncodedCommand'))
})
|
||||
|
||||
test('readWslWindowsClipboardImage returns null and stops when stdout is empty (no image)', () => {
  const candidates = ['powershell.exe', '/mnt/c/Windows/System32/WindowsPowerShell/v1.0/powershell.exe']
  let execCalls = 0

  // Fake exec: always succeeds with empty stdout and counts its invocations.
  const exec = () => {
    execCalls += 1

    return ''
  }

  const result = readWslWindowsClipboardImage({ exec, candidates })

  assert.equal(result, null)
  // Empty stdout means "no image on the clipboard", so the second candidate
  // must not be probed.
  assert.equal(execCalls, 1)
})
|
||||
|
||||
test('readWslWindowsClipboardImage falls through to the next candidate when one throws', () => {
  const png = fakePngBuffer()
  const candidates = ['powershell.exe', '/mnt/c/Windows/System32/WindowsPowerShell/v1.0/powershell.exe']
  const attempted = []

  // Fake exec: the bare name fails with ENOENT, anything else returns the PNG.
  const exec = cmd => {
    attempted.push(cmd)

    if (cmd !== 'powershell.exe') {
      return png.toString('base64')
    }

    throw Object.assign(new Error('not found'), { code: 'ENOENT' })
  }

  const result = readWslWindowsClipboardImage({ exec, candidates })

  assert.ok(result && result.equals(png))
  // Both candidates were tried, in list order.
  assert.deepEqual(attempted, ['powershell.exe', '/mnt/c/Windows/System32/WindowsPowerShell/v1.0/powershell.exe'])
})
|
||||
|
||||
test('readWslWindowsClipboardImage returns null when every candidate throws', () => {
  // Fake exec that fails for whatever candidate it is handed.
  const alwaysFails = () => {
    throw new Error('boom')
  }

  const result = readWslWindowsClipboardImage({ exec: alwaysFails, candidates: ['a', 'b'] })

  assert.equal(result, null)
})
|
||||
|
||||
test('powershellCandidates lists the bare name first, then the absolute fallback', () => {
  const fallbackSuffix = 'WindowsPowerShell/v1.0/powershell.exe'
  const candidates = powershellCandidates()

  // The PATH-resolved name leads; some entry must be the absolute fallback.
  assert.equal(candidates[0], 'powershell.exe')
  assert.ok(candidates.some(candidate => candidate.endsWith(fallbackSuffix)))
})
|
||||
@@ -18,7 +18,7 @@
|
||||
"profile:main": "wait-on http://127.0.0.1:5174 && cross-env XCURSOR_SIZE=24 HERMES_DESKTOP_DEV_SERVER=http://127.0.0.1:5174 electron --inspect=9229 .",
|
||||
"profile:main:cpu": "wait-on http://127.0.0.1:5174 && cross-env XCURSOR_SIZE=24 NODE_OPTIONS=--cpu-prof HERMES_DESKTOP_DEV_SERVER=http://127.0.0.1:5174 electron .",
|
||||
"start": "npm run build && electron .",
|
||||
"build": "node scripts/assert-root-install.cjs && node scripts/write-build-stamp.cjs && node scripts/stage-native-deps.cjs && tsc -b && vite build && npm run postbuild",
|
||||
"build": "node scripts/assert-root-install.cjs && node scripts/write-build-stamp.cjs && node scripts/stage-native-deps.cjs && tsc -b && vite build && node scripts/bundle-electron-main.mjs && npm run postbuild",
|
||||
"postbuild": "node scripts/assert-dist-built.cjs",
|
||||
"prebuilder": "node scripts/patch-electron-builder-mac-binary.cjs",
|
||||
"builder": "cross-env NODE_OPTIONS=--max-old-space-size=16384 node scripts/run-electron-builder.cjs",
|
||||
@@ -37,7 +37,7 @@
|
||||
"test:desktop:nsis": "node scripts/test-desktop.mjs nsis",
|
||||
"test:desktop:existing": "node scripts/test-desktop.mjs existing",
|
||||
"test:desktop:fresh": "node scripts/test-desktop.mjs fresh",
|
||||
"test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/backend-ready.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/link-title-window.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/git-worktree-ops.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs electron/update-count.test.cjs electron/update-rebuild.test.cjs electron/update-marker.test.cjs electron/update-relaunch.test.cjs electron/windows-user-env.test.cjs electron/wsl-clipboard-image.test.cjs electron/titlebar-overlay-width.test.cjs electron/window-state.test.cjs electron/windows-hermes-resolution.test.cjs",
|
||||
"test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-env.test.cjs electron/backend-probes.test.cjs electron/backend-ready.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/dashboard-token.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/link-title-window.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/git-worktree-ops.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs electron/update-count.test.cjs electron/update-rebuild.test.cjs electron/update-marker.test.cjs electron/update-relaunch.test.cjs electron/windows-user-env.test.cjs electron/window-state.test.cjs",
|
||||
"typecheck": "tsc -p . --noEmit",
|
||||
"lint": "eslint src/ electron/",
|
||||
"lint:fix": "eslint src/ electron/ --fix",
|
||||
@@ -51,17 +51,11 @@
|
||||
"@assistant-ui/react-streamdown": "^0.1.11",
|
||||
"@audiowave/react": "^0.6.2",
|
||||
"@chenglou/pretext": "^0.0.6",
|
||||
"@codemirror/commands": "^6.10.4",
|
||||
"@codemirror/language": "^6.12.4",
|
||||
"@codemirror/language-data": "^6.5.2",
|
||||
"@codemirror/state": "^6.7.0",
|
||||
"@codemirror/view": "^6.43.3",
|
||||
"@dnd-kit/core": "^6.3.1",
|
||||
"@dnd-kit/sortable": "^10.0.0",
|
||||
"@dnd-kit/utilities": "^3.2.2",
|
||||
"@hermes/shared": "file:../shared",
|
||||
"@icons-pack/react-simple-icons": "=13.11.1",
|
||||
"@lezer/highlight": "^1.2.3",
|
||||
"@nanostores/react": "^1.1.0",
|
||||
"@nous-research/ui": "^0.13.0",
|
||||
"@radix-ui/react-slot": "^1.2.4",
|
||||
@@ -73,7 +67,6 @@
|
||||
"@tanstack/react-virtual": "^3.13.24",
|
||||
"@vscode/codicons": "^0.0.45",
|
||||
"@xterm/addon-fit": "^0.11.0",
|
||||
"@xterm/addon-serialize": "^0.14.0",
|
||||
"@xterm/addon-unicode11": "^0.9.0",
|
||||
"@xterm/addon-web-links": "^0.12.0",
|
||||
"@xterm/addon-webgl": "^0.19.0",
|
||||
@@ -82,13 +75,11 @@
|
||||
"clsx": "^2.1.1",
|
||||
"cmdk": "^1.1.1",
|
||||
"dnd-core": "^14.0.1",
|
||||
"dompurify": "^3.4.11",
|
||||
"hast-util-from-html-isomorphic": "^2.0.0",
|
||||
"hast-util-to-text": "^4.0.2",
|
||||
"ignore": "^7.0.5",
|
||||
"katex": "^0.16.45",
|
||||
"leva": "^0.10.1",
|
||||
"mermaid": "^11.15.0",
|
||||
"motion": "^12.38.0",
|
||||
"nanostores": "^1.3.0",
|
||||
"node-pty": "1.1.0",
|
||||
|
||||
33
apps/desktop/scripts/bundle-electron-main.mjs
Normal file
33
apps/desktop/scripts/bundle-electron-main.mjs
Normal file
@@ -0,0 +1,33 @@
|
||||
#!/usr/bin/env node
|
||||
// bundle-electron-main.mjs — bundles electron/main.cjs into a single
|
||||
// self-contained file so the nix build doesn't need to ship node_modules/.
|
||||
//
|
||||
// `electron` is provided by the runtime; `node-pty` is staged separately
|
||||
// via stage-native-deps.cjs. `preload.cjs` is NOT require()'d by main —
|
||||
// Electron loads it via path.join(__dirname, 'preload.cjs') — so it stays
|
||||
// as a separate file and doesn't need bundling.
|
||||
import { build } from 'esbuild'
|
||||
import { resolve, dirname } from 'node:path'
|
||||
import { fileURLToPath } from 'node:url'
|
||||
import { renameSync } from 'node:fs'
|
||||
|
||||
// All paths are resolved from the package root, i.e. the parent of the
// directory this script lives in.
const scriptDir = dirname(fileURLToPath(import.meta.url))
const appRoot = resolve(scriptDir, '..')
const mainPath = resolve(appRoot, 'electron/main.cjs')
const bundledPath = resolve(appRoot, 'electron/main.bundled.cjs')

// Bundle main.cjs and everything it requires into one CommonJS file for
// Node 20. `electron` and `node-pty` are kept external, so they remain
// runtime require()s and are not inlined.
const bundleOptions = {
  entryPoints: [mainPath],
  bundle: true,
  platform: 'node',
  format: 'cjs',
  target: 'node20',
  outfile: bundledPath,
  external: ['electron', 'node-pty'],
  logLevel: 'info'
}

await build(bundleOptions)

// Move the bundle over the entry file. From here on electron/main.cjs holds
// the bundled output rather than the original source.
renameSync(bundledPath, mainPath)

console.log(`bundled ${mainPath}`)
|
||||
@@ -66,31 +66,6 @@ const NATIVE_DEPS = [
|
||||
}
|
||||
]
|
||||
|
||||
// Pure-JS runtime dependencies that the packaged electron main require()s but
|
||||
// that workspace dedup hoists into the repo-root node_modules -- out of reach
|
||||
// of electron-builder's file collector, exactly like node-pty above. Unlike
|
||||
// node-pty there is no native binary to select; we stage each package's whole
|
||||
// directory into build/native-deps/vendor/node_modules/<name> so the dep's own
|
||||
// internal require()s resolve against a real node_modules tree, and the
|
||||
// requiring file (electron/git-review-ops.cjs) falls back to that path via
|
||||
// process.resourcesPath when the normal require() fails. See issue #52735
|
||||
// (packaged app crashed at launch on `Cannot find module 'simple-git'`).
|
||||
//
|
||||
// The closure is resolved at stage time by walking dependencies +
|
||||
// optionalDependencies, so a simple-git version bump that pulls in a new
|
||||
// transitive dep can't silently re-introduce the crash.
|
||||
//
|
||||
// Layout note: the closure lands in build/native-deps/vendor/node_modules/,
|
||||
// NOT build/native-deps/node_modules/. electron-builder's file collector
|
||||
// hard-drops a `node_modules` directory that sits at the ROOT of an
|
||||
// extraResources copy (app-builder-lib/out/util/filter.js: `if (relative ===
|
||||
// "node_modules") return false`), but keeps a NESTED one. Nesting under
|
||||
// `vendor/` makes node_modules a subdirectory so it survives packing; the
|
||||
// require() fallback in git-review-ops.cjs resolves the matching
|
||||
// vendor/node_modules path.
|
||||
const JS_DEP_ROOTS = ['simple-git']
|
||||
const JS_DEP_STAGE_ROOT = path.join(STAGE_ROOT, 'vendor', 'node_modules')
|
||||
|
||||
// Remove `target` and everything beneath it. With force: true, a path that
// does not exist is not treated as an error.
function rmrf(target) {
  const removeOptions = { force: true, recursive: true }

  fs.rmSync(target, removeOptions)
}
|
||||
@@ -173,111 +148,12 @@ function stageOne(spec) {
|
||||
console.log(`[stage-native-deps] ${path.relative(APP_ROOT, spec.to)}: ${copied} files`)
|
||||
}
|
||||
|
||||
// Find the root directory of package `name`. Resolution starts from `fromDir`
// (the requiring package, for non-hoisted nested copies) and also searches the
// repo root and its node_modules, where workspace dedup hoists packages.
// Returns the directory, or null when the package cannot be resolved or no
// ancestor of its entry file declares that name.
//
// `${name}/package.json` is intentionally NOT resolved directly. A package
// whose "exports" map omits "./package.json" (e.g. simple-git 3.x) makes that
// subpath fail with ERR_PACKAGE_PATH_NOT_EXPORTED. Instead the exports-aware
// main entry is resolved, and we climb from there to the first directory whose
// package.json "name" matches.
function resolvePkgDir(name, fromDir) {
  let entryFile

  try {
    entryFile = require.resolve(name, {
      paths: [fromDir, REPO_ROOT, path.join(REPO_ROOT, 'node_modules')]
    })
  } catch {
    return null
  }

  let current = path.dirname(entryFile)
  let previous = null

  // path.dirname() of the filesystem root is the root itself, so the climb
  // ends once a step no longer changes the directory.
  while (current !== previous) {
    let declaresName = false

    try {
      const manifest = JSON.parse(fs.readFileSync(path.join(current, 'package.json'), 'utf8'))

      declaresName = manifest.name === name
    } catch {
      // No package.json at this level (or it is unreadable): keep climbing.
    }

    if (declaresName) {
      return current
    }

    previous = current
    current = path.dirname(current)
  }

  return null
}
|
||||
|
||||
// Walk dependencies + optionalDependencies from each root package and return
// the runtime closure as a Map of package name -> absolute package directory.
// Keying by name means a dep reached via two paths is staged once.
//
// roots: package names to start from; each is resolved from REPO_ROOT.
// Throws when a root or a regular dependency cannot be resolved.
//
// A package that is reachable only through `optionalDependencies` and is not
// installed is skipped rather than treated as fatal. Optional dependencies are
// allowed to be absent (for example, platform-specific ones), so a missing one
// must not abort staging. If the same name is later reached as a regular
// dependency, it is still required and the error is raised then.
function resolveJsClosure(roots) {
  const closure = new Map() // name -> absolute package dir
  const stack = roots.map(name => ({ name, fromDir: REPO_ROOT, optional: false }))

  while (stack.length) {
    const { name, fromDir, optional } = stack.pop()

    if (closure.has(name)) continue

    const dir = resolvePkgDir(name, fromDir)

    if (!dir) {
      if (optional) {
        console.log(`[stage-native-deps] optional dependency '${name}' is not installed; skipping`)
        continue
      }

      throw new Error(
        `stage-native-deps: could not resolve '${name}' for the simple-git ` +
          `closure. Run \`npm install\` at the workspace root first.`
      )
    }

    closure.set(name, dir)

    let pj

    try {
      pj = JSON.parse(fs.readFileSync(path.join(dir, 'package.json'), 'utf8'))
    } catch {
      // Unreadable manifest: keep the package itself, but there is nothing to walk.
      continue
    }

    // A name listed in both maps counts as optional, matching how the
    // optionalDependencies entry takes precedence when the two are merged.
    const optionalNames = new Set(Object.keys(pj.optionalDependencies || {}))
    const depNames = new Set([...Object.keys(pj.dependencies || {}), ...optionalNames])

    for (const depName of depNames) {
      stack.push({ name: depName, fromDir: dir, optional: optionalNames.has(depName) })
    }
  }

  return closure
}
|
||||
|
||||
// Copy the resolved JS dependency closure for `roots` into JS_DEP_STAGE_ROOT
// (build/native-deps/vendor/node_modules/), so the packaged app and the nix
// output can require() it from process.resourcesPath when the hoisted-root
// require() is out of reach.
//
// The stage directory is wiped and recreated first. Each package directory is
// then copied whole, minus any nested node_modules/: the closure is flattened,
// so every dependency already gets its own top-level entry. That leaves a real
// node_modules layout, and the packages' internal require()s keep working.
function stageJsClosure(roots) {
  const closure = resolveJsClosure(roots)
  const skipNestedNodeModules = src => path.basename(src) !== 'node_modules'

  rmrf(JS_DEP_STAGE_ROOT)
  ensureDir(JS_DEP_STAGE_ROOT)

  closure.forEach((packageDir, packageName) => {
    const dest = path.join(JS_DEP_STAGE_ROOT, packageName)

    // The destination can be nested one level deeper than the stage root, so
    // make sure its parent directory exists before copying.
    ensureDir(path.dirname(dest))
    fs.cpSync(packageDir, dest, { recursive: true, filter: skipNestedNodeModules })
  })

  const stagedNames = Array.from(closure.keys()).sort().join(', ')

  console.log(`[stage-native-deps] vendor/node_modules/: ${closure.size} package(s) (${stagedNames})`)
}
|
||||
|
||||
// Entry point: start from an empty STAGE_ROOT, stage every entry in
// NATIVE_DEPS, then stage the pure-JS dependency closure for JS_DEP_ROOTS.
function main() {
  rmrf(STAGE_ROOT)
  ensureDir(STAGE_ROOT)

  NATIVE_DEPS.forEach(spec => {
    stageOne(spec)
  })

  stageJsClosure(JS_DEP_ROOTS)
}
|
||||
|
||||
main()
|
||||
|
||||
@@ -3,8 +3,8 @@ import { type ReactNode, useEffect, useMemo, useState } from 'react'
|
||||
|
||||
import { useElapsedSeconds } from '@/components/chat/activity-timer'
|
||||
import { ActivityTimerText } from '@/components/chat/activity-timer-text'
|
||||
import { Codicon } from '@/components/ui/codicon'
|
||||
import { FadeText } from '@/components/ui/fade-text'
|
||||
import { Codicon } from '@/components/ui/codicon'
|
||||
import { GlyphSpinner } from '@/components/ui/glyph-spinner'
|
||||
import { type Translations, useI18n } from '@/i18n'
|
||||
import { AlertCircle, CheckCircle2 } from '@/lib/icons'
|
||||
@@ -19,7 +19,7 @@ import {
|
||||
type SubagentStreamEntry
|
||||
} from '@/store/subagents'
|
||||
|
||||
import { Panel, PanelEmpty, PanelHeader } from '../overlays/panel'
|
||||
import { OverlayView } from '../overlays/overlay-view'
|
||||
|
||||
// Mirrors statusGlyph() in tool-fallback.tsx so subagent rows speak the
|
||||
// same visual vocabulary as the chat tool blocks.
|
||||
@@ -86,16 +86,18 @@ export function AgentsView({ onClose }: AgentsViewProps) {
|
||||
const tree = useMemo(() => buildSubagentTree(allSubagents(subagentsBySession)), [subagentsBySession])
|
||||
|
||||
return (
|
||||
<Panel closeLabel={t.agents.close} onClose={onClose}>
|
||||
{tree.length === 0 ? (
|
||||
<PanelEmpty description={t.agents.emptyDesc} icon="hubot" title={t.agents.emptyTitle} />
|
||||
) : (
|
||||
<>
|
||||
<PanelHeader subtitle={t.agents.subtitle} title={t.agents.title} />
|
||||
<SubagentTree tree={tree} />
|
||||
</>
|
||||
)}
|
||||
</Panel>
|
||||
<OverlayView
|
||||
closeLabel={t.agents.close}
|
||||
contentClassName="px-5 pt-5 pb-4 sm:px-6"
|
||||
onClose={onClose}
|
||||
rootClassName="mx-auto max-w-3xl"
|
||||
>
|
||||
<header className="mb-3 shrink-0">
|
||||
<h2 className="text-sm font-semibold text-foreground">{t.agents.title}</h2>
|
||||
<p className="text-xs text-muted-foreground/80">{t.agents.subtitle}</p>
|
||||
</header>
|
||||
<SubagentTree tree={tree} />
|
||||
</OverlayView>
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -477,20 +477,17 @@ export function ArtifactsView({ setStatusbarItemGroup: _setStatusbarItemGroup, .
|
||||
}
|
||||
}, [artifacts])
|
||||
|
||||
const openArtifact = useCallback(
|
||||
async (href: string) => {
|
||||
try {
|
||||
if (window.hermesDesktop?.openExternal) {
|
||||
await window.hermesDesktop.openExternal(href)
|
||||
} else {
|
||||
window.open(href, '_blank', 'noopener,noreferrer')
|
||||
}
|
||||
} catch (err) {
|
||||
notifyError(err, a.openFailed)
|
||||
const openArtifact = useCallback(async (href: string) => {
|
||||
try {
|
||||
if (window.hermesDesktop?.openExternal) {
|
||||
await window.hermesDesktop.openExternal(href)
|
||||
} else {
|
||||
window.open(href, '_blank', 'noopener,noreferrer')
|
||||
}
|
||||
},
|
||||
[a]
|
||||
)
|
||||
} catch (err) {
|
||||
notifyError(err, a.openFailed)
|
||||
}
|
||||
}, [a])
|
||||
|
||||
const markImageFailed = useCallback((id: string) => {
|
||||
setFailedImageIds(current => {
|
||||
@@ -842,8 +839,7 @@ const ARTIFACT_COLUMNS: readonly ArtifactColumn[] = [
|
||||
{
|
||||
Cell: PrimaryCell,
|
||||
bodyClassName: 'p-0',
|
||||
header: (filter, a) =>
|
||||
filter === 'link' ? a.colTitleLink : filter === 'file' ? a.colTitleFile : a.colTitleDefault,
|
||||
header: (filter, a) => (filter === 'link' ? a.colTitleLink : filter === 'file' ? a.colTitleFile : a.colTitleDefault),
|
||||
id: 'primary',
|
||||
width: filter => (filter === 'link' ? 'w-[50%]' : 'w-[35%]')
|
||||
},
|
||||
|
||||
@@ -2,9 +2,9 @@ import { cleanup, render, screen } from '@testing-library/react'
|
||||
import { afterEach, describe, expect, it } from 'vitest'
|
||||
|
||||
import { I18nProvider } from '@/i18n/context'
|
||||
import type { ComposerAttachment } from '@/store/composer'
|
||||
|
||||
import { AttachmentList } from './attachments'
|
||||
import type { ComposerAttachment } from '@/store/composer'
|
||||
|
||||
function makeAttachment(id: string, label = 'test.pdf'): ComposerAttachment {
|
||||
return { id, kind: 'file', label }
|
||||
@@ -32,10 +32,7 @@ describe('AttachmentList', () => {
|
||||
|
||||
it('renders empty list without error', () => {
|
||||
renderWithI18n(<AttachmentList attachments={[]} />)
|
||||
|
||||
const container =
|
||||
screen.getByTestId?.('composer-attachments') ?? document.querySelector('[data-slot="composer-attachments"]')
|
||||
|
||||
const container = screen.getByTestId?.('composer-attachments') ?? document.querySelector('[data-slot="composer-attachments"]')
|
||||
expect(container).toBeDefined()
|
||||
})
|
||||
|
||||
@@ -58,7 +55,10 @@ describe('AttachmentList', () => {
|
||||
})
|
||||
|
||||
it('does not crash when attachments array contains null entries', () => {
|
||||
const attachments = [null as unknown as ComposerAttachment, makeAttachment('a', 'valid.txt')]
|
||||
const attachments = [
|
||||
null as unknown as ComposerAttachment,
|
||||
makeAttachment('a', 'valid.txt')
|
||||
]
|
||||
|
||||
expect(() => {
|
||||
renderWithI18n(<AttachmentList attachments={attachments} />)
|
||||
|
||||
@@ -73,11 +73,7 @@ export function ContextMenu({
|
||||
<ContextMenuItem disabled={!onPickImages} icon={ImageIcon} onSelect={onPickImages}>
|
||||
{c.images}
|
||||
</ContextMenuItem>
|
||||
<ContextMenuItem
|
||||
disabled={!onPasteClipboardImage}
|
||||
icon={Clipboard}
|
||||
onSelect={onPasteClipboardImage ? () => void onPasteClipboardImage() : undefined}
|
||||
>
|
||||
<ContextMenuItem disabled={!onPasteClipboardImage} icon={Clipboard} onSelect={onPasteClipboardImage}>
|
||||
{c.pasteImage}
|
||||
</ContextMenuItem>
|
||||
<ContextMenuItem icon={Link} onSelect={onOpenUrlDialog}>
|
||||
@@ -171,7 +167,7 @@ interface ContextMenuItemProps {
|
||||
interface ContextMenuProps {
|
||||
onInsertText: (text: string) => void
|
||||
onOpenUrlDialog: () => void
|
||||
onPasteClipboardImage?: (opts?: { silent?: boolean }) => Promise<boolean> | void
|
||||
onPasteClipboardImage?: () => void
|
||||
onPickFiles?: () => void
|
||||
onPickFolders?: () => void
|
||||
onPickImages?: () => void
|
||||
|
||||
@@ -4,7 +4,7 @@ import { KbdCombo } from '@/components/ui/kbd'
|
||||
import { Tip } from '@/components/ui/tooltip'
|
||||
import { useI18n } from '@/i18n'
|
||||
import { triggerHaptic } from '@/lib/haptics'
|
||||
import { AudioLines, Layers3, Loader2, Square, SteeringWheel, Volume2, VolumeX } from '@/lib/icons'
|
||||
import { AudioLines, Layers3, Loader2, Square, SteeringWheel } from '@/lib/icons'
|
||||
import { formatCombo } from '@/lib/keybinds/combo'
|
||||
import { cn } from '@/lib/utils'
|
||||
|
||||
@@ -39,7 +39,6 @@ interface ConversationProps {
|
||||
}
|
||||
|
||||
export function ComposerControls({
|
||||
autoSpeak,
|
||||
busy,
|
||||
busyAction,
|
||||
canSteer,
|
||||
@@ -51,10 +50,8 @@ export function ComposerControls({
|
||||
state,
|
||||
voiceStatus,
|
||||
onDictate,
|
||||
onSteer,
|
||||
onToggleAutoSpeak
|
||||
onSteer
|
||||
}: {
|
||||
autoSpeak: boolean
|
||||
busy: boolean
|
||||
busyAction: 'queue' | 'stop'
|
||||
canSteer: boolean
|
||||
@@ -67,7 +64,6 @@ export function ComposerControls({
|
||||
voiceStatus: VoiceStatus
|
||||
onDictate: () => void
|
||||
onSteer: () => void
|
||||
onToggleAutoSpeak: () => void
|
||||
}) {
|
||||
const { t } = useI18n()
|
||||
const c = t.composer
|
||||
@@ -109,7 +105,6 @@ export function ComposerControls({
|
||||
) : (
|
||||
<DictationButton disabled={disabled} onToggle={onDictate} state={state.voice} status={voiceStatus} />
|
||||
)}
|
||||
<AutoSpeakButton active={autoSpeak} disabled={disabled} onToggle={onToggleAutoSpeak} />
|
||||
{showVoicePrimary ? (
|
||||
<Tip label={c.startVoice}>
|
||||
<Button
|
||||
@@ -259,47 +254,6 @@ function ConversationIndicator({
|
||||
)
|
||||
}
|
||||
|
||||
// Pure-TTS toggle: type normally, but have every assistant reply read aloud —
|
||||
// no dictation, no full conversation loop. Filled/accent when on, mirroring the
|
||||
// muted-mic pressed state above. Driven by (and persisted to) `voice.auto_tts`.
|
||||
function AutoSpeakButton({
|
||||
active,
|
||||
disabled,
|
||||
onToggle
|
||||
}: {
|
||||
active: boolean
|
||||
disabled: boolean
|
||||
onToggle: () => void
|
||||
}) {
|
||||
const { t } = useI18n()
|
||||
const c = t.composer
|
||||
const label = active ? c.stopSpeakingReplies : c.speakReplies
|
||||
|
||||
return (
|
||||
<Tip label={label}>
|
||||
<Button
|
||||
aria-label={label}
|
||||
aria-pressed={active}
|
||||
className={cn(
|
||||
GHOST_ICON_BTN,
|
||||
'p-0',
|
||||
active && 'bg-primary/10 text-primary hover:bg-primary/15 hover:text-primary'
|
||||
)}
|
||||
disabled={disabled}
|
||||
onClick={() => {
|
||||
triggerHaptic(active ? 'close' : 'open')
|
||||
onToggle()
|
||||
}}
|
||||
size="icon"
|
||||
type="button"
|
||||
variant="ghost"
|
||||
>
|
||||
{active ? <Volume2 size={14} /> : <VolumeX size={14} />}
|
||||
</Button>
|
||||
</Tip>
|
||||
)
|
||||
}
|
||||
|
||||
function DictationButton({
|
||||
disabled,
|
||||
state,
|
||||
|
||||
@@ -59,10 +59,8 @@ function Harness({
|
||||
}
|
||||
|
||||
const editor = editorRef.current
|
||||
|
||||
if (editor) {
|
||||
const domText = composerPlainText(editor)
|
||||
|
||||
if (domText !== draftRef.current) {
|
||||
draftRef.current = domText
|
||||
setDraft(domText)
|
||||
@@ -129,11 +127,9 @@ function Harness({
|
||||
describe('composer Enter submit — live DOM vs stale composer state (#39630)', () => {
|
||||
it('sends the just-typed text on Enter even when composer state has not synced', async () => {
|
||||
const onSubmit = vi.fn()
|
||||
|
||||
const { getByTestId } = render(
|
||||
<Harness onCancel={vi.fn()} onDrain={vi.fn()} onQueue={vi.fn()} onSubmit={onSubmit} />
|
||||
)
|
||||
|
||||
const editor = getByTestId('editor')
|
||||
|
||||
// Fast typing: the DOM has the text but NO input event fired, so `draft`
|
||||
@@ -150,11 +146,9 @@ describe('composer Enter submit — live DOM vs stale composer state (#39630)',
|
||||
const onQueue = vi.fn()
|
||||
const onDrain = vi.fn()
|
||||
const onCancel = vi.fn()
|
||||
|
||||
const { getByTestId } = render(
|
||||
<Harness busy onCancel={onCancel} onDrain={onDrain} onQueue={onQueue} onSubmit={vi.fn()} queued={['queued-1']} />
|
||||
)
|
||||
|
||||
const editor = getByTestId('editor')
|
||||
|
||||
await act(async () => {
|
||||
@@ -171,11 +165,9 @@ describe('composer Enter submit — live DOM vs stale composer state (#39630)',
|
||||
const onCancel = vi.fn()
|
||||
const onSubmit = vi.fn()
|
||||
const onQueue = vi.fn()
|
||||
|
||||
const { getByTestId } = render(
|
||||
<Harness busy onCancel={onCancel} onDrain={vi.fn()} onQueue={onQueue} onSubmit={onSubmit} />
|
||||
)
|
||||
|
||||
const editor = getByTestId('editor')
|
||||
|
||||
await act(async () => {
|
||||
@@ -191,11 +183,9 @@ describe('composer Enter submit — live DOM vs stale composer state (#39630)',
|
||||
it('drains the next queued prompt on Enter when idle with a truly empty editor', async () => {
|
||||
const onDrain = vi.fn()
|
||||
const onSubmit = vi.fn()
|
||||
|
||||
const { getByTestId } = render(
|
||||
<Harness onCancel={vi.fn()} onDrain={onDrain} onQueue={vi.fn()} onSubmit={onSubmit} queued={['queued-1']} />
|
||||
)
|
||||
|
||||
const editor = getByTestId('editor')
|
||||
|
||||
await act(async () => {
|
||||
@@ -210,18 +200,9 @@ describe('composer Enter submit — live DOM vs stale composer state (#39630)',
|
||||
it('keeps reconnect drafts editable but blocks Enter submit until the gateway returns', async () => {
|
||||
const onSubmit = vi.fn()
|
||||
const onDrain = vi.fn()
|
||||
|
||||
const { getByTestId } = render(
|
||||
<Harness
|
||||
disabled
|
||||
onCancel={vi.fn()}
|
||||
onDrain={onDrain}
|
||||
onQueue={vi.fn()}
|
||||
onSubmit={onSubmit}
|
||||
queued={['queued-1']}
|
||||
/>
|
||||
<Harness disabled onCancel={vi.fn()} onDrain={onDrain} onQueue={vi.fn()} onSubmit={onSubmit} queued={['queued-1']} />
|
||||
)
|
||||
|
||||
const editor = getByTestId('editor')
|
||||
|
||||
await act(async () => {
|
||||
|
||||
@@ -33,7 +33,7 @@ export function HelpHint() {
|
||||
|
||||
<Section title={c.hotkeys}>
|
||||
{COMPOSER_HOTKEY_ROWS.map(row => (
|
||||
<HotkeyRow combos={[...row.combos]} description={c.hotkeyDescs[row.id] ?? ''} key={row.id} />
|
||||
<HotkeyRow description={c.hotkeyDescs[row.id] ?? ''} combos={[...row.combos]} key={row.id} />
|
||||
))}
|
||||
</Section>
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user