mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-10 12:18:44 +08:00
Compare commits
9 Commits
refactor/s
...
api-server
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c45d18265c | ||
|
|
1c6d144a10 | ||
|
|
2b4abf8d9c | ||
|
|
f8dbe0ffd1 | ||
|
|
42e7755d4c | ||
|
|
68954b7c03 | ||
|
|
95220facdf | ||
|
|
5ea9bf70de | ||
|
|
67e4d43ea1 |
@@ -5,15 +5,6 @@
|
||||
|
||||
# Dependencies
|
||||
node_modules
|
||||
**/node_modules
|
||||
.venv
|
||||
**/.venv
|
||||
|
||||
# Built artifacts that are regenerated inside the image. Excluded so local
|
||||
# rebuilds on the developer's machine don't invalidate the npm-install layer
|
||||
# that now depends on the full ui-tui/packages/hermes-ink/ tree being present.
|
||||
ui-tui/dist/
|
||||
ui-tui/packages/hermes-ink/dist/
|
||||
|
||||
# CI/CD
|
||||
.github
|
||||
@@ -22,10 +13,3 @@ ui-tui/packages/hermes-ink/dist/
|
||||
.env
|
||||
|
||||
*.md
|
||||
|
||||
# Runtime data (bind-mounted at /opt/data; must not leak into build context)
|
||||
data/
|
||||
|
||||
# Compose/profile runtime state (bind-mounted; avoid ownership/secret issues)
|
||||
hermes-config/
|
||||
runtime/
|
||||
|
||||
137
.env.example
137
.env.example
@@ -14,14 +14,6 @@
|
||||
# LLM_MODEL is no longer read from .env — this line is kept for reference only.
|
||||
# LLM_MODEL=anthropic/claude-opus-4.6
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (NovitaAI)
|
||||
# =============================================================================
|
||||
# NovitaAI — 90+ models, pay-per-use
|
||||
# Get your key at: https://novita.ai/settings/key-management
|
||||
# NOVITA_API_KEY=
|
||||
# NOVITA_BASE_URL=https://api.novita.ai/openai/v1 # Override default base URL
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (Google AI Studio / Gemini)
|
||||
# =============================================================================
|
||||
@@ -32,15 +24,6 @@
|
||||
# Optional base URL override (default: Google's OpenAI-compatible endpoint)
|
||||
# GEMINI_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (Ollama Cloud)
|
||||
# =============================================================================
|
||||
# Cloud-hosted open models via Ollama's OpenAI-compatible endpoint.
|
||||
# Get your key at: https://ollama.com/settings
|
||||
# OLLAMA_API_KEY=your_ollama_key_here
|
||||
# Optional base URL override (default: https://ollama.com/v1)
|
||||
# OLLAMA_BASE_URL=https://ollama.com/v1
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (z.ai / GLM)
|
||||
# =============================================================================
|
||||
@@ -60,15 +43,6 @@
|
||||
# KIMI_BASE_URL=https://api.kimi.com/coding/v1 # Default for sk-kimi- keys
|
||||
# KIMI_BASE_URL=https://api.moonshot.ai/v1 # For legacy Moonshot keys
|
||||
# KIMI_BASE_URL=https://api.moonshot.cn/v1 # For Moonshot China keys
|
||||
# KIMI_CN_API_KEY= # Dedicated Moonshot China key
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (Arcee AI)
|
||||
# =============================================================================
|
||||
# Arcee AI provides access to Trinity models (trinity-mini, trinity-large-*)
|
||||
# Get an Arcee key at: https://chat.arcee.ai/
|
||||
# ARCEEAI_API_KEY=
|
||||
# ARCEE_BASE_URL= # Override default base URL
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (MiniMax)
|
||||
@@ -115,15 +89,6 @@
|
||||
# Optional base URL override:
|
||||
# HERMES_QWEN_BASE_URL=https://portal.qwen.ai/v1
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (Xiaomi MiMo)
|
||||
# =============================================================================
|
||||
# Xiaomi MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash).
|
||||
# Get your key at: https://platform.xiaomimimo.com
|
||||
# XIAOMI_API_KEY=your_key_here
|
||||
# Optional base URL override:
|
||||
# XIAOMI_BASE_URL=https://api.xiaomimimo.com/v1
|
||||
|
||||
# =============================================================================
|
||||
# TOOL API KEYS
|
||||
# =============================================================================
|
||||
@@ -151,18 +116,6 @@
|
||||
# Also requires ~/.honcho/config.json with enabled=true (see README).
|
||||
# HONCHO_API_KEY=
|
||||
|
||||
# =============================================================================
|
||||
# HYPERLIQUID OPTIONAL SKILL
|
||||
# =============================================================================
|
||||
# Optional defaults for the Hyperliquid skill in optional-skills/blockchain/hyperliquid
|
||||
#
|
||||
# Hyperliquid API base URL override
|
||||
# Default: https://api.hyperliquid.xyz
|
||||
# HYPERLIQUID_API_URL=https://api.hyperliquid-testnet.xyz
|
||||
#
|
||||
# Default address for account-level commands like state, fills, orders, and review
|
||||
# HYPERLIQUID_USER_ADDRESS=0x0000000000000000000000000000000000000000
|
||||
|
||||
# =============================================================================
|
||||
# TERMINAL TOOL CONFIGURATION
|
||||
# =============================================================================
|
||||
@@ -174,10 +127,6 @@
|
||||
# Only override here if you need to force a backend without touching config.yaml:
|
||||
# TERMINAL_ENV=local
|
||||
|
||||
# Override the container runtime binary (e.g. to use Podman instead of Docker).
|
||||
# Useful on systems where Docker's storage driver is broken or unavailable.
|
||||
# HERMES_DOCKER_BINARY=/usr/local/bin/podman
|
||||
|
||||
# Container images (for singularity/docker/modal backends)
|
||||
# TERMINAL_DOCKER_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
|
||||
# TERMINAL_SINGULARITY_IMAGE=docker://nikolaik/python-nodejs:python3.11-nodejs20
|
||||
@@ -264,15 +213,6 @@ BROWSERBASE_PROXIES=true
|
||||
# Uses custom Chromium build to avoid bot detection altogether
|
||||
BROWSERBASE_ADVANCED_STEALTH=false
|
||||
|
||||
# Browser engine for local mode (default: auto = Chrome)
|
||||
# "auto" — use Chrome (don't pass --engine flag)
|
||||
# "lightpanda" — use Lightpanda (1.3-5.8x faster navigation, no screenshots)
|
||||
# "chrome" — explicitly request Chrome
|
||||
# Requires agent-browser v0.25.3+. Lightpanda commands that fail or return
|
||||
# empty results are automatically retried with Chrome.
|
||||
# Also configurable via browser.engine in config.yaml.
|
||||
# AGENT_BROWSER_ENGINE=auto
|
||||
|
||||
# Browser session timeout in seconds (default: 300)
|
||||
# Sessions are cleaned up after this duration of inactivity
|
||||
BROWSER_SESSION_TIMEOUT=300
|
||||
@@ -281,27 +221,6 @@ BROWSER_SESSION_TIMEOUT=300
|
||||
# Browser sessions are automatically closed after this period of no activity
|
||||
BROWSER_INACTIVITY_TIMEOUT=120
|
||||
|
||||
# Extra Chromium launch flags passed to agent-browser, comma- or newline-separated.
|
||||
# Hermes auto-injects "--no-sandbox,--disable-dev-shm-usage" when it detects root
|
||||
# or AppArmor-restricted unprivileged user namespaces (Ubuntu 23.10+, DGX Spark,
|
||||
# many container images), so leave this unset unless you need extra flags.
|
||||
# Setting this disables the auto-injection.
|
||||
# AGENT_BROWSER_ARGS=--no-sandbox
|
||||
|
||||
# Camofox local anti-detection browser (Camoufox-based Firefox).
|
||||
# Set CAMOFOX_URL to route the browser tools through a local Camofox server
|
||||
# instead of agent-browser/Browserbase. See docs/user-guide/features/browser.md.
|
||||
# CAMOFOX_URL=http://localhost:9377
|
||||
|
||||
# Externally managed Camofox sessions — when another app owns the visible
|
||||
# Camofox browser, set these so Hermes shares the same userId/profile instead
|
||||
# of creating its own isolated session.
|
||||
# CAMOFOX_USER_ID=
|
||||
# CAMOFOX_SESSION_KEY=
|
||||
# Set to true to reuse an already-open Camofox tab for this identity before
|
||||
# creating a new one (useful for gateway restarts).
|
||||
# CAMOFOX_ADOPT_EXISTING_TAB=false
|
||||
|
||||
# =============================================================================
|
||||
# SESSION LOGGING
|
||||
# =============================================================================
|
||||
@@ -339,7 +258,6 @@ BROWSER_INACTIVITY_TIMEOUT=120
|
||||
# TELEGRAM_ALLOWED_USERS= # Comma-separated user IDs
|
||||
# TELEGRAM_HOME_CHANNEL= # Default chat for cron delivery
|
||||
# TELEGRAM_HOME_CHANNEL_NAME= # Display name for home channel
|
||||
# TELEGRAM_CRON_THREAD_ID= # Forum topic ID for cron deliveries; overrides TELEGRAM_HOME_CHANNEL_THREAD_ID for cron so replies work in topic mode
|
||||
|
||||
# Webhook mode (optional — for cloud deployments like Fly.io/Railway)
|
||||
# Default is long polling. Setting TELEGRAM_WEBHOOK_URL switches to webhook mode.
|
||||
@@ -395,6 +313,24 @@ IMAGE_TOOLS_DEBUG=false
|
||||
# CONTEXT_COMPRESSION_THRESHOLD=0.85 # Compress at 85% of context limit
|
||||
# Model is set via compression.summary_model in config.yaml (default: google/gemini-3-flash-preview)
|
||||
|
||||
# =============================================================================
|
||||
# RL TRAINING (Tinker + Atropos)
|
||||
# =============================================================================
|
||||
# Run reinforcement learning training on language models using the Tinker API.
|
||||
# Requires the rl-server to be running (from tinker-atropos package).
|
||||
|
||||
# Tinker API Key - RL training service
|
||||
# Get at: https://tinker-console.thinkingmachines.ai/keys
|
||||
# TINKER_API_KEY=
|
||||
|
||||
# Weights & Biases API Key - Experiment tracking and metrics
|
||||
# Get at: https://wandb.ai/authorize
|
||||
# WANDB_API_KEY=
|
||||
|
||||
# RL API Server URL (default: http://localhost:8080)
|
||||
# Change if running the rl-server on a different host/port
|
||||
# RL_API_URL=http://localhost:8080
|
||||
|
||||
# =============================================================================
|
||||
# SKILLS HUB (GitHub integration for skill search/install/publish)
|
||||
# =============================================================================
|
||||
@@ -431,40 +367,3 @@ IMAGE_TOOLS_DEBUG=false
|
||||
# Override STT provider endpoints (for proxies or self-hosted instances)
|
||||
# GROQ_BASE_URL=https://api.groq.com/openai/v1
|
||||
# STT_OPENAI_BASE_URL=https://api.openai.com/v1
|
||||
|
||||
# =============================================================================
|
||||
# MICROSOFT TEAMS INTEGRATION
|
||||
# =============================================================================
|
||||
# Register a Bot in Azure: https://dev.botframework.com/ → "Register a bot"
|
||||
# Or use Azure Portal: Azure Active Directory → App registrations → New registration
|
||||
# Then add the bot to Teams via the Bot Framework or App Studio.
|
||||
#
|
||||
# TEAMS_CLIENT_ID= # Azure AD App (client) ID
|
||||
# TEAMS_CLIENT_SECRET= # Azure AD client secret value
|
||||
# TEAMS_TENANT_ID= # Azure AD tenant ID (or "common" for multi-tenant)
|
||||
# TEAMS_ALLOWED_USERS= # Comma-separated AAD object IDs or UPNs
|
||||
# TEAMS_ALLOW_ALL_USERS=false # Set true to skip the allowlist
|
||||
# TEAMS_HOME_CHANNEL= # Default channel/chat ID for cron delivery
|
||||
# TEAMS_HOME_CHANNEL_NAME= # Display name for the home channel
|
||||
# TEAMS_PORT=3978 # Webhook listen port (Bot Framework default)
|
||||
|
||||
# =============================================================================
|
||||
# GOOGLE CHAT INTEGRATION
|
||||
# =============================================================================
|
||||
# Connects via Cloud Pub/Sub pull subscription (no public URL required).
|
||||
# Setup walkthrough: website/docs/user-guide/messaging/google_chat.md.
|
||||
# 1. Create a GCP project, enable the Google Chat API and Cloud Pub/Sub.
|
||||
# 2. Create a Service Account with roles/pubsub.subscriber on the
|
||||
# subscription (NOT project-wide); download the JSON key.
|
||||
# 3. Configure your Chat app at console.cloud.google.com/apis/credentials
|
||||
# → Google Chat API → Configuration → Cloud Pub/Sub topic.
|
||||
# 4. (Optional, for native attachment delivery) Each user runs
|
||||
# `/setup-files` once in their own DM after Pub/Sub is wired up.
|
||||
#
|
||||
# GOOGLE_CHAT_PROJECT_ID= # GCP project hosting the topic (or set GOOGLE_CLOUD_PROJECT)
|
||||
# GOOGLE_CHAT_SUBSCRIPTION_NAME= # Full path: projects/<id>/subscriptions/<name>
|
||||
# GOOGLE_CHAT_SERVICE_ACCOUNT_JSON= # Path to SA JSON (or set GOOGLE_APPLICATION_CREDENTIALS)
|
||||
# GOOGLE_CHAT_ALLOWED_USERS= # Comma-separated emails allowed to talk to the bot
|
||||
# GOOGLE_CHAT_ALLOW_ALL_USERS=false # Set true to skip the allowlist
|
||||
# GOOGLE_CHAT_HOME_CHANNEL= # Default space (spaces/XXXX) for cron delivery
|
||||
# GOOGLE_CHAT_HOME_CHANNEL_NAME= # Display name for the home channel
|
||||
|
||||
4
.envrc
4
.envrc
@@ -1,5 +1 @@
|
||||
watch_file pyproject.toml uv.lock
|
||||
watch_file ui-tui/package-lock.json ui-tui/package.json
|
||||
watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix
|
||||
|
||||
use flake
|
||||
|
||||
2
.gitattributes
vendored
2
.gitattributes
vendored
@@ -1,2 +0,0 @@
|
||||
# Auto-generated files — collapse diffs and exclude from language stats
|
||||
web/package-lock.json linguist-generated=true
|
||||
30
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
30
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
@@ -11,7 +11,6 @@ body:
|
||||
**Before submitting**, please:
|
||||
- [ ] Search [existing issues](https://github.com/NousResearch/hermes-agent/issues) to avoid duplicates
|
||||
- [ ] Update to the latest version (`hermes update`) and confirm the bug still exists
|
||||
- [ ] Run `hermes debug share` and paste the links below (see Debug Report section)
|
||||
|
||||
- type: textarea
|
||||
id: description
|
||||
@@ -83,25 +82,6 @@ body:
|
||||
- Slack
|
||||
- WhatsApp
|
||||
|
||||
- type: textarea
|
||||
id: debug-report
|
||||
attributes:
|
||||
label: Debug Report
|
||||
description: |
|
||||
Run `hermes debug share` from your terminal and paste the links it prints here.
|
||||
This uploads your system info, config, and recent logs to a paste service automatically.
|
||||
|
||||
If you're in an interactive chat session, you can also use the `/debug` slash command — it does the same thing.
|
||||
|
||||
If the upload fails, run `hermes debug share --local` and paste the output directly.
|
||||
placeholder: |
|
||||
Report https://paste.rs/abc123
|
||||
agent.log https://paste.rs/def456
|
||||
gateway.log https://paste.rs/ghi789
|
||||
render: shell
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: os
|
||||
attributes:
|
||||
@@ -117,6 +97,8 @@ body:
|
||||
label: Python Version
|
||||
description: Output of `python --version`
|
||||
placeholder: "3.11.9"
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: hermes-version
|
||||
@@ -124,14 +106,14 @@ body:
|
||||
label: Hermes Version
|
||||
description: Output of `hermes version`
|
||||
placeholder: "2.1.0"
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Additional Logs / Traceback (optional)
|
||||
description: |
|
||||
The debug report above covers most logs. Use this field for any extra error output,
|
||||
tracebacks, or screenshots not captured by `hermes debug share`.
|
||||
label: Relevant Logs / Traceback
|
||||
description: Paste any error output, traceback, or log messages. This will be auto-formatted as code.
|
||||
render: shell
|
||||
|
||||
- type: textarea
|
||||
|
||||
12
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
12
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
@@ -71,15 +71,3 @@ body:
|
||||
label: Contribution
|
||||
options:
|
||||
- label: I'd like to implement this myself and submit a PR
|
||||
|
||||
- type: textarea
|
||||
id: debug-report
|
||||
attributes:
|
||||
label: Debug Report (optional)
|
||||
description: |
|
||||
If this feature request is related to a problem you're experiencing, run `hermes debug share` and paste the links here.
|
||||
In an interactive chat session, you can use `/debug` instead.
|
||||
This helps us understand your environment and any related logs.
|
||||
placeholder: |
|
||||
Report https://paste.rs/abc123
|
||||
render: shell
|
||||
|
||||
20
.github/ISSUE_TEMPLATE/setup_help.yml
vendored
20
.github/ISSUE_TEMPLATE/setup_help.yml
vendored
@@ -9,8 +9,7 @@ body:
|
||||
Sorry you're having trouble! Please fill out the details below so we can help.
|
||||
|
||||
**Quick checks first:**
|
||||
- Run `hermes debug share` and paste the links in the Debug Report section below
|
||||
- If you're in a chat session, you can use `/debug` instead — it does the same thing
|
||||
- Run `hermes doctor` and include the output below
|
||||
- Try `hermes update` to get the latest version
|
||||
- Check the [README troubleshooting section](https://github.com/NousResearch/hermes-agent#troubleshooting)
|
||||
- For general questions, consider the [Nous Research Discord](https://discord.gg/NousResearch) for faster help
|
||||
@@ -75,21 +74,10 @@ body:
|
||||
placeholder: "2.1.0"
|
||||
|
||||
- type: textarea
|
||||
id: debug-report
|
||||
id: doctor-output
|
||||
attributes:
|
||||
label: Debug Report
|
||||
description: |
|
||||
Run `hermes debug share` from your terminal and paste the links it prints here.
|
||||
This uploads your system info, config, and recent logs to a paste service automatically.
|
||||
|
||||
If you're in an interactive chat session, you can also use the `/debug` slash command — it does the same thing.
|
||||
|
||||
If the upload fails or install didn't get that far, run `hermes debug share --local` and paste the output directly.
|
||||
If even that doesn't work, run `hermes doctor` and paste that output instead.
|
||||
placeholder: |
|
||||
Report https://paste.rs/abc123
|
||||
agent.log https://paste.rs/def456
|
||||
gateway.log https://paste.rs/ghi789
|
||||
label: Output of `hermes doctor`
|
||||
description: Run `hermes doctor` and paste the full output. This will be auto-formatted.
|
||||
render: shell
|
||||
|
||||
- type: textarea
|
||||
|
||||
47
.github/actions/hermes-smoke-test/action.yml
vendored
47
.github/actions/hermes-smoke-test/action.yml
vendored
@@ -1,47 +0,0 @@
|
||||
name: Hermes smoke test
|
||||
description: >
|
||||
Run the image's built-in entrypoint against `--help` and `dashboard --help`
|
||||
to catch basic runtime regressions before publishing. Requires the image
|
||||
to already be loaded into the local Docker daemon under `image`.
|
||||
|
||||
Works identically on amd64 and arm64 runners.
|
||||
|
||||
inputs:
|
||||
image:
|
||||
description: Fully-qualified image tag (e.g. nousresearch/hermes-agent:test)
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- name: Ensure /tmp/hermes-test is hermes-writable
|
||||
shell: bash
|
||||
run: |
|
||||
# The image runs as the hermes user (UID 10000). GitHub Actions
|
||||
# creates /tmp/hermes-test root-owned by default, which hermes
|
||||
# can't write to — chown it to match the in-container UID before
|
||||
# bind-mounting. Real users doing `docker run -v ~/.hermes:...`
|
||||
# with their own UID hit the same issue and have their own
|
||||
# remediations (HERMES_UID env var, or chown locally).
|
||||
mkdir -p /tmp/hermes-test
|
||||
sudo chown -R 10000:10000 /tmp/hermes-test
|
||||
|
||||
- name: hermes --help
|
||||
shell: bash
|
||||
run: |
|
||||
docker run --rm \
|
||||
-v /tmp/hermes-test:/opt/data \
|
||||
--entrypoint /opt/hermes/docker/entrypoint.sh \
|
||||
"${{ inputs.image }}" --help
|
||||
|
||||
- name: hermes dashboard --help
|
||||
shell: bash
|
||||
run: |
|
||||
# Regression guard for #9153: dashboard was present in source but
|
||||
# missing from the published image. If this fails, something in
|
||||
# the Dockerfile is excluding the dashboard subcommand from the
|
||||
# installed package.
|
||||
docker run --rm \
|
||||
-v /tmp/hermes-test:/opt/data \
|
||||
--entrypoint /opt/hermes/docker/entrypoint.sh \
|
||||
"${{ inputs.image }}" dashboard --help
|
||||
18
.github/actions/nix-setup/action.yml
vendored
18
.github/actions/nix-setup/action.yml
vendored
@@ -1,18 +0,0 @@
|
||||
name: 'Setup Nix'
|
||||
description: 'Install Nix and configure Cachix binary cache'
|
||||
|
||||
inputs:
|
||||
cachix-auth-token:
|
||||
description: 'Cachix auth token (enables push). Omit for read-only.'
|
||||
required: false
|
||||
default: ''
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
|
||||
- uses: cachix/cachix-action@1eb2ef646ac0255473d23a5907ad7b04ce94065c # v17
|
||||
with:
|
||||
name: hermes-agent
|
||||
authToken: ${{ inputs.cachix-auth-token }}
|
||||
continue-on-error: true
|
||||
44
.github/dependabot.yml
vendored
44
.github/dependabot.yml
vendored
@@ -1,44 +0,0 @@
|
||||
# Dependabot configuration for hermes-agent.
|
||||
#
|
||||
# Deliberately scoped to github-actions only.
|
||||
#
|
||||
# We do NOT enable Dependabot for pip / npm / any source-dependency ecosystem
|
||||
# because we pin source dependencies exactly (uv.lock, package-lock.json) as
|
||||
# part of our supply-chain posture. Automatic version-bump PRs against those
|
||||
# pins would undermine the strategy — pins are moved deliberately, after
|
||||
# review, not on a schedule.
|
||||
#
|
||||
# github-actions is the exception: action pins (we use full commit SHAs per
|
||||
# supply-chain policy) must be updated when upstream actions publish
|
||||
# patches — usually themselves security fixes. Dependabot opens a PR with
|
||||
# the new SHA and release notes; we review and merge like any other PR.
|
||||
#
|
||||
# Security-update PRs for source dependencies (opened ONLY when a CVE is
|
||||
# published affecting a currently-pinned version) are enabled separately
|
||||
# via the repo's Dependabot security updates setting
|
||||
# (Settings → Code security → Dependabot → Dependabot security updates).
|
||||
# Those are CVE-only, not schedule-driven, and do not conflict with our
|
||||
# pinning strategy — they fire when a pinned version becomes known-bad,
|
||||
# which is exactly when we want to move the pin.
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "monday"
|
||||
open-pull-requests-limit: 5
|
||||
labels:
|
||||
- "dependencies"
|
||||
- "github-actions"
|
||||
commit-message:
|
||||
prefix: "chore(actions)"
|
||||
include: "scope"
|
||||
groups:
|
||||
# Batch routine action bumps into one PR per week to reduce noise.
|
||||
# Security updates still open individually and bypass grouping.
|
||||
actions-minor-patch:
|
||||
update-types:
|
||||
- "minor"
|
||||
- "patch"
|
||||
73
.github/workflows/contributor-check.yml
vendored
73
.github/workflows/contributor-check.yml
vendored
@@ -1,73 +0,0 @@
|
||||
name: Contributor Attribution Check
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
# Only run when code files change (not docs-only PRs)
|
||||
- '*.py'
|
||||
- '**/*.py'
|
||||
- '.github/workflows/contributor-check.yml'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
check-attribution:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0 # Full history needed for git log
|
||||
|
||||
- name: Check for unmapped contributor emails
|
||||
run: |
|
||||
# Get the merge base between this PR and main
|
||||
MERGE_BASE=$(git merge-base origin/main HEAD)
|
||||
|
||||
# Find any new author emails in this PR's commits
|
||||
NEW_EMAILS=$(git log ${MERGE_BASE}..HEAD --format='%ae' --no-merges | sort -u)
|
||||
|
||||
if [ -z "$NEW_EMAILS" ]; then
|
||||
echo "No new commits to check."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Check each email against AUTHOR_MAP in release.py
|
||||
MISSING=""
|
||||
while IFS= read -r email; do
|
||||
# Skip teknium and bot emails
|
||||
case "$email" in
|
||||
*teknium*|*noreply@github.com*|*dependabot*|*github-actions*|*anthropic.com*|*cursor.com*)
|
||||
continue ;;
|
||||
esac
|
||||
|
||||
# Check if email is in AUTHOR_MAP (either as a key or matches noreply pattern)
|
||||
if echo "$email" | grep -qP '\+.*@users\.noreply\.github\.com'; then
|
||||
continue # GitHub noreply emails auto-resolve
|
||||
fi
|
||||
|
||||
if ! grep -qF "\"${email}\"" scripts/release.py 2>/dev/null; then
|
||||
AUTHOR=$(git log --author="$email" --format='%an' -1)
|
||||
MISSING="${MISSING}\n ${email} (${AUTHOR})"
|
||||
fi
|
||||
done <<< "$NEW_EMAILS"
|
||||
|
||||
if [ -n "$MISSING" ]; then
|
||||
echo ""
|
||||
echo "⚠️ New contributor email(s) not in AUTHOR_MAP:"
|
||||
echo -e "$MISSING"
|
||||
echo ""
|
||||
echo "Please add mappings to scripts/release.py AUTHOR_MAP:"
|
||||
echo -e "$MISSING" | while read -r line; do
|
||||
email=$(echo "$line" | sed 's/^ *//' | cut -d' ' -f1)
|
||||
[ -z "$email" ] && continue
|
||||
echo " \"${email}\": \"<github-username>\","
|
||||
done
|
||||
echo ""
|
||||
echo "To find the GitHub username for an email:"
|
||||
echo " gh api 'search/users?q=EMAIL+in:email' --jq '.items[0].login'"
|
||||
exit 1
|
||||
else
|
||||
echo "✅ All contributor emails are mapped in AUTHOR_MAP."
|
||||
fi
|
||||
51
.github/workflows/deploy-site.yml
vendored
51
.github/workflows/deploy-site.yml
vendored
@@ -1,12 +1,11 @@
|
||||
name: Deploy Site
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [published]
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'website/**'
|
||||
- 'landingpage/**'
|
||||
- 'skills/**'
|
||||
- 'optional-skills/**'
|
||||
- '.github/workflows/deploy-site.yml'
|
||||
@@ -21,49 +20,32 @@ concurrency:
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
deploy-vercel:
|
||||
if: github.event_name == 'release'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Trigger Vercel Deploy
|
||||
run: curl -X POST "${{ secrets.VERCEL_DEPLOY_HOOK }}"
|
||||
|
||||
deploy-docs:
|
||||
build-and-deploy:
|
||||
# Only run on the upstream repository, not on forks
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
runs-on: ubuntu-latest
|
||||
environment:
|
||||
name: github-pages
|
||||
url: ${{ steps.deploy.outputs.page_url }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 20
|
||||
cache: npm
|
||||
cache-dependency-path: website/package-lock.json
|
||||
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install PyYAML for skill extraction
|
||||
run: pip install pyyaml==6.0.2 httpx==0.28.1
|
||||
run: pip install pyyaml
|
||||
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
- name: Regenerate per-skill docs pages + catalogs
|
||||
run: python3 website/scripts/generate-skill-docs.py
|
||||
|
||||
- name: Build skills index (if not already present)
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
if [ ! -f website/static/api/skills-index.json ]; then
|
||||
python3 scripts/build_skills_index.py || echo "Skills index build failed (non-fatal)"
|
||||
fi
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
working-directory: website
|
||||
@@ -75,23 +57,18 @@ jobs:
|
||||
- name: Stage deployment
|
||||
run: |
|
||||
mkdir -p _site/docs
|
||||
# Landing page at root
|
||||
cp -r landingpage/* _site/
|
||||
# Docusaurus at /docs/
|
||||
cp -r website/build/* _site/docs/
|
||||
# llms.txt / llms-full.txt are also published at the site root
|
||||
# (https://hermes-agent.nousresearch.com/llms.txt) because some
|
||||
# agents and IDE plugins probe the classic root-level path rather
|
||||
# than /docs/llms.txt. Same file, two URLs, one source of truth.
|
||||
if [ -f website/build/llms.txt ]; then
|
||||
cp website/build/llms.txt _site/llms.txt
|
||||
fi
|
||||
if [ -f website/build/llms-full.txt ]; then
|
||||
cp website/build/llms-full.txt _site/llms-full.txt
|
||||
fi
|
||||
# CNAME so GitHub Pages keeps the custom domain between deploys
|
||||
echo "hermes-agent.nousresearch.com" > _site/CNAME
|
||||
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3
|
||||
uses: actions/upload-pages-artifact@v3
|
||||
with:
|
||||
path: _site
|
||||
|
||||
- name: Deploy to GitHub Pages
|
||||
id: deploy
|
||||
uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4
|
||||
uses: actions/deploy-pages@v4
|
||||
|
||||
539
.github/workflows/docker-publish.yml
vendored
539
.github/workflows/docker-publish.yml
vendored
@@ -3,532 +3,89 @@ name: Docker Build and Publish
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- '**/*.py'
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
- 'Dockerfile'
|
||||
- 'docker/**'
|
||||
- '.github/workflows/docker-publish.yml'
|
||||
- '.github/actions/hermes-smoke-test/**'
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- '**/*.py'
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
- 'Dockerfile'
|
||||
- 'docker/**'
|
||||
- '.github/workflows/docker-publish.yml'
|
||||
- '.github/actions/hermes-smoke-test/**'
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# Concurrency: push/release runs are NEVER cancelled so every merge gets its
|
||||
# own SHA-tagged image; :main and :latest are guarded separately by the
|
||||
# move-main and move-latest jobs. PR runs reuse a PR-scoped group with
|
||||
# cancel-in-progress: true so rapid pushes to the same PR collapse to the
|
||||
# latest commit.
|
||||
concurrency:
|
||||
group: docker-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
env:
|
||||
IMAGE_NAME: nousresearch/hermes-agent
|
||||
group: docker-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build amd64 natively. This job also runs the smoke tests (basic --help
|
||||
# and the dashboard subcommand regression guard from #9153), because amd64
|
||||
# is the only arch we can `load` into the local daemon on an amd64 runner.
|
||||
# ---------------------------------------------------------------------------
|
||||
build-amd64:
|
||||
build-and-push:
|
||||
# Only run on the upstream repository, not on forks
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 45
|
||||
outputs:
|
||||
digest: ${{ steps.push.outputs.digest }}
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
# Build once, load into the local daemon for smoke testing. Cached
|
||||
# to gha with a per-arch scope; the push step below reuses every
|
||||
# layer from this build.
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
# Build amd64 only so we can `load` the image for smoke testing.
|
||||
# `load: true` cannot export a multi-arch manifest to the local daemon.
|
||||
# The multi-arch build follows on push to main / release.
|
||||
- name: Build image (amd64, smoke test)
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
load: true
|
||||
platforms: linux/amd64
|
||||
tags: ${{ env.IMAGE_NAME }}:test
|
||||
cache-from: type=gha,scope=docker-amd64
|
||||
cache-to: type=gha,mode=max,scope=docker-amd64
|
||||
tags: nousresearch/hermes-agent:test
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
- name: Smoke test image
|
||||
uses: ./.github/actions/hermes-smoke-test
|
||||
with:
|
||||
image: ${{ env.IMAGE_NAME }}:test
|
||||
- name: Test image starts
|
||||
run: |
|
||||
docker run --rm \
|
||||
-v /tmp/hermes-test:/opt/data \
|
||||
--entrypoint /opt/hermes/docker/entrypoint.sh \
|
||||
nousresearch/hermes-agent:test --help
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
# Push amd64 by digest only (no tag). The merge job assembles the
|
||||
# tagged manifest list. `push-by-digest=true` is docker's recommended
|
||||
# pattern for multi-runner multi-platform builds.
|
||||
#
|
||||
# We apply the OCI revision label here (and again on arm64) because
|
||||
# the move-main / move-latest jobs read it off the linux/amd64
|
||||
# sub-manifest config of the floating tag to decide whether it's safe
|
||||
# to advance. The label must be on each per-arch image — manifest
|
||||
# lists themselves don't carry image config labels.
|
||||
- name: Push amd64 by digest
|
||||
id: push
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
platforms: linux/amd64
|
||||
labels: |
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
outputs: type=image,name=${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
|
||||
cache-from: type=gha,scope=docker-amd64
|
||||
cache-to: type=gha,mode=max,scope=docker-amd64
|
||||
|
||||
# Write the digest to a file and upload it as an artifact so the
|
||||
# merge job can stitch both per-arch digests into a manifest list.
|
||||
- name: Export digest
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
run: |
|
||||
mkdir -p /tmp/digests
|
||||
digest="${{ steps.push.outputs.digest }}"
|
||||
touch "/tmp/digests/${digest#sha256:}"
|
||||
|
||||
- name: Upload digest artifact
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: digest-amd64
|
||||
path: /tmp/digests/*
|
||||
if-no-files-found: error
|
||||
retention-days: 1
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build arm64 natively on GitHub's free arm64 runner. This replaces the
|
||||
# previous QEMU-emulated arm64 build, which was ~5-10x slower and shared
|
||||
# a cache scope with amd64. Matches the amd64 job's shape: build+load,
|
||||
# smoke test, then on push/release push by digest.
|
||||
# ---------------------------------------------------------------------------
|
||||
build-arm64:
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
runs-on: ubuntu-24.04-arm
|
||||
timeout-minutes: 45
|
||||
outputs:
|
||||
digest: ${{ steps.push.outputs.digest }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
# Build once, load into the local daemon for smoke testing. Cached
|
||||
# to gha with a per-arch scope; the push step below reuses every
|
||||
# layer from this build.
|
||||
- name: Build image (arm64, smoke test)
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
load: true
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.IMAGE_NAME }}:test
|
||||
cache-from: type=gha,scope=docker-arm64
|
||||
cache-to: type=gha,mode=max,scope=docker-arm64
|
||||
|
||||
- name: Smoke test image
|
||||
uses: ./.github/actions/hermes-smoke-test
|
||||
with:
|
||||
image: ${{ env.IMAGE_NAME }}:test
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Push arm64 by digest
|
||||
id: push
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
platforms: linux/arm64
|
||||
labels: |
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
outputs: type=image,name=${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
|
||||
cache-from: type=gha,scope=docker-arm64
|
||||
cache-to: type=gha,mode=max,scope=docker-arm64
|
||||
|
||||
- name: Export digest
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
run: |
|
||||
mkdir -p /tmp/digests
|
||||
digest="${{ steps.push.outputs.digest }}"
|
||||
touch "/tmp/digests/${digest#sha256:}"
|
||||
|
||||
- name: Upload digest artifact
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: digest-arm64
|
||||
path: /tmp/digests/*
|
||||
if-no-files-found: error
|
||||
retention-days: 1
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stitch both per-arch digests into a single tagged multi-arch manifest.
|
||||
# This is a registry-side operation — no building, no layer re-push —
|
||||
# so it runs in ~30 seconds. On main pushes it produces :sha-<sha>.
|
||||
# On releases it produces :<release_tag_name>.
|
||||
# ---------------------------------------------------------------------------
|
||||
merge:
|
||||
if: github.repository == 'NousResearch/hermes-agent' && (github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release')
|
||||
runs-on: ubuntu-latest
|
||||
needs: [build-amd64, build-arm64]
|
||||
timeout-minutes: 10
|
||||
outputs:
|
||||
pushed_sha_tag: ${{ steps.mark_pushed.outputs.pushed }}
|
||||
pushed_release_tag: ${{ steps.mark_release_pushed.outputs.pushed }}
|
||||
release_tag: ${{ steps.tag.outputs.tag }}
|
||||
steps:
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
path: /tmp/digests
|
||||
pattern: digest-*
|
||||
merge-multiple: true
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
# Compute the tag for this run. Main pushes use sha-<sha> (so every
|
||||
# commit gets its own immutable tag); releases use the release tag name.
|
||||
- name: Compute tag
|
||||
id: tag
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "release" ]; then
|
||||
echo "tag=${{ github.event.release.tag_name }}" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "tag=sha-${{ github.sha }}" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Create manifest list and push
|
||||
working-directory: /tmp/digests
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Build the arg array from each digest file (filename = the digest
|
||||
# hex, with no sha256: prefix; empty file content, only the name
|
||||
# matters). Using an array avoids shellcheck SC2046 and keeps
|
||||
# every digest a single argv token even under pathological names.
|
||||
args=()
|
||||
for digest_file in *; do
|
||||
args+=("${IMAGE_NAME}@sha256:${digest_file}")
|
||||
done
|
||||
docker buildx imagetools create \
|
||||
-t "${IMAGE_NAME}:${TAG}" \
|
||||
"${args[@]}"
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
TAG: ${{ steps.tag.outputs.tag }}
|
||||
|
||||
- name: Inspect image
|
||||
run: |
|
||||
docker buildx imagetools inspect "${IMAGE_NAME}:${TAG}"
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
TAG: ${{ steps.tag.outputs.tag }}
|
||||
|
||||
# Signal to move-main that the SHA tag is live. Only on main pushes;
|
||||
# releases set pushed_release_tag instead.
|
||||
- name: Mark SHA tag pushed
|
||||
id: mark_pushed
|
||||
- name: Push multi-arch image (main branch)
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
run: echo "pushed=true" >> "$GITHUB_OUTPUT"
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
push: true
|
||||
platforms: linux/amd64,linux/arm64
|
||||
tags: |
|
||||
nousresearch/hermes-agent:latest
|
||||
nousresearch/hermes-agent:${{ github.sha }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
# Signal to move-latest that the release tag is live.
|
||||
- name: Mark release tag pushed
|
||||
id: mark_release_pushed
|
||||
- name: Push multi-arch image (release)
|
||||
if: github.event_name == 'release'
|
||||
run: echo "pushed=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Move :main to point at the SHA tag the merge job pushed.
|
||||
#
|
||||
# :main is the floating tag that tracks the tip of the main branch. Every
|
||||
# merge to main retags :main forward. Users who want "latest dev build"
|
||||
# pull :main; users who want stable releases pull :latest.
|
||||
#
|
||||
# The real serialization guarantee comes from the top-level concurrency
|
||||
# group (`docker-${{ github.ref }}` with `cancel-in-progress: false`),
|
||||
# which ensures at most one workflow run for this ref executes at a time.
|
||||
# That means two move-main steps for the same ref cannot overlap.
|
||||
#
|
||||
# This job has its own concurrency group as defense-in-depth: if the
|
||||
# top-level group is ever loosened, queued move-mains will run serially
|
||||
# in arrival order, each one running the ancestor check below and either
|
||||
# advancing :main or skipping. `cancel-in-progress: false` matches the
|
||||
# top-level setting — we don't want rapid pushes to cancel a queued
|
||||
# move-main, because the ancestor check is the real safety mechanism
|
||||
# and queueing is cheap (move-main is a ~30s registry op).
|
||||
#
|
||||
# Combined with the ancestor check, this means :main only ever moves
|
||||
# forward in git history.
|
||||
# ---------------------------------------------------------------------------
|
||||
move-main:
|
||||
if: |
|
||||
github.repository == 'NousResearch/hermes-agent'
|
||||
&& github.event_name == 'push'
|
||||
&& github.ref == 'refs/heads/main'
|
||||
&& needs.merge.outputs.pushed_sha_tag == 'true'
|
||||
needs: merge
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
concurrency:
|
||||
group: docker-move-main-${{ github.ref }}
|
||||
cancel-in-progress: false
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
fetch-depth: 1000
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
# Read the git revision label off the current :main manifest, then
|
||||
# use `git merge-base --is-ancestor` to check whether our commit is a
|
||||
# descendant of it. If :main doesn't exist yet, or its label is
|
||||
# missing, we treat that as "safe to publish". If another run already
|
||||
# advanced :main past us (or diverged), we skip and leave it alone.
|
||||
- name: Decide whether to move :main
|
||||
id: main_check
|
||||
run: |
|
||||
set -euo pipefail
|
||||
image=nousresearch/hermes-agent
|
||||
|
||||
# Pull the JSON for the linux/amd64 sub-manifest's config and extract
|
||||
# the OCI revision label with jq — Go template field access can't
|
||||
# handle dots in map keys, so using json+jq is the robust route.
|
||||
image_json=$(
|
||||
docker buildx imagetools inspect "${image}:main" \
|
||||
--format '{{ json (index .Image "linux/amd64") }}' \
|
||||
2>/dev/null || true
|
||||
)
|
||||
|
||||
if [ -z "${image_json}" ]; then
|
||||
echo "No existing :main (or inspect failed) — safe to publish."
|
||||
echo "push_main=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
current_sha=$(
|
||||
printf '%s' "${image_json}" \
|
||||
| jq -r '.config.Labels."org.opencontainers.image.revision" // ""'
|
||||
)
|
||||
|
||||
if [ -z "${current_sha}" ]; then
|
||||
echo "Registry :main has no revision label — safe to publish."
|
||||
echo "push_main=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Registry :main is at ${current_sha}"
|
||||
echo "This run is at ${GITHUB_SHA}"
|
||||
|
||||
if [ "${current_sha}" = "${GITHUB_SHA}" ]; then
|
||||
echo ":main already points at our SHA — nothing to do."
|
||||
echo "push_main=false" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Make sure we have the :main commit locally for merge-base.
|
||||
if ! git cat-file -e "${current_sha}^{commit}" 2>/dev/null; then
|
||||
git fetch --no-tags --prune origin \
|
||||
"+refs/heads/main:refs/remotes/origin/main" \
|
||||
|| true
|
||||
fi
|
||||
|
||||
if ! git cat-file -e "${current_sha}^{commit}" 2>/dev/null; then
|
||||
echo "Registry :main points at an unknown commit (${current_sha}); refusing to overwrite."
|
||||
echo "push_main=false" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Our SHA must be a descendant of the current :main to be safe.
|
||||
if git merge-base --is-ancestor "${current_sha}" "${GITHUB_SHA}"; then
|
||||
echo "Our commit is a descendant of :main — safe to advance."
|
||||
echo "push_main=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "Another run advanced :main past us (or diverged) — leaving it alone."
|
||||
echo "push_main=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# Retag the already-pushed SHA manifest as :main. This is a registry-
|
||||
# side operation — no rebuild, no layer re-push — so it's quick and
|
||||
# atomic per-tag. The ancestor check above plus the cancel-in-progress
|
||||
# concurrency on this job together guarantee we only ever move :main
|
||||
# forward in git history.
|
||||
- name: Move :main to this SHA
|
||||
if: steps.main_check.outputs.push_main == 'true'
|
||||
run: |
|
||||
set -euo pipefail
|
||||
image=nousresearch/hermes-agent
|
||||
docker buildx imagetools create \
|
||||
--tag "${image}:main" \
|
||||
"${image}:sha-${GITHUB_SHA}"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Move :latest to point at the release tag the merge job pushed.
|
||||
#
|
||||
# :latest is the floating tag that tracks the most recent stable release.
|
||||
# Only `release: published` events advance it — never main pushes.
|
||||
#
|
||||
# We still run an ancestor check against the existing :latest so that a
|
||||
# backport release on an older branch (e.g. patching v1.1.5 after v1.2.3
|
||||
# is out) doesn't drag :latest backwards. The check is the same shape as
|
||||
# move-main: read the OCI revision label off the current :latest, look up
|
||||
# that commit in git, and only advance if our release commit is a strict
|
||||
# descendant.
|
||||
# ---------------------------------------------------------------------------
|
||||
move-latest:
|
||||
if: |
|
||||
github.repository == 'NousResearch/hermes-agent'
|
||||
&& github.event_name == 'release'
|
||||
&& needs.merge.outputs.pushed_release_tag == 'true'
|
||||
needs: merge
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
concurrency:
|
||||
group: docker-move-latest
|
||||
cancel-in-progress: false
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 1000
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Decide whether to move :latest
|
||||
id: latest_check
|
||||
run: |
|
||||
set -euo pipefail
|
||||
image=nousresearch/hermes-agent
|
||||
|
||||
image_json=$(
|
||||
docker buildx imagetools inspect "${image}:latest" \
|
||||
--format '{{ json (index .Image "linux/amd64") }}' \
|
||||
2>/dev/null || true
|
||||
)
|
||||
|
||||
if [ -z "${image_json}" ]; then
|
||||
echo "No existing :latest (or inspect failed) — safe to publish."
|
||||
echo "push_latest=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
current_sha=$(
|
||||
printf '%s' "${image_json}" \
|
||||
| jq -r '.config.Labels."org.opencontainers.image.revision" // ""'
|
||||
)
|
||||
|
||||
if [ -z "${current_sha}" ]; then
|
||||
echo "Registry :latest has no revision label — safe to publish."
|
||||
echo "push_latest=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Registry :latest is at ${current_sha}"
|
||||
echo "This release is at ${GITHUB_SHA}"
|
||||
|
||||
if [ "${current_sha}" = "${GITHUB_SHA}" ]; then
|
||||
echo ":latest already points at our SHA — nothing to do."
|
||||
echo "push_latest=false" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Make sure we have the :latest commit locally for merge-base.
|
||||
# Releases can be cut from any branch, so fetch broadly.
|
||||
if ! git cat-file -e "${current_sha}^{commit}" 2>/dev/null; then
|
||||
git fetch --no-tags --prune origin \
|
||||
"+refs/heads/main:refs/remotes/origin/main" \
|
||||
|| true
|
||||
fi
|
||||
|
||||
if ! git cat-file -e "${current_sha}^{commit}" 2>/dev/null; then
|
||||
echo "Registry :latest points at an unknown commit (${current_sha}); refusing to overwrite."
|
||||
echo "push_latest=false" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Our release SHA must be a descendant of the current :latest.
|
||||
# Backport releases on older branches won't satisfy this and will
|
||||
# be left alone — :latest stays on the newer release.
|
||||
if git merge-base --is-ancestor "${current_sha}" "${GITHUB_SHA}"; then
|
||||
echo "Our release commit is a descendant of :latest — safe to advance."
|
||||
echo "push_latest=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "Existing :latest is newer than this release (likely a backport) — leaving it alone."
|
||||
echo "push_latest=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# Retag the already-pushed release manifest as :latest.
|
||||
- name: Move :latest to this release tag
|
||||
if: steps.latest_check.outputs.push_latest == 'true'
|
||||
env:
|
||||
RELEASE_TAG: ${{ needs.merge.outputs.release_tag }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
image=nousresearch/hermes-agent
|
||||
docker buildx imagetools create \
|
||||
--tag "${image}:latest" \
|
||||
"${image}:${RELEASE_TAG}"
|
||||
context: .
|
||||
file: Dockerfile
|
||||
push: true
|
||||
platforms: linux/amd64,linux/arm64
|
||||
tags: |
|
||||
nousresearch/hermes-agent:latest
|
||||
nousresearch/hermes-agent:${{ github.event.release.tag_name }}
|
||||
nousresearch/hermes-agent:${{ github.sha }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
12
.github/workflows/docs-site-checks.yml
vendored
12
.github/workflows/docs-site-checks.yml
vendored
@@ -7,16 +7,13 @@ on:
|
||||
- '.github/workflows/docs-site-checks.yml'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
docs-site-checks:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 20
|
||||
cache: npm
|
||||
@@ -26,7 +23,7 @@ jobs:
|
||||
run: npm ci
|
||||
working-directory: website
|
||||
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
@@ -36,9 +33,6 @@ jobs:
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
- name: Regenerate per-skill docs pages + catalogs
|
||||
run: python3 website/scripts/generate-skill-docs.py
|
||||
|
||||
- name: Lint docs diagrams
|
||||
run: npm run lint:diagrams
|
||||
working-directory: website
|
||||
|
||||
58
.github/workflows/history-check.yml
vendored
58
.github/workflows/history-check.yml
vendored
@@ -1,58 +0,0 @@
|
||||
name: History Check
|
||||
|
||||
# Rejects PRs whose branch has no common ancestor with main.
|
||||
#
|
||||
# In May 2026 PR #25045 was merged from a branch that had been disconnected
|
||||
# from main's history (likely an accidental `git checkout --orphan` or
|
||||
# `.git/` re-init). GitHub's merge UI does not refuse merges of unrelated
|
||||
# histories, so the PR landed cleanly with the intended one-file change —
|
||||
# but its parent-less root commit (413990c94) got grafted into main as a
|
||||
# second root, and ~1500 files' worth of `git blame` history collapsed
|
||||
# onto that single commit.
|
||||
#
|
||||
# This check catches the failure mode by requiring `git merge-base` between
|
||||
# the PR head and main to be non-empty.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
check-common-ancestor:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0 # full history both sides for merge-base
|
||||
|
||||
- name: Reject PRs with no common ancestor on main
|
||||
run: |
|
||||
# `git merge-base` exits non-zero AND prints nothing when the two
|
||||
# commits share no ancestor. We check both conditions explicitly
|
||||
# so the failure message is clear regardless of which signal fires
|
||||
# first.
|
||||
if ! BASE=$(git merge-base origin/main HEAD 2>/dev/null) || [ -z "$BASE" ]; then
|
||||
echo ""
|
||||
echo "::error::This PR has no common ancestor with main."
|
||||
echo ""
|
||||
echo "Your branch's history is disconnected from main. Common causes:"
|
||||
echo " - the branch was created with 'git checkout --orphan'"
|
||||
echo " - '.git/' was re-initialized at some point during the work"
|
||||
echo " - the branch was force-pushed from an unrelated repository"
|
||||
echo ""
|
||||
echo "Merging an unrelated-history PR grafts a parent-less root commit"
|
||||
echo "into main and collapses git blame for every file in that snapshot."
|
||||
echo "Reference: PR #25045 caused this and re-rooted blame on ~1500"
|
||||
echo "files to a single orphan commit."
|
||||
echo ""
|
||||
echo "To fix, rebase your changes onto current main:"
|
||||
echo " git fetch origin main"
|
||||
echo " git checkout -b fix-branch origin/main"
|
||||
echo " # re-apply your changes (cherry-pick, copy files, etc.)"
|
||||
echo " git push -f origin fix-branch"
|
||||
exit 1
|
||||
fi
|
||||
echo "::notice::Common ancestor with main: $BASE"
|
||||
202
.github/workflows/lint.yml
vendored
202
.github/workflows/lint.yml
vendored
@@ -1,202 +0,0 @@
|
||||
name: Lint (ruff + ty)
|
||||
|
||||
# Two things here:
|
||||
# 1. Advisory diff — ruff + ty diagnostics as a diff vs the target branch.
|
||||
# Posts a Markdown summary and a PR comment. Exit zero always.
|
||||
# 2. Blocking ``ruff check .`` — enforces the explicit rules in
|
||||
# ``[tool.ruff.lint.select]`` (currently PLW1514). Failure blocks merge.
|
||||
# Separate job so the advisory diff still runs and posts even when
|
||||
# enforcement fails.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- "**/*.md"
|
||||
- "docs/**"
|
||||
- "website/**"
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- "**/*.md"
|
||||
- "docs/**"
|
||||
- "website/**"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write # needed to post/update PR comments
|
||||
|
||||
concurrency:
|
||||
group: lint-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
lint-diff:
|
||||
name: ruff + ty diff
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0 # need full history for merge-base + worktree
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
- name: Install ruff + ty
|
||||
run: |
|
||||
uv tool install ruff
|
||||
uv tool install ty
|
||||
|
||||
- name: Determine base ref
|
||||
id: base
|
||||
run: |
|
||||
# For PRs, diff against the merge base with the target branch.
|
||||
# For pushes to main, diff against the previous commit on main.
|
||||
if [ "${{ github.event_name }}" = "pull_request" ]; then
|
||||
BASE_SHA=$(git merge-base "origin/${{ github.base_ref }}" HEAD)
|
||||
BASE_REF="origin/${{ github.base_ref }}"
|
||||
else
|
||||
BASE_SHA=$(git rev-parse HEAD~1 2>/dev/null || git rev-parse HEAD)
|
||||
BASE_REF="HEAD~1"
|
||||
fi
|
||||
echo "sha=${BASE_SHA}" >> "$GITHUB_OUTPUT"
|
||||
echo "ref=${BASE_REF}" >> "$GITHUB_OUTPUT"
|
||||
echo "Base SHA: ${BASE_SHA}"
|
||||
echo "Base ref: ${BASE_REF}"
|
||||
|
||||
- name: Run ruff + ty on HEAD
|
||||
run: |
|
||||
mkdir -p .lint-reports/head
|
||||
ruff check --output-format json --exit-zero \
|
||||
> .lint-reports/head/ruff.json || true
|
||||
ty check --output-format gitlab --exit-zero \
|
||||
> .lint-reports/head/ty.json || true
|
||||
echo "HEAD ruff: $(wc -c < .lint-reports/head/ruff.json) bytes"
|
||||
echo "HEAD ty: $(wc -c < .lint-reports/head/ty.json) bytes"
|
||||
|
||||
- name: Run ruff + ty on base (via git worktree)
|
||||
run: |
|
||||
mkdir -p .lint-reports/base
|
||||
# Use a worktree so we don't clobber the main checkout. If the basex
|
||||
# SHA is identical to HEAD (e.g. first commit), skip and leave the
|
||||
# base reports empty — the diff script handles missing files.
|
||||
HEAD_SHA=$(git rev-parse HEAD)
|
||||
BASE_SHA="${{ steps.base.outputs.sha }}"
|
||||
if [ "$BASE_SHA" = "$HEAD_SHA" ]; then
|
||||
echo "Base SHA == HEAD SHA, skipping base scan."
|
||||
echo '[]' > .lint-reports/base/ruff.json
|
||||
echo '[]' > .lint-reports/base/ty.json
|
||||
else
|
||||
git worktree add --detach /tmp/lint-base "$BASE_SHA"
|
||||
(
|
||||
cd /tmp/lint-base
|
||||
ruff check --output-format json --exit-zero \
|
||||
> "$GITHUB_WORKSPACE/.lint-reports/base/ruff.json" || true
|
||||
ty check --output-format gitlab --exit-zero \
|
||||
> "$GITHUB_WORKSPACE/.lint-reports/base/ty.json" || true
|
||||
)
|
||||
git worktree remove --force /tmp/lint-base
|
||||
fi
|
||||
echo "base ruff: $(wc -c < .lint-reports/base/ruff.json) bytes"
|
||||
echo "base ty: $(wc -c < .lint-reports/base/ty.json) bytes"
|
||||
|
||||
- name: Generate diff summary
|
||||
run: |
|
||||
python scripts/lint_diff.py \
|
||||
--base-ruff .lint-reports/base/ruff.json \
|
||||
--head-ruff .lint-reports/head/ruff.json \
|
||||
--base-ty .lint-reports/base/ty.json \
|
||||
--head-ty .lint-reports/head/ty.json \
|
||||
--base-ref "${{ steps.base.outputs.ref }}" \
|
||||
--head-ref "${{ github.event_name == 'pull_request' && github.head_ref || github.ref_name }}" \
|
||||
--output .lint-reports/summary.md
|
||||
cat .lint-reports/summary.md >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
- name: Upload reports as artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: lint-reports
|
||||
path: .lint-reports/
|
||||
retention-days: 14
|
||||
|
||||
- name: Post / update PR comment
|
||||
if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
|
||||
continue-on-error: true
|
||||
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7
|
||||
with:
|
||||
script: |
|
||||
const fs = require('fs');
|
||||
const body = fs.readFileSync('.lint-reports/summary.md', 'utf8');
|
||||
const marker = '<!-- lint-diff-summary -->';
|
||||
const fullBody = marker + '\n' + body;
|
||||
|
||||
const { data: comments } = await github.rest.issues.listComments({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
});
|
||||
const existing = comments.find(c => c.body && c.body.includes(marker));
|
||||
if (existing) {
|
||||
await github.rest.issues.updateComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
comment_id: existing.id,
|
||||
body: fullBody,
|
||||
});
|
||||
} else {
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
body: fullBody,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
ruff-blocking:
|
||||
# Enforce the rules in pyproject.toml [tool.ruff.lint.select]. Currently
|
||||
# PLW1514 (unspecified-encoding) — catches bare ``open()`` /
|
||||
# ``read_text()`` / ``write_text()`` calls that default to locale
|
||||
# encoding on Windows. Failure here blocks merge; the advisory
|
||||
# ``lint-diff`` job above runs independently so reviewers still get
|
||||
# the diff comment even when enforcement fails.
|
||||
name: ruff enforcement (blocking)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
- name: Install ruff
|
||||
run: uv tool install ruff
|
||||
|
||||
- name: ruff check .
|
||||
# No --exit-zero, no || true. Exit code propagates to the job,
|
||||
# which propagates to the required-check gate.
|
||||
run: |
|
||||
ruff check .
|
||||
|
||||
windows-footguns:
|
||||
# Static guardrails on Windows-unsafe Python primitives — os.kill(pid, 0),
|
||||
# os.killpg, os.setsid, signal.SIGKILL without getattr fallback,
|
||||
# shebang scripts via subprocess, bare open() without encoding=, etc.
|
||||
# See scripts/check-windows-footguns.py for the full rule list.
|
||||
name: Windows footguns (blocking)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Run footgun checker
|
||||
run: python scripts/check-windows-footguns.py --all
|
||||
254
.github/workflows/nix-lockfile-fix.yml
vendored
254
.github/workflows/nix-lockfile-fix.yml
vendored
@@ -1,254 +0,0 @@
|
||||
name: Nix Lockfile Fix
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'ui-tui/package-lock.json'
|
||||
- 'ui-tui/package.json'
|
||||
- 'web/package-lock.json'
|
||||
- 'web/package.json'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
pr_number:
|
||||
description: 'PR number to fix (leave empty to run on the selected branch)'
|
||||
required: false
|
||||
type: string
|
||||
issue_comment:
|
||||
types: [edited]
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
|
||||
concurrency:
|
||||
group: nix-lockfile-fix-${{ github.event.issue.number || github.event.inputs.pr_number || github.ref }}
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
# ── Auto-fix on main ───────────────────────────────────────────────
|
||||
# Fires when a push to main touches package.json or package-lock.json
|
||||
# in ui-tui/ or web/. Runs fix-lockfiles and pushes the hash
|
||||
# update commit directly to main so Nix builds never stay broken.
|
||||
#
|
||||
# Safety invariants:
|
||||
# 1. The fix commit only touches nix/*.nix files, which are NOT in
|
||||
# the paths filter above, so this cannot re-trigger itself.
|
||||
# 2. An explicit file-whitelist check before commit aborts if
|
||||
# fix-lockfiles ever modifies unexpected files.
|
||||
# 3. Job-level concurrency with cancel-in-progress: true ensures
|
||||
# back-to-back pushes collapse to the newest; ref: main checkout
|
||||
# always operates on the latest branch state.
|
||||
# 4. Uses a GitHub App token (not GITHUB_TOKEN) so the fix commit
|
||||
# triggers downstream nix.yml verification.
|
||||
auto-fix-main:
|
||||
if: github.event_name == 'push'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 25
|
||||
concurrency:
|
||||
group: auto-fix-main
|
||||
cancel-in-progress: true
|
||||
steps:
|
||||
- name: Generate GitHub App token
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@7bfa3a4717ef143a604ee0a99d859b8886a96d00 # v1.9.3
|
||||
with:
|
||||
app-id: ${{ secrets.APP_ID }}
|
||||
private-key: ${{ secrets.APP_PRIVATE_KEY }}
|
||||
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
ref: main
|
||||
token: ${{ steps.app-token.outputs.token }}
|
||||
|
||||
- uses: ./.github/actions/nix-setup
|
||||
with:
|
||||
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
|
||||
|
||||
- name: Apply lockfile hashes
|
||||
id: apply
|
||||
run: nix run .#fix-lockfiles -- --apply
|
||||
|
||||
- name: Commit & push
|
||||
if: steps.apply.outputs.changed == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Ensure only nix files were modified — prevents accidental
|
||||
# self-triggering if fix-lockfiles ever touches package files.
|
||||
unexpected="$(git diff --name-only | grep -Ev '^nix/(tui|web)\.nix$' || true)"
|
||||
if [ -n "$unexpected" ]; then
|
||||
echo "::error::Unexpected modified files: $unexpected"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Record the base SHA before committing — used to detect package
|
||||
# file changes if we need to rebase after a non-fast-forward push.
|
||||
BASE_SHA="$(git rev-parse HEAD)"
|
||||
|
||||
git config user.name 'github-actions[bot]'
|
||||
git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
|
||||
git add nix/tui.nix nix/web.nix
|
||||
git commit -m "fix(nix): auto-refresh npm lockfile hashes" \
|
||||
-m "Source: $GITHUB_SHA" \
|
||||
-m "Run: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
|
||||
|
||||
# Retry push with rebase in case main advanced with an unrelated
|
||||
# commit during the nix build. Without this, a non-fast-forward
|
||||
# rejection silently loses the fix. If package files changed during
|
||||
# the rebase, abort — a fresh auto-fix run will handle the new state.
|
||||
for attempt in 1 2 3; do
|
||||
if git push origin HEAD:main; then
|
||||
exit 0
|
||||
fi
|
||||
echo "::warning::Push attempt $attempt failed (non-fast-forward?), rebasing…"
|
||||
git fetch origin main
|
||||
|
||||
# If package files changed between our base and the new main,
|
||||
# our computed hashes are stale. Abort and let the next triggered
|
||||
# run recompute from the correct package-lock state.
|
||||
pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
|
||||
'ui-tui/package-lock.json' 'ui-tui/package.json' \
|
||||
'web/package-lock.json' 'web/package.json' || true)"
|
||||
if [ -n "$pkg_changed" ]; then
|
||||
echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
git rebase origin/main
|
||||
done
|
||||
echo "::error::Failed to push after 3 rebase attempts"
|
||||
exit 1
|
||||
|
||||
# ── PR fix (manual / checkbox) ─────────────────────────────────────
|
||||
# Existing behavior: run on manual dispatch OR when a task-list
|
||||
# checkbox in the sticky lockfile-check comment flips from [ ] to [x].
|
||||
fix:
|
||||
if: |
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'issue_comment'
|
||||
&& github.event.issue.pull_request != null
|
||||
&& contains(github.event.comment.body, '[x] **Apply lockfile fix**')
|
||||
&& !contains(github.event.changes.body.from, '[x] **Apply lockfile fix**'))
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 25
|
||||
steps:
|
||||
- name: Authorize & resolve PR
|
||||
id: resolve
|
||||
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
|
||||
with:
|
||||
script: |
|
||||
// 1. Verify the actor has write access — applies to both checkbox
|
||||
// clicks and manual dispatch.
|
||||
const { data: perm } =
|
||||
await github.rest.repos.getCollaboratorPermissionLevel({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
username: context.actor,
|
||||
});
|
||||
if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
|
||||
core.setFailed(
|
||||
`${context.actor} lacks write access (has: ${perm.permission})`
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// 2. Resolve which ref to check out.
|
||||
let prNumber = '';
|
||||
if (context.eventName === 'issue_comment') {
|
||||
prNumber = String(context.payload.issue.number);
|
||||
} else if (context.eventName === 'workflow_dispatch') {
|
||||
prNumber = context.payload.inputs.pr_number || '';
|
||||
}
|
||||
|
||||
if (!prNumber) {
|
||||
core.setOutput('ref', context.ref.replace(/^refs\/heads\//, ''));
|
||||
core.setOutput('repo', context.repo.repo);
|
||||
core.setOutput('owner', context.repo.owner);
|
||||
core.setOutput('pr', '');
|
||||
return;
|
||||
}
|
||||
|
||||
const { data: pr } = await github.rest.pulls.get({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
pull_number: Number(prNumber),
|
||||
});
|
||||
core.setOutput('ref', pr.head.ref);
|
||||
core.setOutput('repo', pr.head.repo.name);
|
||||
core.setOutput('owner', pr.head.repo.owner.login);
|
||||
core.setOutput('pr', String(pr.number));
|
||||
|
||||
# Wipe the sticky lockfile-check comment to a "running" state as soon
|
||||
# as the job is authorized, so the user sees their click was picked up
|
||||
# before the ~minute of nix build work.
|
||||
- name: Mark sticky as running
|
||||
if: steps.resolve.outputs.pr != ''
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
number: ${{ steps.resolve.outputs.pr }}
|
||||
message: |
|
||||
### 🔄 Applying lockfile fix…
|
||||
|
||||
Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
|
||||
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
|
||||
ref: ${{ steps.resolve.outputs.ref }}
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: ./.github/actions/nix-setup
|
||||
with:
|
||||
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
|
||||
|
||||
- name: Apply lockfile hashes
|
||||
id: apply
|
||||
run: nix run .#fix-lockfiles
|
||||
|
||||
- name: Commit & push
|
||||
if: steps.apply.outputs.changed == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
git config user.name 'github-actions[bot]'
|
||||
git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
|
||||
git add nix/tui.nix nix/web.nix
|
||||
git commit -m "fix(nix): refresh npm lockfile hashes"
|
||||
git push
|
||||
|
||||
- name: Update sticky (applied)
|
||||
if: steps.apply.outputs.changed == 'true' && steps.resolve.outputs.pr != ''
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
number: ${{ steps.resolve.outputs.pr }}
|
||||
message: |
|
||||
### ✅ Lockfile fix applied
|
||||
|
||||
Pushed a commit refreshing the npm lockfile hashes — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
|
||||
|
||||
- name: Update sticky (already current)
|
||||
if: steps.apply.outputs.changed == 'false' && steps.resolve.outputs.pr != ''
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
number: ${{ steps.resolve.outputs.pr }}
|
||||
message: |
|
||||
### ✅ Lockfile hashes already current
|
||||
|
||||
Nothing to commit — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
|
||||
|
||||
- name: Update sticky (failed)
|
||||
if: failure() && steps.resolve.outputs.pr != ''
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
number: ${{ steps.resolve.outputs.pr }}
|
||||
message: |
|
||||
### ❌ Lockfile fix failed
|
||||
|
||||
See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for logs.
|
||||
101
.github/workflows/nix.yml
vendored
101
.github/workflows/nix.yml
vendored
@@ -4,10 +4,15 @@ on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
paths:
|
||||
- 'flake.nix'
|
||||
- 'flake.lock'
|
||||
- 'nix/**'
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
- 'hermes_cli/**'
|
||||
- 'run_agent.py'
|
||||
- 'acp_adapter/**'
|
||||
|
||||
concurrency:
|
||||
group: nix-${{ github.ref }}
|
||||
@@ -21,97 +26,15 @@ jobs:
|
||||
runs-on: ${{ matrix.os }}
|
||||
timeout-minutes: 30
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: ./.github/actions/nix-setup
|
||||
with:
|
||||
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
|
||||
|
||||
- name: Resolve head SHA
|
||||
if: github.event_name == 'pull_request'
|
||||
id: sha
|
||||
shell: bash
|
||||
run: |
|
||||
FULL="${{ github.event.pull_request.head.sha || github.sha }}"
|
||||
echo "full=$FULL" >> "$GITHUB_OUTPUT"
|
||||
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
|
||||
- uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
|
||||
- name: Check flake
|
||||
id: flake
|
||||
if: runner.os == 'Linux'
|
||||
continue-on-error: true
|
||||
run: nix flake check --print-build-logs
|
||||
|
||||
- name: Build package
|
||||
id: build
|
||||
if: runner.os == 'Linux'
|
||||
continue-on-error: true
|
||||
run: nix build --print-build-logs
|
||||
|
||||
# When the real Nix build fails, run a targeted diagnostic to see if
|
||||
# the failure is specifically a stale npm lockfile hash in one of the
|
||||
# known npm subpackages (tui / web). This avoids surfacing a generic
|
||||
# "build failed" message when the fix is a single known command.
|
||||
- name: Diagnose npm lockfile hashes
|
||||
id: hash_check
|
||||
if: (steps.flake.outcome == 'failure' || steps.build.outcome == 'failure') && runner.os == 'Linux'
|
||||
continue-on-error: true
|
||||
env:
|
||||
LINK_SHA: ${{ steps.sha.outputs.full }}
|
||||
run: nix run .#fix-lockfiles -- --check
|
||||
|
||||
# If fix-lockfiles itself crashes (infrastructure blip, cache throttle,
|
||||
# etc.) it won't set stale=true/false. Treat that as a distinct failure
|
||||
# mode rather than silently ignoring it.
|
||||
- name: Fail if hash check crashed without reporting
|
||||
if: steps.hash_check.outcome == 'failure' && steps.hash_check.outputs.stale != 'true' && steps.hash_check.outputs.stale != 'false'
|
||||
run: |
|
||||
echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
|
||||
exit 1
|
||||
|
||||
- name: Post sticky PR comment (stale hashes)
|
||||
if: steps.hash_check.outputs.stale == 'true' && github.event_name == 'pull_request'
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
message: |
|
||||
### ⚠️ npm lockfile hash out of date
|
||||
|
||||
Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
|
||||
|
||||
The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
|
||||
|
||||
${{ steps.hash_check.outputs.report }}
|
||||
|
||||
#### Apply the fix
|
||||
|
||||
- [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
|
||||
- Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
|
||||
- Or locally: `nix run .#fix-lockfiles` and commit the diff
|
||||
|
||||
# Clear the sticky comment when either the build passed outright (no
|
||||
# hash check needed) or the hash check explicitly returned stale=false
|
||||
# (build failed for a non-hash reason).
|
||||
- name: Clear sticky PR comment (resolved)
|
||||
if: |
|
||||
github.event_name == 'pull_request' &&
|
||||
runner.os == 'Linux' &&
|
||||
(steps.hash_check.outputs.stale == 'false' ||
|
||||
(steps.flake.outcome == 'success' && steps.build.outcome == 'success'))
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
delete: true
|
||||
|
||||
- name: Final fail if build or flake failed
|
||||
if: steps.flake.outcome == 'failure' || steps.build.outcome == 'failure'
|
||||
run: |
|
||||
if [ "${{ steps.hash_check.outputs.stale }}" == "true" ]; then
|
||||
echo "::error::Nix build failed due to stale npm lockfile hash. Run: nix run .#fix-lockfiles"
|
||||
else
|
||||
echo "::error::Nix build/flake check failed. See logs above."
|
||||
fi
|
||||
exit 1
|
||||
|
||||
- name: Evaluate flake (macOS)
|
||||
if: runner.os == 'macOS'
|
||||
run: nix flake show --json > /dev/null
|
||||
|
||||
67
.github/workflows/osv-scanner.yml
vendored
67
.github/workflows/osv-scanner.yml
vendored
@@ -1,67 +0,0 @@
|
||||
name: OSV-Scanner
|
||||
|
||||
# Scans lockfiles (uv.lock, package-lock.json) against the OSV vulnerability
|
||||
# database. Runs on every PR that touches a lockfile and on a weekly schedule
|
||||
# against main.
|
||||
#
|
||||
# This is detection-only — OSV-Scanner does NOT open PRs or modify pins.
|
||||
# It reports known CVEs in currently-pinned dependency versions so we can
|
||||
# decide when and how to patch on our own schedule. Our pinning strategy
|
||||
# (full SHA / exact version) is preserved; only the notification signal
|
||||
# is added.
|
||||
#
|
||||
# Complements the existing supply-chain-audit.yml workflow (which scans
|
||||
# for malicious code patterns in PR diffs) by covering the orthogonal
|
||||
# "currently-pinned dep became known-vulnerable" case.
|
||||
#
|
||||
# Uses Google's officially-recommended reusable workflow, pinned by SHA.
|
||||
# Findings land in the repo's Security tab (Code Scanning > OSV-Scanner).
|
||||
# fail-on-vuln is disabled so the job does not block merges on pre-existing
|
||||
# vulnerabilities in pinned deps that we may need to patch deliberately.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'uv.lock'
|
||||
- 'pyproject.toml'
|
||||
- 'package.json'
|
||||
- 'package-lock.json'
|
||||
- 'ui-tui/package.json'
|
||||
- 'ui-tui/package-lock.json'
|
||||
- 'website/package.json'
|
||||
- 'website/package-lock.json'
|
||||
- '.github/workflows/osv-scanner.yml'
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'uv.lock'
|
||||
- 'pyproject.toml'
|
||||
- 'package.json'
|
||||
- 'package-lock.json'
|
||||
- 'ui-tui/package-lock.json'
|
||||
- 'website/package-lock.json'
|
||||
schedule:
|
||||
# Weekly scan against main — catches CVEs published after merge for
|
||||
# deps that haven't changed since.
|
||||
- cron: '0 9 * * 1'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
# Required by the reusable workflow to upload SARIF to the Security tab.
|
||||
actions: read
|
||||
contents: read
|
||||
security-events: write
|
||||
|
||||
jobs:
|
||||
scan:
|
||||
name: Scan lockfiles
|
||||
uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@9a498708959aeaef5ef730655706c5a1df1edbc2 # v2.3.8
|
||||
with:
|
||||
# Scan explicit lockfiles rather than recursing, so we only look at
|
||||
# the three sources of truth and skip vendored / test / worktree dirs.
|
||||
scan-args: |-
|
||||
--lockfile=uv.lock
|
||||
--lockfile=ui-tui/package-lock.json
|
||||
--lockfile=website/package-lock.json
|
||||
fail-on-vuln: false
|
||||
101
.github/workflows/skills-index.yml
vendored
101
.github/workflows/skills-index.yml
vendored
@@ -1,101 +0,0 @@
|
||||
name: Build Skills Index
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Run twice daily: 6 AM and 6 PM UTC
|
||||
- cron: '0 6,18 * * *'
|
||||
workflow_dispatch: # Manual trigger
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'scripts/build_skills_index.py'
|
||||
- '.github/workflows/skills-index.yml'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
build-index:
|
||||
# Only run on the upstream repository, not on forks
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install dependencies
|
||||
run: pip install httpx==0.28.1 pyyaml==6.0.2
|
||||
|
||||
- name: Build skills index
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: python scripts/build_skills_index.py
|
||||
|
||||
- name: Upload index artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: skills-index
|
||||
path: website/static/api/skills-index.json
|
||||
retention-days: 7
|
||||
|
||||
deploy-with-index:
|
||||
needs: build-index
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pages: write
|
||||
id-token: write
|
||||
environment:
|
||||
name: github-pages
|
||||
url: ${{ steps.deploy.outputs.page_url }}
|
||||
# Only deploy on schedule or manual trigger (not on every push to the script)
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
name: skills-index
|
||||
path: website/static/api/
|
||||
|
||||
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
with:
|
||||
node-version: 20
|
||||
cache: npm
|
||||
cache-dependency-path: website/package-lock.json
|
||||
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install PyYAML for skill extraction
|
||||
run: pip install pyyaml==6.0.2
|
||||
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
working-directory: website
|
||||
|
||||
- name: Build Docusaurus
|
||||
run: npm run build
|
||||
working-directory: website
|
||||
|
||||
- name: Stage deployment
|
||||
run: |
|
||||
mkdir -p _site/docs
|
||||
cp -r landingpage/* _site/
|
||||
cp -r website/build/* _site/docs/
|
||||
echo "hermes-agent.nousresearch.com" > _site/CNAME
|
||||
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3
|
||||
with:
|
||||
path: _site
|
||||
|
||||
- name: Deploy to GitHub Pages
|
||||
id: deploy
|
||||
uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4
|
||||
197
.github/workflows/supply-chain-audit.yml
vendored
197
.github/workflows/supply-chain-audit.yml
vendored
@@ -3,40 +3,22 @@ name: Supply Chain Audit
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
paths:
|
||||
- '**/*.py'
|
||||
- '**/*.pth'
|
||||
- '**/setup.py'
|
||||
- '**/setup.cfg'
|
||||
- '**/sitecustomize.py'
|
||||
- '**/usercustomize.py'
|
||||
- '**/__init__.pth'
|
||||
- 'pyproject.toml'
|
||||
|
||||
permissions:
|
||||
pull-requests: write
|
||||
contents: read
|
||||
|
||||
# Narrow, high-signal scanner. Only fires on critical indicators of supply
|
||||
# chain attacks (e.g. the litellm-style payloads). Low-signal heuristics
|
||||
# (plain base64, plain exec/eval, dependency/Dockerfile/workflow edits,
|
||||
# Actions version unpinning, outbound POST/PUT) were intentionally
|
||||
# removed — they fired on nearly every PR and trained reviewers to ignore
|
||||
# the scanner. Keep this file's checks ruthlessly narrow: if you find
|
||||
# yourself adding WARNING-tier patterns here again, make a separate
|
||||
# advisory-only workflow instead.
|
||||
|
||||
jobs:
|
||||
scan:
|
||||
name: Scan PR for critical supply chain risks
|
||||
name: Scan PR for supply chain risks
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Scan diff for critical patterns
|
||||
- name: Scan diff for suspicious patterns
|
||||
id: scan
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
@@ -46,19 +28,19 @@ jobs:
|
||||
BASE="${{ github.event.pull_request.base.sha }}"
|
||||
HEAD="${{ github.event.pull_request.head.sha }}"
|
||||
|
||||
# Added lines only, excluding lockfiles.
|
||||
# Get the full diff (added lines only)
|
||||
DIFF=$(git diff "$BASE".."$HEAD" -- . ':!uv.lock' ':!*.lock' ':!package-lock.json' ':!yarn.lock' || true)
|
||||
|
||||
FINDINGS=""
|
||||
CRITICAL=false
|
||||
|
||||
# --- .pth files (auto-execute on Python startup) ---
|
||||
# The exact mechanism used in the litellm supply chain attack:
|
||||
# https://github.com/BerriAI/litellm/issues/24512
|
||||
PTH_FILES=$(git diff --name-only "$BASE".."$HEAD" | grep '\.pth$' || true)
|
||||
if [ -n "$PTH_FILES" ]; then
|
||||
CRITICAL=true
|
||||
FINDINGS="${FINDINGS}
|
||||
### 🚨 CRITICAL: .pth file added or modified
|
||||
Python \`.pth\` files in \`site-packages/\` execute automatically when the interpreter starts — no import required.
|
||||
Python \`.pth\` files in \`site-packages/\` execute automatically when the interpreter starts — no import required. This is the exact mechanism used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512).
|
||||
|
||||
**Files:**
|
||||
\`\`\`
|
||||
@@ -67,12 +49,13 @@ jobs:
|
||||
"
|
||||
fi
|
||||
|
||||
# --- base64 decode + exec/eval on the same line (the litellm attack pattern) ---
|
||||
# --- base64 + exec/eval combo (the litellm attack pattern) ---
|
||||
B64_EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|decodebytes|urlsafe_b64decode)' | grep -iE 'exec\(|eval\(' | head -10 || true)
|
||||
if [ -n "$B64_EXEC_HITS" ]; then
|
||||
CRITICAL=true
|
||||
FINDINGS="${FINDINGS}
|
||||
### 🚨 CRITICAL: base64 decode + exec/eval combo
|
||||
Base64-decoded strings passed directly to exec/eval — the signature of hidden credential-stealing payloads.
|
||||
This is the exact pattern used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512) — base64-decoded strings passed to exec/eval to hide credential-stealing payloads.
|
||||
|
||||
**Matches:**
|
||||
\`\`\`
|
||||
@@ -81,12 +64,41 @@ jobs:
|
||||
"
|
||||
fi
|
||||
|
||||
# --- subprocess with encoded/obfuscated command argument ---
|
||||
PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|\\x[0-9a-f]{2}|chr\(' | head -10 || true)
|
||||
# --- base64 decode/encode (alone — legitimate uses exist) ---
|
||||
B64_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|b64encode|decodebytes|encodebytes|urlsafe_b64decode)|atob\(|btoa\(|Buffer\.from\(.*base64' | head -20 || true)
|
||||
if [ -n "$B64_HITS" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### ⚠️ WARNING: base64 encoding/decoding detected
|
||||
Base64 has legitimate uses (images, JWT, etc.) but is also commonly used to obfuscate malicious payloads. Verify the usage is appropriate.
|
||||
|
||||
**Matches (first 20):**
|
||||
\`\`\`
|
||||
${B64_HITS}
|
||||
\`\`\`
|
||||
"
|
||||
fi
|
||||
|
||||
# --- exec/eval with string arguments ---
|
||||
EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E '(exec|eval)\s*\(' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert\|# ' | head -20 || true)
|
||||
if [ -n "$EXEC_HITS" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### ⚠️ WARNING: exec() or eval() usage
|
||||
Dynamic code execution can hide malicious behavior, especially when combined with base64 or network fetches.
|
||||
|
||||
**Matches (first 20):**
|
||||
\`\`\`
|
||||
${EXEC_HITS}
|
||||
\`\`\`
|
||||
"
|
||||
fi
|
||||
|
||||
# --- subprocess with encoded/obfuscated commands ---
|
||||
PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|decode|encode|\\x|chr\(' | head -10 || true)
|
||||
if [ -n "$PROC_HITS" ]; then
|
||||
CRITICAL=true
|
||||
FINDINGS="${FINDINGS}
|
||||
### 🚨 CRITICAL: subprocess with encoded/obfuscated command
|
||||
Subprocess calls whose command strings are base64- or hex-encoded are a strong indicator of payload execution.
|
||||
Subprocess calls with encoded arguments are a strong indicator of payload execution.
|
||||
|
||||
**Matches:**
|
||||
\`\`\`
|
||||
@@ -95,12 +107,25 @@ jobs:
|
||||
"
|
||||
fi
|
||||
|
||||
# --- Install-hook files (setup.py/sitecustomize/usercustomize/__init__.pth) ---
|
||||
# These execute during pip install or interpreter startup.
|
||||
SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(^|/)(setup\.py|setup\.cfg|sitecustomize\.py|usercustomize\.py|__init__\.pth)$' || true)
|
||||
# --- Network calls to non-standard domains ---
|
||||
EXFIL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'requests\.(post|put)\(|httpx\.(post|put)\(|urllib\.request\.urlopen' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert' | head -10 || true)
|
||||
if [ -n "$EXFIL_HITS" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### ⚠️ WARNING: Outbound network calls (POST/PUT)
|
||||
Outbound POST/PUT requests in new code could be data exfiltration. Verify the destination URLs are legitimate.
|
||||
|
||||
**Matches (first 10):**
|
||||
\`\`\`
|
||||
${EXFIL_HITS}
|
||||
\`\`\`
|
||||
"
|
||||
fi
|
||||
|
||||
# --- setup.py / setup.cfg install hooks ---
|
||||
SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(setup\.py|setup\.cfg|__init__\.pth|sitecustomize\.py|usercustomize\.py)$' || true)
|
||||
if [ -n "$SETUP_HITS" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### 🚨 CRITICAL: Install-hook file added or modified
|
||||
### ⚠️ WARNING: Install hook files modified
|
||||
These files can execute code during package installation or interpreter startup.
|
||||
|
||||
**Files:**
|
||||
@@ -110,96 +135,58 @@ jobs:
|
||||
"
|
||||
fi
|
||||
|
||||
# --- Compile/marshal/pickle (code object injection) ---
|
||||
MARSHAL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'marshal\.loads|pickle\.loads|compile\(' | grep -v '^\+\s*#' | grep -v 'test_\|re\.compile\|ast\.compile' | head -10 || true)
|
||||
if [ -n "$MARSHAL_HITS" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### ⚠️ WARNING: marshal/pickle/compile usage
|
||||
These can deserialize or construct executable code objects.
|
||||
|
||||
**Matches:**
|
||||
\`\`\`
|
||||
${MARSHAL_HITS}
|
||||
\`\`\`
|
||||
"
|
||||
fi
|
||||
|
||||
# --- Output results ---
|
||||
if [ -n "$FINDINGS" ]; then
|
||||
echo "found=true" >> "$GITHUB_OUTPUT"
|
||||
if [ "$CRITICAL" = true ]; then
|
||||
echo "critical=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "critical=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
# Write findings to a file (multiline env vars are fragile)
|
||||
echo "$FINDINGS" > /tmp/findings.md
|
||||
else
|
||||
echo "found=false" >> "$GITHUB_OUTPUT"
|
||||
echo "critical=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Post critical finding comment
|
||||
- name: Post warning comment
|
||||
if: steps.scan.outputs.found == 'true'
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
BODY="## 🚨 CRITICAL Supply Chain Risk Detected
|
||||
SEVERITY="⚠️ Supply Chain Risk Detected"
|
||||
if [ "${{ steps.scan.outputs.critical }}" = "true" ]; then
|
||||
SEVERITY="🚨 CRITICAL Supply Chain Risk Detected"
|
||||
fi
|
||||
|
||||
This PR contains a pattern that has been used in real supply chain attacks. A maintainer must review the flagged code carefully before merging.
|
||||
BODY="## ${SEVERITY}
|
||||
|
||||
This PR contains patterns commonly associated with supply chain attacks. This does **not** mean the PR is malicious — but these patterns require careful human review before merging.
|
||||
|
||||
$(cat /tmp/findings.md)
|
||||
|
||||
---
|
||||
*Scanner only fires on high-signal indicators: .pth files, base64+exec/eval combos, subprocess with encoded commands, or install-hook files. Low-signal warnings were removed intentionally — if you're seeing this comment, the finding is worth inspecting.*"
|
||||
*Automated scan triggered by [supply-chain-audit](/.github/workflows/supply-chain-audit.yml). If this is a false positive, a maintainer can approve after manual review.*"
|
||||
|
||||
gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs — GITHUB_TOKEN is read-only)"
|
||||
gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY"
|
||||
|
||||
- name: Fail on critical findings
|
||||
if: steps.scan.outputs.found == 'true'
|
||||
if: steps.scan.outputs.critical == 'true'
|
||||
run: |
|
||||
echo "::error::CRITICAL supply chain risk patterns detected in this PR. See the PR comment for details."
|
||||
exit 1
|
||||
|
||||
dep-bounds:
|
||||
name: Check PyPI dependency upper bounds
|
||||
runs-on: ubuntu-latest
|
||||
if: contains(github.event.pull_request.changed_files_url, 'pyproject.toml') || true
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check for unbounded PyPI deps
|
||||
id: bounds
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
BASE="${{ github.event.pull_request.base.sha }}"
|
||||
HEAD="${{ github.event.pull_request.head.sha }}"
|
||||
|
||||
# Only check added lines in pyproject.toml
|
||||
ADDED=$(git diff "$BASE".."$HEAD" -- pyproject.toml | grep '^+' | grep -v '^+++' || true)
|
||||
|
||||
if [ -z "$ADDED" ]; then
|
||||
echo "found=false" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Match PyPI dep specs that have >= but no < ceiling.
|
||||
# Pattern: "package>=version" without a following ",<" bound.
|
||||
# Excludes git+ URLs (which use commit SHAs) and comments.
|
||||
UNBOUNDED=$(echo "$ADDED" | grep -oE '"[a-zA-Z0-9_-]+(\[[^\]]*\])?>=[ 0-9.]+"' | grep -v ',<' || true)
|
||||
|
||||
if [ -n "$UNBOUNDED" ]; then
|
||||
echo "found=true" >> "$GITHUB_OUTPUT"
|
||||
echo "$UNBOUNDED" > /tmp/unbounded.txt
|
||||
else
|
||||
echo "found=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Post unbounded dep warning
|
||||
if: steps.bounds.outputs.found == 'true'
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
BODY="## ⚠️ Unbounded PyPI Dependency Detected
|
||||
|
||||
This PR adds PyPI dependencies without a \`<next_major\` upper bound. Per our [supply chain policy](../blob/main/CONTRIBUTING.md#dependency-pinning-policy-supply-chain-hardening), all PyPI deps must be pinned as \`>=floor,<next_major\`.
|
||||
|
||||
**Unbounded specs found:**
|
||||
\`\`\`
|
||||
$(cat /tmp/unbounded.txt)
|
||||
\`\`\`
|
||||
|
||||
**Fix:** Add an upper bound, e.g. \`\"package>=1.2.0,<2\"\`
|
||||
|
||||
---
|
||||
*See PR #2810 and CONTRIBUTING.md for the full policy rationale.*"
|
||||
|
||||
gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs)"
|
||||
|
||||
- name: Fail on unbounded deps
|
||||
if: steps.bounds.outputs.found == 'true'
|
||||
run: |
|
||||
echo "::error::PyPI dependencies without upper bounds detected. Add <next_major ceiling per CONTRIBUTING.md policy."
|
||||
exit 1
|
||||
|
||||
26
.github/workflows/tests.yml
vendored
26
.github/workflows/tests.yml
vendored
@@ -3,17 +3,8 @@ name: Tests
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- '**/*.md'
|
||||
- 'docs/**'
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- '**/*.md'
|
||||
- 'docs/**'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# Cancel in-progress runs for the same PR/branch
|
||||
concurrency:
|
||||
@@ -23,16 +14,16 @@ concurrency:
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install system dependencies
|
||||
run: sudo apt-get update && sudo apt-get install -y ripgrep
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
uses: astral-sh/setup-uv@v5
|
||||
|
||||
- name: Set up Python 3.11
|
||||
run: uv python install 3.11
|
||||
@@ -46,7 +37,7 @@ jobs:
|
||||
- name: Run tests
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pytest tests/ -q --ignore=tests/integration --ignore=tests/e2e --tb=short -n auto --timeout=30 --timeout-method=signal
|
||||
python -m pytest tests/ -q --ignore=tests/integration --ignore=tests/e2e --tb=short -n auto
|
||||
env:
|
||||
# Ensure tests don't accidentally call real APIs
|
||||
OPENROUTER_API_KEY: ""
|
||||
@@ -55,16 +46,13 @@ jobs:
|
||||
|
||||
e2e:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 15
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install system dependencies
|
||||
run: sudo apt-get update && sudo apt-get install -y ripgrep
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
uses: astral-sh/setup-uv@v5
|
||||
|
||||
- name: Set up Python 3.11
|
||||
run: uv python install 3.11
|
||||
|
||||
164
.github/workflows/upload_to_pypi.yml
vendored
164
.github/workflows/upload_to_pypi.yml
vendored
@@ -1,164 +0,0 @@
|
||||
name: Publish to PyPI
|
||||
|
||||
# Triggered by CalVer tag pushes from scripts/release.py (e.g. v2026.5.15)
|
||||
# Can also be triggered manually from the Actions tab as an escape hatch.
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'v20*' # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
confirm_tag:
|
||||
description: 'Tag to publish (e.g. v2026.5.15). Must already exist.'
|
||||
required: true
|
||||
type: string
|
||||
|
||||
# Restrict default token to read-only; each job escalates as needed.
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# Prevent overlapping publishes (e.g. two same-day tags pushed quickly).
|
||||
concurrency:
|
||||
group: pypi-publish
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Build distribution 📦
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
# On workflow_dispatch, check out the confirmed tag.
|
||||
ref: ${{ inputs.confirm_tag || github.ref }}
|
||||
fetch-tags: true
|
||||
|
||||
- name: Validate tag exists
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
run: |
|
||||
if ! git tag -l "${{ inputs.confirm_tag }}" | grep -q .; then
|
||||
echo "::error::Tag '${{ inputs.confirm_tag }}' does not exist in the repo"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.13'
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6
|
||||
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
with:
|
||||
node-version: '22'
|
||||
|
||||
- name: Build web dashboard
|
||||
run: cd web && npm ci && npm run build
|
||||
|
||||
- name: Build TUI bundle
|
||||
run: cd ui-tui && npm ci && npm run build
|
||||
|
||||
- name: Bundle TUI into hermes_cli
|
||||
run: |
|
||||
mkdir -p hermes_cli/tui_dist
|
||||
cp ui-tui/dist/entry.js hermes_cli/tui_dist/entry.js
|
||||
|
||||
- name: Verify frontend assets exist
|
||||
run: |
|
||||
test -f hermes_cli/web_dist/index.html || { echo "ERROR: web_dist not built"; exit 1; }
|
||||
test -f hermes_cli/tui_dist/entry.js || { echo "ERROR: tui_dist not built"; exit 1; }
|
||||
|
||||
- name: Bundle install scripts into wheel
|
||||
run: |
|
||||
mkdir -p hermes_cli/scripts
|
||||
cp scripts/install.sh hermes_cli/scripts/install.sh
|
||||
cp scripts/install.ps1 hermes_cli/scripts/install.ps1
|
||||
|
||||
- name: Build wheel and sdist
|
||||
run: uv build --sdist --wheel
|
||||
|
||||
- name: Upload distribution artifacts
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
publish:
|
||||
name: Publish to PyPI
|
||||
needs: build
|
||||
runs-on: ubuntu-latest
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/p/hermes-agent
|
||||
permissions:
|
||||
id-token: write # OIDC trusted publishing
|
||||
|
||||
steps:
|
||||
- name: Download distribution artifacts
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
- name: Publish to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
|
||||
with:
|
||||
skip-existing: true
|
||||
|
||||
sign:
|
||||
name: Sign and attach to GitHub Release
|
||||
# Only runs on tag pushes — release.py creates the GitHub Release,
|
||||
# and workflow_dispatch won't have a matching release to attach to.
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
needs: publish
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write # attach assets to the existing release
|
||||
id-token: write # sigstore signing
|
||||
|
||||
steps:
|
||||
- name: Download distribution artifacts
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
- name: Wait for GitHub Release to exist
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ github.token }}
|
||||
# release.py creates the GitHub Release after pushing the tag,
|
||||
# but this workflow starts from the tag push — wait for it.
|
||||
run: |
|
||||
for i in $(seq 1 30); do
|
||||
if gh release view "$GITHUB_REF_NAME" --repo "$GITHUB_REPOSITORY" >/dev/null 2>&1; then
|
||||
echo "Release $GITHUB_REF_NAME found"
|
||||
exit 0
|
||||
fi
|
||||
echo "Waiting for release... ($i/30)"
|
||||
sleep 10
|
||||
done
|
||||
echo "::warning::Release $GITHUB_REF_NAME not found after 5 minutes — skipping signature upload"
|
||||
echo "skip_sign=true" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Sign with Sigstore
|
||||
if: env.skip_sign != 'true'
|
||||
uses: sigstore/gh-action-sigstore-python@04cffa1d795717b140764e8b640de88853c92acc # v3.3.0
|
||||
with:
|
||||
inputs: >-
|
||||
./dist/*.tar.gz
|
||||
./dist/*.whl
|
||||
|
||||
- name: Attach signed artifacts to GitHub Release
|
||||
if: env.skip_sign != 'true'
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ github.token }}
|
||||
# release.py already created the GitHub Release — just upload
|
||||
# the Sigstore signatures alongside the existing assets.
|
||||
run: >-
|
||||
gh release upload
|
||||
"$GITHUB_REF_NAME" dist/*.sigstore.json
|
||||
--repo "$GITHUB_REPOSITORY"
|
||||
--clobber
|
||||
119
.github/workflows/uv-lockfile-check.yml
vendored
119
.github/workflows/uv-lockfile-check.yml
vendored
@@ -1,119 +0,0 @@
|
||||
name: uv.lock check
|
||||
|
||||
# Verify uv.lock is in sync with pyproject.toml. Blocking check — PRs
|
||||
# that modify pyproject.toml without regenerating uv.lock (or vice versa)
|
||||
# must not merge, because the Docker build's `uv sync --frozen` step will
|
||||
# fail on a stale lockfile and we'd rather catch it here than in the
|
||||
# docker-publish workflow on main.
|
||||
#
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
# IMPORTANT: this check runs against the MERGED state, not just your branch
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# For `pull_request` events, GitHub checks out `refs/pull/<N>/merge` by
|
||||
# default — a synthetic commit that merges your PR branch into the CURRENT
|
||||
# state of `main`. That means the pyproject.toml evaluated here is
|
||||
# `main's pyproject.toml + your PR's changes to pyproject.toml`, not just
|
||||
# what's on your branch.
|
||||
#
|
||||
# Failure mode this creates: if `main` has advanced since you branched
|
||||
# (e.g. someone merged a PR that added a dep to pyproject.toml + its
|
||||
# corresponding uv.lock entries), your branch's uv.lock is missing those
|
||||
# new entries. `uv lock --check` resolves against the merged pyproject
|
||||
# and sees a lockfile that doesn't cover all the current deps → fails
|
||||
# with "The lockfile at uv.lock needs to be updated."
|
||||
#
|
||||
# This can be confusing: `uv lock --check` passes locally (your branch
|
||||
# is internally consistent) but fails in CI (merged state isn't).
|
||||
#
|
||||
# Fix is to sync your branch with main and regenerate the lockfile:
|
||||
#
|
||||
# git fetch origin main
|
||||
# git rebase origin/main # or merge, whatever the repo prefers
|
||||
# uv lock # regenerates uv.lock against new pyproject.toml
|
||||
# git add uv.lock
|
||||
# git commit -m "chore: refresh uv.lock after rebase onto main"
|
||||
# git push --force-with-lease # if you rebased
|
||||
#
|
||||
# If you also changed pyproject.toml in your PR, `uv lock` handles that
|
||||
# at the same time — one regeneration covers both your changes and the
|
||||
# drift from main.
|
||||
#
|
||||
# This is the correct behavior! The check is protecting main's Docker
|
||||
# build: a post-merge build would see the same merged state and fail
|
||||
# the same way. Better to catch it here than after merge.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
- '.github/workflows/uv-lockfile-check.yml'
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
- '.github/workflows/uv-lockfile-check.yml'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: uv-lockfile-check-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
jobs:
|
||||
check:
|
||||
name: uv lock --check
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
# `uv lock --check` re-resolves the project from pyproject.toml and
|
||||
# compares the result to uv.lock, exiting non-zero if they disagree.
|
||||
# No network writes, no file modifications.
|
||||
#
|
||||
# On PRs this runs against the merge commit (see comment at the top
|
||||
# of this file) — failures often mean "your branch is behind main,
|
||||
# rebase and regenerate uv.lock."
|
||||
- name: Verify uv.lock is up-to-date
|
||||
run: |
|
||||
if ! uv lock --check; then
|
||||
cat <<'EOF' >> "$GITHUB_STEP_SUMMARY"
|
||||
## ❌ uv.lock is out of sync with pyproject.toml
|
||||
|
||||
**If this is a PR:** this check runs against the merged state
|
||||
(your branch + current `main`), not just your branch. If
|
||||
`uv lock --check` passes locally, your branch is likely behind
|
||||
`main` — recent changes to `pyproject.toml` on `main` aren't
|
||||
reflected in your branch's `uv.lock` yet.
|
||||
|
||||
To fix, sync with main and regenerate the lockfile:
|
||||
|
||||
```bash
|
||||
git fetch origin main
|
||||
git rebase origin/main # or `git merge origin/main`
|
||||
uv lock # regenerate against new pyproject.toml
|
||||
git add uv.lock
|
||||
git commit -m "chore: refresh uv.lock after syncing with main"
|
||||
git push --force-with-lease # drop --force-with-lease if you merged
|
||||
```
|
||||
|
||||
**If you only changed pyproject.toml:** run `uv lock` locally
|
||||
and commit the result.
|
||||
|
||||
This check is blocking because the Docker image build uses
|
||||
`uv sync --frozen --extra all`, which rejects stale lockfiles
|
||||
— catching it here avoids a ~15 min failed docker-publish run
|
||||
on `main` post-merge.
|
||||
EOF
|
||||
echo "::error title=uv.lock out of sync::Run \`uv lock\` locally and commit the result. If on a PR, sync with main first."
|
||||
exit 1
|
||||
fi
|
||||
15
.gitignore
vendored
15
.gitignore
vendored
@@ -1,4 +1,3 @@
|
||||
.DS_Store
|
||||
/venv/
|
||||
/_pycache/
|
||||
*.pyc*
|
||||
@@ -52,24 +51,10 @@ ignored/
|
||||
.worktrees/
|
||||
environments/benchmarks/evals/
|
||||
|
||||
# Web UI build output
|
||||
hermes_cli/web_dist/
|
||||
|
||||
# Web UI assets — synced from @nous-research/ui at build time via
|
||||
# `npm run sync-assets` (see web/package.json).
|
||||
web/public/fonts/
|
||||
web/public/ds-assets/
|
||||
|
||||
# Release script temp files
|
||||
.release_notes.md
|
||||
mini-swe-agent/
|
||||
|
||||
# Nix
|
||||
.direnv/
|
||||
.nix-stamps/
|
||||
result
|
||||
website/static/api/skills-index.json
|
||||
models-dev-upstream/
|
||||
hermes_cli/tui_dist/*
|
||||
hermes_cli/scripts/
|
||||
docs/superpowers/*
|
||||
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
[submodule "tinker-atropos"]
|
||||
path = tinker-atropos
|
||||
url = https://github.com/nousresearch/tinker-atropos
|
||||
108
.mailmap
108
.mailmap
@@ -1,108 +0,0 @@
|
||||
# .mailmap — canonical author mapping for git shortlog / git log / GitHub
|
||||
# Format: Canonical Name <canonical@email> <commit@email>
|
||||
# See: https://git-scm.com/docs/gitmailmap
|
||||
#
|
||||
# This maps commit emails to GitHub noreply addresses so that:
|
||||
# 1. `git shortlog -sn` shows deduplicated contributor counts
|
||||
# 2. GitHub's contributor graph can attribute commits correctly
|
||||
# 3. Contributors with personal/work emails get proper credit
|
||||
#
|
||||
# When adding entries: use the contributor's GitHub noreply email as canonical
|
||||
# so GitHub can link commits to their profile.
|
||||
|
||||
# === Teknium (multiple emails) ===
|
||||
Teknium <127238744+teknium1@users.noreply.github.com> <teknium1@gmail.com>
|
||||
Teknium <127238744+teknium1@users.noreply.github.com> <teknium@nousresearch.com>
|
||||
|
||||
# === Contributors — personal/work emails mapped to GitHub noreply ===
|
||||
# Format: Canonical Name <GH-noreply> <commit-email>
|
||||
|
||||
# Verified via GH API email search
|
||||
luyao618 <364939526@qq.com> <364939526@qq.com>
|
||||
ethernet8023 <arilotter@gmail.com> <arilotter@gmail.com>
|
||||
nicoloboschi <boschi1997@gmail.com> <boschi1997@gmail.com>
|
||||
cherifya <chef.ya@gmail.com> <chef.ya@gmail.com>
|
||||
BongSuCHOI <chlqhdtn98@gmail.com> <chlqhdtn98@gmail.com>
|
||||
dsocolobsky <dsocolobsky@gmail.com> <dsocolobsky@gmail.com>
|
||||
pefontana <fontana.pedro93@gmail.com> <fontana.pedro93@gmail.com>
|
||||
Helmi <frank@helmschrott.de> <frank@helmschrott.de>
|
||||
hata1234 <hata1234@gmail.com> <hata1234@gmail.com>
|
||||
|
||||
# Verified via PR investigation / salvage PR bodies
|
||||
DeployFaith <agents@kylefrench.dev> <agents@kylefrench.dev>
|
||||
flobo3 <floptopbot33@gmail.com> <floptopbot33@gmail.com>
|
||||
gaixianggeng <gaixg94@gmail.com> <gaixg94@gmail.com>
|
||||
KUSH42 <xush@xush.org> <xush@xush.org>
|
||||
konsisumer <der@konsi.org> <der@konsi.org>
|
||||
WorldInnovationsDepartment <vorvul.danylo@gmail.com> <vorvul.danylo@gmail.com>
|
||||
m0n5t3r <iacobs@m0n5t3r.info> <iacobs@m0n5t3r.info>
|
||||
sprmn24 <oncuevtv@gmail.com> <oncuevtv@gmail.com>
|
||||
fancydirty <fancydirty@gmail.com> <fancydirty@gmail.com>
|
||||
fxfitz <francis.x.fitzpatrick@gmail.com> <francis.x.fitzpatrick@gmail.com>
|
||||
limars874 <limars874@gmail.com> <limars874@gmail.com>
|
||||
AaronWong1999 <aaronwong1999@icloud.com> <aaronwong1999@icloud.com>
|
||||
dippwho <dipp.who@gmail.com> <dipp.who@gmail.com>
|
||||
duerzy <duerzy@gmail.com> <duerzy@gmail.com>
|
||||
geoffwellman <geoff.wellman@gmail.com> <geoff.wellman@gmail.com>
|
||||
hcshen0111 <shenhaocheng19990111@gmail.com> <shenhaocheng19990111@gmail.com>
|
||||
jamesarch <han.shan@live.cn> <han.shan@live.cn>
|
||||
stephenschoettler <stephenschoettler@gmail.com> <stephenschoettler@gmail.com>
|
||||
Tranquil-Flow <tranquil_flow@protonmail.com> <tranquil_flow@protonmail.com>
|
||||
Dusk1e <yusufalweshdemir@gmail.com> <yusufalweshdemir@gmail.com>
|
||||
Awsh1 <ysfalweshcan@gmail.com> <ysfalweshcan@gmail.com>
|
||||
WAXLYY <ysfwaxlycan@gmail.com> <ysfwaxlycan@gmail.com>
|
||||
donrhmexe <don.rhm@gmail.com> <don.rhm@gmail.com>
|
||||
hqhq1025 <1506751656@qq.com> <1506751656@qq.com>
|
||||
BlackishGreen33 <s5460703@gmail.com> <s5460703@gmail.com>
|
||||
tomqiaozc <zqiao@microsoft.com> <zqiao@microsoft.com>
|
||||
MagicRay1217 <mingjwan@microsoft.com> <mingjwan@microsoft.com>
|
||||
aaronagent <1115117931@qq.com> <1115117931@qq.com>
|
||||
YoungYang963 <young@YoungdeMacBook-Pro.local> <young@YoungdeMacBook-Pro.local>
|
||||
LongOddCode <haolong@microsoft.com> <haolong@microsoft.com>
|
||||
Cafexss <coffeemjj@gmail.com> <coffeemjj@gmail.com>
|
||||
Cygra <sjtuwbh@gmail.com> <sjtuwbh@gmail.com>
|
||||
DomGrieco <dgrieco@redhat.com> <dgrieco@redhat.com>
|
||||
|
||||
# Duplicate email mapping (same person, multiple emails)
|
||||
Sertug17 <104278804+Sertug17@users.noreply.github.com> <srhtsrht17@gmail.com>
|
||||
yyovil <birdiegyal@gmail.com> <tanishq231003@gmail.com>
|
||||
DomGrieco <dgrieco@redhat.com> <dgrieco@redhat.com>
|
||||
dsocolobsky <dsocolobsky@gmail.com> <dylan.socolobsky@lambdaclass.com>
|
||||
olafthiele <programming@olafthiele.com> <olafthiele@gmail.com>
|
||||
|
||||
# Verified via git display name matching GH contributor username
|
||||
cokemine <aptx4561@gmail.com> <aptx4561@gmail.com>
|
||||
dalianmao000 <dalianmao0107@gmail.com> <dalianmao0107@gmail.com>
|
||||
emozilla <emozilla@nousresearch.com> <emozilla@nousresearch.com>
|
||||
jjovalle99 <juan.ovalle@mistral.ai> <juan.ovalle@mistral.ai>
|
||||
kagura-agent <kagura.chen28@gmail.com> <kagura.chen28@gmail.com>
|
||||
spniyant <niyant@spicefi.xyz> <niyant@spicefi.xyz>
|
||||
olafthiele <programming@olafthiele.com> <programming@olafthiele.com>
|
||||
r266-tech <r2668940489@gmail.com> <r2668940489@gmail.com>
|
||||
xingkongliang <tianliangjay@gmail.com> <tianliangjay@gmail.com>
|
||||
win4r <win4r@outlook.com> <win4r@outlook.com>
|
||||
zhouboli <zhouboli@gmail.com> <zhouboli@gmail.com>
|
||||
yongtenglei <yongtenglei@gmail.com> <yongtenglei@gmail.com>
|
||||
|
||||
# Nous Research team
|
||||
benbarclay <ben@nousresearch.com> <ben@nousresearch.com>
|
||||
jquesnelle <jonny@nousresearch.com> <jonny@nousresearch.com>
|
||||
|
||||
# GH contributor list verified
|
||||
spideystreet <dhicham.pro@gmail.com> <dhicham.pro@gmail.com>
|
||||
dorukardahan <dorukardahan@hotmail.com> <dorukardahan@hotmail.com>
|
||||
MustafaKara7 <karamusti912@gmail.com> <karamusti912@gmail.com>
|
||||
Hmbown <hmbown@gmail.com> <hmbown@gmail.com>
|
||||
kamil-gwozdz <kamil@gwozdz.me> <kamil@gwozdz.me>
|
||||
kira-ariaki <kira@ariaki.me> <kira@ariaki.me>
|
||||
knopki <knopki@duck.com> <knopki@duck.com>
|
||||
Unayung <unayung@gmail.com> <unayung@gmail.com>
|
||||
SeeYangZhi <yangzhi.see@gmail.com> <yangzhi.see@gmail.com>
|
||||
Julientalbot <julien.talbot@ergonomia.re> <julien.talbot@ergonomia.re>
|
||||
lesterli <lisicheng168@gmail.com> <lisicheng168@gmail.com>
|
||||
JiayuuWang <jiayuw794@gmail.com> <jiayuw794@gmail.com>
|
||||
tesseracttars-creator <tesseracttars@gmail.com> <tesseracttars@gmail.com>
|
||||
xinbenlv <zzn+pa@zzn.im> <zzn+pa@zzn.im>
|
||||
SaulJWu <saul.jj.wu@gmail.com> <saul.jj.wu@gmail.com>
|
||||
angelos <angelos@oikos.lan.home.malaiwah.com> <angelos@oikos.lan.home.malaiwah.com>
|
||||
MestreY0d4-Uninter <241404605+MestreY0d4-Uninter@users.noreply.github.com> <MestreY0d4-Uninter@users.noreply.github.com>
|
||||
791
AGENTS.md
791
AGENTS.md
@@ -5,66 +5,65 @@ Instructions for AI coding assistants and developers working on the hermes-agent
|
||||
## Development Environment
|
||||
|
||||
```bash
|
||||
# Prefer .venv; fall back to venv if that's what your checkout has.
|
||||
source .venv/bin/activate # or: source venv/bin/activate
|
||||
source venv/bin/activate # ALWAYS activate before running Python
|
||||
```
|
||||
|
||||
`scripts/run_tests.sh` probes `.venv` first, then `venv`, then
|
||||
`$HOME/.hermes/hermes-agent/venv` (for worktrees that share a venv with the
|
||||
main checkout).
|
||||
|
||||
## Project Structure
|
||||
|
||||
File counts shift constantly — don't treat the tree below as exhaustive.
|
||||
The canonical source is the filesystem. The notes call out the load-bearing
|
||||
entry points you'll actually edit.
|
||||
|
||||
```
|
||||
hermes-agent/
|
||||
├── run_agent.py # AIAgent class — core conversation loop (~12k LOC)
|
||||
├── model_tools.py # Tool orchestration, discover_builtin_tools(), handle_function_call()
|
||||
├── run_agent.py # AIAgent class — core conversation loop
|
||||
├── model_tools.py # Tool orchestration, _discover_tools(), handle_function_call()
|
||||
├── toolsets.py # Toolset definitions, _HERMES_CORE_TOOLS list
|
||||
├── cli.py # HermesCLI class — interactive CLI orchestrator (~11k LOC)
|
||||
├── cli.py # HermesCLI class — interactive CLI orchestrator
|
||||
├── hermes_state.py # SessionDB — SQLite session store (FTS5 search)
|
||||
├── hermes_constants.py # get_hermes_home(), display_hermes_home() — profile-aware paths
|
||||
├── hermes_logging.py # setup_logging() — agent.log / errors.log / gateway.log (profile-aware)
|
||||
├── batch_runner.py # Parallel batch processing
|
||||
├── agent/ # Agent internals (provider adapters, memory, caching, compression, etc.)
|
||||
├── hermes_cli/ # CLI subcommands, setup wizard, plugins loader, skin engine
|
||||
├── tools/ # Tool implementations — auto-discovered via tools/registry.py
|
||||
├── agent/ # Agent internals
|
||||
│ ├── prompt_builder.py # System prompt assembly
|
||||
│ ├── context_compressor.py # Auto context compression
|
||||
│ ├── prompt_caching.py # Anthropic prompt caching
|
||||
│ ├── auxiliary_client.py # Auxiliary LLM client (vision, summarization)
|
||||
│ ├── model_metadata.py # Model context lengths, token estimation
|
||||
│ ├── models_dev.py # models.dev registry integration (provider-aware context)
|
||||
│ ├── display.py # KawaiiSpinner, tool preview formatting
|
||||
│ ├── skill_commands.py # Skill slash commands (shared CLI/gateway)
|
||||
│ └── trajectory.py # Trajectory saving helpers
|
||||
├── hermes_cli/ # CLI subcommands and setup
|
||||
│ ├── main.py # Entry point — all `hermes` subcommands
|
||||
│ ├── config.py # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration
|
||||
│ ├── commands.py # Slash command definitions + SlashCommandCompleter
|
||||
│ ├── callbacks.py # Terminal callbacks (clarify, sudo, approval)
|
||||
│ ├── setup.py # Interactive setup wizard
|
||||
│ ├── skin_engine.py # Skin/theme engine — CLI visual customization
|
||||
│ ├── skills_config.py # `hermes skills` — enable/disable skills per platform
|
||||
│ ├── tools_config.py # `hermes tools` — enable/disable tools per platform
|
||||
│ ├── skills_hub.py # `/skills` slash command (search, browse, install)
|
||||
│ ├── models.py # Model catalog, provider model lists
|
||||
│ ├── model_switch.py # Shared /model switch pipeline (CLI + gateway)
|
||||
│ └── auth.py # Provider credential resolution
|
||||
├── tools/ # Tool implementations (one file per tool)
|
||||
│ ├── registry.py # Central tool registry (schemas, handlers, dispatch)
|
||||
│ ├── approval.py # Dangerous command detection
|
||||
│ ├── terminal_tool.py # Terminal orchestration
|
||||
│ ├── process_registry.py # Background process management
|
||||
│ ├── file_tools.py # File read/write/search/patch
|
||||
│ ├── web_tools.py # Web search/extract (Parallel + Firecrawl)
|
||||
│ ├── browser_tool.py # Browserbase browser automation
|
||||
│ ├── code_execution_tool.py # execute_code sandbox
|
||||
│ ├── delegate_tool.py # Subagent delegation
|
||||
│ ├── mcp_tool.py # MCP client (~1050 lines)
|
||||
│ └── environments/ # Terminal backends (local, docker, ssh, modal, daytona, singularity)
|
||||
├── gateway/ # Messaging gateway — run.py + session.py + platforms/
|
||||
│ ├── platforms/ # Adapter per platform (telegram, discord, slack, whatsapp,
|
||||
│ │ # homeassistant, signal, matrix, mattermost, email, sms,
|
||||
│ │ # dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
|
||||
│ │ # yuanbao, webhook, api_server, ...). See ADDING_A_PLATFORM.md.
|
||||
│ └── builtin_hooks/ # Extension point for always-registered gateway hooks (none shipped)
|
||||
├── plugins/ # Plugin system (see "Plugins" section below)
|
||||
│ ├── memory/ # Memory-provider plugins (honcho, mem0, supermemory, ...)
|
||||
│ ├── context_engine/ # Context-engine plugins
|
||||
│ ├── model-providers/ # Inference backend plugins (openrouter, anthropic, gmi, ...)
|
||||
│ ├── kanban/ # Multi-agent board dispatcher + worker plugin
|
||||
│ ├── hermes-achievements/ # Gamified achievement tracking
|
||||
│ ├── observability/ # Metrics / traces / logs plugin
|
||||
│ ├── image_gen/ # Image-generation providers
|
||||
│ └── <others>/ # disk-cleanup, example-dashboard, google_meet, platforms,
|
||||
│ # spotify, strike-freedom-cockpit, ...
|
||||
├── optional-skills/ # Heavier/niche skills shipped but NOT active by default
|
||||
├── skills/ # Built-in skills bundled with the repo
|
||||
├── ui-tui/ # Ink (React) terminal UI — `hermes --tui`
|
||||
│ └── src/ # entry.tsx, app.tsx, gatewayClient.ts + app/components/hooks/lib
|
||||
├── tui_gateway/ # Python JSON-RPC backend for the TUI
|
||||
├── gateway/ # Messaging platform gateway
|
||||
│ ├── run.py # Main loop, slash commands, message dispatch
|
||||
│ ├── session.py # SessionStore — conversation persistence
|
||||
│ └── platforms/ # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal
|
||||
├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains integration)
|
||||
├── cron/ # Scheduler — jobs.py, scheduler.py
|
||||
├── scripts/ # run_tests.sh, release.py, auxiliary scripts
|
||||
├── website/ # Docusaurus docs site
|
||||
└── tests/ # Pytest suite (~17k tests across ~900 files as of May 2026)
|
||||
├── cron/ # Scheduler (jobs.py, scheduler.py)
|
||||
├── environments/ # RL training environments (Atropos)
|
||||
├── tests/ # Pytest suite (~3000 tests)
|
||||
└── batch_runner.py # Parallel batch processing
|
||||
```
|
||||
|
||||
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only).
|
||||
**Logs:** `~/.hermes/logs/` — `agent.log` (INFO+), `errors.log` (WARNING+),
|
||||
`gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`.
|
||||
Browse with `hermes logs [--follow] [--level ...] [--session ...]`.
|
||||
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys)
|
||||
|
||||
## File Dependency Chain
|
||||
|
||||
@@ -82,30 +81,20 @@ run_agent.py, cli.py, batch_runner.py, environments/
|
||||
|
||||
## AIAgent Class (run_agent.py)
|
||||
|
||||
The real `AIAgent.__init__` takes ~60 parameters (credentials, routing, callbacks,
|
||||
session context, budget, credential pool, etc.). The signature below is the
|
||||
minimum subset you'll usually touch — read `run_agent.py` for the full list.
|
||||
|
||||
```python
|
||||
class AIAgent:
|
||||
def __init__(self,
|
||||
base_url: str = None,
|
||||
api_key: str = None,
|
||||
provider: str = None,
|
||||
api_mode: str = None, # "chat_completions" | "codex_responses" | ...
|
||||
model: str = "", # empty → resolved from config/provider later
|
||||
max_iterations: int = 90, # tool-calling iterations (shared with subagents)
|
||||
model: str = "anthropic/claude-opus-4.6",
|
||||
max_iterations: int = 90,
|
||||
enabled_toolsets: list = None,
|
||||
disabled_toolsets: list = None,
|
||||
quiet_mode: bool = False,
|
||||
save_trajectories: bool = False,
|
||||
platform: str = None, # "cli", "telegram", etc.
|
||||
platform: str = None, # "cli", "telegram", etc.
|
||||
session_id: str = None,
|
||||
skip_context_files: bool = False,
|
||||
skip_memory: bool = False,
|
||||
credential_pool=None,
|
||||
# ... plus callbacks, thread/user/chat IDs, iteration_budget, fallback_model,
|
||||
# checkpoints config, prefill_messages, service_tier, reasoning_config, etc.
|
||||
# ... plus provider, api_mode, callbacks, routing params
|
||||
): ...
|
||||
|
||||
def chat(self, message: str) -> str:
|
||||
@@ -118,13 +107,10 @@ class AIAgent:
|
||||
|
||||
### Agent Loop
|
||||
|
||||
The core loop is inside `run_conversation()` — entirely synchronous, with
|
||||
interrupt checks, budget tracking, and a one-turn grace call:
|
||||
The core loop is inside `run_conversation()` — entirely synchronous:
|
||||
|
||||
```python
|
||||
while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) \
|
||||
or self._budget_grace_call:
|
||||
if self._interrupt_requested: break
|
||||
while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
|
||||
response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas)
|
||||
if response.tool_calls:
|
||||
for tool_call in response.tool_calls:
|
||||
@@ -135,8 +121,7 @@ while (api_call_count < self.max_iterations and self.iteration_budget.remaining
|
||||
return response.content
|
||||
```
|
||||
|
||||
Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`.
|
||||
Reasoning content is stored in `assistant_msg["reasoning"]`.
|
||||
Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`.
|
||||
|
||||
---
|
||||
|
||||
@@ -194,84 +179,9 @@ if canonical == "mycommand":
|
||||
|
||||
---
|
||||
|
||||
## TUI Architecture (ui-tui + tui_gateway)
|
||||
|
||||
The TUI is a full replacement for the classic (prompt_toolkit) CLI, activated via `hermes --tui` or `HERMES_TUI=1`.
|
||||
|
||||
### Process Model
|
||||
|
||||
```
|
||||
hermes --tui
|
||||
└─ Node (Ink) ──stdio JSON-RPC── Python (tui_gateway)
|
||||
│ └─ AIAgent + tools + sessions
|
||||
└─ renders transcript, composer, prompts, activity
|
||||
```
|
||||
|
||||
TypeScript owns the screen. Python owns sessions, tools, model calls, and slash command logic.
|
||||
|
||||
### Transport
|
||||
|
||||
Newline-delimited JSON-RPC over stdio. Requests from Ink, events from Python. See `tui_gateway/server.py` for the full method/event catalog.
|
||||
|
||||
### Key Surfaces
|
||||
|
||||
| Surface | Ink component | Gateway method |
|
||||
|---------|---------------|----------------|
|
||||
| Chat streaming | `app.tsx` + `messageLine.tsx` | `prompt.submit` → `message.delta/complete` |
|
||||
| Tool activity | `thinking.tsx` | `tool.start/progress/complete` |
|
||||
| Approvals | `prompts.tsx` | `approval.respond` ← `approval.request` |
|
||||
| Clarify/sudo/secret | `prompts.tsx`, `maskedPrompt.tsx` | `clarify/sudo/secret.respond` |
|
||||
| Session picker | `sessionPicker.tsx` | `session.list/resume` |
|
||||
| Slash commands | Local handler + fallthrough | `slash.exec` → `_SlashWorker`, `command.dispatch` |
|
||||
| Completions | `useCompletion` hook | `complete.slash`, `complete.path` |
|
||||
| Theming | `theme.ts` + `branding.tsx` | `gateway.ready` with skin data |
|
||||
|
||||
### Slash Command Flow
|
||||
|
||||
1. Built-in client commands (`/help`, `/quit`, `/clear`, `/resume`, `/copy`, `/paste`, etc.) handled locally in `app.tsx`
|
||||
2. Everything else → `slash.exec` (runs in persistent `_SlashWorker` subprocess) → `command.dispatch` fallback
|
||||
|
||||
### Dev Commands
|
||||
|
||||
```bash
|
||||
cd ui-tui
|
||||
npm install # first time
|
||||
npm run dev # watch mode (rebuilds hermes-ink + tsx --watch)
|
||||
npm start # production
|
||||
npm run build # full build (hermes-ink + tsc)
|
||||
npm run type-check # typecheck only (tsc --noEmit)
|
||||
npm run lint # eslint
|
||||
npm run fmt # prettier
|
||||
npm test # vitest
|
||||
```
|
||||
|
||||
### TUI in the Dashboard (`hermes dashboard` → `/chat`)
|
||||
|
||||
The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.
|
||||
|
||||
- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
|
||||
- `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
|
||||
- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
|
||||
- Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`.
|
||||
|
||||
**Do not re-implement the primary chat experience in React.** The main transcript, composer/input flow (including slash-command behavior), and PTY-backed terminal belong to the embedded `hermes --tui` — anything new you add to Ink shows up in the dashboard automatically. If you find yourself rebuilding the transcript or composer for the dashboard, stop and extend Ink instead.
|
||||
|
||||
**Structured React UI around the TUI is allowed when it is not a second chat surface.** Sidebar widgets, inspectors, summaries, status panels, and similar supporting views (e.g. `ChatSidebar`, `ModelPickerDialog`, `ToolCall`) are fine when they complement the embedded TUI rather than replacing the transcript / composer / terminal. Keep their state independent of the PTY child's session and surface their failures non-destructively so the terminal pane keeps working unimpaired.
|
||||
|
||||
---
|
||||
|
||||
## Adding New Tools
|
||||
|
||||
For most custom or local-only tools, do **not** edit Hermes core. Use the plugin
|
||||
route instead: create `~/.hermes/plugins/<name>/plugin.yaml` and
|
||||
`~/.hermes/plugins/<name>/__init__.py`, then register tools with
|
||||
`ctx.register_tool(...)`. Plugin toolsets are discovered automatically and can be
|
||||
enabled or disabled without touching `tools/` or `toolsets.py`.
|
||||
|
||||
Use the built-in route below only when the user is explicitly contributing a new
|
||||
core Hermes tool that should ship in the base system.
|
||||
|
||||
Built-in/core tools require changes in **2 files**:
|
||||
Requires changes in **3 files**:
|
||||
|
||||
**1. Create `tools/your_tool.py`:**
|
||||
```python
|
||||
@@ -294,9 +204,9 @@ registry.register(
|
||||
)
|
||||
```
|
||||
|
||||
**2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset. **This step is required:** auto-discovery imports the tool and registers its schema, but the tool is only *exposed to an agent* if its name appears in a toolset. `_HERMES_CORE_TOOLS` is not dead code — it's the default bundle every platform's base toolset inherits from.
|
||||
**2. Add import** in `model_tools.py` `_discover_tools()` list.
|
||||
|
||||
Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain. Wiring into a toolset is still a deliberate, manual step.
|
||||
**3. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.
|
||||
|
||||
The registry handles schema collection, dispatch, availability checking, and error wrapping. All handlers MUST return a JSON string.
|
||||
|
||||
@@ -304,30 +214,7 @@ The registry handles schema collection, dispatch, availability checking, and err
|
||||
|
||||
**State files**: If a tool stores persistent state (caches, logs, checkpoints), use `get_hermes_home()` for the base directory — never `Path.home() / ".hermes"`. This ensures each profile gets its own state.
|
||||
|
||||
**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `tools/todo_tool.py` for the pattern.
|
||||
|
||||
---
|
||||
|
||||
## Dependency Pinning Policy
|
||||
|
||||
All dependencies must have upper bounds to limit supply-chain attack surface.
|
||||
This policy was established after the litellm compromise (PR #2796, #2810) and
|
||||
reinforced after the Mini Shai-Hulud worm campaign (May 2026).
|
||||
|
||||
| Source type | Treatment | Example |
|
||||
|---|---|---|
|
||||
| PyPI package | `>=floor,<next_major` | `"httpx>=0.28.1,<1"` |
|
||||
| Git URL | Commit SHA | `git+https://...@<40-char-sha>` |
|
||||
| GitHub Actions | Commit SHA + comment | `uses: actions/checkout@<sha> # v4` |
|
||||
| CI-only pip | `==exact` | `pyyaml==6.0.2` |
|
||||
|
||||
**When adding a new dependency to `pyproject.toml`:**
|
||||
1. Pin to `>=current_version,<next_major` for post-1.0 (e.g. `>=1.5.0,<2`).
|
||||
2. For pre-1.0 packages, use `<0.(current_minor + 2)` (e.g. `>=0.29,<0.32`).
|
||||
3. Never commit a bare `>=X.Y.Z` without a ceiling — CI and reviewers will reject it.
|
||||
4. Run `uv lock` to regenerate `uv.lock` with hashes.
|
||||
|
||||
Reference: #2810 (bounds pass), #9801 (SHA pinning + audit CI).
|
||||
**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern.
|
||||
|
||||
---
|
||||
|
||||
@@ -335,29 +222,9 @@ Reference: #2810 (bounds pass), #9801 (SHA pinning + audit CI).
|
||||
|
||||
### config.yaml options:
|
||||
1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
|
||||
2. Bump `_config_version` (check the current value at the top of `DEFAULT_CONFIG`)
|
||||
ONLY if you need to actively migrate/transform existing user config
|
||||
(renaming keys, changing structure). Adding a new key to an existing
|
||||
section is handled automatically by the deep-merge and does NOT require
|
||||
a version bump.
|
||||
2. Bump `_config_version` (currently 5) to trigger migration for existing users
|
||||
|
||||
### Top-level `config.yaml` sections (non-exhaustive):
|
||||
|
||||
`model`, `agent`, `terminal`, `compression`, `display`, `stt`, `tts`,
|
||||
`memory`, `security`, `delegation`, `smart_model_routing`, `checkpoints`,
|
||||
`auxiliary`, `curator`, `skills`, `gateway`, `logging`, `cron`, `profiles`,
|
||||
`plugins`, `honcho`.
|
||||
|
||||
`auxiliary` holds per-task overrides for side-LLM work (curator, vision,
|
||||
embedding, title generation, session_search, etc.) — each task can pin
|
||||
its own provider/model/base_url/max_tokens/reasoning_effort. See
|
||||
`agent/auxiliary_client.py::_resolve_auto` for resolution order.
|
||||
|
||||
`curator` holds the background skill-maintenance config —
|
||||
`enabled`, `interval_hours`, `min_idle_hours`, `stale_after_days`,
|
||||
`archive_after_days`, `backup` (nested).
|
||||
|
||||
### .env variables (SECRETS ONLY — API keys, tokens, passwords):
|
||||
### .env variables:
|
||||
1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
|
||||
```python
|
||||
"NEW_API_KEY": {
|
||||
@@ -369,29 +236,13 @@ its own provider/model/base_url/max_tokens/reasoning_effort. See
|
||||
},
|
||||
```
|
||||
|
||||
Non-secret settings (timeouts, thresholds, feature flags, paths, display
|
||||
preferences) belong in `config.yaml`, not `.env`. If internal code needs an
|
||||
env var mirror for backward compatibility, bridge it from `config.yaml` to
|
||||
the env var in code (see `gateway_timeout`, `terminal.cwd` → `TERMINAL_CWD`).
|
||||
|
||||
### Config loaders (three paths — know which one you're in):
|
||||
### Config loaders (two separate systems):
|
||||
|
||||
| Loader | Used by | Location |
|
||||
|--------|---------|----------|
|
||||
| `load_cli_config()` | CLI mode | `cli.py` — merges CLI-specific defaults + user YAML |
|
||||
| `load_config()` | `hermes tools`, `hermes setup`, most CLI subcommands | `hermes_cli/config.py` — merges `DEFAULT_CONFIG` + user YAML |
|
||||
| Direct YAML load | Gateway runtime | `gateway/run.py` + `gateway/config.py` — reads user YAML raw |
|
||||
|
||||
If you add a new key and the CLI sees it but the gateway doesn't (or vice
|
||||
versa), you're on the wrong loader. Check `DEFAULT_CONFIG` coverage.
|
||||
|
||||
### Working directory:
|
||||
- **CLI** — uses the process's current directory (`os.getcwd()`).
|
||||
- **Messaging** — uses `terminal.cwd` from `config.yaml`. The gateway bridges this
|
||||
to the `TERMINAL_CWD` env var for child tools. **`MESSAGING_CWD` has been
|
||||
removed** — the config loader prints a deprecation warning if it's set in
|
||||
`.env`. Same for `TERMINAL_CWD` in `.env`; the canonical setting is
|
||||
`terminal.cwd` in `config.yaml`.
|
||||
| `load_cli_config()` | CLI mode | `cli.py` |
|
||||
| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` |
|
||||
| Direct YAML load | Gateway | `gateway/run.py` |
|
||||
|
||||
---
|
||||
|
||||
@@ -484,382 +335,7 @@ Activate with `/skin cyberpunk` or `display.skin: cyberpunk` in config.yaml.
|
||||
|
||||
---
|
||||
|
||||
## Plugins
|
||||
|
||||
Hermes has two plugin surfaces. Both live under `plugins/` in the repo so
|
||||
repo-shipped plugins can be discovered alongside user-installed ones in
|
||||
`~/.hermes/plugins/` and pip-installed entry points.
|
||||
|
||||
### General plugins (`hermes_cli/plugins.py` + `plugins/<name>/`)
|
||||
|
||||
`PluginManager` discovers plugins from `~/.hermes/plugins/`, `./.hermes/plugins/`,
|
||||
and pip entry points. Each plugin exposes a `register(ctx)` function that
|
||||
can:
|
||||
|
||||
- Register Python-callback lifecycle hooks:
|
||||
`pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`,
|
||||
`on_session_start`, `on_session_end`
|
||||
- Register new tools via `ctx.register_tool(...)`
|
||||
- Register CLI subcommands via `ctx.register_cli_command(...)` — the
|
||||
plugin's argparse tree is wired into `hermes` at startup so
|
||||
`hermes <pluginname> <subcmd>` works with no change to `main.py`
|
||||
|
||||
Hooks are invoked from `model_tools.py` (pre/post tool) and `run_agent.py`
|
||||
(lifecycle). **Discovery timing pitfall:** `discover_plugins()` only runs
|
||||
as a side effect of importing `model_tools.py`. Code paths that read plugin
|
||||
state without importing `model_tools.py` first must call `discover_plugins()`
|
||||
explicitly (it's idempotent).
|
||||
|
||||
### Memory-provider plugins (`plugins/memory/<name>/`)
|
||||
|
||||
Separate discovery system for pluggable memory backends. Current built-in
|
||||
providers include **honcho, mem0, supermemory, byterover, hindsight,
|
||||
holographic, openviking, retaindb**.
|
||||
|
||||
Each provider implements the `MemoryProvider` ABC (see `agent/memory_provider.py`)
|
||||
and is orchestrated by `agent/memory_manager.py`. Lifecycle hooks include
|
||||
`sync_turn(turn_messages)`, `prefetch(query)`, `shutdown()`, and optional
|
||||
`post_setup(hermes_home, config)` for setup-wizard integration.
|
||||
|
||||
**CLI commands via `plugins/memory/<name>/cli.py`:** if a memory plugin
|
||||
defines `register_cli(subparser)`, `discover_plugin_cli_commands()` finds
|
||||
it at argparse setup time and wires it into `hermes <plugin>`. The
|
||||
framework only exposes CLI commands for the **currently active** memory
|
||||
provider (read from `memory.provider` in config.yaml), so disabled
|
||||
providers don't clutter `hermes --help`.
|
||||
|
||||
**Rule (Teknium, May 2026):** plugins MUST NOT modify core files
|
||||
(`run_agent.py`, `cli.py`, `gateway/run.py`, `hermes_cli/main.py`, etc.).
|
||||
If a plugin needs a capability the framework doesn't expose, expand the
|
||||
generic plugin surface (new hook, new ctx method) — never hardcode
|
||||
plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
|
||||
honcho argparse from `main.py` for exactly this reason.
|
||||
|
||||
**No new in-tree memory providers (policy, May 2026):** the set of
|
||||
built-in memory providers under `plugins/memory/` is closed. New memory
|
||||
backends must ship as **standalone plugin repos** that users install
|
||||
into `~/.hermes/plugins/` (or via pip entry points) — they implement
|
||||
the same `MemoryProvider` ABC, register through the same discovery
|
||||
path, and integrate via `hermes memory setup` / `post_setup()` without
|
||||
landing in this tree. PRs that add a new directory under
|
||||
`plugins/memory/` will be closed with a pointer to publish the
|
||||
provider as its own repo. Existing in-tree providers stay; bug fixes
|
||||
to them are welcome.
|
||||
|
||||
### Model-provider plugins (`plugins/model-providers/<name>/`)
|
||||
|
||||
Every inference backend (openrouter, anthropic, gmi, deepseek, nvidia, …)
|
||||
ships as a plugin here. Each plugin's `__init__.py` calls
|
||||
`providers.register_provider(ProviderProfile(...))` at module load.
|
||||
`providers/__init__.py._discover_providers()` is a **lazy, separate
|
||||
discovery system** — scanned on first `get_provider_profile()` or
|
||||
`list_providers()` call, NOT by the general PluginManager.
|
||||
|
||||
Scan order:
|
||||
1. Bundled: `<repo>/plugins/model-providers/<name>/`
|
||||
2. User: `$HERMES_HOME/plugins/model-providers/<name>/`
|
||||
3. Legacy: `<repo>/providers/<name>.py` (back-compat)
|
||||
|
||||
User plugins of the same name override bundled ones — `register_provider()`
|
||||
is last-writer-wins. This lets third parties swap out any built-in
|
||||
profile without a repo patch.
|
||||
|
||||
The general PluginManager records `kind: model-provider` manifests but does
|
||||
NOT import them (would double-instantiate `ProviderProfile`). Plugins
|
||||
without an explicit `kind:` get auto-coerced via a source-text heuristic
|
||||
(`register_provider` + `ProviderProfile` in `__init__.py`).
|
||||
|
||||
Full authoring guide: `website/docs/developer-guide/model-provider-plugin.md`.
|
||||
|
||||
### Dashboard / context-engine / image-gen plugin directories
|
||||
|
||||
`plugins/context_engine/`, `plugins/image_gen/`, etc. follow the same
|
||||
pattern (ABC + orchestrator + per-plugin directory). Context engines
|
||||
plug into `agent/context_engine.py`; image-gen providers into
|
||||
`agent/image_gen_provider.py`. Reference / docs-companion plugins
|
||||
(`example-dashboard`, `strike-freedom-cockpit`, `plugin-llm-example`,
|
||||
`plugin-llm-async-example`) live in the
|
||||
[`hermes-example-plugins`](https://github.com/NousResearch/hermes-example-plugins)
|
||||
companion repo, not in this tree.
|
||||
|
||||
---
|
||||
|
||||
## Skills
|
||||
|
||||
Two parallel surfaces:
|
||||
|
||||
- **`skills/`** — built-in skills shipped and loadable by default.
|
||||
Organized by category directories (e.g. `skills/github/`, `skills/mlops/`).
|
||||
- **`optional-skills/`** — heavier or niche skills shipped with the repo but
|
||||
NOT active by default. Installed explicitly via
|
||||
`hermes skills install official/<category>/<skill>`. Adapter lives in
|
||||
`tools/skills_hub.py` (`OptionalSkillSource`). Categories include
|
||||
`autonomous-ai-agents`, `blockchain`, `communication`, `creative`,
|
||||
`devops`, `email`, `health`, `mcp`, `migration`, `mlops`, `productivity`,
|
||||
`research`, `security`, `web-development`.
|
||||
|
||||
When reviewing skill PRs, check which directory they target — heavy-dep or
|
||||
niche skills belong in `optional-skills/`.
|
||||
|
||||
### SKILL.md frontmatter
|
||||
|
||||
Standard fields: `name`, `description`, `version`, `author`, `license`,
|
||||
`platforms` (OS-gating list: `[macos]`, `[linux, macos]`, ...),
|
||||
`metadata.hermes.tags`, `metadata.hermes.category`,
|
||||
`metadata.hermes.related_skills`, `metadata.hermes.config` (config.yaml
|
||||
settings the skill needs — stored under `skills.config.<key>`, prompted
|
||||
during setup, injected at load time).
|
||||
|
||||
Top-level `tags:` and `category:` are also accepted and mirrored from
|
||||
`metadata.hermes.*` by the loader.
|
||||
|
||||
### Skill authoring standards (HARDLINE)
|
||||
|
||||
Every new or modernized skill — bundled, optional, or contributed —
|
||||
must meet these standards before merge. Reviewers reject PRs that
|
||||
violate them.
|
||||
|
||||
1. **`description` ≤ 60 characters, one sentence, ends with a period.**
|
||||
Long descriptions bloat skill listings and dilute the model's
|
||||
attention when many skills are loaded. State the capability, not
|
||||
the implementation. No marketing words ("powerful",
|
||||
"comprehensive", "seamless", "advanced"). Don't repeat the skill
|
||||
name. Verify with:
|
||||
```python
|
||||
import re, pathlib
|
||||
m = re.search(r'^description: (.*)$',
|
||||
pathlib.Path('skills/<cat>/<name>/SKILL.md').read_text(),
|
||||
re.MULTILINE)
|
||||
assert len(m.group(1)) <= 60, len(m.group(1))
|
||||
```
|
||||
|
||||
2. **Tools referenced in SKILL.md prose must be native Hermes tools or
|
||||
MCP servers the skill explicitly expects.** When the skill needs a
|
||||
capability, point at the proper tool by name in backticks
|
||||
(`` `terminal` ``, `` `web_extract` ``, `` `read_file` ``,
|
||||
`` `patch` ``, `` `search_files` ``, `` `vision_analyze` ``,
|
||||
`` `browser_navigate` ``, `` `delegate_task` ``, etc.). Do NOT
|
||||
name shell utilities the agent already has wrapped — `grep` →
|
||||
`search_files`, `cat`/`head`/`tail` → `read_file`, `sed`/`awk` →
|
||||
`patch`, `find`/`ls` → `search_files target='files'`. If the skill
|
||||
depends on an MCP server, name the MCP server and document the
|
||||
expected setup in `## Prerequisites`. Anything else (third-party
|
||||
CLIs, shell pipelines, etc.) is fair game inside script files but
|
||||
should not be the headline interaction surface in the prose.
|
||||
|
||||
3. **`platforms:` gating audited against actual script imports.**
|
||||
Skills that use POSIX-only primitives (`fcntl`, `termios`,
|
||||
`os.setsid`, `os.kill(pid, 0)` for liveness, `/proc`, `/tmp`
|
||||
hardcoded, `signal.SIGKILL`, bash heredocs, `osascript`, `apt`,
|
||||
`systemctl`) must declare their supported platforms. Default
|
||||
posture: try to fix it cross-platform first — `tempfile.gettempdir`,
|
||||
`pathlib.Path`, `psutil.pid_exists`, Python-level filtering instead
|
||||
of `grep`. Gate to a narrower set only when the dependency is
|
||||
genuinely platform-bound.
|
||||
|
||||
4. **`author` credits the human contributor first.** For external
|
||||
contributions, the contributor's real name + GitHub handle goes
|
||||
first; "Hermes Agent" is the secondary collaborator. If the
|
||||
contributor's commit shows "Hermes Agent" as author (because they
|
||||
used Hermes to draft the skill), replace it with their actual name
|
||||
— credit the human, not the tool.
|
||||
|
||||
5. **SKILL.md body uses the modern section order.** `# <Skill> Skill`
|
||||
title, 2-3 sentence intro stating what it does and doesn't do,
|
||||
`## When to Use`, `## Prerequisites`, `## How to Run`,
|
||||
`## Quick Reference`, `## Procedure`, `## Pitfalls`,
|
||||
`## Verification`. Target ~200 lines for a complex skill,
|
||||
~100 lines for a simple one. Cut redundant intro fluff, marketing
|
||||
prose, and re-explanations of env vars already in
|
||||
`## Prerequisites`.
|
||||
|
||||
6. **Scripts go in `scripts/`, references in `references/`,
|
||||
templates in `templates/`.** Don't expect the model to inline-write
|
||||
parsers, XML walkers, or non-trivial logic every call — ship a
|
||||
helper script. Reference it from SKILL.md by path relative to the
|
||||
skill directory.
|
||||
|
||||
7. **Tests live at `tests/skills/test_<skill>_skill.py`** and use only
|
||||
stdlib + pytest + `unittest.mock`. No live network calls. Run via
|
||||
`scripts/run_tests.sh tests/skills/test_<skill>_skill.py -q`.
|
||||
|
||||
8. **`.env.example` additions are isolated to a clearly delimited
|
||||
block.** Don't touch the surrounding file — contributor-supplied
|
||||
`.env.example` versions are usually stale and edits outside the
|
||||
skill's own block must be dropped during salvage.
|
||||
|
||||
The full salvage / modernization checklist for external skill PRs
|
||||
lives in the `hermes-agent-dev` skill at
|
||||
`references/new-skill-pr-salvage.md` — load it before polishing
|
||||
contributor skill PRs.
|
||||
|
||||
---
|
||||
|
||||
## Toolsets
|
||||
|
||||
All toolsets are defined in `toolsets.py` as a single `TOOLSETS` dict.
|
||||
Each platform's adapter picks a base toolset (e.g. Telegram uses
|
||||
`"messaging"`); `_HERMES_CORE_TOOLS` is the default bundle most
|
||||
platforms inherit from.
|
||||
|
||||
Current toolset keys: `browser`, `clarify`, `code_execution`, `cronjob`,
|
||||
`debugging`, `delegation`, `discord`, `discord_admin`, `feishu_doc`,
|
||||
`feishu_drive`, `file`, `homeassistant`, `image_gen`, `kanban`, `memory`,
|
||||
`messaging`, `moa`, `rl`, `safe`, `search`, `session_search`, `skills`,
|
||||
`spotify`, `terminal`, `todo`, `tts`, `video`, `vision`, `web`, `yuanbao`.
|
||||
|
||||
Enable/disable per platform via `hermes tools` (the curses UI) or the
|
||||
`tools.<platform>.enabled` / `tools.<platform>.disabled` lists in
|
||||
`config.yaml`.
|
||||
|
||||
---
|
||||
|
||||
## Delegation (`delegate_task`)
|
||||
|
||||
`tools/delegate_tool.py` spawns a subagent with an isolated
|
||||
context + terminal session. Synchronous: the parent waits for the
|
||||
child's summary before continuing its own loop — if the parent is
|
||||
interrupted, the child is cancelled.
|
||||
|
||||
Two shapes:
|
||||
|
||||
- **Single:** pass `goal` (+ optional `context`, `toolsets`).
|
||||
- **Batch (parallel):** pass `tasks: [...]` — each gets its own subagent
|
||||
running concurrently. Concurrency is capped by
|
||||
`delegation.max_concurrent_children` (default 3).
|
||||
|
||||
Roles:
|
||||
|
||||
- `role="leaf"` (default) — focused worker. Cannot call `delegate_task`,
|
||||
`clarify`, `memory`, `send_message`, `execute_code`.
|
||||
- `role="orchestrator"` — retains `delegate_task` so it can spawn its
|
||||
own workers. Gated by `delegation.orchestrator_enabled` (default true)
|
||||
and bounded by `delegation.max_spawn_depth` (default 2).
|
||||
|
||||
Key config knobs (under `delegation:` in `config.yaml`):
|
||||
`max_concurrent_children`, `max_spawn_depth`, `child_timeout_seconds`,
|
||||
`orchestrator_enabled`, `subagent_auto_approve`, `inherit_mcp_toolsets`,
|
||||
`max_iterations`.
|
||||
|
||||
Synchronicity rule: delegate_task is **not** durable. For long-running
|
||||
work that must outlive the current turn, use `cronjob` or
|
||||
`terminal(background=True, notify_on_complete=True)` instead.
|
||||
|
||||
---
|
||||
|
||||
## Curator (skill lifecycle)
|
||||
|
||||
Background skill-maintenance system that tracks usage on agent-created
|
||||
skills and auto-archives stale ones. Users never lose skills; archives
|
||||
go to `~/.hermes/skills/.archive/` and are restorable.
|
||||
|
||||
- **Core:** `agent/curator.py` (review loop, auto-transitions, LLM review
|
||||
prompt) + `agent/curator_backup.py` (pre-run tar.gz snapshots).
|
||||
- **CLI:** `hermes_cli/curator.py` wires `hermes curator <verb>` where
|
||||
verbs are: `status`, `run`, `pause`, `resume`, `pin`, `unpin`,
|
||||
`archive`, `restore`, `prune`, `backup`, `rollback`.
|
||||
- **Telemetry:** `tools/skill_usage.py` owns the sidecar
|
||||
`~/.hermes/skills/.usage.json` — per-skill `use_count`, `view_count`,
|
||||
`patch_count`, `last_activity_at`, `state` (active / stale /
|
||||
archived), `pinned`.
|
||||
|
||||
Invariants:
|
||||
- Curator only touches skills with `created_by: "agent"` provenance —
|
||||
bundled + hub-installed skills are off-limits.
|
||||
- Never deletes; max destructive action is archive.
|
||||
- Pinned skills are exempt from every auto-transition and from the
|
||||
LLM review pass.
|
||||
- `skill_manage(action="delete")` refuses pinned skills; patch/edit/
|
||||
write_file/remove_file go through so the agent can keep improving
|
||||
pinned skills.
|
||||
|
||||
Config section (`curator:` in `config.yaml`):
|
||||
`enabled`, `interval_hours`, `min_idle_hours`, `stale_after_days`,
|
||||
`archive_after_days`, `backup.*`.
|
||||
|
||||
Full user-facing docs: `website/docs/user-guide/features/curator.md`.
|
||||
|
||||
---
|
||||
|
||||
## Cron (scheduled jobs)
|
||||
|
||||
`cron/jobs.py` (job store) + `cron/scheduler.py` (tick loop). Agents
|
||||
schedule jobs via the `cronjob` tool; users via `hermes cron <verb>`
|
||||
(`list`, `add`, `edit`, `pause`, `resume`, `run`, `remove`) or the
|
||||
`/cron` slash command.
|
||||
|
||||
Supported schedule formats:
|
||||
- Duration: `"30m"`, `"2h"`, `"1d"`
|
||||
- "every" phrase: `"every 2h"`, `"every monday 9am"`
|
||||
- 5-field cron expression: `"0 9 * * *"`
|
||||
- ISO timestamp (one-shot): `"2026-06-01T09:00:00Z"`
|
||||
|
||||
Per-job fields include `skills` (load specific skills), `model` /
|
||||
`provider` overrides, `script` (pre-run data-collection script whose
|
||||
stdout is injected into the prompt; `no_agent=True` turns the script
|
||||
into the entire job), `context_from` (chain job A's last output into
|
||||
job B's prompt), `workdir` (run in a specific directory with its
|
||||
`AGENTS.md`/`CLAUDE.md` loaded), and multi-platform delivery.
|
||||
|
||||
Hardening invariants:
|
||||
- **3-minute hard interrupt** on cron sessions — runaway agent loops
|
||||
cannot monopolize the scheduler.
|
||||
- Catchup window: half the job's period, clamped to 120s–2h.
|
||||
- Grace window: 120s for one-shot jobs whose fire time was missed.
|
||||
- File lock at `~/.hermes/cron/.tick.lock` prevents duplicate ticks
|
||||
across processes.
|
||||
- Cron sessions pass `skip_memory=True` by default; memory providers
|
||||
intentionally do not run during cron.
|
||||
|
||||
Cron deliveries are **not** mirrored into the target gateway session —
|
||||
they land in their own cron session with a header/footer frame so the
|
||||
main conversation's message-role alternation stays intact.
|
||||
|
||||
---
|
||||
|
||||
## Kanban (multi-agent work queue)
|
||||
|
||||
Durable SQLite-backed board that lets multiple profiles / workers
|
||||
collaborate on shared tasks. Users drive it via `hermes kanban <verb>`;
|
||||
workers spawned by the dispatcher drive it via a dedicated `kanban_*`
|
||||
toolset so their schema footprint is zero when they're not inside a
|
||||
kanban task.
|
||||
|
||||
- **CLI:** `hermes_cli/kanban.py` wires `hermes kanban` with verbs
|
||||
`init`, `create`, `list` (alias `ls`), `show`, `assign`, `link`,
|
||||
`unlink`, `comment`, `complete`, `block`, `unblock`, `archive`,
|
||||
`tail`, plus less-commonly-used `watch`, `stats`, `runs`, `log`,
|
||||
`assignees`, `heartbeat`, `notify-*`, `dispatch`, `daemon`, `gc`.
|
||||
- **Worker/orchestrator toolset:** `tools/kanban_tools.py` exposes
|
||||
`kanban_show`, `kanban_complete`, `kanban_block`, `kanban_heartbeat`,
|
||||
`kanban_comment`, `kanban_create`, `kanban_link`; profiles that
|
||||
explicitly enable the `kanban` toolset outside a dispatcher-spawned
|
||||
task also get `kanban_list` and `kanban_unblock` for board routing.
|
||||
- **Dispatcher:** long-lived loop that (default every 60s) reclaims
|
||||
stale claims, promotes ready tasks, atomically claims, and spawns
|
||||
assigned profiles. Runs **inside the gateway** by default via
|
||||
`kanban.dispatch_in_gateway: true`.
|
||||
- **Plugin assets:** `plugins/kanban/dashboard/` (web UI) +
|
||||
`plugins/kanban/systemd/` (`hermes-kanban-dispatcher.service` for
|
||||
standalone dispatcher deployment).
|
||||
|
||||
Isolation model:
|
||||
- **Board** is the hard boundary — workers are spawned with
|
||||
`HERMES_KANBAN_BOARD` pinned in their env so they can't see other
|
||||
boards.
|
||||
- **Tenant** is a soft namespace *within* a board — one specialist
|
||||
fleet can serve multiple businesses with workspace-path + memory-key
|
||||
isolation.
|
||||
- After `kanban.failure_limit` consecutive non-success attempts on the
|
||||
same task (default: 2), the dispatcher auto-blocks it to prevent spin
|
||||
loops.
|
||||
|
||||
Full user-facing docs: `website/docs/user-guide/features/kanban.md`.
|
||||
|
||||
---
|
||||
|
||||
## Important Policies
|
||||
|
||||
### Prompt Caching Must Not Break
|
||||
|
||||
Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:**
|
||||
@@ -869,16 +345,14 @@ Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT i
|
||||
|
||||
Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression.
|
||||
|
||||
Slash commands that mutate system-prompt state (skills, tools, memory, etc.)
|
||||
must be **cache-aware**: default to deferred invalidation (change takes
|
||||
effect next session), with an opt-in `--now` flag for immediate
|
||||
invalidation. See `/skills install --now` for the canonical pattern.
|
||||
### Working Directory Behavior
|
||||
- **CLI**: Uses current directory (`.` → `os.getcwd()`)
|
||||
- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory)
|
||||
|
||||
### Background Process Notifications (Gateway)
|
||||
|
||||
When `terminal(background=true, notify_on_complete=true)` is used, the gateway runs a watcher that
|
||||
detects process completion and triggers a new agent turn. Control verbosity of background process
|
||||
messages with `display.background_process_notifications`
|
||||
When `terminal(background=true, check_interval=...)` is used, the gateway runs a watcher that
|
||||
pushes status updates to the user's chat. Control verbosity with `display.background_process_notifications`
|
||||
in config.yaml (or `HERMES_BACKGROUND_NOTIFICATIONS` env var):
|
||||
|
||||
- `all` — running-output updates + final message (default)
|
||||
@@ -894,7 +368,7 @@ Hermes supports **profiles** — multiple fully isolated instances, each with it
|
||||
`HERMES_HOME` directory (config, API keys, memory, sessions, skills, gateway, etc.).
|
||||
|
||||
The core mechanism: `_apply_profile_override()` in `hermes_cli/main.py` sets
|
||||
`HERMES_HOME` before any module imports. All `get_hermes_home()` references
|
||||
`HERMES_HOME` before any module imports. All 119+ references to `get_hermes_home()`
|
||||
automatically scope to the active profile.
|
||||
|
||||
### Rules for profile-safe code
|
||||
@@ -951,12 +425,8 @@ Use `get_hermes_home()` from `hermes_constants` for code paths. Use `display_her
|
||||
for user-facing print/log messages. Hardcoding `~/.hermes` breaks profiles — each profile
|
||||
has its own `HERMES_HOME` directory. This was the source of 5 bugs fixed in PR #3575.
|
||||
|
||||
### DO NOT introduce new `simple_term_menu` usage
|
||||
Existing call sites in `hermes_cli/main.py` remain for legacy fallback only;
|
||||
the preferred UI is curses (stdlib) because `simple_term_menu` has
|
||||
ghost-duplication rendering bugs in tmux/iTerm2 with arrow keys. New
|
||||
interactive menus must use `hermes_cli/curses_ui.py` — see
|
||||
`hermes_cli/tools_config.py` for the canonical pattern.
|
||||
### DO NOT use `simple_term_menu` for interactive menus
|
||||
Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern.
|
||||
|
||||
### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code
|
||||
Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`.
|
||||
@@ -967,30 +437,6 @@ Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-p
|
||||
### DO NOT hardcode cross-tool references in schema descriptions
|
||||
Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern.
|
||||
|
||||
### The gateway has TWO message guards — both must bypass approval/control commands
|
||||
When an agent is running, messages pass through two sequential guards:
|
||||
(1) **base adapter** (`gateway/platforms/base.py`) queues messages in
|
||||
`_pending_messages` when `session_key in self._active_sessions`, and
|
||||
(2) **gateway runner** (`gateway/run.py`) intercepts `/stop`, `/new`,
|
||||
`/queue`, `/status`, `/approve`, `/deny` before they reach
|
||||
`running_agent.interrupt()`. Any new command that must reach the runner
|
||||
while the agent is blocked (e.g. approval prompts) MUST bypass BOTH
|
||||
guards and be dispatched inline, not via `_process_message_background()`
|
||||
(which races session lifecycle).
|
||||
|
||||
### Squash merges from stale branches silently revert recent fixes
|
||||
Before squash-merging a PR, ensure the branch is up to date with `main`
|
||||
(`git fetch origin main && git reset --hard origin/main` in the worktree,
|
||||
then re-apply the PR's commits). A stale branch's version of an unrelated
|
||||
file will silently overwrite recent fixes on main when squashed. Verify
|
||||
with `git diff HEAD~1..HEAD` after merging — unexpected deletions are a
|
||||
red flag.
|
||||
|
||||
### Don't wire in dead code without E2E validation
|
||||
Unused code that was never shipped was dead for a reason. Before wiring an
|
||||
unused module into a live code path, E2E test the real resolution chain
|
||||
with actual imports (not mocks) against a temp `HERMES_HOME`.
|
||||
|
||||
### Tests must not write to `~/.hermes/`
|
||||
The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests.
|
||||
|
||||
@@ -1011,94 +457,13 @@ def profile_env(tmp_path, monkeypatch):
|
||||
|
||||
## Testing
|
||||
|
||||
**ALWAYS use `scripts/run_tests.sh`** — do not call `pytest` directly. The script enforces
|
||||
hermetic environment parity with CI (unset credential vars, TZ=UTC, LANG=C.UTF-8,
|
||||
4 xdist workers matching GHA ubuntu-latest). Direct `pytest` on a 16+ core
|
||||
developer machine with API keys set diverges from CI in ways that have caused
|
||||
multiple "works locally, fails in CI" incidents (and the reverse).
|
||||
|
||||
```bash
|
||||
scripts/run_tests.sh # full suite, CI-parity
|
||||
scripts/run_tests.sh tests/gateway/ # one directory
|
||||
scripts/run_tests.sh tests/agent/test_foo.py::test_x # one test
|
||||
scripts/run_tests.sh -v --tb=long # pass-through pytest flags
|
||||
source venv/bin/activate
|
||||
python -m pytest tests/ -q # Full suite (~3000 tests, ~3 min)
|
||||
python -m pytest tests/test_model_tools.py -q # Toolset resolution
|
||||
python -m pytest tests/test_cli_init.py -q # CLI config loading
|
||||
python -m pytest tests/gateway/ -q # Gateway tests
|
||||
python -m pytest tests/tools/ -q # Tool-level tests
|
||||
```
|
||||
|
||||
### Why the wrapper (and why the old "just call pytest" doesn't work)
|
||||
|
||||
Five real sources of local-vs-CI drift the script closes:
|
||||
|
||||
| | Without wrapper | With wrapper |
|
||||
|---|---|---|
|
||||
| Provider API keys | Whatever is in your env (auto-detects pool) | All `*_API_KEY`/`*_TOKEN`/etc. unset |
|
||||
| HOME / `~/.hermes/` | Your real config+auth.json | Temp dir per test |
|
||||
| Timezone | Local TZ (PDT etc.) | UTC |
|
||||
| Locale | Whatever is set | C.UTF-8 |
|
||||
| xdist workers | `-n auto` = all cores (20+ on a workstation) | `-n 4` matching CI |
|
||||
|
||||
`tests/conftest.py` also enforces points 1-4 as an autouse fixture so ANY pytest
|
||||
invocation (including IDE integrations) gets hermetic behavior — but the wrapper
|
||||
is belt-and-suspenders.
|
||||
|
||||
### Running without the wrapper (only if you must)
|
||||
|
||||
If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
|
||||
pytest directly), at minimum activate the venv and pass `-n 4`:
|
||||
|
||||
```bash
|
||||
source .venv/bin/activate # or: source venv/bin/activate
|
||||
python -m pytest tests/ -q -n 4
|
||||
```
|
||||
|
||||
Worker count above 4 will surface test-ordering flakes that CI never sees.
|
||||
|
||||
Always run the full suite before pushing changes.
|
||||
|
||||
### Don't write change-detector tests
|
||||
|
||||
A test is a **change-detector** if it fails whenever data that is **expected
|
||||
to change** gets updated — model catalogs, config version numbers,
|
||||
enumeration counts, hardcoded lists of provider models. These tests add no
|
||||
behavioral coverage; they just guarantee that routine source updates break
|
||||
CI and cost engineering time to "fix."
|
||||
|
||||
**Do not write:**
|
||||
|
||||
```python
|
||||
# catalog snapshot — breaks every model release
|
||||
assert "gemini-2.5-pro" in _PROVIDER_MODELS["gemini"]
|
||||
assert "MiniMax-M2.7" in models
|
||||
|
||||
# config version literal — breaks every schema bump
|
||||
assert DEFAULT_CONFIG["_config_version"] == 21
|
||||
|
||||
# enumeration count — breaks every time a skill/provider is added
|
||||
assert len(_PROVIDER_MODELS["huggingface"]) == 8
|
||||
```
|
||||
|
||||
**Do write:**
|
||||
|
||||
```python
|
||||
# behavior: does the catalog plumbing work at all?
|
||||
assert "gemini" in _PROVIDER_MODELS
|
||||
assert len(_PROVIDER_MODELS["gemini"]) >= 1
|
||||
|
||||
# behavior: does migration bump the user's version to current latest?
|
||||
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
|
||||
|
||||
# invariant: no plan-only model leaks into the legacy list
|
||||
assert not (set(moonshot_models) & coding_plan_only_models)
|
||||
|
||||
# invariant: every model in the catalog has a context-length entry
|
||||
for m in _PROVIDER_MODELS["huggingface"]:
|
||||
assert m.lower() in DEFAULT_CONTEXT_LENGTHS_LOWER
|
||||
```
|
||||
|
||||
The rule: if the test reads like a snapshot of current data, delete it. If
|
||||
it reads like a contract about how two pieces of data must relate, keep it.
|
||||
When a PR adds a new provider/model and you want a test, make the test
|
||||
assert the relationship (e.g. "catalog entries all have context lengths"),
|
||||
not the specific names.
|
||||
|
||||
Reviewers should reject new change-detector tests; authors should convert
|
||||
them into invariants before re-requesting review.
|
||||
|
||||
316
CONTRIBUTING.md
316
CONTRIBUTING.md
@@ -9,7 +9,7 @@ Thank you for contributing to Hermes Agent! This guide covers everything you nee
|
||||
We value contributions in this order:
|
||||
|
||||
1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority.
|
||||
2. **Cross-platform compatibility** — macOS, different Linux distros, and WSL2 on Windows. We want Hermes to work everywhere.
|
||||
2. **Cross-platform compatibility** — Windows, macOS, different Linux distros, different terminal emulators. We want Hermes to work everywhere.
|
||||
3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations).
|
||||
4. **Performance and robustness** — retry logic, error handling, graceful degradation.
|
||||
5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool)
|
||||
@@ -49,34 +49,16 @@ If your skill is specialized, community-contributed, or niche, it's better suite
|
||||
|
||||
---
|
||||
|
||||
## Memory Providers: Ship as a Standalone Plugin
|
||||
|
||||
**We are no longer accepting new memory providers into this repo.** The set of built-in providers under `plugins/memory/` (honcho, mem0, supermemory, byterover, hindsight, holographic, openviking, retaindb) is closed. If you want to add a new memory backend, publish it as a **standalone plugin repo** that users install into `~/.hermes/plugins/` (or via a pip entry point).
|
||||
|
||||
Standalone memory plugins:
|
||||
|
||||
- Implement the same `MemoryProvider` ABC (`agent/memory_provider.py`) — `sync_turn`, `prefetch`, `shutdown`, and optionally `post_setup(hermes_home, config)` for setup-wizard integration
|
||||
- Use the same discovery system — `discover_memory_providers()` picks them up from user/project plugin directories and pip entry points
|
||||
- Integrate with `hermes memory setup` via `post_setup()` — no need to touch core code
|
||||
- Can register their own CLI subcommands via `register_cli(subparser)` in a `cli.py` file
|
||||
- Get all the same lifecycle hooks and config plumbing as in-tree providers
|
||||
|
||||
PRs that add a new directory under `plugins/memory/` will be closed with a pointer to publish the provider as its own repo. Existing in-tree providers stay; bug fixes to them are welcome.
|
||||
|
||||
This isn't a quality bar — it's a coupling-and-maintenance decision. Memory providers are the most common plugin type and they shouldn't all live in this tree.
|
||||
|
||||
---
|
||||
|
||||
## Development Setup
|
||||
|
||||
### Prerequisites
|
||||
|
||||
| Requirement | Notes |
|
||||
|-------------|-------|
|
||||
| **Git** | With `--recurse-submodules` support, and the `git-lfs` extension installed |
|
||||
| **Git** | With `--recurse-submodules` support |
|
||||
| **Python 3.11+** | uv will install it if missing |
|
||||
| **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
|
||||
| **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |
|
||||
| **Node.js 18+** | Optional — needed for browser tools and WhatsApp bridge |
|
||||
|
||||
### Clone and install
|
||||
|
||||
@@ -91,6 +73,9 @@ export VIRTUAL_ENV="$(pwd)/venv"
|
||||
# Install with all extras (messaging, cron, CLI menus, dev tools)
|
||||
uv pip install -e ".[all,dev]"
|
||||
|
||||
# Optional: RL training submodule
|
||||
# git submodule update --init tinker-atropos && uv pip install -e "./tinker-atropos"
|
||||
|
||||
# Optional: browser tools
|
||||
npm install
|
||||
```
|
||||
@@ -103,7 +88,7 @@ cp cli-config.yaml.example ~/.hermes/config.yaml
|
||||
touch ~/.hermes/.env
|
||||
|
||||
# Add at minimum an LLM provider key:
|
||||
echo "OPENROUTER_API_KEY=***" >> ~/.hermes/.env
|
||||
echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env
|
||||
```
|
||||
|
||||
### Run
|
||||
@@ -121,11 +106,6 @@ hermes chat -q "Hello"
|
||||
### Run tests
|
||||
|
||||
```bash
|
||||
# Preferred — matches CI (hermetic env, 4 xdist workers); see AGENTS.md
|
||||
scripts/run_tests.sh
|
||||
|
||||
# Alternative (activate the venv first). The wrapper is still recommended
|
||||
# for parity with GitHub Actions before you open a PR:
|
||||
pytest tests/ -v
|
||||
```
|
||||
|
||||
@@ -172,7 +152,7 @@ hermes-agent/
|
||||
│ ├── vision_tools.py # Image analysis via multimodal models
|
||||
│ ├── delegate_tool.py # Subagent spawning and parallel task execution
|
||||
│ ├── code_execution_tool.py # Sandboxed Python with RPC tool access
|
||||
│ ├── session_search_tool.py # Search past conversations with FTS5 + anchored windows
|
||||
│ ├── session_search_tool.py # Search past conversations with FTS5 + summarization
|
||||
│ ├── cronjob_tools.py # Scheduled task management
|
||||
│ ├── skill_tools.py # Skill search, load, manage
|
||||
│ └── environments/ # Terminal execution backends
|
||||
@@ -193,6 +173,7 @@ hermes-agent/
|
||||
│
|
||||
├── skills/ # Bundled skills (copied to ~/.hermes/skills/ on install)
|
||||
├── optional-skills/ # Official optional skills (discoverable via hub, not activated by default)
|
||||
├── environments/ # RL training environments (Atropos integration)
|
||||
├── tests/ # Test suite
|
||||
├── website/ # Documentation site (hermes-agent.nousresearch.com)
|
||||
│
|
||||
@@ -305,18 +286,16 @@ registry.register(
|
||||
)
|
||||
```
|
||||
|
||||
**Wire into a toolset (required):** Built-in tools are auto-discovered: any
|
||||
`tools/*.py` file that contains a top-level `registry.register(...)` call is
|
||||
imported by `discover_builtin_tools()` in `tools/registry.py` when `model_tools`
|
||||
loads. There is **no** manual import list in `model_tools.py` to maintain.
|
||||
Then add the import to `model_tools.py` in the `_modules` list:
|
||||
|
||||
You must still add the tool name to the appropriate list in `toolsets.py`
|
||||
(for example `_HERMES_CORE_TOOLS` or a dedicated toolset); otherwise the tool
|
||||
registers but is never exposed to the agent. If you introduce a new toolset,
|
||||
add it in `toolsets.py` and wire it into the relevant platform presets.
|
||||
```python
|
||||
_modules = [
|
||||
# ... existing modules ...
|
||||
"tools.my_tool",
|
||||
]
|
||||
```
|
||||
|
||||
See `AGENTS.md` (section **Adding New Tools**) for profile-aware paths and
|
||||
plugin vs core guidance.
|
||||
If it's a new toolset, add it to `toolsets.py` and to the relevant platform presets.
|
||||
|
||||
---
|
||||
|
||||
@@ -475,58 +454,6 @@ Gateway and messaging sessions never collect secrets in-band; they instruct the
|
||||
|
||||
See `skills/gifs/gif-search/` and `skills/email/himalaya/` for examples.
|
||||
|
||||
### Skill authoring standards (HARDLINE)
|
||||
|
||||
Every new or modernized skill — bundled, optional, or contributed — must meet these standards before merge. Reviewers reject PRs that violate them.
|
||||
|
||||
1. **`description` ≤ 60 characters, one sentence, ends with a period.** Long descriptions bloat the skill listing UI and dilute the model's attention when many skills are loaded. State the capability, not the implementation. No marketing words ("powerful", "comprehensive", "seamless", "advanced"). Don't repeat the skill name. Verify with:
|
||||
```python
|
||||
import re, pathlib
|
||||
m = re.search(r'^description: (.*)$',
|
||||
pathlib.Path('skills/<cat>/<name>/SKILL.md').read_text(),
|
||||
re.MULTILINE)
|
||||
assert len(m.group(1)) <= 60, len(m.group(1))
|
||||
```
|
||||
|
||||
Good: `Search arXiv papers by keyword, author, category, or ID.`
|
||||
Bad: `A powerful and comprehensive skill that allows the agent to search arXiv for relevant academic papers using various criteria including keywords, authors, and categories.`
|
||||
|
||||
2. **Tools referenced in SKILL.md prose must be native Hermes tools or MCP servers the skill explicitly expects.** When the skill needs a capability, point at the proper tool by name in backticks: `` `terminal` ``, `` `web_extract` ``, `` `web_search` ``, `` `read_file` ``, `` `write_file` ``, `` `patch` ``, `` `search_files` ``, `` `vision_analyze` ``, `` `browser_navigate` ``, `` `delegate_task` ``, `` `image_generate` ``, `` `text_to_speech` ``, `` `cronjob` ``, `` `memory` ``, `` `skill_view` ``, `` `todo` ``, `` `execute_code` ``.
|
||||
|
||||
Do NOT name shell utilities the agent already has wrapped:
|
||||
|
||||
| Don't say | Say |
|
||||
|---|---|
|
||||
| `grep`, `rg` | `search_files` |
|
||||
| `cat`, `head`, `tail` | `read_file` |
|
||||
| `sed`, `awk` | `patch` |
|
||||
| `find`, `ls` | `search_files` (with `target='files'`) |
|
||||
| `curl` for content extraction | `web_extract` |
|
||||
| `echo > file`, `cat <<EOF` | `write_file` |
|
||||
|
||||
If the skill depends on an MCP server, name the MCP server and document its setup in `## Prerequisites`. Third-party CLIs (e.g. `ffmpeg`, `gh`, a specific SDK) are fine to invoke from inside script files, but the prose should frame the interaction as "invoke through the `terminal` tool", not as a manual shell session.
|
||||
|
||||
3. **`platforms:` gating audited against actual script imports.** Skills that use POSIX-only primitives (`fcntl`, `termios`, `os.setsid`, `os.kill(pid, 0)` for liveness, `/proc`, hardcoded `/tmp` paths, `signal.SIGKILL`, bash heredocs, `osascript`, `apt`, `systemctl`) must declare their supported platforms via the `platforms:` frontmatter. Default posture is to fix it cross-platform first — `tempfile.gettempdir()`, `pathlib.Path`, `psutil.pid_exists()`, Python-level filtering instead of `grep`. Gate to a narrower set only when the dependency is genuinely platform-bound (e.g. `osascript` is macOS-only, `/proc` is Linux-only).
|
||||
|
||||
4. **`author` credits the human contributor first.** For external contributions, the contributor's real name + GitHub handle goes first (`Jane Doe (jane-doe)`); "Hermes Agent" is the secondary collaborator. If the contributor's commit shows "Hermes Agent" as author because they used Hermes to draft the skill, replace it with their actual name — credit the human, not the tool.
|
||||
|
||||
5. **SKILL.md body uses the modern section order.** `# <Skill> Skill` title, 2-3 sentence intro stating what it does and what it doesn't do, then:
|
||||
- `## When to Use` — trigger conditions
|
||||
- `## Prerequisites` — env vars, install steps, MCP setup, API key sourcing
|
||||
- `## How to Run` — canonical invocation through the `terminal` tool
|
||||
- `## Quick Reference` — flat command/API reference
|
||||
- `## Procedure` — numbered steps with copy-paste commands
|
||||
- `## Pitfalls` — known limits, rate limits, things that look broken but aren't
|
||||
- `## Verification` — single command that proves the skill works
|
||||
|
||||
Target ~200 lines for a complex skill, ~100 lines for a simple one. Cut redundant intro fluff, marketing prose, and re-explanations of env vars already documented in `## Prerequisites`.
|
||||
|
||||
6. **Scripts go in `scripts/`, references in `references/`, templates in `templates/`.** Don't expect the model to inline-write parsers, XML walkers, or non-trivial logic every call — ship a helper script. Reference scripts from SKILL.md by path relative to the skill directory.
|
||||
|
||||
7. **Tests live at `tests/skills/test_<skill>_skill.py`** and use only stdlib + pytest + `unittest.mock`. No live network calls. Run via `scripts/run_tests.sh tests/skills/test_<skill>_skill.py -q`. Must pass under the hermetic CI env (no API keys leaking through). Use `monkeypatch` and `tmp_path` for any env-var or filesystem dependencies.
|
||||
|
||||
8. **`.env.example` additions are isolated to a clearly delimited block.** Don't touch the surrounding file — contributor-supplied `.env.example` versions are usually stale, and edits outside the skill's own block will be dropped during salvage. Comment all values with `#` (it's documentation, not live config).
|
||||
|
||||
### Skill guidelines
|
||||
|
||||
- **No external dependencies unless absolutely necessary.** Prefer stdlib Python, curl, and existing Hermes tools (`web_extract`, `terminal`, `read_file`).
|
||||
@@ -567,7 +494,7 @@ branding:
|
||||
agent_name: "My Agent"
|
||||
welcome: "Welcome message"
|
||||
response_label: " ⚔ Agent "
|
||||
prompt_symbol: "⚔"
|
||||
prompt_symbol: "⚔ ❯ "
|
||||
|
||||
tool_prefix: "╎" # Tool output line prefix
|
||||
```
|
||||
@@ -588,57 +515,11 @@ See `hermes_cli/skin_engine.py` for the full schema and existing skins as exampl
|
||||
|
||||
## Cross-Platform Compatibility
|
||||
|
||||
Hermes runs on Linux, macOS, and native Windows (plus WSL2). When writing code
|
||||
that touches the OS, assume *any* platform can hit your code path.
|
||||
|
||||
> **Before you PR:** run `scripts/check-windows-footguns.py` to catch the
|
||||
> common Windows-unsafe patterns in your diff. It's grep-based and cheap;
|
||||
> CI runs it on every PR too.
|
||||
Hermes runs on Linux, macOS, and Windows. When writing code that touches the OS:
|
||||
|
||||
### Critical rules
|
||||
|
||||
1. **Never call `os.kill(pid, 0)` for liveness checks.** `os.kill(pid, 0)`
|
||||
is a standard POSIX idiom to check "is this PID alive" — the signal 0
|
||||
is a no-op permission check. **On Windows it is NOT a no-op.** Python's
|
||||
Windows `os.kill` maps `sig=0` to `CTRL_C_EVENT` (they collide at the
|
||||
integer value 0) and routes it through `GenerateConsoleCtrlEvent(0, pid)`,
|
||||
which broadcasts Ctrl+C to the **entire console process group** containing
|
||||
the target PID. "Probe if alive" silently becomes "kill the target and
|
||||
often unrelated processes sharing its console." See [bpo-14484](https://bugs.python.org/issue14484)
|
||||
(open since 2012 — will never be fixed for compat reasons).
|
||||
|
||||
**Preferred:** use `psutil` (a core dependency — always available):
|
||||
|
||||
```python
|
||||
import psutil
|
||||
if psutil.pid_exists(pid):
|
||||
# process is alive — safe on every platform
|
||||
...
|
||||
```
|
||||
|
||||
If you specifically need the hermes wrapper (it has a stdlib fallback
|
||||
for scaffold-phase imports before pip install finishes), use
|
||||
`gateway.status._pid_exists(pid)`. It calls `psutil.pid_exists` first
|
||||
and falls back to a hand-rolled `OpenProcess + WaitForSingleObject`
|
||||
dance on Windows only when psutil is somehow missing.
|
||||
|
||||
Audit grep for new callsites: `rg "os\.kill\([^,]+,\s*0\s*\)"`. Any hit
|
||||
in non-test code is presumptively a Windows silent-kill bug.
|
||||
|
||||
2. **Use `shutil.which()` before shelling out — don't assume Windows has
|
||||
tools Linux has.** `wmic` was removed in Windows 10 21H1 and later. `ps`,
|
||||
`kill`, `grep`, `awk`, `fuser`, `lsof`, `pgrep`, and most POSIX CLI tools
|
||||
simply don't exist on Windows. Test availability with
|
||||
`shutil.which("tool")` and fall back to a Windows-native equivalent —
|
||||
usually PowerShell via `subprocess.run(["powershell", "-NoProfile",
|
||||
"-Command", ...])`.
|
||||
|
||||
For process enumeration: PowerShell's `Get-CimInstance Win32_Process` is
|
||||
the modern replacement for `wmic process`. See
|
||||
`hermes_cli/gateway.py::_scan_gateway_pids` for the pattern.
|
||||
|
||||
3. **`termios` and `fcntl` are Unix-only.** Always catch both `ImportError`
|
||||
and `NotImplementedError`:
|
||||
1. **`termios` and `fcntl` are Unix-only.** Always catch both `ImportError` and `NotImplementedError`:
|
||||
```python
|
||||
try:
|
||||
from simple_term_menu import TerminalMenu
|
||||
@@ -651,126 +532,24 @@ that touches the OS, assume *any* platform can hit your code path.
|
||||
idx = int(input("Choice: ")) - 1
|
||||
```
|
||||
|
||||
4. **File encoding.** Windows may save `.env` files in `cp1252`. Always
|
||||
handle encoding errors:
|
||||
2. **File encoding.** Windows may save `.env` files in `cp1252`. Always handle encoding errors:
|
||||
```python
|
||||
try:
|
||||
load_dotenv(env_path)
|
||||
except UnicodeDecodeError:
|
||||
load_dotenv(env_path, encoding="latin-1")
|
||||
```
|
||||
Config files (`config.yaml`) may be saved with a UTF-8 BOM by Notepad and
|
||||
similar editors — use `encoding="utf-8-sig"` when reading files that
|
||||
could have been touched by a Windows GUI editor.
|
||||
|
||||
5. **Process management.** `os.setsid()`, `os.killpg()`, `os.fork()`,
|
||||
`os.getuid()`, and POSIX signal handling differ on Windows. Guard with
|
||||
`platform.system()`, `sys.platform`, or `hasattr(os, "setsid")`:
|
||||
3. **Process management.** `os.setsid()`, `os.killpg()`, and signal handling differ on Windows. Use platform checks:
|
||||
```python
|
||||
import platform
|
||||
if platform.system() != "Windows":
|
||||
kwargs["preexec_fn"] = os.setsid
|
||||
else:
|
||||
kwargs["creationflags"] = subprocess.CREATE_NEW_PROCESS_GROUP
|
||||
```
|
||||
|
||||
**Preferred:** for killing a process AND its children (what `os.killpg`
|
||||
does on POSIX), use `psutil` — it works on every platform:
|
||||
```python
|
||||
import psutil
|
||||
try:
|
||||
parent = psutil.Process(pid)
|
||||
# Kill children first (leaf-up), then the parent.
|
||||
for child in parent.children(recursive=True):
|
||||
child.kill()
|
||||
parent.kill()
|
||||
except psutil.NoSuchProcess:
|
||||
pass
|
||||
```
|
||||
4. **Path separators.** Use `pathlib.Path` instead of string concatenation with `/`.
|
||||
|
||||
6. **Signals that don't exist on Windows: `SIGALRM`, `SIGCHLD`, `SIGHUP`,
|
||||
`SIGUSR1`, `SIGUSR2`, `SIGPIPE`, `SIGQUIT`, `SIGKILL`.** Python's
|
||||
`signal` module raises `AttributeError` at import time if you reference
|
||||
them on Windows. Use `getattr(signal, "SIGKILL", signal.SIGTERM)` or
|
||||
gate the whole block behind a platform check. `loop.add_signal_handler`
|
||||
raises `NotImplementedError` on Windows — always catch it.
|
||||
|
||||
7. **Path separators.** Use `pathlib.Path` instead of string concatenation
|
||||
with `/`. Forward slashes work almost everywhere on Windows, but
|
||||
`subprocess.run(["cmd.exe", "/c", ...])` and other shell contexts can
|
||||
require backslashes — convert with `str(path)` at the subprocess boundary,
|
||||
not inside Python logic.
|
||||
|
||||
8. **Symlinks need elevated privileges on Windows** (unless Developer Mode is
|
||||
on). Tests that create symlinks need `@pytest.mark.skipif(sys.platform ==
|
||||
"win32", reason="Symlinks require elevated privileges on Windows")`.
|
||||
|
||||
9. **POSIX file modes (0o600, 0o644, etc.) are NOT enforced on NTFS** by
|
||||
default. Tests that assert on `stat().st_mode & 0o777` must skip on
|
||||
Windows — the concept doesn't translate. Use ACLs (`icacls`, `pywin32`)
|
||||
for Windows secret-file protection if needed.
|
||||
|
||||
10. **Detached background daemons on Windows need `pythonw.exe`, NOT
|
||||
`python.exe`.** `python.exe` always allocates or attaches to a console,
|
||||
which makes it vulnerable to `CTRL_C_EVENT` broadcasts from any sibling
|
||||
process. `pythonw.exe` is the no-console variant. Combine with
|
||||
`CREATE_NO_WINDOW | DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP |
|
||||
CREATE_BREAKAWAY_FROM_JOB` in `subprocess.Popen(creationflags=...)`.
|
||||
See `hermes_cli/gateway_windows.py::_spawn_detached` for the reference
|
||||
implementation.
|
||||
|
||||
11. **`subprocess.Popen` with `.cmd` or `.bat` shims needs `shutil.which`
|
||||
to resolve.** Passing `"agent-browser"` to `Popen` on Windows finds
|
||||
the extensionless POSIX shebang shim in `node_modules/.bin/`, which
|
||||
`CreateProcessW` can't execute — you'll get `WinError 193 "not a valid
|
||||
Win32 application"`. Use `shutil.which("agent-browser", path=local_bin)`
|
||||
which honors PATHEXT and picks the `.CMD` variant on Windows.
|
||||
|
||||
12. **Don't use shell shebangs as a way to run Python.** `#!/usr/bin/env
|
||||
python` only works when the file is executed through a Unix shell.
|
||||
`subprocess.run(["./myscript.py"])` on Windows fails even if the file
|
||||
has a shebang line. Always invoke Python explicitly:
|
||||
`[sys.executable, "myscript.py"]`.
|
||||
|
||||
13. **Shell commands in installers.** If you change `scripts/install.sh`,
|
||||
make the equivalent change in `scripts/install.ps1`. The two scripts
|
||||
are the canonical example of "works on Linux does not mean works on
|
||||
Windows" and have drifted multiple times — keep them in lockstep.
|
||||
|
||||
14. **Known paths that are OneDrive-redirected on Windows:** Desktop,
|
||||
Documents, Pictures, Videos. The "real" path when OneDrive Backup is
|
||||
enabled is `%USERPROFILE%\OneDrive\Desktop` (etc.), NOT
|
||||
`%USERPROFILE%\Desktop` (which exists as an empty husk). Resolve the
|
||||
real location via `ctypes` + `SHGetKnownFolderPath` or by reading the
|
||||
`Shell Folders` registry key — never assume `~/Desktop`.
|
||||
|
||||
15. **CRLF vs LF in generated scripts.** Windows `cmd.exe` and `schtasks`
|
||||
parse line-by-line; mixed or LF-only line endings can break multi-line
|
||||
`.cmd` / `.bat` files. Use `open(path, "w", encoding="utf-8",
|
||||
newline="\r\n")` — or `open(path, "wb")` + explicit bytes — when
|
||||
generating scripts Windows will execute.
|
||||
|
||||
16. **Two different quoting schemes in one command line.** `subprocess.run
|
||||
(["schtasks", "/TR", some_cmd])` → schtasks itself parses `/TR`, AND
|
||||
the `some_cmd` string is re-parsed by `cmd.exe` when the task fires.
|
||||
Different parsers, different escape rules. Use two separate quoting
|
||||
helpers and never cross them. See `hermes_cli/gateway_windows.py::
|
||||
_quote_cmd_script_arg` and `_quote_schtasks_arg` for the reference
|
||||
pair.
|
||||
|
||||
### Testing cross-platform
|
||||
|
||||
Tests that use POSIX-only syscalls need a skip marker. Common ones:
|
||||
- Symlinks → `@pytest.mark.skipif(sys.platform == "win32", ...)`
|
||||
- `0o600` file modes → `@pytest.mark.skipif(sys.platform.startswith("win"), ...)`
|
||||
- `signal.SIGALRM` → Unix-only (see `tests/conftest.py::_enforce_test_timeout`)
|
||||
- `os.setsid` / `os.fork` → Unix-only
|
||||
- Live Winsock / Windows-specific regression tests →
|
||||
`@pytest.mark.skipif(sys.platform != "win32", reason="Windows-specific regression")`
|
||||
|
||||
If you monkeypatch `sys.platform` for cross-platform tests, also patch
|
||||
`platform.system()` / `platform.release()` / `platform.mac_ver()` — each
|
||||
re-reads the real OS independently, so half-patched tests still route
|
||||
through the wrong branch on a Windows runner.
|
||||
5. **Shell commands in installers.** If you change `scripts/install.sh`, check if the equivalent change is needed in `scripts/install.ps1`.
|
||||
|
||||
---
|
||||
|
||||
@@ -800,47 +579,6 @@ Hermes has terminal access. Security matters.
|
||||
|
||||
If your PR affects security, note it explicitly in the description.
|
||||
|
||||
### Dependency pinning policy (supply chain hardening)
|
||||
|
||||
After the [litellm supply chain compromise](https://github.com/BerriAI/litellm/issues/24512) in March 2026 and the [Mini Shai-Hulud worm campaign](https://socket.dev/blog/tanstack-npm-packages-compromised-mini-shai-hulud-supply-chain-attack) in May 2026, all dependencies must follow these rules:
|
||||
|
||||
| Source type | Required treatment | Rationale |
|
||||
|---|---|---|
|
||||
| **PyPI package** | `>=floor,<next_major` | PyPI versions are immutable once published, but new versions can be pushed into your range. A `<next_major` ceiling stops a 1.x install from upgrading to a malicious 2.0.0. |
|
||||
| **Git URL** (atroposlib, tinker, yc-bench, Baileys) | Full commit SHA | Branches and tags are mutable refs; SHA is content-addressed. |
|
||||
| **GitHub Actions** | Full commit SHA + version comment | Action tags are mutable refs (e.g. tj-actions/changed-files March 2025). Pin as `uses: owner/action@<sha> # vX.Y.Z` |
|
||||
| **CI-only pip installs** | `==exact` | Hermetic CI builds; churn is acceptable. |
|
||||
|
||||
**Every new PyPI dependency in a PR must have a `<next_major` upper bound.** PRs adding unbounded `>=X.Y.Z` specs will be rejected by reviewers. The `supply-chain-audit.yml` CI workflow also flags dependency manifest changes for manual review.
|
||||
|
||||
**How to determine the ceiling:**
|
||||
- If the package is at version `1.x.y`, use `<2`.
|
||||
- If the package is at version `0.x.y` (pre-1.0), use `<0.(current_minor + 2)` — e.g. if current is `0.29.x`, use `<0.32`. This gives ~2 minor versions of headroom while keeping the window small enough that a hostile takeover version is unlikely to land inside it.
|
||||
- Exception: packages with very stable APIs (e.g. `aiohttp-socks`) can use `<1` at reviewer discretion.
|
||||
|
||||
**Examples:**
|
||||
```toml
|
||||
# ✅ Correct — post-1.0
|
||||
"openai>=2.21.0,<3"
|
||||
"pydantic>=2.12.5,<3"
|
||||
|
||||
# ✅ Correct — pre-1.0 (tight minor window)
|
||||
"asyncpg>=0.29,<0.32"
|
||||
"aiosqlite>=0.20,<0.23"
|
||||
"hindsight-client>=0.4.22,<0.5"
|
||||
|
||||
# ❌ Rejected — no upper bound
|
||||
"some-package>=1.2.3"
|
||||
|
||||
# ❌ Rejected — too tight (blocks legitimate patches)
|
||||
"some-package==1.2.3"
|
||||
|
||||
# ❌ Rejected — too loose for pre-1.0 (allows 80 minor versions)
|
||||
"some-package>=0.20,<1"
|
||||
```
|
||||
|
||||
**Reference PRs:** #2796 (litellm removal), #2810 (upper bounds pass), #9801 (SHA pinning + supply-chain-audit CI).
|
||||
|
||||
---
|
||||
|
||||
## Pull Request Process
|
||||
@@ -857,9 +595,9 @@ refactor/description # Code restructuring
|
||||
|
||||
### Before submitting
|
||||
|
||||
1. **Run tests**: `scripts/run_tests.sh` (recommended; same as CI) or `pytest tests/ -v` with the project venv activated
|
||||
1. **Run tests**: `pytest tests/ -v`
|
||||
2. **Test manually**: Run `hermes` and exercise the code path you changed
|
||||
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2
|
||||
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider Windows and macOS
|
||||
4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature.
|
||||
|
||||
### PR description
|
||||
|
||||
113
Dockerfile
113
Dockerfile
@@ -1,120 +1,29 @@
|
||||
FROM ghcr.io/astral-sh/uv:0.11.6-python3.13-trixie@sha256:b3c543b6c4f23a5f2df22866bd7857e5d304b67a564f4feab6ac22044dde719b AS uv_source
|
||||
FROM tianon/gosu:1.19-trixie@sha256:3b176695959c71e123eb390d427efc665eeb561b1540e82679c15e992006b8b9 AS gosu_source
|
||||
FROM debian:13.4
|
||||
|
||||
# Disable Python stdout buffering to ensure logs are printed immediately
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
# Store Playwright browsers outside the volume mount so the build-time
|
||||
# install survives the /opt/data volume overlay at runtime.
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
|
||||
|
||||
# Install system dependencies in one layer, clear APT cache
|
||||
# tini reaps orphaned zombie processes (MCP stdio subprocesses, git, bun, etc.)
|
||||
# that would otherwise accumulate when hermes runs as PID 1. See #15012.
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential curl nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
|
||||
build-essential nodejs npm python3 python3-pip ripgrep ffmpeg gcc python3-dev libffi-dev && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
|
||||
RUN useradd -u 10000 -m -d /opt/data hermes
|
||||
|
||||
COPY --chmod=0755 --from=gosu_source /gosu /usr/local/bin/
|
||||
COPY --chmod=0755 --from=uv_source /usr/local/bin/uv /usr/local/bin/uvx /usr/local/bin/
|
||||
|
||||
COPY . /opt/hermes
|
||||
WORKDIR /opt/hermes
|
||||
|
||||
# ---------- Layer-cached dependency install ----------
|
||||
# Copy only package manifests first so npm install + Playwright are cached
|
||||
# unless the lockfiles themselves change.
|
||||
#
|
||||
# ui-tui/packages/hermes-ink/ is copied IN FULL (not just its manifests)
|
||||
# because it is referenced as a `file:` workspace dependency from
|
||||
# ui-tui/package.json. Copying the tree up front lets npm resolve the
|
||||
# workspace to real content instead of stopping at a bare package.json.
|
||||
COPY package.json package-lock.json ./
|
||||
COPY web/package.json web/package-lock.json web/
|
||||
COPY ui-tui/package.json ui-tui/package-lock.json ui-tui/
|
||||
COPY ui-tui/packages/hermes-ink/ ui-tui/packages/hermes-ink/
|
||||
|
||||
# `npm_config_install_links=false` forces npm to install `file:` deps as
|
||||
# symlinks (the npm 10+ default) even on Debian's older bundled npm 9.x,
|
||||
# which defaults to `install-links=true` and installs file deps as *copies*.
|
||||
# The host-side package-lock.json is generated with a newer npm that uses
|
||||
# symlinks, so an install-as-copy produces a hidden node_modules/.package-lock.json
|
||||
# that permanently disagrees with the root lock on the @hermes/ink entry.
|
||||
# That disagreement trips the TUI launcher's `_tui_need_npm_install()`
|
||||
# check on every startup and triggers a runtime `npm install` that then
|
||||
# fails with EACCES (node_modules/ is root-owned from build time).
|
||||
ENV npm_config_install_links=false
|
||||
|
||||
RUN npm install --prefer-offline --no-audit && \
|
||||
# Install Python and Node dependencies in one layer, no cache
|
||||
RUN pip install --no-cache-dir uv --break-system-packages && \
|
||||
uv pip install --system --break-system-packages --no-cache -e ".[all]" && \
|
||||
npm install --prefer-offline --no-audit && \
|
||||
npx playwright install --with-deps chromium --only-shell && \
|
||||
(cd web && npm install --prefer-offline --no-audit) && \
|
||||
(cd ui-tui && npm install --prefer-offline --no-audit) && \
|
||||
cd /opt/hermes/scripts/whatsapp-bridge && \
|
||||
npm install --prefer-offline --no-audit && \
|
||||
npm cache clean --force
|
||||
|
||||
# ---------- Layer-cached Python dependency install ----------
|
||||
# Copy only pyproject.toml + uv.lock so the Python dep resolve + wheel
|
||||
# download + native-extension compile layer is cached unless those inputs
|
||||
# change. Before this split the Python install sat after `COPY . .`, so
|
||||
# every source-only commit re-did ~4-5 min of dep work on cold builds.
|
||||
#
|
||||
# README.md is referenced by pyproject.toml's `readme =` field, but it's
|
||||
# excluded from the build context by .dockerignore's `*.md`. uv's build
|
||||
# frontend stats the readme path during dep resolution, so we `touch` an
|
||||
# empty placeholder — the real README is restored by `COPY . .` below.
|
||||
#
|
||||
# `uv sync --frozen --no-install-project --extra all --extra messaging`
|
||||
# installs the deps reachable through the composite `[all]` extra
|
||||
# (handpicked set intended for the production image), plus gateway
|
||||
# messaging adapters that should work in the published image without a
|
||||
# first-boot lazy install. We do NOT use `--all-extras`:
|
||||
# that would pull in `[rl]` (atroposlib + tinker + torch + wandb from
|
||||
# git), `[yc-bench]` (another git dep), and `[termux-all]` (Android
|
||||
# redundancy), none of which belong in the published container.
|
||||
#
|
||||
# The editable link is created after the source copy below.
|
||||
COPY pyproject.toml uv.lock ./
|
||||
RUN touch ./README.md
|
||||
RUN uv sync --frozen --no-install-project --extra all --extra messaging
|
||||
WORKDIR /opt/hermes
|
||||
RUN chmod +x /opt/hermes/docker/entrypoint.sh
|
||||
|
||||
# ---------- Source code ----------
|
||||
# .dockerignore excludes node_modules, so the installs above survive.
|
||||
COPY --chown=hermes:hermes . .
|
||||
|
||||
# Build browser dashboard and terminal UI assets.
|
||||
RUN cd web && npm run build && \
|
||||
cd ../ui-tui && npm run build
|
||||
|
||||
# ---------- Permissions ----------
|
||||
# Make install dir world-readable so any HERMES_UID can read it at runtime.
|
||||
# The venv needs to be traversable too.
|
||||
# node_modules trees additionally need to be writable by the hermes user
|
||||
# so the runtime `npm install` triggered by _tui_need_npm_install() in
|
||||
# hermes_cli/main.py succeeds (see #18800). /opt/hermes/web is build-time
|
||||
# only (HERMES_WEB_DIST points at hermes_cli/web_dist) and is intentionally
|
||||
# not chowned here.
|
||||
# The .venv MUST remain hermes-writable so lazy_deps.py can install
|
||||
# remaining optional platform packages and future pin bumps at first use.
|
||||
# Without this, `uv pip install` fails with EACCES and adapters silently
|
||||
# fail to load. See tools/lazy_deps.py.
|
||||
USER root
|
||||
RUN chmod -R a+rX /opt/hermes && \
|
||||
chown -R hermes:hermes /opt/hermes/.venv /opt/hermes/ui-tui /opt/hermes/node_modules
|
||||
# Start as root so the entrypoint can usermod/groupmod + gosu.
|
||||
# If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000).
|
||||
|
||||
# ---------- Link hermes-agent itself (editable) ----------
|
||||
# Deps are already installed in the cached layer above; `--no-deps` makes
|
||||
# this a fast (~1s) egg-link creation with no resolution or downloads.
|
||||
RUN uv pip install --no-cache-dir --no-deps -e "."
|
||||
|
||||
# ---------- Runtime ----------
|
||||
ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
|
||||
ENV HERMES_HOME=/opt/data
|
||||
ENV PATH="/opt/data/.local/bin:${PATH}"
|
||||
RUN mkdir -p /opt/data
|
||||
VOLUME [ "/opt/data" ]
|
||||
ENTRYPOINT [ "/usr/bin/tini", "-g", "--", "/opt/hermes/docker/entrypoint.sh" ]
|
||||
ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
|
||||
|
||||
51
README.md
51
README.md
@@ -9,12 +9,11 @@
|
||||
<a href="https://discord.gg/NousResearch"><img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord"></a>
|
||||
<a href="https://github.com/NousResearch/hermes-agent/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-green?style=for-the-badge" alt="License: MIT"></a>
|
||||
<a href="https://nousresearch.com"><img src="https://img.shields.io/badge/Built%20by-Nous%20Research-blueviolet?style=for-the-badge" alt="Built by Nous Research"></a>
|
||||
<a href="README.zh-CN.md"><img src="https://img.shields.io/badge/Lang-中文-red?style=for-the-badge" alt="中文"></a>
|
||||
</p>
|
||||
|
||||
**The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
|
||||
|
||||
Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NovitaAI](https://novita.ai) (AI-native cloud for Model API, Agent Sandbox, and GPU Cloud), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
|
||||
Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
|
||||
|
||||
<table>
|
||||
<tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>
|
||||
@@ -22,37 +21,23 @@ Use any model you want — [Nous Portal](https://portal.nousresearch.com), [Open
|
||||
<tr><td><b>A closed learning loop</b></td><td>Agent-curated memory with periodic nudges. Autonomous skill creation after complex tasks. Skills self-improve during use. FTS5 session search with LLM summarization for cross-session recall. <a href="https://github.com/plastic-labs/honcho">Honcho</a> dialectic user modeling. Compatible with the <a href="https://agentskills.io">agentskills.io</a> open standard.</td></tr>
|
||||
<tr><td><b>Scheduled automations</b></td><td>Built-in cron scheduler with delivery to any platform. Daily reports, nightly backups, weekly audits — all in natural language, running unattended.</td></tr>
|
||||
<tr><td><b>Delegates and parallelizes</b></td><td>Spawn isolated subagents for parallel workstreams. Write Python scripts that call tools via RPC, collapsing multi-step pipelines into zero-context-cost turns.</td></tr>
|
||||
<tr><td><b>Runs anywhere, not just your laptop</b></td><td>Seven terminal backends — local, Docker, SSH, Singularity, Modal, Daytona, and Vercel Sandbox. Daytona and Modal offer serverless persistence — your agent's environment hibernates when idle and wakes on demand, costing nearly nothing between sessions. Run it on a $5 VPS or a GPU cluster.</td></tr>
|
||||
<tr><td><b>Research-ready</b></td><td>Batch trajectory generation, trajectory compression for training the next generation of tool-calling models.</td></tr>
|
||||
<tr><td><b>Runs anywhere, not just your laptop</b></td><td>Six terminal backends — local, Docker, SSH, Daytona, Singularity, and Modal. Daytona and Modal offer serverless persistence — your agent's environment hibernates when idle and wakes on demand, costing nearly nothing between sessions. Run it on a $5 VPS or a GPU cluster.</td></tr>
|
||||
<tr><td><b>Research-ready</b></td><td>Batch trajectory generation, Atropos RL environments, trajectory compression for training the next generation of tool-calling models.</td></tr>
|
||||
</table>
|
||||
|
||||
---
|
||||
|
||||
## Quick Install
|
||||
|
||||
### Linux, macOS, WSL2, Termux
|
||||
|
||||
```bash
|
||||
curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
|
||||
```
|
||||
|
||||
### Windows (native, PowerShell) — Early Beta
|
||||
|
||||
> **Heads up:** Native Windows support is **early beta**. It installs and runs, but hasn't been road-tested as broadly as our Linux/macOS/WSL2 paths. Please [file issues](https://github.com/NousResearch/hermes-agent/issues) when you hit rough edges. For the most battle-tested Windows setup today, run the Linux/macOS one-liner above inside **WSL2**.
|
||||
|
||||
Run this in PowerShell:
|
||||
|
||||
```powershell
|
||||
iex (irm https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.ps1)
|
||||
```
|
||||
|
||||
The installer handles everything: uv, Python 3.11, Node.js, ripgrep, ffmpeg, **and a portable Git Bash** (MinGit, unpacked to `%LOCALAPPDATA%\hermes\git` — no admin required, completely isolated from any system Git install). Hermes uses this bundled Git Bash to run shell commands.
|
||||
|
||||
If you already have Git installed, the installer detects it and uses that instead. Otherwise a ~45MB MinGit download is all you need — it won't touch or interfere with any system Git.
|
||||
Works on Linux, macOS, WSL2, and Android via Termux. The installer handles the platform-specific setup for you.
|
||||
|
||||
> **Android / Termux:** The tested manual path is documented in the [Termux guide](https://hermes-agent.nousresearch.com/docs/getting-started/termux). On Termux, Hermes installs a curated `.[termux]` extra because the full `.[all]` extra currently pulls Android-incompatible voice dependencies.
|
||||
>
|
||||
> **Windows:** Native Windows is supported as an **early beta** — the PowerShell one-liner above installs everything, but expect rough edges and please file issues when you hit them. If you'd rather use WSL2 (our most battle-tested Windows path), the Linux command works there too. Native Windows install lives under `%LOCALAPPDATA%\hermes`; WSL2 installs under `~/.hermes` as on Linux. The only Hermes feature that currently needs WSL2 specifically is the browser-based dashboard chat pane (it uses a POSIX PTY — classic CLI and gateway both run natively).
|
||||
> **Windows:** Native Windows is not supported. Please install [WSL2](https://learn.microsoft.com/en-us/windows/wsl/install) and run the command above.
|
||||
|
||||
After installation:
|
||||
|
||||
@@ -91,7 +76,7 @@ Hermes has two entry points: start the terminal UI with `hermes`, or run the gat
|
||||
| Set a personality | `/personality [name]` | `/personality [name]` |
|
||||
| Retry or undo the last turn | `/retry`, `/undo` | `/retry`, `/undo` |
|
||||
| Compress context / check usage | `/compress`, `/usage`, `/insights [--days N]` | `/compress`, `/usage`, `/insights [days]` |
|
||||
| Browse skills | `/skills` or `/<skill-name>` | `/<skill-name>` |
|
||||
| Browse skills | `/skills` or `/<skill-name>` | `/skills` or `/<skill-name>` |
|
||||
| Interrupt current work | `Ctrl+C` or send a new message | `/stop` or send a new message |
|
||||
| Platform-specific status | `/platforms` | `/status`, `/sethome` |
|
||||
|
||||
@@ -156,25 +141,24 @@ See `hermes claw migrate --help` for all options, or use the `openclaw-migration
|
||||
|
||||
We welcome contributions! See the [Contributing Guide](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) for development setup, code style, and PR process.
|
||||
|
||||
Quick start for contributors — clone and go with `setup-hermes.sh`:
|
||||
Quick start for contributors:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/NousResearch/hermes-agent.git
|
||||
cd hermes-agent
|
||||
./setup-hermes.sh # installs uv, creates venv, installs .[all], symlinks ~/.local/bin/hermes
|
||||
./hermes # auto-detects the venv, no need to `source` first
|
||||
```
|
||||
|
||||
Manual path (equivalent to the above):
|
||||
|
||||
```bash
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uv venv .venv --python 3.11
|
||||
source .venv/bin/activate
|
||||
uv venv venv --python 3.11
|
||||
source venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
scripts/run_tests.sh
|
||||
python -m pytest tests/ -q
|
||||
```
|
||||
|
||||
> **RL Training (optional):** To work on the RL/Tinker-Atropos integration:
|
||||
> ```bash
|
||||
> git submodule update --init tinker-atropos
|
||||
> uv pip install -e "./tinker-atropos"
|
||||
> ```
|
||||
|
||||
---
|
||||
|
||||
## Community
|
||||
@@ -182,8 +166,7 @@ scripts/run_tests.sh
|
||||
- 💬 [Discord](https://discord.gg/NousResearch)
|
||||
- 📚 [Skills Hub](https://agentskills.io)
|
||||
- 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
|
||||
- 🔌 [computer-use-linux](https://github.com/avifenesh/computer-use-linux) — Linux desktop-control MCP server for Hermes and other MCP hosts, with AT-SPI accessibility trees, Wayland/X11 input, screenshots, and compositor window targeting.
|
||||
- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.
|
||||
- 💡 [Discussions](https://github.com/NousResearch/hermes-agent/discussions)
|
||||
|
||||
---
|
||||
|
||||
|
||||
180
README.zh-CN.md
180
README.zh-CN.md
@@ -1,180 +0,0 @@
|
||||
<p align="center">
|
||||
<img src="assets/banner.png" alt="Hermes Agent" width="100%">
|
||||
</p>
|
||||
|
||||
# Hermes Agent ☤
|
||||
|
||||
<p align="center">
|
||||
<a href="https://hermes-agent.nousresearch.com/docs/"><img src="https://img.shields.io/badge/Docs-hermes--agent.nousresearch.com-FFD700?style=for-the-badge" alt="Documentation"></a>
|
||||
<a href="https://discord.gg/NousResearch"><img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord"></a>
|
||||
<a href="https://github.com/NousResearch/hermes-agent/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-green?style=for-the-badge" alt="License: MIT"></a>
|
||||
<a href="https://nousresearch.com"><img src="https://img.shields.io/badge/Built%20by-Nous%20Research-blueviolet?style=for-the-badge" alt="Built by Nous Research"></a>
|
||||
<a href="README.md"><img src="https://img.shields.io/badge/Lang-English-lightgrey?style=for-the-badge" alt="English"></a>
|
||||
</p>
|
||||
|
||||
**由 [Nous Research](https://nousresearch.com) 构建的自进化 AI 代理。** 它是唯一内置学习闭环的智能代理——从经验中创建技能,在使用中改进技能,主动持久化知识,搜索过往对话,并在跨会话中逐步构建对你的深度理解。可以在 $5 的 VPS 上运行,也可以在 GPU 集群上运行,或者使用几乎零成本的 Serverless 基础设施。它不绑定你的笔记本——你可以在 Telegram 上与它对话,而它在云端 VM 上工作。
|
||||
|
||||
支持任意模型——[Nous Portal](https://portal.nousresearch.com)、[OpenRouter](https://openrouter.ai)(200+ 模型)、[NVIDIA NIM](https://build.nvidia.com)(Nemotron)、[小米 MiMo](https://platform.xiaomimimo.com)、[z.ai/GLM](https://z.ai)、[Kimi/Moonshot](https://platform.moonshot.ai)、[MiniMax](https://www.minimax.io)、[Hugging Face](https://huggingface.co)、OpenAI,或自定义端点。使用 `hermes model` 即可切换——无需改代码,无锁定。
|
||||
|
||||
<table>
|
||||
<tr><td><b>真正的终端界面</b></td><td>完整的 TUI,支持多行编辑、斜杠命令自动补全、对话历史、中断重定向和流式工具输出。</td></tr>
|
||||
<tr><td><b>随你所在</b></td><td>Telegram、Discord、Slack、WhatsApp、Signal 和 CLI——全部从单个网关进程运行。语音备忘录转写、跨平台对话连续性。</td></tr>
|
||||
<tr><td><b>闭环学习</b></td><td>代理管理记忆并定期自我提醒。复杂任务后自动创建技能。技能在使用中自我改进。FTS5 会话搜索配合 LLM 摘要实现跨会话回溯。<a href="https://github.com/plastic-labs/honcho">Honcho</a> 辩证式用户建模。兼容 <a href="https://agentskills.io">agentskills.io</a> 开放标准。</td></tr>
|
||||
<tr><td><b>定时自动化</b></td><td>内置 cron 调度器,支持向任何平台投递。日报、夜间备份、周审计——全部用自然语言描述,无人值守运行。</td></tr>
|
||||
<tr><td><b>委派与并行</b></td><td>生成隔离子代理处理并行工作流。编写 Python 脚本通过 RPC 调用工具,将多步管道压缩为零上下文开销的轮次。</td></tr>
|
||||
<tr><td><b>随处运行</b></td><td>六种终端后端——本地、Docker、SSH、Daytona、Singularity 和 Modal。Daytona 和 Modal 提供 Serverless 持久化——代理环境空闲时休眠、按需唤醒,空闲期间几乎零成本。$5 VPS 或 GPU 集群都能跑。</td></tr>
|
||||
<tr><td><b>研究就绪</b></td><td>批量轨迹生成、轨迹压缩——用于训练下一代工具调用模型。</td></tr>
|
||||
</table>
|
||||
|
||||
---
|
||||
|
||||
## 快速安装
|
||||
|
||||
```bash
|
||||
curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
|
||||
```
|
||||
|
||||
支持 Linux、macOS、WSL2 和 Android (Termux)。安装程序会自动处理平台特定的配置。
|
||||
|
||||
> **Android / Termux:** 已测试的手动安装路径请参考 [Termux 指南](https://hermes-agent.nousresearch.com/docs/getting-started/termux)。在 Termux 上,Hermes 会安装精选的 `.[termux]` 扩展,因为完整的 `.[all]` 扩展会拉取 Android 不兼容的语音依赖。
|
||||
>
|
||||
> **Windows:** 原生 Windows 不受支持。请安装 [WSL2](https://learn.microsoft.com/zh-cn/windows/wsl/install) 并运行上述命令。
|
||||
|
||||
安装后:
|
||||
|
||||
```bash
|
||||
source ~/.bashrc # 重新加载 shell(或: source ~/.zshrc)
|
||||
hermes # 开始对话!
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 快速入门
|
||||
|
||||
```bash
|
||||
hermes # 交互式 CLI — 开始对话
|
||||
hermes model # 选择 LLM 提供商和模型
|
||||
hermes tools # 配置启用的工具
|
||||
hermes config set # 设置单个配置项
|
||||
hermes gateway # 启动消息网关(Telegram、Discord 等)
|
||||
hermes setup # 运行完整设置向导(一次性配置所有内容)
|
||||
hermes claw migrate # 从 OpenClaw 迁移(如果来自 OpenClaw)
|
||||
hermes update # 更新到最新版本
|
||||
hermes doctor # 诊断问题
|
||||
```
|
||||
|
||||
📖 **[完整文档 →](https://hermes-agent.nousresearch.com/docs/)**
|
||||
|
||||
## CLI 与消息平台 快速对照
|
||||
|
||||
Hermes 有两种入口:用 `hermes` 启动终端 UI,或运行网关从 Telegram、Discord、Slack、WhatsApp、Signal 或 Email 与之对话。进入对话后,许多斜杠命令在两种界面中通用。
|
||||
|
||||
| 操作 | CLI | 消息平台 |
|
||||
|------|-----|----------|
|
||||
| 开始对话 | `hermes` | 运行 `hermes gateway setup` + `hermes gateway start`,然后给机器人发消息 |
|
||||
| 开始新对话 | `/new` 或 `/reset` | `/new` 或 `/reset` |
|
||||
| 更换模型 | `/model [provider:model]` | `/model [provider:model]` |
|
||||
| 设置人格 | `/personality [name]` | `/personality [name]` |
|
||||
| 重试或撤销上一轮 | `/retry`、`/undo` | `/retry`、`/undo` |
|
||||
| 压缩上下文 / 查看用量 | `/compress`、`/usage`、`/insights [--days N]` | `/compress`、`/usage`、`/insights [days]` |
|
||||
| 浏览技能 | `/skills` 或 `/<skill-name>` | `/skills` 或 `/<skill-name>` |
|
||||
| 中断当前工作 | `Ctrl+C` 或发送新消息 | `/stop` 或发送新消息 |
|
||||
| 平台特定状态 | `/platforms` | `/status`、`/sethome` |
|
||||
|
||||
完整命令列表请参阅 [CLI 指南](https://hermes-agent.nousresearch.com/docs/user-guide/cli) 和 [消息网关指南](https://hermes-agent.nousresearch.com/docs/user-guide/messaging)。
|
||||
|
||||
---
|
||||
|
||||
## 文档
|
||||
|
||||
所有文档位于 **[hermes-agent.nousresearch.com/docs](https://hermes-agent.nousresearch.com/docs/)**:
|
||||
|
||||
| 章节 | 内容 |
|
||||
|------|------|
|
||||
| [快速开始](https://hermes-agent.nousresearch.com/docs/getting-started/quickstart) | 安装 → 设置 → 2 分钟内开始首次对话 |
|
||||
| [CLI 使用](https://hermes-agent.nousresearch.com/docs/user-guide/cli) | 命令、快捷键、人格、会话 |
|
||||
| [配置](https://hermes-agent.nousresearch.com/docs/user-guide/configuration) | 配置文件、提供商、模型、所有选项 |
|
||||
| [消息网关](https://hermes-agent.nousresearch.com/docs/user-guide/messaging) | Telegram、Discord、Slack、WhatsApp、Signal、Home Assistant |
|
||||
| [安全](https://hermes-agent.nousresearch.com/docs/user-guide/security) | 命令审批、DM 配对、容器隔离 |
|
||||
| [工具与工具集](https://hermes-agent.nousresearch.com/docs/user-guide/features/tools) | 40+ 工具、工具集系统、终端后端 |
|
||||
| [技能系统](https://hermes-agent.nousresearch.com/docs/user-guide/features/skills) | 过程记忆、技能中心、创建技能 |
|
||||
| [记忆](https://hermes-agent.nousresearch.com/docs/user-guide/features/memory) | 持久记忆、用户画像、最佳实践 |
|
||||
| [MCP 集成](https://hermes-agent.nousresearch.com/docs/user-guide/features/mcp) | 连接任意 MCP 服务器扩展能力 |
|
||||
| [定时调度](https://hermes-agent.nousresearch.com/docs/user-guide/features/cron) | 定时任务与平台投递 |
|
||||
| [上下文文件](https://hermes-agent.nousresearch.com/docs/user-guide/features/context-files) | 影响每次对话的项目上下文 |
|
||||
| [架构](https://hermes-agent.nousresearch.com/docs/developer-guide/architecture) | 项目结构、代理循环、关键类 |
|
||||
| [贡献](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) | 开发设置、PR 流程、代码风格 |
|
||||
| [CLI 参考](https://hermes-agent.nousresearch.com/docs/reference/cli-commands) | 所有命令和标志 |
|
||||
| [环境变量](https://hermes-agent.nousresearch.com/docs/reference/environment-variables) | 完整环境变量参考 |
|
||||
|
||||
---
|
||||
|
||||
## 从 OpenClaw 迁移
|
||||
|
||||
如果你来自 OpenClaw,Hermes 可以自动导入你的设置、记忆、技能和 API 密钥。
|
||||
|
||||
**首次安装时:** 安装向导(`hermes setup`)会自动检测 `~/.openclaw` 并在配置开始前提供迁移选项。
|
||||
|
||||
**安装后任意时间:**
|
||||
|
||||
```bash
|
||||
hermes claw migrate # 交互式迁移(完整预设)
|
||||
hermes claw migrate --dry-run # 预览将要迁移的内容
|
||||
hermes claw migrate --preset user-data # 仅迁移用户数据,不含密钥
|
||||
hermes claw migrate --overwrite # 覆盖已有冲突
|
||||
```
|
||||
|
||||
导入内容:
|
||||
- **SOUL.md** — 人格文件
|
||||
- **记忆** — MEMORY.md 和 USER.md 条目
|
||||
- **技能** — 用户创建的技能 → `~/.hermes/skills/openclaw-imports/`
|
||||
- **命令白名单** — 审批模式
|
||||
- **消息设置** — 平台配置、允许用户、工作目录
|
||||
- **API 密钥** — 白名单中的密钥(Telegram、OpenRouter、OpenAI、Anthropic、ElevenLabs)
|
||||
- **TTS 资产** — 工作区音频文件
|
||||
- **工作区指令** — AGENTS.md(使用 `--workspace-target`)
|
||||
|
||||
使用 `hermes claw migrate --help` 查看所有选项,或使用 `openclaw-migration` 技能进行交互式代理引导迁移(含干运行预览)。
|
||||
|
||||
---
|
||||
|
||||
## 贡献
|
||||
|
||||
欢迎贡献!请参阅 [贡献指南](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) 了解开发设置、代码风格和 PR 流程。
|
||||
|
||||
贡献者快速开始——克隆并使用 `setup-hermes.sh`:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/NousResearch/hermes-agent.git
|
||||
cd hermes-agent
|
||||
./setup-hermes.sh # 安装 uv、创建 venv、安装 .[all]、创建符号链接 ~/.local/bin/hermes
|
||||
./hermes # 自动检测 venv,无需先 source
|
||||
```
|
||||
|
||||
手动安装(等效于上述命令):
|
||||
|
||||
```bash
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uv venv venv --python 3.11
|
||||
source venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
python -m pytest tests/ -q
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 社区
|
||||
|
||||
- 💬 [Discord](https://discord.gg/NousResearch)
|
||||
- 📚 [技能中心](https://agentskills.io)
|
||||
- 🐛 [问题反馈](https://github.com/NousResearch/hermes-agent/issues)
|
||||
- 💡 [讨论区](https://github.com/NousResearch/hermes-agent/discussions)
|
||||
- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — 社区微信桥接:在同一微信账号上运行 Hermes Agent 和 OpenClaw。
|
||||
|
||||
---
|
||||
|
||||
## 许可证
|
||||
|
||||
MIT — 详见 [LICENSE](LICENSE)。
|
||||
|
||||
由 [Nous Research](https://nousresearch.com) 构建。
|
||||
@@ -1,27 +0,0 @@
|
||||
# Hermes Agent v0.10.0 (v2026.4.16)
|
||||
|
||||
**Release Date:** April 16, 2026
|
||||
|
||||
> The Tool Gateway release — paid Nous Portal subscribers can now use web search, image generation, text-to-speech, and browser automation through their existing subscription with zero additional API keys.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **Nous Tool Gateway** — Paid [Nous Portal](https://portal.nousresearch.com) subscribers now get automatic access to **web search** (Firecrawl), **image generation** (FAL / FLUX 2 Pro), **text-to-speech** (OpenAI TTS), and **browser automation** (Browser Use) through their existing subscription. No separate API keys needed — just run `hermes model`, select Nous Portal, and pick which tools to enable. Per-tool opt-in via `use_gateway` config, full integration with `hermes tools` and `hermes status`, and the runtime correctly prefers the gateway even when direct API keys exist. Replaces the old hidden `HERMES_ENABLE_NOUS_MANAGED_TOOLS` env var with clean subscription-based detection. ([#11206](https://github.com/NousResearch/hermes-agent/pull/11206), based on work by @jquesnelle; docs: [#11208](https://github.com/NousResearch/hermes-agent/pull/11208))
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Bug Fixes & Improvements
|
||||
|
||||
This release includes 180+ commits with numerous bug fixes, platform improvements, and reliability enhancements across the agent core, gateway, CLI, and tool system. Full details will be published in the v0.11.0 changelog.
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributors
|
||||
|
||||
- **@jquesnelle** (emozilla) — Original Tool Gateway implementation ([#10799](https://github.com/NousResearch/hermes-agent/pull/10799)), salvaged and shipped in this release
|
||||
|
||||
---
|
||||
|
||||
**Full Changelog**: [v2026.4.13...v2026.4.16](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.16)
|
||||
@@ -1,453 +0,0 @@
|
||||
# Hermes Agent v0.11.0 (v2026.4.23)
|
||||
|
||||
**Release Date:** April 23, 2026
|
||||
**Since v0.9.0:** 1,556 commits · 761 merged PRs · 1,314 files changed · 224,174 insertions · 29 community contributors (290 including co-authors)
|
||||
|
||||
> The Interface release — a full React/Ink rewrite of the interactive CLI, a pluggable transport architecture underneath every provider, native AWS Bedrock support, five new inference paths, a 17th messaging platform (QQBot), a dramatically expanded plugin surface, and GPT-5.5 via Codex OAuth.
|
||||
|
||||
This release also folds in all the highlights deferred from v0.10.0 (which shipped only the Nous Tool Gateway) — so it covers roughly two weeks of work across the whole stack.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **New Ink-based TUI** — `hermes --tui` is now a full React/Ink rewrite of the interactive CLI, with a Python JSON-RPC backend (`tui_gateway`). Sticky composer, live streaming with OSC-52 clipboard support, stable picker keys, status bar with per-turn stopwatch and git branch, `/clear` confirm, light-theme preset, and a subagent spawn observability overlay. ~310 commits to `ui-tui/` + `tui_gateway/`. (@OutThisLife + Teknium)
|
||||
|
||||
- **Transport ABC + Native AWS Bedrock** — Format conversion and HTTP transport were extracted from `run_agent.py` into a pluggable `agent/transports/` layer. `AnthropicTransport`, `ChatCompletionsTransport`, `ResponsesApiTransport`, and `BedrockTransport` each own their own format conversion and API shape. Native AWS Bedrock support via the Converse API ships on top of the new abstraction. ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549), [#13347](https://github.com/NousResearch/hermes-agent/pull/13347), [#13366](https://github.com/NousResearch/hermes-agent/pull/13366), [#13430](https://github.com/NousResearch/hermes-agent/pull/13430), [#13805](https://github.com/NousResearch/hermes-agent/pull/13805), [#13814](https://github.com/NousResearch/hermes-agent/pull/13814) — @kshitijk4poor + Teknium)
|
||||
|
||||
- **Five new inference paths** — Native NVIDIA NIM ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774)), Arcee AI ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276)), Step Plan ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893)), Google Gemini CLI OAuth ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270)), and Vercel ai-gateway with pricing + dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223) — @jerilynzheng). Plus Gemini routed through the native AI Studio API for better performance ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674)).
|
||||
|
||||
- **GPT-5.5 over Codex OAuth** — OpenAI's new GPT-5.5 reasoning model is now available through your ChatGPT Codex OAuth, with live model discovery wired into the model picker so new OpenAI releases show up without catalog updates. ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
|
||||
|
||||
- **QQBot — 17th supported platform** — Native QQBot adapter via QQ Official API v2, with QR scan-to-configure setup wizard, streaming cursor, emoji reactions, and DM/group policy gating that matches WeCom/Weixin parity. ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
|
||||
|
||||
- **Plugin surface expanded** — Plugins can now register slash commands (`register_command`), dispatch tools directly (`dispatch_tool`), block tool execution from hooks (`pre_tool_call` can veto), rewrite tool results (`transform_tool_result`), transform terminal output (`transform_terminal_output`), ship image_gen backends, and add custom dashboard tabs. The bundled disk-cleanup plugin is opt-in by default as a reference implementation. ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377), [#10626](https://github.com/NousResearch/hermes-agent/pull/10626), [#10763](https://github.com/NousResearch/hermes-agent/pull/10763), [#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#12929](https://github.com/NousResearch/hermes-agent/pull/12929), [#12944](https://github.com/NousResearch/hermes-agent/pull/12944), [#12972](https://github.com/NousResearch/hermes-agent/pull/12972), [#13799](https://github.com/NousResearch/hermes-agent/pull/13799), [#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
|
||||
|
||||
- **`/steer` — mid-run agent nudges** — `/steer <prompt>` injects a note that the running agent sees after its next tool call, without interrupting the turn or breaking prompt cache. For when you want to course-correct an agent in-flight. ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
|
||||
|
||||
- **Shell hooks** — Wire any shell script as a Hermes lifecycle hook (pre_tool_call, post_tool_call, on_session_start, etc.) without writing a Python plugin. ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
|
||||
|
||||
- **Webhook direct-delivery mode** — Webhook subscriptions can now forward payloads straight to a platform chat without going through the agent — zero-LLM push notifications for alerting, uptime checks, and event streams. ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
|
||||
|
||||
- **Smarter delegation** — Subagents now have an explicit `orchestrator` role that can spawn their own workers, with configurable `max_spawn_depth` (default flat). Concurrent sibling subagents share filesystem state through a file-coordination layer so they don't clobber each other's edits. ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691), [#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
|
||||
- **Auxiliary models — configurable UI + main-model-first** — `hermes model` has a dedicated "Configure auxiliary models" screen for per-task overrides (compression, vision, session_search, title_generation). `auto` routing now defaults to the main model for side tasks across all users (previously aggregator users were silently routed to a cheap provider-side default). ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891), [#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
|
||||
|
||||
- **Dashboard plugin system + live theme switching** — The web dashboard is now extensible. Third-party plugins can add custom tabs, widgets, and views without forking. Paired with a live-switching theme system — themes now control colors, fonts, layout, and density — so users can hot-swap the dashboard look without a reload. Same theming discipline the CLI has, now on the web. ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#10687](https://github.com/NousResearch/hermes-agent/pull/10687), [#14725](https://github.com/NousResearch/hermes-agent/pull/14725))
|
||||
|
||||
- **Dashboard polish** — i18n (English + Chinese), react-router sidebar layout, mobile-responsive, Vercel deployment, real per-session API call tracking, and one-click update + gateway restart buttons. ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), [#9370](https://github.com/NousResearch/hermes-agent/pull/9370), [#9453](https://github.com/NousResearch/hermes-agent/pull/9453), [#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#13526](https://github.com/NousResearch/hermes-agent/pull/13526), [#14004](https://github.com/NousResearch/hermes-agent/pull/14004) — @austinpickett + @DeployFaith + Teknium)
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Core Agent & Architecture
|
||||
|
||||
### Transport Layer (NEW)
|
||||
- **Transport ABC** abstracts format conversion and HTTP transport from `run_agent.py` into `agent/transports/` ([#13347](https://github.com/NousResearch/hermes-agent/pull/13347))
|
||||
- **AnthropicTransport** — Anthropic Messages API path ([#13366](https://github.com/NousResearch/hermes-agent/pull/13366), @kshitijk4poor)
|
||||
- **ChatCompletionsTransport** — default path for OpenAI-compatible providers ([#13805](https://github.com/NousResearch/hermes-agent/pull/13805))
|
||||
- **ResponsesApiTransport** — OpenAI Responses API + Codex build_kwargs wiring ([#13430](https://github.com/NousResearch/hermes-agent/pull/13430), @kshitijk4poor)
|
||||
- **BedrockTransport** — AWS Bedrock Converse API transport ([#13814](https://github.com/NousResearch/hermes-agent/pull/13814))
|
||||
|
||||
### Provider & Model Support
|
||||
- **Native AWS Bedrock provider** via Converse API ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549))
|
||||
- **NVIDIA NIM native provider** (salvage of #11703) ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774))
|
||||
- **Arcee AI direct provider** ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276))
|
||||
- **Step Plan provider** (salvage #6005) ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893), @kshitijk4poor)
|
||||
- **Google Gemini CLI OAuth** inference provider ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270))
|
||||
- **Vercel ai-gateway** with pricing, attribution, and dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223), @jerilynzheng)
|
||||
- **GPT-5.5 over Codex OAuth** with live model discovery in the picker ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
|
||||
- **Gemini routed through native AI Studio API** ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674))
|
||||
- **xAI Grok upgraded to Responses API** ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
|
||||
- **Ollama improvements** — Cloud provider support, GLM continuation, `think=false` control, surrogate sanitization, `/v1` hint ([#10782](https://github.com/NousResearch/hermes-agent/pull/10782))
|
||||
- **Kimi K2.6** across OpenRouter, Nous Portal, native Kimi, and HuggingFace ([#13148](https://github.com/NousResearch/hermes-agent/pull/13148), [#13152](https://github.com/NousResearch/hermes-agent/pull/13152), [#13169](https://github.com/NousResearch/hermes-agent/pull/13169))
|
||||
- **Kimi K2.5** promoted to first position in all model suggestion lists ([#11745](https://github.com/NousResearch/hermes-agent/pull/11745), @kshitijk4poor)
|
||||
- **Xiaomi MiMo v2.5-pro + v2.5** on OpenRouter, Nous Portal, and native ([#14184](https://github.com/NousResearch/hermes-agent/pull/14184), [#14635](https://github.com/NousResearch/hermes-agent/pull/14635), @kshitijk4poor)
|
||||
- **GLM-5V-Turbo** for coding plan ([#9907](https://github.com/NousResearch/hermes-agent/pull/9907))
|
||||
- **Claude Opus 4.7** in Nous Portal catalog ([#11398](https://github.com/NousResearch/hermes-agent/pull/11398))
|
||||
- **OpenRouter elephant-alpha** in curated lists ([#9378](https://github.com/NousResearch/hermes-agent/pull/9378))
|
||||
- **OpenCode-Go** — Kimi K2.6 and Qwen3.5/3.6 Plus in curated catalog ([#13429](https://github.com/NousResearch/hermes-agent/pull/13429))
|
||||
- **minimax/minimax-m2.5:free** in OpenRouter catalog ([#13836](https://github.com/NousResearch/hermes-agent/pull/13836))
|
||||
- **`/model` merges models.dev entries** for lesser-loved providers ([#14221](https://github.com/NousResearch/hermes-agent/pull/14221))
|
||||
- **Per-provider + per-model `request_timeout_seconds`** config ([#12652](https://github.com/NousResearch/hermes-agent/pull/12652))
|
||||
- **Configurable API retry count** via `agent.api_max_retries` ([#14730](https://github.com/NousResearch/hermes-agent/pull/14730))
|
||||
- **ctx_size context length key** for Lemonade server (salvage #8536) ([#14215](https://github.com/NousResearch/hermes-agent/pull/14215))
|
||||
- **Custom provider display name prompt** ([#9420](https://github.com/NousResearch/hermes-agent/pull/9420))
|
||||
- **Recommendation badges** on tool provider selection ([#9929](https://github.com/NousResearch/hermes-agent/pull/9929))
|
||||
- Fix: correct GPT-5 family context lengths in fallback defaults ([#9309](https://github.com/NousResearch/hermes-agent/pull/9309))
|
||||
- Fix: clamp `minimal` reasoning effort to `low` on Responses API ([#9429](https://github.com/NousResearch/hermes-agent/pull/9429))
|
||||
- Fix: strip reasoning item IDs from Responses API input when `store=False` ([#10217](https://github.com/NousResearch/hermes-agent/pull/10217))
|
||||
- Fix: OpenViking correct account default + commit session on `/new` and compress ([#10463](https://github.com/NousResearch/hermes-agent/pull/10463))
|
||||
- Fix: Kimi `/coding` thinking block survival + empty reasoning_content + block ordering (multiple PRs)
|
||||
- Fix: don't send Anthropic thinking to api.kimi.com/coding ([#13826](https://github.com/NousResearch/hermes-agent/pull/13826))
|
||||
- Fix: send `max_tokens`, `reasoning_effort`, and `thinking` for Kimi/Moonshot
|
||||
- Fix: stream reasoning content through OpenAI-compatible providers that emit it
|
||||
|
||||
### Agent Loop & Conversation
|
||||
- **`/steer <prompt>`** — mid-run agent nudges after next tool call ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
|
||||
- **Orchestrator role + configurable spawn depth** for `delegate_task` (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
|
||||
- **Cross-agent file state coordination** for concurrent subagents ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
- **Compressor smart collapse, dedup, anti-thrashing**, template upgrade, hardening ([#10088](https://github.com/NousResearch/hermes-agent/pull/10088))
|
||||
- **Compression summaries respect the conversation's language** ([#12556](https://github.com/NousResearch/hermes-agent/pull/12556))
|
||||
- **Compression model falls back to main model** on permanent 503/404 ([#10093](https://github.com/NousResearch/hermes-agent/pull/10093))
|
||||
- **Auto-continue interrupted agent work** after gateway restart ([#9934](https://github.com/NousResearch/hermes-agent/pull/9934))
|
||||
- **Activity heartbeats** prevent false gateway inactivity timeouts ([#10501](https://github.com/NousResearch/hermes-agent/pull/10501))
|
||||
- **Auxiliary models UI** — dedicated screen for per-task overrides ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891))
|
||||
- **Auxiliary auto routing defaults to main model** for all users ([#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
|
||||
- **PLATFORM_HINTS for Matrix, Mattermost, Feishu** ([#14428](https://github.com/NousResearch/hermes-agent/pull/14428), @alt-glitch)
|
||||
- Fix: reset retry counters after compression; stop poisoning conversation history ([#10055](https://github.com/NousResearch/hermes-agent/pull/10055))
|
||||
- Fix: break compression-exhaustion infinite loop and auto-reset session ([#10063](https://github.com/NousResearch/hermes-agent/pull/10063))
|
||||
- Fix: stale agent timeout, uv venv detection, empty response after tools ([#10065](https://github.com/NousResearch/hermes-agent/pull/10065))
|
||||
- Fix: prevent premature loop exit when weak models return empty after substantive tool calls ([#10472](https://github.com/NousResearch/hermes-agent/pull/10472))
|
||||
- Fix: preserve pre-start terminal interrupts ([#10504](https://github.com/NousResearch/hermes-agent/pull/10504))
|
||||
- Fix: improve interrupt responsiveness during concurrent tool execution ([#10935](https://github.com/NousResearch/hermes-agent/pull/10935))
|
||||
- Fix: word-wrap spinner, interruptable agent join, and delegate_task interrupt ([#10940](https://github.com/NousResearch/hermes-agent/pull/10940))
|
||||
- Fix: `/stop` no longer resets the session ([#9224](https://github.com/NousResearch/hermes-agent/pull/9224))
|
||||
- Fix: honor interrupts during MCP tool waits ([#9382](https://github.com/NousResearch/hermes-agent/pull/9382), @helix4u)
|
||||
- Fix: break stuck session resume loops after repeated restarts ([#9941](https://github.com/NousResearch/hermes-agent/pull/9941))
|
||||
- Fix: empty response nudge crash + placeholder leak to cron targets ([#11021](https://github.com/NousResearch/hermes-agent/pull/11021))
|
||||
- Fix: streaming cursor sanitization to prevent message truncation (multiple PRs)
|
||||
- Fix: resolve `context_length` for plugin context engines ([#9238](https://github.com/NousResearch/hermes-agent/pull/9238))
|
||||
|
||||
### Session & Memory
|
||||
- **Auto-prune old sessions + VACUUM state.db** at startup ([#13861](https://github.com/NousResearch/hermes-agent/pull/13861))
|
||||
- **Honcho overhaul** — context injection, 5-tool surface, cost safety, session isolation ([#10619](https://github.com/NousResearch/hermes-agent/pull/10619))
|
||||
- **Hindsight richer session-scoped retain metadata** (salvage of #6290) ([#13987](https://github.com/NousResearch/hermes-agent/pull/13987))
|
||||
- Fix: deduplicate memory provider tools to prevent 400 on strict providers ([#10511](https://github.com/NousResearch/hermes-agent/pull/10511))
|
||||
- Fix: discover user-installed memory providers from `$HERMES_HOME/plugins/` ([#10529](https://github.com/NousResearch/hermes-agent/pull/10529))
|
||||
- Fix: add `on_memory_write` bridge to sequential tool execution path ([#10507](https://github.com/NousResearch/hermes-agent/pull/10507))
|
||||
- Fix: preserve `session_id` across `previous_response_id` chains in `/v1/responses` ([#10059](https://github.com/NousResearch/hermes-agent/pull/10059))
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ New Ink-based TUI
|
||||
|
||||
A full React/Ink rewrite of the interactive CLI — invoked via `hermes --tui` or `HERMES_TUI=1`. Shipped across ~310 commits to `ui-tui/` and `tui_gateway/`.
|
||||
|
||||
### TUI Foundations
|
||||
- New TUI based on Ink + Python JSON-RPC backend
|
||||
- Prettier + ESLint + vitest tooling for `ui-tui/`
|
||||
- Entry split between `src/entry.tsx` (TTY gate) and `src/app.tsx` (state machine)
|
||||
- Persistent `_SlashWorker` subprocess for slash command dispatch
|
||||
|
||||
### UX & Features
|
||||
- **Stable picker keys, /clear confirm, light-theme preset** ([#12312](https://github.com/NousResearch/hermes-agent/pull/12312), @OutThisLife)
|
||||
- **Git branch in status bar** cwd label ([#12305](https://github.com/NousResearch/hermes-agent/pull/12305), @OutThisLife)
|
||||
- **Per-turn elapsed stopwatch in FaceTicker + done-in sys line** ([#13105](https://github.com/NousResearch/hermes-agent/pull/13105), @OutThisLife)
|
||||
- **Subagent spawn observability overlay** ([#14045](https://github.com/NousResearch/hermes-agent/pull/14045), @OutThisLife)
|
||||
- **Per-prompt elapsed stopwatch in status bar** ([#12948](https://github.com/NousResearch/hermes-agent/pull/12948))
|
||||
- Sticky composer that freezes during scroll
|
||||
- OSC-52 clipboard support for copy across SSH sessions
|
||||
- Virtualized history rendering for performance
|
||||
- Slash command autocomplete via `complete.slash` RPC
|
||||
- Path autocomplete via `complete.path` RPC
|
||||
- Dozens of resize/ghosting/sticky-prompt fixes landed through the week
|
||||
|
||||
### Structural Refactors
|
||||
- Decomposed `app.tsx` into `app/event-handler`, `app/slash-handler`, `app/stores`, `app/hooks` ([#14640](https://github.com/NousResearch/hermes-agent/pull/14640) and surrounding)
|
||||
- Component split: `branding.tsx`, `markdown.tsx`, `prompts.tsx`, `sessionPicker.tsx`, `messageLine.tsx`, `thinking.tsx`, `maskedPrompt.tsx`
|
||||
- Hook split: `useCompletion`, `useInputHistory`, `useQueue`, `useVirtualHistory`
|
||||
|
||||
---
|
||||
|
||||
## 📱 Messaging Platforms (Gateway)
|
||||
|
||||
### New Platforms
|
||||
- **QQBot (17th platform)** — QQ Official API v2 adapter with QR setup, streaming, package split ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
|
||||
|
||||
### Telegram
|
||||
- **Dedicated `TELEGRAM_PROXY` env var + config.yaml proxy support** (closes #9414, #6530, #9074, #7786) ([#10681](https://github.com/NousResearch/hermes-agent/pull/10681))
|
||||
- **`ignored_threads` config** for Telegram groups ([#9530](https://github.com/NousResearch/hermes-agent/pull/9530))
|
||||
- **Config option to disable link previews** (closes #8728) ([#10610](https://github.com/NousResearch/hermes-agent/pull/10610))
|
||||
- **Auto-wrap markdown tables** in code blocks ([#11794](https://github.com/NousResearch/hermes-agent/pull/11794))
|
||||
- Fix: prevent duplicate replies when stream task is cancelled ([#9319](https://github.com/NousResearch/hermes-agent/pull/9319))
|
||||
- Fix: prevent streaming cursor (▉) from appearing as standalone messages ([#9538](https://github.com/NousResearch/hermes-agent/pull/9538))
|
||||
- Fix: retry transient tool sends + cold-boot budget ([#10947](https://github.com/NousResearch/hermes-agent/pull/10947))
|
||||
- Fix: Markdown special char escaping in `send_exec_approval`
|
||||
- Fix: parentheses in URLs during MarkdownV2 link conversion
|
||||
- Fix: Unicode dash normalization in model switch (closes iOS smart-punctuation issue)
|
||||
- Many platform hint / streaming / session-key fixes
|
||||
|
||||
### Discord
|
||||
- **Forum channel support** (salvage of #10145 + media + polish) ([#11920](https://github.com/NousResearch/hermes-agent/pull/11920))
|
||||
- **`DISCORD_ALLOWED_ROLES`** for role-based access control ([#11608](https://github.com/NousResearch/hermes-agent/pull/11608))
|
||||
- **Config option to disable slash commands** (salvage #13130) ([#14315](https://github.com/NousResearch/hermes-agent/pull/14315))
|
||||
- **Native `send_animation`** for inline GIF playback ([#10283](https://github.com/NousResearch/hermes-agent/pull/10283))
|
||||
- **`send_message` Discord media attachments** ([#10246](https://github.com/NousResearch/hermes-agent/pull/10246))
|
||||
- **`/skill` command group** with category subcommands ([#9909](https://github.com/NousResearch/hermes-agent/pull/9909))
|
||||
- **Extract reply text from message references** ([#9781](https://github.com/NousResearch/hermes-agent/pull/9781))
|
||||
|
||||
### Feishu
|
||||
- **Intelligent reply on document comments** with 3-tier access control ([#11898](https://github.com/NousResearch/hermes-agent/pull/11898))
|
||||
- **Show processing state via reactions** on user messages ([#12927](https://github.com/NousResearch/hermes-agent/pull/12927))
|
||||
- **Preserve @mention context for agent consumption** (salvage #13874) ([#14167](https://github.com/NousResearch/hermes-agent/pull/14167))
|
||||
|
||||
### DingTalk
|
||||
- **`require_mention` + `allowed_users` gating** (parity with Slack/Telegram/Discord) ([#11564](https://github.com/NousResearch/hermes-agent/pull/11564))
|
||||
- **QR-code device-flow authorization** for setup wizard ([#11574](https://github.com/NousResearch/hermes-agent/pull/11574))
|
||||
- **AI Cards streaming, emoji reactions, and media handling** (salvage of #10985) ([#11910](https://github.com/NousResearch/hermes-agent/pull/11910))
|
||||
|
||||
### WhatsApp
|
||||
- **`send_voice`** — native audio message delivery ([#13002](https://github.com/NousResearch/hermes-agent/pull/13002))
|
||||
- **`dm_policy` and `group_policy`** parity with WeCom/Weixin/QQ adapters ([#13151](https://github.com/NousResearch/hermes-agent/pull/13151))
|
||||
|
||||
### WeCom / Weixin
|
||||
- **WeCom QR-scan bot creation + interactive setup wizard** (salvage #13923) ([#13961](https://github.com/NousResearch/hermes-agent/pull/13961))
|
||||
|
||||
### Signal
|
||||
- **Media delivery support** via `send_message` ([#13178](https://github.com/NousResearch/hermes-agent/pull/13178))
|
||||
|
||||
### Slack
|
||||
- **Per-thread sessions for DMs by default** ([#10987](https://github.com/NousResearch/hermes-agent/pull/10987))
|
||||
|
||||
### BlueBubbles (iMessage)
|
||||
- Group chat session separation, webhook registration & auth fixes ([#9806](https://github.com/NousResearch/hermes-agent/pull/9806))
|
||||
|
||||
### Gateway Core
|
||||
- **Gateway proxy mode** — forward messages to a remote API server ([#9787](https://github.com/NousResearch/hermes-agent/pull/9787))
|
||||
- **Per-channel ephemeral prompts** (Discord, Telegram, Slack, Mattermost) ([#10564](https://github.com/NousResearch/hermes-agent/pull/10564))
|
||||
- **Surface plugin slash commands** natively on all platforms + decision-capable command hook ([#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
|
||||
- **Support document/archive extensions in MEDIA: tag extraction** (salvage #8255) ([#14307](https://github.com/NousResearch/hermes-agent/pull/14307))
|
||||
- **Recognize `.pdf` in MEDIA: tag extraction** ([#13683](https://github.com/NousResearch/hermes-agent/pull/13683))
|
||||
- **`--all` flag for `gateway start` and `restart`** ([#10043](https://github.com/NousResearch/hermes-agent/pull/10043))
|
||||
- **Notify active sessions on gateway shutdown** + update health check ([#9850](https://github.com/NousResearch/hermes-agent/pull/9850))
|
||||
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
|
||||
- Fix: suppress duplicate replies on interrupt and streaming flood control ([#10235](https://github.com/NousResearch/hermes-agent/pull/10235))
|
||||
- Fix: close temporary agents after one-off tasks ([#11028](https://github.com/NousResearch/hermes-agent/pull/11028), @kshitijk4poor)
|
||||
- Fix: busy-session ack when user messages during active agent run ([#10068](https://github.com/NousResearch/hermes-agent/pull/10068))
|
||||
- Fix: route watch-pattern notifications to the originating session ([#10460](https://github.com/NousResearch/hermes-agent/pull/10460))
|
||||
- Fix: preserve notify context in executor threads ([#10921](https://github.com/NousResearch/hermes-agent/pull/10921), @kshitijk4poor)
|
||||
- Fix: avoid duplicate replies after interrupted long tasks ([#11018](https://github.com/NousResearch/hermes-agent/pull/11018))
|
||||
- Fix: unlink stale PID + lock files on cleanup
|
||||
- Fix: force-unlink stale PID file after `--replace` takeover
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Tool System
|
||||
|
||||
### Plugin Surface (major expansion)
|
||||
- **`register_command()`** — plugins can now add slash commands ([#10626](https://github.com/NousResearch/hermes-agent/pull/10626))
|
||||
- **`dispatch_tool()`** — plugins can invoke tools from their code ([#10763](https://github.com/NousResearch/hermes-agent/pull/10763))
|
||||
- **`pre_tool_call` blocking** — plugins can veto tool execution ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377))
|
||||
- **`transform_tool_result`** — plugins rewrite tool results generically ([#12972](https://github.com/NousResearch/hermes-agent/pull/12972))
|
||||
- **`transform_terminal_output`** — plugins rewrite terminal tool output ([#12929](https://github.com/NousResearch/hermes-agent/pull/12929))
|
||||
- **Namespaced skill registration** for plugin skill bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
|
||||
- **Opt-in-by-default + bundled disk-cleanup plugin** (salvage #12212) ([#12944](https://github.com/NousResearch/hermes-agent/pull/12944))
|
||||
- **Pluggable `image_gen` backends + OpenAI provider** ([#13799](https://github.com/NousResearch/hermes-agent/pull/13799))
|
||||
- **`openai-codex` image_gen plugin** (gpt-image-2 via Codex OAuth) ([#14317](https://github.com/NousResearch/hermes-agent/pull/14317))
|
||||
- **Shell hooks** — wire shell scripts as hook callbacks ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
|
||||
|
||||
### Browser
|
||||
- **`browser_cdp` raw DevTools Protocol passthrough** ([#12369](https://github.com/NousResearch/hermes-agent/pull/12369))
|
||||
- Camofox hardening + connection stability across the window
|
||||
|
||||
### Execute Code
|
||||
- **Project/strict execution modes** (default: project) ([#11971](https://github.com/NousResearch/hermes-agent/pull/11971))
|
||||
|
||||
### Image Generation
|
||||
- **Multi-model FAL support** with picker in `hermes tools` ([#11265](https://github.com/NousResearch/hermes-agent/pull/11265))
|
||||
- **Recraft V3 → V4 Pro, Nano Banana → Pro upgrades** ([#11406](https://github.com/NousResearch/hermes-agent/pull/11406))
|
||||
- **GPT Image 2** in FAL catalog ([#13677](https://github.com/NousResearch/hermes-agent/pull/13677))
|
||||
- **xAI image generation provider** (grok-imagine-image) ([#14765](https://github.com/NousResearch/hermes-agent/pull/14765))
|
||||
|
||||
### TTS / STT / Voice
|
||||
- **Google Gemini TTS provider** ([#11229](https://github.com/NousResearch/hermes-agent/pull/11229))
|
||||
- **xAI Grok STT provider** ([#14473](https://github.com/NousResearch/hermes-agent/pull/14473))
|
||||
- **xAI TTS** (shipped with Responses API upgrade) ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
|
||||
- **KittenTTS local provider** (salvage of #2109) ([#13395](https://github.com/NousResearch/hermes-agent/pull/13395))
|
||||
- **CLI record beep toggle** ([#13247](https://github.com/NousResearch/hermes-agent/pull/13247), @helix4u)
|
||||
|
||||
### Webhook / Cron
|
||||
- **Webhook direct-delivery mode** — zero-LLM push notifications ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
|
||||
- **Cron `wakeAgent` gate** — scripts can skip the agent entirely ([#12373](https://github.com/NousResearch/hermes-agent/pull/12373))
|
||||
- **Cron per-job `enabled_toolsets`** — cap token overhead + cost per job ([#14767](https://github.com/NousResearch/hermes-agent/pull/14767))
|
||||
|
||||
### Delegate
|
||||
- **Orchestrator role** + configurable spawn depth (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
|
||||
- **Cross-agent file state coordination** ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
|
||||
### File / Patch
|
||||
- **`patch` — "did you mean?" feedback** when patch fails to match ([#13435](https://github.com/NousResearch/hermes-agent/pull/13435))
|
||||
|
||||
### API Server
|
||||
- **Stream `/v1/responses` SSE tool events** (salvage #9779) ([#10049](https://github.com/NousResearch/hermes-agent/pull/10049))
|
||||
- **Inline image inputs** on `/v1/chat/completions` and `/v1/responses` ([#12969](https://github.com/NousResearch/hermes-agent/pull/12969))
|
||||
|
||||
### Docker / Podman
|
||||
- **Entry-level Podman support** — `find_docker()` + rootless entrypoint ([#10066](https://github.com/NousResearch/hermes-agent/pull/10066))
|
||||
- **Add docker-cli to Docker image** (salvage #10096) ([#14232](https://github.com/NousResearch/hermes-agent/pull/14232))
|
||||
- **File-sync back to host on teardown** (salvage of #8189 + hardening) ([#11291](https://github.com/NousResearch/hermes-agent/pull/11291))
|
||||
|
||||
### MCP
|
||||
- 12 MCP improvements across the window (status, timeout handling, tool-call forwarding, etc.)
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Skills Ecosystem
|
||||
|
||||
### Skill System
|
||||
- **Namespaced skill registration** for plugin bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
|
||||
- **`hermes skills reset`** to un-stick bundled skills ([#11468](https://github.com/NousResearch/hermes-agent/pull/11468))
|
||||
- **Skills guard opt-in** — `config.skills.guard_agent_created` (default off) ([#14557](https://github.com/NousResearch/hermes-agent/pull/14557))
|
||||
- **Bundled skill scripts runnable out of the box** ([#13384](https://github.com/NousResearch/hermes-agent/pull/13384))
|
||||
- **`xitter` replaced with `xurl`** — the official X API CLI ([#12303](https://github.com/NousResearch/hermes-agent/pull/12303))
|
||||
- **MiniMax-AI/cli as default skill tap** (salvage #7501) ([#14493](https://github.com/NousResearch/hermes-agent/pull/14493))
|
||||
- **Fuzzy `@` file completions + mtime sorting** ([#9467](https://github.com/NousResearch/hermes-agent/pull/9467))
|
||||
|
||||
### New Skills
|
||||
- **concept-diagrams** (salvage of #11045, @v1k22) ([#11363](https://github.com/NousResearch/hermes-agent/pull/11363))
|
||||
- **architecture-diagram** (Cocoon AI port) ([#9906](https://github.com/NousResearch/hermes-agent/pull/9906))
|
||||
- **pixel-art** with hardware palettes and video animation ([#12663](https://github.com/NousResearch/hermes-agent/pull/12663), [#12725](https://github.com/NousResearch/hermes-agent/pull/12725))
|
||||
- **baoyu-comic** ([#13257](https://github.com/NousResearch/hermes-agent/pull/13257), @JimLiu)
|
||||
- **baoyu-infographic** — 21 layouts × 21 styles (salvage #9901) ([#12254](https://github.com/NousResearch/hermes-agent/pull/12254))
|
||||
- **page-agent** — embed Alibaba's in-page GUI agent in your webapp ([#13976](https://github.com/NousResearch/hermes-agent/pull/13976))
|
||||
- **fitness-nutrition** optional skill + optional env var support ([#9355](https://github.com/NousResearch/hermes-agent/pull/9355))
|
||||
- **drug-discovery** — ChEMBL, PubChem, OpenFDA, ADMET ([#9443](https://github.com/NousResearch/hermes-agent/pull/9443))
|
||||
- **touchdesigner-mcp** (salvage of #10081) ([#12298](https://github.com/NousResearch/hermes-agent/pull/12298))
|
||||
- **adversarial-ux-test** optional skill (salvage of #2494, @omnissiah-comelse) ([#13425](https://github.com/NousResearch/hermes-agent/pull/13425))
|
||||
- **maps** — added `guest_house`, `camp_site`, and dual-key bakery lookup ([#13398](https://github.com/NousResearch/hermes-agent/pull/13398))
|
||||
- **llm-wiki** — port provenance markers, source hashing, and quality signals ([#13700](https://github.com/NousResearch/hermes-agent/pull/13700))
|
||||
|
||||
---
|
||||
|
||||
## 📊 Web Dashboard
|
||||
|
||||
- **i18n (English + Chinese) language switcher** ([#9453](https://github.com/NousResearch/hermes-agent/pull/9453))
|
||||
- **Live-switching theme system** ([#10687](https://github.com/NousResearch/hermes-agent/pull/10687))
|
||||
- **Dashboard plugin system** — extend the web UI with custom tabs ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951))
|
||||
- **react-router, sidebar layout, sticky header, dropdown component** ([#9370](https://github.com/NousResearch/hermes-agent/pull/9370), @austinpickett)
|
||||
- **Responsive for mobile** ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), @DeployFaith)
|
||||
- **Vercel deployment** ([#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#11061](https://github.com/NousResearch/hermes-agent/pull/11061), @austinpickett)
|
||||
- **Context window config support** ([#9357](https://github.com/NousResearch/hermes-agent/pull/9357))
|
||||
- **HTTP health probe for cross-container gateway detection** ([#9894](https://github.com/NousResearch/hermes-agent/pull/9894))
|
||||
- **Update + restart gateway buttons** ([#13526](https://github.com/NousResearch/hermes-agent/pull/13526), @austinpickett)
|
||||
- **Real API call count per session** (salvages #10140) ([#14004](https://github.com/NousResearch/hermes-agent/pull/14004))
|
||||
|
||||
---
|
||||
|
||||
## 🖱️ CLI & User Experience
|
||||
|
||||
- **Dynamic shell completion for bash, zsh, and fish** ([#9785](https://github.com/NousResearch/hermes-agent/pull/9785))
|
||||
- **Light-mode skins + skin-aware completion menus** ([#9461](https://github.com/NousResearch/hermes-agent/pull/9461))
|
||||
- **Numbered keyboard shortcuts** on approval and clarify prompts ([#13416](https://github.com/NousResearch/hermes-agent/pull/13416))
|
||||
- **Markdown stripping, compact multiline previews, external editor** ([#12934](https://github.com/NousResearch/hermes-agent/pull/12934))
|
||||
- **`--ignore-user-config` and `--ignore-rules` flags** (port codex#18646) ([#14277](https://github.com/NousResearch/hermes-agent/pull/14277))
|
||||
- **Account limits section in `/usage`** ([#13428](https://github.com/NousResearch/hermes-agent/pull/13428))
|
||||
- **Doctor: Command Installation check** for `hermes` bin symlink ([#10112](https://github.com/NousResearch/hermes-agent/pull/10112))
|
||||
- **ESC cancels secret/sudo prompts**, clearer skip messaging ([#9902](https://github.com/NousResearch/hermes-agent/pull/9902))
|
||||
- Fix: agent-facing text uses `display_hermes_home()` instead of hardcoded `~/.hermes` ([#10285](https://github.com/NousResearch/hermes-agent/pull/10285))
|
||||
- Fix: enforce `config.yaml` as sole CWD source + deprecate `.env` CWD vars + add `hermes memory reset` ([#11029](https://github.com/NousResearch/hermes-agent/pull/11029))
|
||||
|
||||
---
|
||||
|
||||
## 🔒 Security & Reliability
|
||||
|
||||
- **Global toggle to allow private/internal URL resolution** ([#14166](https://github.com/NousResearch/hermes-agent/pull/14166))
|
||||
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
|
||||
- **Telegram callback authorization** on update prompts ([#10536](https://github.com/NousResearch/hermes-agent/pull/10536))
|
||||
- **SECURITY.md** added ([#10532](https://github.com/NousResearch/hermes-agent/pull/10532), @I3eg1nner)
|
||||
- **Warn about legacy hermes.service units** during `hermes update` ([#11918](https://github.com/NousResearch/hermes-agent/pull/11918))
|
||||
- **Complete ASCII-locale UnicodeEncodeError recovery** for `api_messages`/`reasoning_content` (closes #6843) ([#10537](https://github.com/NousResearch/hermes-agent/pull/10537))
|
||||
- **Prevent stale `os.environ` leak** after `clear_session_vars` ([#10527](https://github.com/NousResearch/hermes-agent/pull/10527))
|
||||
- **Prevent agent hang when backgrounding processes** via terminal tool ([#10584](https://github.com/NousResearch/hermes-agent/pull/10584))
|
||||
- Many smaller session-resume, interrupt, streaming, and memory-race fixes throughout the window
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Notable Bug Fixes
|
||||
|
||||
The `fix:` category in this window covers 482 PRs. Highlights:
|
||||
|
||||
- Streaming cursor artifacts filtered from Matrix, Telegram, WhatsApp, Discord (multiple PRs)
|
||||
- `<think>` and `<thought>` blocks filtered from gateway stream consumers ([#9408](https://github.com/NousResearch/hermes-agent/pull/9408))
|
||||
- Gateway display.streaming root-config override regression ([#9799](https://github.com/NousResearch/hermes-agent/pull/9799))
|
||||
- Context `session_search` coerces limit to int (prevents TypeError) ([#10522](https://github.com/NousResearch/hermes-agent/pull/10522))
|
||||
- Memory tool stays available when `fcntl` is unavailable (Windows) ([#9783](https://github.com/NousResearch/hermes-agent/pull/9783))
|
||||
- Trajectory compressor credentials load from `HERMES_HOME/.env` ([#9632](https://github.com/NousResearch/hermes-agent/pull/9632), @Dusk1e)
|
||||
- `@_context_completions` no longer crashes on `@` mention ([#9683](https://github.com/NousResearch/hermes-agent/pull/9683), @kshitijk4poor)
|
||||
- Group session `user_id` no longer treated as `thread_id` in shutdown notifications ([#10546](https://github.com/NousResearch/hermes-agent/pull/10546))
|
||||
- Telegram `platform_hint` — markdown is supported (closes #8261) ([#10612](https://github.com/NousResearch/hermes-agent/pull/10612))
|
||||
- Doctor checks for Kimi China credentials fixed
|
||||
- Streaming: don't suppress final response when commentary message is sent ([#10540](https://github.com/NousResearch/hermes-agent/pull/10540))
|
||||
- Rapid Telegram follow-ups no longer get cut off
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing & CI
|
||||
|
||||
- **Contributor attribution CI check** on PRs ([#9376](https://github.com/NousResearch/hermes-agent/pull/9376))
|
||||
- Hermetic test parity (`scripts/run_tests.sh`) held across this window
|
||||
- Test count stabilized post-Transport refactor; CI matrix held green through the transport rollout
|
||||
|
||||
---
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
- Atropos + wandb links in user guide
|
||||
- ACP / VS Code / Zed / JetBrains integration docs refresh
|
||||
- Webhook subscription docs updated for direct-delivery mode
|
||||
- Plugin author guide expanded for new hooks (`register_command`, `dispatch_tool`, `transform_tool_result`)
|
||||
- Transport layer developer guide added
|
||||
- Website removed Discussions link from README
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributors
|
||||
|
||||
### Core
|
||||
- **@teknium1** (Teknium)
|
||||
|
||||
### Top Community Contributors (by merged PR count)
|
||||
- **@kshitijk4poor** — 49 PRs · Transport refactor (AnthropicTransport, ResponsesApiTransport), Step Plan provider, Xiaomi MiMo v2.5 support, numerous gateway fixes, promoted Kimi K2.5, @ mention crash fix
|
||||
- **@OutThisLife** (Brooklyn) — 31 PRs · TUI polish, git branch in status bar, per-turn stopwatch, stable picker keys, `/clear` confirm, light-theme preset, subagent spawn observability overlay
|
||||
- **@helix4u** — 11 PRs · Voice CLI record beep, MCP tool interrupt handling, assorted stability fixes
|
||||
- **@austinpickett** — 8 PRs · Dashboard react-router + sidebar + sticky header + dropdown, Vercel deployment, update + restart buttons
|
||||
- **@alt-glitch** — 8 PRs · PLATFORM_HINTS for Matrix/Mattermost/Feishu, Matrix fixes
|
||||
- **@ethernet8023** — 3 PRs
|
||||
- **@benbarclay** — 3 PRs
|
||||
- **@Aslaaen** — 2 PRs
|
||||
|
||||
### Also contributing
|
||||
@jerilynzheng (ai-gateway pricing), @JimLiu (baoyu-comic skill), @Dusk1e (trajectory compressor credentials), @DeployFaith (mobile-responsive dashboard), @LeonSGP43, @v1k22 (concept-diagrams), @omnissiah-comelse (adversarial-ux-test), @coekfung (Telegram MarkdownV2 expandable blockquotes), @liftaris (TUI provider resolution), @arihantsethia (skill analytics dashboard), @topcheer + @xing8star (QQBot foundation), @kovyrin, @I3eg1nner (SECURITY.md), @PeterBerthelsen, @lengxii, @priveperfumes, @sjz-ks, @cuyua9, @Disaster-Terminator, @leozeli, @LehaoLin, @trevthefoolish, @loongfay, @MrNiceRicee, @WideLee, @bluefishs, @malaiwah, @bobashopcashier, @dsocolobsky, @iamagenius00, @IAvecilla, @aniruddhaadak80, @Es1la, @asheriif, @walli, @jquesnelle (original Tool Gateway work).
|
||||
|
||||
### All Contributors (alphabetical)
|
||||
|
||||
@0xyg3n, @10ishq, @A-afflatus, @Abnertheforeman, @admin28980, @adybag14-cyber, @akhater, @alexzhu0,
|
||||
@AllardQuek, @alt-glitch, @aniruddhaadak80, @anna-oake, @anniesurla, @anthhub, @areu01or00, @arihantsethia,
|
||||
@arthurbr11, @asheriif, @Aslaaen, @Asunfly, @austinpickett, @AviArora02-commits, @AxDSan, @azhengbot, @Bartok9,
|
||||
@benbarclay, @bennytimz, @bernylinville, @bingo906, @binhnt92, @bkadish, @bluefishs, @bobashopcashier,
|
||||
@brantzh6, @BrennerSpear, @brianclemens, @briandevans, @brooklynnicholson, @bugkill3r, @buray, @burtenshaw,
|
||||
@cdanis, @cgarwood82, @ChimingLiu, @chongweiliu, @christopherwoodall, @coekfung, @cola-runner, @corazzione,
|
||||
@counterposition, @cresslank, @cuyua9, @cypres0099, @danieldoderlein, @davetist, @davidvv, @DeployFaith,
|
||||
@Dev-Mriganka, @devorun, @dieutx, @Disaster-Terminator, @dodo-reach, @draix, @DrStrangerUJN, @dsocolobsky,
|
||||
@Dusk1e, @dyxushuai, @elkimek, @elmatadorgh, @emozilla, @entropidelic, @Erosika, @erosika, @Es1la, @etcircle,
|
||||
@etherman-os, @ethernet8023, @fancydirty, @farion1231, @fatinghenji, @Fatty911, @fengtianyu88, @Feranmi10,
|
||||
@flobo3, @francip, @fuleinist, @g-guthrie, @GenKoKo, @gianfrancopiana, @gnanam1990, @GuyCui, @haileymarshall,
|
||||
@haimu0x, @handsdiff, @hansnow, @hedgeho9X, @helix4u, @hengm3467, @HenkDz, @heykb, @hharry11, @HiddenPuppy,
|
||||
@honghua, @houko, @houziershi, @hsy5571616, @huangke19, @hxp-plus, @Hypn0sis, @I3eg1nner, @iacker,
|
||||
@iamagenius00, @IAvecilla, @iborazzi, @Ifkellx, @ifrederico, @imink, @isaachuangGMICLOUD, @ismell0992-afk,
|
||||
@j0sephz, @Jaaneek, @jackjin1997, @JackTheGit, @jaffarkeikei, @jerilynzheng, @JiaDe-Wu, @Jiawen-lee, @JimLiu,
|
||||
@jinzheng8115, @jneeee, @jplew, @jquesnelle, @Julientalbot, @Junass1, @jvcl, @kagura-agent, @keifergu,
|
||||
@kevinskysunny, @keyuyuan, @konsisumer, @kovyrin, @kshitijk4poor, @leeyang1990, @LehaoLin, @lengxii,
|
||||
@LeonSGP43, @leozeli, @li0near, @liftaris, @Lind3ey, @Linux2010, @liujinkun2025, @LLQWQ, @Llugaes, @lmoncany,
|
||||
@longsizhuo, @lrawnsley, @Lubrsy706, @lumenradley, @luyao618, @lvnilesh, @LVT382009, @m0n5t3r, @Magaav,
|
||||
@MagicRay1217, @malaiwah, @manuelschipper, @Marvae, @MassiveMassimo, @mavrickdeveloper, @maxchernin, @memosr,
|
||||
@meng93, @mengjian-github, @MestreY0d4-Uninter, @Mibayy, @MikeFac, @mikewaters, @milkoor, @minorgod,
|
||||
@MrNiceRicee, @ms-alan, @mvanhorn, @n-WN, @N0nb0at, @Nan93, @NIDNASSER-Abdelmajid, @nish3451, @niyoh120,
|
||||
@nocoo, @nosleepcassette, @NousResearch, @ogzerber, @omnissiah-comelse, @Only-Code-A, @opriz, @OwenYWT, @pedh,
|
||||
@pefontana, @PeterBerthelsen, @phpoh, @pinion05, @plgonzalezrx8, @pradeep7127, @priveperfumes,
|
||||
@projectadmin-dev, @PStarH, @rnijhara, @Roy-oss1, @roytian1217, @RucchiZ, @Ruzzgar, @RyanLee-Dev, @Salt-555,
|
||||
@Sanjays2402, @sgaofen, @sharziki, @shenuu, @shin4, @SHL0MS, @shushuzn, @sicnuyudidi, @simon-gtcl,
|
||||
@simon-marcus, @sirEven, @Sisyphus, @sjz-ks, @snreynolds, @Societus, @Somme4096, @sontianye, @sprmn24,
|
||||
@StefanIsMe, @stephenschoettler, @Swift42, @taeng0204, @taeuk178, @tannerfokkens-maker, @TaroballzChen,
|
||||
@ten-ltw, @teyrebaz33, @Tianworld, @topcheer, @Tranquil-Flow, @trevthefoolish, @TroyMitchell911, @UNLINEARITY,
|
||||
@v1k22, @vivganes, @vominh1919, @vrinek, @VTRiot, @WadydX, @walli, @wenhao7, @WhiteWorld, @WideLee, @wujhsu,
|
||||
@WuTianyi123, @Wysie, @xandersbell, @xiaoqiang243, @xiayh0107, @xinpengdr, @Xowiek, @ycbai, @yeyitech, @ygd58,
|
||||
@youngDoo, @yudaiyan, @Yukipukii1, @yule975, @yyq4193, @yzx9, @ZaynJarvis, @zhang9w0v5, @zhanggttry,
|
||||
@zhangxicen, @zhongyueming1121, @zhouxiaoya12, @zons-zhaozhy
|
||||
|
||||
Also: @maelrx, @Marco Rutsch, @MaxsolcuCrypto, @Mind-Dragon, @Paul Bergeron, @say8hi, @whitehatjr1001.
|
||||
|
||||
|
||||
---
|
||||
|
||||
**Full Changelog**: [v2026.4.13...v2026.4.23](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.23)
|
||||
@@ -1,505 +0,0 @@
|
||||
# Hermes Agent v0.12.0 (v2026.4.30)
|
||||
|
||||
**Release Date:** April 30, 2026
|
||||
**Since v0.11.0:** 1,096 commits · 550 merged PRs · 1,270 files changed · 217,776 insertions · 213 community contributors (including co-authors)
|
||||
|
||||
> The Curator release — Hermes Agent now maintains itself. An autonomous background Curator grades, prunes, and consolidates your skill library on its own schedule. The self-improvement loop that reviews what to save got a substantial upgrade. Four new inference providers, a 18th messaging platform, a 19th via Teams plugin, native Spotify + Google Meet integrations, ComfyUI and TouchDesigner-MCP moved from optional to bundled-by-default, and a ~57% cut to visible TUI cold start.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **Autonomous Curator** — `hermes curator` runs as a background agent on the gateway's cron ticker (7-day cycle default). It grades your skill library, consolidates related skills, prunes dead ones, and writes per-run reports to `logs/curator/run.json` + `REPORT.md`. Archived skills are classified consolidated-vs-pruned via model + heuristic. Defense-in-depth gates protect bundled/hub skills from mutation. Unified under `auxiliary.curator` — pick the curator's model in `hermes model`, manage it from the dashboard. `hermes curator status` ranks skills by usage (most-used / least-used). ([#17277](https://github.com/NousResearch/hermes-agent/pull/17277), [#17307](https://github.com/NousResearch/hermes-agent/pull/17307), [#17941](https://github.com/NousResearch/hermes-agent/pull/17941), [#17868](https://github.com/NousResearch/hermes-agent/pull/17868), [#18033](https://github.com/NousResearch/hermes-agent/pull/18033))
|
||||
|
||||
- **Self-improvement loop — substantially upgraded** — The background review fork (the core of Hermes' self-improvement: after each turn it decides what memories/skills to save or update) is now class-first (rubric-based rather than free-form), active-update biased (prefers the skill the agent just loaded), handles `references/`/`templates/` sub-files, and properly inherits the parent's live runtime (provider, model, credentials actually propagate). Restricted to memory + skills toolsets so it can't sprawl. Memory providers shut down cleanly. Prior-turn tool messages excluded from the summary so the fork sees a clean context. ([#16026](https://github.com/NousResearch/hermes-agent/pull/16026), [#17213](https://github.com/NousResearch/hermes-agent/pull/17213), [#16099](https://github.com/NousResearch/hermes-agent/pull/16099), [#16569](https://github.com/NousResearch/hermes-agent/pull/16569), [#16204](https://github.com/NousResearch/hermes-agent/pull/16204), [#15057](https://github.com/NousResearch/hermes-agent/pull/15057))
|
||||
|
||||
- **Skill integrations — major expansion** — **ComfyUI v5** with official CLI + REST + hardware-gated local install, moved from optional to **built-in by default** ([#17610](https://github.com/NousResearch/hermes-agent/pull/17610), [#17631](https://github.com/NousResearch/hermes-agent/pull/17631), [#17734](https://github.com/NousResearch/hermes-agent/pull/17734)). **TouchDesigner-MCP** bundled by default, expanded with GLSL, post-FX, audio, geometry, and 9 new reference docs ([#16753](https://github.com/NousResearch/hermes-agent/pull/16753), [#16624](https://github.com/NousResearch/hermes-agent/pull/16624), [#16768](https://github.com/NousResearch/hermes-agent/pull/16768) — @kshitijk4poor + @SHL0MS). **Humanizer** skill ports a text-cleaner that strips AI-isms ([#16787](https://github.com/NousResearch/hermes-agent/pull/16787)). **claude-design** HTML artifact skill + design-md (Google DESIGN.md spec) + airtable salvage + `skill_manage` edits in `external_dirs` + direct-URL skill install + `/reload-skills` slash command. ([#16358](https://github.com/NousResearch/hermes-agent/pull/16358), [#14876](https://github.com/NousResearch/hermes-agent/pull/14876), [#16291](https://github.com/NousResearch/hermes-agent/pull/16291), [#17512](https://github.com/NousResearch/hermes-agent/pull/17512), [#16323](https://github.com/NousResearch/hermes-agent/pull/16323), [#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
|
||||
|
||||
- **LM Studio — first-class provider** — upgraded from a custom-endpoint alias to a full-blown native provider: dedicated auth, `hermes doctor` checks, reasoning transport, live `/models` listing. (Salvage of @kshitijk4poor's #17061.) ([#17102](https://github.com/NousResearch/hermes-agent/pull/17102))
|
||||
|
||||
- **Four more new inference providers** — **GMI Cloud** (first-class, salvage of #11955 — @isaachuangGMICLOUD), **Azure AI Foundry** with auto-detection, **MiniMax OAuth** with PKCE browser flow (salvage #15203), **Tencent Tokenhub** (salvage of #16860). ([#16663](https://github.com/NousResearch/hermes-agent/pull/16663), [#15845](https://github.com/NousResearch/hermes-agent/pull/15845), [#17524](https://github.com/NousResearch/hermes-agent/pull/17524), [#16960](https://github.com/NousResearch/hermes-agent/pull/16960))
|
||||
|
||||
- **Pluggable gateway platforms + Microsoft Teams** — the gateway is now a plugin host. Drop-in messaging adapters live outside the core, and Microsoft Teams is the first plugin-shipped platform. (Salvage of #17664.) ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751), [#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
|
||||
- **Tencent 元宝 (Yuanbao) — 18th messaging platform** — native gateway adapter with text + media delivery. ([#16298](https://github.com/NousResearch/hermes-agent/pull/16298), [#17424](https://github.com/NousResearch/hermes-agent/pull/17424))
|
||||
|
||||
- **Spotify — native tools + bundled skill + wizard** — 7 tools (play, search, queue, playlists, devices) behind PKCE OAuth, interactive setup wizard, bundled skill, surfacing in `hermes tools`, cron usage documented. ([#15121](https://github.com/NousResearch/hermes-agent/pull/15121), [#15130](https://github.com/NousResearch/hermes-agent/pull/15130), [#15154](https://github.com/NousResearch/hermes-agent/pull/15154), [#15180](https://github.com/NousResearch/hermes-agent/pull/15180))
|
||||
|
||||
- **Google Meet plugin** — join calls, transcribe, speak, follow up. Realtime OpenAI transport + Node bot server, full pipeline bundled as a plugin. ([#16364](https://github.com/NousResearch/hermes-agent/pull/16364))
|
||||
|
||||
- **`hermes -z` one-shot mode + `hermes update --check`** — non-interactive `hermes -z <prompt>` with `--model`/`--provider`/`HERMES_INFERENCE_MODEL`. `hermes update --check` preflight. Opt-in pre-update HERMES_HOME backup. ([#15702](https://github.com/NousResearch/hermes-agent/pull/15702), [#15704](https://github.com/NousResearch/hermes-agent/pull/15704), [#15841](https://github.com/NousResearch/hermes-agent/pull/15841), [#16539](https://github.com/NousResearch/hermes-agent/pull/16539), [#16566](https://github.com/NousResearch/hermes-agent/pull/16566))
|
||||
|
||||
- **Models dashboard tab + in-browser model config** — rich per-model analytics, switch main + auxiliary models from the dashboard. ([#17745](https://github.com/NousResearch/hermes-agent/pull/17745), [#17802](https://github.com/NousResearch/hermes-agent/pull/17802))
|
||||
|
||||
- **Remote model catalog manifest** — OpenRouter + Nous Portal model catalogs are now pulled from a remote manifest so new models show up without a release. ([#16033](https://github.com/NousResearch/hermes-agent/pull/16033))
|
||||
|
||||
- **Native multimodal image routing** — images now route based on the model's actual vision capability rather than provider defaults. ([#16506](https://github.com/NousResearch/hermes-agent/pull/16506))
|
||||
|
||||
- **Gateway media parity** — native multi-image sending across Telegram, Discord, Slack, Mattermost, Email, and Signal; centralized audio routing with FLAC support + Telegram document fallback. ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909), [#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
|
||||
|
||||
- **TUI catches up to (and past) the classic CLI** — LaTeX rendering (@austinpickett), `/reload` .env hot-reload, pluggable busy-indicator styles (@OutThisLife, #13610), opt-in auto-resume of last session, expanded light-terminal auto-detection, session delete from `/resume` picker with `d`, modified mouse-wheel line scroll, and a `/mouse` toggle that kills ConPTY's phantom mouse injection (@kevin-ho). ([#17175](https://github.com/NousResearch/hermes-agent/pull/17175), [#17286](https://github.com/NousResearch/hermes-agent/pull/17286), [#17150](https://github.com/NousResearch/hermes-agent/pull/17150), [#17130](https://github.com/NousResearch/hermes-agent/pull/17130), [#17113](https://github.com/NousResearch/hermes-agent/pull/17113), [#17668](https://github.com/NousResearch/hermes-agent/pull/17668), [#17669](https://github.com/NousResearch/hermes-agent/pull/17669), [#15488](https://github.com/NousResearch/hermes-agent/pull/15488))
|
||||
|
||||
- **Observability + achievements plugins** — bundled Langfuse observability plugin (salvage #16845) + bundled hermes-achievements plugin that scans full session history. ([#16917](https://github.com/NousResearch/hermes-agent/pull/16917), [#17754](https://github.com/NousResearch/hermes-agent/pull/17754))
|
||||
|
||||
- **TTS provider registry + Piper local TTS** — pluggable `tts.providers.<name>` registry; Piper ships as a native local TTS provider. (Closes #8508.) ([#17843](https://github.com/NousResearch/hermes-agent/pull/17843), [#17885](https://github.com/NousResearch/hermes-agent/pull/17885))
|
||||
|
||||
- **Vercel Sandbox backend** — Vercel sandboxes as an execute_code/terminal backend (@kshitijk4poor). ([#17445](https://github.com/NousResearch/hermes-agent/pull/17445))
|
||||
|
||||
- **Secret redaction off by default** — default flipped to off. Prevents the long-standing patch-corruption incidents where fake secret-shaped substrings mangled tool outputs. Opt in via `redaction.enabled: true` when you need it. ([#16794](https://github.com/NousResearch/hermes-agent/pull/16794))
|
||||
|
||||
- **Cold-start performance** — visible TUI cold start cut **~57%** via lazy agent init (@OutThisLife), lazy imports of OpenAI / Anthropic / Firecrawl / account_usage, mtime-cached `load_config()`, memoized `get_tool_definitions()` with TTL-cached `check_fn` results, precompiled dangerous-command patterns. ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190), [#17046](https://github.com/NousResearch/hermes-agent/pull/17046), [#17041](https://github.com/NousResearch/hermes-agent/pull/17041), [#17098](https://github.com/NousResearch/hermes-agent/pull/17098), [#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
|
||||
|
||||
- **Configurable prompt cache TTL** — `prompt_caching.cache_ttl` (5m default, 1h opt-in — cost savings for bursty sessions that keep cache warm). Salvage of #12659. ([#15065](https://github.com/NousResearch/hermes-agent/pull/15065))
|
||||
|
||||
---
|
||||
|
||||
## 🧠 Autonomous Curator & Self-Improvement Loop
|
||||
|
||||
### Curator — autonomous skill maintenance
|
||||
- **`hermes curator` as a background agent** — runs on the gateway's cron ticker, 7-day cycle by default, umbrella-first prompt, inherits parent config, unbounded iterations ([#17277](https://github.com/NousResearch/hermes-agent/pull/17277) — issue #7816)
|
||||
- **Per-run reports** — `logs/curator/run.json` + `REPORT.md` per cycle ([#17307](https://github.com/NousResearch/hermes-agent/pull/17307))
|
||||
- **Consolidated vs pruned classification** — archived skills split with model + heuristic ([#17941](https://github.com/NousResearch/hermes-agent/pull/17941))
|
||||
- **`hermes curator status`** — ranks skills by usage, shows most-used and least-used ([#18033](https://github.com/NousResearch/hermes-agent/pull/18033))
|
||||
- **Unified under `auxiliary.curator`** — pick the model in `hermes model`, configure from the dashboard ([#17868](https://github.com/NousResearch/hermes-agent/pull/17868))
|
||||
- **Documentation** — dedicated curator feature page on the docs site ([#17563](https://github.com/NousResearch/hermes-agent/pull/17563))
|
||||
- Fix: seed defaults on update, create `logs/curator/` directory, defer fire import ([#17927](https://github.com/NousResearch/hermes-agent/pull/17927))
|
||||
- Fix: scan nested archive subdirs in `restore_skill` (@0xDevNinja) ([#17951](https://github.com/NousResearch/hermes-agent/pull/17951))
|
||||
- Fix: use actual skill activity in curator status (@y0shua1ee) ([#17953](https://github.com/NousResearch/hermes-agent/pull/17953))
|
||||
- Fix: `skill_manage` refuses writes on pinned skills; pinning now blocks curator writes ([#17562](https://github.com/NousResearch/hermes-agent/pull/17562), [#17578](https://github.com/NousResearch/hermes-agent/pull/17578))
|
||||
- Fix: `bump_use()` wired into skill invocation + preload + skill_view (salvage #17782) ([#17932](https://github.com/NousResearch/hermes-agent/pull/17932))
|
||||
|
||||
### Self-improvement loop (background review fork)
|
||||
- **Class-first skill-review prompt** — rubric-based grading rather than free-form "should this update" ([#16026](https://github.com/NousResearch/hermes-agent/pull/16026))
|
||||
- **Active-update bias** — prefers updating skills the agent just loaded, handles `references/` + `templates/` sub-files ([#17213](https://github.com/NousResearch/hermes-agent/pull/17213))
|
||||
- **Fork inherits parent's live runtime** — provider, model, credentials actually propagate now ([#16099](https://github.com/NousResearch/hermes-agent/pull/16099))
|
||||
- **Scoped toolsets** — review fork restricted to memory + skills (no shell, no web) ([#16569](https://github.com/NousResearch/hermes-agent/pull/16569))
|
||||
- **Clean shutdown** — background review memory providers exit properly (salvage #15289) ([#16204](https://github.com/NousResearch/hermes-agent/pull/16204))
|
||||
- **Clean context** — prior-history tool messages excluded from review summary (salvage #14967) ([#15057](https://github.com/NousResearch/hermes-agent/pull/15057))
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Skills Ecosystem
|
||||
|
||||
### Skill integrations — newly bundled or promoted
|
||||
- **ComfyUI v5** — official CLI + REST + hardware-gated local install; **moved from optional to built-in** ([#17610](https://github.com/NousResearch/hermes-agent/pull/17610), [#17631](https://github.com/NousResearch/hermes-agent/pull/17631), [#17734](https://github.com/NousResearch/hermes-agent/pull/17734), [#17612](https://github.com/NousResearch/hermes-agent/pull/17612))
|
||||
- **TouchDesigner-MCP** — **bundled by default** ([#16753](https://github.com/NousResearch/hermes-agent/pull/16753) — @kshitijk4poor), expanded with GLSL, post-FX, audio, geometry references ([#16624](https://github.com/NousResearch/hermes-agent/pull/16624)), 9 new reference docs ([#16768](https://github.com/NousResearch/hermes-agent/pull/16768) — @SHL0MS)
|
||||
- **Humanizer** — strips AI-isms from text ([#16787](https://github.com/NousResearch/hermes-agent/pull/16787))
|
||||
- **claude-design** — HTML artifact skill with disambiguation from other design skills ([#16358](https://github.com/NousResearch/hermes-agent/pull/16358))
|
||||
- **design-md** — Google's DESIGN.md spec skill ([#14876](https://github.com/NousResearch/hermes-agent/pull/14876))
|
||||
- **airtable** — salvaged skill + skill API keys wired into `.env` (#15838) ([#16291](https://github.com/NousResearch/hermes-agent/pull/16291))
|
||||
- **pretext** — creative browser demos with @chenglou/pretext ([#17259](https://github.com/NousResearch/hermes-agent/pull/17259))
|
||||
- **spike** + **sketch** — throwaway experiments + HTML mockups, adapted from gsd-build ([#17421](https://github.com/NousResearch/hermes-agent/pull/17421))
|
||||
|
||||
### Skills UX
|
||||
- **Install skills from a direct HTTP(S) URL** — `hermes skills install <url>` ([#16323](https://github.com/NousResearch/hermes-agent/pull/16323))
|
||||
- **`/reload-skills`** slash command (salvage #17670) ([#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
|
||||
- **`hermes skills list`** shows enabled/disabled status ([#16129](https://github.com/NousResearch/hermes-agent/pull/16129))
|
||||
- **`skill_manage` refuses writes on pinned skills** ([#17562](https://github.com/NousResearch/hermes-agent/pull/17562))
|
||||
- **`skill_manage` edits external_dirs skills in place** (salvage #9966) ([#17512](https://github.com/NousResearch/hermes-agent/pull/17512), [#17289](https://github.com/NousResearch/hermes-agent/pull/17289))
|
||||
- Fix: inline-shell rendering in `skill_view` ([#15376](https://github.com/NousResearch/hermes-agent/pull/15376))
|
||||
- Fix: exclude `.archive/` from skill index walk (salvage #17639) ([#17931](https://github.com/NousResearch/hermes-agent/pull/17931))
|
||||
- Fix: dedicated docs page per bundled + optional skill ([#14929](https://github.com/NousResearch/hermes-agent/pull/14929))
|
||||
- Fix: `google-workspace` shared HERMES_HOME helper + ship deps as optional extra ([#15405](https://github.com/NousResearch/hermes-agent/pull/15405))
|
||||
- Fix: auto-wrap ASCII-art code blocks in generated skill pages ([#16497](https://github.com/NousResearch/hermes-agent/pull/16497))
|
||||
- Point agent at `hermes-agent` skill + docs site for Hermes questions ([#16535](https://github.com/NousResearch/hermes-agent/pull/16535))
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Core Agent & Architecture
|
||||
|
||||
### Provider & Model Support
|
||||
|
||||
#### New providers
|
||||
- **GMI Cloud** — first-class API-key provider on par with Arcee/Kilocode/Xiaomi (salvage of #11955 — @isaachuangGMICLOUD) ([#16663](https://github.com/NousResearch/hermes-agent/pull/16663))
|
||||
- **Azure AI Foundry** — auto-detection, full wiring ([#15845](https://github.com/NousResearch/hermes-agent/pull/15845))
|
||||
- **LM Studio** — upgraded from custom-endpoint alias to first-class provider: dedicated auth, doctor checks, reasoning transport, live `/models` (salvage of #17061 — @kshitijk4poor) ([#17102](https://github.com/NousResearch/hermes-agent/pull/17102))
|
||||
- **MiniMax OAuth** — PKCE browser flow with full OAuth integration (salvage #15203) ([#17524](https://github.com/NousResearch/hermes-agent/pull/17524))
|
||||
- **Tencent Tokenhub** — new provider (salvage of #16860) ([#16960](https://github.com/NousResearch/hermes-agent/pull/16960))
|
||||
|
||||
#### Model catalog
|
||||
- **Remote model catalog manifest** — OpenRouter + Nous Portal catalogs pulled from remote manifest so new models show up without a release ([#16033](https://github.com/NousResearch/hermes-agent/pull/16033))
|
||||
- `openai/gpt-5.5` and `gpt-5.5-pro` added to OpenRouter + Nous Portal ([#15343](https://github.com/NousResearch/hermes-agent/pull/15343))
|
||||
- `deepseek-v4-pro` and `deepseek-v4-flash` added ([#14934](https://github.com/NousResearch/hermes-agent/pull/14934))
|
||||
- `qwen3.6-plus` added to Alibaba-supported models ([#16896](https://github.com/NousResearch/hermes-agent/pull/16896))
|
||||
- Gemini free-tier keys blocked at setup with 429 guidance surfacing ([#15100](https://github.com/NousResearch/hermes-agent/pull/15100))
|
||||
|
||||
#### Model configuration
|
||||
- **Configurable `prompt_caching.cache_ttl`** — 5m default, 1h opt-in (salvage #12659) ([#15065](https://github.com/NousResearch/hermes-agent/pull/15065))
|
||||
- `/fast` whitelist broadened to all OpenAI + Anthropic models ([#16883](https://github.com/NousResearch/hermes-agent/pull/16883))
|
||||
- `auxiliary.extra_body.reasoning` translates into Codex Responses API ([#17004](https://github.com/NousResearch/hermes-agent/pull/17004))
|
||||
- `hermes fallback` command for managing fallback providers ([#16052](https://github.com/NousResearch/hermes-agent/pull/16052))
|
||||
|
||||
### Agent Loop & Conversation
|
||||
- **Native multimodal image routing** — based on model vision capability, not provider defaults ([#16506](https://github.com/NousResearch/hermes-agent/pull/16506))
|
||||
- **Delegate `child_timeout_seconds` default bumped to 600s** ([#14809](https://github.com/NousResearch/hermes-agent/pull/14809))
|
||||
- **Diagnostic dump when subagent times out with 0 API calls** ([#15105](https://github.com/NousResearch/hermes-agent/pull/15105))
|
||||
- **Gateway busts cached agent on compression/context_length config edits** ([#17008](https://github.com/NousResearch/hermes-agent/pull/17008))
|
||||
- **Opt-in runtime-metadata footer on final replies** ([#17026](https://github.com/NousResearch/hermes-agent/pull/17026))
|
||||
- `/reload-mcp` awareness — rebuild cached agents + prompt-cache cost confirmation ([#17729](https://github.com/NousResearch/hermes-agent/pull/17729))
|
||||
- Fix: repair CamelCase + `_tool` suffix tool-call emissions ([#15124](https://github.com/NousResearch/hermes-agent/pull/15124))
|
||||
- Fix: retry on `json.JSONDecodeError` instead of treating as local validation error ([#15107](https://github.com/NousResearch/hermes-agent/pull/15107))
|
||||
- Fix: handle unescaped control chars in `tool_call.arguments` ([#15356](https://github.com/NousResearch/hermes-agent/pull/15356))
|
||||
- Fix: ordering fix in `_copy_reasoning_content_for_api` — cross-provider reasoning isolation (@Zjianru) ([#15749](https://github.com/NousResearch/hermes-agent/pull/15749))
|
||||
- Fix: inject empty `reasoning_content` for DeepSeek/Kimi `tool_calls` unconditionally (@Zjianru) ([#15762](https://github.com/NousResearch/hermes-agent/pull/15762))
|
||||
- Fix: persist streamed `reasoning_content` on assistant turns (#16844) ([#16892](https://github.com/NousResearch/hermes-agent/pull/16892))
|
||||
- Fix: cancel coroutine on timeout so worker thread exits; full traceback on tool failure ([#17428](https://github.com/NousResearch/hermes-agent/pull/17428))
|
||||
- Fix: isolate `get_tool_definitions` quiet_mode cache + dedup LCM injection (#17335) ([#17889](https://github.com/NousResearch/hermes-agent/pull/17889))
|
||||
- Fix: serialize concurrent `hermes_tools` RPC calls from `execute_code` (#17770) ([#17894](https://github.com/NousResearch/hermes-agent/pull/17894), [#17902](https://github.com/NousResearch/hermes-agent/pull/17902))
|
||||
- Fix: rename `[SYSTEM:` → `[IMPORTANT:` in all user-injected markers (dodges Azure content filter) ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
|
||||
|
||||
### Compression
|
||||
- **Retry summary on main model for unknown errors before giving up** ([#16774](https://github.com/NousResearch/hermes-agent/pull/16774))
|
||||
- **Notify users when configured aux model fails even if main-model fallback recovers** ([#16775](https://github.com/NousResearch/hermes-agent/pull/16775))
|
||||
- `/compress` wrapped in `_busy_command` to block input during compression ([#15388](https://github.com/NousResearch/hermes-agent/pull/15388))
|
||||
- Fix: reserve system + tools headroom when aux binds threshold ([#15631](https://github.com/NousResearch/hermes-agent/pull/15631))
|
||||
- Fix: use text-char sum for multimodal token estimation in `_find_tail_cut_by_tokens` ([#16369](https://github.com/NousResearch/hermes-agent/pull/16369))
|
||||
|
||||
### Session, Memory & State
|
||||
- **Trigram FTS5 index for CJK search, replace LIKE fallback** (@alt-glitch) ([#16651](https://github.com/NousResearch/hermes-agent/pull/16651))
|
||||
- **Index `tool_name` + `tool_calls` in FTS5, with repair + migration** (salvages #16866) ([#16914](https://github.com/NousResearch/hermes-agent/pull/16914))
|
||||
- **Checkpoints: auto-prune orphan and stale shadow repos at startup** ([#16303](https://github.com/NousResearch/hermes-agent/pull/16303))
|
||||
- **Memory providers notified on mid-process session_id rotation** (#6672) ([#17409](https://github.com/NousResearch/hermes-agent/pull/17409))
|
||||
- Fix: quote underscored terms in FTS5 query sanitization ([#16915](https://github.com/NousResearch/hermes-agent/pull/16915))
|
||||
- Fix: resolve viking_read 500/412 on file URIs + pseudo-summary URIs (salvage #5886) ([#17869](https://github.com/NousResearch/hermes-agent/pull/17869))
|
||||
- Fix: skip external-provider sync on interrupted turns ([#15395](https://github.com/NousResearch/hermes-agent/pull/15395))
|
||||
- Fix: close embedded Hindsight async client cleanly (salvage #14605) ([#16209](https://github.com/NousResearch/hermes-agent/pull/16209))
|
||||
- Fix: pass session transcript to `shutdown_memory_provider` on gateway + CLI (#15165) ([#16571](https://github.com/NousResearch/hermes-agent/pull/16571))
|
||||
- Fix: write-origin metadata seam ([#15346](https://github.com/NousResearch/hermes-agent/pull/15346))
|
||||
- Fix: preserve symlinks during atomic file writes ([#16980](https://github.com/NousResearch/hermes-agent/pull/16980))
|
||||
- Refactor: remove `flush_memories` entirely ([#15696](https://github.com/NousResearch/hermes-agent/pull/15696))
|
||||
|
||||
### Auxiliary models
|
||||
- Fix: surface auxiliary failures in UI (previously silent) ([#15324](https://github.com/NousResearch/hermes-agent/pull/15324))
|
||||
- Fix: surface title-gen auxiliary failures instead of silently dropping ([#16371](https://github.com/NousResearch/hermes-agent/pull/16371))
|
||||
- Fix: generalize unsupported-parameter detector and harden `max_tokens` retry ([#15633](https://github.com/NousResearch/hermes-agent/pull/15633))
|
||||
|
||||
---
|
||||
|
||||
## 📱 Messaging Platforms (Gateway)
|
||||
|
||||
### New Platforms
|
||||
- **Microsoft Teams (19th platform)** — as a plugin, + xdist collision guard ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
- **Yuanbao (Tencent 元宝, 18th platform)** — native adapter with text + media delivery ([#16298](https://github.com/NousResearch/hermes-agent/pull/16298), [#17424](https://github.com/NousResearch/hermes-agent/pull/17424), [#16880](https://github.com/NousResearch/hermes-agent/pull/16880))
|
||||
|
||||
### Pluggable Gateway Platforms
|
||||
- **Drop-in messaging adapters** — the gateway is now a plugin host for platforms (salvage of #17664) ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751))
|
||||
|
||||
### Telegram
|
||||
- **Chat allowlists for groups and forums** (@web3blind) ([#15027](https://github.com/NousResearch/hermes-agent/pull/15027))
|
||||
- **Send fresh finals for stale preview streams** (port openclaw#72038) ([#16261](https://github.com/NousResearch/hermes-agent/pull/16261))
|
||||
- **Render markdown tables as row-group bullets + prompt hint** ([#16997](https://github.com/NousResearch/hermes-agent/pull/16997))
|
||||
- Document fallback in centralized audio routing ([#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
|
||||
- Native multi-image sending ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
|
||||
### Discord
|
||||
- **Opt-in toolsets + ID injection + tool split + Feishu wiring** (salvage #15457, #15458) ([#15610](https://github.com/NousResearch/hermes-agent/pull/15610), [#15613](https://github.com/NousResearch/hermes-agent/pull/15613))
|
||||
- Fix: coerce `limit` parameter to int before `min()` call ([#16319](https://github.com/NousResearch/hermes-agent/pull/16319))
|
||||
|
||||
### Slack
|
||||
- **Register every gateway command as a native slash (Discord/Telegram parity)** ([#16164](https://github.com/NousResearch/hermes-agent/pull/16164))
|
||||
- **`strict_mention` config** — prevents thread auto-engagement ([#16193](https://github.com/NousResearch/hermes-agent/pull/16193))
|
||||
- **`channel_skill_bindings`** — bind specific skills to specific Slack channels ([#16283](https://github.com/NousResearch/hermes-agent/pull/16283))
|
||||
|
||||
### Signal
|
||||
- **Native formatting** — markdown → bodyRanges, reply quotes, reactions ([#17417](https://github.com/NousResearch/hermes-agent/pull/17417))
|
||||
- Native multi-image sending ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
|
||||
### Feishu / Mattermost / Email / Signal
|
||||
- All participate in **native multi-image sending** ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
|
||||
### Gateway Core
|
||||
- **Centralized audio routing + FLAC support + Telegram doc fallback** ([#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
|
||||
- **Native multi-image sending** across Telegram, Discord, Slack, Mattermost, Email, Signal ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
- **Make hygiene hard message limit configurable** ([#17000](https://github.com/NousResearch/hermes-agent/pull/17000))
|
||||
- **Opt-in runtime-metadata footer on final replies** ([#17026](https://github.com/NousResearch/hermes-agent/pull/17026))
|
||||
- **`pre_gateway_dispatch` hook** — plugins can intercept before dispatch ([#15050](https://github.com/NousResearch/hermes-agent/pull/15050))
|
||||
- **`pre_approval_request` / `post_approval_response` hooks** ([#16776](https://github.com/NousResearch/hermes-agent/pull/16776))
|
||||
- Fix: timeouts — guard `load_config()` call against runtime exceptions ([#16318](https://github.com/NousResearch/hermes-agent/pull/16318))
|
||||
- Fix: support passing handler tools via registry ([#15613](https://github.com/NousResearch/hermes-agent/pull/15613))
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Tool System
|
||||
|
||||
### Plugin-first architecture
|
||||
- **Pluggable gateway platforms** — platforms can ship as plugins ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751))
|
||||
- **Microsoft Teams as first plugin-shipped platform** ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
- **`pre_gateway_dispatch` hook** ([#15050](https://github.com/NousResearch/hermes-agent/pull/15050))
|
||||
- **`pre_approval_request` + `post_approval_response` hooks** ([#16776](https://github.com/NousResearch/hermes-agent/pull/16776))
|
||||
- **`duration_ms` on `post_tool_call`** (inspired by Claude Code 2.1.119) ([#15429](https://github.com/NousResearch/hermes-agent/pull/15429))
|
||||
- **Bundled plugins**: Spotify ([#15174](https://github.com/NousResearch/hermes-agent/pull/15174)), Google Meet ([#16364](https://github.com/NousResearch/hermes-agent/pull/16364)), Langfuse observability ([#16917](https://github.com/NousResearch/hermes-agent/pull/16917)), hermes-achievements ([#17754](https://github.com/NousResearch/hermes-agent/pull/17754))
|
||||
- **Page-scoped plugin slots for built-in dashboard pages** ([#15658](https://github.com/NousResearch/hermes-agent/pull/15658))
|
||||
- **Declarative plugin installation for NixOS module** (@alt-glitch) ([#15953](https://github.com/NousResearch/hermes-agent/pull/15953))
|
||||
|
||||
### Browser
|
||||
- **CDP supervisor** — dialog detection + response + cross-origin iframe eval ([#14540](https://github.com/NousResearch/hermes-agent/pull/14540))
|
||||
- **Auto-spawn local Chromium for LAN/localhost URLs** when cloud provider is configured ([#16136](https://github.com/NousResearch/hermes-agent/pull/16136))
|
||||
|
||||
### Execute code / Terminal
|
||||
- **Vercel Sandbox backend** for `execute_code` / terminal (@kshitijk4poor) ([#17445](https://github.com/NousResearch/hermes-agent/pull/17445))
|
||||
- **Collapse subagent `task_id`s to shared container** ([#16177](https://github.com/NousResearch/hermes-agent/pull/16177))
|
||||
- **Docker: run container as host user** to avoid root-owned bind mounts (@benbarclay) ([#17305](https://github.com/NousResearch/hermes-agent/pull/17305))
|
||||
- Fix: safely quote `~/` subpaths in wrapped `cd` commands ([#15394](https://github.com/NousResearch/hermes-agent/pull/15394))
|
||||
- Fix: close file descriptor in `LocalEnvironment._update_cwd` ([#17300](https://github.com/NousResearch/hermes-agent/pull/17300))
|
||||
- Fix: SSH — prevent tar from overwriting remote home dir permissions ([#17898](https://github.com/NousResearch/hermes-agent/pull/17898), [#17867](https://github.com/NousResearch/hermes-agent/pull/17867))
|
||||
|
||||
### Image generation
|
||||
- See Provider section for updates; no new image providers this window.
|
||||
|
||||
### TTS / Voice
|
||||
- **Pluggable TTS provider registry** under `tts.providers.<name>` ([#17843](https://github.com/NousResearch/hermes-agent/pull/17843))
|
||||
- **Piper** as native local TTS provider (closes #8508) ([#17885](https://github.com/NousResearch/hermes-agent/pull/17885))
|
||||
- **Voice mode CLI parity in the TUI** — VAD loop + TTS + crash forensics ([#14810](https://github.com/NousResearch/hermes-agent/pull/14810))
|
||||
- Fix: vision — use HERMES_HOME-based cache dir instead of cwd ([#17719](https://github.com/NousResearch/hermes-agent/pull/17719))
|
||||
|
||||
### Cron
|
||||
- **Honor `hermes tools` config for the cron platform** ([#14798](https://github.com/NousResearch/hermes-agent/pull/14798))
|
||||
- **Per-job `workdir`** — project-aware cron runs ([#15110](https://github.com/NousResearch/hermes-agent/pull/15110))
|
||||
- **`context_from` field** — chain cron job outputs ([#15606](https://github.com/NousResearch/hermes-agent/pull/15606))
|
||||
- Fix: promote `croniter` to a core dependency ([#17577](https://github.com/NousResearch/hermes-agent/pull/17577))
|
||||
|
||||
### Web search
|
||||
- **Expose `limit` for `web_search`** ([#16934](https://github.com/NousResearch/hermes-agent/pull/16934))
|
||||
|
||||
### Maps
|
||||
- Fix: include seconds in timezone UTC offset output ([#16300](https://github.com/NousResearch/hermes-agent/pull/16300))
|
||||
|
||||
### Approvals
|
||||
- **Hardline blocklist for unrecoverable commands** ([#15878](https://github.com/NousResearch/hermes-agent/pull/15878))
|
||||
- Perf: precompile DANGEROUS_PATTERNS and HARDLINE_PATTERNS ([#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
|
||||
|
||||
### ACP
|
||||
- **Advertise and forward image prompts** ([#18030](https://github.com/NousResearch/hermes-agent/pull/18030))
|
||||
|
||||
### API Server
|
||||
- **POST `/v1/runs/{run_id}/stop`** (salvage of #15656) ([#15842](https://github.com/NousResearch/hermes-agent/pull/15842))
|
||||
- **Expose run status for external UIs** (#17085) ([#17458](https://github.com/NousResearch/hermes-agent/pull/17458))
|
||||
|
||||
### Nix
|
||||
- **Declarative plugin installation for NixOS module** (@alt-glitch) ([#15953](https://github.com/NousResearch/hermes-agent/pull/15953))
|
||||
- Fix: use `--rebuild` in fix-lockfiles to bypass cached FOD store paths ([#15444](https://github.com/NousResearch/hermes-agent/pull/15444))
|
||||
- Fix: `extraPackages` now actually works via per-user profile ([#17047](https://github.com/NousResearch/hermes-agent/pull/17047))
|
||||
- Fix: refresh web/ npm-deps hash to unblock main builds ([#17174](https://github.com/NousResearch/hermes-agent/pull/17174))
|
||||
- Fix: replace magic-nix-cache with Cachix ([#17928](https://github.com/NousResearch/hermes-agent/pull/17928))
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ TUI
|
||||
|
||||
### New features
|
||||
- **LaTeX rendering** (@austinpickett) ([#17175](https://github.com/NousResearch/hermes-agent/pull/17175))
|
||||
- **`/reload` .env hot-reload** — ported from the classic CLI ([#17286](https://github.com/NousResearch/hermes-agent/pull/17286))
|
||||
- **Pluggable busy-indicator styles** (@OutThisLife, #13610) ([#17150](https://github.com/NousResearch/hermes-agent/pull/17150))
|
||||
- **Opt-in auto-resume of the most recent session** (@OutThisLife) ([#17130](https://github.com/NousResearch/hermes-agent/pull/17130))
|
||||
- **Expanded light-terminal auto-detection** — `HERMES_TUI_THEME` + background hex (@OutThisLife) ([#17113](https://github.com/NousResearch/hermes-agent/pull/17113))
|
||||
- **Delete sessions from `/resume` picker with `d`** (@OutThisLife) ([#17668](https://github.com/NousResearch/hermes-agent/pull/17668))
|
||||
- **Line-by-line scroll on modified mouse wheel** (@OutThisLife) ([#17669](https://github.com/NousResearch/hermes-agent/pull/17669))
|
||||
- **Delete queued message while editing with ctrl-x / cancel with esc** (@OutThisLife) ([#16707](https://github.com/NousResearch/hermes-agent/pull/16707))
|
||||
- **Per-section visibility for the details accordion** (@OutThisLife) ([#14968](https://github.com/NousResearch/hermes-agent/pull/14968))
|
||||
- **Voice mode CLI parity** — VAD loop + TTS + crash forensics ([#14810](https://github.com/NousResearch/hermes-agent/pull/14810))
|
||||
- **Contextual first-touch hints ported to TUI** — `/busy`, `/verbose` ([#16054](https://github.com/NousResearch/hermes-agent/pull/16054))
|
||||
- **Mini help menu on `?` in the input field** (@ethernet8023) ([#18043](https://github.com/NousResearch/hermes-agent/pull/18043))
|
||||
|
||||
### Fixes
|
||||
- Fix: proactive mouse disable on ConPTY + `/mouse` toggle command (@kevin-ho, WSL2 ghost-mouse fix) ([#15488](https://github.com/NousResearch/hermes-agent/pull/15488))
|
||||
- Fix: restore skills search RPC ([#15870](https://github.com/NousResearch/hermes-agent/pull/15870))
|
||||
- Perf: cache text measurements across yoga flex re-passes ([#14818](https://github.com/NousResearch/hermes-agent/pull/14818))
|
||||
- Perf: stabilize long-session scrolling ([#15926](https://github.com/NousResearch/hermes-agent/pull/15926))
|
||||
- Perf: lazily seed virtual history heights ([#16523](https://github.com/NousResearch/hermes-agent/pull/16523))
|
||||
- Perf: cut visible cold start ~57% with lazy agent init ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190))
|
||||
|
||||
---
|
||||
|
||||
## 🖱️ CLI & User Experience
|
||||
|
||||
### New commands
|
||||
- **`hermes -z <prompt>`** — non-interactive one-shot mode ([#15702](https://github.com/NousResearch/hermes-agent/pull/15702))
|
||||
- **`hermes -z` with `--model` / `--provider` / `HERMES_INFERENCE_MODEL`** ([#15704](https://github.com/NousResearch/hermes-agent/pull/15704))
|
||||
- **`hermes update --check`** preflight flag ([#15841](https://github.com/NousResearch/hermes-agent/pull/15841))
|
||||
- **`hermes fallback`** command for managing fallback providers ([#16052](https://github.com/NousResearch/hermes-agent/pull/16052))
|
||||
- **`/busy`** slash command for busy input mode ([#15382](https://github.com/NousResearch/hermes-agent/pull/15382))
|
||||
- **`/busy` input mode 'steer'** as a third option ([#16279](https://github.com/NousResearch/hermes-agent/pull/16279))
|
||||
- **`/btw` as alias for `/background`** ([#16053](https://github.com/NousResearch/hermes-agent/pull/16053))
|
||||
- **`/reload-skills`** slash command (salvage #17670) ([#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
|
||||
- **Surface `/queue`, `/bg`, `/steer` in agent-running placeholder** ([#16118](https://github.com/NousResearch/hermes-agent/pull/16118))
|
||||
|
||||
### Setup / onboarding
|
||||
- **Auto-reconfigure on existing installs** ([#15879](https://github.com/NousResearch/hermes-agent/pull/15879))
|
||||
- **Contextual first-touch hints for `/busy` and `/verbose`** ([#16046](https://github.com/NousResearch/hermes-agent/pull/16046))
|
||||
- **Cost-saving tips from the April 30 tip-of-the-day** ([#17841](https://github.com/NousResearch/hermes-agent/pull/17841))
|
||||
- **Hyperlink startup banner title to the latest GitHub Release** ([#14945](https://github.com/NousResearch/hermes-agent/pull/14945))
|
||||
|
||||
### Update / backup
|
||||
- **Snapshot pairing data before `git pull`** ([#16383](https://github.com/NousResearch/hermes-agent/pull/16383))
|
||||
- **Auto-backup HERMES_HOME before `hermes update`** (opt-in, off by default) ([#16539](https://github.com/NousResearch/hermes-agent/pull/16539), [#16566](https://github.com/NousResearch/hermes-agent/pull/16566))
|
||||
- **Exclude `checkpoints/` from backups** ([#16572](https://github.com/NousResearch/hermes-agent/pull/16572))
|
||||
- **Exclude SQLite WAL/SHM/journal sidecars from backups** ([#16576](https://github.com/NousResearch/hermes-agent/pull/16576))
|
||||
- **Installer FHS layout for root installs on Linux** ([#15608](https://github.com/NousResearch/hermes-agent/pull/15608))
|
||||
- Fix: kill stale dashboards instead of warning ([#17832](https://github.com/NousResearch/hermes-agent/pull/17832))
|
||||
- Fix: show correct update status on nix-built hermes ([#17550](https://github.com/NousResearch/hermes-agent/pull/17550))
|
||||
|
||||
### Slash-command housekeeping
|
||||
- Refactor: drop `/provider`, `/plan` handler, and clean up slash registry ([#15047](https://github.com/NousResearch/hermes-agent/pull/15047))
|
||||
- Refactor: drop `persist_session` plumbing + fix broken `/btw` mid-turn bypass ([#16075](https://github.com/NousResearch/hermes-agent/pull/16075))
|
||||
|
||||
### OpenClaw migration (for folks coming from OpenClaw)
|
||||
- **Hardened OpenClaw import** — plan-first apply, redaction, pre-migration backup ([#16911](https://github.com/NousResearch/hermes-agent/pull/16911))
|
||||
- Fix: case-preserving brand rewrite + one-time `~/.openclaw` residue banner ([#16327](https://github.com/NousResearch/hermes-agent/pull/16327))
|
||||
- Fix: resolve `openclaw` workspace files from `agents.defaults.workspace` ([#16879](https://github.com/NousResearch/hermes-agent/pull/16879))
|
||||
- Fix: resolve model aliases against real OpenClaw catalog schema (salvage #16778) ([#16977](https://github.com/NousResearch/hermes-agent/pull/16977))
|
||||
|
||||
---
|
||||
|
||||
## 📊 Web Dashboard
|
||||
|
||||
- **Models tab** — rich per-model analytics ([#17745](https://github.com/NousResearch/hermes-agent/pull/17745))
|
||||
- **Configure main + auxiliary models from the Models page** ([#17802](https://github.com/NousResearch/hermes-agent/pull/17802))
|
||||
- **Dashboard Chat tab — xterm.js + JSON-RPC sidecar** (supersedes #12710 + #13379, @OutThisLife) ([#14890](https://github.com/NousResearch/hermes-agent/pull/14890))
|
||||
- **Dashboard layout refresh** (@austinpickett) ([#14899](https://github.com/NousResearch/hermes-agent/pull/14899))
|
||||
- **`--stop` and `--status` flags** on the dashboard CLI ([#17840](https://github.com/NousResearch/hermes-agent/pull/17840))
|
||||
- **Page-scoped plugin slots for built-in pages** ([#15658](https://github.com/NousResearch/hermes-agent/pull/15658))
|
||||
- Fix: replace all buttons for design system buttons ([#17007](https://github.com/NousResearch/hermes-agent/pull/17007))
|
||||
|
||||
---
|
||||
|
||||
## ⚡ Performance
|
||||
|
||||
- **TUI visible cold start cut ~57%** via lazy agent init ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190))
|
||||
- **Lazy-import OpenAI, Anthropic, Firecrawl, account_usage** ([#17046](https://github.com/NousResearch/hermes-agent/pull/17046))
|
||||
- **mtime-cache `load_config()` and `read_raw_config()`** ([#17041](https://github.com/NousResearch/hermes-agent/pull/17041))
|
||||
- **Memoize `get_tool_definitions()` + TTL-cache `check_fn` results** ([#17098](https://github.com/NousResearch/hermes-agent/pull/17098))
|
||||
- **Precompile DANGEROUS_PATTERNS and HARDLINE_PATTERNS** ([#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
|
||||
- **Cache Ink text measurements across yoga flex re-passes** ([#14818](https://github.com/NousResearch/hermes-agent/pull/14818))
|
||||
- **Stabilize long-session scrolling** ([#15926](https://github.com/NousResearch/hermes-agent/pull/15926))
|
||||
- **Lazily seed virtual history heights** ([#16523](https://github.com/NousResearch/hermes-agent/pull/16523))
|
||||
|
||||
---
|
||||
|
||||
## 🔒 Security & Reliability
|
||||
|
||||
- **Secret redaction off by default** — stops corrupting patches / API payloads with fake-key substitutions. Opt in via `redaction.enabled: true` ([#16794](https://github.com/NousResearch/hermes-agent/pull/16794))
|
||||
- **`[SYSTEM:` → `[IMPORTANT:`** in all user-injected markers (Azure content filter dodge) ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
|
||||
- **Hardline blocklist for unrecoverable commands** ([#15878](https://github.com/NousResearch/hermes-agent/pull/15878))
|
||||
- **Canonical `mask_secret` helper; fix status.py DIM drift** ([#17207](https://github.com/NousResearch/hermes-agent/pull/17207))
|
||||
- **Sweep expired paste.rs uploads on a real timer** ([#16431](https://github.com/NousResearch/hermes-agent/pull/16431))
|
||||
- **Preserve symlinks during atomic file writes** ([#16980](https://github.com/NousResearch/hermes-agent/pull/16980))
|
||||
- **Probe `/dev/tty` by opening it, not bare existence** ([#17024](https://github.com/NousResearch/hermes-agent/pull/17024))
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Notable Bug Fixes
|
||||
|
||||
This window includes 360 `fix:` PRs. Selected highlights from across the stack:
|
||||
|
||||
- **Background review fork inherits parent's live runtime** — provider/model/creds now propagate correctly ([#16099](https://github.com/NousResearch/hermes-agent/pull/16099))
|
||||
- **Hindsight configurable `HINDSIGHT_TIMEOUT` env var** ([#15077](https://github.com/NousResearch/hermes-agent/pull/15077))
|
||||
- **Tools: normalize numeric entries + clear stale `no_mcp` in `_save_platform_tools`** ([#15607](https://github.com/NousResearch/hermes-agent/pull/15607))
|
||||
- **MCP: rewrite `definitions` refs to `$defs` in input schemas** — closes provider-side 400s
|
||||
- **Azure content filter compatibility** — renamed `[SYSTEM:` markers so Azure's content filter stops flagging them ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
|
||||
- **Vision cache uses HERMES_HOME instead of cwd** ([#17719](https://github.com/NousResearch/hermes-agent/pull/17719))
|
||||
- **FTS5 search** — tool_name + tool_calls indexing with repair + migration ([#16914](https://github.com/NousResearch/hermes-agent/pull/16914))
|
||||
- **Streaming reasoning persists on assistant turns** ([#16892](https://github.com/NousResearch/hermes-agent/pull/16892))
|
||||
- **execute_code concurrent RPC serialization** (#17770) ([#17894](https://github.com/NousResearch/hermes-agent/pull/17894), [#17902](https://github.com/NousResearch/hermes-agent/pull/17902))
|
||||
- **Background reviewer scoped to memory + skills toolsets** — no more accidental web/shell escapes ([#16569](https://github.com/NousResearch/hermes-agent/pull/16569))
|
||||
- **Compression recovery** — retry on main before giving up; notify user when aux fails ([#16774](https://github.com/NousResearch/hermes-agent/pull/16774), [#16775](https://github.com/NousResearch/hermes-agent/pull/16775))
|
||||
- **`croniter` promoted to a core dependency** ([#17577](https://github.com/NousResearch/hermes-agent/pull/17577))
|
||||
- **Discord tool `limit` parameter coerced to int** before `min()` call ([#16319](https://github.com/NousResearch/hermes-agent/pull/16319))
|
||||
- **Yuanbao messaging platform entrance fix** ([#16880](https://github.com/NousResearch/hermes-agent/pull/16880))
|
||||
- **ACP advertise and forward image prompts** ([#18030](https://github.com/NousResearch/hermes-agent/pull/18030))
|
||||
- **DeepSeek / Kimi reasoning content isolation** across cross-provider histories (@Zjianru) ([#15749](https://github.com/NousResearch/hermes-agent/pull/15749), [#15762](https://github.com/NousResearch/hermes-agent/pull/15762))
|
||||
- **Preserve reasoning_content replay on DeepSeek v4 + Kimi/Moonshot thinking** ([#18045](https://github.com/NousResearch/hermes-agent/pull/18045))
|
||||
|
||||
The vast majority of the 360 fixes landed in the streaming/compression/tool-calling paths across all providers — DeepSeek, Kimi, Moonshot, GLM, Qwen, MiniMax, Gemini, Anthropic, OpenAI — alongside TUI polish (resize, scroll, sticky-prompt) and gateway platform-specific edge cases.
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing & CI
|
||||
|
||||
- Hermetic test parity (`scripts/run_tests.sh`) held across this window
|
||||
- **Microsoft Teams xdist collision guard** — prevents worker collisions when Teams platform tests run in parallel ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
- Chore: remove unused imports and dead locals (ruff F401, F841) ([#17010](https://github.com/NousResearch/hermes-agent/pull/17010))
|
||||
|
||||
---
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
- **Curator feature page** added to docs site ([#17563](https://github.com/NousResearch/hermes-agent/pull/17563))
|
||||
- **Document pin also blocking `skill_manage` writes** ([#17578](https://github.com/NousResearch/hermes-agent/pull/17578))
|
||||
- **Direct-URL skill install documented** across features, reference, guide, and `hermes-agent` skill ([#16355](https://github.com/NousResearch/hermes-agent/pull/16355))
|
||||
- **Hooks tutorial — build a BOOT.md startup checklist** (replaces the removed built-in hook) ([#17202](https://github.com/NousResearch/hermes-agent/pull/17202))
|
||||
- **ComfyUI docs: ask local vs cloud FIRST before hardware check** ([#17612](https://github.com/NousResearch/hermes-agent/pull/17612))
|
||||
- **Obliteratus skill: link YouTube video guide in SKILL.md** ([#15808](https://github.com/NousResearch/hermes-agent/pull/15808))
|
||||
- Per-skill docs pages generated for bundled + optional skills; ASCII art code blocks auto-wrapped ([#14929](https://github.com/NousResearch/hermes-agent/pull/14929), [#16497](https://github.com/NousResearch/hermes-agent/pull/16497))
|
||||
|
||||
---
|
||||
|
||||
## ⚖️ Removed / Reverted
|
||||
|
||||
- **Kanban multi-profile collaboration board** — landed in #16081, reverted in ([#16098](https://github.com/NousResearch/hermes-agent/pull/16098)) while the design is reworked
|
||||
- **computer-use cua-driver** — 3 preparatory PRs landed then were reverted in ([#16927](https://github.com/NousResearch/hermes-agent/pull/16927))
|
||||
- **BOOT.md built-in hook** removed ([#17093](https://github.com/NousResearch/hermes-agent/pull/17093)); the hooks tutorial ([#17202](https://github.com/NousResearch/hermes-agent/pull/17202)) shows how to build the same workflow yourself with a shell hook
|
||||
- **`/provider` + `/plan` slash commands dropped** ([#15047](https://github.com/NousResearch/hermes-agent/pull/15047))
|
||||
- **`flush_memories` removed entirely** ([#15696](https://github.com/NousResearch/hermes-agent/pull/15696))
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributors
|
||||
|
||||
### Core
|
||||
- **@teknium1** (Teknium)
|
||||
|
||||
### Top Community Contributors (by merged PR count since v0.11.0)
|
||||
|
||||
- **@OutThisLife** (Brooklyn) — 52 PRs · TUI — light-terminal detection + pluggable busy styles + auto-resume + session-delete from /resume + mouse-wheel scrolling + xterm.js dashboard Chat tab + cold-start cut + accordion polish
|
||||
- **@kshitijk4poor** — 12 PRs · LM Studio first-class provider (salvage), Vercel Sandbox backend, GMI Cloud salvage, bundled-by-default touchdesigner-mcp, many tool-call / reasoning fixes
|
||||
- **@helix4u** — 10 PRs · MCP schema robustness, assorted stability fixes
|
||||
- **@alt-glitch** — 8 PRs · trigram FTS5 CJK search, declarative Nix plugin install, matrix/feishu hints and fixes
|
||||
- **@ethernet8023** — 4 PRs
|
||||
- **@austinpickett** — 4 PRs · LaTeX rendering in TUI, dashboard layout refresh
|
||||
- **@benbarclay** — 3 PRs · Docker run-as-host-user so bind mounts don't get root-owned
|
||||
- **@vominh1919** — 2 PRs
|
||||
- **@stephenschoettler** — 2 PRs
|
||||
- **@kevin-ho** — ConPTY mouse-injection fix (#15488)
|
||||
- **@Zjianru** — cross-provider reasoning_content isolation + DeepSeek/Kimi empty-reasoning injection (#15749, #15762)
|
||||
- **@web3blind** — Telegram chat allowlists for groups and forums (#15027)
|
||||
- **@SHL0MS** — 9 new TouchDesigner-MCP reference docs (#16768)
|
||||
- **@0xDevNinja** — curator `restore_skill` nested-archive fix (#17951)
|
||||
- **@y0shua1ee** — curator `use` activity fix (#17953)
|
||||
|
||||
### Also contributing
|
||||
Salvaged or co-authored work from **@isaachuangGMICLOUD** (GMI Cloud), earlier upstream PRs from the original author of each salvage chain, and a long tail of one-shot fixes, documentation nudges, and skill contributions from the community.
|
||||
|
||||
### All Contributors (alphabetical, excluding @teknium1)
|
||||
|
||||
@0xbyt4, @0xharryriddle, @0xDevNinja, @0z1-ghb, @5park1e, @A-FdL-Prog, @aj-nt, @akhater, @alblez, @alexg0bot,
|
||||
@alexzhu0, @AllardQuek, @alt-glitch, @amanning3390, @amanuel2, @AndreKurait, @andrewhosf, @Andy283, @andyylin,
|
||||
@angel12, @AntAISecurityLab, @ash, @austinpickett, @badgerbees, @BadTechBandit, @Bartok9, @beenherebefore,
|
||||
@beesrsj2500, @BeliefanX, @benbarclay, @benjaminsehl, @BlackishGreen33, @bloodcarter, @BlueBirdBack,
|
||||
@briandevans, @brooklynnicholson, @bsgdigital, @buray, @bwjoke, @camaragon, @cdanis, @cgarwood82,
|
||||
@charles-brooks, @chen1749144759, @chengoak, @ching-kaching, @Contentment003111, @crayfish-ai, @CruxExperts,
|
||||
@cyclingwithelephants, @dandaka, @danklynn, @ddupont808, @dhabibi, @difujia, @dimitrovi, @dlkakbs,
|
||||
@dontcallmejames, @EKKOLearnAI, @emozilla, @ericnicolaides, @Erosika, @ethernet8023, @exiao, @Feranmi10,
|
||||
@flobo3, @foxion37, @georgeglessner, @georgex8001, @ghostmfr, @H-Ali13381, @HangGlidersRule, @harryplusplus,
|
||||
@haru398801, @heathley, @hejuntt1014, @hekaru-agent, @helix4u, @Heltman, @HenkDz, @heyitsaamir, @hharry11,
|
||||
@hhhonzik, @hhuang91, @HiddenPuppy, @htsh, @iamagenius00, @in-liberty420, @innocarpe, @irispillars, @iRonin,
|
||||
@isaachuangGMICLOUD, @Ito-69, @j3ffffff, @jackjin1997, @jakubkrcmar, @Jason2031, @JayGwod, @jerome-benoit,
|
||||
@johnncenae, @Kailigithub, @keiravoss94, @kevin-ho, @knockyai, @konsisumer, @kshitijk4poor, @kunlabs, @l0hde,
|
||||
@Leihb, @leoneparise, @LeonSGP43, @liizfq, @liuhao1024, @loongzhao, @lsdsjy, @luyao618, @ma-pony, @Magaav,
|
||||
@MagicRay1217, @math0r-be, @MattMaximo, @maxims-oss, @MaxyMoos, @maymuneth, @mcndjxlefnd, @memosr,
|
||||
@MestreY0d4-Uninter, @mewwts, @Mirac1eSky, @MorAlekss, @mrhwick, @mrunmayee17, @mssteuer, @Nanako0129,
|
||||
@nazirulhafiy, @Nerijusas, @Nicecsh, @nicoloboschi, @nightq, @ningfangbin, @octo-patch, @Octopus,
|
||||
@OutThisLife, @Paperclip, @pein892, @perlowja, @prasadus92, @qike-ms, @qiyin-code, @Readon, @ReginaldasR,
|
||||
@revaraver, @rfilgueiras, @rmoen, @romanornr, @rugvedS07, @rylena, @samrusani, @Sanjays2402, @sasha-id,
|
||||
@Satoshi-agi, @scheidti, @scotttrinh, @season179, @SeeYangZhi, @sgaofen, @shamork, @shannonsands, @SHL0MS,
|
||||
@simbam99, @Societus, @socrates1024, @Sonoyunchu, @sprmn24, @stephenschoettler, @tangyuanjc, @TechPrototyper,
|
||||
@tekgnosis-net, @ThomassJonax, @tmimmanuel, @tochukwuada, @Tosko4, @Tranquil-Flow, @twozle, @txbxxx,
|
||||
@UgwujaGeorge, @Versun, @vlwkaos, @voidborne-d, @vominh1919, @Wang-tianhao, @Wangshengyang2004, @web3blind,
|
||||
@westers, @Wysie, @xandersbell, @xiahu88988, @XieNBi, @xinbenlv, @xnbi, @y0shua1ee, @yatesjalex, @yes999zc,
|
||||
@yeyitech, @Yoimex, @YueLich, @Yukipukii1, @zhiyanliu, @zicochaos, @Zjianru, @zkl2333, @zons-zhaozhy,
|
||||
@ztexydt-cqh.
|
||||
|
||||
Also: @Siddharth Balyan, @YuShu.
|
||||
|
||||
---
|
||||
|
||||
**Full Changelog**: [v2026.4.23...v2026.4.30](https://github.com/NousResearch/hermes-agent/compare/v2026.4.23...v2026.4.30)
|
||||
@@ -1,641 +0,0 @@
|
||||
# Hermes Agent v0.13.0 (v2026.5.7)
|
||||
|
||||
**Release Date:** May 7, 2026
|
||||
**Since v0.12.0:** 864 commits · 588 merged PRs · 829 files changed · 128,366 insertions · 282 issues closed (13 P0, 36 P1) · 295 community contributors (including co-authors)
|
||||
|
||||
> The Tenacity Release — Hermes Agent now finishes what it starts. Kanban ships as a durable multi-agent board (heartbeat, reclaim, zombie detection, auto-block on incomplete exit, per-task retries, hallucination recovery). `/goal` keeps the agent locked on a target across turns (Ralph loop). Checkpoints v2 rewrites state persistence with real pruning. Gateway auto-resumes interrupted sessions after restart. Cron grows a `no_agent` watchdog mode. A security wave closes 8 P0s — redaction is now ON by default, Discord role-allowlists are guild-scoped, WhatsApp rejects strangers by default, and TOCTOU windows close across auth.json and MCP OAuth. Google Chat becomes the 20th platform. Providers become a pluggable surface. Seven i18n locales ship.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **Multi-agent Kanban — delegate to an AI team that actually finishes** — Spin up a durable board, drop tasks on it, and let multiple Hermes workers pick them up, hand off, and close them out. Heartbeats, reclaim, zombie detection, retry budgets, and a hallucination gate keep the team honest. One install, many kanbans. ([#17805](https://github.com/NousResearch/hermes-agent/pull/17805), [#19653](https://github.com/NousResearch/hermes-agent/pull/19653), [#20232](https://github.com/NousResearch/hermes-agent/pull/20232), [#20332](https://github.com/NousResearch/hermes-agent/pull/20332), [#21330](https://github.com/NousResearch/hermes-agent/pull/21330), [#21183](https://github.com/NousResearch/hermes-agent/pull/21183), [#21214](https://github.com/NousResearch/hermes-agent/pull/21214))
|
||||
|
||||
- **`/goal` — the agent doesn't forget what you asked it to do** — Lock the agent onto a target and it stays on task across turns. The Ralph loop as a first-class primitive. ([#18262](https://github.com/NousResearch/hermes-agent/pull/18262), [#18275](https://github.com/NousResearch/hermes-agent/pull/18275), [#21287](https://github.com/NousResearch/hermes-agent/pull/21287))
|
||||
|
||||
- **Show it a video** — new `video_analyze` tool for native video understanding on Gemini and compatible multimodal models. (@alt-glitch) ([#19301](https://github.com/NousResearch/hermes-agent/pull/19301))
|
||||
|
||||
- **Clone a voice** — xAI Custom Voices lands as a TTS provider with voice cloning support. (@alt-glitch) ([#18776](https://github.com/NousResearch/hermes-agent/pull/18776))
|
||||
|
||||
- **Hermes speaks your language** — static gateway + CLI messages translate to 7 locales: Chinese, Japanese, German, Spanish, French, Ukrainian, and Turkish. Docs site gains a Chinese (zh-Hans) locale. ([#20231](https://github.com/NousResearch/hermes-agent/pull/20231), [#20329](https://github.com/NousResearch/hermes-agent/pull/20329), [#20467](https://github.com/NousResearch/hermes-agent/pull/20467), [#20474](https://github.com/NousResearch/hermes-agent/pull/20474), [#20430](https://github.com/NousResearch/hermes-agent/pull/20430), [#20431](https://github.com/NousResearch/hermes-agent/pull/20431))
|
||||
|
||||
- **Google Chat — the 20th messaging platform** — plus a generic platform-plugin hooks surface so third-party adapters drop in without touching core (IRC and Teams migrated). ([#21306](https://github.com/NousResearch/hermes-agent/pull/21306), [#21331](https://github.com/NousResearch/hermes-agent/pull/21331))
|
||||
|
||||
- **Sessions survive restarts** — gateway bounces mid-agent, `/update` restarts, source-file reloads — conversations auto-resume when the gateway comes back. ([#21192](https://github.com/NousResearch/hermes-agent/pull/21192))
|
||||
|
||||
- **Security wave — 8 P0 closures** — redaction ON by default, Discord role-allowlists guild-scoped (CVSS 8.1 cross-guild DM bypass closed), WhatsApp rejects strangers by default, TOCTOU windows closed across `auth.json` and MCP OAuth, browser enforces cloud-metadata SSRF floor, cron prompt-injection scans assembled skill content, `hermes debug share` redacts at upload. ([#21193](https://github.com/NousResearch/hermes-agent/pull/21193), [#21241](https://github.com/NousResearch/hermes-agent/pull/21241), [#21291](https://github.com/NousResearch/hermes-agent/pull/21291), [#21176](https://github.com/NousResearch/hermes-agent/pull/21176), [#21194](https://github.com/NousResearch/hermes-agent/pull/21194), [#21228](https://github.com/NousResearch/hermes-agent/pull/21228), [#21350](https://github.com/NousResearch/hermes-agent/pull/21350), [#19318](https://github.com/NousResearch/hermes-agent/pull/19318))
|
||||
|
||||
- **Checkpoints v2** — state persistence rewritten. Real pruning, disk guardrails, no more orphan shadow repos. ([#20709](https://github.com/NousResearch/hermes-agent/pull/20709))
|
||||
|
||||
- **The agent lints its own writes** — post-write delta lint on `write_file` + `patch`. Python, JSON, YAML, TOML. Syntax errors surface immediately instead of shipping downstream. ([#20191](https://github.com/NousResearch/hermes-agent/pull/20191))
|
||||
|
||||
- **`no_agent` cron mode — script-only watchdog** — cron jobs can now skip the agent entirely and just run a script. Empty stdout is silent, non-empty gets delivered verbatim. ([#19709](https://github.com/NousResearch/hermes-agent/pull/19709))
|
||||
|
||||
- **Platform allowlists everywhere** — `allowed_channels` / `allowed_chats` / `allowed_rooms` config across Slack, Telegram, Mattermost, Matrix, and DingTalk. ([#21251](https://github.com/NousResearch/hermes-agent/pull/21251))
|
||||
|
||||
- **Providers are now plugins** — `ProviderProfile` ABC + `plugins/model-providers/`. Drop in third-party providers without touching core. ([#20324](https://github.com/NousResearch/hermes-agent/pull/20324))
|
||||
|
||||
- **API server — long-term memory per session** — `X-Hermes-Session-Key` header gives memory providers a stable session identifier. ([#20199](https://github.com/NousResearch/hermes-agent/pull/20199))
|
||||
|
||||
- **MCP levels up** — SSE transport with OAuth forwarding, stale-pipe retries, image results surface as MEDIA tags instead of getting dropped, keepalive on long-lived lifecycle waits. ([#21227](https://github.com/NousResearch/hermes-agent/pull/21227), [#21323](https://github.com/NousResearch/hermes-agent/pull/21323), [#21289](https://github.com/NousResearch/hermes-agent/pull/21289), [#21328](https://github.com/NousResearch/hermes-agent/pull/21328), [#20209](https://github.com/NousResearch/hermes-agent/pull/20209))
|
||||
|
||||
- **Curator grows subcommands** — `hermes curator archive`, `prune`, `list-archived`. Manual `hermes curator run` is synchronous now — you see results without polling. ([#20200](https://github.com/NousResearch/hermes-agent/pull/20200), [#21236](https://github.com/NousResearch/hermes-agent/pull/21236), [#21216](https://github.com/NousResearch/hermes-agent/pull/21216))
|
||||
|
||||
- **ACP — `/steer` and `/queue`** — direct the in-flight agent or queue follow-ups from Zed, VS Code, or JetBrains. Plus atomic session persistence and reasoning-metadata preservation across restarts. (@HenkDz) ([#18114](https://github.com/NousResearch/hermes-agent/pull/18114), [#20279](https://github.com/NousResearch/hermes-agent/pull/20279), [#20296](https://github.com/NousResearch/hermes-agent/pull/20296), [#20433](https://github.com/NousResearch/hermes-agent/pull/20433))
|
||||
|
||||
- **TUI glow-up** — `/model` picker matches `hermes model` with inline auth (@austinpickett), collapsible startup banner sections (@kshitijk4poor), context-compression counter in the status bar. ([#18117](https://github.com/NousResearch/hermes-agent/pull/18117), [#20625](https://github.com/NousResearch/hermes-agent/pull/20625), [#21218](https://github.com/NousResearch/hermes-agent/pull/21218))
|
||||
|
||||
- **Dashboard grows up** — Plugins page (manage, enable/disable, auth status) (@austinpickett), Profiles management page (@vincez-hms-coder), sortable analytics tables, reverse-proxy support via `X-Forwarded-Prefix`, new `default-large` 18px theme. ([#18095](https://github.com/NousResearch/hermes-agent/pull/18095), [#16419](https://github.com/NousResearch/hermes-agent/pull/16419), [#18192](https://github.com/NousResearch/hermes-agent/pull/18192), [#21296](https://github.com/NousResearch/hermes-agent/pull/21296), [#20820](https://github.com/NousResearch/hermes-agent/pull/20820))
|
||||
|
||||
- **SearXNG + split web tools** — SearXNG ships as a native search-only backend; web tools now let you pick different backends per capability (search vs extract vs browse). (@kshitijk4poor) ([#20823](https://github.com/NousResearch/hermes-agent/pull/20823), [#20061](https://github.com/NousResearch/hermes-agent/pull/20061), [#20841](https://github.com/NousResearch/hermes-agent/pull/20841))
|
||||
|
||||
- **OpenRouter response caching** — explicit cache control for models that expose it. (@kshitijk4poor) ([#19132](https://github.com/NousResearch/hermes-agent/pull/19132))
|
||||
|
||||
- **`[[as_document]]` — skill media-routing directive** — skills can force the gateway to deliver output as a document on platforms that support it. ([#21210](https://github.com/NousResearch/hermes-agent/pull/21210))
|
||||
|
||||
- **`transform_llm_output` plugin hook** — new lifecycle hook that lets plugins reshape or filter LLM output before it hits the conversation. Useful for context-window reducers and content filters. ([#21235](https://github.com/NousResearch/hermes-agent/pull/21235))
|
||||
|
||||
- **Nous OAuth persists across profiles** — shared token store: sign in once, every profile inherits the session. ([#19712](https://github.com/NousResearch/hermes-agent/pull/19712))
|
||||
|
||||
- **QQBot — native approval keyboards** — feature parity with Telegram / Discord approval UX. Chunked upload, quoted attachments. ([#21342](https://github.com/NousResearch/hermes-agent/pull/21342), [#21353](https://github.com/NousResearch/hermes-agent/pull/21353))
|
||||
|
||||
- **6 new optional skills** — Shopify (Admin + Storefront GraphQL), here.now, shop-app personal shopping assistant, Anthropic financial-services bundle, kanban-video-orchestrator (@SHL0MS), searxng-search (@kshitijk4poor). ([#18116](https://github.com/NousResearch/hermes-agent/pull/18116), [#18170](https://github.com/NousResearch/hermes-agent/pull/18170), [#20702](https://github.com/NousResearch/hermes-agent/pull/20702), [#21180](https://github.com/NousResearch/hermes-agent/pull/21180), [#19281](https://github.com/NousResearch/hermes-agent/pull/19281), [#20841](https://github.com/NousResearch/hermes-agent/pull/20841))
|
||||
|
||||
- **New models** — `deepseek/deepseek-v4-pro`, `x-ai/grok-4.3`, `openrouter/owl-alpha` (free), `tencent/hy3-preview` (@Contentment003111), Arcee Trinity Large Thinking temperature + compression overrides. ([#20495](https://github.com/NousResearch/hermes-agent/pull/20495), [#20497](https://github.com/NousResearch/hermes-agent/pull/20497), [#18071](https://github.com/NousResearch/hermes-agent/pull/18071), [#21077](https://github.com/NousResearch/hermes-agent/pull/21077), [#20473](https://github.com/NousResearch/hermes-agent/pull/20473))
|
||||
|
||||
- **100 fresh CLI startup tips** — the random tip banner gets 100 new entries covering cron, kanban, curator, plugins, and lesser-known flags. ([#20168](https://github.com/NousResearch/hermes-agent/pull/20168))
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Multi-Agent Kanban (Durable)
|
||||
|
||||
### New — durable multi-profile collaboration board
|
||||
- **`feat(kanban): durable multi-profile collaboration board`** — post-revert reimplementation, multi-profile by design ([#17805](https://github.com/NousResearch/hermes-agent/pull/17805))
|
||||
- **Multi-project boards** — one install, many kanbans ([#19653](https://github.com/NousResearch/hermes-agent/pull/19653), [#19679](https://github.com/NousResearch/hermes-agent/pull/19679))
|
||||
- **Share board, workspaces, and worker logs across profiles** ([#19378](https://github.com/NousResearch/hermes-agent/pull/19378))
|
||||
- **Hallucination gate + recovery UX for worker-created-card claims** (closes #20017) ([#20232](https://github.com/NousResearch/hermes-agent/pull/20232))
|
||||
- **Generic diagnostics engine for task distress signals** ([#20332](https://github.com/NousResearch/hermes-agent/pull/20332))
|
||||
- **Per-task `max_retries` override** (supersedes #20972) ([#21330](https://github.com/NousResearch/hermes-agent/pull/21330))
|
||||
- **Multiline textarea for inline-create title** (salvage of #20970) ([#21243](https://github.com/NousResearch/hermes-agent/pull/21243))
|
||||
|
||||
### Kanban Dashboard
|
||||
- **Workspace kind + path inputs in inline create form** ([#19679](https://github.com/NousResearch/hermes-agent/pull/19679))
|
||||
- **Per-platform home-channel notification toggles** ([#19864](https://github.com/NousResearch/hermes-agent/pull/19864))
|
||||
- **Sharper home-channel toggle contrast + drop → running action** ([#19916](https://github.com/NousResearch/hermes-agent/pull/19916))
|
||||
- Fix: reject direct status transition to 'running' via dashboard API (salvage of #19554) ([#19705](https://github.com/NousResearch/hermes-agent/pull/19705))
|
||||
- Fix: dashboard board pin authoritative over server current file (#20879) ([#21230](https://github.com/NousResearch/hermes-agent/pull/21230))
|
||||
- Fix: treat dashboard event-stream cancellation as normal shutdown (#20790) ([#21222](https://github.com/NousResearch/hermes-agent/pull/21222))
|
||||
- Fix: filter dashboard board by selected tenant (#19817) ([#21349](https://github.com/NousResearch/hermes-agent/pull/21349))
|
||||
- Fix: code/pre styling theme-immune across all themes (#21086) ([#21247](https://github.com/NousResearch/hermes-agent/pull/21247))
|
||||
- Fix: reset `<code>` background inside dashboard board ([#20687](https://github.com/NousResearch/hermes-agent/pull/20687))
|
||||
- Fix: preserve dashboard completion summaries + add kanban edit (salvages #20016) ([#20195](https://github.com/NousResearch/hermes-agent/pull/20195))
|
||||
- Fix: avoid fragile failure-column renames (salvage #20848) (@kshitijk4poor) ([#20855](https://github.com/NousResearch/hermes-agent/pull/20855))
|
||||
|
||||
### Worker lifecycle + reliability
|
||||
- **Heartbeat + reclaim + zombie + retry-cap fixes** (#21147, #21141, #21169, #20881) ([#21183](https://github.com/NousResearch/hermes-agent/pull/21183))
|
||||
- **Auto-block workers that exit without completing + shutdown race** (#20894) ([#21214](https://github.com/NousResearch/hermes-agent/pull/21214))
|
||||
- **Detect darwin zombie workers** (salvages #20023) ([#20188](https://github.com/NousResearch/hermes-agent/pull/20188))
|
||||
- **Unify failure counter across spawn/timeout/crash outcomes** ([#20410](https://github.com/NousResearch/hermes-agent/pull/20410))
|
||||
- **Enforce worker task-ownership on destructive tool calls** ([#19713](https://github.com/NousResearch/hermes-agent/pull/19713))
|
||||
- **Drop worker identity claim from KANBAN_GUIDANCE** ([#19427](https://github.com/NousResearch/hermes-agent/pull/19427))
|
||||
- Fix: skip dispatch for tasks assigned to non-profile lanes (salvages #20105, #20134) ([#20165](https://github.com/NousResearch/hermes-agent/pull/20165))
|
||||
- Fix: include default profile in on-disk assignee enumeration (salvages #20123) ([#20170](https://github.com/NousResearch/hermes-agent/pull/20170))
|
||||
- Fix: ignore stale current board pointers (salvages #20063) ([#20183](https://github.com/NousResearch/hermes-agent/pull/20183))
|
||||
- Fix: profile discovery ignores HERMES_HOME in custom-root deployments (@jackey8616) ([#19020](https://github.com/NousResearch/hermes-agent/pull/19020))
|
||||
- Fix: allow orchestrator profiles to see kanban tools via toolsets config ([#19606](https://github.com/NousResearch/hermes-agent/pull/19606))
|
||||
|
||||
### Batch salvages
|
||||
- Tier-1 batch — metadata test, max_spawn config, run-id lifecycle guard (salvages #19522 #19556 #19829) ([#20440](https://github.com/NousResearch/hermes-agent/pull/20440))
|
||||
- Tier-2 batch — doctor, started_at, parent-guard, latest_summary, selects, linked-children ([#20448](https://github.com/NousResearch/hermes-agent/pull/20448))
|
||||
|
||||
### Documentation
|
||||
- Backfill multi-board refs in reference docs ([#19704](https://github.com/NousResearch/hermes-agent/pull/19704))
|
||||
- Document `/kanban` slash command ([#19584](https://github.com/NousResearch/hermes-agent/pull/19584))
|
||||
- Document recommended handoff evidence metadata (salvage #19512) ([#20415](https://github.com/NousResearch/hermes-agent/pull/20415))
|
||||
- Fix orchestrator + worker skill setup instructions (@helix4u) ([#20958](https://github.com/NousResearch/hermes-agent/pull/20958), [#20960](https://github.com/NousResearch/hermes-agent/pull/20960))
|
||||
|
||||
---
|
||||
|
||||
## 🎯 Persistent Goals, Checkpoints & Session Durability
|
||||
|
||||
### `/goal` — persistent cross-turn goals (Ralph loop)
|
||||
- **`feat: /goal — persistent cross-turn goals`** ([#18262](https://github.com/NousResearch/hermes-agent/pull/18262))
|
||||
- **Docs page — Persistent Goals (/goal)** ([#18275](https://github.com/NousResearch/hermes-agent/pull/18275))
|
||||
- Fix: honor configured goal turn budget (salvage #19423) ([#21287](https://github.com/NousResearch/hermes-agent/pull/21287))
|
||||
|
||||
### Checkpoints v2
|
||||
- **Single-store rewrite with real pruning + disk guardrails** ([#20709](https://github.com/NousResearch/hermes-agent/pull/20709))
|
||||
|
||||
### Session durability
|
||||
- **Auto-resume interrupted sessions after gateway restart** (salvage #20888) ([#21192](https://github.com/NousResearch/hermes-agent/pull/21192))
|
||||
- **Preserve pending update prompts across restarts** ([#20160](https://github.com/NousResearch/hermes-agent/pull/20160))
|
||||
- **Preserve home-channel thread targets across restart notifications** (salvage #18440) ([#19271](https://github.com/NousResearch/hermes-agent/pull/19271))
|
||||
- **Preserve thread routing from cached live session sources** ([#21206](https://github.com/NousResearch/hermes-agent/pull/21206))
|
||||
- **Preserve assistant metadata when branching sessions** ([#18222](https://github.com/NousResearch/hermes-agent/pull/18222))
|
||||
- **Preserve thread routing for /update progress and prompts** ([#18193](https://github.com/NousResearch/hermes-agent/pull/18193))
|
||||
- **Preserve document type when merging queued events** ([#18215](https://github.com/NousResearch/hermes-agent/pull/18215))
|
||||
|
||||
---
|
||||
|
||||
## 🛡️ Security & Reliability
|
||||
|
||||
### Security hardening (8 P0 closures)
|
||||
- **Enable secret redaction by default** (#17691, #20785) ([#21193](https://github.com/NousResearch/hermes-agent/pull/21193))
|
||||
- **Discord — scope `DISCORD_ALLOWED_ROLES` to originating guild** (#12136, CVSS 8.1) ([#21241](https://github.com/NousResearch/hermes-agent/pull/21241))
|
||||
- **WhatsApp — reject strangers by default, never respond in self-chat** (#8389) ([#21291](https://github.com/NousResearch/hermes-agent/pull/21291))
|
||||
- **MCP OAuth — close TOCTOU window when saving credentials** ([#21176](https://github.com/NousResearch/hermes-agent/pull/21176))
|
||||
- **`hermes_cli/auth.py` — close TOCTOU window in credential writers** ([#21194](https://github.com/NousResearch/hermes-agent/pull/21194))
|
||||
- **Browser — enforce cloud-metadata SSRF floor in hybrid routing** (#16234) ([#21228](https://github.com/NousResearch/hermes-agent/pull/21228))
|
||||
- **`hermes debug share` — redact log content at upload time** (@GodsBoy) ([#19318](https://github.com/NousResearch/hermes-agent/pull/19318))
|
||||
- **Cron — scan assembled prompt including skill content for prompt injection** (#3968) ([#21350](https://github.com/NousResearch/hermes-agent/pull/21350))
|
||||
- **Restore .env/auth.json/state.db with 0600 perms** ([#19699](https://github.com/NousResearch/hermes-agent/pull/19699))
|
||||
- **SRI integrity for dashboard plugin scripts** (salvage #19389) ([#21277](https://github.com/NousResearch/hermes-agent/pull/21277))
|
||||
- **Bind Meet node server to localhost, restrict token file to owner read** ([#19597](https://github.com/NousResearch/hermes-agent/pull/19597))
|
||||
- **Extend sensitive-write target to cover shell RC and credential files** ([#19282](https://github.com/NousResearch/hermes-agent/pull/19282))
|
||||
- **Harden YOLO mode env parsing against quoted-bool strings** ([#18214](https://github.com/NousResearch/hermes-agent/pull/18214))
|
||||
- **OSV-Scanner CI + Dependabot for github-actions only** ([#20037](https://github.com/NousResearch/hermes-agent/pull/20037))
|
||||
|
||||
### Reliability — critical bug closures
|
||||
- **CLI crash on startup — `Invalid key 'c-S-c'`** (P0, prompt_toolkit doesn't support Shift modifier) ([#19895](https://github.com/NousResearch/hermes-agent/pull/19895), [#19919](https://github.com/NousResearch/hermes-agent/pull/19919))
|
||||
- **CLOSE_WAIT fd leak audit** — httpx keepalive + WhatsApp aiohttp leak + Feishu hygiene (#18451) ([#18766](https://github.com/NousResearch/hermes-agent/pull/18766))
|
||||
- **Gateway creates AIAgent with empty OpenRouter API key when OPENROUTER_API_KEY is missing** (#20982) — fallback providers correctly honored
|
||||
- **Background review + curator protected from overwriting bundled/hub skills** (#20273) ([#20194](https://github.com/NousResearch/hermes-agent/pull/20194))
|
||||
- **TUI compression continuation — ghost sessions with incomplete metadata** (#20001)
|
||||
- **`hermes mcp add` silently launches chat instead of registering MCP server** (#19785) ([#21204](https://github.com/NousResearch/hermes-agent/pull/21204))
|
||||
- **Background review agent runtime propagation** — provider/model/credentials now actually inherit from parent
|
||||
- **Inbound document host paths translated to container paths for Docker backend** (salvage #19048) ([#21184](https://github.com/NousResearch/hermes-agent/pull/21184))
|
||||
- **Matrix gateway race between auto-redaction and message delivery with high-speed models** (#19075)
|
||||
- **`/new` during active agent session never sends response on Telegram** (#18912)
|
||||
|
||||
---
|
||||
|
||||
## 📱 Messaging Platforms (Gateway)
|
||||
|
||||
### New platform
|
||||
- **Google Chat — 20th platform** + generic `env_enablement_fn` / `cron_deliver_env_var` platform-plugin hooks (IRC + Teams migrated) ([#21306](https://github.com/NousResearch/hermes-agent/pull/21306), [#21331](https://github.com/NousResearch/hermes-agent/pull/21331))
|
||||
|
||||
### Cross-platform
|
||||
- **`allowed_{channels,chats,rooms}` whitelist** — Slack (salvage #7401), Telegram, Mattermost, Matrix, DingTalk ([#21251](https://github.com/NousResearch/hermes-agent/pull/21251))
|
||||
- **Per-platform `gateway_restart_notification` flag** ([#20892](https://github.com/NousResearch/hermes-agent/pull/20892))
|
||||
- **`busy_ack_enabled` config — suppress ack messages** ([#18194](https://github.com/NousResearch/hermes-agent/pull/18194))
|
||||
- **Auto-delete slash-command system notices after TTL** ([#18266](https://github.com/NousResearch/hermes-agent/pull/18266))
|
||||
- **Opt-in cleanup of temporary progress bubbles** ([#21186](https://github.com/NousResearch/hermes-agent/pull/21186))
|
||||
- **`[[as_document]]` directive — skill media routing** (salvage #19069) ([#21210](https://github.com/NousResearch/hermes-agent/pull/21210))
|
||||
- **`hermes gateway list` — cross-profile status** (salvage #19129) ([#21225](https://github.com/NousResearch/hermes-agent/pull/21225))
|
||||
- **Auto-resume interrupted sessions after restart** (salvage #20888) ([#21192](https://github.com/NousResearch/hermes-agent/pull/21192))
|
||||
- **Atomic restart markers + Windows runtime-lock offset** (#17842) ([#18179](https://github.com/NousResearch/hermes-agent/pull/18179))
|
||||
- Fix: `config.yaml` wins over `.env` for agent/display/timezone settings ([#18764](https://github.com/NousResearch/hermes-agent/pull/18764))
|
||||
- Fix: auto-restart when source files change out from under us (#17648) ([#18409](https://github.com/NousResearch/hermes-agent/pull/18409))
|
||||
- Fix: use git HEAD SHA for stale-code check, not file mtimes ([#19740](https://github.com/NousResearch/hermes-agent/pull/19740))
|
||||
- Fix: shutdown + restart hygiene — drain timeout, false-fatal, success log ([#18761](https://github.com/NousResearch/hermes-agent/pull/18761))
|
||||
- Fix: preserve max_turns after env reload (salvage #19183) ([#21240](https://github.com/NousResearch/hermes-agent/pull/21240))
|
||||
- Fix: exclude ancestor PIDs from gateway process scan ([#19586](https://github.com/NousResearch/hermes-agent/pull/19586))
|
||||
- Fix: move quick-command alias dispatch before built-ins ([#19588](https://github.com/NousResearch/hermes-agent/pull/19588))
|
||||
- Fix: show other profiles in 'gateway status' to prevent confusion ([#19582](https://github.com/NousResearch/hermes-agent/pull/19582))
|
||||
- Fix: include external_dirs skills in Telegram/Discord slash commands (salvage #8790) ([#18741](https://github.com/NousResearch/hermes-agent/pull/18741))
|
||||
- Fix: match disabled/optional skills by frontmatter slug, not dir name ([#18753](https://github.com/NousResearch/hermes-agent/pull/18753))
|
||||
- Fix: read /status token totals from SessionDB (#17158) ([#18206](https://github.com/NousResearch/hermes-agent/pull/18206))
|
||||
- Fix: snapshot callback generation after agent binds it, not before ([#18219](https://github.com/NousResearch/hermes-agent/pull/18219))
|
||||
- Fix: re-inject topic-bound skill after /new or /reset ([#18205](https://github.com/NousResearch/hermes-agent/pull/18205))
|
||||
- Fix: isolate pending native image paths by session ([#18202](https://github.com/NousResearch/hermes-agent/pull/18202))
|
||||
- Fix: clear queued reload skills notes on new/resume/branch ([#19431](https://github.com/NousResearch/hermes-agent/pull/19431))
|
||||
- Fix: hide required-arg commands from Telegram menu ([#19400](https://github.com/NousResearch/hermes-agent/pull/19400))
|
||||
- Fix: bridge top-level `require_mention` to Telegram config ([#19429](https://github.com/NousResearch/hermes-agent/pull/19429))
|
||||
- Fix: suppress duplicate voice transcripts ([#19428](https://github.com/NousResearch/hermes-agent/pull/19428))
|
||||
- Fix: show friendly error when service is not installed ([#19707](https://github.com/NousResearch/hermes-agent/pull/19707))
|
||||
- Fix: read context_length from custom_providers in session info header ([#19708](https://github.com/NousResearch/hermes-agent/pull/19708))
|
||||
- Fix: preserve WSL interop PATH in systemd units ([#19867](https://github.com/NousResearch/hermes-agent/pull/19867))
|
||||
- Fix: handle planned service stops (salvage #19876) ([#19936](https://github.com/NousResearch/hermes-agent/pull/19936))
|
||||
- Fix: keep DoH-confirmed Telegram IPs that match system DNS (salvage #17043) ([#20175](https://github.com/NousResearch/hermes-agent/pull/20175))
|
||||
- Fix: load `reply_to_mode` from config.yaml for Discord + Telegram (salvage #17117) ([#20171](https://github.com/NousResearch/hermes-agent/pull/20171))
|
||||
- Fix: tolerate malformed HERMES_HUMAN_DELAY_* env vars (salvage #16933) ([#20217](https://github.com/NousResearch/hermes-agent/pull/20217))
|
||||
- Fix: deterministic thread eviction preserves newest entries (salvage #13639) ([#20285](https://github.com/NousResearch/hermes-agent/pull/20285))
|
||||
- Fix: don't dead-end setup wizard when only system-scope unit is installed ([#20905](https://github.com/NousResearch/hermes-agent/pull/20905))
|
||||
- Fix: wait for systemd restart readiness + harden Discord slash-command sync ([#20949](https://github.com/NousResearch/hermes-agent/pull/20949))
|
||||
- Fix: avoid duplicated Responses history (salvage #18995) ([#21185](https://github.com/NousResearch/hermes-agent/pull/21185))
|
||||
- Fix: surface bootstrap failures to stderr (salvage #21157) ([#21278](https://github.com/NousResearch/hermes-agent/pull/21278))
|
||||
- Fix: log agent task failures instead of silently losing usage data (salvage #21159) ([#21274](https://github.com/NousResearch/hermes-agent/pull/21274))
|
||||
- Fix: log runtime-status write failures with rate-limiting (salvage #21158) ([#21285](https://github.com/NousResearch/hermes-agent/pull/21285))
|
||||
- Fix: reset-failed before every fallback restart so the gateway can't get stranded ([#21371](https://github.com/NousResearch/hermes-agent/pull/21371))
|
||||
- Fix: Telegram — preserve `thread_id=1` for forum General typing indicator ([#21390](https://github.com/NousResearch/hermes-agent/pull/21390))
|
||||
- Fix: batch critical fixes — session resume, /new race, HA WebSocket scheme (@kshitijk4poor) ([#19182](https://github.com/NousResearch/hermes-agent/pull/19182))
|
||||
|
||||
### Telegram
|
||||
- **DM user-managed multi-session topics** (salvage of #19185) ([#19206](https://github.com/NousResearch/hermes-agent/pull/19206))
|
||||
|
||||
### Discord
|
||||
- **Message deletion action** (salvage #19052) ([#21197](https://github.com/NousResearch/hermes-agent/pull/21197))
|
||||
- Fix: allow `free_response_channels` to override `DISCORD_IGNORE_NO_MENTION` ([#19629](https://github.com/NousResearch/hermes-agent/pull/19629))
|
||||
|
||||
### Slack
|
||||
- Fix: ephemeral slash-command ack, private notice delivery, format_message fixes (@kshitijk4poor) ([#18198](https://github.com/NousResearch/hermes-agent/pull/18198))
|
||||
|
||||
### WhatsApp
|
||||
- Fix: load WhatsApp home channel from env overrides ([#18190](https://github.com/NousResearch/hermes-agent/pull/18190))
|
||||
|
||||
### Feishu
|
||||
- **Operator-configurable bot admission and mention policy** ([#18208](https://github.com/NousResearch/hermes-agent/pull/18208))
|
||||
- Fix: force text mode for markdown tables (salvage of #13723 by @WuTianyi123) ([#20275](https://github.com/NousResearch/hermes-agent/pull/20275))
|
||||
|
||||
### Matrix + Email
|
||||
- Fix: `/sethome` on Matrix and Email now persists across restarts ([#18272](https://github.com/NousResearch/hermes-agent/pull/18272))
|
||||
|
||||
### Teams
|
||||
- **Docs + feat: sidebar + threading with group-chat fallback** ([#20042](https://github.com/NousResearch/hermes-agent/pull/20042))
|
||||
|
||||
### Weixin
|
||||
- Fix: deduplicate Weixin messages by content fingerprint ([#19742](https://github.com/NousResearch/hermes-agent/pull/19742))
|
||||
|
||||
### QQBot
|
||||
- **Port SDK improvements in-tree — chunked upload, approval keyboards, quoted attachments** ([#21342](https://github.com/NousResearch/hermes-agent/pull/21342))
|
||||
- **Wire native tool-approval UX via inline keyboards** ([#21353](https://github.com/NousResearch/hermes-agent/pull/21353))
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Core Agent & Architecture
|
||||
|
||||
### Provider & Model Support
|
||||
|
||||
#### Pluggable providers
|
||||
- **ProviderProfile ABC + `plugins/model-providers/`** — inference providers are now a pluggable surface (salvage of #14424) ([#20324](https://github.com/NousResearch/hermes-agent/pull/20324))
|
||||
- **`list_picker_providers`** — credential-filtered picker (salvage #13561) ([#20298](https://github.com/NousResearch/hermes-agent/pull/20298))
|
||||
- **Remove `/provider` alias for `/model`** ([#20358](https://github.com/NousResearch/hermes-agent/pull/20358))
|
||||
- **Shared Hermes dotenv loader across CLI + plugins** (salvage #13660) ([#20281](https://github.com/NousResearch/hermes-agent/pull/20281))
|
||||
- **Nous OAuth persisted across profiles via shared token store** ([#19712](https://github.com/NousResearch/hermes-agent/pull/19712))
|
||||
|
||||
#### New models
|
||||
- `deepseek/deepseek-v4-pro` added to OpenRouter + Nous Portal ([#20495](https://github.com/NousResearch/hermes-agent/pull/20495))
|
||||
- `x-ai/grok-4.3` added to OpenRouter + Nous Portal ([#20497](https://github.com/NousResearch/hermes-agent/pull/20497))
|
||||
- `openrouter/owl-alpha` (free tier) added to curated OpenRouter list ([#18071](https://github.com/NousResearch/hermes-agent/pull/18071))
|
||||
- `tencent/hy3-preview` paid route on OpenRouter (@Contentment003111) ([#21077](https://github.com/NousResearch/hermes-agent/pull/21077))
|
||||
- Arcee Trinity Large Thinking — temperature + compression overrides ([#20473](https://github.com/NousResearch/hermes-agent/pull/20473))
|
||||
- Rename `x-ai/grok-4.20-beta` to `x-ai/grok-4.20` ([#19640](https://github.com/NousResearch/hermes-agent/pull/19640))
|
||||
- Demote Vercel AI Gateway to bottom of provider picker ([#18112](https://github.com/NousResearch/hermes-agent/pull/18112))
|
||||
|
||||
#### Provider configuration
|
||||
- **OpenRouter — response caching support** (@kshitijk4poor) ([#19132](https://github.com/NousResearch/hermes-agent/pull/19132))
|
||||
- **`image_gen.model` from config.yaml honored** (salvage #19376) ([#21273](https://github.com/NousResearch/hermes-agent/pull/21273))
|
||||
- Fix: honor runtime default model during delegate provider resolution (@johnncenae) ([#17587](https://github.com/NousResearch/hermes-agent/pull/17587))
|
||||
- Fix: avoid Bedrock credential probe in provider picker (@helix4u) ([#18998](https://github.com/NousResearch/hermes-agent/pull/18998))
|
||||
- Fix: drop stale env-var override of persisted provider for cron ([#19627](https://github.com/NousResearch/hermes-agent/pull/19627))
|
||||
- Fix: auxiliary curator api_key/base_url into runtime resolution ([#19421](https://github.com/NousResearch/hermes-agent/pull/19421))
|
||||
|
||||
### Agent Loop & Conversation
|
||||
- **`video_analyze` — native video understanding tool** (@alt-glitch) ([#19301](https://github.com/NousResearch/hermes-agent/pull/19301))
|
||||
- **Show context compression count in status bar** (CLI + TUI) ([#21218](https://github.com/NousResearch/hermes-agent/pull/21218))
|
||||
- **Isolate `get_tool_definitions` quiet_mode cache + dedup LCM injection** (#17335) ([#17889](https://github.com/NousResearch/hermes-agent/pull/17889))
|
||||
- Fix: warning-first tool-call loop guardrails ([#18227](https://github.com/NousResearch/hermes-agent/pull/18227))
|
||||
- Fix: break permanent empty-response loop from orphan tool-tail ([#21385](https://github.com/NousResearch/hermes-agent/pull/21385))
|
||||
- Fix: propagate ContextVars to concurrent tool worker threads (salvage #16660) ([#18123](https://github.com/NousResearch/hermes-agent/pull/18123))
|
||||
- Fix: surface self-improvement review summaries across CLI, TUI, and gateway ([#18073](https://github.com/NousResearch/hermes-agent/pull/18073))
|
||||
- Fix: serialize concurrent `hermes_tools` RPC calls from `execute_code` ([#17894](https://github.com/NousResearch/hermes-agent/pull/17894), [#17902](https://github.com/NousResearch/hermes-agent/pull/17902))
|
||||
- Fix: include system prompt + tool schemas in token estimates for compression ([#18265](https://github.com/NousResearch/hermes-agent/pull/18265))
|
||||
|
||||
### Compression
|
||||
- Fix: skip non-string tool content in dedup pass to prevent AttributeError ([#19398](https://github.com/NousResearch/hermes-agent/pull/19398))
|
||||
- Fix: reset `_summary_failure_cooldown_until` on session reset ([#19622](https://github.com/NousResearch/hermes-agent/pull/19622))
|
||||
- Fix: trigger fallback on timeout errors alongside model-unavailable errors ([#19665](https://github.com/NousResearch/hermes-agent/pull/19665))
|
||||
- Fix: `_prune_old_tool_results` boundary direction ([#19725](https://github.com/NousResearch/hermes-agent/pull/19725))
|
||||
- Fix: soften summary prompt for content filters (salvage #19456) ([#21302](https://github.com/NousResearch/hermes-agent/pull/21302))
|
||||
|
||||
### Delegate
|
||||
- Fix: inherit parent fallback_chain in `_build_child_agent` ([#19601](https://github.com/NousResearch/hermes-agent/pull/19601))
|
||||
- Fix: guard `_load_config()` against `delegation: null` in config.yaml ([#19662](https://github.com/NousResearch/hermes-agent/pull/19662))
|
||||
- Fix: inherit parent api_key when `delegation.base_url` set without `delegation.api_key` ([#19741](https://github.com/NousResearch/hermes-agent/pull/19741))
|
||||
- Fix: expand composite toolsets before intersection (salvage #19455) ([#21300](https://github.com/NousResearch/hermes-agent/pull/21300))
|
||||
- Fix: correct ACP docs — Claude Code CLI has no --acp flag (salvage #19058) ([#21201](https://github.com/NousResearch/hermes-agent/pull/21201))
|
||||
|
||||
### Session & Memory
|
||||
- **Hindsight — probe API for `update_mode='append'` to dedupe across processes** (@nicoloboschi) ([#20222](https://github.com/NousResearch/hermes-agent/pull/20222))
|
||||
|
||||
### Curator
|
||||
- **`hermes curator archive` and `prune` subcommands** ([#20200](https://github.com/NousResearch/hermes-agent/pull/20200))
|
||||
- **`hermes curator list-archived`** (#20651) ([#21236](https://github.com/NousResearch/hermes-agent/pull/21236))
|
||||
- **Synchronous manual `hermes curator run`** (#20555) ([#21216](https://github.com/NousResearch/hermes-agent/pull/21216))
|
||||
- Fix: preserve `last_report_path` in state ([#18169](https://github.com/NousResearch/hermes-agent/pull/18169))
|
||||
- Fix: rewrite cron job skill refs after consolidation ([#18253](https://github.com/NousResearch/hermes-agent/pull/18253))
|
||||
- Fix: defer first run + `--dry-run` preview (#18373) ([#18389](https://github.com/NousResearch/hermes-agent/pull/18389))
|
||||
- Fix: authoritative `absorbed_into` on delete + restore cron skill links on rollback (#18671) ([#18731](https://github.com/NousResearch/hermes-agent/pull/18731))
|
||||
- Fix: prevent false-positive consolidation from substring matching ([#19573](https://github.com/NousResearch/hermes-agent/pull/19573))
|
||||
- Fix: only mark agent-created for background-review sediment ([#19621](https://github.com/NousResearch/hermes-agent/pull/19621))
|
||||
- Fix: protect hub skills by frontmatter name ([#20194](https://github.com/NousResearch/hermes-agent/pull/20194))
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Tool System
|
||||
|
||||
### File tools
|
||||
- **Post-write delta lint on `write_file` + `patch`** — in-proc linters for Python, JSON, YAML, TOML ([#20191](https://github.com/NousResearch/hermes-agent/pull/20191))
|
||||
|
||||
### Cron
|
||||
- **`no_agent` mode — script-only cron jobs (watchdog pattern)** ([#19709](https://github.com/NousResearch/hermes-agent/pull/19709))
|
||||
- **`context_from` chaining docs** (salvage #15724) ([#20394](https://github.com/NousResearch/hermes-agent/pull/20394))
|
||||
- Fix: treat non-dict origin as missing instead of crashing tick ([#19283](https://github.com/NousResearch/hermes-agent/pull/19283))
|
||||
- Fix: bump skill usage when cron jobs load skills ([#19433](https://github.com/NousResearch/hermes-agent/pull/19433))
|
||||
- Fix: recover null `next_run_at` jobs ([#19576](https://github.com/NousResearch/hermes-agent/pull/19576))
|
||||
- Fix: skip AI call when prerun script produces no output ([#19628](https://github.com/NousResearch/hermes-agent/pull/19628))
|
||||
- Fix: expand config.yaml refs during job execution ([#19872](https://github.com/NousResearch/hermes-agent/pull/19872))
|
||||
- Fix: serialize `get_due_jobs` writes to prevent parallel state corruption ([#19874](https://github.com/NousResearch/hermes-agent/pull/19874))
|
||||
- Fix: initialize MCP servers before constructing the cron AIAgent ([#21354](https://github.com/NousResearch/hermes-agent/pull/21354))
|
||||
|
||||
### MCP
|
||||
- **SSE transport support** (salvage #19135) ([#21227](https://github.com/NousResearch/hermes-agent/pull/21227))
|
||||
- **Forward OAuth auth + bump `sse_read_timeout` on SSE transport** ([#21323](https://github.com/NousResearch/hermes-agent/pull/21323))
|
||||
- **Retry stale pipe transport failures as session-expired** ([#21289](https://github.com/NousResearch/hermes-agent/pull/21289))
|
||||
- **Surface image tool results as MEDIA tags instead of dropping them** ([#21328](https://github.com/NousResearch/hermes-agent/pull/21328))
|
||||
- **Periodic keepalive to `_wait_for_lifecycle_event`** (salvage #17016) ([#20209](https://github.com/NousResearch/hermes-agent/pull/20209))
|
||||
- Fix: reconnect on terminated sessions ([#19380](https://github.com/NousResearch/hermes-agent/pull/19380))
|
||||
- Fix: decouple AnyUrl import from mcp dependency ([#19695](https://github.com/NousResearch/hermes-agent/pull/19695))
|
||||
- Fix: `mcp add --command` gets distinct argparse dest ([#21204](https://github.com/NousResearch/hermes-agent/pull/21204))
|
||||
- Fix: clear stale thread interrupt before MCP discovery ([#21276](https://github.com/NousResearch/hermes-agent/pull/21276))
|
||||
- Fix: report configured timeout in MCP call errors ([#21281](https://github.com/NousResearch/hermes-agent/pull/21281))
|
||||
- Fix: include exception type in error messages when str(exc) is empty (salvage #19425) ([#21292](https://github.com/NousResearch/hermes-agent/pull/21292))
|
||||
- Fix: re-raise CancelledError explicitly in `MCPServerTask.run` ([#21318](https://github.com/NousResearch/hermes-agent/pull/21318))
|
||||
- Fix: coerce numeric tool args defensively in `mcp_serve` ([#21329](https://github.com/NousResearch/hermes-agent/pull/21329))
|
||||
- Fix: gate utility stubs on server-advertised capabilities ([#21347](https://github.com/NousResearch/hermes-agent/pull/21347))
|
||||
|
||||
### Browser
|
||||
- Fix: allow explicit CDP override without local agent-browser ([#19670](https://github.com/NousResearch/hermes-agent/pull/19670))
|
||||
- Fix: inject `--no-sandbox` for root + AppArmor userns restrictions ([#19747](https://github.com/NousResearch/hermes-agent/pull/19747))
|
||||
- Fix: tighten Lightpanda fallback edge cases (@kshitijk4poor) ([#20672](https://github.com/NousResearch/hermes-agent/pull/20672))
|
||||
|
||||
### Web tools
|
||||
- **Per-capability backend selection — search/extract split** (@kshitijk4poor) ([#20061](https://github.com/NousResearch/hermes-agent/pull/20061))
|
||||
- **SearXNG native search-only backend** (@kshitijk4poor) ([#20823](https://github.com/NousResearch/hermes-agent/pull/20823))
|
||||
|
||||
### Approval / Tool gating
|
||||
- Fix: wake blocked gateway approvals on session cleanup ([#18171](https://github.com/NousResearch/hermes-agent/pull/18171))
|
||||
- Fix: harden YOLO mode env parsing against quoted-bool strings ([#18214](https://github.com/NousResearch/hermes-agent/pull/18214))
|
||||
- Fix: extend sensitive write target to cover shell RC and credential files ([#19282](https://github.com/NousResearch/hermes-agent/pull/19282))
|
||||
|
||||
---
|
||||
|
||||
## 🔌 Plugin System
|
||||
|
||||
- **`transform_llm_output` plugin hook** (salvage of #20813) ([#21235](https://github.com/NousResearch/hermes-agent/pull/21235))
|
||||
- **Document `env_enablement_fn` + `cron_deliver_env_var` platform-plugin hooks** ([#21331](https://github.com/NousResearch/hermes-agent/pull/21331))
|
||||
- **Pluggable surfaces coverage — model-provider guide, full plugin map, opt-in fix** ([#20749](https://github.com/NousResearch/hermes-agent/pull/20749))
|
||||
- **Plugin-authoring gaps — image-gen provider guide + publishing a skill tap** ([#20800](https://github.com/NousResearch/hermes-agent/pull/20800))
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Skills Ecosystem
|
||||
|
||||
### New optional skills
|
||||
- **Shopify** — Admin + Storefront GraphQL optional skill ([#18116](https://github.com/NousResearch/hermes-agent/pull/18116))
|
||||
- **here.now** — optional skill ([#18170](https://github.com/NousResearch/hermes-agent/pull/18170))
|
||||
- **shop-app** — personal shopping assistant (optional) ([#20702](https://github.com/NousResearch/hermes-agent/pull/20702))
|
||||
- **Anthropic financial-services bundle** — ported as optional finance skills ([#21180](https://github.com/NousResearch/hermes-agent/pull/21180))
|
||||
- **kanban-video-orchestrator** — creative optional skill (@SHL0MS) ([#19281](https://github.com/NousResearch/hermes-agent/pull/19281))
|
||||
- **searxng-search** — optional skill + Web Search + Extract docs page (@kshitijk4poor) ([#20841](https://github.com/NousResearch/hermes-agent/pull/20841), [#20844](https://github.com/NousResearch/hermes-agent/pull/20844))
|
||||
|
||||
### Skill UX
|
||||
- **Linear skill — add Documents support + Python helper script** ([#20752](https://github.com/NousResearch/hermes-agent/pull/20752))
|
||||
- **Modernize Obsidian skill to use file tools** (salvage #19332) ([#20413](https://github.com/NousResearch/hermes-agent/pull/20413))
|
||||
- **Default custom tool creation to plugins** (@kshitijk4poor) ([#19755](https://github.com/NousResearch/hermes-agent/pull/19755))
|
||||
- **skill_commands cache — rescan on platform scope changes** (salvage #14570 by @LeonSGP43) ([#18739](https://github.com/NousResearch/hermes-agent/pull/18739))
|
||||
- **Skills — additional rescan paths in skill_commands cache** (salvage #19042) ([#21181](https://github.com/NousResearch/hermes-agent/pull/21181))
|
||||
- Fix: regression tests for non-dict metadata in `extract_skill_conditions` ([#18213](https://github.com/NousResearch/hermes-agent/pull/18213))
|
||||
- Docs: explain restoring bundled skills (salvage #19254) ([#20404](https://github.com/NousResearch/hermes-agent/pull/20404))
|
||||
- Docs: document `hermes skills reset` subcommand (salvage #11544) ([#20395](https://github.com/NousResearch/hermes-agent/pull/20395))
|
||||
- Docs: himalaya v1.2.0 `folder.aliases` syntax ([#19882](https://github.com/NousResearch/hermes-agent/pull/19882))
|
||||
- Point agent at `hermes-agent` skill + docs site sync ([#20390](https://github.com/NousResearch/hermes-agent/pull/20390))
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ CLI & User Experience
|
||||
|
||||
### CLI
|
||||
- **`/new` accepts optional session name argument** (salvage of #19555) ([#19637](https://github.com/NousResearch/hermes-agent/pull/19637))
|
||||
- **100 new CLI startup tips** ([#20168](https://github.com/NousResearch/hermes-agent/pull/20168))
|
||||
- **`display.language` — static message translation** (zh/ja/de/es) ([#20231](https://github.com/NousResearch/hermes-agent/pull/20231))
|
||||
- **French (fr) locale** (@Foolafroos) ([#20329](https://github.com/NousResearch/hermes-agent/pull/20329))
|
||||
- **Ukrainian (uk) locale** ([#20467](https://github.com/NousResearch/hermes-agent/pull/20467))
|
||||
- **Turkish (tr) locale** ([#20474](https://github.com/NousResearch/hermes-agent/pull/20474))
|
||||
- Fix: recover classic CLI output after resize (@helix4u) ([#20444](https://github.com/NousResearch/hermes-agent/pull/20444))
|
||||
- Fix: complete absolute paths as paths (@helix4u) ([#19930](https://github.com/NousResearch/hermes-agent/pull/19930))
|
||||
- Fix: resolve lazy session creation regressions (#18370 fallout) (@alt-glitch) ([#20363](https://github.com/NousResearch/hermes-agent/pull/20363))
|
||||
- Fix: local backend CLI always uses launch directory (@alt-glitch) ([#19334](https://github.com/NousResearch/hermes-agent/pull/19334))
|
||||
- Refactor: drop dead c-S-c key binding (follow-up to #19895) ([#19919](https://github.com/NousResearch/hermes-agent/pull/19919))
|
||||
|
||||
### TUI (Ink)
|
||||
- **`/model` picker overhaul to match `hermes model` with inline auth** (@austinpickett) ([#18117](https://github.com/NousResearch/hermes-agent/pull/18117))
|
||||
- **Collapsible sections in startup banner** — skills, system prompt, MCP (@kshitijk4poor) ([#20625](https://github.com/NousResearch/hermes-agent/pull/20625))
|
||||
- **Show context compression count in status bar** ([#21218](https://github.com/NousResearch/hermes-agent/pull/21218))
|
||||
- Perf: reduce overlay render churn with focused selectors (@OutThisLife) ([#20393](https://github.com/NousResearch/hermes-agent/pull/20393))
|
||||
- Fix: restore voice push-to-talk parity (salvage of #16189 by @Montbra) (@OutThisLife) ([#20897](https://github.com/NousResearch/hermes-agent/pull/20897))
|
||||
- Fix: kanban button (@austinpickett) ([#18358](https://github.com/NousResearch/hermes-agent/pull/18358))
|
||||
|
||||
### Dashboard
|
||||
- **Plugins page — manage, enable/disable, auth status** (@austinpickett) ([#18095](https://github.com/NousResearch/hermes-agent/pull/18095))
|
||||
- **Profiles management page** (@vincez-hms-coder) ([#16419](https://github.com/NousResearch/hermes-agent/pull/16419))
|
||||
- **Interactive column sorting in analytics tables** ([#18192](https://github.com/NousResearch/hermes-agent/pull/18192))
|
||||
- **`default-large` built-in theme with 18px base size** ([#20820](https://github.com/NousResearch/hermes-agent/pull/20820))
|
||||
- **Support serving under URL prefix via `X-Forwarded-Prefix`** (salvage #19450) ([#21296](https://github.com/NousResearch/hermes-agent/pull/21296))
|
||||
- **Launch dashboard as side-process via `HERMES_DASHBOARD=1` in Docker** (@benbarclay) ([#19540](https://github.com/NousResearch/hermes-agent/pull/19540))
|
||||
- Fix: dashboard theme layout shift (@AllardQuek) ([#17232](https://github.com/NousResearch/hermes-agent/pull/17232))
|
||||
- Fix: gateway model picker current context (@helix4u) ([#20513](https://github.com/NousResearch/hermes-agent/pull/20513))
|
||||
|
||||
### Update + setup
|
||||
- **`hermes update --yes/-y` to skip interactive prompts** ([#18261](https://github.com/NousResearch/hermes-agent/pull/18261))
|
||||
- **Restart manual profile gateways after update** ([#18178](https://github.com/NousResearch/hermes-agent/pull/18178))
|
||||
|
||||
### Profiles
|
||||
- **`--no-skills` flag for empty profile creation** ([#20986](https://github.com/NousResearch/hermes-agent/pull/20986))
|
||||
|
||||
---
|
||||
|
||||
## 🎵 Voice, Image & Media
|
||||
|
||||
- **xAI Custom Voices — voice cloning** (@alt-glitch) ([#18776](https://github.com/NousResearch/hermes-agent/pull/18776))
|
||||
- **Achievements — share card render on unlocked badges** ([#19657](https://github.com/NousResearch/hermes-agent/pull/19657))
|
||||
- **Refresh systemd unit on gateway boot (not just start/restart)** (@alt-glitch) ([#19684](https://github.com/NousResearch/hermes-agent/pull/19684))
|
||||
|
||||
---
|
||||
|
||||
## 🔗 API Server & Remote Access
|
||||
|
||||
- **`X-Hermes-Session-Key` header for long-term memory scoping** (closes #20060) ([#20199](https://github.com/NousResearch/hermes-agent/pull/20199))
|
||||
|
||||
---
|
||||
|
||||
## 🧰 ACP Adapter (VS Code / Zed / JetBrains)
|
||||
|
||||
- **`/steer` and `/queue` slash commands** (@HenkDz) ([#18114](https://github.com/NousResearch/hermes-agent/pull/18114))
|
||||
- Fix: translate Windows cwd for WSL sessions (salvage #18128) ([#18233](https://github.com/NousResearch/hermes-agent/pull/18233))
|
||||
- Fix: run `/steer` as a regular prompt on idle sessions ([#18258](https://github.com/NousResearch/hermes-agent/pull/18258))
|
||||
- Fix: route Zed thoughts to reasoning + polish tool/context rendering ([#19139](https://github.com/NousResearch/hermes-agent/pull/19139))
|
||||
- Fix: atomic session persistence via `replace_messages` (salvage #13675) ([#20279](https://github.com/NousResearch/hermes-agent/pull/20279))
|
||||
- Fix: preserve assistant reasoning metadata in session persistence (salvage #13575) ([#20296](https://github.com/NousResearch/hermes-agent/pull/20296))
|
||||
- Docs: update VS Code setup for ACP Client extension (salvage #12495) ([#20433](https://github.com/NousResearch/hermes-agent/pull/20433))
|
||||
|
||||
---
|
||||
|
||||
## 🐳 Docker
|
||||
|
||||
- **Launch dashboard as side-process via `HERMES_DASHBOARD=1`** (@benbarclay) ([#19540](https://github.com/NousResearch/hermes-agent/pull/19540))
|
||||
- **Refuse root gateway runs in official image** (salvage #19215) ([#21250](https://github.com/NousResearch/hermes-agent/pull/21250))
|
||||
- **Chown runtime `node_modules` trees to hermes user** (salvage #19303) ([#21267](https://github.com/NousResearch/hermes-agent/pull/21267))
|
||||
- Fix: exclude compose/profile runtime state from build context ([#19626](https://github.com/NousResearch/hermes-agent/pull/19626))
|
||||
- CI: don't cancel overlapping builds, guard `:latest` (@ethernet8023) ([#20890](https://github.com/NousResearch/hermes-agent/pull/20890))
|
||||
- Test: align Dockerfile contract tests with simplified TUI flow (salvage #19024) ([#21174](https://github.com/NousResearch/hermes-agent/pull/21174))
|
||||
- Docs: connect to local inference servers (vLLM, Ollama) (salvage #12335) ([#20407](https://github.com/NousResearch/hermes-agent/pull/20407))
|
||||
- Docs: document `API_SERVER_*` env vars (salvage #11758) ([#20409](https://github.com/NousResearch/hermes-agent/pull/20409))
|
||||
- Docs: clarify Docker terminal backend is a single persistent container ([#20003](https://github.com/NousResearch/hermes-agent/pull/20003))
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Notable Bug Fixes
|
||||
|
||||
### Agent
|
||||
- Fix: recover lazy session creation regressions (#18370 fallout) (@alt-glitch) ([#20363](https://github.com/NousResearch/hermes-agent/pull/20363))
|
||||
- Fix: propagate ContextVars to concurrent tool worker threads (salvage #16660) ([#18123](https://github.com/NousResearch/hermes-agent/pull/18123))
|
||||
- Fix: warning-first tool-call loop guardrails ([#18227](https://github.com/NousResearch/hermes-agent/pull/18227))
|
||||
- Fix: surface self-improvement review summaries across CLI, TUI, and gateway ([#18073](https://github.com/NousResearch/hermes-agent/pull/18073))
|
||||
|
||||
### Gateway streaming
|
||||
- Fix: harden StreamingConfig bool and numeric coercion (@simbam99) ([#16463](https://github.com/NousResearch/hermes-agent/pull/16463))
|
||||
|
||||
### Model
|
||||
- Fix: avoid Bedrock credential probe in provider picker (@helix4u) ([#18998](https://github.com/NousResearch/hermes-agent/pull/18998))
|
||||
|
||||
### Doctor
|
||||
- Fix: check global agent-browser when local install not found ([#19671](https://github.com/NousResearch/hermes-agent/pull/19671))
|
||||
- Test: kimi-coding-cn provider validation regression ([#19734](https://github.com/NousResearch/hermes-agent/pull/19734))
|
||||
|
||||
### Update
|
||||
- Fix: patch `isatty` on real streams to fix xdist-flaky `--yes` tests (salvage #19026) ([#21175](https://github.com/NousResearch/hermes-agent/pull/21175))
|
||||
- Fix: teach restart-mocks about the post-update survivor sweep (salvage #19031) ([#21177](https://github.com/NousResearch/hermes-agent/pull/21177))
|
||||
|
||||
### Auth
|
||||
- Fix: acp preserve assistant reasoning metadata ([#20296](https://github.com/NousResearch/hermes-agent/pull/20296))
|
||||
|
||||
### Redact
|
||||
- Fix: add `code_file` param to skip false-positive ENV/JSON patterns ([#19715](https://github.com/NousResearch/hermes-agent/pull/19715))
|
||||
|
||||
### Email
|
||||
- Fix: quoted-relative file-drop paths + Date header on tool email path ([#19646](https://github.com/NousResearch/hermes-agent/pull/19646))
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing
|
||||
|
||||
- **ACP — accept prompt persistence kwargs in MCP E2E mocks** (@stephenschoettler) ([#18047](https://github.com/NousResearch/hermes-agent/pull/18047))
|
||||
- **Toolsets — include kanban in expected post-#17805 toolset assertions** (@briandevans) ([#18122](https://github.com/NousResearch/hermes-agent/pull/18122))
|
||||
- **Agent — cover max-iterations summary message sanitization** ([#19580](https://github.com/NousResearch/hermes-agent/pull/19580))
|
||||
- **run_agent — `-inf` and `nan` regression coverage for `_coerce_number`** ([#19703](https://github.com/NousResearch/hermes-agent/pull/19703))
|
||||
|
||||
---
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
### Major docs additions
|
||||
- **`llms.txt` + `llms-full.txt` — agent-friendly ingestion** ([#18276](https://github.com/NousResearch/hermes-agent/pull/18276))
|
||||
- **User Stories and Use Cases collage page** ([#18282](https://github.com/NousResearch/hermes-agent/pull/18282))
|
||||
- **Persistent Goals (/goal) feature page** ([#18275](https://github.com/NousResearch/hermes-agent/pull/18275))
|
||||
- **Windows (WSL2) guide expansion** — filesystem, networking, services, pitfalls ([#20748](https://github.com/NousResearch/hermes-agent/pull/20748))
|
||||
- **Chinese (zh-CN) README translation** (salvage #13508) ([#20431](https://github.com/NousResearch/hermes-agent/pull/20431))
|
||||
- **zh-Hans Docusaurus locale** + Tool Gateway / image-gen / WSL quickstart translations (salvage #11728) ([#20430](https://github.com/NousResearch/hermes-agent/pull/20430))
|
||||
- **Tool Gateway docs restructure** — lead with what it does, config moved to bottom ([#20827](https://github.com/NousResearch/hermes-agent/pull/20827))
|
||||
- **Quickstart — Onchain AI Garage Hermes tutorials playlist** ([#20192](https://github.com/NousResearch/hermes-agent/pull/20192))
|
||||
- **Open WebUI bootstrap script** (salvage #9566) ([#20427](https://github.com/NousResearch/hermes-agent/pull/20427))
|
||||
- **Local Ollama setup guide** (salvage #5842) ([#20426](https://github.com/NousResearch/hermes-agent/pull/20426))
|
||||
- **Google Gemini guide** (salvage #17450) ([#20401](https://github.com/NousResearch/hermes-agent/pull/20401))
|
||||
- **Custom model aliases for /model command** ([#20475](https://github.com/NousResearch/hermes-agent/pull/20475))
|
||||
- **Together/Groq/Perplexity cookbook via `custom_providers`** (salvage #15214) ([#20400](https://github.com/NousResearch/hermes-agent/pull/20400))
|
||||
- **Doubao speech integration examples** (TTS + STT) (salvage #18065) ([#20418](https://github.com/NousResearch/hermes-agent/pull/20418))
|
||||
- **WSL-to-Windows Chrome MCP bridge** (salvage #8313) ([#20428](https://github.com/NousResearch/hermes-agent/pull/20428))
|
||||
- **Hermes skills docs sync** — slash commands + durable-systems section ([#20390](https://github.com/NousResearch/hermes-agent/pull/20390))
|
||||
- **AGENTS.md — curator/cron/delegation/toolsets + fix plugin tree** ([#20226](https://github.com/NousResearch/hermes-agent/pull/20226))
|
||||
- **Bedrock quickstart entry + fallback comment + deployment link** (salvage #11093) ([#20397](https://github.com/NousResearch/hermes-agent/pull/20397))
|
||||
|
||||
### Docs polish
|
||||
- Collapse exploding skills tree to a single Skills node ([#18259](https://github.com/NousResearch/hermes-agent/pull/18259))
|
||||
- Clarify `session_search` auxiliary model docs ([#19593](https://github.com/NousResearch/hermes-agent/pull/19593))
|
||||
- Open WebUI Quick Setup gap fill ([#19654](https://github.com/NousResearch/hermes-agent/pull/19654))
|
||||
- Default custom tool creation to plugins (@kshitijk4poor) ([#19755](https://github.com/NousResearch/hermes-agent/pull/19755))
|
||||
- Clarify Telegram group chat troubleshooting (salvage #18672) ([#20416](https://github.com/NousResearch/hermes-agent/pull/20416))
|
||||
- Codex OAuth auth prerequisite clarification (salvage #18688) ([#20417](https://github.com/NousResearch/hermes-agent/pull/20417))
|
||||
- Discord Server Members Intent + SSRC-mapping drift + /voice join slash Choice (salvage #11350) ([#20411](https://github.com/NousResearch/hermes-agent/pull/20411))
|
||||
- Document `ctx.dispatch_tool()` (salvage #10955) ([#20391](https://github.com/NousResearch/hermes-agent/pull/20391))
|
||||
- Document `hermes webhook subscribe --deliver-only` (salvage #12612) ([#20392](https://github.com/NousResearch/hermes-agent/pull/20392))
|
||||
- Document `hermes import` reference (salvage #14711) ([#20396](https://github.com/NousResearch/hermes-agent/pull/20396))
|
||||
- Document per-provider TTS `max_text_length` caps (salvage #13825) ([#20389](https://github.com/NousResearch/hermes-agent/pull/20389))
|
||||
- Clarify supported prompt customization surfaces (salvage #19987) ([#20383](https://github.com/NousResearch/hermes-agent/pull/20383))
|
||||
- Correct `web_extract` summarizer timeout comment (salvage #20051) ([#20381](https://github.com/NousResearch/hermes-agent/pull/20381))
|
||||
- Fix fallback provider config paths (salvage #20033) ([#20382](https://github.com/NousResearch/hermes-agent/pull/20382))
|
||||
- Fix misleading RL install-extras claim (salvage #19080) ([#21213](https://github.com/NousResearch/hermes-agent/pull/21213))
|
||||
- Clarify API server tool execution locality (salvage #19117) ([#21223](https://github.com/NousResearch/hermes-agent/pull/21223))
|
||||
- Prefer `.venv` to match AGENTS.md and scripts/run_tests.sh (@xxxigm) ([#21334](https://github.com/NousResearch/hermes-agent/pull/21334))
|
||||
- Align tool discovery + test runner with AGENTS.md (@xxxigm) ([#20791](https://github.com/NousResearch/hermes-agent/pull/20791))
|
||||
- Align terminal-backend count and naming across docs and code (salvage #19044) ([#20402](https://github.com/NousResearch/hermes-agent/pull/20402))
|
||||
- Refresh stale platform counts (salvage #19053) ([#20403](https://github.com/NousResearch/hermes-agent/pull/20403))
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributors
|
||||
|
||||
### Core
|
||||
- **@teknium1** — salvage, triage, review, feature work, and release management
|
||||
|
||||
### Top Community Contributors
|
||||
|
||||
- **@kshitijk4poor** (21 PRs) — SearXNG native search backend, per-capability backend selection, collapsible TUI startup banner, Slack ephemeral ack + format fixes, Lightpanda fallback hardening, searxng-search optional skill + Web Search + Extract docs, default custom tool creation to plugins, kanban failure-column fix
|
||||
- **@alt-glitch** (13 PRs) — video_analyze tool, xAI Custom Voices (voice cloning), local-backend CLI launch-directory fix, lazy-session creation regression recovery, systemd unit refresh on gateway boot
|
||||
- **@OutThisLife** (9 PRs) — TUI perf — overlay render churn reduction, voice push-to-talk parity restoration (salvaging @Montbra)
|
||||
- **@helix4u** (6 PRs) — Classic CLI output recovery after resize, absolute-path TUI completion, gateway model picker current-context fix, Bedrock credential probe avoidance, kanban docs fixes
|
||||
- **@ethernet8023** (3 PRs) — Docker CI — don't cancel overlapping builds, :latest guard
|
||||
- **@benbarclay** (3 PRs) — Docker — launch dashboard as side-process via HERMES_DASHBOARD=1
|
||||
- **@austinpickett** (3 PRs) — Dashboard Plugins page, TUI /model picker overhaul with inline auth, kanban button fix
|
||||
- **@sprmn24** (2 PRs) — Contributor (2 PRs)
|
||||
- **@asheriif** (2 PRs) — Contributor (2 PRs)
|
||||
- **@xxxigm** (2 PRs) — Contributing docs — .venv preference and test runner alignment with AGENTS.md
|
||||
- **@stephenschoettler** (1 PR) — ACP — MCP E2E mock kwargs
|
||||
- **@vincez-hms-coder** (1 PR) — Dashboard — Profiles management page
|
||||
- **@cdanis** (1 PR) — Contributor
|
||||
- **@briandevans** (1 PR) — Toolsets test — kanban assertions post-#17805
|
||||
- **@heyitsaamir** (1 PR) — Contributor
|
||||
|
||||
### All Contributors
|
||||
|
||||
Thanks to everyone who contributed to v0.13.0 — commits, co-authored work, and salvaged PRs. 295 contributors in one week.
|
||||
|
||||
@0oAstro, @0xDevNinja, @0xharryriddle, @0xKingBack, @0xsir0000, @0xyg3n, @0z1-ghb, @abhinav11082001-stack,
|
||||
@acc001k, @acesjohnny, @adamludwin, @adybag14-cyber, @agentlinker, @agilejava, @ai-ag2026, @AJV20,
|
||||
@alanxchen85, @albert748, @AllardQuek, @alt-glitch, @altmazza0-star, @ambition0802, @amitgaur, @amroessam,
|
||||
@andrewhosf, @Asce66, @asheriif, @ashermorse, @asimons81, @Aslaaen, @Asunfly, @atongrun, @austinpickett,
|
||||
@banditburai, @barteqpl, @Bartok9, @Beandon13, @beardthelion, @beibi9966, @benbarclay, @binhnt92, @bjianhang,
|
||||
@BlackJulySnow, @bobashopcashier, @bogerman1, @Bongulielmi, @Brecht-H, @briandevans, @brooklynnicholson,
|
||||
@c3115644151, @camaragon, @CashWilliams, @CCClelo, @cdanis, @CES4751, @cg2aigc, @changchun989, @ChanlerDev,
|
||||
@CharlieKerfoot, @chengoak, @chenyunbo411, @chinadbo, @CIRWEL, @cixuuz, @cmcgrabby-hue, @colorcross,
|
||||
@Contentment003111, @CoreyNoDream, @counterposition, @curiouscleo, @DaniuXie, @deep-name, @dengtaoyuan450-a11y,
|
||||
@discodirector, @donramon77, @dpaluy, @ee-blog, @ehz0ah, @el-analista, @elmatadorgh, @EmelyanenkoK,
|
||||
@Emidomenge, @emozilla, @Es1la, @EthanGuo-coder, @etherman-os, @ethernet8023, @EvilDrag0n, @exxmen, @Fearvox,
|
||||
@Feranmi10, @firefly, @flobo3, @fmercurio, @Foolafroos, @formulahendry, @franksong2702, @ggnnggez, @GinWU05,
|
||||
@giwaov, @glesperance, @gnanirahulnutakki, @GodsBoy, @Gosuj, @Grey0202, @guillaumemeyer, @Gutslabs, @h0tp-ftw,
|
||||
@haidao1919, @halmisen, @happy5318, @hedirman, @helix4u, @hendrixfreire, @HenkDz, @hex-clawd, @heyitsaamir,
|
||||
@hharry11, @Hinotoi-agent, @holynn-q, @hrkzogw, @Hypn0sis, @Hypnus-Yuan, @ideathinklab01-source, @IMHaoyan,
|
||||
@Interstellar-code, @ishardo, @jacdevos, @jackey8616, @JanCong, @jasonoutland, @jatingodnani, @JayGwod,
|
||||
@jethac, @JezzaHehn, @JiaDe-Wu, @jjjojoj, @jkausel-ai, @John-tip, @johnncenae, @jrusso1020, @jslizar,
|
||||
@JTroyerOvermatch, @julysir, @Junass1, @JustinUssuri, @Kailigithub, @keepcalmqqf, @kiala9, @konsisumer,
|
||||
@kowenhaoai, @Krionex, @kshitijk4poor, @kyan12, @leavrcn, @leon7609, @LeonSGP43, @leprincep35700, @lhysdl,
|
||||
@likejudy, @lisanhu, @liu-collab, @liuguangyong93, @liuhao1024, @LucianoSP, @luoyuctl, @luyao618, @M3RCUR2Y,
|
||||
@maciekczech, @Magicray1217, @magicray1217, @MaHaoHao-ch, @malaiwah, @manateelazycat, @masonjames, @megastary,
|
||||
@memosr, @MichaelWDanko, @mikeyobrien, @millerc79, @Mind-Dragon, @mioimotoai-lgtm, @misery-hl, @molvikar,
|
||||
@momowind, @Montbra, @MottledShadow, @mrbob-git, @mrcharlesiv, @mrcoferland, @ms-alan, @mwnickerson,
|
||||
@nazirulhafiy, @nftpoetrist, @nicoloboschi, @nightq, @nikolay-bratanov, @NikolayGusev-astra, @nocturnum91,
|
||||
@noOne-list, @nouseman666, @novax635, @npmisantosh, @nudiltoys-cmyk, @olisikh, @oluwadareab12, @Oxidane-bot,
|
||||
@pama0227, @pander, @pasevin, @paul-tian, @pdonizete, @perlowja, @pingchesu, @PratikRai0101, @priveperfumes,
|
||||
@probepark, @QifengKuang, @quocanh261997, @qWaitCrypto, @qxxaa, @r266-tech, @rames-jusso, @revaraver,
|
||||
@Ricardo-M-L, @rob-maron, @Roy-oss1, @rxdxxxx, @SandroHub013, @Sanjays2402, @Sertug17, @shashwatgokhe,
|
||||
@shellybotmoyer, @SHL0MS, @SimbaKingjoe, @simbam99, @simplenamebox-ops, @socrates1024, @sonic-netizen,
|
||||
@sprmn24, @steezkelly, @stephen0110, @stephenschoettler, @stevenchanin, @stevenchouai, @stormhierta,
|
||||
@subtract0, @suncokret12, @swithek, @taeng0204, @TakeshiSawaguchi, @tangyuanjc, @TheEpTic, @thelumiereguy,
|
||||
@Tkander1715, @tmdgusya, @Tranquil-Flow, @TruaShamu, @UgwujaGeorge, @valda, @vincez-hms-coder, @VinVC,
|
||||
@vominh1919, @wabrent, @WadydX, @wanazhar, @WanderWang, @warabe1122, @web-dev0521, @WideLee, @willy-scr,
|
||||
@wmagev, @WuTianyi123, @wxst, @wysie, @Wysie, @xsfX20, @xxxigm, @xyiy001, @YanzhongSu, @ygd58, @Yoimex,
|
||||
@yuehei, @Yukipukii1, @yuqianma, @YX234, @zeejaytan, @zhanggttry, @zhao0112, @zng8418, @zons-zhaozhy, @Zyproth
|
||||
|
||||
---
|
||||
|
||||
**Full Changelog**: [v2026.4.30...v2026.5.7](https://github.com/NousResearch/hermes-agent/compare/v2026.4.30...v2026.5.7)
|
||||
@@ -1,479 +0,0 @@
|
||||
# Hermes Agent v0.14.0 (v2026.5.16)
|
||||
|
||||
**Release Date:** May 16, 2026
|
||||
**Since v0.13.0:** 808 commits · 633 merged PRs · 1393 files changed · 165,061 insertions · 545 issues closed (12 P0, 50 P1) · 215 community contributors (including co-authors)
|
||||
|
||||
> The Foundation Release — Hermes installs and runs anywhere, ships with the things you actually want to use, and stops shipping the things you don't. xAI Grok lands as a SuperGrok OAuth provider with grok-4.3 bumped to a 1M context window. A new OpenAI-compatible local proxy turns any OAuth-authed Hermes provider — Claude Pro, ChatGPT Pro, SuperGrok — into an endpoint that Codex / Aider / Cline / Continue can hit. `x_search` lands as a first-class X (Twitter) search tool with OAuth-or-API-key auth. The Microsoft Teams stack is wired end-to-end (Graph auth + webhook listener + pipeline runtime + outbound delivery). A debloating wave makes installs dramatically lighter — heavyweight backends now lazy-install on first use, the `[all]` extras drop everything covered by lazy-deps, and a tiered install falls back when a wheel rejects on your platform. `pip install hermes-agent` works from PyPI. The cold-start wave shaves ~19 seconds off `hermes` launch. Browser CDP calls are 180x faster. Two new messaging platforms (LINE + SimpleX Chat) bring the total to 22. Cross-session 1-hour Claude prompt caching, `/handoff` that actually transfers sessions live, native button UI for `clarify` on Telegram and Discord, Discord channel history backfill, LSP semantic diagnostics on every write, a unified pluggable `video_generate`, a `computer_use` cua-driver backend that finally works with non-Anthropic providers, clickable URLs in any terminal, Zed ACP Registry integration via `uvx`, native Windows beta, 9 new optional skills, OpenRouter Pareto Code router, huggingface/skills as a trusted default tap. 12 P0 + 50 P1 closures.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **xAI Grok via SuperGrok OAuth — and grok-4.3 jumps to a 1M context window** — If you pay for SuperGrok, you can now use Grok inside Hermes by signing in with your xAI account — no API key, no separate billing. The wire-through also bumps grok-4.3 to a 1M token context window, so you can drop whole codebases or research corpora into a single prompt. Includes proper handling for entitlement errors and an SSH-to-tunnel docs page for when you're SSH'd into a remote box and need to complete the OAuth flow. ([#26534](https://github.com/NousResearch/hermes-agent/pull/26534), [#26664](https://github.com/NousResearch/hermes-agent/pull/26664), [#26644](https://github.com/NousResearch/hermes-agent/pull/26644), [#26592](https://github.com/NousResearch/hermes-agent/pull/26592))
|
||||
|
||||
- **OpenAI-compatible local proxy for OAuth providers** — Run `hermes proxy` and you get a `http://localhost:port` endpoint that speaks the OpenAI API but is backed by whichever OAuth provider you're signed into — Claude Pro, ChatGPT Pro, SuperGrok. Now any tool that expects an OpenAI-compatible endpoint (Codex CLI, Aider, Cline, Continue, your custom scripts) just works with your existing subscription, no API key required. One subscription, every tool. ([#25969](https://github.com/NousResearch/hermes-agent/pull/25969))
|
||||
|
||||
- **`x_search` — first-class X (Twitter) search tool** — The agent can now search X directly without installing a skill or wiring up a custom integration. Search the timeline, find threads, surface specific posts — straight from the chat. Auth with either your X OAuth login or an API key, whichever you have. ([#26763](https://github.com/NousResearch/hermes-agent/pull/26763))
|
||||
|
||||
- **Microsoft Teams — end-to-end** — Hermes can now read messages from Teams and post back. The full Microsoft Graph stack lands together: auth + client foundation, a webhook listener that receives Teams events, a pipeline plugin runtime, and outbound delivery. Wire up the bot once, then chat to your agent from any Teams channel, DM, or group. (salvages of #21408–#21411) ([#21922](https://github.com/NousResearch/hermes-agent/pull/21922), [#21969](https://github.com/NousResearch/hermes-agent/pull/21969), [#22007](https://github.com/NousResearch/hermes-agent/pull/22007), [#22024](https://github.com/NousResearch/hermes-agent/pull/22024))
|
||||
|
||||
- **Debloating wave — lighter installs, less you don't use** — A clean `pip install hermes-agent` used to pull down everything: every messaging adapter SDK, every image-gen SDK, every voice/TTS provider, whether you used them or not. Now those heavy backends (Slack / Matrix / Feishu / DingTalk adapters, hindsight client, codex app-server, Pixverse / Camofox / image-gen SDKs, voice/TTS providers) install automatically the first time you actually use them. The `[all]` extras drop everything covered by lazy-deps, the installer falls back through tiers when a wheel doesn't fit your platform, and a supply-chain advisory checker scans every install for unsafe versions. Faster installs, smaller disk footprint, fewer transitive vulnerabilities. ([#24220](https://github.com/NousResearch/hermes-agent/pull/24220), [#24515](https://github.com/NousResearch/hermes-agent/pull/24515), [#25014](https://github.com/NousResearch/hermes-agent/pull/25014), [#25038](https://github.com/NousResearch/hermes-agent/pull/25038), [#25766](https://github.com/NousResearch/hermes-agent/pull/25766), [#21818](https://github.com/NousResearch/hermes-agent/pull/21818))
|
||||
|
||||
- **`pip install hermes-agent && hermes`** — Hermes Agent is now a real PyPI package. No more cloning the repo or running shell installers — one pip command and you're running. The wheel ships with the Ink TUI bundle and the shell launcher, so the full experience comes out of the box. (salvage of [#26350](https://github.com/NousResearch/hermes-agent/pull/26350)) ([#26593](https://github.com/NousResearch/hermes-agent/pull/26593), [#26148](https://github.com/NousResearch/hermes-agent/pull/26148))
|
||||
|
||||
- **Cross-session 1h Claude prompt cache** — When you use Claude through Anthropic, OpenRouter, or Nous Portal, the prompt prefix (system prompt, skills, memory) now caches for an hour across sessions. Start a `/new` session and the first response comes back faster and cheaper because the cache is still warm from your last session. Background memory review hits the cache too, so it's not paying full price every turn. ([#23828](https://github.com/NousResearch/hermes-agent/pull/23828), [#25434](https://github.com/NousResearch/hermes-agent/pull/25434), [#24778](https://github.com/NousResearch/hermes-agent/pull/24778))
|
||||
|
||||
- **180x faster `browser_console` evaluations** — When the agent uses the browser tool to inspect a page or run JavaScript, those calls now share one persistent connection to Chrome instead of spinning up a new DevTools session every time. The difference is huge: things that used to take a couple of seconds per call return in milliseconds. Real-world page interactions feel instant. ([#23226](https://github.com/NousResearch/hermes-agent/pull/23226))
|
||||
|
||||
- **Cold-start performance wave — ~19 seconds off `hermes` launch** — Running `hermes` used to make you wait through a chunk of import overhead and network calls before you saw a prompt. Now the launch path is mostly deferred: heavy adapters only load when you use them, model catalogs come from disk cache first, doctor checks run in parallel, and `chat -q` skips the welcome banner entirely. The `hermes tools` All-Platforms screen alone dropped from 14 seconds to under 1.5 seconds. ([#22138](https://github.com/NousResearch/hermes-agent/pull/22138), [#22120](https://github.com/NousResearch/hermes-agent/pull/22120), [#22681](https://github.com/NousResearch/hermes-agent/pull/22681), [#22790](https://github.com/NousResearch/hermes-agent/pull/22790), [#22808](https://github.com/NousResearch/hermes-agent/pull/22808), [#22831](https://github.com/NousResearch/hermes-agent/pull/22831), [#22859](https://github.com/NousResearch/hermes-agent/pull/22859), [#22904](https://github.com/NousResearch/hermes-agent/pull/22904), [#22766](https://github.com/NousResearch/hermes-agent/pull/22766), [#25341](https://github.com/NousResearch/hermes-agent/pull/25341))
|
||||
|
||||
- **Two new messaging platforms — LINE + SimpleX Chat** — LINE is huge in Japan, Korea, and Taiwan, and now Hermes runs natively on the LINE Messaging API. SimpleX Chat is the privacy-focused decentralized messenger with no user IDs — also wired up as a first-class platform. That brings Hermes to 22 messaging platforms total, so wherever you and your team chat, the agent can be there. ([#23197](https://github.com/NousResearch/hermes-agent/pull/23197), [#26232](https://github.com/NousResearch/hermes-agent/pull/26232))
|
||||
|
||||
- **`/handoff` actually transfers the session live** — Switching models or personalities mid-conversation used to mean losing context or starting over. Now `/handoff` moves your active session — every message, every tool call, every piece of context — to the target model, persona, or profile, live, without dropping anything. Mid-debugging hand off from a fast model to a deep-reasoning one, or pass a session between profiles for different parts of a task. ([#23395](https://github.com/NousResearch/hermes-agent/pull/23395))
|
||||
|
||||
- **Native button UI for `clarify` on Telegram and Discord** — When the agent uses the `clarify` tool to ask you a multiple-choice question, it now shows real platform-native buttons on Telegram and Discord instead of asking you to type back the option number. Tap the button, the agent gets your answer. Especially nice on mobile. ([#24199](https://github.com/NousResearch/hermes-agent/pull/24199), [#25485](https://github.com/NousResearch/hermes-agent/pull/25485))
|
||||
|
||||
- **Discord channel history backfill (default on)** — When Hermes joins a Discord channel or thread for the first time, it now reads the recent message history so it knows what's been said before it responds. No more "what are we talking about?" — the agent has the context that's already on screen for everyone else. ([#25984](https://github.com/NousResearch/hermes-agent/pull/25984))
|
||||
|
||||
- **`vision_analyze` returns pixels to vision-capable models** — When you point the agent at an image with `vision_analyze` and the active model can actually see (GPT-5, Claude, Gemini, Grok-vision), Hermes now passes the raw pixels straight to the model instead of converting them to a text description first. You get the model's actual visual reasoning instead of a degraded text-summary round-trip. ([#22955](https://github.com/NousResearch/hermes-agent/pull/22955))
|
||||
|
||||
- **Per-turn file-mutation verifier footer** — After every turn that wrote or edited files, the agent now gets a short footer summarizing exactly what changed on disk — the file paths, the line counts, the actual delta. That means the agent catches its own mistakes when a write didn't land or got silently overwritten, instead of confidently telling you "I added the function" when the file wasn't actually saved. ([#24498](https://github.com/NousResearch/hermes-agent/pull/24498))
|
||||
|
||||
- **LSP semantic diagnostics on every write** — When the agent uses `write_file` or `patch`, Hermes now runs a real language server against the edited file and surfaces any new errors back to the agent before the next turn. Type errors, undefined symbols, missing imports — caught immediately. Goes way beyond v0.13.0's basic Python/JSON/YAML/TOML linting because it's actual semantic analysis. ([#24168](https://github.com/NousResearch/hermes-agent/pull/24168), [#25978](https://github.com/NousResearch/hermes-agent/pull/25978))
|
||||
|
||||
- **Unified `video_generate` with pluggable provider backends** — One tool, any video model. Hermes ships with the obvious backends already, but you can drop in a new video provider as a plugin without touching core. So when a new video model lands next month, it can be a one-file plugin instead of a fork. ([#25126](https://github.com/NousResearch/hermes-agent/pull/25126))
|
||||
|
||||
- **`computer_use` cua-driver backend — works with non-Anthropic models now** — Computer-use (the agent controlling your mouse and keyboard to drive GUI apps) used to be locked to Anthropic's SDK. The new cua-driver backend works with non-Anthropic providers too, has proper focus-safe operations, and refreshes itself on `hermes update`. Now any vision-capable model can drive your desktop. (re-salvage of #16936) ([#21967](https://github.com/NousResearch/hermes-agent/pull/21967), [#24063](https://github.com/NousResearch/hermes-agent/pull/24063))
|
||||
|
||||
- **Clickable URLs in any terminal** — Links in agent output are now real OSC8 hyperlinks with hover-highlight in any terminal that supports them. Click to open in your browser — no more copy-paste-trim of long URLs from the transcript. Just works in iTerm2, Kitty, Ghostty, modern Windows Terminal, etc. (@OutThisLife) ([#25071](https://github.com/NousResearch/hermes-agent/pull/25071), [#24013](https://github.com/NousResearch/hermes-agent/pull/24013))
|
||||
|
||||
- **Zed ACP Registry — `uvx` install in one click** — Hermes is now listed in Zed's Agent Client Protocol registry, so Zed users can install it with one click. The install path uses `uvx` so there's no npm dependency. `hermes acp --setup-browser` bootstraps the browser tools for registry-driven installs. (salvage of [#25908](https://github.com/NousResearch/hermes-agent/pull/25908)) ([#26079](https://github.com/NousResearch/hermes-agent/pull/26079), [#26120](https://github.com/NousResearch/hermes-agent/pull/26120), [#26234](https://github.com/NousResearch/hermes-agent/pull/26234))
|
||||
|
||||
- **OpenRouter Pareto Code router with `min_coding_score` knob** — OpenRouter's "Pareto" router automatically picks the cheapest model that meets a minimum quality bar. The new `min_coding_score` config lets you set that bar for coding tasks specifically — Hermes routes to the most affordable model that's at least that good at code. Stop paying for top-tier models when a mid-tier one would do. ([#22838](https://github.com/NousResearch/hermes-agent/pull/22838))
|
||||
|
||||
- **NovitaAI as a new model provider** — NovitaAI joins the provider lineup, giving you another option for open-source model hosting (Llama, Qwen, DeepSeek, etc.) with their pricing and rate limits. (salvage #7219) (@kshitijk4poor) ([#25507](https://github.com/NousResearch/hermes-agent/pull/25507))
|
||||
|
||||
- **Codex app-server runtime for OpenAI/Codex models** — An optional runtime that drives OpenAI's Codex CLI under the hood when you're using OpenAI or Codex paths. You get session reuse, automatic retirement of wedged sessions, and proper OAuth refresh classification — the kind of plumbing that makes long agentic runs not fall over. ([#24182](https://github.com/NousResearch/hermes-agent/pull/24182), [#25769](https://github.com/NousResearch/hermes-agent/pull/25769))
|
||||
|
||||
- **`huggingface/skills` as a trusted default tap** — The community skills index hosted at huggingface.co/skills is now wired into the Skills Hub by default. So when somebody publishes a useful skill there, you can install it from your own `hermes skills` browser without any extra config. (closes #2549) ([#26219](https://github.com/NousResearch/hermes-agent/pull/26219))
|
||||
|
||||
- **9 new optional skills** — Hyperliquid (perp + spot trading via the SDK and REST API), Yahoo Finance (live market data, fundamentals, historicals), api-testing (REST + GraphQL debug recipes), unified EVM multi-chain (one skill covers Ethereum + L2s + Base), darwinian-evolver (evolutionary prompt/skill tuning), osint-investigation (OSINT recipes for people / domains / orgs), pinggy-tunnel (expose local services to the public internet), watchers (polls RSS / HTTP JSON / GitHub via cron `no_agent` mode for change detection), and a full Notion overhaul for the May 2026 Developer Platform. ([#23582](https://github.com/NousResearch/hermes-agent/pull/23582), [#23583](https://github.com/NousResearch/hermes-agent/pull/23583), [#23590](https://github.com/NousResearch/hermes-agent/pull/23590), [#25299](https://github.com/NousResearch/hermes-agent/pull/25299), [#26760](https://github.com/NousResearch/hermes-agent/pull/26760), [#26729](https://github.com/NousResearch/hermes-agent/pull/26729), [#26765](https://github.com/NousResearch/hermes-agent/pull/26765), [#21881](https://github.com/NousResearch/hermes-agent/pull/21881), [#26612](https://github.com/NousResearch/hermes-agent/pull/26612))
|
||||
|
||||
- **API server exposes run approval events** — If you're driving Hermes programmatically through the HTTP API, long-running runs no longer silently hang when the agent hits an approval-required command. The approval request now surfaces on the API stream so your client can prompt the user and reply — no more silent stalls. (salvage of [#20311](https://github.com/NousResearch/hermes-agent/pull/20311)) ([#21899](https://github.com/NousResearch/hermes-agent/pull/21899))
|
||||
|
||||
- **Plugins can run any LLM call via `ctx.llm` + replace built-in tools via `tool_override`** — If you're writing a Hermes plugin, you now get first-class access to make LLM calls through the active provider and credentials — no manual client wiring. The new `tool_override` flag lets a plugin swap out a built-in tool with its own implementation cleanly. Plugin authors get the same model-routing and auth plumbing the core agent uses. (closes #11049) ([#23194](https://github.com/NousResearch/hermes-agent/pull/23194), [#26759](https://github.com/NousResearch/hermes-agent/pull/26759))
|
||||
|
||||
- **Brave Search (free tier) + DuckDuckGo (DDGS) as web-search providers** — Two new free web-search backends join Tavily, SearXNG, and Exa. Brave Search has a generous free tier; DDGS is the DuckDuckGo scraper that needs no key at all. Pick whichever fits your budget and rate-limit needs. ([#21337](https://github.com/NousResearch/hermes-agent/pull/21337))
|
||||
|
||||
- **Sudo brute-force block + 3 dangerous-command bypasses closed + tool-error sanitization** — The approval gate now blocks `sudo -S` brute-force attempts and classifies stdin-fed or askpass-stripped sudo invocations as DANGEROUS. Three known bypasses of dangerous-command detection are closed (inspired by Claude Code's command-detection work). And tool error strings are now sanitized before being re-injected into the model context, so a malicious file or remote service can't pass instructions to your agent through error output. ([#23736](https://github.com/NousResearch/hermes-agent/pull/23736), [#26829](https://github.com/NousResearch/hermes-agent/pull/26829), [#26823](https://github.com/NousResearch/hermes-agent/pull/26823))
|
||||
|
||||
- **`/subgoal` — user-added criteria appended to an active `/goal`** — When you've got a `/goal` running (the persistent Ralph-loop goal where the agent keeps going until criteria are met), you can now use `/subgoal <text>` to layer extra success criteria onto it mid-run. The judge factors your new criteria into the done-or-keep-going decision without restarting the loop. ([#25449](https://github.com/NousResearch/hermes-agent/pull/25449))
|
||||
|
||||
- **Provider rename — Alibaba Cloud → Qwen Cloud** — The Alibaba Cloud provider is renamed to Qwen Cloud in the picker and config to match what the rest of the world calls it. Existing config keys still work — no breaking changes — but the UI matches the actual brand now. ([#24835](https://github.com/NousResearch/hermes-agent/pull/24835))
|
||||
|
||||
- **Native Windows support (early beta)** — Hermes now runs natively on `cmd.exe` and PowerShell without WSL. A full PowerShell installer handles MinGit auto-install, Microsoft Store python stub detection, and the foreground Ctrl+C dance. There's still rough edges (this is the "early beta" stamp) — ~40 follow-up Windows-only fixes already landed in the window — but the basic loop works end-to-end on a clean Windows box. ([#21561](https://github.com/NousResearch/hermes-agent/pull/21561))
|
||||
|
||||
|
||||
---
|
||||
|
||||
## 🪟 Windows — Native Support (Early Beta)
|
||||
|
||||
### Bootstrap & installer
|
||||
- **Native Windows support (early beta)** — first-class native Windows path across CLI / gateway / TUI / tools ([#21561](https://github.com/NousResearch/hermes-agent/pull/21561))
|
||||
- **PyPI wheel packaging — `pip install hermes-agent && hermes`** (salvage of #26350) ([#26593](https://github.com/NousResearch/hermes-agent/pull/26593))
|
||||
- **Recognise Shift+Enter as a newline key** + Windows docs (salvage #21545) ([#22130](https://github.com/NousResearch/hermes-agent/pull/22130))
|
||||
- **Preserve Ctrl+C for Windows foreground runs** (@helix4u) ([#22752](https://github.com/NousResearch/hermes-agent/pull/22752))
|
||||
- **Stop spamming cwd-missing + tirith-spawn warnings on every terminal call** ([#26618](https://github.com/NousResearch/hermes-agent/pull/26618))
|
||||
- **Use `--extra all` not `--all-extras`; drop lazy-covered extras from `[all]`** ([#24515](https://github.com/NousResearch/hermes-agent/pull/24515))
|
||||
|
||||
### Windows-specific fixes (40+ across cli / tools / gateway / curator / TUI)
|
||||
A long tail of native-Windows fixes shipped alongside the beta — taskkill-based subprocess management, MinGit auto-install, Microsoft Store python stub detection, npm prefix handling, native PTY paths, signal handling differences, foreground process management, ANSI sequence handling, path normalization, file-locking semantics, and many more. Full list in commit log under `fix(windows)` / `feat(windows)` / `windows`.
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Performance Wave
|
||||
|
||||
### Cold start
|
||||
- **Cut ~19s from `hermes` cold start** — skills cache + lazy Feishu + no Nous HTTP at startup ([#22138](https://github.com/NousResearch/hermes-agent/pull/22138))
|
||||
- **Skip eager plugin discovery on known built-in subcommands** ([#22120](https://github.com/NousResearch/hermes-agent/pull/22120))
|
||||
- **Cache Nous auth + .env loads** — `hermes tools` All Platforms from 14s to <1.5s ([#25341](https://github.com/NousResearch/hermes-agent/pull/25341))
|
||||
- **Skip welcome banner on `chat -q` single-query mode** ([#22904](https://github.com/NousResearch/hermes-agent/pull/22904))
|
||||
- **Defer heavy google-cloud imports in google_chat to first adapter use** ([#22681](https://github.com/NousResearch/hermes-agent/pull/22681))
|
||||
- **Defer QQAdapter and YuanbaoAdapter imports via PEP 562** ([#22790](https://github.com/NousResearch/hermes-agent/pull/22790))
|
||||
- **Defer httpx import in teams to first webhook call** ([#22831](https://github.com/NousResearch/hermes-agent/pull/22831))
|
||||
- **Defer fal_client import to first generation request** ([#22859](https://github.com/NousResearch/hermes-agent/pull/22859))
|
||||
- **models.dev cache-first lookup, skip network when disk cache is fresh** ([#22808](https://github.com/NousResearch/hermes-agent/pull/22808))
|
||||
- **Parallelize API connectivity checks in `hermes doctor` and disable IMDS** ([#22766](https://github.com/NousResearch/hermes-agent/pull/22766))
|
||||
|
||||
### Runtime
|
||||
- **180x faster `browser_console` evaluations** — route through supervisor's persistent CDP WebSocket ([#23226](https://github.com/NousResearch/hermes-agent/pull/23226))
|
||||
- **Tune Telegram cadence + adaptive fast-path for short replies** (salvage of #10388) ([#23587](https://github.com/NousResearch/hermes-agent/pull/23587))
|
||||
- **Accumulate length-continuation prefix via list+join** ([#26237](https://github.com/NousResearch/hermes-agent/pull/26237))
|
||||
|
||||
### Prompt caching
|
||||
- **Cross-session 1h prefix cache for Claude on Anthropic / OpenRouter / Nous Portal** ([#23828](https://github.com/NousResearch/hermes-agent/pull/23828))
|
||||
- **Hit prefix cache in background review fork** (salvage #17276 + #25427) ([#25434](https://github.com/NousResearch/hermes-agent/pull/25434))
|
||||
|
||||
---
|
||||
|
||||
## 📦 Installation & Distribution
|
||||
|
||||
### PyPI + supply-chain
|
||||
- **PyPI wheel packaging — `pip install hermes-agent && hermes`** (salvage of #26350) ([#26593](https://github.com/NousResearch/hermes-agent/pull/26593))
|
||||
- **Supply-chain advisory checker + lazy-install framework + tiered install fallback** ([#24220](https://github.com/NousResearch/hermes-agent/pull/24220))
|
||||
- **Use `--extra all` not `--all-extras`; drop lazy-covered extras from `[all]`** ([#24515](https://github.com/NousResearch/hermes-agent/pull/24515))
|
||||
- **Skip browser download when system chromium exists** (@helix4u) ([#25317](https://github.com/NousResearch/hermes-agent/pull/25317))
|
||||
|
||||
### Nix
|
||||
- **`extraDependencyGroups` for sealed venv extras** (@alt-glitch) ([#21817](https://github.com/NousResearch/hermes-agent/pull/21817))
|
||||
- **Refresh npm lockfile hashes** — keeps Nix flake builds reproducible
|
||||
|
||||
### Docker
|
||||
- **Bootstrap auth.json from env on first boot** ([#21880](https://github.com/NousResearch/hermes-agent/pull/21880))
|
||||
- **Drop manual @hermes/ink build, rely on esbuild bundle** — slimmer image
|
||||
|
||||
### ACP / Zed
|
||||
- **Zed ACP Registry integration** (salvage of #25908) ([#26079](https://github.com/NousResearch/hermes-agent/pull/26079))
|
||||
- **Switch to uvx distribution, drop npm launcher** ([#26120](https://github.com/NousResearch/hermes-agent/pull/26120))
|
||||
- **`hermes acp --setup-browser` bootstraps browser tools for registry installs** ([#26234](https://github.com/NousResearch/hermes-agent/pull/26234))
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Core Agent & Architecture
|
||||
|
||||
### Sessions & handoff
|
||||
- **`/handoff` actually transfers the session live** ([#23395](https://github.com/NousResearch/hermes-agent/pull/23395))
|
||||
- **Expose `HERMES_SESSION_ID` env var to agent tools** (@alt-glitch) ([#23847](https://github.com/NousResearch/hermes-agent/pull/23847))
|
||||
|
||||
### Goals (Ralph loop)
|
||||
- **`/subgoal` — user-added criteria appended to active `/goal`** ([#25449](https://github.com/NousResearch/hermes-agent/pull/25449))
|
||||
- **`/goal` checklist + /subgoal user controls** ([#23456](https://github.com/NousResearch/hermes-agent/pull/23456)) — rolled back in window ([#23813](https://github.com/NousResearch/hermes-agent/pull/23813)); /subgoal returned in simpler form via #25449
|
||||
|
||||
### Compression
|
||||
- **Make `protect_first_n` configurable** ([#25447](https://github.com/NousResearch/hermes-agent/pull/25447))
|
||||
|
||||
### Verification
|
||||
- **Per-turn file-mutation verifier footer** ([#24498](https://github.com/NousResearch/hermes-agent/pull/24498))
|
||||
|
||||
### Stream retry
|
||||
- **Log inner cause, upstream headers, bytes/elapsed on every drop** ([#23005](https://github.com/NousResearch/hermes-agent/pull/23005))
|
||||
|
||||
---
|
||||
|
||||
## 🤖 Models & Providers
|
||||
|
||||
### New providers
|
||||
- **xAI Grok OAuth (SuperGrok Subscription) provider** ([#26534](https://github.com/NousResearch/hermes-agent/pull/26534))
|
||||
- **NovitaAI provider** (salvage #7219) (@kshitijk4poor) ([#25507](https://github.com/NousResearch/hermes-agent/pull/25507))
|
||||
- **NVIDIA NIM billing origin header** (salvage #25211) ([#26585](https://github.com/NousResearch/hermes-agent/pull/26585))
|
||||
|
||||
### Provider work
|
||||
- **OpenRouter Pareto Code router with `min_coding_score` knob** ([#22838](https://github.com/NousResearch/hermes-agent/pull/22838))
|
||||
- **Optional codex app-server runtime for OpenAI/Codex models** ([#24182](https://github.com/NousResearch/hermes-agent/pull/24182))
|
||||
- **Codex-runtime: retire wedged sessions + post-tool watchdog + OAuth refresh classify** ([#25769](https://github.com/NousResearch/hermes-agent/pull/25769))
|
||||
- **Codex-runtime: skip unavailable plugins during migration** ([#25437](https://github.com/NousResearch/hermes-agent/pull/25437))
|
||||
- **Codex-runtime: de-dup `[plugins.X]` tables and stop leaking HERMES_HOME into config.toml** (#26250) (@kshitijk4poor) ([#26260](https://github.com/NousResearch/hermes-agent/pull/26260))
|
||||
- **Pass `reasoning.effort` to xAI Responses API** ([#22807](https://github.com/NousResearch/hermes-agent/pull/22807))
|
||||
- **Custom provider: prompt and persist explicit `api_mode`** ([#25068](https://github.com/NousResearch/hermes-agent/pull/25068))
|
||||
- **Rename Alibaba Cloud → Qwen Cloud, reorder picker** ([#24835](https://github.com/NousResearch/hermes-agent/pull/24835))
|
||||
- **Restore gpt-5.3-codex-spark for ChatGPT Pro** (salvage #18286 + #19530, fixes #16172) (@kshitijk4poor) ([#22991](https://github.com/NousResearch/hermes-agent/pull/22991))
|
||||
- **Inject tool-use enforcement for GLM models** ([#24715](https://github.com/NousResearch/hermes-agent/pull/24715))
|
||||
- **Use Nous Portal as model metadata authority** (@rob-maron) ([#24502](https://github.com/NousResearch/hermes-agent/pull/24502))
|
||||
- **Unified `client=hermes-client-v<version>` tag on every Portal request** ([#24779](https://github.com/NousResearch/hermes-agent/pull/24779))
|
||||
- **Prevent stale Ollama credentials after provider switch** (@kshitijk4poor) ([#21703](https://github.com/NousResearch/hermes-agent/pull/21703))
|
||||
- **Auxiliary client: rotate pooled auth after quota failures** (salvage #22779) ([#22792](https://github.com/NousResearch/hermes-agent/pull/22792))
|
||||
- **Auxiliary client: skip providers without credentials immediately** (#25395) ([#25487](https://github.com/NousResearch/hermes-agent/pull/25487))
|
||||
- **Auth: send Nous refresh token via header** (@shannonsands) ([#21578](https://github.com/NousResearch/hermes-agent/pull/21578))
|
||||
- **MiniMax: harden OAuth dashboard and runtime** ([#24165](https://github.com/NousResearch/hermes-agent/pull/24165))
|
||||
|
||||
### OpenAI-compatible proxy
|
||||
- **Local OpenAI-compatible proxy for OAuth providers** — Codex / Aider / Cline can hit Claude Pro, ChatGPT Pro, SuperGrok ([#25969](https://github.com/NousResearch/hermes-agent/pull/25969))
|
||||
|
||||
---
|
||||
|
||||
## 📱 Messaging Platforms (Gateway)
|
||||
|
||||
### New platforms
|
||||
- **LINE Messaging API platform plugin** ([#23197](https://github.com/NousResearch/hermes-agent/pull/23197))
|
||||
- **SimpleX Chat platform plugin** (salvages #2558) ([#26232](https://github.com/NousResearch/hermes-agent/pull/26232))
|
||||
|
||||
### Microsoft Graph foundation
|
||||
- **msgraph: add auth and client foundation** (salvage of #21408) ([#21922](https://github.com/NousResearch/hermes-agent/pull/21922))
|
||||
- **msgraph: add webhook listener platform** (salvage of #21409) ([#21969](https://github.com/NousResearch/hermes-agent/pull/21969))
|
||||
- **teams-pipeline: add plugin runtime and operator cli** (salvage of #21410) ([#22007](https://github.com/NousResearch/hermes-agent/pull/22007))
|
||||
- **teams: add pipeline outbound delivery via existing adapter** (salvage of #21411) ([#22024](https://github.com/NousResearch/hermes-agent/pull/22024))
|
||||
|
||||
### Cross-platform
|
||||
- **Per-platform admin/user split for slash commands** (salvage of #4443) ([#23373](https://github.com/NousResearch/hermes-agent/pull/23373))
|
||||
- **Forensics on signal handling — non-blocking diag, per-phase timing, stale-unit warning** ([#23285](https://github.com/NousResearch/hermes-agent/pull/23285))
|
||||
- **Keep gateway running when platforms fail; add per-platform circuit breaker + `/platform`** ([#26600](https://github.com/NousResearch/hermes-agent/pull/26600))
|
||||
- **Wire `clarify` tool with inline keyboard buttons on Telegram** ([#24199](https://github.com/NousResearch/hermes-agent/pull/24199))
|
||||
- **Add `chat_id` to `hook_ctx` for message source tracking** ([#24710](https://github.com/NousResearch/hermes-agent/pull/24710))
|
||||
|
||||
### Telegram
|
||||
- **Native draft streaming via `sendMessageDraft` (Bot API 9.5+)** (salvage of #3412) ([#23512](https://github.com/NousResearch/hermes-agent/pull/23512))
|
||||
- **Stream Telegram edits safely** — salvage of #22264 (@kshitijk4poor) ([#22518](https://github.com/NousResearch/hermes-agent/pull/22518))
|
||||
- **Telegram notification mode** (salvage #22772) ([#22793](https://github.com/NousResearch/hermes-agent/pull/22793))
|
||||
- **Telegram guest mention mode** (@kshitijk4poor) ([#22759](https://github.com/NousResearch/hermes-agent/pull/22759))
|
||||
- **Split-and-deliver oversized edits instead of silent truncation** (salvage of #19537) ([#23576](https://github.com/NousResearch/hermes-agent/pull/23576))
|
||||
- **Preserve DM topic routing via reply fallback** (salvage #22053) (@kshitijk4poor) ([#22410](https://github.com/NousResearch/hermes-agent/pull/22410))
|
||||
- **Pass `source.thread_id` explicitly on auto-reset notice** (carve-out of #7404) ([#23440](https://github.com/NousResearch/hermes-agent/pull/23440))
|
||||
|
||||
### Discord
|
||||
- **Render clarify choices as buttons** ([#25485](https://github.com/NousResearch/hermes-agent/pull/25485))
|
||||
- **Channel history backfill — default on, broadened scope** ([#25984](https://github.com/NousResearch/hermes-agent/pull/25984))
|
||||
- **`thread_require_mention` for multi-bot threads** (salvage #25313) ([#25445](https://github.com/NousResearch/hermes-agent/pull/25445))
|
||||
|
||||
### Slack
|
||||
- **Support `!cmd` as alternate prefix for slash commands in threads** ([#25355](https://github.com/NousResearch/hermes-agent/pull/25355))
|
||||
|
||||
### WhatsApp
|
||||
- **Surface quoted reply metadata from Baileys** (#25398) ([#25489](https://github.com/NousResearch/hermes-agent/pull/25489))
|
||||
|
||||
### Feishu / Google Chat / others
|
||||
- **Feishu: native update prompt cards** (@kshitijk4poor) ([#22448](https://github.com/NousResearch/hermes-agent/pull/22448))
|
||||
- **Google Chat: repair setup prompt imports** (@helix4u) ([#22038](https://github.com/NousResearch/hermes-agent/pull/22038))
|
||||
- **Google Chat: honor relay-declared sender_type** (salvage of #22107) (@kshitijk4poor) ([#22432](https://github.com/NousResearch/hermes-agent/pull/22432))
|
||||
- **LINE: use `build_source` instead of nonexistent `create_source`** ([#24717](https://github.com/NousResearch/hermes-agent/pull/24717))
|
||||
- **Add `weixin, and more` to gateway docs** (salvage of #21063 by @wuwuzhijing)
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ CLI & TUI
|
||||
|
||||
### CLI
|
||||
- **Show YOLO mode warning in banner and status bar** ([#26238](https://github.com/NousResearch/hermes-agent/pull/26238))
|
||||
- **Confirm prompt for destructive slash commands** (#4069) ([#22687](https://github.com/NousResearch/hermes-agent/pull/22687))
|
||||
- **`docker_extra_args` + `display.timestamps`** ([#23599](https://github.com/NousResearch/hermes-agent/pull/23599))
|
||||
- **Delegate tool: show user's actual concurrency / spawn-depth limits in description** ([#22694](https://github.com/NousResearch/hermes-agent/pull/22694))
|
||||
|
||||
### TUI
|
||||
- **`/sessions` slash command for browsing and resuming previous sessions** (@austinpickett) ([#20805](https://github.com/NousResearch/hermes-agent/pull/20805))
|
||||
- **Segment turns with rule above non-first user msgs; trim ticker dead space** (@OutThisLife) ([#21846](https://github.com/NousResearch/hermes-agent/pull/21846))
|
||||
- **Support attaching to an existing gateway** (@OutThisLife) ([#21978](https://github.com/NousResearch/hermes-agent/pull/21978))
|
||||
- **Resolve markdown links to readable page titles** (@OutThisLife) ([#24013](https://github.com/NousResearch/hermes-agent/pull/24013))
|
||||
- **Width-aware markdown table rendering with vertical fallback** (@alt-glitch) ([#26195](https://github.com/NousResearch/hermes-agent/pull/26195))
|
||||
- **Keep Ink displayCursor in sync with fast-echo writes so cursor stops drifting** (@OutThisLife) ([#26717](https://github.com/NousResearch/hermes-agent/pull/26717))
|
||||
- **Allow transcript scroll + Esc during approval/clarify/confirm prompts** (@OutThisLife) ([#26414](https://github.com/NousResearch/hermes-agent/pull/26414))
|
||||
- **Preserve session when switching personality** (@austinpickett) ([#20942](https://github.com/NousResearch/hermes-agent/pull/20942))
|
||||
- **Skip native safety net on OSC52-capable terminals** (@benbarclay) ([#20954](https://github.com/NousResearch/hermes-agent/pull/20954))
|
||||
|
||||
### Dashboard / GUI
|
||||
- **Route embedded TUI through dashboard gateway** (@OutThisLife) ([#21979](https://github.com/NousResearch/hermes-agent/pull/21979))
|
||||
- **Hide token/cost analytics behind config flag (default off)** ([#25438](https://github.com/NousResearch/hermes-agent/pull/25438))
|
||||
- **Fix Langfuse observability — trace I/O, tool outputs, placeholder credentials** (closes #22342, #22763) (@kshitijk4poor) ([#26320](https://github.com/NousResearch/hermes-agent/pull/26320))
|
||||
- **MiniMax 'Login' button launched Claude OAuth** (salvage #22849) ([#24058](https://github.com/NousResearch/hermes-agent/pull/24058))
|
||||
- **Update cron modals** (@austinpickett) ([#25985](https://github.com/NousResearch/hermes-agent/pull/25985))
|
||||
- **Analytics: prevent silent token loss and add Claude 4.5–4.7 pricing** (@austinpickett) ([#21455](https://github.com/NousResearch/hermes-agent/pull/21455))
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Tools & Capabilities
|
||||
|
||||
### Vision & video
|
||||
- **`vision_analyze` returns pixels to vision-capable models** ([#22955](https://github.com/NousResearch/hermes-agent/pull/22955))
|
||||
- **Unified `video_generate` with pluggable provider backends** ([#25126](https://github.com/NousResearch/hermes-agent/pull/25126))
|
||||
- **`image_gen`: actionable setup message when no FAL backend is reachable** ([#26222](https://github.com/NousResearch/hermes-agent/pull/26222))
|
||||
|
||||
### Computer use
|
||||
- **`computer_use` cua-driver backend + focus-safe ops + non-Anthropic provider fix** (re-salvage #16936) ([#21967](https://github.com/NousResearch/hermes-agent/pull/21967))
|
||||
- **Refresh cua-driver on `hermes update` + add `install --upgrade`** ([#24063](https://github.com/NousResearch/hermes-agent/pull/24063))
|
||||
|
||||
### LSP & write-time diagnostics
|
||||
- **Semantic diagnostics from real language servers in `write_file`/`patch`** ([#24168](https://github.com/NousResearch/hermes-agent/pull/24168))
|
||||
- **Shift baseline diagnostics into post-edit coordinates** ([#25978](https://github.com/NousResearch/hermes-agent/pull/25978))
|
||||
|
||||
### Search & web
|
||||
- **Brave Search (free tier) and DDGS search providers** ([#21337](https://github.com/NousResearch/hermes-agent/pull/21337))
|
||||
- **Bearer auth header for Tavily `/crawl` endpoint** ([#24658](https://github.com/NousResearch/hermes-agent/pull/24658))
|
||||
|
||||
### X (Twitter)
|
||||
- **Gated `x_search` tool with OAuth-or-API-key auth** ([#26763](https://github.com/NousResearch/hermes-agent/pull/26763))
|
||||
|
||||
### Browser
|
||||
- **Route `browser_console` eval through supervisor's persistent CDP WS (180x faster)** ([#23226](https://github.com/NousResearch/hermes-agent/pull/23226))
|
||||
- **Support externally managed Camofox sessions** ([#24499](https://github.com/NousResearch/hermes-agent/pull/24499))
|
||||
|
||||
### MCP
|
||||
- **`supports_parallel_tool_calls` for MCP servers** (salvage of #9944) ([#26825](https://github.com/NousResearch/hermes-agent/pull/26825))
|
||||
- **Codex preset for Codex CLI MCP server** (salvage #22663) ([#22679](https://github.com/NousResearch/hermes-agent/pull/22679))
|
||||
- **Stop retrying initial MCP auth failures** (#25624) ([#25776](https://github.com/NousResearch/hermes-agent/pull/25776))
|
||||
|
||||
### Google Workspace
|
||||
- **Drive write ops + Docs/Sheets create/append** ([#21895](https://github.com/NousResearch/hermes-agent/pull/21895))
|
||||
|
||||
### Per-turn verifier
|
||||
- **Per-turn file-mutation verifier footer** ([#24498](https://github.com/NousResearch/hermes-agent/pull/24498))
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Kanban (Multi-Agent)
|
||||
|
||||
- **`specify` — auxiliary LLM fleshes out triage tasks** ([#21435](https://github.com/NousResearch/hermes-agent/pull/21435))
|
||||
- **Orchestrator board tools — `kanban_list` + `kanban_unblock`** (carve-out of #20568) ([#23012](https://github.com/NousResearch/hermes-agent/pull/23012))
|
||||
- **`stranded_in_ready` diagnostic for unclaimed tasks** ([#23578](https://github.com/NousResearch/hermes-agent/pull/23578))
|
||||
- **Dashboard batch QOL upgrade** (salvage of #23240) ([#23550](https://github.com/NousResearch/hermes-agent/pull/23550))
|
||||
- **Tooltips and docs link across dashboard** ([#21541](https://github.com/NousResearch/hermes-agent/pull/21541))
|
||||
- **Dedupe notifier delivery via atomic claim + rewind on failure** (salvage #22558) ([#23401](https://github.com/NousResearch/hermes-agent/pull/23401))
|
||||
- **Keep notifier subscriptions alive across retry cycles** (salvage #21398) ([#23423](https://github.com/NousResearch/hermes-agent/pull/23423))
|
||||
- **Drop caller-controlled author override in `kanban_comment`** (salvage of #22109) (@kshitijk4poor) ([#22435](https://github.com/NousResearch/hermes-agent/pull/22435))
|
||||
- **Sanitize comment author rendering in `build_worker_context`** ([#22769](https://github.com/NousResearch/hermes-agent/pull/22769))
|
||||
|
||||
---
|
||||
|
||||
## 🧠 Plugins & Extension
|
||||
|
||||
### Plugin surface
|
||||
- **Run any LLM call from inside a plugin via `ctx.llm`** ([#23194](https://github.com/NousResearch/hermes-agent/pull/23194))
|
||||
- **`tool_override` flag for replacing built-in tools** (closes #11049) ([#26759](https://github.com/NousResearch/hermes-agent/pull/26759))
|
||||
- **`standalone_sender_fn` for out-of-process cron delivery** (@kshitijk4poor) ([#22461](https://github.com/NousResearch/hermes-agent/pull/22461))
|
||||
- **`HERMES_PLUGINS_DEBUG=1` surfaces plugin discovery logs** ([#22684](https://github.com/NousResearch/hermes-agent/pull/22684))
|
||||
- **Hindsight-client as optional dependency** (@alt-glitch) ([#21818](https://github.com/NousResearch/hermes-agent/pull/21818))
|
||||
|
||||
### Profile & distribution
|
||||
- **Shareable profile distributions via git** ([#20831](https://github.com/NousResearch/hermes-agent/pull/20831))
|
||||
|
||||
---
|
||||
|
||||
## ⏰ Cron
|
||||
|
||||
- **Routing intent — `deliver=all` fans out to every connected channel** ([#21495](https://github.com/NousResearch/hermes-agent/pull/21495))
|
||||
- **Support name-based lookup for job operations** ([#26231](https://github.com/NousResearch/hermes-agent/pull/26231))
|
||||
- **Blank Cron dashboard tab + partial-record crashes** (salvage #21042 + #22330) (@kshitijk4poor) ([#22389](https://github.com/NousResearch/hermes-agent/pull/22389))
|
||||
- **Do not seed `HERMES_SESSION_*` contextvars from cron origin** (salvage of #22356) (@kshitijk4poor) ([#22382](https://github.com/NousResearch/hermes-agent/pull/22382))
|
||||
- **Scan assembled prompt including skill content for prompt injection** (#3968)
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Skills Ecosystem
|
||||
|
||||
### Skills Hub
|
||||
- **`hermes-skills/huggingface` as a trusted default tap** (closes #2549) ([#26219](https://github.com/NousResearch/hermes-agent/pull/26219))
|
||||
- **Show per-skill pages in the left sidebar** ([#26646](https://github.com/NousResearch/hermes-agent/pull/26646))
|
||||
- **Richer info panels on the Skills Hub** ([#22905](https://github.com/NousResearch/hermes-agent/pull/22905))
|
||||
- **Refuse `skill_view` name collisions instead of guessing** (closes #6136 @polkn)
|
||||
|
||||
### Curator
|
||||
- **Show rename map in user-visible summary** ([#22910](https://github.com/NousResearch/hermes-agent/pull/22910))
|
||||
- **Hint at `hermes curator pin` in the rename block** ([#23212](https://github.com/NousResearch/hermes-agent/pull/23212))
|
||||
|
||||
### New optional skills
|
||||
- **Hyperliquid** — perp/spot trading via SDK + REST (salvage of #1952) ([#23583](https://github.com/NousResearch/hermes-agent/pull/23583))
|
||||
- **Yahoo Finance** market data ([#23590](https://github.com/NousResearch/hermes-agent/pull/23590))
|
||||
- **api-testing** (REST/GraphQL debug, salvages #1800) ([#23582](https://github.com/NousResearch/hermes-agent/pull/23582))
|
||||
- **Unified EVM multi-chain skill** (salvages #25291 + #2010 + folds in base/) ([#25299](https://github.com/NousResearch/hermes-agent/pull/25299))
|
||||
- **darwinian-evolver** ([#26760](https://github.com/NousResearch/hermes-agent/pull/26760))
|
||||
- **osint-investigation** (closes #355) ([#26729](https://github.com/NousResearch/hermes-agent/pull/26729))
|
||||
- **pinggy-tunnel** ([#26765](https://github.com/NousResearch/hermes-agent/pull/26765))
|
||||
- **watchers** — RSS / HTTP JSON / GitHub polling via cron no-agent ([#21881](https://github.com/NousResearch/hermes-agent/pull/21881))
|
||||
- **Notion overhaul for the Developer Platform** (May 2026) ([#26612](https://github.com/NousResearch/hermes-agent/pull/26612))
|
||||
|
||||
---
|
||||
|
||||
## 🔒 Security & Reliability
|
||||
|
||||
### Security hardening
|
||||
- **Sudo brute-force block + sudo-stdin/askpass DANGEROUS** (salvage of #22194 + #21128) (@kshitijk4poor) ([#23736](https://github.com/NousResearch/hermes-agent/pull/23736))
|
||||
- **Drop caller-controlled author override in `kanban_comment`** (salvage of #22109) (@kshitijk4poor) ([#22435](https://github.com/NousResearch/hermes-agent/pull/22435))
|
||||
- **Cover remaining SSRF fetch paths in skills-hub** (salvage #22804) ([#22843](https://github.com/NousResearch/hermes-agent/pull/22843))
|
||||
- **Use credential_pool for custom endpoint model listing probes** (salvage #22810) ([#22842](https://github.com/NousResearch/hermes-agent/pull/22842))
|
||||
- **Require dashboard auth for plugin API routes** (salvage #19541) ([#23220](https://github.com/NousResearch/hermes-agent/pull/23220))
|
||||
- **Sanitize env and redact output in quick commands + remove write-only `_pending_messages`** ([#23584](https://github.com/NousResearch/hermes-agent/pull/23584))
|
||||
- **Reduce unnecessary `shell=True` in subprocess calls** ([#25149](https://github.com/NousResearch/hermes-agent/pull/25149))
|
||||
- **Sanitize Google Chat sender_type from relay** (salvage of #22107) (@kshitijk4poor) ([#22432](https://github.com/NousResearch/hermes-agent/pull/22432))
|
||||
- **Supply-chain advisory checker** ([#24220](https://github.com/NousResearch/hermes-agent/pull/24220))
|
||||
- **Rewrite security policy around OS-level isolation as the boundary** (@jquesnelle) ([#20317](https://github.com/NousResearch/hermes-agent/pull/20317))
|
||||
- **Remove public security advisory page** ([#24253](https://github.com/NousResearch/hermes-agent/pull/24253))
|
||||
|
||||
### Reliability — notable bug closures
|
||||
- **SQLite: fall back to `journal_mode=DELETE` on NFS/SMB/FUSE** (fixes `/resume` on network mounts) (@kshitijk4poor) ([#22043](https://github.com/NousResearch/hermes-agent/pull/22043))
|
||||
- **Codex-runtime: retire wedged sessions + post-tool watchdog + OAuth refresh classify** ([#25769](https://github.com/NousResearch/hermes-agent/pull/25769))
|
||||
- **Codex-runtime: de-dup `[plugins.X]` tables and stop leaking HERMES_HOME** (#26250) (@kshitijk4poor) ([#26260](https://github.com/NousResearch/hermes-agent/pull/26260))
|
||||
- **Daytona: migrate legacy-sandbox lookup to cursor-based `list()`** ([#24587](https://github.com/NousResearch/hermes-agent/pull/24587))
|
||||
- **MCP: stop retrying initial MCP auth failures** (#25624) ([#25776](https://github.com/NousResearch/hermes-agent/pull/25776))
|
||||
- **Gateway: enable text-intercept for multi-choice clarify fallback** (#25587) ([#25778](https://github.com/NousResearch/hermes-agent/pull/25778))
|
||||
- **Gateway: keep running when platforms fail; per-platform circuit breaker + `/platform`** ([#26600](https://github.com/NousResearch/hermes-agent/pull/26600))
|
||||
- **Delegate: salvage #21933 JSON-string batch + diagnostic logging** (@kshitijk4poor) ([#22436](https://github.com/NousResearch/hermes-agent/pull/22436))
|
||||
- **Profiles+banner: exclude infrastructure from `--clone-all` + fix stale update-check repo resolution** (@kshitijk4poor) ([#22475](https://github.com/NousResearch/hermes-agent/pull/22475))
|
||||
- **ACP: inline file attachment resources** (salvage #21400 + image support) ([#21407](https://github.com/NousResearch/hermes-agent/pull/21407))
|
||||
- **CI: unblock shared PR checks** (@stephenschoettler) ([#21012](https://github.com/NousResearch/hermes-agent/pull/21012), [#25957](https://github.com/NousResearch/hermes-agent/pull/25957))
|
||||
|
||||
### Notable reverts in window
|
||||
- **`/goal` checklist + /subgoal feature stack** — rolled back ([#23813](https://github.com/NousResearch/hermes-agent/pull/23813)); `/subgoal` returned in simpler form via [#25449](https://github.com/NousResearch/hermes-agent/pull/25449)
|
||||
- **Scrollback box width clamp** (#25975) rolled back to restore full-width borders ([#26163](https://github.com/NousResearch/hermes-agent/pull/26163))
|
||||
- **`fix(cli): tolerate unreadable dirs when building systemd PATH`** rolled back
|
||||
|
||||
---
|
||||
|
||||
## 🌍 i18n
|
||||
|
||||
- **Localize all gateway commands + web dashboard, add 8 new locales (16 total)** ([#22914](https://github.com/NousResearch/hermes-agent/pull/22914))
|
||||
|
||||
---
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
- **Repair Voice & TTS provider table** (@nightcityblade, fixes #24101) ([#24138](https://github.com/NousResearch/hermes-agent/pull/24138))
|
||||
- **Show per-skill pages in the left sidebar** ([#26646](https://github.com/NousResearch/hermes-agent/pull/26646))
|
||||
- **Mention Weixin in gateway help and docstrings** (salvage of #21063 by @wuwuzhijing)
|
||||
- **Richer info panels on the Skills Hub** ([#22905](https://github.com/NousResearch/hermes-agent/pull/22905))
|
||||
- Many more doc updates across providers, platforms, skills, Windows install paths, and dashboard.
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing & CI
|
||||
|
||||
- **Unblock shared PR checks** (@stephenschoettler) ([#21012](https://github.com/NousResearch/hermes-agent/pull/21012))
|
||||
- **Stabilize shared test state after 21012** (@stephenschoettler) ([#25957](https://github.com/NousResearch/hermes-agent/pull/25957))
|
||||
- A long tail of test additions for platforms, providers, plugins, and edge cases — 8 explicit `test:` PRs plus ~250 fix PRs that also added regression coverage.
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributors
|
||||
|
||||
### Core
|
||||
- @teknium1 — release lead, architecture, ~406 PRs merged in window
|
||||
|
||||
### Top community contributors
|
||||
- **@kshitijk4poor** — 38 PRs · Telegram cadence/streaming/topic routing, security hardening (sudo, SSRF, kanban_comment, dashboard auth), codex-runtime hygiene, NovitaAI provider, profile/banner fixes, Feishu update cards, gateway QOL across the board
|
||||
- **@alt-glitch** — 13 PRs · Markdown-table TUI rendering, `HERMES_SESSION_ID` env var, hindsight-client optional dep, Nix `extraDependencyGroups`
|
||||
- **@OutThisLife** (Brooklyn Nicholson) — 12 PRs · TUI turn segmentation, attach-to-gateway, markdown link titles, embedded TUI via dashboard gateway, Ink cursor sync, scroll/Esc during prompts
|
||||
- **@austinpickett** — 8 PRs · `/sessions` slash command, personality switching preserves session, cron modals, dashboard analytics
|
||||
- **@helix4u** — 5 PRs · Google Chat setup, browser install skip on system chromium, Windows Ctrl+C preservation
|
||||
- **@rob-maron** — 4 PRs · Nous Portal as model metadata authority, provider polish
|
||||
- **@stephenschoettler** — 3 PRs · CI stabilization
|
||||
- **@ethernet8023** — 3 PRs · platform/gateway work
|
||||
|
||||
### All contributors (alphabetical)
|
||||
|
||||
@02356abc, @0xbyt4, @0xharryriddle, @1000Delta, @1RB, @29206394, @A-kamal, @aashizpoudel, @Abd0r,
|
||||
@adybag14-cyber, @AgentArcLab, @ahmedbadr3, @AhmetArif0, @alblez, @Alex-yang00, @ALIYILD, @AllynSheep,
|
||||
@alt-glitch, @am423, @amathxbt, @amethystani, @ArecaNon, @Arkmusn, @askclaw-vesper, @AsoTora, @austinpickett,
|
||||
@aydnOktay, @ayushere, @baocin, @Bartok9, @benbarclay, @BennetYrWang, @Bihruze, @binhnt92, @briandevans,
|
||||
@brooklynnicholson, @btorresgil, @buntingszn, @CalmProton, @chrisworksai, @CoinTheHat, @dandacompany, @Dangooy,
|
||||
@DanielLSM, @David-0x221Eight, @ddupont808, @dhruv-saxena, @diablozzc, @dlkakbs, @dmahan93, @dmnkhorvath,
|
||||
@domtriola, @donrhmexe, @Dusk1e, @eloklam, @emozilla, @ephron-ren, @erenkarakus, @EthanGuo-coder,
|
||||
@ethernet8023, @evgyur, @explainanalyze, @fahdad, @fr33d3m0n, @Freeman-Consulting, @freqyfreqy, @Frowtek,
|
||||
@fu576, @github-actions[bot], @gnanirahulnutakki, @GodsBoy, @guglielmofonda, @Gutslabs, @hanzckernel,
|
||||
@heathley, @hekaru-agent, @helix4u, @HenkDz, @HiddenPuppy, @hllqkb, @hrygo, @HuangYuChuh, @Hugo-SEQUIER, @HxT9,
|
||||
@iacker, @InB4DevOps, @isaachuangGMICLOUD, @iuyup, @Jaaneek, @jackey8616, @jackjin1997, @Jaggia, @jak983464779,
|
||||
@jelrod27, @jethac, @JithendraNara, @johnisag, @Julientalbot, @Jwd-gity, @kallidean, @keyuyuan, @kfa-ai,
|
||||
@kidonng, @KiraKatana, @kjames2001, @konsisumer, @Korkyzer, @kshitijk4poor, @KvnGz, @lars-hagen, @leehack,
|
||||
@leepoweii, @LeonSGP43, @li0near, @libo1106, @liquidchen, @littlewwwhite, @liuhao1024, @liyoungc, @luandiasrj,
|
||||
@luoyuctl, @luyao618, @magic524, @mbac, @McClean, @memosr, @Mibayy, @ming1523, @mizgyo, @mrshu, @ms-alan,
|
||||
@MustafaKara7, @nederev, @nicoechaniz, @nidhi-singh02, @nightcityblade, @nik1t7n, @Ninso112, @NivOO5,
|
||||
@novax635, @nv-kasikritc, @oferlaor, @oswaldb22, @outdoorsea, @oxngon, @PaTTeeL, @pearjelly, @pefontana,
|
||||
@perng, @PhilipAD, @phuongvm, @polkn, @Prasanna28Devadiga, @princepal9120, @pty819, @purzbeats, @Quarkex,
|
||||
@quocanh261997, @qWaitCrypto, @Qwinty, @rahimsais, @raymaylee, @ReqX, @rewbs, @RhombusMaximus, @rob-maron,
|
||||
@Ruzzgar, @ryptotalent, @Sanjays2402, @shannonsands, @shaun0927, @SiliconID, @silv-mt-holdings, @simpolism,
|
||||
@smwbev, @soichiyo, @sprmn24, @steezkelly, @stephenschoettler, @Sylw3ster, @szymonclawd, @teyrebaz33,
|
||||
@Tianyu199509, @Tranquil-Flow, @TreyDong, @TurgutKural, @tw2818, @tymrtn, @uzunkuyruk, @v1b3coder,
|
||||
@vanthinh6886, @VinceZcrikl, @vKongv, @vominh1919, @voteblake, @VTRiot, @wali-reheman, @wesleysimplicio,
|
||||
@wilsen0, @WorldWriter, @worlldz, @wuli666, @wuwuzhijing, @Wysie, @XiaoXiao0221, @xieNniu, @xxxigm, @yehuosi,
|
||||
@ygd58, @yifengingit, @yuga-hashimoto, @zccyman, @ZeterMordio, @Zhekinmaksim, @zhengyn0001
|
||||
|
||||
Also: @Nagatha (Claude Opus 4.7).
|
||||
|
||||
---
|
||||
|
||||
**Full Changelog**: [v2026.5.7...v2026.5.16](https://github.com/NousResearch/hermes-agent/compare/v2026.5.7...v2026.5.16)
|
||||
@@ -1,329 +0,0 @@
|
||||
# Hermes Agent v0.9.0 (v2026.4.13)
|
||||
|
||||
**Release Date:** April 13, 2026
|
||||
**Since v0.8.0:** 487 commits · 269 merged PRs · 167 resolved issues · 493 files changed · 63,281 insertions · 24 contributors
|
||||
|
||||
> The everywhere release — Hermes goes mobile with Termux/Android, adds iMessage and WeChat, ships Fast Mode for OpenAI and Anthropic, introduces background process monitoring, launches a local web dashboard for managing your agent, and delivers the deepest security hardening pass yet across 16 supported platforms.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **Local Web Dashboard** — A new browser-based dashboard for managing your Hermes Agent locally. Configure settings, monitor sessions, browse skills, and manage your gateway — all from a clean web interface without touching config files or the terminal. The easiest way to get started with Hermes.
|
||||
|
||||
- **Fast Mode (`/fast`)** — Priority processing for OpenAI and Anthropic models. Toggle `/fast` to route through priority queues for significantly lower latency on supported models (GPT-5.4, Codex, Claude). Expands across all OpenAI Priority Processing models and Anthropic's fast tier. ([#6875](https://github.com/NousResearch/hermes-agent/pull/6875), [#6960](https://github.com/NousResearch/hermes-agent/pull/6960), [#7037](https://github.com/NousResearch/hermes-agent/pull/7037))
|
||||
|
||||
- **iMessage via BlueBubbles** — Full iMessage integration through BlueBubbles, bringing Hermes to Apple's messaging ecosystem. Auto-webhook registration, setup wizard integration, and crash resilience. ([#6437](https://github.com/NousResearch/hermes-agent/pull/6437), [#6460](https://github.com/NousResearch/hermes-agent/pull/6460), [#6494](https://github.com/NousResearch/hermes-agent/pull/6494))
|
||||
|
||||
- **WeChat (Weixin) & WeCom Callback Mode** — Native WeChat support via iLink Bot API and a new WeCom callback-mode adapter for self-built enterprise apps. Streaming cursor, media uploads, markdown link handling, and atomic state persistence. Hermes now covers the Chinese messaging ecosystem end-to-end. ([#7166](https://github.com/NousResearch/hermes-agent/pull/7166), [#7943](https://github.com/NousResearch/hermes-agent/pull/7943))
|
||||
|
||||
- **Termux / Android Support** — Run Hermes natively on Android via Termux. Adapted install paths, TUI optimizations for mobile screens, voice backend support, and the `/image` command work on-device. ([#6834](https://github.com/NousResearch/hermes-agent/pull/6834))
|
||||
|
||||
- **Background Process Monitoring (`watch_patterns`)** — Set patterns to watch for in background process output and get notified in real-time when they match. Monitor for errors, wait for specific events ("listening on port"), or watch build logs — all without polling. ([#7635](https://github.com/NousResearch/hermes-agent/pull/7635))
|
||||
|
||||
- **Native xAI & Xiaomi MiMo Providers** — First-class provider support for xAI (Grok) and Xiaomi MiMo, with direct API access, model catalogs, and setup wizard integration. Plus Qwen OAuth with portal request support. ([#7372](https://github.com/NousResearch/hermes-agent/pull/7372), [#7855](https://github.com/NousResearch/hermes-agent/pull/7855))
|
||||
|
||||
- **Pluggable Context Engine** — Context management is now a pluggable slot via `hermes plugins`. Swap in custom context engines that control what the agent sees each turn — filtering, summarization, or domain-specific context injection. ([#7464](https://github.com/NousResearch/hermes-agent/pull/7464))
|
||||
|
||||
- **Unified Proxy Support** — SOCKS proxy, `DISCORD_PROXY`, and system proxy auto-detection across all gateway platforms. Hermes behind corporate firewalls just works. ([#6814](https://github.com/NousResearch/hermes-agent/pull/6814))
|
||||
|
||||
- **Comprehensive Security Hardening** — Path traversal protection in checkpoint manager, shell injection neutralization in sandbox writes, SSRF redirect guards in Slack image uploads, Twilio webhook signature validation (SMS RCE fix), API server auth enforcement, git argument injection prevention, and approval button authorization. ([#7933](https://github.com/NousResearch/hermes-agent/pull/7933), [#7944](https://github.com/NousResearch/hermes-agent/pull/7944), [#7940](https://github.com/NousResearch/hermes-agent/pull/7940), [#7151](https://github.com/NousResearch/hermes-agent/pull/7151), [#7156](https://github.com/NousResearch/hermes-agent/pull/7156))
|
||||
|
||||
- **`hermes backup` & `hermes import`** — Full backup and restore of your Hermes configuration, sessions, skills, and memory. Migrate between machines or create snapshots before major changes. ([#7997](https://github.com/NousResearch/hermes-agent/pull/7997))
|
||||
|
||||
- **16 Supported Platforms** — With BlueBubbles (iMessage) and WeChat joining Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Email, SMS, DingTalk, Feishu, WeCom, Mattermost, Home Assistant, and Webhooks, Hermes now runs on 16 messaging platforms out of the box.
|
||||
|
||||
- **`/debug` & `hermes debug share`** — New debugging toolkit: `/debug` slash command across all platforms for quick diagnostics, plus `hermes debug share` to upload a full debug report to a pastebin for easy sharing when troubleshooting. ([#8681](https://github.com/NousResearch/hermes-agent/pull/8681))
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Core Agent & Architecture
|
||||
|
||||
### Provider & Model Support
|
||||
- **Native xAI (Grok) provider** with direct API access and model catalog ([#7372](https://github.com/NousResearch/hermes-agent/pull/7372))
|
||||
- **Xiaomi MiMo as first-class provider** — setup wizard, model catalog, empty response recovery ([#7855](https://github.com/NousResearch/hermes-agent/pull/7855))
|
||||
- **Qwen OAuth provider** with portal request support ([#6282](https://github.com/NousResearch/hermes-agent/pull/6282))
|
||||
- **Fast Mode** — `/fast` toggle for OpenAI Priority Processing + Anthropic fast tier ([#6875](https://github.com/NousResearch/hermes-agent/pull/6875), [#6960](https://github.com/NousResearch/hermes-agent/pull/6960), [#7037](https://github.com/NousResearch/hermes-agent/pull/7037))
|
||||
- **Structured API error classification** for smart failover decisions ([#6514](https://github.com/NousResearch/hermes-agent/pull/6514))
|
||||
- **Rate limit header capture** shown in `/usage` ([#6541](https://github.com/NousResearch/hermes-agent/pull/6541))
|
||||
- **API server model name** derived from profile name ([#6857](https://github.com/NousResearch/hermes-agent/pull/6857))
|
||||
- **Custom providers** now included in `/model` listings and resolution ([#7088](https://github.com/NousResearch/hermes-agent/pull/7088))
|
||||
- **Fallback provider activation** on repeated empty responses with user-visible status ([#7505](https://github.com/NousResearch/hermes-agent/pull/7505))
|
||||
- **OpenRouter variant tags** (`:free`, `:extended`, `:fast`) preserved during model switch ([#6383](https://github.com/NousResearch/hermes-agent/pull/6383))
|
||||
- **Credential exhaustion TTL** reduced from 24 hours to 1 hour ([#6504](https://github.com/NousResearch/hermes-agent/pull/6504))
|
||||
- **OAuth credential lifecycle** hardening — stale pool keys, auth.json sync, Codex CLI race fixes ([#6874](https://github.com/NousResearch/hermes-agent/pull/6874))
|
||||
- Empty response recovery for reasoning models (MiMo, Qwen, GLM) ([#8609](https://github.com/NousResearch/hermes-agent/pull/8609))
|
||||
- MiniMax context lengths, thinking guard, endpoint corrections ([#6082](https://github.com/NousResearch/hermes-agent/pull/6082), [#7126](https://github.com/NousResearch/hermes-agent/pull/7126))
|
||||
- Z.AI endpoint auto-detect via probe and cache ([#5763](https://github.com/NousResearch/hermes-agent/pull/5763))
|
||||
|
||||
### Agent Loop & Conversation
|
||||
- **Pluggable context engine slot** via `hermes plugins` ([#7464](https://github.com/NousResearch/hermes-agent/pull/7464))
|
||||
- **Background process monitoring** — `watch_patterns` for real-time output alerts ([#7635](https://github.com/NousResearch/hermes-agent/pull/7635))
|
||||
- **Improved context compression** — higher limits, tool tracking, degradation warnings, token-budget tail protection ([#6395](https://github.com/NousResearch/hermes-agent/pull/6395), [#6453](https://github.com/NousResearch/hermes-agent/pull/6453))
|
||||
- **`/compress <focus>`** — guided compression with a focus topic ([#8017](https://github.com/NousResearch/hermes-agent/pull/8017))
|
||||
- **Tiered context pressure warnings** with gateway dedup ([#6411](https://github.com/NousResearch/hermes-agent/pull/6411))
|
||||
- **Staged inactivity warning** before timeout escalation ([#6387](https://github.com/NousResearch/hermes-agent/pull/6387))
|
||||
- **Prevent agent from stopping mid-task** — compression floor, budget overhaul, activity tracking ([#7983](https://github.com/NousResearch/hermes-agent/pull/7983))
|
||||
- **Propagate child activity to parent** during `delegate_task` ([#7295](https://github.com/NousResearch/hermes-agent/pull/7295))
|
||||
- **Truncated streaming tool call detection** before execution ([#6847](https://github.com/NousResearch/hermes-agent/pull/6847))
|
||||
- Empty response retry (3 attempts with nudge) ([#6488](https://github.com/NousResearch/hermes-agent/pull/6488))
|
||||
- Adaptive streaming backoff + cursor strip to prevent message truncation ([#7683](https://github.com/NousResearch/hermes-agent/pull/7683))
|
||||
- Compression uses live session model instead of stale persisted config ([#8258](https://github.com/NousResearch/hermes-agent/pull/8258))
|
||||
- Strip `<thought>` tags from Gemma 4 responses ([#8562](https://github.com/NousResearch/hermes-agent/pull/8562))
|
||||
- Prevent `<think>` in prose from suppressing response output ([#6968](https://github.com/NousResearch/hermes-agent/pull/6968))
|
||||
- Turn-exit diagnostic logging to agent loop ([#6549](https://github.com/NousResearch/hermes-agent/pull/6549))
|
||||
- Scope tool interrupt signal per-thread to prevent cross-session leaks ([#7930](https://github.com/NousResearch/hermes-agent/pull/7930))
|
||||
|
||||
### Memory & Sessions
|
||||
- **Hindsight memory plugin** — feature parity, setup wizard, config improvements — @nicoloboschi ([#6428](https://github.com/NousResearch/hermes-agent/pull/6428))
|
||||
- **Honcho** — opt-in `initOnSessionStart` for tools mode — @Kathie-yu ([#6995](https://github.com/NousResearch/hermes-agent/pull/6995))
|
||||
- Orphan children instead of cascade-deleting in prune/delete ([#6513](https://github.com/NousResearch/hermes-agent/pull/6513))
|
||||
- Doctor command only checks the active memory provider ([#6285](https://github.com/NousResearch/hermes-agent/pull/6285))
|
||||
|
||||
---
|
||||
|
||||
## 📱 Messaging Platforms (Gateway)
|
||||
|
||||
### New Platforms
|
||||
- **BlueBubbles (iMessage)** — full adapter with auto-webhook registration, setup wizard, and crash resilience ([#6437](https://github.com/NousResearch/hermes-agent/pull/6437), [#6460](https://github.com/NousResearch/hermes-agent/pull/6460), [#6494](https://github.com/NousResearch/hermes-agent/pull/6494), [#7107](https://github.com/NousResearch/hermes-agent/pull/7107))
|
||||
- **Weixin (WeChat)** — native support via iLink Bot API with streaming, media uploads, markdown links ([#7166](https://github.com/NousResearch/hermes-agent/pull/7166), [#8665](https://github.com/NousResearch/hermes-agent/pull/8665))
|
||||
- **WeCom Callback Mode** — self-built enterprise app adapter with atomic state persistence ([#7943](https://github.com/NousResearch/hermes-agent/pull/7943), [#7928](https://github.com/NousResearch/hermes-agent/pull/7928))
|
||||
|
||||
### Discord
|
||||
- **Allowed channels whitelist** config — @jarvis-phw ([#7044](https://github.com/NousResearch/hermes-agent/pull/7044))
|
||||
- **Forum channel topic inheritance** in thread sessions — @hermes-agent-dhabibi ([#6377](https://github.com/NousResearch/hermes-agent/pull/6377))
|
||||
- **DISCORD_REPLY_TO_MODE** setting ([#6333](https://github.com/NousResearch/hermes-agent/pull/6333))
|
||||
- Accept `.log` attachments, raise document size limit — @kira-ariaki ([#6467](https://github.com/NousResearch/hermes-agent/pull/6467))
|
||||
- Decouple readiness from slash sync ([#8016](https://github.com/NousResearch/hermes-agent/pull/8016))
|
||||
|
||||
### Slack
|
||||
- **Consolidated Slack improvements** — 7 community PRs salvaged into one ([#6809](https://github.com/NousResearch/hermes-agent/pull/6809))
|
||||
- Handle assistant thread lifecycle events ([#6433](https://github.com/NousResearch/hermes-agent/pull/6433))
|
||||
|
||||
### Matrix
|
||||
- **Migrated from matrix-nio to mautrix-python** ([#7518](https://github.com/NousResearch/hermes-agent/pull/7518))
|
||||
- SQLite crypto store replacing pickle (fixes E2EE decryption) — @alt-glitch ([#7981](https://github.com/NousResearch/hermes-agent/pull/7981))
|
||||
- Cross-signing recovery key verification for E2EE migration ([#8282](https://github.com/NousResearch/hermes-agent/pull/8282))
|
||||
- DM mention threads + group chat events for Feishu ([#7423](https://github.com/NousResearch/hermes-agent/pull/7423))
|
||||
|
||||
### Gateway Core
|
||||
- **Unified proxy support** — SOCKS, DISCORD_PROXY, multi-platform with macOS auto-detection ([#6814](https://github.com/NousResearch/hermes-agent/pull/6814))
|
||||
- **Inbound text batching** for Discord, Matrix, WeCom + adaptive delay ([#6979](https://github.com/NousResearch/hermes-agent/pull/6979))
|
||||
- **Surface natural mid-turn assistant messages** in chat platforms ([#7978](https://github.com/NousResearch/hermes-agent/pull/7978))
|
||||
- **WSL-aware gateway** with smart systemd detection ([#7510](https://github.com/NousResearch/hermes-agent/pull/7510))
|
||||
- **All missing platforms added to setup wizard** ([#7949](https://github.com/NousResearch/hermes-agent/pull/7949))
|
||||
- **Per-platform `tool_progress` overrides** ([#6348](https://github.com/NousResearch/hermes-agent/pull/6348))
|
||||
- **Configurable 'still working' notification interval** ([#8572](https://github.com/NousResearch/hermes-agent/pull/8572))
|
||||
- `/model` switch persists across messages ([#7081](https://github.com/NousResearch/hermes-agent/pull/7081))
|
||||
- `/usage` shows rate limits, cost, and token details between turns ([#7038](https://github.com/NousResearch/hermes-agent/pull/7038))
|
||||
- Drain in-flight work before restart ([#7503](https://github.com/NousResearch/hermes-agent/pull/7503))
|
||||
- Don't evict cached agent on failed runs — prevents MCP restart loop ([#7539](https://github.com/NousResearch/hermes-agent/pull/7539))
|
||||
- Replace `os.environ` session state with `contextvars` ([#7454](https://github.com/NousResearch/hermes-agent/pull/7454))
|
||||
- Derive channel directory platforms from enum instead of hardcoded list ([#7450](https://github.com/NousResearch/hermes-agent/pull/7450))
|
||||
- Validate image downloads before caching (cross-platform) ([#7125](https://github.com/NousResearch/hermes-agent/pull/7125))
|
||||
- Cross-platform webhook delivery for all platforms ([#7095](https://github.com/NousResearch/hermes-agent/pull/7095))
|
||||
- Cron Discord thread_id delivery support ([#7106](https://github.com/NousResearch/hermes-agent/pull/7106))
|
||||
- Feishu QR-based bot onboarding ([#8570](https://github.com/NousResearch/hermes-agent/pull/8570))
|
||||
- Gateway status scoped to active profile ([#7951](https://github.com/NousResearch/hermes-agent/pull/7951))
|
||||
- Prevent background process notifications from triggering false pairing requests ([#6434](https://github.com/NousResearch/hermes-agent/pull/6434))
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ CLI & User Experience
|
||||
|
||||
### Interactive CLI
|
||||
- **Termux / Android support** — adapted install paths, TUI, voice, `/image` ([#6834](https://github.com/NousResearch/hermes-agent/pull/6834))
|
||||
- **Native `/model` picker modal** for provider → model selection ([#8003](https://github.com/NousResearch/hermes-agent/pull/8003))
|
||||
- **Live per-tool elapsed timer** restored in TUI spinner ([#7359](https://github.com/NousResearch/hermes-agent/pull/7359))
|
||||
- **Stacked tool progress scrollback** in TUI ([#8201](https://github.com/NousResearch/hermes-agent/pull/8201))
|
||||
- **Random tips on new session start** (CLI + gateway, 279 tips) ([#8225](https://github.com/NousResearch/hermes-agent/pull/8225), [#8237](https://github.com/NousResearch/hermes-agent/pull/8237))
|
||||
- **`hermes dump`** — copy-pasteable setup summary for debugging ([#6550](https://github.com/NousResearch/hermes-agent/pull/6550))
|
||||
- **`hermes backup` / `hermes import`** — full config backup and restore ([#7997](https://github.com/NousResearch/hermes-agent/pull/7997))
|
||||
- **WSL environment hint** in system prompt ([#8285](https://github.com/NousResearch/hermes-agent/pull/8285))
|
||||
- **Profile creation UX** — seed SOUL.md + credential warning ([#8553](https://github.com/NousResearch/hermes-agent/pull/8553))
|
||||
- Shell-aware sudo detection, empty password support ([#6517](https://github.com/NousResearch/hermes-agent/pull/6517))
|
||||
- Flush stdin after curses/terminal menus to prevent escape sequence leakage ([#7167](https://github.com/NousResearch/hermes-agent/pull/7167))
|
||||
- Handle broken stdin in prompt_toolkit startup ([#8560](https://github.com/NousResearch/hermes-agent/pull/8560))
|
||||
|
||||
### Setup & Configuration
|
||||
- **Per-platform display verbosity** configuration ([#8006](https://github.com/NousResearch/hermes-agent/pull/8006))
|
||||
- **Component-separated logging** with session context and filtering ([#7991](https://github.com/NousResearch/hermes-agent/pull/7991))
|
||||
- **`network.force_ipv4`** config to fix IPv6 timeout issues ([#8196](https://github.com/NousResearch/hermes-agent/pull/8196))
|
||||
- **Standardize message whitespace and JSON formatting** ([#7988](https://github.com/NousResearch/hermes-agent/pull/7988))
|
||||
- **Rebrand OpenClaw → Hermes** during migration ([#8210](https://github.com/NousResearch/hermes-agent/pull/8210))
|
||||
- Config.yaml takes priority over env vars for auxiliary settings ([#7889](https://github.com/NousResearch/hermes-agent/pull/7889))
|
||||
- Harden setup provider flows + live OpenRouter catalog refresh ([#7078](https://github.com/NousResearch/hermes-agent/pull/7078))
|
||||
- Normalize reasoning effort ordering across all surfaces ([#6804](https://github.com/NousResearch/hermes-agent/pull/6804))
|
||||
- Remove dead `LLM_MODEL` env var + migration to clear stale entries ([#6543](https://github.com/NousResearch/hermes-agent/pull/6543))
|
||||
- Remove `/prompt` slash command — prefix expansion footgun ([#6752](https://github.com/NousResearch/hermes-agent/pull/6752))
|
||||
- `HERMES_HOME_MODE` env var to override permissions — @ygd58 ([#6993](https://github.com/NousResearch/hermes-agent/pull/6993))
|
||||
- Fall back to default model when model config is empty ([#8303](https://github.com/NousResearch/hermes-agent/pull/8303))
|
||||
- Warn when compression model context is too small ([#7894](https://github.com/NousResearch/hermes-agent/pull/7894))
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Tool System
|
||||
|
||||
### Environments & Execution
|
||||
- **Unified spawn-per-call execution layer** for environments ([#6343](https://github.com/NousResearch/hermes-agent/pull/6343))
|
||||
- **Unified file sync** with mtime tracking, deletion, and transactional state ([#7087](https://github.com/NousResearch/hermes-agent/pull/7087))
|
||||
- **Persistent sandbox envs** survive between turns ([#6412](https://github.com/NousResearch/hermes-agent/pull/6412))
|
||||
- **Bulk file sync** via tar pipe for SSH/Modal backends — @alt-glitch ([#8014](https://github.com/NousResearch/hermes-agent/pull/8014))
|
||||
- **Daytona** — bulk upload, config bridge, silent disk cap ([#7538](https://github.com/NousResearch/hermes-agent/pull/7538))
|
||||
- Foreground timeout cap to prevent session deadlocks ([#7082](https://github.com/NousResearch/hermes-agent/pull/7082))
|
||||
- Guard invalid command values ([#6417](https://github.com/NousResearch/hermes-agent/pull/6417))
|
||||
|
||||
### MCP
|
||||
- **`hermes mcp add --env` and `--preset`** support ([#7970](https://github.com/NousResearch/hermes-agent/pull/7970))
|
||||
- Combine `content` and `structuredContent` when both present ([#7118](https://github.com/NousResearch/hermes-agent/pull/7118))
|
||||
- MCP tool name deconfliction fixes ([#7654](https://github.com/NousResearch/hermes-agent/pull/7654))
|
||||
|
||||
### Browser
|
||||
- Browser hardening — dead code removal, caching, scroll perf, security, thread safety ([#7354](https://github.com/NousResearch/hermes-agent/pull/7354))
|
||||
- `/browser connect` auto-launch uses dedicated Chrome profile dir ([#6821](https://github.com/NousResearch/hermes-agent/pull/6821))
|
||||
- Reap orphaned browser sessions on startup ([#7931](https://github.com/NousResearch/hermes-agent/pull/7931))
|
||||
|
||||
### Voice & Vision
|
||||
- **Voxtral TTS provider** (Mistral AI) ([#7653](https://github.com/NousResearch/hermes-agent/pull/7653))
|
||||
- **TTS speed support** for Edge TTS, OpenAI TTS, MiniMax ([#8666](https://github.com/NousResearch/hermes-agent/pull/8666))
|
||||
- **Vision auto-resize** for oversized images, raise limit to 20 MB, retry-on-failure ([#7883](https://github.com/NousResearch/hermes-agent/pull/7883), [#7902](https://github.com/NousResearch/hermes-agent/pull/7902))
|
||||
- STT provider-model mismatch fix (whisper-1 vs faster-whisper) ([#7113](https://github.com/NousResearch/hermes-agent/pull/7113))
|
||||
|
||||
### Other Tools
|
||||
- **`hermes dump`** command for setup summary ([#6550](https://github.com/NousResearch/hermes-agent/pull/6550))
|
||||
- TODO store enforces ID uniqueness during replace operations ([#7986](https://github.com/NousResearch/hermes-agent/pull/7986))
|
||||
- List all available toolsets in `delegate_task` schema description ([#8231](https://github.com/NousResearch/hermes-agent/pull/8231))
|
||||
- API server: tool progress as custom SSE event to prevent model corruption ([#7500](https://github.com/NousResearch/hermes-agent/pull/7500))
|
||||
- API server: share one Docker container across all conversations ([#7127](https://github.com/NousResearch/hermes-agent/pull/7127))
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Skills Ecosystem
|
||||
|
||||
- **Centralized skills index + tree cache** — eliminates rate-limit failures on install ([#8575](https://github.com/NousResearch/hermes-agent/pull/8575))
|
||||
- **More aggressive skill loading instructions** in system prompt (v3) ([#8209](https://github.com/NousResearch/hermes-agent/pull/8209), [#8286](https://github.com/NousResearch/hermes-agent/pull/8286))
|
||||
- **Google Workspace skill** migrated to GWS CLI backend ([#6788](https://github.com/NousResearch/hermes-agent/pull/6788))
|
||||
- **Creative divergence strategies** skill — @SHL0MS ([#6882](https://github.com/NousResearch/hermes-agent/pull/6882))
|
||||
- **Creative ideation** — constraint-driven project generation — @SHL0MS ([#7555](https://github.com/NousResearch/hermes-agent/pull/7555))
|
||||
- Parallelize skills browse/search to prevent hanging ([#7301](https://github.com/NousResearch/hermes-agent/pull/7301))
|
||||
- Read name from SKILL.md frontmatter in skills_sync ([#7623](https://github.com/NousResearch/hermes-agent/pull/7623))
|
||||
|
||||
---
|
||||
|
||||
## 🔒 Security & Reliability
|
||||
|
||||
### Security Hardening
|
||||
- **Twilio webhook signature validation** — SMS RCE fix ([#7933](https://github.com/NousResearch/hermes-agent/pull/7933))
|
||||
- **Shell injection neutralization** in `_write_to_sandbox` via path quoting ([#7940](https://github.com/NousResearch/hermes-agent/pull/7940))
|
||||
- **Git argument injection** and path traversal prevention in checkpoint manager ([#7944](https://github.com/NousResearch/hermes-agent/pull/7944))
|
||||
- **SSRF redirect bypass** in Slack image uploads + base.py cache helpers ([#7151](https://github.com/NousResearch/hermes-agent/pull/7151))
|
||||
- **Path traversal, credential gate, DANGEROUS_PATTERNS gaps** ([#7156](https://github.com/NousResearch/hermes-agent/pull/7156))
|
||||
- **API bind guard** — enforce `API_SERVER_KEY` for non-loopback binding ([#7455](https://github.com/NousResearch/hermes-agent/pull/7455))
|
||||
- **Approval button authorization** — require auth for session continuation — @Cafexss ([#6930](https://github.com/NousResearch/hermes-agent/pull/6930))
|
||||
- Path boundary enforcement in skill manager operations ([#7156](https://github.com/NousResearch/hermes-agent/pull/7156))
|
||||
- DingTalk/API webhook URL origin validation, header injection rejection ([#7455](https://github.com/NousResearch/hermes-agent/pull/7455))
|
||||
|
||||
### Reliability
|
||||
- **Contextual error diagnostics** for invalid API responses ([#8565](https://github.com/NousResearch/hermes-agent/pull/8565))
|
||||
- **Prevent 400 format errors** from triggering compression loop on Codex ([#6751](https://github.com/NousResearch/hermes-agent/pull/6751))
|
||||
- **Don't halve context_length** on output-cap-too-large errors — @KUSH42 ([#6664](https://github.com/NousResearch/hermes-agent/pull/6664))
|
||||
- **Recover primary client** on OpenAI transport errors ([#7108](https://github.com/NousResearch/hermes-agent/pull/7108))
|
||||
- **Credential pool rotation** on billing-classified 400s ([#7112](https://github.com/NousResearch/hermes-agent/pull/7112))
|
||||
- **Auto-increase stream read timeout** for local LLM providers ([#6967](https://github.com/NousResearch/hermes-agent/pull/6967))
|
||||
- **Fall back to default certs** when CA bundle path doesn't exist ([#7352](https://github.com/NousResearch/hermes-agent/pull/7352))
|
||||
- **Disambiguate usage-limit patterns** in error classifier — @sprmn24 ([#6836](https://github.com/NousResearch/hermes-agent/pull/6836))
|
||||
- Harden cron script timeout and provider recovery ([#7079](https://github.com/NousResearch/hermes-agent/pull/7079))
|
||||
- Gateway interrupt detection resilient to monitor task failures ([#8208](https://github.com/NousResearch/hermes-agent/pull/8208))
|
||||
- Prevent unwanted session auto-reset after graceful gateway restarts ([#8299](https://github.com/NousResearch/hermes-agent/pull/8299))
|
||||
- Prevent duplicate update prompt spam in gateway watcher ([#8343](https://github.com/NousResearch/hermes-agent/pull/8343))
|
||||
- Deduplicate reasoning items in Responses API input ([#7946](https://github.com/NousResearch/hermes-agent/pull/7946))
|
||||
|
||||
### Infrastructure
|
||||
- **Multi-arch Docker image** — amd64 + arm64 ([#6124](https://github.com/NousResearch/hermes-agent/pull/6124))
|
||||
- **Docker runs as non-root user** with virtualenv — @benbarclay contributing ([#8226](https://github.com/NousResearch/hermes-agent/pull/8226))
|
||||
- **Use `uv`** for Docker dependency resolution to fix resolution-too-deep ([#6965](https://github.com/NousResearch/hermes-agent/pull/6965))
|
||||
- **Container-aware Nix CLI** — auto-route into managed container — @alt-glitch ([#7543](https://github.com/NousResearch/hermes-agent/pull/7543))
|
||||
- **Nix shared-state permission model** for interactive CLI users — @alt-glitch ([#6796](https://github.com/NousResearch/hermes-agent/pull/6796))
|
||||
- **Per-profile subprocess HOME isolation** ([#7357](https://github.com/NousResearch/hermes-agent/pull/7357))
|
||||
- Profile paths fixed in Docker — profiles go to mounted volume ([#7170](https://github.com/NousResearch/hermes-agent/pull/7170))
|
||||
- Docker container gateway pathway hardened ([#8614](https://github.com/NousResearch/hermes-agent/pull/8614))
|
||||
- Enable unbuffered stdout for live Docker logs ([#6749](https://github.com/NousResearch/hermes-agent/pull/6749))
|
||||
- Install procps in Docker image — @HiddenPuppy ([#7032](https://github.com/NousResearch/hermes-agent/pull/7032))
|
||||
- Shallow git clone for faster installation — @sosyz ([#8396](https://github.com/NousResearch/hermes-agent/pull/8396))
|
||||
- `hermes update` always reset on stash conflict ([#7010](https://github.com/NousResearch/hermes-agent/pull/7010))
|
||||
- Write update exit code before gateway restart (cgroup kill race) ([#8288](https://github.com/NousResearch/hermes-agent/pull/8288))
|
||||
- Nix: `setupSecrets` optional, tirith runtime dep — @devorun, @ethernet8023 ([#6261](https://github.com/NousResearch/hermes-agent/pull/6261), [#6721](https://github.com/NousResearch/hermes-agent/pull/6721))
|
||||
- launchd stop uses `bootout` so `KeepAlive` doesn't respawn ([#7119](https://github.com/NousResearch/hermes-agent/pull/7119))
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Notable Bug Fixes
|
||||
|
||||
- Fix: `/model` switch not persisting across gateway messages ([#7081](https://github.com/NousResearch/hermes-agent/pull/7081))
|
||||
- Fix: session-scoped gateway model overrides ignored — @Hygaard ([#7662](https://github.com/NousResearch/hermes-agent/pull/7662))
|
||||
- Fix: compaction model context length ignoring config — 3 related issues ([#8258](https://github.com/NousResearch/hermes-agent/pull/8258), [#8107](https://github.com/NousResearch/hermes-agent/pull/8107))
|
||||
- Fix: OpenCode.ai context window resolved to 128K instead of 1M ([#6472](https://github.com/NousResearch/hermes-agent/pull/6472))
|
||||
- Fix: Codex fallback auth-store lookup — @cherifya ([#6462](https://github.com/NousResearch/hermes-agent/pull/6462))
|
||||
- Fix: duplicate completion notifications when process killed ([#7124](https://github.com/NousResearch/hermes-agent/pull/7124))
|
||||
- Fix: agent daemon thread prevents orphan CLI processes on tab close ([#8557](https://github.com/NousResearch/hermes-agent/pull/8557))
|
||||
- Fix: stale image attachment on text paste and voice input ([#7077](https://github.com/NousResearch/hermes-agent/pull/7077))
|
||||
- Fix: DM thread session seeding causing cross-thread contamination ([#7084](https://github.com/NousResearch/hermes-agent/pull/7084))
|
||||
- Fix: OpenClaw migration shows dry-run preview before executing ([#6769](https://github.com/NousResearch/hermes-agent/pull/6769))
|
||||
- Fix: auth errors misclassified as retryable — @kuishou68 ([#7027](https://github.com/NousResearch/hermes-agent/pull/7027))
|
||||
- Fix: Copilot-Integration-Id header missing ([#7083](https://github.com/NousResearch/hermes-agent/pull/7083))
|
||||
- Fix: ACP session capabilities — @luyao618 ([#6985](https://github.com/NousResearch/hermes-agent/pull/6985))
|
||||
- Fix: ACP PromptResponse usage from top-level fields ([#7086](https://github.com/NousResearch/hermes-agent/pull/7086))
|
||||
- Fix: several failing/flaky tests on main — @dsocolobsky ([#6777](https://github.com/NousResearch/hermes-agent/pull/6777))
|
||||
- Fix: backup marker filenames — @sprmn24 ([#8600](https://github.com/NousResearch/hermes-agent/pull/8600))
|
||||
- Fix: `NoneType` in fast_mode check — @0xbyt4 ([#7350](https://github.com/NousResearch/hermes-agent/pull/7350))
|
||||
- Fix: missing imports in uninstall.py — @JiayuuWang ([#7034](https://github.com/NousResearch/hermes-agent/pull/7034))
|
||||
|
||||
---
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
- Platform adapter developer guide + WeCom Callback docs ([#7969](https://github.com/NousResearch/hermes-agent/pull/7969))
|
||||
- Cron troubleshooting guide ([#7122](https://github.com/NousResearch/hermes-agent/pull/7122))
|
||||
- Streaming timeout auto-detection for local LLMs ([#6990](https://github.com/NousResearch/hermes-agent/pull/6990))
|
||||
- Tool-use enforcement documentation expanded ([#7984](https://github.com/NousResearch/hermes-agent/pull/7984))
|
||||
- BlueBubbles pairing instructions ([#6548](https://github.com/NousResearch/hermes-agent/pull/6548))
|
||||
- Telegram proxy support section ([#6348](https://github.com/NousResearch/hermes-agent/pull/6348))
|
||||
- `hermes dump` and `hermes logs` CLI reference ([#6552](https://github.com/NousResearch/hermes-agent/pull/6552))
|
||||
- `tool_progress_overrides` configuration reference ([#6364](https://github.com/NousResearch/hermes-agent/pull/6364))
|
||||
- Compression model context length warning docs ([#7879](https://github.com/NousResearch/hermes-agent/pull/7879))
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributors
|
||||
|
||||
**269 merged PRs** from **24 contributors** across **487 commits**.
|
||||
|
||||
### Community Contributors
|
||||
- **@alt-glitch** (6 PRs) — Nix container-aware CLI, shared-state permissions, Matrix SQLite crypto store, bulk SSH/Modal file sync, Matrix mautrix compat
|
||||
- **@SHL0MS** (2 PRs) — Creative divergence strategies skill, creative ideation skill
|
||||
- **@sprmn24** (2 PRs) — Error classifier disambiguation, backup marker fix
|
||||
- **@nicoloboschi** — Hindsight memory plugin feature parity
|
||||
- **@Hygaard** — Session-scoped gateway model override fix
|
||||
- **@jarvis-phw** — Discord allowed_channels whitelist
|
||||
- **@Kathie-yu** — Honcho initOnSessionStart for tools mode
|
||||
- **@hermes-agent-dhabibi** — Discord forum channel topic inheritance
|
||||
- **@kira-ariaki** — Discord .log attachments and size limit
|
||||
- **@cherifya** — Codex fallback auth-store lookup
|
||||
- **@Cafexss** — Security: auth for session continuation
|
||||
- **@KUSH42** — Compaction context_length fix
|
||||
- **@kuishou68** — Auth error retryable classification fix
|
||||
- **@luyao618** — ACP session capabilities
|
||||
- **@ygd58** — HERMES_HOME_MODE env var override
|
||||
- **@0xbyt4** — Fast mode NoneType fix
|
||||
- **@JiayuuWang** — CLI uninstall import fix
|
||||
- **@HiddenPuppy** — Docker procps installation
|
||||
- **@dsocolobsky** — Test suite fixes
|
||||
- **@bobashopcashier** (1 PR) — Graceful gateway drain before restart (salvaged into #7503 from #7290)
|
||||
- **@benbarclay** — Docker image tag simplification
|
||||
- **@sosyz** — Shallow git clone for faster install
|
||||
- **@devorun** — Nix setupSecrets optional
|
||||
- **@ethernet8023** — Nix tirith runtime dep
|
||||
|
||||
---
|
||||
|
||||
**Full Changelog**: [v2026.4.8...v2026.4.13](https://github.com/NousResearch/hermes-agent/compare/v2026.4.8...v2026.4.13)
|
||||
331
SECURITY.md
331
SECURITY.md
@@ -1,331 +0,0 @@
|
||||
# Hermes Agent Security Policy
|
||||
|
||||
This document describes Hermes Agent's trust model, names the one
|
||||
security boundary the project treats as load-bearing, and defines the
|
||||
scope for vulnerability reports.
|
||||
|
||||
## 1. Reporting a Vulnerability
|
||||
|
||||
Report privately via [GitHub Security Advisories](https://github.com/NousResearch/hermes-agent/security/advisories/new)
|
||||
or **security@nousresearch.com**. Do not open public issues for
|
||||
security vulnerabilities. **Hermes Agent does not operate a bug
|
||||
bounty program.**
|
||||
|
||||
A useful report includes:
|
||||
|
||||
- A concise description and severity assessment.
|
||||
- The affected component, identified by file path and line range
|
||||
(e.g. `path/to/file.py:120-145`).
|
||||
- Environment details (`hermes version`, commit SHA, OS, Python
|
||||
version).
|
||||
- A reproduction against `main` or the latest release.
|
||||
- A statement of which trust boundary in §2 is crossed.
|
||||
|
||||
Please read §2 and §3 before submitting. Reports that demonstrate
|
||||
limits of an in-process heuristic this policy does not treat as a
|
||||
boundary will be closed as out-of-scope under §3 — but see §3.2:
|
||||
they are still welcome as regular issues or pull requests, just not
|
||||
through the private security channel.
|
||||
|
||||
---
|
||||
|
||||
## 2. Trust Model
|
||||
|
||||
Hermes Agent is a single-tenant personal agent. Its posture is
|
||||
layered, and the layers are not equally load-bearing. Reporters and
|
||||
operators should reason about them in the same terms.
|
||||
|
||||
### 2.1 Definitions
|
||||
|
||||
- **Agent process.** The Python interpreter running Hermes Agent,
|
||||
including any Python modules it has loaded (skills, plugins,
|
||||
hook handlers).
|
||||
- **Terminal backend.** A pluggable execution target for the
|
||||
`terminal()` tool. The default runs commands directly on the host.
|
||||
Other backends run commands inside a container, cloud sandbox, or
|
||||
remote host.
|
||||
- **Input surface.** Any channel through which content enters the
|
||||
agent's context: operator input, web fetches, email, gateway
|
||||
messages, file reads, MCP server responses, tool results.
|
||||
- **Trust envelope.** The set of resources an operator has implicitly
|
||||
granted Hermes Agent access to by running it — typically, whatever
|
||||
the operator's own user account can reach on the host.
|
||||
- **Stance.** An explicit statement in Hermes Agent's documentation
|
||||
or code about how a consuming layer (adapter, UI, file writer,
|
||||
shell) should treat agent output — e.g. "the dashboard renders
|
||||
agent output as inert HTML."
|
||||
|
||||
### 2.2 The Boundary: OS-Level Isolation
|
||||
|
||||
**The only security boundary against an adversarial LLM is the
|
||||
operating system.** Nothing inside the agent process constitutes
|
||||
containment — not the approval gate, not output redaction, not any
|
||||
pattern scanner, not any tool allowlist. Any in-process component
|
||||
that screens LLM output is a heuristic operating on an
|
||||
attacker-influenced string, and this policy treats it as such.
|
||||
|
||||
Hermes Agent supports two OS-level isolation postures. They address
|
||||
different threats and an operator should choose deliberately.
|
||||
|
||||
#### Terminal-backend isolation
|
||||
|
||||
A non-default terminal backend runs LLM-emitted shell commands
|
||||
inside a container, remote host, or cloud sandbox. The file tools
|
||||
(`read_file`, `write_file`, `patch`) also run through this backend,
|
||||
since they are implemented on top of the shell contract — they
|
||||
cannot reach paths the backend doesn't expose.
|
||||
|
||||
What this confines: anything the agent does by issuing shell or
|
||||
file operations. What this does **not** confine: everything the
|
||||
agent does in its own Python process. That includes the
|
||||
code-execution tool (spawned as a host subprocess), MCP subprocesses
|
||||
(spawned from the agent's environment), plugin loading, hook
|
||||
dispatch, and skill loading (all imported into the agent
|
||||
interpreter).
|
||||
|
||||
Terminal-backend isolation is the right posture when the concern is
|
||||
LLM-emitted destructive shell or unwanted file-tool writes, and the
|
||||
operator is otherwise trusted.
|
||||
|
||||
#### Whole-process wrapping
|
||||
|
||||
Whole-process wrapping runs the entire agent process tree inside a
|
||||
sandbox. Every code path — shell, code-execution, MCP, file tools,
|
||||
plugins, hooks, skill loading — is subject to the same filesystem,
|
||||
network, process, and (where applicable) inference policy.
|
||||
|
||||
Hermes Agent supports this in two ways:
|
||||
|
||||
- **Hermes Agent's own Docker image and Compose setup.** Lighter-
|
||||
weight; the agent runs in a standard container with operator-
|
||||
configured mounts and network policy.
|
||||
- **[NVIDIA OpenShell](https://github.com/NVIDIA/OpenShell)**.
|
||||
OpenShell provides per-session sandboxes with declarative policy
|
||||
across filesystem, network (L7 egress), process/syscall, and
|
||||
inference-routing layers. Network and inference policies are
|
||||
hot-reloadable. Credentials are injected from a Provider store
|
||||
and never touch the sandbox filesystem.
|
||||
|
||||
Under a whole-process wrapper, Hermes Agent's in-process heuristics
|
||||
(§2.4) function as accident-prevention layered on top of a real
|
||||
boundary. This is the supported posture when the agent ingests
|
||||
content from surfaces the operator does not control — the open web,
|
||||
inbound email, multi-user channels, untrusted MCP servers — and for
|
||||
production or shared deployments.
|
||||
|
||||
Operators running the default local backend with untrusted input
|
||||
surfaces, or running a terminal-backend sandbox and expecting it to
|
||||
contain code paths that don't go through the shell, are operating
|
||||
outside the supported security posture.
|
||||
|
||||
### 2.3 Credential Scoping
|
||||
|
||||
Hermes Agent filters the environment it passes to its lower-trust
|
||||
in-process components: shell subprocesses, MCP subprocesses, and
|
||||
the code-execution child. Credentials like provider API keys and
|
||||
gateway tokens are stripped by default; variables explicitly
|
||||
declared by the operator or by a loaded skill are passed through.
|
||||
|
||||
This reduces casual exfiltration. It is not containment. Any
|
||||
component running inside the agent process (skills, plugins, hook
|
||||
handlers) can read whatever the agent itself can read, including
|
||||
in-memory credentials. The mitigation against a compromised
|
||||
in-process component is operator review before install (§2.4,
|
||||
§2.5), not environment scrubbing.
|
||||
|
||||
### 2.4 In-Process Heuristics
|
||||
|
||||
The following components screen or warn about LLM behavior. They
|
||||
are useful. They are not boundaries.
|
||||
|
||||
- The **approval gate** detects common destructive shell patterns
|
||||
and prompts the operator before execution. Shell is Turing-
|
||||
complete; a denylist over shell strings is structurally
|
||||
incomplete. The gate catches cooperative-mode mistakes, not
|
||||
adversarial output.
|
||||
- **Output redaction** strips secret-like patterns from display.
|
||||
A motivated output producer will defeat it.
|
||||
- **Skills Guard** scans installable skill content for injection
|
||||
patterns. It is a review aid; the boundary for third-party skills
|
||||
is operator review before install. Reviewing a skill means
|
||||
reading its Python code and scripts, not just its SKILL.md
|
||||
description — skills execute arbitrary Python at import time.
|
||||
|
||||
### 2.5 Plugin Trust Model
|
||||
|
||||
Plugins load into the agent process and run with full agent
|
||||
privileges: they can read the same credentials, call the same
|
||||
tools, register the same hooks, and import the same modules as
|
||||
anything shipped in-tree. The boundary for third-party plugins is
|
||||
operator review before install — the same rule as skills (§2.4),
|
||||
called out separately because plugins are architecturally heavier
|
||||
and often ship their own background services, network listeners,
|
||||
and dependencies.
|
||||
|
||||
A malicious or buggy plugin is not a vulnerability in Hermes Agent
|
||||
itself. Bugs in Hermes Agent's plugin-install or plugin-discovery
|
||||
path that prevent the operator from seeing what they're installing
|
||||
are in scope under §3.1.
|
||||
|
||||
### 2.6 External Surfaces
|
||||
|
||||
An **external surface** is any channel outside the local agent
|
||||
process through which a caller can dispatch agent work, resolve
|
||||
approvals, or receive agent output. Each surface has its own
|
||||
authorization model, but the rules below apply uniformly.
|
||||
|
||||
**Surfaces in Hermes Agent:**
|
||||
|
||||
- **Gateway platform adapters.** Messaging integrations in
|
||||
`gateway/platforms/` (Telegram, Discord, Slack, email, SMS, etc.)
|
||||
and analogous adapters shipped as plugins.
|
||||
- **Network-exposed HTTP surfaces.** The API server adapter, the
|
||||
dashboard plugin, the kanban plugin's HTTP endpoints, and any
|
||||
other plugin that binds a listening socket.
|
||||
- **Editor / IDE adapters.** The ACP adapter (`acp_adapter/`) and
|
||||
equivalent integrations that accept requests from a local client
|
||||
process.
|
||||
- **The TUI gateway (`tui_gateway/`).** JSON-RPC backend for the
|
||||
Ink terminal UI, reached over local IPC.
|
||||
|
||||
**Uniform rules:**
|
||||
|
||||
1. **Authorization is required at every surface that crosses a
|
||||
trust boundary.** For messaging and network HTTP surfaces, the
|
||||
boundary is the network: authorization means an operator-
|
||||
configured caller allowlist. For editor and local-IPC surfaces
|
||||
(ACP, TUI gateway), the boundary is the host's user account:
|
||||
authorization means relying on OS-level access control (file
|
||||
permissions, loopback-only binds) and not exposing the surface
|
||||
beyond the local user without an explicit network auth layer.
|
||||
2. **An allowlist is required for every enabled network-exposed
|
||||
adapter.** Adapters must refuse to dispatch agent work, resolve
|
||||
approvals, or relay output until an allowlist is set. Code paths
|
||||
that fail open when no allowlist is configured are code bugs in
|
||||
scope under §3.1.
|
||||
3. **Session identifiers are routing handles, not authorization
|
||||
boundaries.** Knowing another caller's session ID does not grant
|
||||
access to their approvals or output; authorization is always
|
||||
re-checked against the allowlist (or OS-level equivalent).
|
||||
4. **Within the authorized set, all callers are equally trusted.**
|
||||
Hermes Agent does not model per-caller capabilities inside a
|
||||
single adapter. Operators who need capability separation should
|
||||
run separate agent instances with separate allowlists.
|
||||
5. **Binding a local-only surface to a non-loopback interface is a
|
||||
break-glass operator decision (§3.2).** The dashboard and other
|
||||
plugin HTTP servers default to loopback; exposing them via
|
||||
`--host 0.0.0.0` or equivalent makes public-exposure hardening
|
||||
(§4) the operator's responsibility.
|
||||
|
||||
---
|
||||
|
||||
## 3. Scope
|
||||
|
||||
### 3.1 In Scope
|
||||
|
||||
- Escape from a declared OS-level isolation posture (§2.2): an
|
||||
attacker-controlled code path reaching state that the posture
|
||||
claimed to confine.
|
||||
- Unauthorized external-surface access: a caller outside the
|
||||
configured authorization set (allowlist, or OS-level equivalent
|
||||
for local-IPC surfaces) dispatching work, receiving output, or
|
||||
resolving approvals (§2.6).
|
||||
- Credential exfiltration: leakage of operator credentials or
|
||||
session authorization material to a destination outside the
|
||||
trust envelope, via a mechanism that should have prevented it
|
||||
(environment scrubbing bug, adapter logging, transport error
|
||||
that flushes credentials to an upstream, etc.).
|
||||
- Trust-model documentation violations: code behaving contrary to
|
||||
what this policy, Hermes Agent's own documentation, or reasonable
|
||||
operator expectations would predict — including cases where
|
||||
Hermes Agent has documented a stance about how its output should
|
||||
be rendered by a consuming layer (dashboard, gateway adapter,
|
||||
file writer, shell) and a code path breaks that stance.
|
||||
|
||||
### 3.2 Out of Scope
|
||||
|
||||
"Out of scope" here means "not a security vulnerability under this
|
||||
policy." It does not mean "not worth reporting." Improvements to the
|
||||
in-process heuristics, hardening ideas, and UX fixes are welcome as
|
||||
regular issues or pull requests — the approval gate can always catch
|
||||
more patterns, redaction can always get smarter, adapter behavior
|
||||
can always be tightened. These items just don't go through the
|
||||
private-disclosure channel and don't receive advisories.
|
||||
|
||||
- **Bypasses of in-process heuristics (§2.4)** — approval-gate regex
|
||||
bypasses, redaction bypasses, Skills Guard pattern bypasses, and
|
||||
analogous reports against future heuristics. These components are
|
||||
not boundaries; defeating them is not a vulnerability under this
|
||||
policy.
|
||||
- **Prompt injection per se.** Getting the LLM to emit unusual
|
||||
output — via injected content, hallucination, training artifacts,
|
||||
or any other cause — is not itself a vulnerability. "I achieved
|
||||
prompt injection" without a chained §3.1 outcome is not an
|
||||
actionable report under this policy.
|
||||
- **Consequences of a chosen isolation posture.** Reports that a
|
||||
code path operating within its posture's scope can do what that
|
||||
posture permits are not vulnerabilities. Examples: shell or file
|
||||
tools reaching host state under the local backend; code-execution
|
||||
or MCP subprocesses reaching host state under terminal-backend
|
||||
isolation that only sandboxes shell; reports whose preconditions
|
||||
require pre-existing write access to operator-owned configuration
|
||||
or credential files (those are already inside the trust envelope).
|
||||
- **Documented break-glass settings.** Operator-selected trade-offs
|
||||
that explicitly disable protections: `--insecure` and equivalent
|
||||
flags on the dashboard or other components, disabled approvals,
|
||||
local backend in production, development profiles that bypass
|
||||
hermes-home security, and similar. Reports against those
|
||||
configurations are not vulnerabilities — that's the flag's job.
|
||||
- **Community-contributed skills and plugins.** Third-party skills
|
||||
(including the community skills repository) and third-party
|
||||
plugins are in the operator's review surface, not Hermes Agent's
|
||||
trust surface (§2.4, §2.5). A skill or plugin doing something
|
||||
malicious is the expected failure mode of one that wasn't
|
||||
reviewed, not a vulnerability in Hermes Agent. Bugs in Hermes
|
||||
Agent's skill-install or plugin-install path that prevent the
|
||||
operator from seeing what they're installing are in scope under
|
||||
§3.1.
|
||||
- **Public exposure without external controls.** Exposing the
|
||||
gateway or API to the public internet without authentication,
|
||||
VPN, or firewall.
|
||||
- **Tool-level read/write restrictions on a posture where shell is
|
||||
permitted.** If a path is reachable via the terminal tool, reports
|
||||
that other file tools can reach it add nothing.
|
||||
|
||||
---
|
||||
|
||||
## 4. Deployment Hardening
|
||||
|
||||
The single most important hardening decision is matching isolation
|
||||
(§2.2) to the trust of the content the agent will ingest. Beyond
|
||||
that:
|
||||
|
||||
- Run the agent as a non-root user. The supplied container image
|
||||
does this by default.
|
||||
- Keep credentials in the operator credential file with tight
|
||||
permissions, never in the main config, never in version control.
|
||||
Under OpenShell, use the Provider store rather than an on-disk
|
||||
credential file.
|
||||
- Do not expose the gateway or API to the public internet without
|
||||
VPN, Tailscale, or firewall protection. Under OpenShell, use the
|
||||
network policy layer to restrict egress.
|
||||
- Configure a caller allowlist for every network-exposed adapter
|
||||
you enable (§2.6).
|
||||
- Review third-party skills and plugins before install (§2.4,
|
||||
§2.5). For skills, this means reading the Python and scripts,
|
||||
not just SKILL.md. Skills Guard reports and the install audit
|
||||
log are the review surface.
|
||||
- Hermes Agent includes supply-chain guards for MCP server
|
||||
launches and for dependency / bundled-package changes in CI; see
|
||||
`CONTRIBUTING.md` for specifics.
|
||||
|
||||
---
|
||||
|
||||
## 5. Disclosure
|
||||
|
||||
- **Coordinated disclosure window:** 90 days from report, or until a
|
||||
fix is released, whichever comes first.
|
||||
- **Channel:** the GHSA thread or email correspondence with
|
||||
security@nousresearch.com.
|
||||
- **Credit:** reporters are credited in release notes unless
|
||||
anonymity is requested.
|
||||
@@ -1,32 +1,18 @@
|
||||
"""ACP auth helpers — detect and advertise Hermes authentication methods."""
|
||||
"""ACP auth helpers — detect the currently configured Hermes provider."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
|
||||
TERMINAL_SETUP_AUTH_METHOD_ID = "hermes-setup"
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def detect_provider() -> Optional[str]:
|
||||
"""Resolve the active Hermes runtime provider, or None if unavailable.
|
||||
|
||||
Treats a ``Callable`` ``api_key`` (Azure Foundry Entra ID bearer
|
||||
token provider — see :mod:`agent.azure_identity_adapter`) as a valid
|
||||
credential. Without this, ACP sessions for Entra-configured Foundry
|
||||
deployments silently default to ``"openrouter"`` and the ACP auth
|
||||
handshake rejects the legitimate provider.
|
||||
"""
|
||||
"""Resolve the active Hermes runtime provider, or None if unavailable."""
|
||||
try:
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
runtime = resolve_runtime_provider()
|
||||
api_key = runtime.get("api_key")
|
||||
provider = runtime.get("provider")
|
||||
if not isinstance(provider, str) or not provider.strip():
|
||||
return None
|
||||
is_string_key = isinstance(api_key, str) and api_key.strip()
|
||||
is_callable_provider = callable(api_key) and not isinstance(api_key, str)
|
||||
if is_string_key or is_callable_provider:
|
||||
if isinstance(api_key, str) and api_key.strip() and isinstance(provider, str) and provider.strip():
|
||||
return provider.strip().lower()
|
||||
except Exception:
|
||||
return None
|
||||
@@ -36,44 +22,3 @@ def detect_provider() -> Optional[str]:
|
||||
def has_provider() -> bool:
|
||||
"""Return True if Hermes can resolve any runtime provider credentials."""
|
||||
return detect_provider() is not None
|
||||
|
||||
|
||||
def build_auth_methods() -> list[Any]:
|
||||
"""Return registry-compatible ACP auth methods for Hermes.
|
||||
|
||||
The official ACP registry validates that agents advertise at least one
|
||||
usable auth method during the initial handshake. A fresh Zed install may
|
||||
not have Hermes provider credentials configured yet, so Hermes always
|
||||
advertises a terminal setup method. When credentials are already present,
|
||||
it also advertises the resolved provider as the default agent-managed
|
||||
runtime credential method.
|
||||
"""
|
||||
from acp.schema import AuthMethodAgent, TerminalAuthMethod
|
||||
|
||||
methods: list[Any] = []
|
||||
provider = detect_provider()
|
||||
if provider:
|
||||
methods.append(
|
||||
AuthMethodAgent(
|
||||
id=provider,
|
||||
name=f"{provider} runtime credentials",
|
||||
description=(
|
||||
"Authenticate Hermes using the currently configured "
|
||||
f"{provider} runtime credentials."
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
methods.append(
|
||||
TerminalAuthMethod(
|
||||
id=TERMINAL_SETUP_AUTH_METHOD_ID,
|
||||
name="Configure Hermes provider",
|
||||
description=(
|
||||
"Open Hermes' interactive model/provider setup in a terminal. "
|
||||
"Use this when Hermes has not been configured on this machine yet."
|
||||
),
|
||||
type="terminal",
|
||||
args=["--setup"],
|
||||
)
|
||||
)
|
||||
return methods
|
||||
|
||||
@@ -1,286 +0,0 @@
|
||||
"""Pre-execution ACP edit approval helpers.
|
||||
|
||||
This module is intentionally isolated from the generic tool registry. ACP binds
|
||||
an edit approval requester in a ContextVar for the duration of one ACP agent run;
|
||||
CLI, gateway, and other sessions leave it unset and therefore bypass this guard.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import tempfile
|
||||
from concurrent.futures import TimeoutError as FutureTimeout
|
||||
from contextvars import ContextVar, Token
|
||||
from dataclasses import dataclass
|
||||
from itertools import count
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class EditProposal:
|
||||
"""A proposed single-file edit that can be shown to an ACP client."""
|
||||
|
||||
tool_name: str
|
||||
path: str
|
||||
old_text: str | None
|
||||
new_text: str
|
||||
arguments: dict[str, Any]
|
||||
|
||||
|
||||
EditApprovalRequester = Callable[[EditProposal], bool]
|
||||
|
||||
_EDIT_APPROVAL_REQUESTER: ContextVar[EditApprovalRequester | None] = ContextVar(
|
||||
"ACP_EDIT_APPROVAL_REQUESTER",
|
||||
default=None,
|
||||
)
|
||||
_PERMISSION_REQUEST_IDS = count(1)
|
||||
|
||||
|
||||
SENSITIVE_AUTO_APPROVE_NAMES = {".env", ".env.local", ".env.production", "id_rsa", "id_ed25519"}
|
||||
AUTO_APPROVE_ASK = "ask"
|
||||
AUTO_APPROVE_WORKSPACE = "workspace_session"
|
||||
AUTO_APPROVE_SESSION = "session"
|
||||
|
||||
|
||||
def set_edit_approval_requester(requester: EditApprovalRequester | None) -> Token:
|
||||
"""Bind an ACP edit approval requester for the current context."""
|
||||
|
||||
return _EDIT_APPROVAL_REQUESTER.set(requester)
|
||||
|
||||
|
||||
def reset_edit_approval_requester(token: Token) -> None:
|
||||
"""Restore a previous edit approval requester binding."""
|
||||
|
||||
_EDIT_APPROVAL_REQUESTER.reset(token)
|
||||
|
||||
|
||||
def clear_edit_approval_requester() -> None:
|
||||
"""Clear the current requester; primarily used by tests."""
|
||||
|
||||
_EDIT_APPROVAL_REQUESTER.set(None)
|
||||
|
||||
|
||||
def get_edit_approval_requester() -> EditApprovalRequester | None:
|
||||
return _EDIT_APPROVAL_REQUESTER.get()
|
||||
|
||||
|
||||
def _read_text_if_exists(path: str) -> str | None:
|
||||
p = Path(path).expanduser()
|
||||
if not p.exists():
|
||||
return None
|
||||
if not p.is_file():
|
||||
raise OSError(f"Cannot edit non-file path: {path}")
|
||||
return p.read_text(encoding="utf-8", errors="replace")
|
||||
|
||||
|
||||
def _proposal_for_write_file(arguments: dict[str, Any]) -> EditProposal:
|
||||
path = str(arguments.get("path") or "")
|
||||
if not path:
|
||||
raise ValueError("path required")
|
||||
content = arguments.get("content")
|
||||
if content is None:
|
||||
raise ValueError("content required")
|
||||
return EditProposal(
|
||||
tool_name="write_file",
|
||||
path=path,
|
||||
old_text=_read_text_if_exists(path),
|
||||
new_text=str(content),
|
||||
arguments=dict(arguments),
|
||||
)
|
||||
|
||||
|
||||
def _proposal_for_patch_replace(arguments: dict[str, Any]) -> EditProposal:
|
||||
path = str(arguments.get("path") or "")
|
||||
if not path:
|
||||
raise ValueError("path required")
|
||||
old_string = arguments.get("old_string")
|
||||
new_string = arguments.get("new_string")
|
||||
if old_string is None or new_string is None:
|
||||
raise ValueError("old_string and new_string required")
|
||||
|
||||
old_text = _read_text_if_exists(path)
|
||||
if old_text is None:
|
||||
raise ValueError(f"Failed to read file: {path}")
|
||||
|
||||
from tools.fuzzy_match import fuzzy_find_and_replace
|
||||
|
||||
new_text, match_count, _strategy, error = fuzzy_find_and_replace(
|
||||
old_text,
|
||||
str(old_string),
|
||||
str(new_string),
|
||||
bool(arguments.get("replace_all", False)),
|
||||
)
|
||||
if error or match_count == 0:
|
||||
raise ValueError(error or f"Could not find match for old_string in {path}")
|
||||
|
||||
return EditProposal(
|
||||
tool_name="patch",
|
||||
path=path,
|
||||
old_text=old_text,
|
||||
new_text=new_text,
|
||||
arguments=dict(arguments),
|
||||
)
|
||||
|
||||
|
||||
def build_edit_proposal(tool_name: str, arguments: dict[str, Any]) -> EditProposal | None:
|
||||
"""Return an edit proposal for supported file mutation calls."""
|
||||
|
||||
if tool_name == "write_file":
|
||||
return _proposal_for_write_file(arguments)
|
||||
if tool_name == "patch" and arguments.get("mode", "replace") == "replace":
|
||||
return _proposal_for_patch_replace(arguments)
|
||||
return None
|
||||
|
||||
|
||||
def _is_sensitive_auto_approve_path(path: str) -> bool:
|
||||
parts = Path(path).expanduser().parts
|
||||
lowered = {part.lower() for part in parts}
|
||||
if ".git" in lowered or ".ssh" in lowered:
|
||||
return True
|
||||
return Path(path).name.lower() in SENSITIVE_AUTO_APPROVE_NAMES
|
||||
|
||||
|
||||
def should_auto_approve_edit(proposal: EditProposal, policy: str, cwd: str | None = None) -> bool:
|
||||
"""Return whether an ACP edit proposal may bypass the prompt for this session.
|
||||
|
||||
This is intentionally session-scoped and conservative: sensitive paths still
|
||||
ask even under autonomous policies.
|
||||
"""
|
||||
|
||||
policy = str(policy or AUTO_APPROVE_ASK).strip()
|
||||
if policy == AUTO_APPROVE_ASK or _is_sensitive_auto_approve_path(proposal.path):
|
||||
return False
|
||||
path = Path(proposal.path).expanduser().resolve(strict=False)
|
||||
if policy == AUTO_APPROVE_SESSION:
|
||||
return True
|
||||
if policy == AUTO_APPROVE_WORKSPACE:
|
||||
# `/tmp` is the POSIX path but tempfile.gettempdir() is the real one on
|
||||
# every platform: `/private/tmp` on macOS (because `/tmp` is a symlink
|
||||
# and Path.resolve() follows it) and the per-user Temp dir on Windows.
|
||||
tmp_root = Path(tempfile.gettempdir()).resolve(strict=False)
|
||||
try:
|
||||
path.relative_to(tmp_root)
|
||||
return True
|
||||
except ValueError:
|
||||
pass
|
||||
if cwd:
|
||||
root = Path(cwd).expanduser().resolve(strict=False)
|
||||
try:
|
||||
path.relative_to(root)
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
return False
|
||||
|
||||
|
||||
def maybe_require_edit_approval(tool_name: str, arguments: dict[str, Any]) -> str | None:
|
||||
"""Run ACP edit approval if bound.
|
||||
|
||||
Returns a JSON tool-error string when the edit must be blocked, otherwise
|
||||
``None`` so dispatch can continue. Requester exceptions deny by default.
|
||||
"""
|
||||
|
||||
requester = get_edit_approval_requester()
|
||||
if requester is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
proposal = build_edit_proposal(tool_name, arguments)
|
||||
except Exception as exc:
|
||||
logger.warning("Could not build ACP edit approval proposal for %s: %s", tool_name, exc)
|
||||
return json.dumps({"error": f"Edit approval denied: could not prepare diff ({exc})"}, ensure_ascii=False)
|
||||
|
||||
if proposal is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
approved = bool(requester(proposal))
|
||||
except Exception as exc:
|
||||
logger.warning("ACP edit approval requester failed: %s", exc)
|
||||
approved = False
|
||||
|
||||
if approved:
|
||||
return None
|
||||
return json.dumps({"error": "Edit approval denied by ACP client; file was not modified."}, ensure_ascii=False)
|
||||
|
||||
|
||||
def build_acp_edit_tool_call(proposal: EditProposal):
|
||||
"""Build the ToolCallUpdate payload for ACP request_permission."""
|
||||
|
||||
import acp
|
||||
|
||||
tool_call_id = f"edit-approval-{next(_PERMISSION_REQUEST_IDS)}"
|
||||
return acp.update_tool_call(
|
||||
tool_call_id,
|
||||
title=f"Approve edit: {proposal.path}",
|
||||
kind="edit",
|
||||
status="pending",
|
||||
content=[
|
||||
acp.tool_diff_content(
|
||||
path=proposal.path,
|
||||
old_text=proposal.old_text,
|
||||
new_text=proposal.new_text,
|
||||
)
|
||||
],
|
||||
raw_input={"tool": proposal.tool_name, "arguments": proposal.arguments},
|
||||
)
|
||||
|
||||
|
||||
def make_acp_edit_approval_requester(
|
||||
request_permission_fn: Callable,
|
||||
loop: asyncio.AbstractEventLoop,
|
||||
session_id: str,
|
||||
timeout: float = 60.0,
|
||||
auto_approve_getter: Callable[[], tuple[str, str | None]] | None = None,
|
||||
) -> EditApprovalRequester:
|
||||
"""Return a sync requester that bridges edit proposals to ACP permissions."""
|
||||
|
||||
def _requester(proposal: EditProposal) -> bool:
|
||||
from acp.schema import PermissionOption
|
||||
from agent.async_utils import safe_schedule_threadsafe
|
||||
|
||||
if auto_approve_getter is not None:
|
||||
try:
|
||||
policy, cwd = auto_approve_getter()
|
||||
if should_auto_approve_edit(proposal, policy, cwd):
|
||||
logger.info("Auto-approved ACP edit under policy %s: %s", policy, proposal.path)
|
||||
return True
|
||||
except Exception:
|
||||
logger.debug("ACP edit auto-approval policy check failed", exc_info=True)
|
||||
|
||||
options = [
|
||||
PermissionOption(option_id="allow_once", kind="allow_once", name="Allow edit"),
|
||||
PermissionOption(option_id="deny", kind="reject_once", name="Deny"),
|
||||
]
|
||||
tool_call = build_acp_edit_tool_call(proposal)
|
||||
coro = request_permission_fn(
|
||||
session_id=session_id,
|
||||
tool_call=tool_call,
|
||||
options=options,
|
||||
)
|
||||
future = safe_schedule_threadsafe(
|
||||
coro,
|
||||
loop,
|
||||
logger=logger,
|
||||
log_message="Edit approval request: failed to schedule on loop",
|
||||
)
|
||||
if future is None:
|
||||
return False
|
||||
try:
|
||||
response = future.result(timeout=timeout)
|
||||
except (FutureTimeout, Exception) as exc:
|
||||
future.cancel()
|
||||
logger.warning("Edit approval request timed out or failed: %s", exc)
|
||||
return False
|
||||
outcome = getattr(response, "outcome", None)
|
||||
return (
|
||||
getattr(outcome, "outcome", None) == "selected"
|
||||
and getattr(outcome, "option_id", None) == "allow_once"
|
||||
)
|
||||
|
||||
return _requester
|
||||
@@ -13,18 +13,6 @@ Usage::
|
||||
hermes-acp
|
||||
"""
|
||||
|
||||
# IMPORTANT: hermes_bootstrap must be the very first import — UTF-8 stdio
|
||||
# on Windows. No-op on POSIX. See hermes_bootstrap.py for full rationale.
|
||||
try:
|
||||
import hermes_bootstrap # noqa: F401
|
||||
except ModuleNotFoundError:
|
||||
# Graceful fallback when hermes_bootstrap isn't registered in the venv
|
||||
# yet — happens during partial ``hermes update`` where git-reset landed
|
||||
# new code but ``uv pip install -e .`` didn't finish. Missing bootstrap
|
||||
# means UTF-8 stdio setup is skipped on Windows; POSIX is unaffected.
|
||||
pass
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import logging
|
||||
import sys
|
||||
@@ -32,46 +20,6 @@ from pathlib import Path
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
|
||||
# Methods clients send as periodic liveness probes. They are not part of the
|
||||
# ACP schema, so the acp router correctly returns JSON-RPC -32601 to the
|
||||
# caller — but the supervisor task that dispatches the request then surfaces
|
||||
# the raised RequestError via ``logging.exception("Background task failed")``,
|
||||
# which dumps a traceback to stderr every probe interval. Clients like
|
||||
# acp-bridge already treat the -32601 response as "agent alive", so the
|
||||
# traceback is pure noise. We keep the protocol response intact and only
|
||||
# silence the stderr noise for this specific benign case.
|
||||
_BENIGN_PROBE_METHODS = frozenset({"ping", "health", "healthcheck"})
|
||||
|
||||
|
||||
class _BenignProbeMethodFilter(logging.Filter):
|
||||
"""Suppress acp 'Background task failed' tracebacks caused by unknown
|
||||
liveness-probe methods (e.g. ``ping``) while leaving every other
|
||||
background-task error — including method_not_found for any non-probe
|
||||
method — visible in stderr.
|
||||
"""
|
||||
|
||||
def filter(self, record: logging.LogRecord) -> bool:
|
||||
if record.getMessage() != "Background task failed":
|
||||
return True
|
||||
exc_info = record.exc_info
|
||||
if not exc_info:
|
||||
return True
|
||||
exc = exc_info[1]
|
||||
# Imported lazily so this module stays importable when the optional
|
||||
# ``agent-client-protocol`` dependency is not installed.
|
||||
try:
|
||||
from acp.exceptions import RequestError
|
||||
except ImportError:
|
||||
return True
|
||||
if not isinstance(exc, RequestError):
|
||||
return True
|
||||
if getattr(exc, "code", None) != -32601:
|
||||
return True
|
||||
data = getattr(exc, "data", None)
|
||||
method = data.get("method") if isinstance(data, dict) else None
|
||||
return method not in _BENIGN_PROBE_METHODS
|
||||
|
||||
|
||||
def _setup_logging() -> None:
|
||||
"""Route all logging to stderr so stdout stays clean for ACP stdio."""
|
||||
handler = logging.StreamHandler(sys.stderr)
|
||||
@@ -81,7 +29,6 @@ def _setup_logging() -> None:
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
)
|
||||
)
|
||||
handler.addFilter(_BenignProbeMethodFilter())
|
||||
root = logging.getLogger()
|
||||
root.handlers.clear()
|
||||
root.addHandler(handler)
|
||||
@@ -108,125 +55,8 @@ def _load_env() -> None:
|
||||
)
|
||||
|
||||
|
||||
def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="hermes-acp",
|
||||
description="Run Hermes Agent as an ACP stdio server.",
|
||||
)
|
||||
parser.add_argument("--version", action="store_true", help="Print Hermes version and exit")
|
||||
parser.add_argument(
|
||||
"--check",
|
||||
action="store_true",
|
||||
help="Verify ACP dependencies and adapter imports, then exit",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--setup",
|
||||
action="store_true",
|
||||
help="Run interactive Hermes provider/model setup for ACP terminal auth",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--setup-browser",
|
||||
action="store_true",
|
||||
help="Install agent-browser + Playwright Chromium into ~/.hermes/node/ "
|
||||
"for browser tool support. Idempotent.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--yes",
|
||||
"-y",
|
||||
action="store_true",
|
||||
dest="assume_yes",
|
||||
help="Accept all prompts (currently used by --setup-browser to skip the "
|
||||
"~400 MB Chromium download confirmation).",
|
||||
)
|
||||
return parser.parse_args(argv)
|
||||
|
||||
|
||||
def _print_version() -> None:
|
||||
from hermes_cli import __version__ as hermes_version
|
||||
|
||||
print(hermes_version)
|
||||
|
||||
|
||||
def _run_check() -> None:
|
||||
import acp # noqa: F401
|
||||
from acp_adapter.server import HermesACPAgent # noqa: F401
|
||||
|
||||
print("Hermes ACP check OK")
|
||||
|
||||
|
||||
def _run_setup() -> None:
|
||||
from hermes_cli.main import main as hermes_main
|
||||
|
||||
old_argv = sys.argv[:]
|
||||
try:
|
||||
sys.argv = [old_argv[0] if old_argv else "hermes", "model"]
|
||||
hermes_main()
|
||||
finally:
|
||||
sys.argv = old_argv
|
||||
|
||||
# Offer browser-tools install as a follow-up. The terminal auth method
|
||||
# is the one supported first-run UX for registry installs, so this is
|
||||
# the natural moment to ask. Skip silently if stdin isn't a TTY (the
|
||||
# answer can't be collected anyway).
|
||||
if not sys.stdin.isatty():
|
||||
return
|
||||
try:
|
||||
reply = input(
|
||||
"\nInstall browser tools? Downloads agent-browser (npm) and "
|
||||
"optionally Playwright Chromium (~400 MB). [y/N] "
|
||||
).strip().lower()
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
return
|
||||
if reply in {"y", "yes"}:
|
||||
_run_setup_browser(assume_yes=False)
|
||||
|
||||
|
||||
def _run_setup_browser(assume_yes: bool = False) -> int:
|
||||
"""Bootstrap agent-browser + Chromium.
|
||||
|
||||
Routes through dep_ensure -> install.{sh,ps1} --ensure, sharing code
|
||||
with ``hermes postinstall`` and the runtime lazy installer.
|
||||
|
||||
Returns 0 on success, 1 on failure.
|
||||
"""
|
||||
from hermes_cli.dep_ensure import ensure_dependency
|
||||
|
||||
try:
|
||||
node_ok = ensure_dependency("node", interactive=not assume_yes)
|
||||
if not node_ok:
|
||||
print("Node.js installation failed — cannot proceed with browser tools.",
|
||||
file=sys.stderr)
|
||||
return 1
|
||||
|
||||
browser_ok = ensure_dependency("browser", interactive=not assume_yes)
|
||||
if not browser_ok:
|
||||
print("Browser tools installation failed.", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
return 0
|
||||
except OSError as exc:
|
||||
print(f"Browser bootstrap failed: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> None:
|
||||
def main() -> None:
|
||||
"""Entry point: load env, configure logging, run the ACP agent."""
|
||||
args = _parse_args(argv)
|
||||
if args.version:
|
||||
_print_version()
|
||||
return
|
||||
if args.check:
|
||||
_run_check()
|
||||
return
|
||||
if args.setup:
|
||||
_run_setup()
|
||||
return
|
||||
if args.setup_browser:
|
||||
rc = _run_setup_browser(assume_yes=args.assume_yes)
|
||||
if rc != 0:
|
||||
sys.exit(rc)
|
||||
return
|
||||
|
||||
_setup_logging()
|
||||
_load_env()
|
||||
|
||||
@@ -241,17 +71,6 @@ def main(argv: list[str] | None = None) -> None:
|
||||
import acp
|
||||
from .server import HermesACPAgent
|
||||
|
||||
# MCP tool discovery from config.yaml — run before asyncio.run() so
|
||||
# it's safe to use blocking waits. (ACP also registers per-session
|
||||
# MCP servers dynamically via asyncio.to_thread inside the event
|
||||
# loop; that path is unaffected.) Moved from model_tools.py module
|
||||
# scope to avoid freezing the gateway's loop on lazy import (#16856).
|
||||
try:
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
discover_mcp_tools()
|
||||
except Exception:
|
||||
logger.debug("MCP tool discovery failed at ACP startup", exc_info=True)
|
||||
|
||||
agent = HermesACPAgent()
|
||||
try:
|
||||
asyncio.run(acp.run_agent(agent, use_unstable_protocol=True))
|
||||
|
||||
@@ -14,7 +14,6 @@ from collections import deque
|
||||
from typing import Any, Callable, Deque, Dict
|
||||
|
||||
import acp
|
||||
from acp.schema import AgentPlanUpdate, PlanEntry
|
||||
|
||||
from .tools import (
|
||||
build_tool_complete,
|
||||
@@ -25,65 +24,6 @@ from .tools import (
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _json_loads_maybe_prefix(value: str) -> Any:
|
||||
"""Parse a JSON object even when Hermes appended a human hint after it."""
|
||||
text = value.strip()
|
||||
try:
|
||||
return json.loads(text)
|
||||
except Exception:
|
||||
decoder = json.JSONDecoder()
|
||||
data, _ = decoder.raw_decode(text)
|
||||
return data
|
||||
|
||||
|
||||
def _build_plan_update_from_todo_result(result: Any) -> AgentPlanUpdate | None:
|
||||
"""Translate Hermes' todo tool result into ACP's native plan update.
|
||||
|
||||
Zed renders ``sessionUpdate: plan`` as its first-class task/todo panel. The
|
||||
Hermes agent already maintains task state through the ``todo`` tool, so the
|
||||
ACP adapter should expose that state natively instead of only as a generic
|
||||
tool-call transcript block.
|
||||
"""
|
||||
if not isinstance(result, str) or not result.strip():
|
||||
return None
|
||||
|
||||
try:
|
||||
data = _json_loads_maybe_prefix(result)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
if not isinstance(data, dict) or not isinstance(data.get("todos"), list):
|
||||
return None
|
||||
|
||||
todos = data["todos"]
|
||||
if not todos:
|
||||
return AgentPlanUpdate(session_update="plan", entries=[])
|
||||
|
||||
status_map = {
|
||||
"pending": "pending",
|
||||
"in_progress": "in_progress",
|
||||
"completed": "completed",
|
||||
# ACP plans only support pending/in_progress/completed. Preserve
|
||||
# cancelled tasks as terminal entries instead of dropping them and
|
||||
# making the client's full-list replacement lose visible context.
|
||||
"cancelled": "completed",
|
||||
}
|
||||
entries: list[PlanEntry] = []
|
||||
for item in todos:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
content = str(item.get("content") or item.get("id") or "").strip()
|
||||
if not content:
|
||||
continue
|
||||
raw_status = str(item.get("status") or "pending").strip()
|
||||
status = status_map.get(raw_status, "pending")
|
||||
if raw_status == "cancelled":
|
||||
content = f"[cancelled] {content}"
|
||||
entries.append(PlanEntry(content=content, priority="medium", status=status))
|
||||
|
||||
return AgentPlanUpdate(session_update="plan", entries=entries)
|
||||
|
||||
|
||||
def _send_update(
|
||||
conn: acp.Client,
|
||||
session_id: str,
|
||||
@@ -91,17 +31,10 @@ def _send_update(
|
||||
update: Any,
|
||||
) -> None:
|
||||
"""Fire-and-forget an ACP session update from a worker thread."""
|
||||
from agent.async_utils import safe_schedule_threadsafe
|
||||
|
||||
future = safe_schedule_threadsafe(
|
||||
conn.session_update(session_id, update),
|
||||
loop,
|
||||
logger=logger,
|
||||
log_message="Failed to send ACP update",
|
||||
)
|
||||
if future is None:
|
||||
return
|
||||
try:
|
||||
future = asyncio.run_coroutine_threadsafe(
|
||||
conn.session_update(session_id, update), loop
|
||||
)
|
||||
future.result(timeout=5)
|
||||
except Exception:
|
||||
logger.debug("Failed to send ACP update", exc_info=True)
|
||||
@@ -116,8 +49,6 @@ def make_tool_progress_cb(
|
||||
session_id: str,
|
||||
loop: asyncio.AbstractEventLoop,
|
||||
tool_call_ids: Dict[str, Deque[str]],
|
||||
tool_call_meta: Dict[str, Dict[str, Any]],
|
||||
edit_approval_policy_getter: Callable[[], tuple[str, str | None]] | None = None,
|
||||
) -> Callable:
|
||||
"""Create a ``tool_progress_callback`` for AIAgent.
|
||||
|
||||
@@ -153,30 +84,7 @@ def make_tool_progress_cb(
|
||||
tool_call_ids[name] = queue
|
||||
queue.append(tc_id)
|
||||
|
||||
snapshot = None
|
||||
if name in {"write_file", "patch", "skill_manage"}:
|
||||
try:
|
||||
from agent.display import capture_local_edit_snapshot
|
||||
|
||||
snapshot = capture_local_edit_snapshot(name, args)
|
||||
except Exception:
|
||||
logger.debug("Failed to capture ACP edit snapshot for %s", name, exc_info=True)
|
||||
tool_call_meta[tc_id] = {"args": args, "snapshot": snapshot}
|
||||
|
||||
edit_diff = None
|
||||
if name in {"write_file", "patch"} and edit_approval_policy_getter is not None:
|
||||
try:
|
||||
from acp_adapter.edit_approval import build_edit_proposal, should_auto_approve_edit
|
||||
|
||||
proposal = build_edit_proposal(name, args)
|
||||
if proposal is not None:
|
||||
policy, cwd = edit_approval_policy_getter()
|
||||
if should_auto_approve_edit(proposal, policy, cwd):
|
||||
edit_diff = proposal
|
||||
except Exception:
|
||||
logger.debug("Failed to prepare auto-approved ACP edit diff for %s", name, exc_info=True)
|
||||
|
||||
update = build_tool_start(tc_id, name, args, edit_diff=edit_diff)
|
||||
update = build_tool_start(tc_id, name, args)
|
||||
_send_update(conn, session_id, loop, update)
|
||||
|
||||
return _tool_progress
|
||||
@@ -211,7 +119,6 @@ def make_step_cb(
|
||||
session_id: str,
|
||||
loop: asyncio.AbstractEventLoop,
|
||||
tool_call_ids: Dict[str, Deque[str]],
|
||||
tool_call_meta: Dict[str, Dict[str, Any]],
|
||||
) -> Callable:
|
||||
"""Create a ``step_callback`` for AIAgent.
|
||||
|
||||
@@ -225,12 +132,10 @@ def make_step_cb(
|
||||
for tool_info in prev_tools:
|
||||
tool_name = None
|
||||
result = None
|
||||
function_args = None
|
||||
|
||||
if isinstance(tool_info, dict):
|
||||
tool_name = tool_info.get("name") or tool_info.get("function_name")
|
||||
result = tool_info.get("result") or tool_info.get("output")
|
||||
function_args = tool_info.get("arguments") or tool_info.get("args")
|
||||
elif isinstance(tool_info, str):
|
||||
tool_name = tool_info
|
||||
|
||||
@@ -240,19 +145,10 @@ def make_step_cb(
|
||||
tool_call_ids[tool_name] = queue
|
||||
if tool_name and queue:
|
||||
tc_id = queue.popleft()
|
||||
meta = tool_call_meta.pop(tc_id, {})
|
||||
update = build_tool_complete(
|
||||
tc_id,
|
||||
tool_name,
|
||||
result=str(result) if result is not None else None,
|
||||
function_args=function_args or meta.get("args"),
|
||||
snapshot=meta.get("snapshot"),
|
||||
tc_id, tool_name, result=str(result) if result is not None else None
|
||||
)
|
||||
_send_update(conn, session_id, loop, update)
|
||||
if tool_name == "todo":
|
||||
plan_update = _build_plan_update_from_todo_result(result)
|
||||
if plan_update is not None:
|
||||
_send_update(conn, session_id, loop, plan_update)
|
||||
if not queue:
|
||||
tool_call_ids.pop(tool_name, None)
|
||||
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
"""ACP permission bridging for Hermes dangerous-command approvals."""
|
||||
"""ACP permission bridging — maps ACP approval requests to hermes approval callbacks."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from concurrent.futures import TimeoutError as FutureTimeout
|
||||
from itertools import count
|
||||
from typing import Callable
|
||||
|
||||
from acp.schema import (
|
||||
@@ -15,107 +14,24 @@ from acp.schema import (
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Maps ACP permission option ids to Hermes approval result strings.
|
||||
# Option ids are stable across both the ``allow_permanent=True`` and
|
||||
# ``allow_permanent=False`` paths even though the option list differs.
|
||||
_OPTION_ID_TO_HERMES = {
|
||||
# Maps ACP PermissionOptionKind -> hermes approval result strings
|
||||
_KIND_TO_HERMES = {
|
||||
"allow_once": "once",
|
||||
"allow_session": "session",
|
||||
"allow_always": "always",
|
||||
"deny": "deny",
|
||||
"deny_always": "deny",
|
||||
"reject_once": "deny",
|
||||
"reject_always": "deny",
|
||||
}
|
||||
|
||||
_PERMISSION_REQUEST_IDS = count(1)
|
||||
|
||||
|
||||
def _permission_option_supports_kind(kind: str) -> bool:
|
||||
"""Return whether the installed ACP SDK accepts a permission option kind."""
|
||||
try:
|
||||
PermissionOption(option_id="__probe__", kind=kind, name="probe")
|
||||
except Exception:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _build_permission_options(*, allow_permanent: bool) -> list[PermissionOption]:
|
||||
"""Return ACP options that match Hermes approval semantics."""
|
||||
options = [
|
||||
PermissionOption(option_id="allow_once", kind="allow_once", name="Allow once"),
|
||||
PermissionOption(
|
||||
option_id="allow_session",
|
||||
# ACP has no session-scoped kind, so use the closest persistent
|
||||
# hint while keeping Hermes semantics in the option id.
|
||||
kind="allow_always",
|
||||
name="Allow for session",
|
||||
),
|
||||
]
|
||||
if allow_permanent:
|
||||
options.append(
|
||||
PermissionOption(
|
||||
option_id="allow_always",
|
||||
kind="allow_always",
|
||||
name="Allow always",
|
||||
),
|
||||
)
|
||||
options.append(PermissionOption(option_id="deny", kind="reject_once", name="Deny"))
|
||||
if _permission_option_supports_kind("reject_always"):
|
||||
options.append(
|
||||
PermissionOption(
|
||||
option_id="deny_always",
|
||||
kind="reject_always",
|
||||
name="Deny always",
|
||||
),
|
||||
)
|
||||
return options
|
||||
|
||||
|
||||
def _build_permission_tool_call(command: str, description: str):
|
||||
"""Return the ACP tool-call update attached to a permission request.
|
||||
|
||||
``request_permission`` expects a ``ToolCallUpdate`` payload — produced
|
||||
by ``_acp.update_tool_call`` — not a ``ToolCallStart``. Each request
|
||||
gets a unique ``perm-check-N`` id so concurrent requests don't collide.
|
||||
"""
|
||||
import acp as _acp
|
||||
|
||||
tool_call_id = f"perm-check-{next(_PERMISSION_REQUEST_IDS)}"
|
||||
title = f"{description}: {command}" if description else command
|
||||
content_text = f"{description}\n$ {command}" if description else f"$ {command}"
|
||||
return _acp.update_tool_call(
|
||||
tool_call_id,
|
||||
title=title,
|
||||
kind="execute",
|
||||
status="pending",
|
||||
content=[_acp.tool_content(_acp.text_block(content_text))],
|
||||
raw_input={"command": command, "description": description},
|
||||
)
|
||||
|
||||
|
||||
def _map_outcome_to_hermes(outcome: object, *, allowed_option_ids: set[str]) -> str:
|
||||
"""Map an ACP permission outcome into Hermes approval strings."""
|
||||
if not isinstance(outcome, AllowedOutcome):
|
||||
return "deny"
|
||||
|
||||
option_id = outcome.option_id
|
||||
if option_id not in allowed_option_ids:
|
||||
logger.warning("Permission request returned unknown option_id: %s", option_id)
|
||||
return "deny"
|
||||
return _OPTION_ID_TO_HERMES.get(option_id, "deny")
|
||||
|
||||
|
||||
def make_approval_callback(
|
||||
request_permission_fn: Callable,
|
||||
loop: asyncio.AbstractEventLoop,
|
||||
session_id: str,
|
||||
timeout: float = 60.0,
|
||||
) -> Callable[..., str]:
|
||||
) -> Callable[[str, str], str]:
|
||||
"""
|
||||
Return a Hermes-compatible approval callback that bridges to ACP.
|
||||
|
||||
The callback accepts ``command`` and ``description`` plus optional
|
||||
keyword arguments such as ``allow_permanent`` used by
|
||||
``tools.approval.prompt_dangerous_approval()``.
|
||||
Return a hermes-compatible ``approval_callback(command, description) -> str``
|
||||
that bridges to the ACP client's ``request_permission`` call.
|
||||
|
||||
Args:
|
||||
request_permission_fn: The ACP connection's ``request_permission`` coroutine.
|
||||
@@ -124,45 +40,38 @@ def make_approval_callback(
|
||||
timeout: Seconds to wait for a response before auto-denying.
|
||||
"""
|
||||
|
||||
def _callback(
|
||||
command: str,
|
||||
description: str,
|
||||
*,
|
||||
allow_permanent: bool = True,
|
||||
**_: object,
|
||||
) -> str:
|
||||
from agent.async_utils import safe_schedule_threadsafe
|
||||
def _callback(command: str, description: str) -> str:
|
||||
options = [
|
||||
PermissionOption(option_id="allow_once", kind="allow_once", name="Allow once"),
|
||||
PermissionOption(option_id="allow_always", kind="allow_always", name="Allow always"),
|
||||
PermissionOption(option_id="deny", kind="reject_once", name="Deny"),
|
||||
]
|
||||
import acp as _acp
|
||||
|
||||
options = _build_permission_options(allow_permanent=allow_permanent)
|
||||
tool_call = _acp.start_tool_call("perm-check", command, kind="execute")
|
||||
|
||||
tool_call = _build_permission_tool_call(command, description)
|
||||
coro = request_permission_fn(
|
||||
session_id=session_id,
|
||||
tool_call=tool_call,
|
||||
options=options,
|
||||
)
|
||||
future = safe_schedule_threadsafe(
|
||||
coro, loop,
|
||||
logger=logger,
|
||||
log_message="Permission request: failed to schedule on loop",
|
||||
)
|
||||
if future is None:
|
||||
return "deny"
|
||||
|
||||
try:
|
||||
future = asyncio.run_coroutine_threadsafe(coro, loop)
|
||||
response = future.result(timeout=timeout)
|
||||
except (FutureTimeout, Exception) as exc:
|
||||
future.cancel()
|
||||
logger.warning("Permission request timed out or failed: %s", exc)
|
||||
return "deny"
|
||||
|
||||
if response is None:
|
||||
outcome = response.outcome
|
||||
if isinstance(outcome, AllowedOutcome):
|
||||
option_id = outcome.option_id
|
||||
# Look up the kind from our options list
|
||||
for opt in options:
|
||||
if opt.option_id == option_id:
|
||||
return _KIND_TO_HERMES.get(opt.kind, "deny")
|
||||
return "once" # fallback for unknown option_id
|
||||
else:
|
||||
return "deny"
|
||||
|
||||
allowed_option_ids = {option.option_id for option in options}
|
||||
return _map_outcome_to_hermes(
|
||||
response.outcome,
|
||||
allowed_option_ids=allowed_option_ids,
|
||||
)
|
||||
|
||||
return _callback
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -13,12 +13,8 @@ from hermes_constants import get_hermes_home
|
||||
import copy
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from dataclasses import dataclass, field
|
||||
from threading import Lock
|
||||
from typing import Any, Dict, List, Optional
|
||||
@@ -26,89 +22,6 @@ from typing import Any, Dict, List, Optional
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _win_path_to_wsl(path: str) -> str | None:
|
||||
"""Convert a Windows drive path to its WSL /mnt/<drive>/... equivalent."""
|
||||
match = re.match(r"^([A-Za-z]):[\\/](.*)$", path)
|
||||
if not match:
|
||||
return None
|
||||
drive = match.group(1).lower()
|
||||
tail = match.group(2).replace("\\", "/")
|
||||
return f"/mnt/{drive}/{tail}"
|
||||
|
||||
|
||||
def _translate_acp_cwd(cwd: str) -> str:
|
||||
"""Translate Windows ACP cwd values when Hermes itself is running in WSL.
|
||||
|
||||
Windows ACP clients can launch ``hermes acp`` inside WSL while still sending
|
||||
editor workspaces as Windows drive paths such as ``E:\\Projects``. Store
|
||||
and execute against the WSL mount path so agents, tools, and persisted ACP
|
||||
sessions all agree on the usable workspace. Native Linux/macOS keeps the
|
||||
original cwd unchanged.
|
||||
"""
|
||||
from hermes_constants import is_wsl
|
||||
|
||||
if not is_wsl():
|
||||
return cwd
|
||||
translated = _win_path_to_wsl(str(cwd))
|
||||
return translated if translated is not None else cwd
|
||||
|
||||
|
||||
def _normalize_cwd_for_compare(cwd: str | None) -> str:
|
||||
raw = str(cwd or ".").strip()
|
||||
if not raw:
|
||||
raw = "."
|
||||
expanded = os.path.expanduser(raw)
|
||||
|
||||
# Normalize Windows drive paths into the equivalent WSL mount form so
|
||||
# ACP history filters match the same workspace across Windows and WSL.
|
||||
translated = _win_path_to_wsl(expanded)
|
||||
if translated is not None:
|
||||
expanded = translated
|
||||
elif re.match(r"^/mnt/[A-Za-z]/", expanded):
|
||||
expanded = f"/mnt/{expanded[5].lower()}/{expanded[7:]}"
|
||||
|
||||
return os.path.normpath(expanded)
|
||||
|
||||
|
||||
def _build_session_title(title: Any, preview: Any, cwd: str | None) -> str:
|
||||
explicit = str(title or "").strip()
|
||||
if explicit:
|
||||
return explicit
|
||||
preview_text = str(preview or "").strip()
|
||||
if preview_text:
|
||||
return preview_text
|
||||
leaf = os.path.basename(str(cwd or "").rstrip("/\\"))
|
||||
return leaf or "New thread"
|
||||
|
||||
|
||||
def _format_updated_at(value: Any) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, str) and value.strip():
|
||||
return value
|
||||
try:
|
||||
return datetime.fromtimestamp(float(value), tz=timezone.utc).isoformat()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _updated_at_sort_key(value: Any) -> float:
|
||||
if value is None:
|
||||
return float("-inf")
|
||||
if isinstance(value, (int, float)):
|
||||
return float(value)
|
||||
raw = str(value).strip()
|
||||
if not raw:
|
||||
return float("-inf")
|
||||
try:
|
||||
return datetime.fromisoformat(raw.replace("Z", "+00:00")).timestamp()
|
||||
except Exception:
|
||||
try:
|
||||
return float(raw)
|
||||
except Exception:
|
||||
return float("-inf")
|
||||
|
||||
|
||||
def _acp_stderr_print(*args, **kwargs) -> None:
|
||||
"""Best-effort human-readable output sink for ACP stdio sessions.
|
||||
|
||||
@@ -121,40 +34,16 @@ def _acp_stderr_print(*args, **kwargs) -> None:
|
||||
|
||||
|
||||
def _register_task_cwd(task_id: str, cwd: str) -> None:
|
||||
"""Bind a task/session id to the editor's working directory for tools.
|
||||
|
||||
Zed can launch Hermes from a Windows workspace while the ACP process runs
|
||||
inside WSL. In that case ACP sends cwd as e.g. ``E:\\Projects\\POTI``;
|
||||
local tools need the WSL mount equivalent or subprocess creation fails
|
||||
before the command can run.
|
||||
"""
|
||||
"""Bind a task/session id to the editor's working directory for tools."""
|
||||
if not task_id:
|
||||
return
|
||||
try:
|
||||
from tools.terminal_tool import register_task_env_overrides
|
||||
register_task_env_overrides(task_id, {"cwd": _translate_acp_cwd(cwd)})
|
||||
register_task_env_overrides(task_id, {"cwd": cwd})
|
||||
except Exception:
|
||||
logger.debug("Failed to register ACP task cwd override", exc_info=True)
|
||||
|
||||
|
||||
def _expand_acp_enabled_toolsets(
|
||||
toolsets: List[str] | None = None,
|
||||
mcp_server_names: List[str] | None = None,
|
||||
) -> List[str]:
|
||||
"""Return ACP toolsets plus explicit MCP server toolsets for this session."""
|
||||
expanded: List[str] = []
|
||||
for name in list(toolsets or ["hermes-acp"]):
|
||||
if name and name not in expanded:
|
||||
expanded.append(name)
|
||||
|
||||
for server_name in list(mcp_server_names or []):
|
||||
toolset_name = f"mcp-{server_name}"
|
||||
if server_name and toolset_name not in expanded:
|
||||
expanded.append(toolset_name)
|
||||
|
||||
return expanded
|
||||
|
||||
|
||||
def _clear_task_cwd(task_id: str) -> None:
|
||||
"""Remove task-specific cwd overrides for an ACP session."""
|
||||
if not task_id:
|
||||
@@ -176,11 +65,6 @@ class SessionState:
|
||||
model: str = ""
|
||||
history: List[Dict[str, Any]] = field(default_factory=list)
|
||||
cancel_event: Any = None # threading.Event
|
||||
is_running: bool = False
|
||||
queued_prompts: List[str] = field(default_factory=list)
|
||||
runtime_lock: Any = field(default_factory=Lock)
|
||||
current_prompt_text: str = ""
|
||||
interrupted_prompt_text: str = ""
|
||||
|
||||
|
||||
class SessionManager:
|
||||
@@ -211,7 +95,6 @@ class SessionManager:
|
||||
"""Create a new session with a unique ID and a fresh AIAgent."""
|
||||
import threading
|
||||
|
||||
cwd = _translate_acp_cwd(cwd)
|
||||
session_id = str(uuid.uuid4())
|
||||
agent = self._make_agent(session_id=session_id, cwd=cwd)
|
||||
state = SessionState(
|
||||
@@ -254,7 +137,6 @@ class SessionManager:
|
||||
"""Deep-copy a session's history into a new session."""
|
||||
import threading
|
||||
|
||||
cwd = _translate_acp_cwd(cwd)
|
||||
original = self.get_session(session_id) # checks DB too
|
||||
if original is None:
|
||||
return None
|
||||
@@ -280,83 +162,51 @@ class SessionManager:
|
||||
logger.info("Forked ACP session %s -> %s", session_id, new_id)
|
||||
return state
|
||||
|
||||
def list_sessions(self, cwd: str | None = None) -> List[Dict[str, Any]]:
|
||||
def list_sessions(self) -> List[Dict[str, Any]]:
|
||||
"""Return lightweight info dicts for all sessions (memory + database)."""
|
||||
normalized_cwd = _normalize_cwd_for_compare(cwd) if cwd else None
|
||||
db = self._get_db()
|
||||
persisted_rows: dict[str, dict[str, Any]] = {}
|
||||
|
||||
if db is not None:
|
||||
try:
|
||||
for row in db.list_sessions_rich(source="acp", limit=1000):
|
||||
persisted_rows[str(row["id"])] = dict(row)
|
||||
except Exception:
|
||||
logger.debug("Failed to load ACP sessions from DB", exc_info=True)
|
||||
|
||||
# Collect in-memory sessions first.
|
||||
with self._lock:
|
||||
seen_ids = set(self._sessions.keys())
|
||||
results = []
|
||||
for s in self._sessions.values():
|
||||
history_len = len(s.history)
|
||||
if history_len <= 0:
|
||||
continue
|
||||
if normalized_cwd and _normalize_cwd_for_compare(s.cwd) != normalized_cwd:
|
||||
continue
|
||||
persisted = persisted_rows.get(s.session_id, {})
|
||||
preview = next(
|
||||
(
|
||||
str(msg.get("content") or "").strip()
|
||||
for msg in s.history
|
||||
if msg.get("role") == "user" and str(msg.get("content") or "").strip()
|
||||
),
|
||||
persisted.get("preview") or "",
|
||||
)
|
||||
results.append(
|
||||
{
|
||||
"session_id": s.session_id,
|
||||
"cwd": s.cwd,
|
||||
"model": s.model,
|
||||
"history_len": history_len,
|
||||
"title": _build_session_title(persisted.get("title"), preview, s.cwd),
|
||||
"updated_at": _format_updated_at(
|
||||
persisted.get("last_active") or persisted.get("started_at") or time.time()
|
||||
),
|
||||
}
|
||||
)
|
||||
results = [
|
||||
{
|
||||
"session_id": s.session_id,
|
||||
"cwd": s.cwd,
|
||||
"model": s.model,
|
||||
"history_len": len(s.history),
|
||||
}
|
||||
for s in self._sessions.values()
|
||||
]
|
||||
|
||||
# Merge any persisted sessions not currently in memory.
|
||||
for sid, row in persisted_rows.items():
|
||||
if sid in seen_ids:
|
||||
continue
|
||||
message_count = int(row.get("message_count") or 0)
|
||||
if message_count <= 0:
|
||||
continue
|
||||
# Extract cwd from model_config JSON.
|
||||
session_cwd = "."
|
||||
mc = row.get("model_config")
|
||||
if mc:
|
||||
try:
|
||||
session_cwd = json.loads(mc).get("cwd", ".")
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
if normalized_cwd and _normalize_cwd_for_compare(session_cwd) != normalized_cwd:
|
||||
continue
|
||||
results.append({
|
||||
"session_id": sid,
|
||||
"cwd": session_cwd,
|
||||
"model": row.get("model") or "",
|
||||
"history_len": message_count,
|
||||
"title": _build_session_title(row.get("title"), row.get("preview"), session_cwd),
|
||||
"updated_at": _format_updated_at(row.get("last_active") or row.get("started_at")),
|
||||
})
|
||||
db = self._get_db()
|
||||
if db is not None:
|
||||
try:
|
||||
rows = db.search_sessions(source="acp", limit=1000)
|
||||
for row in rows:
|
||||
sid = row["id"]
|
||||
if sid in seen_ids:
|
||||
continue
|
||||
# Extract cwd from model_config JSON.
|
||||
cwd = "."
|
||||
mc = row.get("model_config")
|
||||
if mc:
|
||||
try:
|
||||
cwd = json.loads(mc).get("cwd", ".")
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
results.append({
|
||||
"session_id": sid,
|
||||
"cwd": cwd,
|
||||
"model": row.get("model") or "",
|
||||
"history_len": row.get("message_count") or 0,
|
||||
})
|
||||
except Exception:
|
||||
logger.debug("Failed to list ACP sessions from DB", exc_info=True)
|
||||
|
||||
results.sort(key=lambda item: _updated_at_sort_key(item.get("updated_at")), reverse=True)
|
||||
return results
|
||||
|
||||
def update_cwd(self, session_id: str, cwd: str) -> Optional[SessionState]:
|
||||
"""Update the working directory for a session and its tool overrides."""
|
||||
cwd = _translate_acp_cwd(cwd)
|
||||
state = self.get_session(session_id) # checks DB too
|
||||
if state is None:
|
||||
return None
|
||||
@@ -466,10 +316,17 @@ class SessionManager:
|
||||
except Exception:
|
||||
logger.debug("Failed to update ACP session metadata", exc_info=True)
|
||||
|
||||
# Replace stored messages with current history atomically so a
|
||||
# mid-rewrite failure rolls back and the previously persisted
|
||||
# conversation is preserved (salvaged from #13675).
|
||||
db.replace_messages(state.session_id, state.history)
|
||||
# Replace stored messages with current history.
|
||||
db.clear_messages(state.session_id)
|
||||
for msg in state.history:
|
||||
db.append_message(
|
||||
session_id=state.session_id,
|
||||
role=msg.get("role", "user"),
|
||||
content=msg.get("content"),
|
||||
tool_name=msg.get("tool_name") or msg.get("name"),
|
||||
tool_calls=msg.get("tool_calls"),
|
||||
tool_call_id=msg.get("tool_call_id"),
|
||||
)
|
||||
except Exception:
|
||||
logger.warning("Failed to persist ACP session %s", state.session_id, exc_info=True)
|
||||
|
||||
@@ -587,21 +444,11 @@ class SessionManager:
|
||||
elif isinstance(model_cfg, str) and model_cfg.strip():
|
||||
default_model = model_cfg.strip()
|
||||
|
||||
configured_mcp_servers = [
|
||||
name
|
||||
for name, cfg in (config.get("mcp_servers") or {}).items()
|
||||
if not isinstance(cfg, dict) or cfg.get("enabled", True) is not False
|
||||
]
|
||||
|
||||
kwargs = {
|
||||
"platform": "acp",
|
||||
"enabled_toolsets": _expand_acp_enabled_toolsets(
|
||||
["hermes-acp"],
|
||||
mcp_server_names=configured_mcp_servers,
|
||||
),
|
||||
"enabled_toolsets": ["hermes-acp"],
|
||||
"quiet_mode": True,
|
||||
"session_id": session_id,
|
||||
"session_db": self._get_db(),
|
||||
"model": model or default_model,
|
||||
}
|
||||
|
||||
|
||||
1213
acp_adapter/tools.py
1213
acp_adapter/tools.py
File diff suppressed because it is too large
Load Diff
@@ -1,16 +1,12 @@
|
||||
{
|
||||
"id": "hermes-agent",
|
||||
"name": "Hermes Agent",
|
||||
"version": "0.14.0",
|
||||
"description": "Self-improving open-source AI agent by Nous Research with ACP editor integration, persistent memory, skills, and rich tool support.",
|
||||
"repository": "https://github.com/NousResearch/hermes-agent",
|
||||
"website": "https://hermes-agent.nousresearch.com/docs/user-guide/features/acp",
|
||||
"authors": ["Nous Research"],
|
||||
"license": "MIT",
|
||||
"schema_version": 1,
|
||||
"name": "hermes-agent",
|
||||
"display_name": "Hermes Agent",
|
||||
"description": "AI agent by Nous Research with 90+ tools, persistent memory, and multi-platform support",
|
||||
"icon": "icon.svg",
|
||||
"distribution": {
|
||||
"uvx": {
|
||||
"package": "hermes-agent[acp]==0.14.0",
|
||||
"args": ["hermes-acp"]
|
||||
}
|
||||
"type": "command",
|
||||
"command": "hermes",
|
||||
"args": ["acp"]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,25 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" width="16" height="16" fill="none">
|
||||
<path d="M8 1.5v13" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
|
||||
<path d="M8 3.25c-2.35-1.4-4.7-.95-6.25.35 1.85-.2 3.8.2 5.55 1.55" stroke="currentColor" stroke-width="1.1" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M8 3.25c2.35-1.4 4.7-.95 6.25.35-1.85-.2-3.8.2-5.55 1.55" stroke="currentColor" stroke-width="1.1" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M8 13.25c-2.3-1-3.05-2.65-1.35-4.15-2 .8-2.35 2.95-.35 4" stroke="currentColor" stroke-width="1.1" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M8 13.25c2.3-1 3.05-2.65 1.35-4.15 2 .8 2.35 2.95.35 4" stroke="currentColor" stroke-width="1.1" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<circle cx="8" cy="1.8" r="1.1" fill="currentColor"/>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64" width="64" height="64">
|
||||
<defs>
|
||||
<linearGradient id="gold" x1="0%" y1="0%" x2="0%" y2="100%">
|
||||
<stop offset="0%" style="stop-color:#F5C542;stop-opacity:1" />
|
||||
<stop offset="100%" style="stop-color:#D4961C;stop-opacity:1" />
|
||||
</linearGradient>
|
||||
</defs>
|
||||
<!-- Staff -->
|
||||
<rect x="30" y="10" width="4" height="46" rx="2" fill="url(#gold)" />
|
||||
<!-- Wings (left) -->
|
||||
<path d="M30 18 C24 14, 14 14, 10 18 C14 16, 22 16, 28 20" fill="#F5C542" opacity="0.9" />
|
||||
<path d="M30 22 C26 19, 18 19, 14 22 C18 20, 24 20, 28 24" fill="#D4961C" opacity="0.8" />
|
||||
<!-- Wings (right) -->
|
||||
<path d="M34 18 C40 14, 50 14, 54 18 C50 16, 42 16, 36 20" fill="#F5C542" opacity="0.9" />
|
||||
<path d="M34 22 C38 19, 46 19, 50 22 C46 20, 40 20, 36 24" fill="#D4961C" opacity="0.8" />
|
||||
<!-- Left serpent -->
|
||||
<path d="M32 48 C22 44, 20 38, 26 34 C20 36, 18 42, 24 46 C18 40, 22 30, 30 28 C24 32, 22 38, 28 42"
|
||||
fill="none" stroke="#F5C542" stroke-width="2.5" stroke-linecap="round" />
|
||||
<!-- Right serpent -->
|
||||
<path d="M32 48 C42 44, 44 38, 38 34 C44 36, 46 42, 40 46 C46 40, 42 30, 34 28 C40 32, 42 38, 36 42"
|
||||
fill="none" stroke="#D4961C" stroke-width="2.5" stroke-linecap="round" />
|
||||
<!-- Orb at top -->
|
||||
<circle cx="32" cy="10" r="4" fill="#F5C542" />
|
||||
<circle cx="32" cy="10" r="2" fill="#FFF8E1" opacity="0.7" />
|
||||
</svg>
|
||||
|
||||
|
Before Width: | Height: | Size: 882 B After Width: | Height: | Size: 1.4 KiB |
@@ -1,326 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token
|
||||
from hermes_cli.auth import _read_codex_tokens, resolve_codex_runtime_credentials
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
|
||||
|
||||
def _utc_now() -> datetime:
|
||||
return datetime.now(timezone.utc)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class AccountUsageWindow:
|
||||
label: str
|
||||
used_percent: Optional[float] = None
|
||||
reset_at: Optional[datetime] = None
|
||||
detail: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class AccountUsageSnapshot:
|
||||
provider: str
|
||||
source: str
|
||||
fetched_at: datetime
|
||||
title: str = "Account limits"
|
||||
plan: Optional[str] = None
|
||||
windows: tuple[AccountUsageWindow, ...] = ()
|
||||
details: tuple[str, ...] = ()
|
||||
unavailable_reason: Optional[str] = None
|
||||
|
||||
@property
|
||||
def available(self) -> bool:
|
||||
return bool(self.windows or self.details) and not self.unavailable_reason
|
||||
|
||||
|
||||
def _title_case_slug(value: Optional[str]) -> Optional[str]:
|
||||
cleaned = str(value or "").strip()
|
||||
if not cleaned:
|
||||
return None
|
||||
return cleaned.replace("_", " ").replace("-", " ").title()
|
||||
|
||||
|
||||
def _parse_dt(value: Any) -> Optional[datetime]:
|
||||
if value in {None, ""}:
|
||||
return None
|
||||
if isinstance(value, (int, float)):
|
||||
return datetime.fromtimestamp(float(value), tz=timezone.utc)
|
||||
if isinstance(value, str):
|
||||
text = value.strip()
|
||||
if not text:
|
||||
return None
|
||||
if text.endswith("Z"):
|
||||
text = text[:-1] + "+00:00"
|
||||
try:
|
||||
dt = datetime.fromisoformat(text)
|
||||
return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
|
||||
except ValueError:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def _format_reset(dt: Optional[datetime]) -> str:
|
||||
if not dt:
|
||||
return "unknown"
|
||||
local_dt = dt.astimezone()
|
||||
delta = dt - _utc_now()
|
||||
total_seconds = int(delta.total_seconds())
|
||||
if total_seconds <= 0:
|
||||
return f"now ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
|
||||
hours, rem = divmod(total_seconds, 3600)
|
||||
minutes = rem // 60
|
||||
if hours >= 24:
|
||||
days, hours = divmod(hours, 24)
|
||||
rel = f"in {days}d {hours}h"
|
||||
elif hours > 0:
|
||||
rel = f"in {hours}h {minutes}m"
|
||||
else:
|
||||
rel = f"in {minutes}m"
|
||||
return f"{rel} ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
|
||||
|
||||
|
||||
def render_account_usage_lines(snapshot: Optional[AccountUsageSnapshot], *, markdown: bool = False) -> list[str]:
|
||||
if not snapshot:
|
||||
return []
|
||||
header = f"📈 {'**' if markdown else ''}{snapshot.title}{'**' if markdown else ''}"
|
||||
lines = [header]
|
||||
if snapshot.plan:
|
||||
lines.append(f"Provider: {snapshot.provider} ({snapshot.plan})")
|
||||
else:
|
||||
lines.append(f"Provider: {snapshot.provider}")
|
||||
for window in snapshot.windows:
|
||||
if window.used_percent is None:
|
||||
base = f"{window.label}: unavailable"
|
||||
else:
|
||||
remaining = max(0, round(100 - float(window.used_percent)))
|
||||
used = max(0, round(float(window.used_percent)))
|
||||
base = f"{window.label}: {remaining}% remaining ({used}% used)"
|
||||
if window.reset_at:
|
||||
base += f" • resets {_format_reset(window.reset_at)}"
|
||||
elif window.detail:
|
||||
base += f" • {window.detail}"
|
||||
lines.append(base)
|
||||
for detail in snapshot.details:
|
||||
lines.append(detail)
|
||||
if snapshot.unavailable_reason:
|
||||
lines.append(f"Unavailable: {snapshot.unavailable_reason}")
|
||||
return lines
|
||||
|
||||
|
||||
def _resolve_codex_usage_url(base_url: str) -> str:
|
||||
normalized = (base_url or "").strip().rstrip("/")
|
||||
if not normalized:
|
||||
normalized = "https://chatgpt.com/backend-api/codex"
|
||||
if normalized.endswith("/codex"):
|
||||
normalized = normalized[: -len("/codex")]
|
||||
if "/backend-api" in normalized:
|
||||
return normalized + "/wham/usage"
|
||||
return normalized + "/api/codex/usage"
|
||||
|
||||
|
||||
def _fetch_codex_account_usage() -> Optional[AccountUsageSnapshot]:
|
||||
creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
|
||||
token_data = _read_codex_tokens()
|
||||
tokens = token_data.get("tokens") or {}
|
||||
account_id = str(tokens.get("account_id", "") or "").strip() or None
|
||||
headers = {
|
||||
"Authorization": f"Bearer {creds['api_key']}",
|
||||
"Accept": "application/json",
|
||||
"User-Agent": "codex-cli",
|
||||
}
|
||||
if account_id:
|
||||
headers["ChatGPT-Account-Id"] = account_id
|
||||
with httpx.Client(timeout=15.0) as client:
|
||||
response = client.get(_resolve_codex_usage_url(creds.get("base_url", "")), headers=headers)
|
||||
response.raise_for_status()
|
||||
payload = response.json() or {}
|
||||
rate_limit = payload.get("rate_limit") or {}
|
||||
windows: list[AccountUsageWindow] = []
|
||||
for key, label in (("primary_window", "Session"), ("secondary_window", "Weekly")):
|
||||
window = rate_limit.get(key) or {}
|
||||
used = window.get("used_percent")
|
||||
if used is None:
|
||||
continue
|
||||
windows.append(
|
||||
AccountUsageWindow(
|
||||
label=label,
|
||||
used_percent=float(used),
|
||||
reset_at=_parse_dt(window.get("reset_at")),
|
||||
)
|
||||
)
|
||||
details: list[str] = []
|
||||
credits = payload.get("credits") or {}
|
||||
if credits.get("has_credits"):
|
||||
balance = credits.get("balance")
|
||||
if isinstance(balance, (int, float)):
|
||||
details.append(f"Credits balance: ${float(balance):.2f}")
|
||||
elif credits.get("unlimited"):
|
||||
details.append("Credits balance: unlimited")
|
||||
return AccountUsageSnapshot(
|
||||
provider="openai-codex",
|
||||
source="usage_api",
|
||||
fetched_at=_utc_now(),
|
||||
plan=_title_case_slug(payload.get("plan_type")),
|
||||
windows=tuple(windows),
|
||||
details=tuple(details),
|
||||
)
|
||||
|
||||
|
||||
def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]:
|
||||
token = (resolve_anthropic_token() or "").strip()
|
||||
if not token:
|
||||
return None
|
||||
if not _is_oauth_token(token):
|
||||
return AccountUsageSnapshot(
|
||||
provider="anthropic",
|
||||
source="oauth_usage_api",
|
||||
fetched_at=_utc_now(),
|
||||
unavailable_reason="Anthropic account limits are only available for OAuth-backed Claude accounts.",
|
||||
)
|
||||
headers = {
|
||||
"Authorization": f"Bearer {token}",
|
||||
"Accept": "application/json",
|
||||
"Content-Type": "application/json",
|
||||
"anthropic-beta": "oauth-2025-04-20",
|
||||
"User-Agent": "claude-code/2.1.0",
|
||||
}
|
||||
with httpx.Client(timeout=15.0) as client:
|
||||
response = client.get("https://api.anthropic.com/api/oauth/usage", headers=headers)
|
||||
response.raise_for_status()
|
||||
payload = response.json() or {}
|
||||
windows: list[AccountUsageWindow] = []
|
||||
mapping = (
|
||||
("five_hour", "Current session"),
|
||||
("seven_day", "Current week"),
|
||||
("seven_day_opus", "Opus week"),
|
||||
("seven_day_sonnet", "Sonnet week"),
|
||||
)
|
||||
for key, label in mapping:
|
||||
window = payload.get(key) or {}
|
||||
util = window.get("utilization")
|
||||
if util is None:
|
||||
continue
|
||||
used = float(util) * 100 if float(util) <= 1 else float(util)
|
||||
windows.append(
|
||||
AccountUsageWindow(
|
||||
label=label,
|
||||
used_percent=used,
|
||||
reset_at=_parse_dt(window.get("resets_at")),
|
||||
)
|
||||
)
|
||||
details: list[str] = []
|
||||
extra = payload.get("extra_usage") or {}
|
||||
if extra.get("is_enabled"):
|
||||
used_credits = extra.get("used_credits")
|
||||
monthly_limit = extra.get("monthly_limit")
|
||||
currency = extra.get("currency") or "USD"
|
||||
if isinstance(used_credits, (int, float)) and isinstance(monthly_limit, (int, float)):
|
||||
details.append(
|
||||
f"Extra usage: {used_credits:.2f} / {monthly_limit:.2f} {currency}"
|
||||
)
|
||||
return AccountUsageSnapshot(
|
||||
provider="anthropic",
|
||||
source="oauth_usage_api",
|
||||
fetched_at=_utc_now(),
|
||||
windows=tuple(windows),
|
||||
details=tuple(details),
|
||||
)
|
||||
|
||||
|
||||
def _fetch_openrouter_account_usage(base_url: Optional[str], api_key: Optional[str]) -> Optional[AccountUsageSnapshot]:
|
||||
runtime = resolve_runtime_provider(
|
||||
requested="openrouter",
|
||||
explicit_base_url=base_url,
|
||||
explicit_api_key=api_key,
|
||||
)
|
||||
token = str(runtime.get("api_key", "") or "").strip()
|
||||
if not token:
|
||||
return None
|
||||
normalized = str(runtime.get("base_url", "") or "").rstrip("/")
|
||||
credits_url = f"{normalized}/credits"
|
||||
key_url = f"{normalized}/key"
|
||||
headers = {
|
||||
"Authorization": f"Bearer {token}",
|
||||
"Accept": "application/json",
|
||||
}
|
||||
with httpx.Client(timeout=10.0) as client:
|
||||
credits_resp = client.get(credits_url, headers=headers)
|
||||
credits_resp.raise_for_status()
|
||||
credits = (credits_resp.json() or {}).get("data") or {}
|
||||
try:
|
||||
key_resp = client.get(key_url, headers=headers)
|
||||
key_resp.raise_for_status()
|
||||
key_data = (key_resp.json() or {}).get("data") or {}
|
||||
except Exception:
|
||||
key_data = {}
|
||||
total_credits = float(credits.get("total_credits") or 0.0)
|
||||
total_usage = float(credits.get("total_usage") or 0.0)
|
||||
details = [f"Credits balance: ${max(0.0, total_credits - total_usage):.2f}"]
|
||||
windows: list[AccountUsageWindow] = []
|
||||
limit = key_data.get("limit")
|
||||
limit_remaining = key_data.get("limit_remaining")
|
||||
limit_reset = str(key_data.get("limit_reset") or "").strip()
|
||||
usage = key_data.get("usage")
|
||||
if (
|
||||
isinstance(limit, (int, float))
|
||||
and float(limit) > 0
|
||||
and isinstance(limit_remaining, (int, float))
|
||||
and 0 <= float(limit_remaining) <= float(limit)
|
||||
):
|
||||
limit_value = float(limit)
|
||||
remaining_value = float(limit_remaining)
|
||||
used_percent = ((limit_value - remaining_value) / limit_value) * 100
|
||||
detail_parts = [f"${remaining_value:.2f} of ${limit_value:.2f} remaining"]
|
||||
if limit_reset:
|
||||
detail_parts.append(f"resets {limit_reset}")
|
||||
windows.append(
|
||||
AccountUsageWindow(
|
||||
label="API key quota",
|
||||
used_percent=used_percent,
|
||||
detail=" • ".join(detail_parts),
|
||||
)
|
||||
)
|
||||
if isinstance(usage, (int, float)):
|
||||
usage_parts = [f"API key usage: ${float(usage):.2f} total"]
|
||||
for value, label in (
|
||||
(key_data.get("usage_daily"), "today"),
|
||||
(key_data.get("usage_weekly"), "this week"),
|
||||
(key_data.get("usage_monthly"), "this month"),
|
||||
):
|
||||
if isinstance(value, (int, float)) and float(value) > 0:
|
||||
usage_parts.append(f"${float(value):.2f} {label}")
|
||||
details.append(" • ".join(usage_parts))
|
||||
return AccountUsageSnapshot(
|
||||
provider="openrouter",
|
||||
source="credits_api",
|
||||
fetched_at=_utc_now(),
|
||||
windows=tuple(windows),
|
||||
details=tuple(details),
|
||||
)
|
||||
|
||||
|
||||
def fetch_account_usage(
|
||||
provider: Optional[str],
|
||||
*,
|
||||
base_url: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
) -> Optional[AccountUsageSnapshot]:
|
||||
normalized = str(provider or "").strip().lower()
|
||||
if normalized in {"", "auto", "custom"}:
|
||||
return None
|
||||
try:
|
||||
if normalized == "openai-codex":
|
||||
return _fetch_codex_account_usage()
|
||||
if normalized == "anthropic":
|
||||
return _fetch_anthropic_account_usage()
|
||||
if normalized == "openrouter":
|
||||
return _fetch_openrouter_account_usage(base_url, api_key)
|
||||
except Exception:
|
||||
return None
|
||||
return None
|
||||
1510
agent/agent_init.py
1510
agent/agent_init.py
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,68 +0,0 @@
|
||||
"""Async/sync bridging helpers.
|
||||
|
||||
The codebase has ~30 sites that schedule a coroutine onto an event loop from a
|
||||
worker thread via :func:`asyncio.run_coroutine_threadsafe`. That function can
|
||||
raise :class:`RuntimeError` (e.g. the loop was closed during a shutdown race),
|
||||
and when it does the coroutine object is never awaited and never closed —
|
||||
which triggers a ``"coroutine '<name>' was never awaited"`` RuntimeWarning and
|
||||
leaks the coroutine's frame until GC.
|
||||
|
||||
:func:`safe_schedule_threadsafe` wraps the call, closes the coroutine on
|
||||
scheduling failure, and returns ``None`` (instead of a half-formed future) so
|
||||
callers can branch cleanly:
|
||||
|
||||
fut = safe_schedule_threadsafe(coro, loop)
|
||||
if fut is None:
|
||||
return # or fallback behavior
|
||||
fut.result(timeout=5)
|
||||
|
||||
The helper deliberately does NOT also handle ``future.result()`` failures —
|
||||
that is a separate concern. Once the loop has accepted the coroutine, its
|
||||
lifecycle belongs to the loop, not the scheduling thread.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from concurrent.futures import Future
|
||||
from typing import Any, Coroutine, Optional
|
||||
|
||||
|
||||
_DEFAULT_LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def safe_schedule_threadsafe(
|
||||
coro: Coroutine[Any, Any, Any],
|
||||
loop: Optional[asyncio.AbstractEventLoop],
|
||||
*,
|
||||
logger: Optional[logging.Logger] = None,
|
||||
log_message: str = "Failed to schedule coroutine on loop",
|
||||
log_level: int = logging.DEBUG,
|
||||
) -> Optional[Future]:
|
||||
"""Schedule ``coro`` on ``loop`` from a sync context, leak-safe.
|
||||
|
||||
Returns the :class:`concurrent.futures.Future` on success, or ``None`` if
|
||||
the loop is missing or :func:`asyncio.run_coroutine_threadsafe` raised
|
||||
(e.g. the loop was closed during a shutdown race). In all failure paths
|
||||
the coroutine is :meth:`close`-d so it does not trigger
|
||||
``"coroutine was never awaited"`` warnings or leak its frame.
|
||||
|
||||
Callers retain full control over what to do with the returned future
|
||||
(call ``.result(timeout=...)``, attach ``add_done_callback``, ignore it
|
||||
fire-and-forget, etc.).
|
||||
"""
|
||||
log = logger if logger is not None else _DEFAULT_LOGGER
|
||||
|
||||
if loop is None:
|
||||
if asyncio.iscoroutine(coro):
|
||||
coro.close()
|
||||
log.log(log_level, "%s: loop is None", log_message)
|
||||
return None
|
||||
|
||||
try:
|
||||
return asyncio.run_coroutine_threadsafe(coro, loop)
|
||||
except Exception as exc:
|
||||
if asyncio.iscoroutine(coro):
|
||||
coro.close()
|
||||
log.log(log_level, "%s: %s", log_message, exc)
|
||||
return None
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,555 +0,0 @@
|
||||
"""Microsoft Entra ID adapter for Microsoft Foundry.
|
||||
|
||||
Provides keyless authentication for Microsoft Foundry deployments using the
|
||||
`azure-identity` SDK's `DefaultAzureCredential` chain (env service principal
|
||||
→ workload identity → managed identity → VS Code → Azure CLI → azd →
|
||||
PowerShell → broker).
|
||||
|
||||
Architecture mirrors `agent/bedrock_adapter.py`:
|
||||
|
||||
* Lazy import. `azure-identity` is only loaded when ``model.auth_mode =
|
||||
entra_id`` is selected. Users who stick with `AZURE_FOUNDRY_API_KEY`
|
||||
never pay the import cost.
|
||||
* SDK-callable contract. The public entry point ``build_token_provider``
|
||||
returns a zero-arg callable produced by ``get_bearer_token_provider`` —
|
||||
this is exactly the value Microsoft's documented sample plugs into
|
||||
``OpenAI(api_key=token_provider, base_url=...)``. The OpenAI SDK calls
|
||||
it before every request, so token refresh is transparent.
|
||||
* Three explicit consumer-side helpers (display / cache / http-bearer)
|
||||
rather than one generic "materialize" function — splitting them by
|
||||
purpose prevents accidental token-minting in logging paths or token
|
||||
leakage into cache keys / dashboard JSON.
|
||||
* No persisted JWT. ``azure-identity`` caches in-process and (where
|
||||
available) in the OS keychain or ``~/.IdentityService``. Hermes does
|
||||
not duplicate that storage in ``auth.json``.
|
||||
|
||||
Reference: https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/configure-entra-id
|
||||
|
||||
Requires: ``azure-identity`` (optional dependency — only needed when
|
||||
``model.auth_mode = entra_id``).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import functools
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Callable, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Microsoft-documented scope for Foundry inference auth. Both the new
|
||||
# Foundry portal and the legacy Azure OpenAI managed-identity docs use
|
||||
# this scope for ALL Foundry endpoint shapes (*.openai.azure.com,
|
||||
# *.services.ai.azure.com, *.ai.azure.com). The older control-plane
|
||||
# scope ``https://cognitiveservices.azure.com/.default`` is for ARM
|
||||
# resource management and is rejected for inference by newer
|
||||
# resources — users with that requirement override via
|
||||
# ``model.entra.scope`` in config.yaml.
|
||||
SCOPE_AI_AZURE_DEFAULT = "https://ai.azure.com/.default"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Lazy SDK import — only loaded when the Entra path is actually used.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_AZURE_IDENTITY_FEATURE = "provider.azure_identity"
|
||||
|
||||
|
||||
def has_azure_identity_installed() -> bool:
|
||||
"""Return True if `azure-identity` can be imported right now.
|
||||
|
||||
Cheap check — does not walk the credential chain.
|
||||
"""
|
||||
try:
|
||||
import azure.identity # noqa: F401
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _require_azure_identity():
|
||||
"""Import ``azure.identity``, lazy-installing it if allowed.
|
||||
|
||||
Raises ``ImportError`` with a clear actionable message when the
|
||||
package is missing and lazy installs are disabled.
|
||||
"""
|
||||
try:
|
||||
import azure.identity as _ai
|
||||
return _ai
|
||||
except ImportError:
|
||||
try:
|
||||
from tools.lazy_deps import ensure, FeatureUnavailable
|
||||
except ImportError as exc:
|
||||
raise ImportError(
|
||||
"The 'azure-identity' package is required for Azure AI "
|
||||
"Foundry Entra ID authentication. Install it with: "
|
||||
"pip install azure-identity"
|
||||
) from exc
|
||||
|
||||
try:
|
||||
ensure(_AZURE_IDENTITY_FEATURE, prompt=False)
|
||||
except FeatureUnavailable as exc:
|
||||
raise ImportError(
|
||||
"The 'azure-identity' package is required for Azure AI "
|
||||
"Foundry Entra ID authentication. " + str(exc)
|
||||
) from exc
|
||||
|
||||
# Retry import after lazy install.
|
||||
import azure.identity as _ai # noqa: WPS440
|
||||
return _ai
|
||||
|
||||
|
||||
def reset_credential_cache() -> None:
|
||||
"""Clear the cached ``DefaultAzureCredential``. Used by tests and
|
||||
profile switches.
|
||||
|
||||
Defensive against tests that ``monkeypatch.setattr`` over
|
||||
``build_credential`` with a plain (non-lru-cached) function — those
|
||||
won't expose ``cache_clear()`` until pytest reverts the patch.
|
||||
"""
|
||||
cache_clear = getattr(build_credential, "cache_clear", None)
|
||||
if callable(cache_clear):
|
||||
cache_clear()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Token-provider construction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class EntraIdentityConfig:
|
||||
"""Serializable Entra ID config.
|
||||
|
||||
Captures the Hermes-managed Entra knobs we need outside Azure SDK
|
||||
environment configuration. Everything else
|
||||
(tenant ID, service principal secret, federated token file, sovereign
|
||||
cloud authority, etc.) flows through azure-identity's standard
|
||||
``AZURE_*`` env vars — see the Bedrock pattern in
|
||||
``hermes_cli/runtime_provider.py:1310-1377`` for the analogous
|
||||
"let the SDK read env" approach.
|
||||
|
||||
``scope`` is Microsoft's documented Foundry inference audience. Almost
|
||||
everyone uses the default; sovereign-cloud / non-standard tenants can
|
||||
override via ``model.entra.scope``. Identity selection (user-assigned
|
||||
managed identity, workload identity, service principal, tenant, authority)
|
||||
stays in the standard Azure SDK env vars such as ``AZURE_CLIENT_ID``.
|
||||
|
||||
``exclude_interactive_browser`` is kept as an internal constructor knob
|
||||
so probes stay non-interactive by default. It is not written by the setup
|
||||
wizard.
|
||||
|
||||
The dataclass is frozen so it's hashable for ``functools.lru_cache``
|
||||
keying, and serializable across multiprocessing boundaries (workers
|
||||
rebuild the credential inside their own process).
|
||||
"""
|
||||
|
||||
scope: str = SCOPE_AI_AZURE_DEFAULT
|
||||
exclude_interactive_browser: bool = True
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
scope = str(self.scope or "").strip() or SCOPE_AI_AZURE_DEFAULT
|
||||
object.__setattr__(self, "scope", scope)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"scope": self.scope,
|
||||
"exclude_interactive_browser": self.exclude_interactive_browser,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Optional[Dict[str, Any]],
|
||||
*, default_scope: Optional[str] = None) -> "EntraIdentityConfig":
|
||||
data = data or {}
|
||||
scope = str(data.get("scope") or "").strip() or default_scope or SCOPE_AI_AZURE_DEFAULT
|
||||
exclude_browser = bool(data.get("exclude_interactive_browser", True))
|
||||
return cls(
|
||||
scope=scope,
|
||||
exclude_interactive_browser=exclude_browser,
|
||||
)
|
||||
|
||||
|
||||
def _build_default_credential(config: EntraIdentityConfig) -> Any:
|
||||
"""Construct a ``DefaultAzureCredential`` for ``config``.
|
||||
|
||||
Only Hermes-selected knobs are passed as kwargs. Everything else
|
||||
(tenant, service principal secret, federated token file, sovereign
|
||||
cloud authority, etc.) is read by ``azure-identity`` from the
|
||||
standard ``AZURE_*`` environment variables — see Microsoft's
|
||||
documented credential resolution chain. Users configure those in
|
||||
``~/.hermes/.env`` or the deployment environment.
|
||||
"""
|
||||
ai = _require_azure_identity()
|
||||
kwargs: Dict[str, Any] = {}
|
||||
# SDK default is True (browser excluded); only pass when the user
|
||||
# explicitly opts in to interactive browser auth.
|
||||
if not config.exclude_interactive_browser:
|
||||
kwargs["exclude_interactive_browser_credential"] = False
|
||||
return ai.DefaultAzureCredential(**kwargs)
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=1)
|
||||
def build_credential(config: EntraIdentityConfig) -> Any:
|
||||
"""Return the cached ``DefaultAzureCredential`` for ``config``.
|
||||
|
||||
Hermes processes use exactly one Entra config at a time (the
|
||||
``model.entra.*`` block in config.yaml drives every aux task,
|
||||
subagent, and credential probe in the session). ``maxsize=1`` is
|
||||
intentional: it reflects the actual usage pattern and keeps the
|
||||
cache trivially small.
|
||||
|
||||
``EntraIdentityConfig`` is a frozen dataclass, so it's hashable and
|
||||
safe as an LRU-cache key. ``functools.lru_cache`` is thread-safe in
|
||||
CPython.
|
||||
|
||||
If two distinct configs are ever passed (tests do this; production
|
||||
rarely), the LRU eviction handles it correctly — each call still
|
||||
returns a credential matching its config; only one is cached at a
|
||||
time. Use :func:`reset_credential_cache` to clear (e.g. in tests).
|
||||
"""
|
||||
return _build_default_credential(config)
|
||||
|
||||
|
||||
def build_token_provider(scope: Optional[str] = None,
|
||||
*,
|
||||
config: Optional[EntraIdentityConfig] = None,
|
||||
base_url: Optional[str] = None,
|
||||
exclude_interactive_browser: bool = True,
|
||||
) -> Callable[[], str]:
|
||||
"""Return a zero-arg callable that mints a fresh Entra bearer JWT.
|
||||
|
||||
The returned callable is exactly what Microsoft's documented Foundry
|
||||
sample expects::
|
||||
|
||||
from openai import OpenAI
|
||||
client = OpenAI(
|
||||
base_url="https://my-resource.openai.azure.com/openai/v1/",
|
||||
api_key=build_token_provider(),
|
||||
)
|
||||
|
||||
Scope resolution order:
|
||||
1. ``config.scope`` when a config object is supplied
|
||||
2. explicit ``scope`` kwarg
|
||||
3. ``SCOPE_AI_AZURE_DEFAULT`` (Microsoft's documented Foundry scope)
|
||||
|
||||
``base_url`` is unused today and kept for back-compat. Tenant /
|
||||
service-principal / sovereign-cloud configuration flows through
|
||||
``azure-identity``'s standard ``AZURE_*`` environment variables —
|
||||
see :func:`_build_default_credential` for the rationale.
|
||||
|
||||
NOT serializable across process boundaries. For multiprocessing
|
||||
workers, serialize the ``EntraIdentityConfig`` and rebuild the
|
||||
provider inside the worker.
|
||||
"""
|
||||
ai = _require_azure_identity()
|
||||
if config is None:
|
||||
config = EntraIdentityConfig(
|
||||
scope=scope or SCOPE_AI_AZURE_DEFAULT,
|
||||
exclude_interactive_browser=exclude_interactive_browser,
|
||||
)
|
||||
credential = build_credential(config)
|
||||
return ai.get_bearer_token_provider(credential, config.scope)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Credential probing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def has_azure_identity_credentials(scope: Optional[str] = None,
|
||||
*,
|
||||
config: Optional[EntraIdentityConfig] = None,
|
||||
timeout_seconds: float = 10.0,
|
||||
allow_install: bool = True,
|
||||
**overrides: Any) -> bool:
|
||||
"""Best-effort probe: can `DefaultAzureCredential` mint a token now?
|
||||
|
||||
Runs ``credential.get_token(scope)`` under a thread-based timeout so
|
||||
a slow token service can't hang the caller. Returns False on any
|
||||
error — never raises. Use for ``hermes doctor`` /
|
||||
``hermes auth status`` / wizard preflight.
|
||||
|
||||
``allow_install``: when True (default) and ``azure-identity`` is not
|
||||
importable, the adapter triggers the standard lazy-install path
|
||||
(subject to ``security.allow_lazy_installs``) before probing. Set
|
||||
False to make this strictly an "is installed?" check — used on hot
|
||||
paths like CLI startup where we never want pip to run.
|
||||
|
||||
NOT used by ``is_provider_configured()`` — that path is structural
|
||||
only (no token mint), so CLI startup doesn't pay this latency.
|
||||
"""
|
||||
if not has_azure_identity_installed():
|
||||
if not allow_install:
|
||||
return False
|
||||
try:
|
||||
_require_azure_identity()
|
||||
except ImportError as exc:
|
||||
logger.debug("azure-identity lazy install unavailable: %s", exc)
|
||||
return False
|
||||
if config is None:
|
||||
effective_scope = (scope or "").strip() or SCOPE_AI_AZURE_DEFAULT
|
||||
config = EntraIdentityConfig(scope=effective_scope, **overrides)
|
||||
|
||||
result = {"ok": False}
|
||||
|
||||
def _probe() -> None:
|
||||
try:
|
||||
credential = build_credential(config)
|
||||
tok = credential.get_token(config.scope)
|
||||
result["ok"] = bool(getattr(tok, "token", None))
|
||||
except Exception as exc:
|
||||
logger.debug("Entra credential probe failed: %s", exc)
|
||||
result["ok"] = False
|
||||
|
||||
thread = threading.Thread(target=_probe, daemon=True)
|
||||
thread.start()
|
||||
thread.join(timeout=max(0.01, timeout_seconds))
|
||||
if thread.is_alive():
|
||||
logger.debug("Entra token service probe timed out after %ss", timeout_seconds)
|
||||
return False
|
||||
return bool(result.get("ok"))
|
||||
|
||||
|
||||
def describe_active_credential(config: Optional[EntraIdentityConfig] = None,
|
||||
*,
|
||||
scope: Optional[str] = None,
|
||||
timeout_seconds: float = 10.0,
|
||||
allow_install: bool = True,
|
||||
**overrides: Any) -> Dict[str, Any]:
|
||||
"""Return diagnostic info about the active credential chain.
|
||||
|
||||
Best-effort: runs ``get_token()`` and inspects what came back.
|
||||
Designed for ``hermes doctor`` and the wizard preflight — never
|
||||
raises, returns ``{"ok": False, "error": ...}`` on failure.
|
||||
|
||||
``allow_install``: when True (default) and ``azure-identity`` is not
|
||||
importable, the adapter triggers the standard lazy-install path
|
||||
(subject to ``security.allow_lazy_installs``) before probing. The
|
||||
install failure is surfaced as the diagnostic error when it fails.
|
||||
Set False for hot CLI paths that should never trigger pip.
|
||||
|
||||
``azure-identity`` doesn't expose the winning inner credential as
|
||||
a public field, so we report a coarse picture (env vars present,
|
||||
token expiry, claims-derived tenant) rather than the credential
|
||||
class name. Users wanting the precise class can run with
|
||||
``AZURE_LOG_LEVEL=DEBUG``.
|
||||
"""
|
||||
info: Dict[str, Any] = {"ok": False}
|
||||
if not has_azure_identity_installed():
|
||||
if not allow_install:
|
||||
info["error"] = "azure-identity not installed"
|
||||
info["hint"] = (
|
||||
"pip install azure-identity (or rely on lazy install at "
|
||||
"first use)"
|
||||
)
|
||||
return info
|
||||
try:
|
||||
_require_azure_identity()
|
||||
except ImportError as exc:
|
||||
info["error"] = str(exc) or "azure-identity not installed"
|
||||
info["hint"] = (
|
||||
"pip install azure-identity manually, or enable lazy "
|
||||
"installs (security.allow_lazy_installs: true in "
|
||||
"config.yaml)."
|
||||
)
|
||||
return info
|
||||
|
||||
if config is None:
|
||||
effective_scope = (scope or "").strip() or SCOPE_AI_AZURE_DEFAULT
|
||||
config = EntraIdentityConfig(scope=effective_scope, **overrides)
|
||||
|
||||
info["scope"] = config.scope
|
||||
# Tenant / authority / service-principal config flow through the
|
||||
# standard ``AZURE_*`` env vars; surface them below.
|
||||
if os.environ.get("AZURE_TENANT_ID", "").strip():
|
||||
info["tenant_id_env"] = os.environ["AZURE_TENANT_ID"].strip()
|
||||
|
||||
# Surface which env-var sources are present without minting yet.
|
||||
env_sources = []
|
||||
if os.environ.get("AZURE_FEDERATED_TOKEN_FILE", "").strip():
|
||||
env_sources.append("WorkloadIdentityCredential (AZURE_FEDERATED_TOKEN_FILE)")
|
||||
if (os.environ.get("AZURE_CLIENT_ID", "").strip()
|
||||
and os.environ.get("AZURE_CLIENT_SECRET", "").strip()
|
||||
and os.environ.get("AZURE_TENANT_ID", "").strip()):
|
||||
env_sources.append("EnvironmentCredential (client secret)")
|
||||
if os.environ.get("IDENTITY_ENDPOINT", "").strip() or os.environ.get("MSI_ENDPOINT", "").strip():
|
||||
env_sources.append("ManagedIdentityCredential (IDENTITY_ENDPOINT)")
|
||||
info["env_sources"] = env_sources
|
||||
|
||||
# Now try minting.
|
||||
result: Dict[str, Any] = {}
|
||||
|
||||
def _probe() -> None:
|
||||
try:
|
||||
credential = build_credential(config)
|
||||
tok = credential.get_token(config.scope)
|
||||
result["token"] = tok
|
||||
except Exception as exc:
|
||||
result["error"] = str(exc)
|
||||
|
||||
thread = threading.Thread(target=_probe, daemon=True)
|
||||
thread.start()
|
||||
thread.join(timeout=max(0.01, timeout_seconds))
|
||||
if thread.is_alive():
|
||||
info["error"] = f"Token probe timed out after {timeout_seconds:.0f}s"
|
||||
info["hint"] = (
|
||||
"DefaultAzureCredential can be slow when the token service is unreachable "
|
||||
"or when az login state is stale. Try `az login` or set "
|
||||
"AZURE_CLIENT_ID / AZURE_TENANT_ID / AZURE_CLIENT_SECRET."
|
||||
)
|
||||
return info
|
||||
|
||||
if "error" in result:
|
||||
info["error"] = result["error"]
|
||||
return info
|
||||
|
||||
token = result.get("token")
|
||||
if token is None:
|
||||
info["error"] = "credential chain exhausted"
|
||||
return info
|
||||
|
||||
info["ok"] = True
|
||||
info["expires_on"] = getattr(token, "expires_on", None)
|
||||
return info
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Consumer-side helpers — split by purpose to prevent accidental token
|
||||
# minting in logging / cache-key / dashboard paths.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def is_token_provider(value: Any) -> bool:
|
||||
"""Return True when ``value`` is a callable Entra token provider.
|
||||
|
||||
Used at the seams where a consumer must decide between
|
||||
string-API-key semantics and bearer-callable semantics.
|
||||
"""
|
||||
return callable(value) and not isinstance(value, str)
|
||||
|
||||
|
||||
def materialize_bearer_for_http(value: Any) -> str:
|
||||
"""Return a fresh Bearer JWT for a manual HTTP request.
|
||||
|
||||
Only call this at sites that must construct an ``Authorization``
|
||||
header outside the OpenAI SDK (e.g. ``hermes_cli/azure_detect.py``).
|
||||
Calls the callable exactly once and returns the resulting token.
|
||||
|
||||
**Anthropic SDK integration:** the Anthropic Python SDK does not
|
||||
accept a ``Callable[[], str]`` for ``auth_token``. Instead,
|
||||
:func:`build_bearer_http_client` returns an ``httpx.Client`` whose
|
||||
request event hook calls this function and rewrites the
|
||||
``Authorization`` header per request — and that client is passed to
|
||||
the Anthropic SDK via ``http_client=...``. See
|
||||
:func:`agent.anthropic_adapter.build_anthropic_client` for the
|
||||
consumer.
|
||||
|
||||
Raises ``ValueError`` if ``value`` is not a callable token provider
|
||||
or non-empty string.
|
||||
"""
|
||||
if is_token_provider(value):
|
||||
token = value()
|
||||
if not isinstance(token, str) or not token:
|
||||
raise ValueError("token provider returned empty value")
|
||||
return token
|
||||
if isinstance(value, str) and value:
|
||||
return value
|
||||
raise ValueError("no usable api_key / token provider")
|
||||
|
||||
|
||||
def build_bearer_http_client(token_provider: Callable[[], str], **httpx_kwargs: Any) -> Any:
|
||||
"""Return an ``httpx.Client`` that mints a fresh Entra bearer JWT
|
||||
per outbound request.
|
||||
|
||||
The Anthropic SDK (≤ 0.86.0 at the time of writing) stores
|
||||
``api_key`` / ``auth_token`` as static strings and computes the
|
||||
``Authorization`` header at construction time. To get per-request
|
||||
token refresh (the Microsoft-recommended Foundry pattern for
|
||||
callable bearer providers), we install an httpx ``request`` event
|
||||
hook on a custom client and pass that client to the SDK via
|
||||
``http_client=...``. The hook:
|
||||
|
||||
1. Calls :func:`materialize_bearer_for_http` to mint a fresh JWT
|
||||
(azure-identity caches internally — this is cheap when the
|
||||
cached token is still valid).
|
||||
2. Strips any pre-set ``Authorization`` / ``api-key`` /
|
||||
``x-api-key`` headers the SDK may have added (avoids
|
||||
conflicting auth values).
|
||||
3. Sets ``Authorization: Bearer <fresh-jwt>``.
|
||||
|
||||
``token_provider`` must be a zero-arg callable returning a string —
|
||||
typically the result of :func:`build_token_provider`.
|
||||
|
||||
``httpx_kwargs`` are forwarded verbatim to ``httpx.Client(...)`` so
|
||||
callers can attach a ``timeout``, ``transport``, ``proxy``, etc.
|
||||
|
||||
Raises ``ImportError`` if ``httpx`` is not installed (it is a
|
||||
transitive dependency of both ``openai`` and ``anthropic`` SDKs, so
|
||||
in practice always available when this helper is reached).
|
||||
"""
|
||||
if not is_token_provider(token_provider):
|
||||
raise ValueError(
|
||||
"build_bearer_http_client requires a zero-arg callable "
|
||||
"token provider"
|
||||
)
|
||||
|
||||
try:
|
||||
import httpx
|
||||
except ImportError as exc: # pragma: no cover — httpx ships with openai/anthropic
|
||||
raise ImportError(
|
||||
"httpx is required for Entra ID bearer auth on Microsoft Foundry "
|
||||
"Anthropic-style endpoints. It is normally a transitive "
|
||||
"dependency of the openai/anthropic SDKs."
|
||||
) from exc
|
||||
|
||||
def _inject_bearer(request: "httpx.Request") -> None:
|
||||
try:
|
||||
token = materialize_bearer_for_http(token_provider)
|
||||
except ValueError as exc:
|
||||
# Token provider failed (chain exhausted, token service unreachable,
|
||||
# az login expired, etc.). Strip any auth headers the SDK
|
||||
# may have set — including our own placeholder sentinel
|
||||
# ``entra-id-bearer-via-http-hook`` from
|
||||
# ``_build_anthropic_client_with_bearer_hook`` — so the
|
||||
# outbound request hits Azure with NO Authorization rather
|
||||
# than with the placeholder. Azure returns a clean 401
|
||||
# "missing auth" that is easier to diagnose than a 401
|
||||
# against the sentinel string, and the sentinel never
|
||||
# appears in upstream access logs.
|
||||
#
|
||||
# Log at WARNING (not DEBUG) so the misconfiguration is
|
||||
# visible at default log levels.
|
||||
logger.warning(
|
||||
"Bearer hook: Entra ID token provider returned empty (%s) "
|
||||
"— stripping Authorization headers. Azure will respond 401. "
|
||||
"Run `hermes doctor` or `az login` to recover.",
|
||||
exc,
|
||||
)
|
||||
for header_name in ("Authorization", "authorization", "Api-Key", "api-key", "X-Api-Key", "x-api-key"):
|
||||
request.headers.pop(header_name, None)
|
||||
return
|
||||
for header_name in ("Authorization", "authorization", "Api-Key", "api-key", "X-Api-Key", "x-api-key"):
|
||||
request.headers.pop(header_name, None)
|
||||
request.headers["Authorization"] = f"Bearer {token}"
|
||||
|
||||
return httpx.Client(
|
||||
event_hooks={"request": [_inject_bearer]},
|
||||
**httpx_kwargs,
|
||||
)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"EntraIdentityConfig",
|
||||
"SCOPE_AI_AZURE_DEFAULT",
|
||||
"build_bearer_http_client",
|
||||
"build_credential",
|
||||
"build_token_provider",
|
||||
"describe_active_credential",
|
||||
"has_azure_identity_credentials",
|
||||
"has_azure_identity_installed",
|
||||
"is_token_provider",
|
||||
"materialize_bearer_for_http",
|
||||
"reset_credential_cache",
|
||||
]
|
||||
@@ -1,582 +0,0 @@
|
||||
"""Background memory/skill review — fork the agent to evaluate the turn.
|
||||
|
||||
After every turn, ``AIAgent.run_conversation`` may call
|
||||
:func:`spawn_background_review` to fire off a daemon thread that replays
|
||||
the conversation snapshot in a forked :class:`AIAgent` and asks itself
|
||||
"should any skill/memory be saved or updated?". Writes go straight to
|
||||
the memory + skill stores. Main conversation and prompt cache are never
|
||||
touched.
|
||||
|
||||
The fork inherits the parent's live runtime (provider, model, base_url,
|
||||
credentials, cached system prompt) so it hits the same prefix cache and
|
||||
uses the same auth. It runs with a tool whitelist limited to memory and
|
||||
skill management tools; everything else is denied at runtime.
|
||||
|
||||
See the ``hermes-agent-dev`` skill (``references/self-improvement-loop.md``)
|
||||
for invariants and PR review criteria.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Review-prompt strings — used by ``spawn_background_review_thread`` to build
|
||||
# the user-message that the forked review agent receives. AIAgent exposes
|
||||
# them as class attributes (``_MEMORY_REVIEW_PROMPT`` etc.) for back-compat;
|
||||
# the actual text lives here so future edits are one-place.
|
||||
_MEMORY_REVIEW_PROMPT = (
|
||||
"Review the conversation above and consider saving to memory if appropriate.\n\n"
|
||||
"Focus on:\n"
|
||||
"1. Has the user revealed things about themselves — their persona, desires, "
|
||||
"preferences, or personal details worth remembering?\n"
|
||||
"2. Has the user expressed expectations about how you should behave, their work "
|
||||
"style, or ways they want you to operate?\n\n"
|
||||
"If something stands out, save it using the memory tool. "
|
||||
"If nothing is worth saving, just say 'Nothing to save.' and stop."
|
||||
)
|
||||
|
||||
_SKILL_REVIEW_PROMPT = (
|
||||
"Review the conversation above and update the skill library. Be "
|
||||
"ACTIVE — most sessions produce at least one skill update, even if "
|
||||
"small. A pass that does nothing is a missed learning opportunity, "
|
||||
"not a neutral outcome.\n\n"
|
||||
"Target shape of the library: CLASS-LEVEL skills, each with a rich "
|
||||
"SKILL.md and a `references/` directory for session-specific detail. "
|
||||
"Not a long flat list of narrow one-session-one-skill entries. This "
|
||||
"shapes HOW you update, not WHETHER you update.\n\n"
|
||||
"Signals to look for (any one of these warrants action):\n"
|
||||
" • User corrected your style, tone, format, legibility, or "
|
||||
"verbosity. Frustration signals like 'stop doing X', 'this is too "
|
||||
"verbose', 'don't format like this', 'why are you explaining', "
|
||||
"'just give me the answer', 'you always do Y and I hate it', or an "
|
||||
"explicit 'remember this' are FIRST-CLASS skill signals, not just "
|
||||
"memory signals. Update the relevant skill(s) to embed the "
|
||||
"preference so the next session starts already knowing.\n"
|
||||
" • User corrected your workflow, approach, or sequence of steps. "
|
||||
"Encode the correction as a pitfall or explicit step in the skill "
|
||||
"that governs that class of task.\n"
|
||||
" • Non-trivial technique, fix, workaround, debugging path, or "
|
||||
"tool-usage pattern emerged that a future session would benefit "
|
||||
"from. Capture it.\n"
|
||||
" • A skill that got loaded or consulted this session turned out "
|
||||
"to be wrong, missing a step, or outdated. Patch it NOW.\n\n"
|
||||
"Preference order — prefer the earliest action that fits, but do "
|
||||
"pick one when a signal above fired:\n"
|
||||
" 1. UPDATE A CURRENTLY-LOADED SKILL. Look back through the "
|
||||
"conversation for skills the user loaded via /skill-name or you "
|
||||
"read via skill_view. If any of them covers the territory of the "
|
||||
"new learning, PATCH that one first. It is the skill that was in "
|
||||
"play, so it's the right one to extend.\n"
|
||||
" 2. UPDATE AN EXISTING UMBRELLA (via skills_list + skill_view). "
|
||||
"If no loaded skill fits but an existing class-level skill does, "
|
||||
"patch it. Add a subsection, a pitfall, or broaden a trigger.\n"
|
||||
" 3. ADD A SUPPORT FILE under an existing umbrella. Skills can be "
|
||||
"packaged with three kinds of support files — use the right "
|
||||
"directory per kind:\n"
|
||||
" • `references/<topic>.md` — session-specific detail (error "
|
||||
"transcripts, reproduction recipes, provider quirks) AND "
|
||||
"condensed knowledge banks: quoted research, API docs, external "
|
||||
"authoritative excerpts, or domain notes you found while working "
|
||||
"on the problem. Write it concise and for the value of the task, "
|
||||
"not as a full mirror of upstream docs.\n"
|
||||
" • `templates/<name>.<ext>` — starter files meant to be "
|
||||
"copied and modified (boilerplate configs, scaffolding, a "
|
||||
"known-good example the agent can `reproduce with modifications`).\n"
|
||||
" • `scripts/<name>.<ext>` — statically re-runnable actions "
|
||||
"the skill can invoke directly (verification scripts, fixture "
|
||||
"generators, deterministic probes, anything the agent should run "
|
||||
"rather than hand-type each time).\n"
|
||||
" Add support files via skill_manage action=write_file with "
|
||||
"file_path starting 'references/', 'templates/', or 'scripts/'. "
|
||||
"The umbrella's SKILL.md should gain a one-line pointer to any "
|
||||
"new support file so future agents know it exists.\n"
|
||||
" 4. CREATE A NEW CLASS-LEVEL UMBRELLA SKILL when no existing "
|
||||
"skill covers the class. The name MUST be at the class level. "
|
||||
"The name MUST NOT be a specific PR number, error string, feature "
|
||||
"codename, library-alone name, or 'fix-X / debug-Y / audit-Z-today' "
|
||||
"session artifact. If the proposed name only makes sense for "
|
||||
"today's task, it's wrong — fall back to (1), (2), or (3).\n\n"
|
||||
"User-preference embedding (important): when the user expressed a "
|
||||
"style/format/workflow preference, the update belongs in the "
|
||||
"SKILL.md body, not just in memory. Memory captures 'who the user "
|
||||
"is and what the current situation and state of your operations "
|
||||
"are'; skills capture 'how to do this class of task for this "
|
||||
"user'. When they complain about how you handled a task, the "
|
||||
"skill that governs that task needs to carry the lesson.\n\n"
|
||||
"If you notice two existing skills that overlap, note it in your "
|
||||
"reply — the background curator handles consolidation at scale.\n\n"
|
||||
"Protected skills (DO NOT edit these):\n"
|
||||
" • Bundled skills (shipped with Hermes, e.g. 'hermes-agent').\n"
|
||||
" • Hub-installed skills (installed via 'hermes skills install').\n"
|
||||
" • Pinned skills (marked via 'hermes curator pin').\n"
|
||||
"If the only skills that need updating are protected, say\n"
|
||||
"'Nothing to save.' and stop.\n\n"
|
||||
"Do NOT capture (these become persistent self-imposed constraints "
|
||||
"that bite you later when the environment changes):\n"
|
||||
" • Environment-dependent failures: missing binaries, fresh-install "
|
||||
"errors, post-migration path mismatches, 'command not found', "
|
||||
"unconfigured credentials, uninstalled packages. The user can fix "
|
||||
"these — they are not durable rules.\n"
|
||||
" • Negative claims about tools or features ('browser tools do not "
|
||||
"work', 'X tool is broken', 'cannot use Y from execute_code'). These "
|
||||
"harden into refusals the agent cites against itself for months "
|
||||
"after the actual problem was fixed.\n"
|
||||
" • Session-specific transient errors that resolved before the "
|
||||
"conversation ended. If retrying worked, the lesson is the retry "
|
||||
"pattern, not the original failure.\n"
|
||||
" • One-off task narratives. A user asking 'summarize today's "
|
||||
"market' or 'analyze this PR' is not a class of work that warrants "
|
||||
"a skill.\n\n"
|
||||
"If a tool failed because of setup state, capture the FIX (install "
|
||||
"command, config step, env var to set) under an existing setup or "
|
||||
"troubleshooting skill — never 'this tool does not work' as a "
|
||||
"standalone constraint.\n\n"
|
||||
"'Nothing to save.' is a real option but should NOT be the "
|
||||
"default. If the session ran smoothly with no corrections and "
|
||||
"produced no new technique, just say 'Nothing to save.' and stop. "
|
||||
"Otherwise, act."
|
||||
)
|
||||
|
||||
_COMBINED_REVIEW_PROMPT = (
|
||||
"Review the conversation above and update two things:\n\n"
|
||||
"**Memory**: who the user is. Did the user reveal persona, "
|
||||
"desires, preferences, personal details, or expectations about "
|
||||
"how you should behave? Save facts about the user and durable "
|
||||
"preferences with the memory tool.\n\n"
|
||||
"**Skills**: how to do this class of task. Be ACTIVE — most "
|
||||
"sessions produce at least one skill update. A pass that does "
|
||||
"nothing is a missed learning opportunity, not a neutral outcome.\n\n"
|
||||
"Target shape of the skill library: CLASS-LEVEL skills with a rich "
|
||||
"SKILL.md and a `references/` directory for session-specific detail. "
|
||||
"Not a long flat list of narrow one-session-one-skill entries.\n\n"
|
||||
"Signals that warrant a skill update (any one is enough):\n"
|
||||
" • User corrected your style, tone, format, legibility, "
|
||||
"verbosity, or approach. Frustration is a FIRST-CLASS skill "
|
||||
"signal, not just a memory signal. 'stop doing X', 'don't format "
|
||||
"like this', 'I hate when you Y' — embed the lesson in the skill "
|
||||
"that governs that task so the next session starts fixed.\n"
|
||||
" • Non-trivial technique, fix, workaround, or debugging path "
|
||||
"emerged.\n"
|
||||
" • A skill that was loaded or consulted turned out wrong, "
|
||||
"missing, or outdated — patch it now.\n\n"
|
||||
"Preference order for skills — pick the earliest that fits:\n"
|
||||
" 1. UPDATE A CURRENTLY-LOADED SKILL. Check what skills were "
|
||||
"loaded via /skill-name or skill_view in the conversation. If one "
|
||||
"of them covers the learning, PATCH it first. It was in play; "
|
||||
"it's the right place.\n"
|
||||
" 2. UPDATE AN EXISTING UMBRELLA (skills_list + skill_view to "
|
||||
"find the right one). Patch it.\n"
|
||||
" 3. ADD A SUPPORT FILE under an existing umbrella via "
|
||||
"skill_manage action=write_file. Three kinds: "
|
||||
"`references/<topic>.md` for session-specific detail OR condensed "
|
||||
"knowledge banks (quoted research, API docs excerpts, domain "
|
||||
"notes) written concise and task-focused; `templates/<name>.<ext>` "
|
||||
"for starter files meant to be copied and modified; "
|
||||
"`scripts/<name>.<ext>` for statically re-runnable actions "
|
||||
"(verification, fixture generators, probes). Add a one-line "
|
||||
"pointer in SKILL.md so future agents find them.\n"
|
||||
" 4. CREATE A NEW CLASS-LEVEL UMBRELLA when nothing exists. "
|
||||
"Name at the class level — NOT a PR number, error string, "
|
||||
"codename, library-alone name, or 'fix-X / debug-Y' session "
|
||||
"artifact. If the name only fits today's task, fall back to (1), "
|
||||
"(2), or (3).\n\n"
|
||||
"User-preference embedding: when the user complains about how "
|
||||
"you handled a task, update the skill that governs that task — "
|
||||
"memory alone isn't enough. Memory says 'who the user is and "
|
||||
"what the current situation and state of your operations are'; "
|
||||
"skills say 'how to do this class of task for this user'. Both "
|
||||
"should carry user-preference lessons when relevant.\n\n"
|
||||
"If you notice overlapping existing skills, mention it — the "
|
||||
"background curator handles consolidation.\n\n"
|
||||
"Protected skills (DO NOT edit these):\n"
|
||||
" • Bundled skills (shipped with Hermes, e.g. 'hermes-agent').\n"
|
||||
" • Hub-installed skills (installed via 'hermes skills install').\n"
|
||||
" • Pinned skills (marked via 'hermes curator pin').\n"
|
||||
"If the only skills that need updating are protected, say\n"
|
||||
"'Nothing to save.' and stop.\n\n"
|
||||
"Do NOT capture as skills (these become persistent self-imposed "
|
||||
"constraints that bite you later when the environment changes):\n"
|
||||
" • Environment-dependent failures: missing binaries, fresh-install "
|
||||
"errors, post-migration path mismatches, 'command not found', "
|
||||
"unconfigured credentials, uninstalled packages. The user can fix "
|
||||
"these — they are not durable rules.\n"
|
||||
" • Negative claims about tools or features ('browser tools do not "
|
||||
"work', 'X tool is broken', 'cannot use Y from execute_code'). These "
|
||||
"harden into refusals the agent cites against itself for months "
|
||||
"after the actual problem was fixed.\n"
|
||||
" • Session-specific transient errors that resolved before the "
|
||||
"conversation ended. If retrying worked, the lesson is the retry "
|
||||
"pattern, not the original failure.\n"
|
||||
" • One-off task narratives. A user asking 'summarize today's "
|
||||
"market' or 'analyze this PR' is not a class of work that warrants "
|
||||
"a skill.\n\n"
|
||||
"If a tool failed because of setup state, capture the FIX (install "
|
||||
"command, config step, env var to set) under an existing setup or "
|
||||
"troubleshooting skill — never 'this tool does not work' as a "
|
||||
"standalone constraint.\n\n"
|
||||
"Act on whichever of the two dimensions has real signal. If "
|
||||
"genuinely nothing stands out on either, say 'Nothing to save.' "
|
||||
"and stop — but don't reach for that conclusion as a default."
|
||||
)
|
||||
|
||||
|
||||
|
||||
def summarize_background_review_actions(
|
||||
review_messages: List[Dict],
|
||||
prior_snapshot: List[Dict],
|
||||
) -> List[str]:
|
||||
"""Build the human-facing action summary for a background review pass.
|
||||
|
||||
Walks the review agent's session messages and collects "successful tool
|
||||
action" descriptions to surface to the user (e.g. "Memory updated").
|
||||
Tool messages already present in ``prior_snapshot`` are skipped so we
|
||||
don't re-surface stale results from the prior conversation that the
|
||||
review agent inherited via ``conversation_history`` (issue #14944).
|
||||
|
||||
Matching is by ``tool_call_id`` when available, with a content-equality
|
||||
fallback for tool messages that lack one.
|
||||
"""
|
||||
existing_tool_call_ids = set()
|
||||
existing_tool_contents = set()
|
||||
for prior in prior_snapshot or []:
|
||||
if not isinstance(prior, dict) or prior.get("role") != "tool":
|
||||
continue
|
||||
tcid = prior.get("tool_call_id")
|
||||
if tcid:
|
||||
existing_tool_call_ids.add(tcid)
|
||||
else:
|
||||
content = prior.get("content")
|
||||
if isinstance(content, str):
|
||||
existing_tool_contents.add(content)
|
||||
|
||||
actions: List[str] = []
|
||||
for msg in review_messages or []:
|
||||
if not isinstance(msg, dict) or msg.get("role") != "tool":
|
||||
continue
|
||||
tcid = msg.get("tool_call_id")
|
||||
if tcid and tcid in existing_tool_call_ids:
|
||||
continue
|
||||
if not tcid:
|
||||
content_str = msg.get("content")
|
||||
if isinstance(content_str, str) and content_str in existing_tool_contents:
|
||||
continue
|
||||
try:
|
||||
data = json.loads(msg.get("content", "{}"))
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
continue
|
||||
if not isinstance(data, dict) or not data.get("success"):
|
||||
continue
|
||||
message = data.get("message", "")
|
||||
target = data.get("target", "")
|
||||
if "created" in message.lower():
|
||||
actions.append(message)
|
||||
elif "updated" in message.lower():
|
||||
actions.append(message)
|
||||
elif "added" in message.lower() or (target and "add" in message.lower()):
|
||||
label = "Memory" if target == "memory" else "User profile" if target == "user" else target
|
||||
actions.append(f"{label} updated")
|
||||
elif "Entry added" in message:
|
||||
label = "Memory" if target == "memory" else "User profile" if target == "user" else target
|
||||
actions.append(f"{label} updated")
|
||||
elif "removed" in message.lower() or "replaced" in message.lower():
|
||||
label = "Memory" if target == "memory" else "User profile" if target == "user" else target
|
||||
actions.append(f"{label} updated")
|
||||
return actions
|
||||
|
||||
|
||||
def build_memory_write_metadata(
|
||||
agent: Any,
|
||||
*,
|
||||
write_origin: Optional[str] = None,
|
||||
execution_context: Optional[str] = None,
|
||||
task_id: Optional[str] = None,
|
||||
tool_call_id: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build provenance metadata for external memory-provider mirrors."""
|
||||
metadata: Dict[str, Any] = {
|
||||
"write_origin": write_origin or getattr(agent, "_memory_write_origin", "assistant_tool"),
|
||||
"execution_context": (
|
||||
execution_context
|
||||
or getattr(agent, "_memory_write_context", "foreground")
|
||||
),
|
||||
"session_id": agent.session_id or "",
|
||||
"parent_session_id": agent._parent_session_id or "",
|
||||
"platform": agent.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
|
||||
"tool_name": "memory",
|
||||
}
|
||||
if task_id:
|
||||
metadata["task_id"] = task_id
|
||||
if tool_call_id:
|
||||
metadata["tool_call_id"] = tool_call_id
|
||||
return {k: v for k, v in metadata.items() if v not in {None, ""}}
|
||||
|
||||
|
||||
def _run_review_in_thread(
|
||||
agent: Any,
|
||||
messages_snapshot: List[Dict],
|
||||
prompt: str,
|
||||
) -> None:
|
||||
"""Worker function executed in the background-review daemon thread.
|
||||
|
||||
Spawns a forked ``AIAgent`` inheriting the parent's runtime, runs the
|
||||
review prompt, and surfaces a compact action summary back to the user
|
||||
via ``agent._safe_print`` and ``agent.background_review_callback``.
|
||||
"""
|
||||
# Local import to avoid a hard circular dep at module load.
|
||||
from run_agent import AIAgent
|
||||
from tools.terminal_tool import set_approval_callback as _set_approval_callback
|
||||
|
||||
# Install a non-interactive approval callback on this worker
|
||||
# thread so any dangerous-command guard the review agent trips
|
||||
# resolves to "deny" instead of falling back to input() -- which
|
||||
# deadlocks against the parent's prompt_toolkit TUI (#15216).
|
||||
# Same pattern as _subagent_auto_deny in tools/delegate_tool.py.
|
||||
def _bg_review_auto_deny(command, description, **kwargs):
|
||||
logger.warning(
|
||||
"Background review auto-denied dangerous command: %s (%s)",
|
||||
command, description,
|
||||
)
|
||||
return "deny"
|
||||
try:
|
||||
_set_approval_callback(_bg_review_auto_deny)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
review_agent = None
|
||||
review_messages: List[Dict] = []
|
||||
try:
|
||||
with open(os.devnull, "w", encoding="utf-8") as _devnull, \
|
||||
contextlib.redirect_stdout(_devnull), \
|
||||
contextlib.redirect_stderr(_devnull):
|
||||
# Inherit the parent agent's live runtime (provider, model,
|
||||
# base_url, api_key, api_mode) so the fork uses the exact
|
||||
# same credentials the main turn is using. Without this,
|
||||
# AIAgent.__init__ re-runs auto-resolution from env vars,
|
||||
# which fails for OAuth-only providers, session-scoped
|
||||
# creds, or credential-pool setups where the resolver can't
|
||||
# reconstruct auth from scratch -- producing the spurious
|
||||
# "No LLM provider configured" warning at end of turn.
|
||||
_parent_runtime = agent._current_main_runtime()
|
||||
_parent_api_mode = _parent_runtime.get("api_mode") or None
|
||||
# The review fork needs to call agent-loop tools (memory,
|
||||
# skill_manage). Those tools require Hermes' own dispatch,
|
||||
# which the codex_app_server runtime bypasses entirely
|
||||
# (it runs the turn inside codex's subprocess). So when
|
||||
# the parent is on codex_app_server, downgrade the review
|
||||
# fork to codex_responses — same auth/credentials, but
|
||||
# talks to the OpenAI Responses API directly so Hermes
|
||||
# owns the loop and the agent-loop tools dispatch.
|
||||
if _parent_api_mode == "codex_app_server":
|
||||
_parent_api_mode = "codex_responses"
|
||||
# skip_memory=True keeps the review fork from
|
||||
# touching external memory plugins (honcho, mem0,
|
||||
# supermemory, etc.). Without it, the fork's
|
||||
# __init__ rebuilds its own _memory_manager from
|
||||
# config, scoped to the parent's session_id, and
|
||||
# run_conversation() then leaks the harness prompt
|
||||
# into the user's real memory namespace via three
|
||||
# ingestion sites: on_turn_start (cadence + turn
|
||||
# message), prefetch_all (recall query), and
|
||||
# sync_all (harness prompt + review output recorded
|
||||
# as a (user, assistant) turn pair). Built-in
|
||||
# MEMORY.md / USER.md state is re-bound from the
|
||||
# parent below so memory(action="add") writes from
|
||||
# the review still land on disk; the review just
|
||||
# has zero side effects on external providers.
|
||||
review_agent = AIAgent(
|
||||
model=agent.model,
|
||||
max_iterations=16,
|
||||
quiet_mode=True,
|
||||
platform=agent.platform,
|
||||
provider=agent.provider,
|
||||
api_mode=_parent_api_mode,
|
||||
base_url=_parent_runtime.get("base_url") or None,
|
||||
api_key=_parent_runtime.get("api_key") or None,
|
||||
credential_pool=getattr(agent, "_credential_pool", None),
|
||||
parent_session_id=agent.session_id,
|
||||
skip_memory=True,
|
||||
)
|
||||
review_agent._memory_write_origin = "background_review"
|
||||
review_agent._memory_write_context = "background_review"
|
||||
review_agent._memory_store = agent._memory_store
|
||||
review_agent._memory_enabled = agent._memory_enabled
|
||||
review_agent._user_profile_enabled = agent._user_profile_enabled
|
||||
review_agent._memory_nudge_interval = 0
|
||||
review_agent._skill_nudge_interval = 0
|
||||
# Suppress all status/warning emits from the fork so the
|
||||
# user only sees the final successful-action summary.
|
||||
# Without this, mid-review "Iteration budget exhausted",
|
||||
# rate-limit retries, compression warnings, and other
|
||||
# lifecycle messages bubble up through _emit_status ->
|
||||
# _vprint and leak past the stdout redirect (they go via
|
||||
# _print_fn/status_callback, which bypass sys.stdout).
|
||||
review_agent.suppress_status_output = True
|
||||
# Inherit the parent's cached system prompt verbatim so
|
||||
# the review fork's outbound HTTP request hits the same
|
||||
# Anthropic/OpenRouter prefix cache the parent warmed.
|
||||
# Without this, the fork rebuilds the system prompt from
|
||||
# scratch (fresh _hermes_now() timestamp, fresh
|
||||
# session_id, narrower toolset → different skills_prompt)
|
||||
# and the byte-exact prefix-cache key misses. See
|
||||
# issue #25322 and PR #17276 for the full analysis +
|
||||
# measured impact (~26% end-to-end cost reduction on
|
||||
# Sonnet 4.5).
|
||||
review_agent._cached_system_prompt = agent._cached_system_prompt
|
||||
# Defensive: pin session_start + session_id to the
|
||||
# parent's so any code path that re-renders parts of
|
||||
# the system prompt (compression, plugin hooks) still
|
||||
# produces byte-identical output. The cached-prompt
|
||||
# assignment above already short-circuits the normal
|
||||
# rebuild path, but these pins guarantee parity even
|
||||
# if a future code path bypasses the cache.
|
||||
review_agent.session_start = agent.session_start
|
||||
review_agent.session_id = agent.session_id
|
||||
|
||||
from model_tools import get_tool_definitions
|
||||
from hermes_cli.plugins import (
|
||||
set_thread_tool_whitelist,
|
||||
clear_thread_tool_whitelist,
|
||||
)
|
||||
|
||||
review_whitelist = {
|
||||
t["function"]["name"]
|
||||
for t in get_tool_definitions(
|
||||
enabled_toolsets=["memory", "skills"],
|
||||
quiet_mode=True,
|
||||
)
|
||||
}
|
||||
set_thread_tool_whitelist(
|
||||
review_whitelist,
|
||||
deny_msg_fmt=(
|
||||
"Background review denied non-whitelisted tool: "
|
||||
"{tool_name}. Only memory/skill tools are allowed."
|
||||
),
|
||||
)
|
||||
try:
|
||||
review_agent.run_conversation(
|
||||
user_message=(
|
||||
prompt
|
||||
+ "\n\nYou can only call memory and skill "
|
||||
"management tools. Other tools will be denied "
|
||||
"at runtime — do not attempt them."
|
||||
),
|
||||
conversation_history=messages_snapshot,
|
||||
)
|
||||
finally:
|
||||
clear_thread_tool_whitelist()
|
||||
|
||||
# Tear down memory providers while stdout is still
|
||||
# redirected so background thread teardown (Honcho flush,
|
||||
# Hindsight sync, etc.) stays silent. The finally block
|
||||
# below is a safety net for the exception path.
|
||||
try:
|
||||
review_agent.shutdown_memory_provider()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
review_agent.close()
|
||||
except Exception:
|
||||
pass
|
||||
review_messages = list(getattr(review_agent, "_session_messages", []))
|
||||
review_agent = None
|
||||
|
||||
# Scan the review agent's messages for successful tool actions
|
||||
# and surface a compact summary to the user. Tool messages
|
||||
# already present in messages_snapshot must be skipped, since
|
||||
# the review agent inherits that history and would otherwise
|
||||
# re-surface stale "created"/"updated" messages from the prior
|
||||
# conversation as if they just happened (issue #14944).
|
||||
actions = summarize_background_review_actions(
|
||||
review_messages,
|
||||
messages_snapshot,
|
||||
)
|
||||
|
||||
if actions:
|
||||
summary = " · ".join(dict.fromkeys(actions))
|
||||
agent._safe_print(
|
||||
f" 💾 Self-improvement review: {summary}"
|
||||
)
|
||||
_bg_cb = agent.background_review_callback
|
||||
if _bg_cb:
|
||||
try:
|
||||
_bg_cb(
|
||||
f"💾 Self-improvement review: {summary}"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("Background memory/skill review failed: %s", e)
|
||||
agent._emit_auxiliary_failure("background review", e)
|
||||
finally:
|
||||
# Safety-net cleanup for the exception path. Normal
|
||||
# completion already shut down inside redirect_stdout above.
|
||||
# Re-open devnull here so any teardown output (Honcho flush,
|
||||
# Hindsight sync, background thread joins) stays silent even
|
||||
# on the exception path where redirect_stdout already exited.
|
||||
if review_agent is not None:
|
||||
try:
|
||||
with open(os.devnull, "w", encoding="utf-8") as _fn, \
|
||||
contextlib.redirect_stdout(_fn), \
|
||||
contextlib.redirect_stderr(_fn):
|
||||
try:
|
||||
review_agent.shutdown_memory_provider()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
review_agent.close()
|
||||
except Exception:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
# Clear the approval callback on this bg-review thread so a
|
||||
# recycled thread-id doesn't inherit a stale reference.
|
||||
try:
|
||||
_set_approval_callback(None)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def spawn_background_review_thread(
|
||||
agent: Any,
|
||||
messages_snapshot: List[Dict],
|
||||
review_memory: bool = False,
|
||||
review_skills: bool = False,
|
||||
):
|
||||
"""Build the review thread target and prompt for a background review.
|
||||
|
||||
Returns a ``(target, prompt)`` tuple. The caller (``AIAgent._spawn_background_review``)
|
||||
owns the actual ``threading.Thread`` construction so test-level patches
|
||||
of ``run_agent.threading.Thread`` keep working.
|
||||
"""
|
||||
# Pick the right prompt based on which triggers fired. Allow per-agent
|
||||
# override (the prompts moved to module-level constants but old code paths
|
||||
# that set agent._MEMORY_REVIEW_PROMPT etc. directly keep working).
|
||||
if review_memory and review_skills:
|
||||
prompt = getattr(agent, "_COMBINED_REVIEW_PROMPT", _COMBINED_REVIEW_PROMPT)
|
||||
elif review_memory:
|
||||
prompt = getattr(agent, "_MEMORY_REVIEW_PROMPT", _MEMORY_REVIEW_PROMPT)
|
||||
else:
|
||||
prompt = getattr(agent, "_SKILL_REVIEW_PROMPT", _SKILL_REVIEW_PROMPT)
|
||||
|
||||
def _target() -> None:
|
||||
_run_review_in_thread(agent, messages_snapshot, prompt)
|
||||
|
||||
return _target, prompt
|
||||
|
||||
|
||||
__all__ = [
|
||||
"_MEMORY_REVIEW_PROMPT",
|
||||
"_SKILL_REVIEW_PROMPT",
|
||||
"_COMBINED_REVIEW_PROMPT",
|
||||
"spawn_background_review_thread",
|
||||
"summarize_background_review_actions",
|
||||
"build_memory_write_metadata",
|
||||
]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,175 +0,0 @@
|
||||
"""
|
||||
Browser Provider ABC
|
||||
====================
|
||||
|
||||
Defines the pluggable-backend interface for cloud browser providers
|
||||
(Browserbase, Browser Use, Firecrawl, …). Providers register instances via
|
||||
:meth:`PluginContext.register_browser_provider`; the active one (selected via
|
||||
``browser.cloud_provider`` in ``config.yaml``) services every cloud-mode
|
||||
``browser_*`` tool call.
|
||||
|
||||
Providers live in ``<repo>/plugins/browser/<name>/`` (built-in, auto-loaded as
|
||||
``kind: backend``) or ``~/.hermes/plugins/browser/<name>/`` (user, opt-in via
|
||||
``plugins.enabled``).
|
||||
|
||||
This ABC mirrors :class:`agent.web_search_provider.WebSearchProvider` (PR
|
||||
#25182) — same shape, same registration flow, same picker integration. The
|
||||
legacy in-tree ``tools.browser_providers.base.CloudBrowserProvider`` ABC was
|
||||
deleted in PR #25214 (this work) along with the per-vendor inline modules in
|
||||
``tools/browser_providers/``; the lifecycle contract documented below is
|
||||
preserved bit-for-bit so the tool wrapper (:mod:`tools.browser_tool`) does
|
||||
not have to translate.
|
||||
|
||||
Session metadata contract (preserved from the legacy ``CloudBrowserProvider``)::
|
||||
|
||||
{
|
||||
"session_name": str, # unique name for agent-browser --session
|
||||
"bb_session_id": str, # provider session ID (for close/cleanup)
|
||||
"cdp_url": str, # CDP websocket URL
|
||||
"features": dict, # feature flags that were enabled
|
||||
"external_call_id": str, # optional, managed-gateway billing key
|
||||
}
|
||||
|
||||
``bb_session_id`` is a legacy key name kept verbatim for backward compat with
|
||||
:mod:`tools.browser_tool` — it holds the provider's session ID regardless of
|
||||
which provider is in use.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
from typing import Any, Dict
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ABC
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class BrowserProvider(abc.ABC):
|
||||
"""Abstract base class for a cloud browser backend.
|
||||
|
||||
Subclasses must implement :meth:`name`, :meth:`is_available`, and the
|
||||
three lifecycle methods: :meth:`create_session`, :meth:`close_session`,
|
||||
:meth:`emergency_cleanup`.
|
||||
|
||||
The lifecycle shape preserves the legacy ``CloudBrowserProvider`` contract
|
||||
bit-for-bit so the dispatcher in :mod:`tools.browser_tool` is a pure
|
||||
registry lookup — no per-provider conditionals, no shape translation.
|
||||
"""
|
||||
|
||||
@property
|
||||
@abc.abstractmethod
|
||||
def name(self) -> str:
|
||||
"""Stable short identifier used in the ``browser.cloud_provider``
|
||||
config key.
|
||||
|
||||
Lowercase, hyphens permitted to preserve existing user-visible names.
|
||||
Examples: ``browserbase``, ``browser-use``, ``firecrawl``.
|
||||
"""
|
||||
|
||||
@property
|
||||
def display_name(self) -> str:
|
||||
"""Human-readable label shown in ``hermes tools``. Defaults to ``name``."""
|
||||
return self.name
|
||||
|
||||
@abc.abstractmethod
|
||||
def is_available(self) -> bool:
|
||||
"""Return True when this provider can service calls.
|
||||
|
||||
Typically a cheap check (env var present, managed-gateway token
|
||||
readable, optional Python dep importable). Must NOT make network
|
||||
calls — this runs at tool-registration time and on every
|
||||
``hermes tools`` paint.
|
||||
|
||||
Mirrors the legacy ``CloudBrowserProvider.is_configured()`` method;
|
||||
renamed for parity with :class:`agent.web_search_provider.WebSearchProvider`.
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
def create_session(self, task_id: str) -> Dict[str, object]:
|
||||
"""Create a cloud browser session and return session metadata.
|
||||
|
||||
Must return a dict with at least::
|
||||
|
||||
{
|
||||
"session_name": str, # unique name for agent-browser --session
|
||||
"bb_session_id": str, # provider session ID (for close/cleanup)
|
||||
"cdp_url": str, # CDP websocket URL
|
||||
"features": dict, # feature flags that were enabled
|
||||
}
|
||||
|
||||
``bb_session_id`` is a legacy key name kept for backward compat with
|
||||
the rest of :mod:`tools.browser_tool` — it holds the provider's
|
||||
session ID regardless of which provider is in use.
|
||||
|
||||
May raise ``ValueError`` (missing credentials) or ``RuntimeError``
|
||||
(network / API failure); the dispatcher surfaces these to the user.
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
def close_session(self, session_id: str) -> bool:
|
||||
"""Release / terminate a cloud session by its provider session ID.
|
||||
|
||||
Returns True on success, False on failure. Should not raise — log and
|
||||
return False on any exception so the dispatcher's cleanup loop keeps
|
||||
moving across sessions.
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
def emergency_cleanup(self, session_id: str) -> None:
|
||||
"""Best-effort session teardown during process exit.
|
||||
|
||||
Called from atexit / signal handlers. Must tolerate missing
|
||||
credentials, network errors, etc. — log and move on. Must not raise.
|
||||
"""
|
||||
|
||||
def get_setup_schema(self) -> Dict[str, Any]:
|
||||
"""Return provider metadata for the ``hermes tools`` picker.
|
||||
|
||||
Used by :mod:`hermes_cli.tools_config` to inject this provider as a
|
||||
row in the Browser Automation picker. Shape mirrors the existing
|
||||
hardcoded entries in ``TOOL_CATEGORIES["browser"]``::
|
||||
|
||||
{
|
||||
"name": "Browserbase",
|
||||
"badge": "paid",
|
||||
"tag": "Cloud browser with stealth and proxies",
|
||||
"env_vars": [
|
||||
{"key": "BROWSERBASE_API_KEY",
|
||||
"prompt": "Browserbase API key",
|
||||
"url": "https://browserbase.com"},
|
||||
],
|
||||
"post_setup": "agent_browser",
|
||||
}
|
||||
|
||||
Default: minimal entry derived from :attr:`display_name`. Override to
|
||||
expose API key prompts, badges, managed-Nous gating, and the
|
||||
``post_setup`` install hook.
|
||||
"""
|
||||
return {
|
||||
"name": self.display_name,
|
||||
"badge": "",
|
||||
"tag": "",
|
||||
"env_vars": [],
|
||||
}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Backward-compat shims for the legacy CloudBrowserProvider API
|
||||
# ------------------------------------------------------------------
|
||||
#
|
||||
# The pre-PR-#25214 ABC exposed ``is_configured()`` and ``provider_name()``;
|
||||
# ``tools.browser_tool`` has ~6 callers that still use those names. Rather
|
||||
# than churn every callsite (and break out-of-tree downstream code that
|
||||
# subclassed CloudBrowserProvider), we expose the old names as thin
|
||||
# delegations to the new API. Subclasses MUST implement :meth:`is_available`
|
||||
# and :attr:`name`; they may override ``is_configured`` / ``provider_name``
|
||||
# for compatibility with the legacy ABC but it is not required.
|
||||
|
||||
def is_configured(self) -> bool:
|
||||
"""Backward-compat alias for :meth:`is_available`."""
|
||||
return self.is_available()
|
||||
|
||||
def provider_name(self) -> str:
|
||||
"""Backward-compat alias returning :attr:`display_name`."""
|
||||
return self.display_name
|
||||
@@ -1,223 +0,0 @@
|
||||
"""
|
||||
Browser Provider Registry
|
||||
=========================
|
||||
|
||||
Central map of registered cloud browser providers. Populated by plugins at
|
||||
import-time via :meth:`PluginContext.register_browser_provider`; consumed by
|
||||
:func:`tools.browser_tool._get_cloud_provider` to route each cloud-mode
|
||||
``browser_*`` tool call to the active backend.
|
||||
|
||||
Active selection
|
||||
----------------
|
||||
The active provider is chosen by configuration with this precedence:
|
||||
|
||||
1. ``browser.cloud_provider`` in ``config.yaml`` (explicit override).
|
||||
2. Legacy preference order — ``browser-use`` → ``browserbase`` — filtered by
|
||||
availability. Matches the historic auto-detect order in
|
||||
:func:`tools.browser_tool._get_cloud_provider` (Browser Use checked first
|
||||
because it covers both the managed Nous gateway and direct API key path;
|
||||
Browserbase as the older direct-credentials fallback). ``firecrawl`` is
|
||||
intentionally NOT in the legacy walk — users only get Firecrawl as a
|
||||
cloud browser when they explicitly set ``browser.cloud_provider:
|
||||
firecrawl``, matching pre-migration behaviour where Firecrawl was never
|
||||
auto-selected.
|
||||
3. Otherwise ``None`` — the dispatcher falls back to local browser mode.
|
||||
|
||||
The explicit-config branch (rule 1) intentionally ignores ``is_available()``
|
||||
so the dispatcher surfaces a typed "X_API_KEY is not set" error to the user
|
||||
instead of silently switching backends. Matches the legacy
|
||||
:func:`tools.browser_tool._get_cloud_provider` behaviour for configured names.
|
||||
|
||||
Note: there is no "capability" split here (unlike the web subsystem, which
|
||||
has search/extract/crawl). Every browser provider implements the full
|
||||
:class:`agent.browser_provider.BrowserProvider` lifecycle; the registry's
|
||||
job is purely selection, not capability routing.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from agent.browser_provider import BrowserProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_providers: Dict[str, BrowserProvider] = {}
|
||||
_lock = threading.Lock()
|
||||
|
||||
|
||||
def register_provider(provider: BrowserProvider) -> None:
|
||||
"""Register a cloud browser provider.
|
||||
|
||||
Re-registration (same ``name``) overwrites the previous entry and logs
|
||||
a debug message — makes hot-reload scenarios (tests, dev loops) behave
|
||||
predictably.
|
||||
"""
|
||||
if not isinstance(provider, BrowserProvider):
|
||||
raise TypeError(
|
||||
f"register_provider() expects a BrowserProvider instance, "
|
||||
f"got {type(provider).__name__}"
|
||||
)
|
||||
name = provider.name
|
||||
if not isinstance(name, str) or not name.strip():
|
||||
raise ValueError("Browser provider .name must be a non-empty string")
|
||||
with _lock:
|
||||
existing = _providers.get(name)
|
||||
_providers[name] = provider
|
||||
if existing is not None:
|
||||
logger.debug(
|
||||
"Browser provider '%s' re-registered (was %r)",
|
||||
name, type(existing).__name__,
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
"Registered browser provider '%s' (%s)",
|
||||
name, type(provider).__name__,
|
||||
)
|
||||
|
||||
|
||||
def list_providers() -> List[BrowserProvider]:
|
||||
"""Return all registered providers, sorted by name."""
|
||||
with _lock:
|
||||
items = list(_providers.values())
|
||||
return sorted(items, key=lambda p: p.name)
|
||||
|
||||
|
||||
def get_provider(name: str) -> Optional[BrowserProvider]:
|
||||
"""Return the provider registered under *name*, or None."""
|
||||
if not isinstance(name, str):
|
||||
return None
|
||||
with _lock:
|
||||
return _providers.get(name.strip())
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Active-provider resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
# Legacy auto-detect order — used when no ``browser.cloud_provider`` is set.
|
||||
# Matches the pre-migration walk in :func:`tools.browser_tool._get_cloud_provider`.
|
||||
# Firecrawl is intentionally absent so users with ``FIRECRAWL_API_KEY`` set
|
||||
# for web-extract don't get silently routed to a paid cloud browser. See
|
||||
# :func:`_resolve` for the full rationale.
|
||||
_LEGACY_PREFERENCE = (
|
||||
"browser-use",
|
||||
"browserbase",
|
||||
)
|
||||
|
||||
|
||||
def _resolve(configured: Optional[str]) -> Optional[BrowserProvider]:
|
||||
"""Resolve the active browser provider.
|
||||
|
||||
Resolution rules (in order):
|
||||
|
||||
1. **Explicit "local".** Returns None — the dispatcher disables cloud
|
||||
mode entirely. Mirrors legacy short-circuit in
|
||||
:func:`tools.browser_tool._get_cloud_provider`.
|
||||
2. **Explicit config wins, ignoring availability.** If ``configured``
|
||||
names a registered provider, return it even if its
|
||||
:meth:`is_available` returns False — the dispatcher will surface a
|
||||
precise "X_API_KEY is not set" error instead of silently routing
|
||||
somewhere else.
|
||||
3. **Legacy preference walk, filtered by availability.** Walk
|
||||
:data:`_LEGACY_PREFERENCE` (``browser-use`` → ``browserbase``) looking
|
||||
for a provider whose ``is_available()`` is True.
|
||||
|
||||
There is intentionally NO "single-eligible shortcut" rule here (unlike
|
||||
:func:`agent.web_search_registry._resolve`). Pre-migration, the
|
||||
auto-detect branch in ``tools.browser_tool._get_cloud_provider`` only
|
||||
considered Browser Use and Browserbase; Firecrawl was reachable only
|
||||
via an explicit ``browser.cloud_provider: firecrawl`` config key.
|
||||
Preserving that gate matters because Firecrawl shares its API key with
|
||||
the *web* extract plugin (``plugins/web/firecrawl/``), so users who set
|
||||
``FIRECRAWL_API_KEY`` for web extract must NOT get silently routed to a
|
||||
paid cloud browser on a fresh install. Third-party browser-provider
|
||||
plugins added under ``~/.hermes/plugins/browser/<vendor>/`` are subject
|
||||
to the same gate — they must be explicitly configured to take effect.
|
||||
|
||||
Returns None when no provider is configured AND no available provider
|
||||
matches the legacy preference; the dispatcher then falls back to local
|
||||
browser mode.
|
||||
"""
|
||||
with _lock:
|
||||
snapshot = dict(_providers)
|
||||
|
||||
def _is_available_safe(p: BrowserProvider) -> bool:
|
||||
"""Wrap ``is_available()`` so a buggy provider doesn't kill resolution."""
|
||||
try:
|
||||
return bool(p.is_available())
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.warning(
|
||||
"Browser provider %s.is_available() raised %s — treating as unavailable",
|
||||
p.name, exc, exc_info=True,
|
||||
)
|
||||
return False
|
||||
|
||||
# 1. Explicit "local" short-circuit.
|
||||
if configured == "local":
|
||||
return None
|
||||
|
||||
# 2. Explicit config wins — return regardless of is_available() so the
|
||||
# user gets a precise downstream error message rather than a silent
|
||||
# backend switch. Matches _get_cloud_provider() in browser_tool.py.
|
||||
if configured:
|
||||
provider = snapshot.get(configured)
|
||||
if provider is not None:
|
||||
return provider
|
||||
logger.debug(
|
||||
"browser cloud_provider '%s' configured but not registered; "
|
||||
"falling back to auto-detect",
|
||||
configured,
|
||||
)
|
||||
|
||||
# 3. Legacy preference walk — only providers in _LEGACY_PREFERENCE are
|
||||
# auto-eligible. Filtered by availability so we don't surface a
|
||||
# provider the user has no credentials for. See docstring for why
|
||||
# we do NOT fall back to "any single-eligible registered provider".
|
||||
for legacy in _LEGACY_PREFERENCE:
|
||||
provider = snapshot.get(legacy)
|
||||
if provider is not None and _is_available_safe(provider):
|
||||
return provider
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def get_active_browser_provider() -> Optional[BrowserProvider]:
|
||||
"""Resolve the currently-active cloud browser provider.
|
||||
|
||||
Reads ``browser.cloud_provider`` from config.yaml; falls back per the
|
||||
module docstring. Returns None for local mode or when no provider is
|
||||
available.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.config import read_raw_config
|
||||
|
||||
cfg = read_raw_config()
|
||||
browser_cfg = cfg.get("browser", {})
|
||||
except Exception as exc:
|
||||
logger.debug("Could not read browser config: %s", exc)
|
||||
browser_cfg = {}
|
||||
|
||||
configured: Optional[str] = None
|
||||
if isinstance(browser_cfg, dict) and "cloud_provider" in browser_cfg:
|
||||
try:
|
||||
from tools.tool_backend_helpers import normalize_browser_cloud_provider
|
||||
|
||||
configured = normalize_browser_cloud_provider(
|
||||
browser_cfg.get("cloud_provider")
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("normalize_browser_cloud_provider failed: %s", exc)
|
||||
configured = None
|
||||
|
||||
return _resolve(configured)
|
||||
|
||||
|
||||
def _reset_for_tests() -> None:
|
||||
"""Clear the registry. **Test-only.**"""
|
||||
with _lock:
|
||||
_providers.clear()
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,448 +0,0 @@
|
||||
"""Codex API runtime — App Server and Responses-API streaming paths.
|
||||
|
||||
Extracted from :class:`AIAgent` to keep the agent loop file focused.
|
||||
Each function takes the parent ``AIAgent`` as its first argument
|
||||
(``agent``). AIAgent keeps thin forwarder methods for backward
|
||||
compatibility.
|
||||
|
||||
* ``run_codex_app_server_turn`` — drives one turn through the
|
||||
``codex_app_server`` subprocess client (used when a Codex CLI install
|
||||
is the active provider).
|
||||
* ``run_codex_stream`` — streams a Codex Responses API call (the
|
||||
``codex_responses`` api_mode).
|
||||
* ``run_codex_create_stream_fallback`` — recovery path when the
|
||||
Responses ``stream=True`` initial create fails.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_codex_app_server_turn(
|
||||
agent,
|
||||
*,
|
||||
user_message: str,
|
||||
original_user_message: Any,
|
||||
messages: List[Dict[str, Any]],
|
||||
effective_task_id: str,
|
||||
should_review_memory: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""Codex app-server runtime path. Hands the entire turn to a `codex
|
||||
app-server` subprocess and projects its events back into Hermes'
|
||||
messages list so memory/skill review keep working.
|
||||
|
||||
Called from run_conversation() when agent.api_mode == "codex_app_server".
|
||||
Returns the same dict shape as the chat_completions path.
|
||||
"""
|
||||
from agent.transports.codex_app_server_session import CodexAppServerSession
|
||||
|
||||
# Lazy session: one CodexAppServerSession per AIAgent instance.
|
||||
# Spawned on first turn, reused across turns, closed at AIAgent
|
||||
# shutdown (see _cleanup hook).
|
||||
if not hasattr(agent, "_codex_session") or agent._codex_session is None:
|
||||
cwd = getattr(agent, "session_cwd", None) or os.getcwd()
|
||||
# Approval callback: defer to Hermes' standard prompt flow if a
|
||||
# CLI thread has installed one. Gateway / cron contexts get the
|
||||
# codex-side fail-closed default.
|
||||
try:
|
||||
from tools.terminal_tool import _get_approval_callback
|
||||
approval_callback = _get_approval_callback()
|
||||
except Exception:
|
||||
approval_callback = None
|
||||
agent._codex_session = CodexAppServerSession(
|
||||
cwd=cwd,
|
||||
approval_callback=approval_callback,
|
||||
)
|
||||
|
||||
# NOTE: the user message is ALREADY appended to messages by the
|
||||
# standard run_conversation() flow (line ~11823) before the early
|
||||
# return reaches us. Do NOT append again — that would duplicate.
|
||||
|
||||
try:
|
||||
turn = agent._codex_session.run_turn(user_input=user_message)
|
||||
except Exception as exc:
|
||||
logger.exception("codex app-server turn failed")
|
||||
# Crash → unconditionally drop the session so the next turn
|
||||
# respawns from scratch instead of reusing a dead client.
|
||||
try:
|
||||
agent._codex_session.close()
|
||||
except Exception:
|
||||
pass
|
||||
agent._codex_session = None
|
||||
return {
|
||||
"final_response": (
|
||||
f"Codex app-server turn failed: {exc}. "
|
||||
f"Fall back to default runtime with `/codex-runtime auto`."
|
||||
),
|
||||
"messages": messages,
|
||||
"api_calls": 0,
|
||||
"completed": False,
|
||||
"partial": True,
|
||||
"error": str(exc),
|
||||
}
|
||||
|
||||
# If the turn signalled the underlying client is wedged (deadline
|
||||
# blown, post-tool watchdog tripped, OAuth refresh died, subprocess
|
||||
# exited), retire the session so the next turn respawns codex
|
||||
# rather than riding the broken process. Mirrors openclaw beta.8's
|
||||
# "retire timed-out app-server clients" fix.
|
||||
if getattr(turn, "should_retire", False):
|
||||
logger.warning(
|
||||
"codex app-server session retired (turn error: %s)",
|
||||
turn.error,
|
||||
)
|
||||
try:
|
||||
agent._codex_session.close()
|
||||
except Exception:
|
||||
pass
|
||||
agent._codex_session = None
|
||||
|
||||
# Splice projected messages into the conversation. The projector emits
|
||||
# standard {role, content, tool_calls, tool_call_id} entries, which
|
||||
# is exactly what curator.py / sessions DB expect.
|
||||
if turn.projected_messages:
|
||||
messages.extend(turn.projected_messages)
|
||||
|
||||
# Counter ticks for the agent-improvement loop.
|
||||
# _turns_since_memory and _user_turn_count are ALREADY incremented
|
||||
# in the run_conversation() pre-loop block (lines ~11793-11817) so we
|
||||
# do NOT touch them here — that would double-count.
|
||||
# Only _iters_since_skill needs explicit increment, since the
|
||||
# chat_completions loop bumps it per tool iteration (line ~12110)
|
||||
# and that loop is bypassed on this path.
|
||||
agent._iters_since_skill = (
|
||||
getattr(agent, "_iters_since_skill", 0) + turn.tool_iterations
|
||||
)
|
||||
|
||||
# Now check the skill nudge AFTER iters were incremented — same
|
||||
# pattern the chat_completions path uses (line ~15432).
|
||||
should_review_skills = False
|
||||
if (
|
||||
agent._skill_nudge_interval > 0
|
||||
and agent._iters_since_skill >= agent._skill_nudge_interval
|
||||
and "skill_manage" in agent.valid_tool_names
|
||||
):
|
||||
should_review_skills = True
|
||||
agent._iters_since_skill = 0
|
||||
|
||||
# External memory provider sync (mirrors line ~15439). Skipped on
|
||||
# interrupt/error to avoid feeding partial transcripts to memory.
|
||||
if not turn.interrupted and turn.error is None:
|
||||
try:
|
||||
agent._sync_external_memory_for_turn(
|
||||
original_user_message=original_user_message,
|
||||
final_response=turn.final_text,
|
||||
interrupted=False,
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("external memory sync raised", exc_info=True)
|
||||
|
||||
# Background review fork — same cadence + signature as the default
|
||||
# path (line ~15449). Only fires when a trigger actually tripped AND
|
||||
# we have a real final response.
|
||||
if (
|
||||
turn.final_text
|
||||
and not turn.interrupted
|
||||
and (should_review_memory or should_review_skills)
|
||||
):
|
||||
try:
|
||||
agent._spawn_background_review(
|
||||
messages_snapshot=list(messages),
|
||||
review_memory=should_review_memory,
|
||||
review_skills=should_review_skills,
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("background review spawn raised", exc_info=True)
|
||||
|
||||
return {
|
||||
"final_response": turn.final_text,
|
||||
"messages": messages,
|
||||
"api_calls": 1, # one app-server "turn" maps to one logical API call
|
||||
"completed": not turn.interrupted and turn.error is None,
|
||||
"partial": turn.interrupted or turn.error is not None,
|
||||
"error": turn.error,
|
||||
"codex_thread_id": turn.thread_id,
|
||||
"codex_turn_id": turn.turn_id,
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
def run_codex_stream(agent, api_kwargs: dict, client: Any = None, on_first_delta: callable = None):
|
||||
"""Execute one streaming Responses API request and return the final response."""
|
||||
import httpx as _httpx
|
||||
|
||||
active_client = client or agent._ensure_primary_openai_client(reason="codex_stream_direct")
|
||||
max_stream_retries = 1
|
||||
has_tool_calls = False
|
||||
first_delta_fired = False
|
||||
# Accumulate streamed text so we can recover if get_final_response()
|
||||
# returns empty output (e.g. chatgpt.com backend-api sends
|
||||
# response.incomplete instead of response.completed).
|
||||
agent._codex_streamed_text_parts: list = []
|
||||
for attempt in range(max_stream_retries + 1):
|
||||
if agent._interrupt_requested:
|
||||
raise InterruptedError("Agent interrupted before Codex stream retry")
|
||||
collected_output_items: list = []
|
||||
try:
|
||||
with active_client.responses.stream(**api_kwargs) as stream:
|
||||
for event in stream:
|
||||
agent._touch_activity("receiving stream response")
|
||||
if agent._interrupt_requested:
|
||||
break
|
||||
event_type = getattr(event, "type", "")
|
||||
# Fire callbacks on text content deltas (suppress during tool calls)
|
||||
if "output_text.delta" in event_type or event_type == "response.output_text.delta":
|
||||
delta_text = getattr(event, "delta", "")
|
||||
if delta_text:
|
||||
agent._codex_streamed_text_parts.append(delta_text)
|
||||
if delta_text and not has_tool_calls:
|
||||
if not first_delta_fired:
|
||||
first_delta_fired = True
|
||||
if on_first_delta:
|
||||
try:
|
||||
on_first_delta()
|
||||
except Exception:
|
||||
pass
|
||||
agent._fire_stream_delta(delta_text)
|
||||
# Track tool calls to suppress text streaming
|
||||
elif "function_call" in event_type:
|
||||
has_tool_calls = True
|
||||
# Fire reasoning callbacks
|
||||
elif "reasoning" in event_type and "delta" in event_type:
|
||||
reasoning_text = getattr(event, "delta", "")
|
||||
if reasoning_text:
|
||||
agent._fire_reasoning_delta(reasoning_text)
|
||||
# Collect completed output items — some backends
|
||||
# (chatgpt.com/backend-api/codex) stream valid items
|
||||
# via response.output_item.done but the SDK's
|
||||
# get_final_response() returns an empty output list.
|
||||
elif event_type == "response.output_item.done":
|
||||
done_item = getattr(event, "item", None)
|
||||
if done_item is not None:
|
||||
collected_output_items.append(done_item)
|
||||
# Log non-completed terminal events for diagnostics
|
||||
elif event_type in {"response.incomplete", "response.failed"}:
|
||||
resp_obj = getattr(event, "response", None)
|
||||
status = getattr(resp_obj, "status", None) if resp_obj else None
|
||||
incomplete_details = getattr(resp_obj, "incomplete_details", None) if resp_obj else None
|
||||
logger.warning(
|
||||
"Codex Responses stream received terminal event %s "
|
||||
"(status=%s, incomplete_details=%s, streamed_chars=%d). %s",
|
||||
event_type, status, incomplete_details,
|
||||
sum(len(p) for p in agent._codex_streamed_text_parts),
|
||||
agent._client_log_context(),
|
||||
)
|
||||
final_response = stream.get_final_response()
|
||||
# PATCH: ChatGPT Codex backend streams valid output items
|
||||
# but get_final_response() can return an empty output list.
|
||||
# Backfill from collected items or synthesize from deltas.
|
||||
_out = getattr(final_response, "output", None)
|
||||
if isinstance(_out, list) and not _out:
|
||||
if collected_output_items:
|
||||
final_response.output = list(collected_output_items)
|
||||
logger.debug(
|
||||
"Codex stream: backfilled %d output items from stream events",
|
||||
len(collected_output_items),
|
||||
)
|
||||
elif agent._codex_streamed_text_parts and not has_tool_calls:
|
||||
assembled = "".join(agent._codex_streamed_text_parts)
|
||||
final_response.output = [SimpleNamespace(
|
||||
type="message",
|
||||
role="assistant",
|
||||
status="completed",
|
||||
content=[SimpleNamespace(type="output_text", text=assembled)],
|
||||
)]
|
||||
logger.debug(
|
||||
"Codex stream: synthesized output from %d text deltas (%d chars)",
|
||||
len(agent._codex_streamed_text_parts), len(assembled),
|
||||
)
|
||||
return final_response
|
||||
except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc:
|
||||
if attempt < max_stream_retries:
|
||||
logger.debug(
|
||||
"Codex Responses stream transport failed (attempt %s/%s); retrying. %s error=%s",
|
||||
attempt + 1,
|
||||
max_stream_retries + 1,
|
||||
agent._client_log_context(),
|
||||
exc,
|
||||
)
|
||||
continue
|
||||
logger.debug(
|
||||
"Codex Responses stream transport failed; falling back to create(stream=True). %s error=%s",
|
||||
agent._client_log_context(),
|
||||
exc,
|
||||
)
|
||||
return agent._run_codex_create_stream_fallback(api_kwargs, client=active_client)
|
||||
except RuntimeError as exc:
|
||||
err_text = str(exc)
|
||||
missing_completed = "response.completed" in err_text
|
||||
# The OpenAI SDK's Responses streaming state machine raises
|
||||
# ``RuntimeError("Expected to have received `response.created`
|
||||
# before `<event-type>`")`` when the first SSE event from the
|
||||
# server is anything other than ``response.created`` — and it
|
||||
# discards the event's payload before we can read it. Three
|
||||
# real-world backends emit a different first frame:
|
||||
#
|
||||
# * xAI on grok-4.x OAuth — sends ``error`` (issues
|
||||
# reported around the May 2026 SuperGrok rollout when
|
||||
# multi-turn conversations replay encrypted reasoning
|
||||
# content the OAuth tier rejects)
|
||||
# * codex-lb relays — send ``codex.rate_limits`` (#14634)
|
||||
# * custom Responses relays — send ``response.in_progress``
|
||||
# (#8133)
|
||||
#
|
||||
# In all three cases the underlying byte stream is still
|
||||
# readable: a non-stream ``responses.create(stream=True)``
|
||||
# fallback succeeds and surfaces the real provider error as
|
||||
# a normal exception with body+status_code attached, which
|
||||
# ``_summarize_api_error`` can then translate into a useful
|
||||
# user-facing line. Treat ``response.created`` prelude
|
||||
# errors the same way we already treat ``response.completed``
|
||||
# postlude errors.
|
||||
prelude_error = (
|
||||
"Expected to have received `response.created`" in err_text
|
||||
or "Expected to have received \"response.created\"" in err_text
|
||||
)
|
||||
if (missing_completed or prelude_error) and attempt < max_stream_retries:
|
||||
logger.debug(
|
||||
"Responses stream %s (attempt %s/%s); retrying. %s",
|
||||
"prelude rejected" if prelude_error else "closed before completion",
|
||||
attempt + 1,
|
||||
max_stream_retries + 1,
|
||||
agent._client_log_context(),
|
||||
)
|
||||
continue
|
||||
if missing_completed or prelude_error:
|
||||
logger.debug(
|
||||
"Responses stream %s; falling back to create(stream=True). %s err=%s",
|
||||
"rejected before response.created" if prelude_error else "did not emit response.completed",
|
||||
agent._client_log_context(),
|
||||
err_text,
|
||||
)
|
||||
return agent._run_codex_create_stream_fallback(api_kwargs, client=active_client)
|
||||
raise
|
||||
|
||||
|
||||
|
||||
def run_codex_create_stream_fallback(agent, api_kwargs: dict, client: Any = None):
|
||||
"""Fallback path for stream completion edge cases on Codex-style Responses backends."""
|
||||
active_client = client or agent._ensure_primary_openai_client(reason="codex_create_stream_fallback")
|
||||
fallback_kwargs = dict(api_kwargs)
|
||||
fallback_kwargs["stream"] = True
|
||||
fallback_kwargs = agent._get_transport().preflight_kwargs(fallback_kwargs, allow_stream=True)
|
||||
stream_or_response = active_client.responses.create(**fallback_kwargs)
|
||||
|
||||
# Compatibility shim for mocks or providers that still return a concrete response.
|
||||
if hasattr(stream_or_response, "output"):
|
||||
return stream_or_response
|
||||
if not hasattr(stream_or_response, "__iter__"):
|
||||
return stream_or_response
|
||||
|
||||
terminal_response = None
|
||||
collected_output_items: list = []
|
||||
collected_text_deltas: list = []
|
||||
try:
|
||||
for event in stream_or_response:
|
||||
agent._touch_activity("receiving stream response")
|
||||
event_type = getattr(event, "type", None)
|
||||
if not event_type and isinstance(event, dict):
|
||||
event_type = event.get("type")
|
||||
|
||||
# ``error`` SSE frames carry the provider's real failure
|
||||
# reason (subscription / quota / model-not-available /
|
||||
# rejected-reasoning-replay) but never appear in the
|
||||
# ``{completed, incomplete, failed}`` terminal set, so the
|
||||
# raw loop below would silently consume them and end with
|
||||
# "did not emit a terminal response". xAI in particular
|
||||
# emits ``type=error`` as the FIRST frame for OAuth
|
||||
# accounts whose Grok subscription is missing/exhausted —
|
||||
# the SDK's stream helper raises ``RuntimeError(Expected
|
||||
# to have received response.created before error)`` which
|
||||
# the caller catches and routes here, expecting this
|
||||
# fallback to surface the message. Synthesize an
|
||||
# APIError-shaped exception so ``_summarize_api_error``
|
||||
# and the credential-pool entitlement detector see the
|
||||
# real text instead of a generic RuntimeError.
|
||||
if event_type == "error":
|
||||
err_message = getattr(event, "message", None)
|
||||
if not err_message and isinstance(event, dict):
|
||||
err_message = event.get("message")
|
||||
err_code = getattr(event, "code", None)
|
||||
if not err_code and isinstance(event, dict):
|
||||
err_code = event.get("code")
|
||||
err_param = getattr(event, "param", None)
|
||||
if not err_param and isinstance(event, dict):
|
||||
err_param = event.get("param")
|
||||
err_message = (err_message or "stream emitted error event").strip()
|
||||
from run_agent import _StreamErrorEvent
|
||||
raise _StreamErrorEvent(err_message, code=err_code, param=err_param)
|
||||
|
||||
# Collect output items and text deltas for backfill
|
||||
if event_type == "response.output_item.done":
|
||||
done_item = getattr(event, "item", None)
|
||||
if done_item is None and isinstance(event, dict):
|
||||
done_item = event.get("item")
|
||||
if done_item is not None:
|
||||
collected_output_items.append(done_item)
|
||||
elif event_type in {"response.output_text.delta",}:
|
||||
delta = getattr(event, "delta", "")
|
||||
if not delta and isinstance(event, dict):
|
||||
delta = event.get("delta", "")
|
||||
if delta:
|
||||
collected_text_deltas.append(delta)
|
||||
|
||||
if event_type not in {"response.completed", "response.incomplete", "response.failed"}:
|
||||
continue
|
||||
|
||||
terminal_response = getattr(event, "response", None)
|
||||
if terminal_response is None and isinstance(event, dict):
|
||||
terminal_response = event.get("response")
|
||||
if terminal_response is not None:
|
||||
# Backfill empty output from collected stream events
|
||||
_out = getattr(terminal_response, "output", None)
|
||||
if isinstance(_out, list) and not _out:
|
||||
if collected_output_items:
|
||||
terminal_response.output = list(collected_output_items)
|
||||
logger.debug(
|
||||
"Codex fallback stream: backfilled %d output items",
|
||||
len(collected_output_items),
|
||||
)
|
||||
elif collected_text_deltas:
|
||||
assembled = "".join(collected_text_deltas)
|
||||
terminal_response.output = [SimpleNamespace(
|
||||
type="message", role="assistant",
|
||||
status="completed",
|
||||
content=[SimpleNamespace(type="output_text", text=assembled)],
|
||||
)]
|
||||
logger.debug(
|
||||
"Codex fallback stream: synthesized from %d deltas (%d chars)",
|
||||
len(collected_text_deltas), len(assembled),
|
||||
)
|
||||
return terminal_response
|
||||
finally:
|
||||
close_fn = getattr(stream_or_response, "close", None)
|
||||
if callable(close_fn):
|
||||
try:
|
||||
close_fn()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if terminal_response is not None:
|
||||
return terminal_response
|
||||
raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.")
|
||||
|
||||
|
||||
|
||||
__all__ = [
|
||||
"run_codex_app_server_turn",
|
||||
"run_codex_stream",
|
||||
"run_codex_create_stream_fallback",
|
||||
]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,211 +0,0 @@
|
||||
"""Abstract base class for pluggable context engines.
|
||||
|
||||
A context engine controls how conversation context is managed when
|
||||
approaching the model's token limit. The built-in ContextCompressor
|
||||
is the default implementation. Third-party engines (e.g. LCM) can
|
||||
replace it via the plugin system or by being placed in the
|
||||
``plugins/context_engine/<name>/`` directory.
|
||||
|
||||
Selection is config-driven: ``context.engine`` in config.yaml.
|
||||
Default is ``"compressor"`` (the built-in). Only one engine is active.
|
||||
|
||||
The engine is responsible for:
|
||||
- Deciding when compaction should fire
|
||||
- Performing compaction (summarization, DAG construction, etc.)
|
||||
- Optionally exposing tools the agent can call (e.g. lcm_grep)
|
||||
- Tracking token usage from API responses
|
||||
|
||||
Lifecycle:
|
||||
1. Engine is instantiated and registered (plugin register() or default)
|
||||
2. on_session_start() called when a conversation begins
|
||||
3. update_from_response() called after each API response with usage data
|
||||
4. should_compress() checked after each turn
|
||||
5. compress() called when should_compress() returns True
|
||||
6. on_session_end() called at real session boundaries (CLI exit, /reset,
|
||||
gateway session expiry) — NOT per-turn
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
class ContextEngine(ABC):
|
||||
"""Base class all context engines must implement."""
|
||||
|
||||
# -- Identity ----------------------------------------------------------
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def name(self) -> str:
|
||||
"""Short identifier (e.g. 'compressor', 'lcm')."""
|
||||
|
||||
# -- Token state (read by run_agent.py for display/logging) ------------
|
||||
#
|
||||
# Engines MUST maintain these. run_agent.py reads them directly.
|
||||
|
||||
last_prompt_tokens: int = 0
|
||||
last_completion_tokens: int = 0
|
||||
last_total_tokens: int = 0
|
||||
threshold_tokens: int = 0
|
||||
context_length: int = 0
|
||||
compression_count: int = 0
|
||||
|
||||
# -- Compaction parameters (read by run_agent.py for preflight) --------
|
||||
#
|
||||
# These control the preflight compression check. Subclasses may
|
||||
# override via __init__ or property; defaults are sensible for most
|
||||
# engines.
|
||||
#
|
||||
# protect_first_n semantics (since PR #13754): count of non-system head
|
||||
# messages always preserved verbatim, IN ADDITION to the system prompt
|
||||
# which is always implicitly protected. Default 3 keeps the
|
||||
# historical "system + first 3 non-system messages" head shape.
|
||||
|
||||
threshold_percent: float = 0.75
|
||||
protect_first_n: int = 3
|
||||
protect_last_n: int = 6
|
||||
|
||||
# -- Core interface ----------------------------------------------------
|
||||
|
||||
@abstractmethod
|
||||
def update_from_response(self, usage: Dict[str, Any]) -> None:
|
||||
"""Update tracked token usage from an API response.
|
||||
|
||||
Called after every LLM call with the usage dict from the response.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def should_compress(self, prompt_tokens: int = None) -> bool:
|
||||
"""Return True if compaction should fire this turn."""
|
||||
|
||||
@abstractmethod
|
||||
def compress(
|
||||
self,
|
||||
messages: List[Dict[str, Any]],
|
||||
current_tokens: int = None,
|
||||
focus_topic: str = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Compact the message list and return the new message list.
|
||||
|
||||
This is the main entry point. The engine receives the full message
|
||||
list and returns a (possibly shorter) list that fits within the
|
||||
context budget. The implementation is free to summarize, build a
|
||||
DAG, or do anything else — as long as the returned list is a valid
|
||||
OpenAI-format message sequence.
|
||||
|
||||
Args:
|
||||
focus_topic: Optional topic string from manual ``/compress <focus>``.
|
||||
Engines that support guided compression should prioritise
|
||||
preserving information related to this topic. Engines that
|
||||
don't support it may simply ignore this argument.
|
||||
"""
|
||||
|
||||
# -- Optional: pre-flight check ----------------------------------------
|
||||
|
||||
def should_compress_preflight(self, messages: List[Dict[str, Any]]) -> bool:
|
||||
"""Quick rough check before the API call (no real token count yet).
|
||||
|
||||
Default returns False (skip pre-flight). Override if your engine
|
||||
can do a cheap estimate.
|
||||
"""
|
||||
return False
|
||||
|
||||
# -- Optional: manual /compress preflight ------------------------------
|
||||
|
||||
def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
|
||||
"""Quick check: is there anything in ``messages`` that can be compacted?
|
||||
|
||||
Used by the gateway ``/compress`` command as a preflight guard —
|
||||
returning False lets the gateway report "nothing to compress yet"
|
||||
without making an LLM call.
|
||||
|
||||
Default returns True (always attempt). Engines with a cheap way
|
||||
to introspect their own head/tail boundaries should override this
|
||||
to return False when the transcript is still entirely protected.
|
||||
"""
|
||||
return True
|
||||
|
||||
# -- Optional: session lifecycle ---------------------------------------
|
||||
|
||||
def on_session_start(self, session_id: str, **kwargs) -> None:
|
||||
"""Called when a new conversation session begins.
|
||||
|
||||
Use this to load persisted state (DAG, store) for the session.
|
||||
kwargs may include hermes_home, platform, model, etc.
|
||||
"""
|
||||
|
||||
def on_session_end(self, session_id: str, messages: List[Dict[str, Any]]) -> None:
|
||||
"""Called at real session boundaries (CLI exit, /reset, gateway expiry).
|
||||
|
||||
Use this to flush state, close DB connections, etc.
|
||||
NOT called per-turn — only when the session truly ends.
|
||||
"""
|
||||
|
||||
def on_session_reset(self) -> None:
|
||||
"""Called on /new or /reset. Reset per-session state.
|
||||
|
||||
Default resets compression_count and token tracking.
|
||||
"""
|
||||
self.last_prompt_tokens = 0
|
||||
self.last_completion_tokens = 0
|
||||
self.last_total_tokens = 0
|
||||
self.compression_count = 0
|
||||
|
||||
# -- Optional: tools ---------------------------------------------------
|
||||
|
||||
def get_tool_schemas(self) -> List[Dict[str, Any]]:
|
||||
"""Return tool schemas this engine provides to the agent.
|
||||
|
||||
Default returns empty list (no tools). LCM would return schemas
|
||||
for lcm_grep, lcm_describe, lcm_expand here.
|
||||
"""
|
||||
return []
|
||||
|
||||
def handle_tool_call(self, name: str, args: Dict[str, Any], **kwargs) -> str:
|
||||
"""Handle a tool call from the agent.
|
||||
|
||||
Only called for tool names returned by get_tool_schemas().
|
||||
Must return a JSON string.
|
||||
|
||||
kwargs may include:
|
||||
messages: the current in-memory message list (for live ingestion)
|
||||
"""
|
||||
import json
|
||||
return json.dumps({"error": f"Unknown context engine tool: {name}"})
|
||||
|
||||
# -- Optional: status / display ----------------------------------------
|
||||
|
||||
def get_status(self) -> Dict[str, Any]:
|
||||
"""Return status dict for display/logging.
|
||||
|
||||
Default returns the standard fields run_agent.py expects.
|
||||
"""
|
||||
return {
|
||||
"last_prompt_tokens": self.last_prompt_tokens,
|
||||
"threshold_tokens": self.threshold_tokens,
|
||||
"context_length": self.context_length,
|
||||
"usage_percent": (
|
||||
min(100, self.last_prompt_tokens / self.context_length * 100)
|
||||
if self.context_length else 0
|
||||
),
|
||||
"compression_count": self.compression_count,
|
||||
}
|
||||
|
||||
# -- Optional: model switch support ------------------------------------
|
||||
|
||||
def update_model(
|
||||
self,
|
||||
model: str,
|
||||
context_length: int,
|
||||
base_url: str = "",
|
||||
api_key: str = "",
|
||||
provider: str = "",
|
||||
) -> None:
|
||||
"""Called when the user switches models or on fallback activation.
|
||||
|
||||
Default updates context_length and recalculates threshold_tokens
|
||||
from threshold_percent. Override if your engine needs more
|
||||
(e.g. recalculate DAG budgets, switch summary models).
|
||||
"""
|
||||
self.context_length = context_length
|
||||
self.threshold_tokens = int(context_length * self.threshold_percent)
|
||||
@@ -483,7 +483,9 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
|
||||
text=True,
|
||||
timeout=10,
|
||||
)
|
||||
except (FileNotFoundError, OSError, subprocess.TimeoutExpired):
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
except subprocess.TimeoutExpired:
|
||||
return None
|
||||
if result.returncode != 0:
|
||||
return None
|
||||
|
||||
@@ -1,605 +0,0 @@
|
||||
"""Context compression — extract the AIAgent methods that drive summarisation.
|
||||
|
||||
Three concerns live here:
|
||||
|
||||
* :func:`check_compression_model_feasibility` — startup probe of the
|
||||
configured auxiliary compression model. Warns when the aux context
|
||||
window can't fit the main model's compression threshold; auto-lowers
|
||||
the session threshold when possible; hard-rejects auxes below
|
||||
``MINIMUM_CONTEXT_LENGTH``.
|
||||
|
||||
* :func:`replay_compression_warning` — re-emit a stored warning through
|
||||
the gateway ``status_callback`` once it's wired up (the callback is
|
||||
set after :class:`AIAgent` construction).
|
||||
|
||||
* :func:`compress_context` — the actual compression call. Runs the
|
||||
configured compressor, splits the SQLite session, rotates the
|
||||
session_id, notifies plugin context engines / memory providers, and
|
||||
returns the compressed message list and freshly-built system prompt.
|
||||
|
||||
* :func:`try_shrink_image_parts_in_messages` — image-too-large recovery
|
||||
helper that re-encodes ``data:image/...;base64,...`` parts at a smaller
|
||||
size so retries can fit under provider ceilings (Anthropic's 5 MB).
|
||||
|
||||
``run_agent`` keeps thin wrappers for each so existing call sites
|
||||
(``self._compress_context(...)``) keep working. Tests that exercise
|
||||
these paths see no behavioural change.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, List, Optional, Tuple
|
||||
|
||||
from agent.model_metadata import estimate_request_tokens_rough
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def check_compression_model_feasibility(agent: Any) -> None:
|
||||
"""Warn at session start if the auxiliary compression model's context
|
||||
window is smaller than the main model's compression threshold.
|
||||
|
||||
When the auxiliary model cannot fit the content that needs summarising,
|
||||
compression will either fail outright (the LLM call errors) or produce
|
||||
a severely truncated summary.
|
||||
|
||||
Called during ``AIAgent.__init__`` so CLI users see the warning
|
||||
immediately (via ``_vprint``). The gateway sets ``status_callback``
|
||||
*after* construction, so :func:`replay_compression_warning` re-sends
|
||||
the stored warning through the callback on the first
|
||||
``run_conversation()`` call.
|
||||
"""
|
||||
if not agent.compression_enabled:
|
||||
return
|
||||
try:
|
||||
from agent.auxiliary_client import (
|
||||
_resolve_task_provider_model,
|
||||
get_text_auxiliary_client,
|
||||
)
|
||||
from agent.model_metadata import (
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
get_model_context_length,
|
||||
)
|
||||
|
||||
client, aux_model = get_text_auxiliary_client(
|
||||
"compression",
|
||||
main_runtime=agent._current_main_runtime(),
|
||||
)
|
||||
# Best-effort aux provider label for the warning message. The
|
||||
# configured provider may be "auto", in which case we fall back
|
||||
# to the client's base_url hostname so the user can still tell
|
||||
# where the compression model is actually being called.
|
||||
try:
|
||||
_aux_cfg_provider, _, _, _, _ = _resolve_task_provider_model("compression")
|
||||
except Exception:
|
||||
_aux_cfg_provider = ""
|
||||
if client is None or not aux_model:
|
||||
if _aux_cfg_provider and _aux_cfg_provider != "auto":
|
||||
msg = (
|
||||
"⚠ Configured auxiliary compression provider "
|
||||
f"'{_aux_cfg_provider}' is unavailable — context "
|
||||
"compression will drop middle turns without a summary. "
|
||||
"Check auxiliary.compression in config.yaml and "
|
||||
"reauthenticate that provider."
|
||||
)
|
||||
else:
|
||||
msg = (
|
||||
"⚠ No auxiliary LLM provider configured — context "
|
||||
"compression will drop middle turns without a summary. "
|
||||
"Run `hermes setup` or set OPENROUTER_API_KEY."
|
||||
)
|
||||
agent._compression_warning = msg
|
||||
agent._emit_status(msg)
|
||||
logger.warning(
|
||||
"No auxiliary LLM provider for compression — "
|
||||
"summaries will be unavailable."
|
||||
)
|
||||
return
|
||||
|
||||
aux_base_url = str(getattr(client, "base_url", ""))
|
||||
# ``client.api_key`` may be a callable (Azure Foundry Entra ID
|
||||
# bearer provider). The context-length resolver chain expects a
|
||||
# string, but it only needs a key for live catalogue probes
|
||||
# (provider model lists). For Entra clients the model-metadata
|
||||
# chain still resolves via models.dev + hardcoded family
|
||||
# fallbacks, which don't require auth — pass empty string rather
|
||||
# than minting a bearer JWT just to look up a context length.
|
||||
_raw_aux_key = getattr(client, "api_key", "")
|
||||
aux_api_key = "" if (callable(_raw_aux_key) and not isinstance(_raw_aux_key, str)) else str(_raw_aux_key or "")
|
||||
|
||||
aux_context = get_model_context_length(
|
||||
aux_model,
|
||||
base_url=aux_base_url,
|
||||
api_key=aux_api_key,
|
||||
config_context_length=getattr(agent, "_aux_compression_context_length_config", None),
|
||||
# Each model must be resolved with its own provider so that
|
||||
# provider-specific paths (e.g. Bedrock static table, OpenRouter API)
|
||||
# are invoked for the correct client, not inherited from the main model.
|
||||
provider=(_aux_cfg_provider if _aux_cfg_provider and _aux_cfg_provider != "auto" else getattr(agent, "provider", "")),
|
||||
custom_providers=agent._custom_providers,
|
||||
)
|
||||
|
||||
# Hard floor: the auxiliary compression model must have at least
|
||||
# MINIMUM_CONTEXT_LENGTH (64K) tokens of context. The main model
|
||||
# is already required to meet this floor (checked earlier in
|
||||
# __init__), so the compression model must too — otherwise it
|
||||
# cannot summarise a full threshold-sized window of main-model
|
||||
# content. Mirrors the main-model rejection pattern.
|
||||
if aux_context and aux_context < MINIMUM_CONTEXT_LENGTH:
|
||||
raise ValueError(
|
||||
f"Auxiliary compression model {aux_model} has a context "
|
||||
f"window of {aux_context:,} tokens, which is below the "
|
||||
f"minimum {MINIMUM_CONTEXT_LENGTH:,} required by Hermes "
|
||||
f"Agent. Choose a compression model with at least "
|
||||
f"{MINIMUM_CONTEXT_LENGTH // 1000}K context (set "
|
||||
f"auxiliary.compression.model in config.yaml), or set "
|
||||
f"auxiliary.compression.context_length to override the "
|
||||
f"detected value if it is wrong."
|
||||
)
|
||||
|
||||
threshold = agent.context_compressor.threshold_tokens
|
||||
if aux_context < threshold:
|
||||
# Auto-correct: lower the live session threshold so
|
||||
# compression actually works this session. The hard floor
|
||||
# above guarantees aux_context >= MINIMUM_CONTEXT_LENGTH,
|
||||
# so the new threshold is always >= 64K.
|
||||
#
|
||||
# The compression summariser sends a single user-role
|
||||
# prompt (no system prompt, no tools) to the aux model, so
|
||||
# new_threshold == aux_context is safe: the request is
|
||||
# the raw messages plus a small summarisation instruction.
|
||||
old_threshold = threshold
|
||||
new_threshold = aux_context
|
||||
agent.context_compressor.threshold_tokens = new_threshold
|
||||
# Keep threshold_percent in sync so future main-model
|
||||
# context_length changes (update_model) re-derive from a
|
||||
# sensible number rather than the original too-high value.
|
||||
main_ctx = agent.context_compressor.context_length
|
||||
if main_ctx:
|
||||
agent.context_compressor.threshold_percent = (
|
||||
new_threshold / main_ctx
|
||||
)
|
||||
safe_pct = int((aux_context / main_ctx) * 100) if main_ctx else 50
|
||||
# Build human-readable "model (provider)" labels for both
|
||||
# the main model and the compression model so users can
|
||||
# tell at a glance which provider each side is actually
|
||||
# using. When the configured provider is empty or "auto",
|
||||
# fall back to the client's base_url hostname.
|
||||
_main_model = getattr(agent, "model", "") or "?"
|
||||
_main_provider = getattr(agent, "provider", "") or ""
|
||||
_aux_provider_label = (
|
||||
_aux_cfg_provider
|
||||
if _aux_cfg_provider and _aux_cfg_provider != "auto"
|
||||
else ""
|
||||
)
|
||||
if not _aux_provider_label:
|
||||
try:
|
||||
from urllib.parse import urlparse
|
||||
_aux_provider_label = (
|
||||
urlparse(aux_base_url).hostname or aux_base_url
|
||||
)
|
||||
except Exception:
|
||||
_aux_provider_label = aux_base_url or "auto"
|
||||
_main_label = (
|
||||
f"{_main_model} ({_main_provider})"
|
||||
if _main_provider
|
||||
else _main_model
|
||||
)
|
||||
_aux_label = f"{aux_model} ({_aux_provider_label})"
|
||||
msg = (
|
||||
f"⚠ Compression model {_aux_label} context is "
|
||||
f"{aux_context:,} tokens, but the main model "
|
||||
f"{_main_label}'s compression threshold was "
|
||||
f"{old_threshold:,} tokens. "
|
||||
f"Auto-lowered this session's threshold to "
|
||||
f"{new_threshold:,} tokens so compression can run.\n"
|
||||
f" To make this permanent, edit config.yaml — either:\n"
|
||||
f" 1. Use a larger compression model:\n"
|
||||
f" auxiliary:\n"
|
||||
f" compression:\n"
|
||||
f" model: <model-with-{old_threshold:,}+-context>\n"
|
||||
f" 2. Lower the compression threshold:\n"
|
||||
f" compression:\n"
|
||||
f" threshold: 0.{safe_pct:02d}"
|
||||
)
|
||||
agent._compression_warning = msg
|
||||
agent._emit_status(msg)
|
||||
logger.warning(
|
||||
"Auxiliary compression model %s has %d token context, "
|
||||
"below the main model's compression threshold of %d "
|
||||
"tokens — auto-lowered session threshold to %d to "
|
||||
"keep compression working.",
|
||||
aux_model,
|
||||
aux_context,
|
||||
old_threshold,
|
||||
new_threshold,
|
||||
)
|
||||
except ValueError:
|
||||
# Hard rejections (aux below minimum context) must propagate
|
||||
# so the session refuses to start.
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
"Compression feasibility check failed (non-fatal): %s", exc
|
||||
)
|
||||
|
||||
|
||||
def replay_compression_warning(agent: Any) -> None:
|
||||
"""Re-send the compression warning through ``status_callback``.
|
||||
|
||||
During ``__init__`` the gateway's ``status_callback`` is not yet
|
||||
wired, so ``_emit_status`` only reaches ``_vprint`` (CLI). This
|
||||
method is called once at the start of the first
|
||||
``run_conversation()`` — by then the gateway has set the callback,
|
||||
so every platform (Telegram, Discord, Slack, etc.) receives the
|
||||
warning.
|
||||
"""
|
||||
msg = getattr(agent, "_compression_warning", None)
|
||||
if msg and agent.status_callback:
|
||||
try:
|
||||
agent.status_callback("lifecycle", msg)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def compress_context(
|
||||
agent: Any,
|
||||
messages: list,
|
||||
system_message: str,
|
||||
*,
|
||||
approx_tokens: Optional[int] = None,
|
||||
task_id: str = "default",
|
||||
focus_topic: Optional[str] = None,
|
||||
force: bool = False,
|
||||
) -> Tuple[list, str]:
|
||||
"""Compress conversation context and split the session in SQLite.
|
||||
|
||||
Args:
|
||||
agent: The owning :class:`AIAgent`.
|
||||
messages: Current message history (will be summarised).
|
||||
system_message: Current system prompt; rebuilt after compression.
|
||||
approx_tokens: Pre-compression token estimate, logged for ops.
|
||||
task_id: Tool task scope (used for clearing file-read dedup state).
|
||||
focus_topic: Optional focus string for guided compression — the
|
||||
summariser will prioritise preserving information related to
|
||||
this topic. Inspired by Claude Code's ``/compact <focus>``.
|
||||
force: If True, bypass any active summary-failure cooldown. Set
|
||||
by the manual ``/compress`` slash command so users can retry
|
||||
immediately after an auto-compress abort. Auto-compress
|
||||
callers use the default ``False``.
|
||||
|
||||
Returns:
|
||||
``(compressed_messages, new_system_prompt)`` tuple. When
|
||||
compression aborts (aux LLM failed to produce a usable summary),
|
||||
returns the original messages unchanged and the existing system
|
||||
prompt — the session is NOT rotated. Callers should detect the
|
||||
no-op via ``len(returned) == len(input)`` and stop the retry loop.
|
||||
"""
|
||||
# Lazy feasibility check — run the auxiliary-provider probe + context
|
||||
# length lookup just-in-time on the first compression attempt instead of
|
||||
# at AIAgent.__init__. Saves ~400ms cold off every short session that
|
||||
# never reaches the threshold (the vast majority of ``chat -q`` runs).
|
||||
# The check itself sets ``agent._compression_warning`` so the
|
||||
# status-callback replay machinery still emits the warning to the user
|
||||
# the first time it would matter.
|
||||
if not getattr(agent, "_compression_feasibility_checked", True):
|
||||
try:
|
||||
check_compression_model_feasibility(agent)
|
||||
finally:
|
||||
agent._compression_feasibility_checked = True
|
||||
|
||||
_pre_msg_count = len(messages)
|
||||
logger.info(
|
||||
"context compression started: session=%s messages=%d tokens=~%s model=%s focus=%r",
|
||||
agent.session_id or "none", _pre_msg_count,
|
||||
f"{approx_tokens:,}" if approx_tokens else "unknown", agent.model,
|
||||
focus_topic,
|
||||
)
|
||||
agent._emit_status(
|
||||
"🗜️ Compacting context — summarizing earlier conversation so I can continue..."
|
||||
)
|
||||
|
||||
# Notify external memory provider before compression discards context
|
||||
if agent._memory_manager:
|
||||
try:
|
||||
agent._memory_manager.on_pre_compress(messages)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic, force=force)
|
||||
except TypeError:
|
||||
# Plugin context engine with strict signature that doesn't accept
|
||||
# focus_topic / force — fall back to calling without them.
|
||||
compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens)
|
||||
|
||||
# If compression aborted (aux LLM failed to produce a usable summary)
|
||||
# the compressor returns the input messages unchanged. Surface the
|
||||
# error to the user, skip the session-rotation work entirely (no
|
||||
# session has logically ended), and let auto-compress callers detect
|
||||
# the no-op via len(returned) == len(input).
|
||||
if getattr(agent.context_compressor, "_last_compress_aborted", False):
|
||||
_err = getattr(agent.context_compressor, "_last_summary_error", None) or "unknown error"
|
||||
if getattr(agent, "_last_compression_summary_warning", None) != _err:
|
||||
agent._last_compression_summary_warning = _err
|
||||
agent._emit_warning(
|
||||
f"⚠ Compression aborted: {_err}. "
|
||||
"No messages were dropped — conversation continues unchanged. "
|
||||
"Run /compress to retry, or /new to start a fresh session."
|
||||
)
|
||||
_existing_sp = getattr(agent, "_cached_system_prompt", None)
|
||||
if not _existing_sp:
|
||||
_existing_sp = agent._build_system_prompt(system_message)
|
||||
return messages, _existing_sp
|
||||
|
||||
summary_error = getattr(agent.context_compressor, "_last_summary_error", None)
|
||||
if summary_error:
|
||||
if getattr(agent, "_last_compression_summary_warning", None) != summary_error:
|
||||
agent._last_compression_summary_warning = summary_error
|
||||
agent._emit_warning(
|
||||
f"⚠ Compression summary failed: {summary_error}. "
|
||||
"Inserted a fallback context marker."
|
||||
)
|
||||
else:
|
||||
# No hard failure — but did the configured aux model error out
|
||||
# and get recovered by retrying on main? Surface that so users
|
||||
# know their auxiliary.compression.model setting is broken even
|
||||
# though compression succeeded.
|
||||
_aux_fail_model = getattr(agent.context_compressor, "_last_aux_model_failure_model", None)
|
||||
_aux_fail_err = getattr(agent.context_compressor, "_last_aux_model_failure_error", None)
|
||||
if _aux_fail_model:
|
||||
# Dedup on (model, error) so we don't spam on every compaction
|
||||
_aux_key = (_aux_fail_model, _aux_fail_err)
|
||||
if getattr(agent, "_last_aux_fallback_warning_key", None) != _aux_key:
|
||||
agent._last_aux_fallback_warning_key = _aux_key
|
||||
agent._emit_warning(
|
||||
f"ℹ Configured compression model '{_aux_fail_model}' failed "
|
||||
f"({_aux_fail_err or 'unknown error'}). Recovered using main model — "
|
||||
"check auxiliary.compression.model in config.yaml."
|
||||
)
|
||||
|
||||
todo_snapshot = agent._todo_store.format_for_injection()
|
||||
if todo_snapshot:
|
||||
compressed.append({"role": "user", "content": todo_snapshot})
|
||||
|
||||
agent._invalidate_system_prompt()
|
||||
new_system_prompt = agent._build_system_prompt(system_message)
|
||||
agent._cached_system_prompt = new_system_prompt
|
||||
|
||||
if agent._session_db:
|
||||
try:
|
||||
# Propagate title to the new session with auto-numbering
|
||||
old_title = agent._session_db.get_session_title(agent.session_id)
|
||||
# Trigger memory extraction on the old session before it rotates.
|
||||
agent.commit_memory_session(messages)
|
||||
agent._session_db.end_session(agent.session_id, "compression")
|
||||
old_session_id = agent.session_id
|
||||
agent.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
|
||||
os.environ["HERMES_SESSION_ID"] = agent.session_id
|
||||
try:
|
||||
from gateway.session_context import _SESSION_ID
|
||||
_SESSION_ID.set(agent.session_id)
|
||||
except Exception:
|
||||
pass
|
||||
# Update session_log_file to point to the new session's JSON file
|
||||
agent.session_log_file = agent.logs_dir / f"session_{agent.session_id}.json"
|
||||
agent._session_db_created = False
|
||||
agent._session_db.create_session(
|
||||
session_id=agent.session_id,
|
||||
source=agent.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
|
||||
model=agent.model,
|
||||
model_config=agent._session_init_model_config,
|
||||
parent_session_id=old_session_id,
|
||||
)
|
||||
agent._session_db_created = True
|
||||
# Auto-number the title for the continuation session
|
||||
if old_title:
|
||||
try:
|
||||
new_title = agent._session_db.get_next_title_in_lineage(old_title)
|
||||
agent._session_db.set_session_title(agent.session_id, new_title)
|
||||
except (ValueError, Exception) as e:
|
||||
logger.debug("Could not propagate title on compression: %s", e)
|
||||
agent._session_db.update_system_prompt(agent.session_id, new_system_prompt)
|
||||
# Reset flush cursor — new session starts with no messages written
|
||||
agent._last_flushed_db_idx = 0
|
||||
except Exception as e:
|
||||
logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e)
|
||||
|
||||
# Notify the context engine that the session_id rotated because of
|
||||
# compression (not a fresh /new). Plugin engines (e.g. hermes-lcm) use
|
||||
# boundary_reason="compression" to preserve DAG lineage across the
|
||||
# rollover instead of re-initializing fresh per-session state.
|
||||
# See hermes-lcm#68. Built-in ContextCompressor ignores kwargs.
|
||||
try:
|
||||
_old_sid = locals().get("old_session_id")
|
||||
if _old_sid and hasattr(agent.context_compressor, "on_session_start"):
|
||||
agent.context_compressor.on_session_start(
|
||||
agent.session_id or "",
|
||||
boundary_reason="compression",
|
||||
old_session_id=_old_sid,
|
||||
)
|
||||
except Exception as _ce_err:
|
||||
logger.debug("context engine on_session_start (compression): %s", _ce_err)
|
||||
|
||||
# Notify memory providers of the compression-driven session_id rotation
|
||||
# so provider-cached per-session state (Hindsight's _document_id,
|
||||
# accumulated turn buffers, counters) refreshes. reset=False because
|
||||
# the logical conversation continues; only the id and DB row rolled
|
||||
# over. See #6672.
|
||||
try:
|
||||
_old_sid = locals().get("old_session_id")
|
||||
if _old_sid and agent._memory_manager:
|
||||
agent._memory_manager.on_session_switch(
|
||||
agent.session_id or "",
|
||||
parent_session_id=_old_sid,
|
||||
reset=False,
|
||||
reason="compression",
|
||||
)
|
||||
except Exception as _me_err:
|
||||
logger.debug("memory manager on_session_switch (compression): %s", _me_err)
|
||||
|
||||
# Warn on repeated compressions (quality degrades with each pass)
|
||||
_cc = agent.context_compressor.compression_count
|
||||
if _cc >= 2:
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}⚠️ Session compressed {_cc} times — "
|
||||
f"accuracy may degrade. Consider /new to start fresh.",
|
||||
force=True,
|
||||
)
|
||||
|
||||
# Update token estimate after compaction so pressure calculations
|
||||
# use the post-compression count, not the stale pre-compression one.
|
||||
# Use estimate_request_tokens_rough() so tool schemas are included —
|
||||
# with 50+ tools enabled, schemas alone can add 20-30K tokens, and
|
||||
# omitting them delays the next compression cycle far past the
|
||||
# configured threshold (issue #14695).
|
||||
_compressed_est = estimate_request_tokens_rough(
|
||||
compressed,
|
||||
system_prompt=new_system_prompt or "",
|
||||
tools=agent.tools or None,
|
||||
)
|
||||
agent.context_compressor.last_prompt_tokens = _compressed_est
|
||||
agent.context_compressor.last_completion_tokens = 0
|
||||
|
||||
# Clear the file-read dedup cache. After compression the original
|
||||
# read content is summarised away — if the model re-reads the same
|
||||
# file it needs the full content, not a "file unchanged" stub.
|
||||
try:
|
||||
from tools.file_tools import reset_file_dedup
|
||||
reset_file_dedup(task_id)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
logger.info(
|
||||
"context compression done: session=%s messages=%d->%d tokens=~%s",
|
||||
agent.session_id or "none", _pre_msg_count, len(compressed),
|
||||
f"{_compressed_est:,}",
|
||||
)
|
||||
return compressed, new_system_prompt
|
||||
|
||||
|
||||
def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
|
||||
"""Re-encode all native image parts at a smaller size to recover from
|
||||
image-too-large errors (Anthropic 5 MB, unknown other providers).
|
||||
|
||||
Mutates ``api_messages`` in place. Returns True if any image part was
|
||||
actually replaced, False if there were no image parts to shrink or
|
||||
Pillow couldn't help (caller should surface the original error).
|
||||
|
||||
Strategy: look for ``image_url`` / ``input_image`` parts carrying a
|
||||
``data:image/...;base64,...`` payload. For each one whose encoded
|
||||
size exceeds 4 MB (a safe target that slides under Anthropic's 5 MB
|
||||
ceiling with header overhead), write the base64 to a tempfile, call
|
||||
``vision_tools._resize_image_for_vision`` to produce a smaller data
|
||||
URL, and substitute it in place.
|
||||
|
||||
Non-data-URL images (http/https URLs) are not touched — the provider
|
||||
fetches those itself and the size limit is different.
|
||||
"""
|
||||
if not api_messages:
|
||||
return False
|
||||
|
||||
try:
|
||||
from tools.vision_tools import _resize_image_for_vision
|
||||
except Exception as exc:
|
||||
logger.warning("image-shrink recovery: vision_tools unavailable — %s", exc)
|
||||
return False
|
||||
|
||||
# 4 MB target leaves comfortable headroom under Anthropic's 5 MB.
|
||||
# Non-Anthropic providers we haven't observed rejecting are fine with
|
||||
# much larger; shrinking to 4 MB here loses quality but only fires
|
||||
# after a confirmed provider rejection, so the alternative is failure.
|
||||
target_bytes = 4 * 1024 * 1024
|
||||
changed_count = 0
|
||||
|
||||
def _shrink_data_url(url: str) -> Optional[str]:
|
||||
"""Return a smaller data URL, or None if shrink can't help."""
|
||||
if not isinstance(url, str) or not url.startswith("data:"):
|
||||
return None
|
||||
if len(url) <= target_bytes:
|
||||
# This specific image wasn't the oversized one.
|
||||
return None
|
||||
try:
|
||||
header, _, data = url.partition(",")
|
||||
mime = "image/jpeg"
|
||||
if header.startswith("data:"):
|
||||
mime_part = header[len("data:"):].split(";", 1)[0].strip()
|
||||
if mime_part.startswith("image/"):
|
||||
mime = mime_part
|
||||
import base64 as _b64
|
||||
raw = _b64.b64decode(data)
|
||||
suffix = {
|
||||
"image/png": ".png", "image/gif": ".gif", "image/webp": ".webp",
|
||||
"image/jpeg": ".jpg", "image/jpg": ".jpg", "image/bmp": ".bmp",
|
||||
}.get(mime, ".jpg")
|
||||
tmp = tempfile.NamedTemporaryFile(
|
||||
prefix="hermes_shrink_", suffix=suffix, delete=False,
|
||||
)
|
||||
try:
|
||||
tmp.write(raw)
|
||||
tmp.close()
|
||||
resized = _resize_image_for_vision(
|
||||
Path(tmp.name),
|
||||
mime_type=mime,
|
||||
max_base64_bytes=target_bytes,
|
||||
)
|
||||
finally:
|
||||
try:
|
||||
Path(tmp.name).unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
if not resized or len(resized) >= len(url):
|
||||
# Shrink didn't help (or made it bigger — corrupt input?).
|
||||
return None
|
||||
return resized
|
||||
except Exception as exc:
|
||||
logger.warning("image-shrink recovery: re-encode failed — %s", exc)
|
||||
return None
|
||||
|
||||
for msg in api_messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
content = msg.get("content")
|
||||
if not isinstance(content, list):
|
||||
continue
|
||||
for part in content:
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
ptype = part.get("type")
|
||||
if ptype not in {"image_url", "input_image"}:
|
||||
continue
|
||||
image_value = part.get("image_url")
|
||||
# OpenAI chat.completions: {"image_url": {"url": "data:..."}}
|
||||
# OpenAI Responses: {"image_url": "data:..."}
|
||||
if isinstance(image_value, dict):
|
||||
url = image_value.get("url", "")
|
||||
resized = _shrink_data_url(url)
|
||||
if resized:
|
||||
image_value["url"] = resized
|
||||
changed_count += 1
|
||||
elif isinstance(image_value, str):
|
||||
resized = _shrink_data_url(image_value)
|
||||
if resized:
|
||||
part["image_url"] = resized
|
||||
changed_count += 1
|
||||
|
||||
if changed_count:
|
||||
logger.info(
|
||||
"image-shrink recovery: re-encoded %d image part(s) to fit under %.0f MB",
|
||||
changed_count, target_bytes / (1024 * 1024),
|
||||
)
|
||||
return changed_count > 0
|
||||
|
||||
|
||||
__all__ = [
|
||||
"check_compression_model_feasibility",
|
||||
"replay_compression_warning",
|
||||
"compress_context",
|
||||
"try_shrink_image_parts_in_messages",
|
||||
]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -21,37 +21,12 @@ from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
from agent.file_safety import get_read_block_error, is_write_denied
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
||||
ACP_MARKER_BASE_URL = "acp://copilot"
|
||||
_DEFAULT_TIMEOUT_SECONDS = 900.0
|
||||
|
||||
_TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
|
||||
_TOOL_CALL_JSON_RE = re.compile(r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}", re.DOTALL)
|
||||
|
||||
# Stderr fingerprint of the deprecated `gh copilot` CLI extension
|
||||
# (https://github.blog/changelog/2025-09-25-upcoming-deprecation-of-gh-copilot-cli-extension).
|
||||
# We require BOTH the literal product name ("gh-copilot") AND a deprecation
|
||||
# marker, so generic stderr from the NEW `@github/copilot` CLI — whose repo
|
||||
# is github.com/github/copilot-cli and which legitimately mentions "copilot-cli"
|
||||
# in its own banners and error messages — doesn't get misclassified as the
|
||||
# deprecated extension.
|
||||
_DEPRECATION_REQUIRED = ("gh-copilot",)
|
||||
_DEPRECATION_MARKERS = (
|
||||
"has been deprecated",
|
||||
"no commands will be executed",
|
||||
)
|
||||
|
||||
|
||||
def _is_gh_copilot_deprecation_message(stderr_text: str) -> bool:
|
||||
"""True iff stderr looks like the deprecated gh-copilot extension's banner."""
|
||||
|
||||
lower = stderr_text.lower()
|
||||
if not any(req in lower for req in _DEPRECATION_REQUIRED):
|
||||
return False
|
||||
return any(marker in lower for marker in _DEPRECATION_MARKERS)
|
||||
|
||||
|
||||
def _resolve_command() -> str:
|
||||
return (
|
||||
@@ -68,47 +43,6 @@ def _resolve_args() -> list[str]:
|
||||
return shlex.split(raw)
|
||||
|
||||
|
||||
def _resolve_home_dir() -> str:
|
||||
"""Return a stable HOME for child ACP processes."""
|
||||
|
||||
try:
|
||||
from hermes_constants import get_subprocess_home
|
||||
|
||||
profile_home = get_subprocess_home()
|
||||
if profile_home:
|
||||
return profile_home
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
home = os.environ.get("HOME", "").strip()
|
||||
if home:
|
||||
return home
|
||||
|
||||
expanded = os.path.expanduser("~")
|
||||
if expanded and expanded != "~":
|
||||
return expanded
|
||||
|
||||
try:
|
||||
import pwd
|
||||
|
||||
resolved = pwd.getpwuid(os.getuid()).pw_dir.strip() # windows-footgun: ok — POSIX fallback inside try/except (pwd import fails on Windows)
|
||||
if resolved:
|
||||
return resolved
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Last resort: /tmp (writable on any POSIX system). Avoids crashing the
|
||||
# subprocess with no HOME; callers can set HERMES_HOME explicitly if they
|
||||
# need a different writable dir.
|
||||
return "/tmp"
|
||||
|
||||
|
||||
def _build_subprocess_env() -> dict[str, str]:
|
||||
env = os.environ.copy()
|
||||
env["HOME"] = _resolve_home_dir()
|
||||
return env
|
||||
|
||||
|
||||
def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
|
||||
return {
|
||||
"jsonrpc": "2.0",
|
||||
@@ -120,18 +54,6 @@ def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
|
||||
}
|
||||
|
||||
|
||||
def _permission_denied(message_id: Any) -> dict[str, Any]:
|
||||
return {
|
||||
"jsonrpc": "2.0",
|
||||
"id": message_id,
|
||||
"result": {
|
||||
"outcome": {
|
||||
"outcome": "cancelled",
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _format_messages_as_prompt(
|
||||
messages: list[dict[str, Any]],
|
||||
model: str | None = None,
|
||||
@@ -391,25 +313,9 @@ class CopilotACPClient:
|
||||
tools=tools,
|
||||
tool_choice=tool_choice,
|
||||
)
|
||||
# Normalise timeout: run_agent.py may pass an httpx.Timeout object
|
||||
# (used natively by the OpenAI SDK) rather than a plain float.
|
||||
if timeout is None:
|
||||
_effective_timeout = _DEFAULT_TIMEOUT_SECONDS
|
||||
elif isinstance(timeout, (int, float)):
|
||||
_effective_timeout = float(timeout)
|
||||
else:
|
||||
# httpx.Timeout or similar — pick the largest component so the
|
||||
# subprocess has enough wall-clock time for the full response.
|
||||
_candidates = [
|
||||
getattr(timeout, attr, None)
|
||||
for attr in ("read", "write", "connect", "pool", "timeout")
|
||||
]
|
||||
_numeric = [float(v) for v in _candidates if isinstance(v, (int, float))]
|
||||
_effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS
|
||||
|
||||
response_text, reasoning_text = self._run_prompt(
|
||||
prompt_text,
|
||||
timeout_seconds=_effective_timeout,
|
||||
timeout_seconds=float(timeout or _DEFAULT_TIMEOUT_SECONDS),
|
||||
)
|
||||
|
||||
tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text)
|
||||
@@ -445,7 +351,6 @@ class CopilotACPClient:
|
||||
text=True,
|
||||
bufsize=1,
|
||||
cwd=self._acp_cwd,
|
||||
env=_build_subprocess_env(),
|
||||
)
|
||||
except FileNotFoundError as exc:
|
||||
raise RuntimeError(
|
||||
@@ -465,8 +370,6 @@ class CopilotACPClient:
|
||||
stderr_tail: deque[str] = deque(maxlen=40)
|
||||
|
||||
def _stdout_reader() -> None:
|
||||
if proc.stdout is None:
|
||||
return
|
||||
for line in proc.stdout:
|
||||
try:
|
||||
inbox.put(json.loads(line))
|
||||
@@ -499,8 +402,8 @@ class CopilotACPClient:
|
||||
proc.stdin.write(json.dumps(payload) + "\n")
|
||||
proc.stdin.flush()
|
||||
|
||||
deadline = time.monotonic() + timeout_seconds
|
||||
while time.monotonic() < deadline:
|
||||
deadline = time.time() + timeout_seconds
|
||||
while time.time() < deadline:
|
||||
if proc.poll() is not None:
|
||||
break
|
||||
try:
|
||||
@@ -528,21 +431,6 @@ class CopilotACPClient:
|
||||
|
||||
stderr_text = "\n".join(stderr_tail).strip()
|
||||
if proc.poll() is not None and stderr_text:
|
||||
if _is_gh_copilot_deprecation_message(stderr_text):
|
||||
raise RuntimeError(
|
||||
"Hermes ACP mode requires the NEW GitHub Copilot CLI "
|
||||
"(github.com/github/copilot-cli), but the binary it just "
|
||||
"spawned is the deprecated `gh copilot` extension.\n\n"
|
||||
"Install the new CLI:\n"
|
||||
" npm install -g @github/copilot\n"
|
||||
" # then verify with: copilot --help\n\n"
|
||||
"If `copilot` already resolves to the new CLI but you still see this,\n"
|
||||
"point Hermes at it explicitly:\n"
|
||||
" export HERMES_COPILOT_ACP_COMMAND=/path/to/new/copilot\n\n"
|
||||
"Alternative: use the `copilot` provider (no ACP, hits the Copilot API\n"
|
||||
"directly with a Copilot subscription token) via `hermes setup`.\n\n"
|
||||
f"Original error:\n{stderr_text}"
|
||||
)
|
||||
raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
|
||||
raise TimeoutError(f"Timed out waiting for Copilot ACP response to {method}.")
|
||||
|
||||
@@ -629,17 +517,19 @@ class CopilotACPClient:
|
||||
params = msg.get("params") or {}
|
||||
|
||||
if method == "session/request_permission":
|
||||
response = _permission_denied(message_id)
|
||||
response = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": message_id,
|
||||
"result": {
|
||||
"outcome": {
|
||||
"outcome": "allow_once",
|
||||
}
|
||||
},
|
||||
}
|
||||
elif method == "fs/read_text_file":
|
||||
try:
|
||||
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
|
||||
block_error = get_read_block_error(str(path))
|
||||
if block_error:
|
||||
raise PermissionError(block_error)
|
||||
try:
|
||||
content = path.read_text()
|
||||
except FileNotFoundError:
|
||||
content = ""
|
||||
content = path.read_text() if path.exists() else ""
|
||||
line = params.get("line")
|
||||
limit = params.get("limit")
|
||||
if isinstance(line, int) and line > 1:
|
||||
@@ -647,8 +537,6 @@ class CopilotACPClient:
|
||||
start = line - 1
|
||||
end = start + limit if isinstance(limit, int) and limit > 0 else None
|
||||
content = "".join(lines[start:end])
|
||||
if content:
|
||||
content = redact_sensitive_text(content, force=True)
|
||||
response = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": message_id,
|
||||
@@ -661,10 +549,6 @@ class CopilotACPClient:
|
||||
elif method == "fs/write_text_file":
|
||||
try:
|
||||
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
|
||||
if is_write_denied(str(path)):
|
||||
raise PermissionError(
|
||||
f"Write denied: '{path}' is a protected system/credential file."
|
||||
)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(str(params.get("content") or ""))
|
||||
response = {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,448 +0,0 @@
|
||||
"""Unified removal contract for every credential source Hermes reads from.
|
||||
|
||||
Hermes seeds its credential pool from many places:
|
||||
|
||||
env:<VAR> — os.environ / ~/.hermes/.env
|
||||
claude_code — ~/.claude/.credentials.json
|
||||
hermes_pkce — ~/.hermes/.anthropic_oauth.json
|
||||
device_code — auth.json providers.<provider> (nous, openai-codex, ...)
|
||||
qwen-cli — ~/.qwen/oauth_creds.json
|
||||
gh_cli — gh auth token
|
||||
config:<name> — custom_providers config entry
|
||||
model_config — model.api_key when model.provider == "custom"
|
||||
manual — user ran `hermes auth add`
|
||||
|
||||
Each source has its own reader inside ``agent.credential_pool._seed_from_*``
|
||||
(which keep their existing shape — we haven't restructured them). What we
|
||||
unify here is **removal**:
|
||||
|
||||
``hermes auth remove <provider> <N>`` must make the pool entry stay gone.
|
||||
|
||||
Before this module, every source had an ad-hoc removal branch in
|
||||
``auth_remove_command``, and several sources had no branch at all — so
|
||||
``auth remove`` silently reverted on the next ``load_pool()`` call for
|
||||
qwen-cli, nous device_code (partial), hermes_pkce, copilot gh_cli, and
|
||||
custom-config sources.
|
||||
|
||||
Now every source registers a ``RemovalStep`` that does exactly three things
|
||||
in the same shape:
|
||||
|
||||
1. Clean up whatever externally-readable state the source reads from
|
||||
(.env line, auth.json block, OAuth file, etc.)
|
||||
2. Suppress the ``(provider, source_id)`` in auth.json so the
|
||||
corresponding ``_seed_from_*`` branch skips the upsert on re-load
|
||||
3. Return ``RemovalResult`` describing what was cleaned and any
|
||||
diagnostic hints the user should see (shell-exported env vars,
|
||||
external credential files we deliberately don't delete, etc.)
|
||||
|
||||
Adding a new credential source is:
|
||||
- wire up a reader branch in ``_seed_from_*`` (existing pattern)
|
||||
- gate that reader behind ``is_source_suppressed(provider, source_id)``
|
||||
- register a ``RemovalStep`` here
|
||||
|
||||
No more per-source if/elif chain in ``auth_remove_command``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
class RemovalResult:
|
||||
"""Outcome of removing a credential source.
|
||||
|
||||
Attributes:
|
||||
cleaned: Short strings describing external state that was actually
|
||||
mutated (``"Cleared XAI_API_KEY from .env"``,
|
||||
``"Cleared openai-codex OAuth tokens from auth store"``).
|
||||
Printed as plain lines to the user.
|
||||
hints: Diagnostic lines ABOUT state the user may need to clean up
|
||||
themselves or is deliberately left intact (shell-exported env
|
||||
var, Claude Code credential file we don't delete, etc.).
|
||||
Printed as plain lines to the user. Always non-destructive.
|
||||
suppress: Whether to call ``suppress_credential_source`` after
|
||||
cleanup so future ``load_pool`` calls skip this source.
|
||||
Default True — almost every source needs this to stay sticky.
|
||||
The only legitimate False is ``manual`` entries, which aren't
|
||||
seeded from anywhere external.
|
||||
"""
|
||||
|
||||
cleaned: List[str] = field(default_factory=list)
|
||||
hints: List[str] = field(default_factory=list)
|
||||
suppress: bool = True
|
||||
|
||||
|
||||
@dataclass
|
||||
class RemovalStep:
|
||||
"""How to remove one specific credential source cleanly.
|
||||
|
||||
Attributes:
|
||||
provider: Provider pool key (``"xai"``, ``"anthropic"``, ``"nous"``, ...).
|
||||
Special value ``"*"`` means "matches any provider" — used for
|
||||
sources like ``manual`` that aren't provider-specific.
|
||||
source_id: Source identifier as it appears in
|
||||
``PooledCredential.source``. May be a literal (``"claude_code"``)
|
||||
or a prefix pattern matched via ``match_fn``.
|
||||
match_fn: Optional predicate overriding literal ``source_id``
|
||||
matching. Gets the removed entry's source string. Used for
|
||||
``env:*`` (any env-seeded key), ``config:*`` (any custom
|
||||
pool), and ``manual:*`` (any manual-source variant).
|
||||
remove_fn: ``(provider, removed_entry) -> RemovalResult``. Does the
|
||||
actual cleanup and returns what happened for the user.
|
||||
description: One-line human-readable description for docs / tests.
|
||||
"""
|
||||
|
||||
provider: str
|
||||
source_id: str
|
||||
remove_fn: Callable[..., RemovalResult]
|
||||
match_fn: Optional[Callable[[str], bool]] = None
|
||||
description: str = ""
|
||||
|
||||
def matches(self, provider: str, source: str) -> bool:
|
||||
if self.provider != "*" and self.provider != provider:
|
||||
return False
|
||||
if self.match_fn is not None:
|
||||
return self.match_fn(source)
|
||||
return source == self.source_id
|
||||
|
||||
|
||||
_REGISTRY: List[RemovalStep] = []
|
||||
|
||||
|
||||
def register(step: RemovalStep) -> RemovalStep:
|
||||
_REGISTRY.append(step)
|
||||
return step
|
||||
|
||||
|
||||
def find_removal_step(provider: str, source: str) -> Optional[RemovalStep]:
|
||||
"""Return the first matching RemovalStep, or None if unregistered.
|
||||
|
||||
Unregistered sources fall through to the default remove path in
|
||||
``auth_remove_command``: the pool entry is already gone (that happens
|
||||
before dispatch), no external cleanup, no suppression. This is the
|
||||
correct behaviour for ``manual`` entries — they were only ever stored
|
||||
in the pool, nothing external to clean up.
|
||||
"""
|
||||
for step in _REGISTRY:
|
||||
if step.matches(provider, source):
|
||||
return step
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Individual RemovalStep implementations — one per source.
|
||||
# ---------------------------------------------------------------------------
|
||||
# Each remove_fn is intentionally small and single-purpose. Adding a new
|
||||
# credential source means adding ONE entry here — no other changes to
|
||||
# auth_remove_command.
|
||||
|
||||
|
||||
def _remove_env_source(provider: str, removed) -> RemovalResult:
|
||||
"""env:<VAR> — the most common case.
|
||||
|
||||
Handles three user situations:
|
||||
1. Var lives only in ~/.hermes/.env → clear it
|
||||
2. Var lives only in the user's shell (shell profile, systemd
|
||||
EnvironmentFile, launchd plist) → hint them where to unset it
|
||||
3. Var lives in both → clear from .env, hint about shell
|
||||
"""
|
||||
from hermes_cli.config import get_env_path, remove_env_value
|
||||
|
||||
result = RemovalResult()
|
||||
env_var = removed.source[len("env:"):]
|
||||
if not env_var:
|
||||
return result
|
||||
|
||||
# Detect shell vs .env BEFORE remove_env_value pops os.environ.
|
||||
env_in_process = bool(os.getenv(env_var))
|
||||
env_in_dotenv = False
|
||||
try:
|
||||
env_path = get_env_path()
|
||||
if env_path.exists():
|
||||
env_in_dotenv = any(
|
||||
line.strip().startswith(f"{env_var}=")
|
||||
for line in env_path.read_text(errors="replace").splitlines()
|
||||
)
|
||||
except OSError:
|
||||
pass
|
||||
shell_exported = env_in_process and not env_in_dotenv
|
||||
|
||||
cleared = remove_env_value(env_var)
|
||||
if cleared:
|
||||
result.cleaned.append(f"Cleared {env_var} from .env")
|
||||
|
||||
if shell_exported:
|
||||
result.hints.extend([
|
||||
f"Note: {env_var} is still set in your shell environment "
|
||||
f"(not in ~/.hermes/.env).",
|
||||
" Unset it there (shell profile, systemd EnvironmentFile, "
|
||||
"launchd plist, etc.) or it will keep being visible to Hermes.",
|
||||
f" The pool entry is now suppressed — Hermes will ignore "
|
||||
f"{env_var} until you run `hermes auth add {provider}`.",
|
||||
])
|
||||
else:
|
||||
result.hints.append(
|
||||
f"Suppressed env:{env_var} — it will not be re-seeded even "
|
||||
f"if the variable is re-exported later."
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
def _remove_claude_code(provider: str, removed) -> RemovalResult:
|
||||
"""~/.claude/.credentials.json is owned by Claude Code itself.
|
||||
|
||||
We don't delete it — the user's Claude Code install still needs to
|
||||
work. We just suppress it so Hermes stops reading it.
|
||||
"""
|
||||
return RemovalResult(hints=[
|
||||
"Suppressed claude_code credential — it will not be re-seeded.",
|
||||
"Note: Claude Code credentials still live in ~/.claude/.credentials.json",
|
||||
"Run `hermes auth add anthropic` to re-enable if needed.",
|
||||
])
|
||||
|
||||
|
||||
def _remove_hermes_pkce(provider: str, removed) -> RemovalResult:
|
||||
"""~/.hermes/.anthropic_oauth.json is ours — delete it outright."""
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
result = RemovalResult()
|
||||
oauth_file = get_hermes_home() / ".anthropic_oauth.json"
|
||||
if oauth_file.exists():
|
||||
try:
|
||||
oauth_file.unlink()
|
||||
result.cleaned.append("Cleared Hermes Anthropic OAuth credentials")
|
||||
except OSError as exc:
|
||||
result.hints.append(f"Could not delete {oauth_file}: {exc}")
|
||||
return result
|
||||
|
||||
|
||||
def _clear_auth_store_provider(provider: str) -> bool:
|
||||
"""Delete auth_store.providers[provider]. Returns True if deleted."""
|
||||
from hermes_cli.auth import (
|
||||
_auth_store_lock,
|
||||
_load_auth_store,
|
||||
_save_auth_store,
|
||||
)
|
||||
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
providers_dict = auth_store.get("providers")
|
||||
if isinstance(providers_dict, dict) and provider in providers_dict:
|
||||
del providers_dict[provider]
|
||||
_save_auth_store(auth_store)
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
|
||||
"""Nous OAuth lives in auth.json providers.nous — clear it and suppress.
|
||||
|
||||
We suppress in addition to clearing because nothing else stops the
|
||||
user's next `hermes login` run from writing providers.nous again
|
||||
before they decide to. Suppression forces them to go through
|
||||
`hermes auth add nous` to re-engage, which is the documented re-add
|
||||
path and clears the suppression atomically.
|
||||
"""
|
||||
result = RemovalResult()
|
||||
if _clear_auth_store_provider(provider):
|
||||
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
|
||||
return result
|
||||
|
||||
|
||||
def _remove_minimax_oauth(provider: str, removed) -> RemovalResult:
|
||||
"""MiniMax OAuth lives in auth.json providers.minimax-oauth — clear it.
|
||||
|
||||
Same pattern as Nous: single-source OAuth state with refresh tokens.
|
||||
Suppression of the `oauth` source ensures the pool reseed path
|
||||
(_seed_from_singletons) doesn't instantly undo the removal.
|
||||
"""
|
||||
result = RemovalResult()
|
||||
if _clear_auth_store_provider(provider):
|
||||
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
|
||||
return result
|
||||
|
||||
|
||||
def _remove_xai_oauth_loopback_pkce(provider: str, removed) -> RemovalResult:
|
||||
"""xAI OAuth tokens live in auth.json providers.xai-oauth — clear them.
|
||||
|
||||
Without this step, ``hermes auth remove xai-oauth <N>`` silently undoes
|
||||
itself: the central dispatcher only removes the in-memory pool entry,
|
||||
leaves ``providers.xai-oauth`` in auth.json intact, and on the next
|
||||
``load_pool("xai-oauth")`` call ``_seed_from_singletons`` re-seeds the
|
||||
entry from the still-present singleton — credentials reappear with no
|
||||
user feedback. Clearing the singleton in step with the suppression set
|
||||
by the central dispatcher makes the removal stick.
|
||||
|
||||
Belt-and-braces against the manual entry path: ``hermes auth add
|
||||
xai-oauth`` produces a ``manual:xai_pkce`` entry whose removal step
|
||||
falls through to "unregistered → nothing to clean up" (correct —
|
||||
manual entries are pool-only).
|
||||
"""
|
||||
result = RemovalResult()
|
||||
if _clear_auth_store_provider(provider):
|
||||
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
|
||||
result.hints.append(
|
||||
"Run `hermes model` → xAI Grok OAuth (SuperGrok Subscription) to re-authenticate if needed."
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
|
||||
"""Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
|
||||
|
||||
refresh_codex_oauth_pure() writes both every time, so clearing only
|
||||
the Hermes auth store is not enough — _seed_from_singletons() would
|
||||
re-import from ~/.codex/auth.json on the next load_pool() call and
|
||||
the removal would be instantly undone. We suppress instead of
|
||||
deleting Codex CLI's file, so the Codex CLI itself keeps working.
|
||||
|
||||
The canonical source name in ``_seed_from_singletons`` is
|
||||
``"device_code"`` (no prefix). Entries may show up in the pool as
|
||||
either ``"device_code"`` (seeded) or ``"manual:device_code"`` (added
|
||||
via ``hermes auth add openai-codex``), but in both cases the re-seed
|
||||
gate lives at the ``"device_code"`` suppression key. We suppress
|
||||
that canonical key here; the central dispatcher also suppresses
|
||||
``removed.source`` which is fine — belt-and-suspenders, idempotent.
|
||||
"""
|
||||
from hermes_cli.auth import suppress_credential_source
|
||||
|
||||
result = RemovalResult()
|
||||
if _clear_auth_store_provider(provider):
|
||||
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
|
||||
# Suppress the canonical re-seed source, not just whatever source the
|
||||
# removed entry had. Otherwise `manual:device_code` removals wouldn't
|
||||
# block the `device_code` re-seed path.
|
||||
suppress_credential_source(provider, "device_code")
|
||||
result.hints.extend([
|
||||
"Suppressed openai-codex device_code source — it will not be re-seeded.",
|
||||
"Note: Codex CLI credentials still live in ~/.codex/auth.json",
|
||||
"Run `hermes auth add openai-codex` to re-enable if needed.",
|
||||
])
|
||||
return result
|
||||
|
||||
|
||||
def _remove_qwen_cli(provider: str, removed) -> RemovalResult:
|
||||
"""~/.qwen/oauth_creds.json is owned by the Qwen CLI.
|
||||
|
||||
Same pattern as claude_code — suppress, don't delete. The user's
|
||||
Qwen CLI install still reads from that file.
|
||||
"""
|
||||
return RemovalResult(hints=[
|
||||
"Suppressed qwen-cli credential — it will not be re-seeded.",
|
||||
"Note: Qwen CLI credentials still live in ~/.qwen/oauth_creds.json",
|
||||
"Run `hermes auth add qwen-oauth` to re-enable if needed.",
|
||||
])
|
||||
|
||||
|
||||
def _remove_copilot_gh(provider: str, removed) -> RemovalResult:
|
||||
"""Copilot token comes from `gh auth token` or COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN.
|
||||
|
||||
Copilot is special: the same token can be seeded as multiple source
|
||||
entries (gh_cli from ``_seed_from_singletons`` plus env:<VAR> from
|
||||
``_seed_from_env``), so removing one entry without suppressing the
|
||||
others lets the duplicates resurrect. We suppress ALL known copilot
|
||||
sources here so removal is stable regardless of which entry the
|
||||
user clicked.
|
||||
|
||||
We don't touch the user's gh CLI or shell state — just suppress so
|
||||
Hermes stops picking the token up.
|
||||
"""
|
||||
# Suppress ALL copilot source variants up-front so no path resurrects
|
||||
# the pool entry. The central dispatcher in auth_remove_command will
|
||||
# ALSO suppress removed.source, but it's idempotent so double-calling
|
||||
# is harmless.
|
||||
from hermes_cli.auth import suppress_credential_source
|
||||
suppress_credential_source(provider, "gh_cli")
|
||||
for env_var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"):
|
||||
suppress_credential_source(provider, f"env:{env_var}")
|
||||
|
||||
return RemovalResult(hints=[
|
||||
"Suppressed all copilot token sources (gh_cli + env vars) — they will not be re-seeded.",
|
||||
"Note: Your gh CLI / shell environment is unchanged.",
|
||||
"Run `hermes auth add copilot` to re-enable if needed.",
|
||||
])
|
||||
|
||||
|
||||
def _remove_custom_config(provider: str, removed) -> RemovalResult:
|
||||
"""Custom provider pools are seeded from custom_providers config or
|
||||
model.api_key. Both are in config.yaml — modifying that from here
|
||||
is more invasive than suppression. We suppress; the user can edit
|
||||
config.yaml if they want to remove the key from disk entirely.
|
||||
"""
|
||||
source_label = removed.source
|
||||
return RemovalResult(hints=[
|
||||
f"Suppressed {source_label} — it will not be re-seeded.",
|
||||
"Note: The underlying value in config.yaml is unchanged. Edit it "
|
||||
"directly if you want to remove the credential from disk.",
|
||||
])
|
||||
|
||||
|
||||
def _register_all_sources() -> None:
|
||||
"""Called once on module import.
|
||||
|
||||
ORDER MATTERS — ``find_removal_step`` returns the first match. Put
|
||||
provider-specific steps before the generic ``env:*`` step so that e.g.
|
||||
copilot's ``env:GH_TOKEN`` goes through the copilot removal (which
|
||||
doesn't touch the user's shell), not the generic env-var removal
|
||||
(which would try to clear .env).
|
||||
"""
|
||||
register(RemovalStep(
|
||||
provider="copilot", source_id="gh_cli",
|
||||
match_fn=lambda src: src == "gh_cli" or src.startswith("env:"),
|
||||
remove_fn=_remove_copilot_gh,
|
||||
description="gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="*", source_id="env:",
|
||||
match_fn=lambda src: src.startswith("env:"),
|
||||
remove_fn=_remove_env_source,
|
||||
description="Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="anthropic", source_id="claude_code",
|
||||
remove_fn=_remove_claude_code,
|
||||
description="~/.claude/.credentials.json",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="anthropic", source_id="hermes_pkce",
|
||||
remove_fn=_remove_hermes_pkce,
|
||||
description="~/.hermes/.anthropic_oauth.json",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="nous", source_id="device_code",
|
||||
remove_fn=_remove_nous_device_code,
|
||||
description="auth.json providers.nous",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="openai-codex", source_id="device_code",
|
||||
match_fn=lambda src: src == "device_code" or src.endswith(":device_code"),
|
||||
remove_fn=_remove_codex_device_code,
|
||||
description="auth.json providers.openai-codex + ~/.codex/auth.json",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="xai-oauth", source_id="loopback_pkce",
|
||||
remove_fn=_remove_xai_oauth_loopback_pkce,
|
||||
description="auth.json providers.xai-oauth",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="qwen-oauth", source_id="qwen-cli",
|
||||
remove_fn=_remove_qwen_cli,
|
||||
description="~/.qwen/oauth_creds.json",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="minimax-oauth", source_id="oauth",
|
||||
remove_fn=_remove_minimax_oauth,
|
||||
description="auth.json providers.minimax-oauth",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="*", source_id="config:",
|
||||
match_fn=lambda src: src.startswith("config:") or src == "model_config",
|
||||
remove_fn=_remove_custom_config,
|
||||
description="Custom provider config.yaml api_key field",
|
||||
))
|
||||
|
||||
|
||||
_register_all_sources()
|
||||
1781
agent/curator.py
1781
agent/curator.py
File diff suppressed because it is too large
Load Diff
@@ -1,693 +0,0 @@
|
||||
"""Curator snapshot + rollback.
|
||||
|
||||
A pre-run snapshot of ``~/.hermes/skills/`` (excluding ``.curator_backups/``
|
||||
itself) is taken before any mutating curator pass. Snapshots are tar.gz
|
||||
files under ``~/.hermes/skills/.curator_backups/<utc-iso>/`` with a
|
||||
companion ``manifest.json`` describing the snapshot (reason, time, size,
|
||||
counted skill files). Rollback picks a snapshot, moves the current
|
||||
``skills/`` tree aside into another snapshot so even the rollback itself
|
||||
is undoable, then extracts the chosen snapshot into place.
|
||||
|
||||
The snapshot does NOT include:
|
||||
- ``.curator_backups/`` (would recurse)
|
||||
- ``.hub/`` (hub-installed skills — managed by the hub, not us)
|
||||
|
||||
It DOES include:
|
||||
- all SKILL.md files + their directories (``scripts/``, ``references/``,
|
||||
``templates/``, ``assets/``)
|
||||
- ``.usage.json`` (usage telemetry — needed to rehydrate state cleanly)
|
||||
- ``.archive/`` (so rollback restores previously-archived skills too)
|
||||
- ``.curator_state`` (so rolling back also restores the last-run-at
|
||||
pointer — otherwise the curator would immediately re-fire on the next
|
||||
tick)
|
||||
- ``.bundled_manifest`` (so protection markers stay consistent)
|
||||
|
||||
Alongside the skills tarball, each snapshot also captures a copy of
|
||||
``~/.hermes/cron/jobs.json`` as ``cron-jobs.json`` when it exists. Cron
|
||||
jobs reference skills by name in their ``skills``/``skill`` fields; the
|
||||
curator's consolidation pass rewrites those in place via
|
||||
``cron.jobs.rewrite_skill_refs()``. Without capturing the pre-run state,
|
||||
rolling back the skills tree would leave cron jobs pointing at the
|
||||
umbrella skills even though the narrow skills they were originally
|
||||
configured with have been restored. We store the whole jobs.json for
|
||||
fidelity but rollback only touches the ``skills``/``skill`` fields — the
|
||||
rest (schedule, next_run_at, enabled, prompt, etc.) is live state and
|
||||
we leave it alone.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tarfile
|
||||
import tempfile
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
DEFAULT_KEEP = 5
|
||||
|
||||
# Entries under skills/ that should NEVER be rolled up into a snapshot.
|
||||
# .hub/ is managed by the skills hub; rolling it back would break lockfile
|
||||
# invariants. .curator_backups is the backup dir itself — recursion bomb.
|
||||
_EXCLUDE_TOP_LEVEL = {".curator_backups", ".hub"}
|
||||
|
||||
# Snapshot id regex: UTC ISO with colons replaced by dashes so the filename
|
||||
# is portable (Windows-safe). An optional ``-NN`` suffix handles two
|
||||
# snapshots landing in the same wallclock second.
|
||||
_ID_RE = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}Z(-\d{2})?$")
|
||||
|
||||
|
||||
def _backups_dir() -> Path:
|
||||
return get_hermes_home() / "skills" / ".curator_backups"
|
||||
|
||||
|
||||
def _skills_dir() -> Path:
|
||||
return get_hermes_home() / "skills"
|
||||
|
||||
|
||||
def _cron_jobs_file() -> Path:
|
||||
"""Source path for the live cron jobs store (``~/.hermes/cron/jobs.json``)."""
|
||||
return get_hermes_home() / "cron" / "jobs.json"
|
||||
|
||||
|
||||
CRON_JOBS_FILENAME = "cron-jobs.json"
|
||||
|
||||
|
||||
def _backup_cron_jobs_into(dest: Path) -> Dict[str, Any]:
|
||||
"""Copy the live cron jobs.json into ``dest`` as ``cron-jobs.json``.
|
||||
|
||||
Returns a small dict describing what was captured so the caller can
|
||||
fold it into the manifest. Never raises — if the cron file is missing
|
||||
or unreadable, the return dict has ``backed_up=False`` and the reason,
|
||||
and the snapshot proceeds without cron data (the snapshot is still
|
||||
useful for rolling back skills).
|
||||
"""
|
||||
src = _cron_jobs_file()
|
||||
info: Dict[str, Any] = {"backed_up": False, "jobs_count": 0}
|
||||
if not src.exists():
|
||||
info["reason"] = "no cron/jobs.json present"
|
||||
return info
|
||||
try:
|
||||
raw = src.read_text(encoding="utf-8")
|
||||
except OSError as e:
|
||||
logger.debug("Failed to read cron/jobs.json for backup: %s", e)
|
||||
info["reason"] = f"read error: {e}"
|
||||
return info
|
||||
# Count jobs as a nice diagnostic — but don't fail the snapshot if the
|
||||
# file is unparseable; just store the raw text and let rollback deal
|
||||
# with it (or not, if it's corrupted). jobs.json wraps the list as
|
||||
# `{"jobs": [...], "updated_at": ...}` — we count via that shape, and
|
||||
# fall back to bare-list shape just in case the format ever changes.
|
||||
try:
|
||||
parsed = json.loads(raw)
|
||||
if isinstance(parsed, dict):
|
||||
inner = parsed.get("jobs")
|
||||
if isinstance(inner, list):
|
||||
info["jobs_count"] = len(inner)
|
||||
elif isinstance(parsed, list):
|
||||
info["jobs_count"] = len(parsed)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
info["jobs_count"] = 0
|
||||
info["parse_warning"] = "jobs.json was not valid JSON at snapshot time"
|
||||
try:
|
||||
(dest / CRON_JOBS_FILENAME).write_text(raw, encoding="utf-8")
|
||||
except OSError as e:
|
||||
logger.debug("Failed to write cron backup file: %s", e)
|
||||
info["reason"] = f"write error: {e}"
|
||||
return info
|
||||
info["backed_up"] = True
|
||||
return info
|
||||
|
||||
|
||||
def _utc_id(now: Optional[datetime] = None) -> str:
|
||||
"""UTC ISO-ish filesystem-safe timestamp: ``2026-05-01T13-05-42Z``."""
|
||||
if now is None:
|
||||
now = datetime.now(timezone.utc)
|
||||
# isoformat → "2026-05-01T13:05:42.123456+00:00"; strip subseconds and tz.
|
||||
s = now.replace(microsecond=0).isoformat()
|
||||
if s.endswith("+00:00"):
|
||||
s = s[:-6]
|
||||
return s.replace(":", "-") + "Z"
|
||||
|
||||
|
||||
def _load_config() -> Dict[str, Any]:
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
except Exception as e:
|
||||
logger.debug("Failed to load config for curator backup: %s", e)
|
||||
return {}
|
||||
if not isinstance(cfg, dict):
|
||||
return {}
|
||||
cur = cfg.get("curator") or {}
|
||||
if not isinstance(cur, dict):
|
||||
return {}
|
||||
bk = cur.get("backup") or {}
|
||||
return bk if isinstance(bk, dict) else {}
|
||||
|
||||
|
||||
def is_enabled() -> bool:
|
||||
"""Default ON — the whole point of the backup is safety by default."""
|
||||
return bool(_load_config().get("enabled", True))
|
||||
|
||||
|
||||
def get_keep() -> int:
|
||||
cfg = _load_config()
|
||||
try:
|
||||
n = int(cfg.get("keep", DEFAULT_KEEP))
|
||||
except (TypeError, ValueError):
|
||||
n = DEFAULT_KEEP
|
||||
return max(1, n)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Snapshot
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _count_skill_files(base: Path) -> int:
|
||||
try:
|
||||
return sum(1 for _ in base.rglob("SKILL.md"))
|
||||
except OSError:
|
||||
return 0
|
||||
|
||||
|
||||
def _write_manifest(dest: Path, reason: str, archive_path: Path,
|
||||
skills_counted: int,
|
||||
cron_info: Optional[Dict[str, Any]] = None) -> None:
|
||||
manifest = {
|
||||
"id": dest.name,
|
||||
"reason": reason,
|
||||
"created_at": datetime.now(timezone.utc).isoformat(),
|
||||
"archive": archive_path.name,
|
||||
"archive_bytes": archive_path.stat().st_size,
|
||||
"skill_files": skills_counted,
|
||||
}
|
||||
if cron_info is not None:
|
||||
manifest["cron_jobs"] = {
|
||||
"backed_up": bool(cron_info.get("backed_up", False)),
|
||||
"jobs_count": int(cron_info.get("jobs_count", 0)),
|
||||
}
|
||||
if not cron_info.get("backed_up"):
|
||||
manifest["cron_jobs"]["reason"] = cron_info.get("reason", "not captured")
|
||||
if cron_info.get("parse_warning"):
|
||||
manifest["cron_jobs"]["parse_warning"] = cron_info["parse_warning"]
|
||||
(dest / "manifest.json").write_text(
|
||||
json.dumps(manifest, indent=2, sort_keys=True), encoding="utf-8"
|
||||
)
|
||||
|
||||
|
||||
def snapshot_skills(reason: str = "manual") -> Optional[Path]:
|
||||
"""Create a tar.gz snapshot of ``~/.hermes/skills/`` and prune old ones.
|
||||
|
||||
Returns the snapshot directory path, or ``None`` if the snapshot was
|
||||
skipped (backup disabled, skills dir missing, or an IO error occurred —
|
||||
in which case we log at debug and return None so the curator never
|
||||
aborts a pass because of a backup failure).
|
||||
"""
|
||||
if not is_enabled():
|
||||
logger.debug("Curator backup disabled by config; skipping snapshot")
|
||||
return None
|
||||
|
||||
skills = _skills_dir()
|
||||
if not skills.exists():
|
||||
logger.debug("No ~/.hermes/skills/ directory — nothing to back up")
|
||||
return None
|
||||
|
||||
backups = _backups_dir()
|
||||
try:
|
||||
backups.mkdir(parents=True, exist_ok=True)
|
||||
except OSError as e:
|
||||
logger.debug("Failed to create backups dir %s: %s", backups, e)
|
||||
return None
|
||||
|
||||
# Uniquify: if a snapshot with the same second already exists (can
|
||||
# happen if two curator runs fire in the same second), append a short
|
||||
# counter. Avoids clobbering and avoids timestamp collisions.
|
||||
base_id = _utc_id()
|
||||
snap_id = base_id
|
||||
counter = 1
|
||||
while (backups / snap_id).exists():
|
||||
snap_id = f"{base_id}-{counter:02d}"
|
||||
counter += 1
|
||||
|
||||
dest = backups / snap_id
|
||||
try:
|
||||
dest.mkdir(parents=True, exist_ok=False)
|
||||
except OSError as e:
|
||||
logger.debug("Failed to create snapshot dir %s: %s", dest, e)
|
||||
return None
|
||||
|
||||
archive = dest / "skills.tar.gz"
|
||||
try:
|
||||
# Stream into the tarball — no tempdir copy needed.
|
||||
with tarfile.open(archive, "w:gz", compresslevel=6) as tf:
|
||||
for entry in sorted(skills.iterdir()):
|
||||
if entry.name in _EXCLUDE_TOP_LEVEL:
|
||||
continue
|
||||
# arcname: store paths relative to skills/ so extraction
|
||||
# drops cleanly back into the skills dir.
|
||||
tf.add(str(entry), arcname=entry.name, recursive=True)
|
||||
# Capture cron/jobs.json alongside the tarball. Never fails the
|
||||
# snapshot — the skills side is the core guarantee; cron is
|
||||
# additive. We still record in the manifest whether it was
|
||||
# captured so rollback can surface "no cron data in this snapshot".
|
||||
cron_info = _backup_cron_jobs_into(dest)
|
||||
_write_manifest(dest, reason, archive,
|
||||
_count_skill_files(skills),
|
||||
cron_info=cron_info)
|
||||
except (OSError, tarfile.TarError) as e:
|
||||
logger.debug("Curator snapshot failed: %s", e, exc_info=True)
|
||||
# Clean up partial snapshot
|
||||
try:
|
||||
shutil.rmtree(dest, ignore_errors=True)
|
||||
except OSError:
|
||||
pass
|
||||
return None
|
||||
|
||||
_prune_old(keep=get_keep())
|
||||
logger.info("Curator snapshot created: %s (%s)", snap_id, reason)
|
||||
return dest
|
||||
|
||||
|
||||
def _prune_old(keep: int) -> List[str]:
|
||||
"""Delete regular snapshots beyond the newest *keep*. Returns deleted
|
||||
ids. Staging dirs (``.rollback-staging-*``) are implementation detail
|
||||
and pruned independently on every call."""
|
||||
backups = _backups_dir()
|
||||
if not backups.exists():
|
||||
return []
|
||||
entries: List[Tuple[str, Path]] = []
|
||||
stale_staging: List[Path] = []
|
||||
for child in backups.iterdir():
|
||||
if not child.is_dir():
|
||||
continue
|
||||
if child.name.startswith(".rollback-staging-"):
|
||||
# Staging dirs are only supposed to exist briefly during a
|
||||
# rollback. If we find one here (e.g. from a crashed rollback),
|
||||
# clean it up opportunistically.
|
||||
stale_staging.append(child)
|
||||
continue
|
||||
if _ID_RE.match(child.name):
|
||||
entries.append((child.name, child))
|
||||
# Newest first (lexicographic works because the id is UTC ISO).
|
||||
entries.sort(key=lambda t: t[0], reverse=True)
|
||||
deleted: List[str] = []
|
||||
for _, path in entries[keep:]:
|
||||
try:
|
||||
shutil.rmtree(path)
|
||||
deleted.append(path.name)
|
||||
except OSError as e:
|
||||
logger.debug("Failed to prune %s: %s", path, e)
|
||||
for path in stale_staging:
|
||||
try:
|
||||
shutil.rmtree(path)
|
||||
except OSError as e:
|
||||
logger.debug("Failed to clean stale staging dir %s: %s", path, e)
|
||||
return deleted
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# List + rollback
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _read_manifest(snap_dir: Path) -> Dict[str, Any]:
|
||||
mf = snap_dir / "manifest.json"
|
||||
if not mf.exists():
|
||||
return {}
|
||||
try:
|
||||
return json.loads(mf.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
return {}
|
||||
|
||||
|
||||
def list_backups() -> List[Dict[str, Any]]:
|
||||
"""Return all restorable snapshots, newest first. Only entries with a
|
||||
real ``skills.tar.gz`` tarball are listed — transient
|
||||
``.rollback-staging-*`` directories created mid-rollback are
|
||||
implementation detail and not shown."""
|
||||
backups = _backups_dir()
|
||||
if not backups.exists():
|
||||
return []
|
||||
out: List[Dict[str, Any]] = []
|
||||
for child in sorted(backups.iterdir(), reverse=True):
|
||||
if not child.is_dir():
|
||||
continue
|
||||
if not _ID_RE.match(child.name):
|
||||
continue
|
||||
if not (child / "skills.tar.gz").exists():
|
||||
continue
|
||||
mf = _read_manifest(child)
|
||||
mf.setdefault("id", child.name)
|
||||
mf.setdefault("path", str(child))
|
||||
if "archive_bytes" not in mf:
|
||||
arc = child / "skills.tar.gz"
|
||||
try:
|
||||
mf["archive_bytes"] = arc.stat().st_size
|
||||
except OSError:
|
||||
mf["archive_bytes"] = 0
|
||||
out.append(mf)
|
||||
return out
|
||||
|
||||
|
||||
def _resolve_backup(backup_id: Optional[str]) -> Optional[Path]:
|
||||
"""Return the path of the requested backup, or the newest one if
|
||||
*backup_id* is None. Returns None if no match."""
|
||||
backups = _backups_dir()
|
||||
if not backups.exists():
|
||||
return None
|
||||
if backup_id:
|
||||
target = backups / backup_id
|
||||
if (
|
||||
target.is_dir()
|
||||
and _ID_RE.match(backup_id)
|
||||
and (target / "skills.tar.gz").exists()
|
||||
):
|
||||
return target
|
||||
return None
|
||||
candidates = [
|
||||
c for c in sorted(backups.iterdir(), reverse=True)
|
||||
if c.is_dir() and _ID_RE.match(c.name) and (c / "skills.tar.gz").exists()
|
||||
]
|
||||
return candidates[0] if candidates else None
|
||||
|
||||
|
||||
def _restore_cron_skill_links(snapshot_dir: Path) -> Dict[str, Any]:
|
||||
"""Reconcile backed-up cron skill links into the live ``cron/jobs.json``.
|
||||
|
||||
We do NOT overwrite the whole cron file. Only the ``skills`` and
|
||||
``skill`` fields are restored, and only on jobs that still exist in the
|
||||
current file (matched by ``id``). Everything else about the job —
|
||||
schedule, next_run_at, last_run_at, enabled, prompt, workdir, hooks —
|
||||
is live state that the user/scheduler has modified since the snapshot;
|
||||
overwriting it would regress unrelated cron activity.
|
||||
|
||||
Rules:
|
||||
- Jobs present in backup AND live, with differing skills → skills restored.
|
||||
- Jobs present in backup AND live, with matching skills → no-op.
|
||||
- Jobs present in backup but gone from live (user deleted the job
|
||||
after the snapshot) → skipped, noted in the return report.
|
||||
- Jobs present in live but not in backup (user created a new cron
|
||||
job after the snapshot) → left untouched.
|
||||
|
||||
Never raises; failures are captured in the return dict. Writes through
|
||||
``cron.jobs`` to pick up the same lock + atomic-write path that tick()
|
||||
uses, so we don't race the scheduler.
|
||||
"""
|
||||
report: Dict[str, Any] = {
|
||||
"attempted": False,
|
||||
"restored": [],
|
||||
"skipped_missing": [],
|
||||
"unchanged": 0,
|
||||
"error": None,
|
||||
}
|
||||
backup_file = snapshot_dir / CRON_JOBS_FILENAME
|
||||
if not backup_file.exists():
|
||||
report["error"] = f"snapshot has no {CRON_JOBS_FILENAME}"
|
||||
return report
|
||||
|
||||
try:
|
||||
backup_text = backup_file.read_text(encoding="utf-8")
|
||||
backup_parsed = json.loads(backup_text)
|
||||
except (OSError, json.JSONDecodeError) as e:
|
||||
report["error"] = f"failed to load backed-up jobs: {e}"
|
||||
return report
|
||||
# jobs.json on disk is `{"jobs": [...], "updated_at": ...}`; accept both
|
||||
# that shape and a bare list for forward compat.
|
||||
if isinstance(backup_parsed, dict):
|
||||
backup_jobs = backup_parsed.get("jobs")
|
||||
elif isinstance(backup_parsed, list):
|
||||
backup_jobs = backup_parsed
|
||||
else:
|
||||
backup_jobs = None
|
||||
if not isinstance(backup_jobs, list):
|
||||
report["error"] = "backed-up cron-jobs.json has no jobs list"
|
||||
return report
|
||||
|
||||
# Build a lookup of the backed-up skill state keyed by job id.
|
||||
# We only need the two skill-ish fields (legacy single and modern list).
|
||||
backup_by_id: Dict[str, Dict[str, Any]] = {}
|
||||
for job in backup_jobs:
|
||||
if not isinstance(job, dict):
|
||||
continue
|
||||
jid = job.get("id")
|
||||
if not isinstance(jid, str) or not jid:
|
||||
continue
|
||||
backup_by_id[jid] = {
|
||||
"skills": job.get("skills"),
|
||||
"skill": job.get("skill"),
|
||||
"name": job.get("name") or jid,
|
||||
}
|
||||
|
||||
if not backup_by_id:
|
||||
report["attempted"] = True # we tried but there was nothing to do
|
||||
return report
|
||||
|
||||
# Load and rewrite the live jobs under the scheduler's lock.
|
||||
try:
|
||||
from cron.jobs import load_jobs, save_jobs, _jobs_file_lock
|
||||
except ImportError as e:
|
||||
report["error"] = f"cron module unavailable: {e}"
|
||||
return report
|
||||
|
||||
report["attempted"] = True
|
||||
try:
|
||||
with _jobs_file_lock:
|
||||
live_jobs = load_jobs()
|
||||
changed = False
|
||||
|
||||
live_ids = set()
|
||||
for live in live_jobs:
|
||||
if not isinstance(live, dict):
|
||||
continue
|
||||
jid = live.get("id")
|
||||
if not isinstance(jid, str) or not jid:
|
||||
continue
|
||||
live_ids.add(jid)
|
||||
|
||||
backup = backup_by_id.get(jid)
|
||||
if backup is None:
|
||||
continue # live job didn't exist at snapshot time
|
||||
|
||||
cur_skills = live.get("skills")
|
||||
cur_skill = live.get("skill")
|
||||
bkp_skills = backup.get("skills")
|
||||
bkp_skill = backup.get("skill")
|
||||
|
||||
if cur_skills == bkp_skills and cur_skill == bkp_skill:
|
||||
report["unchanged"] += 1
|
||||
continue
|
||||
|
||||
# Restore. Preserve absence (don't force the key to appear
|
||||
# if the backup didn't have it either).
|
||||
if bkp_skills is None:
|
||||
live.pop("skills", None)
|
||||
else:
|
||||
live["skills"] = bkp_skills
|
||||
if bkp_skill is None:
|
||||
live.pop("skill", None)
|
||||
else:
|
||||
live["skill"] = bkp_skill
|
||||
|
||||
report["restored"].append({
|
||||
"job_id": jid,
|
||||
"job_name": backup.get("name") or jid,
|
||||
"from": {"skills": cur_skills, "skill": cur_skill},
|
||||
"to": {"skills": bkp_skills, "skill": bkp_skill},
|
||||
})
|
||||
changed = True
|
||||
|
||||
# Jobs in backup but not in live = user deleted them after snapshot
|
||||
for jid, backup in backup_by_id.items():
|
||||
if jid not in live_ids:
|
||||
report["skipped_missing"].append({
|
||||
"job_id": jid,
|
||||
"job_name": backup.get("name") or jid,
|
||||
})
|
||||
|
||||
if changed:
|
||||
save_jobs(live_jobs)
|
||||
except Exception as e: # noqa: BLE001 — rollback must not die mid-restore
|
||||
logger.debug("Cron skill-link restore failed: %s", e, exc_info=True)
|
||||
report["error"] = f"restore failed mid-flight: {e}"
|
||||
|
||||
return report
|
||||
|
||||
|
||||
|
||||
def rollback(backup_id: Optional[str] = None) -> Tuple[bool, str, Optional[Path]]:
|
||||
"""Restore ``~/.hermes/skills/`` from a snapshot.
|
||||
|
||||
Strategy:
|
||||
1. Resolve the target snapshot (explicit id or newest regular).
|
||||
2. Take a safety snapshot of the CURRENT skills tree under
|
||||
``.curator_backups/pre-rollback-<ts>/`` so the rollback itself is
|
||||
undoable.
|
||||
3. Move all current top-level entries (except ``.curator_backups``
|
||||
and ``.hub``) into a tempdir.
|
||||
4. Extract the chosen snapshot into ``~/.hermes/skills/``.
|
||||
5. On failure during 4, move the tempdir contents back (best-effort)
|
||||
and return failure.
|
||||
|
||||
Returns ``(ok, message, snapshot_path)``.
|
||||
"""
|
||||
target = _resolve_backup(backup_id)
|
||||
if target is None:
|
||||
return (
|
||||
False,
|
||||
f"no matching backup found"
|
||||
+ (f" for id '{backup_id}'" if backup_id else "")
|
||||
+ " (use `hermes curator rollback --list` to see available snapshots)",
|
||||
None,
|
||||
)
|
||||
archive = target / "skills.tar.gz"
|
||||
if not archive.exists():
|
||||
return (False, f"snapshot {target.name} has no skills.tar.gz — corrupted?", None)
|
||||
|
||||
skills = _skills_dir()
|
||||
skills.mkdir(parents=True, exist_ok=True)
|
||||
backups = _backups_dir()
|
||||
backups.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Step 2: safety snapshot of current state FIRST. If this fails we bail
|
||||
# out before touching anything — otherwise a failed extract could leave
|
||||
# the user with no skills.
|
||||
try:
|
||||
snapshot_skills(reason=f"pre-rollback to {target.name}")
|
||||
except Exception as e:
|
||||
return (False, f"pre-rollback safety snapshot failed: {e}", None)
|
||||
|
||||
# Additionally move current entries into an internal staging dir so
|
||||
# the extract happens into an empty skills tree (predictable result).
|
||||
# This dir is implementation detail — not listed as a restorable
|
||||
# backup. The safety snapshot above is the user-facing undo handle.
|
||||
staged = backups / f".rollback-staging-{_utc_id()}"
|
||||
try:
|
||||
staged.mkdir(parents=True, exist_ok=False)
|
||||
except OSError as e:
|
||||
return (False, f"failed to create staging dir: {e}", None)
|
||||
|
||||
moved: List[Tuple[Path, Path]] = []
|
||||
try:
|
||||
for entry in list(skills.iterdir()):
|
||||
if entry.name in _EXCLUDE_TOP_LEVEL:
|
||||
continue
|
||||
dest = staged / entry.name
|
||||
shutil.move(str(entry), str(dest))
|
||||
moved.append((entry, dest))
|
||||
except OSError as e:
|
||||
# Best-effort rollback of the move
|
||||
for orig, dest in moved:
|
||||
try:
|
||||
shutil.move(str(dest), str(orig))
|
||||
except OSError:
|
||||
pass
|
||||
try:
|
||||
shutil.rmtree(staged, ignore_errors=True)
|
||||
except OSError:
|
||||
pass
|
||||
return (False, f"failed to stage current skills: {e}", None)
|
||||
|
||||
# Step 4: extract the snapshot into skills/
|
||||
try:
|
||||
with tarfile.open(archive, "r:gz") as tf:
|
||||
# Python 3.12+ supports filter='data' for safer extraction.
|
||||
# Fall back to the unfiltered call for older interpreters but
|
||||
# still reject absolute paths and .. components defensively.
|
||||
for member in tf.getmembers():
|
||||
name = member.name
|
||||
if name.startswith("/") or ".." in Path(name).parts:
|
||||
raise tarfile.TarError(
|
||||
f"refusing to extract unsafe path: {name!r}"
|
||||
)
|
||||
try:
|
||||
tf.extractall(str(skills), filter="data") # type: ignore[call-arg]
|
||||
except TypeError:
|
||||
# Python < 3.12 — no filter kwarg
|
||||
tf.extractall(str(skills))
|
||||
except (OSError, tarfile.TarError) as e:
|
||||
# Best-effort recover: move staged contents back
|
||||
for orig, dest in moved:
|
||||
try:
|
||||
shutil.move(str(dest), str(orig))
|
||||
except OSError:
|
||||
pass
|
||||
try:
|
||||
shutil.rmtree(staged, ignore_errors=True)
|
||||
except OSError:
|
||||
pass
|
||||
return (False, f"snapshot extract failed (state restored): {e}", None)
|
||||
|
||||
# Extract succeeded — the staging dir has served its purpose. The
|
||||
# user's undo handle is the safety snapshot tarball we took earlier.
|
||||
try:
|
||||
shutil.rmtree(staged, ignore_errors=True)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
# Reconcile cron skill-links. Surgical: only the skills/skill fields
|
||||
# on jobs matched by id. Everything else in jobs.json is live state
|
||||
# (schedule, next_run_at, enabled, prompt, etc.) and we leave it
|
||||
# alone. Failures here don't fail the overall rollback — the skills
|
||||
# tree is already restored, which is the main guarantee.
|
||||
cron_report = _restore_cron_skill_links(target)
|
||||
|
||||
summary_bits = [f"restored from snapshot {target.name}"]
|
||||
if cron_report.get("attempted"):
|
||||
restored_n = len(cron_report.get("restored") or [])
|
||||
skipped_n = len(cron_report.get("skipped_missing") or [])
|
||||
if cron_report.get("error"):
|
||||
summary_bits.append(f"cron links: error — {cron_report['error']}")
|
||||
elif restored_n == 0 and skipped_n == 0 and cron_report.get("unchanged", 0) == 0:
|
||||
# Attempted but nothing matched — empty snapshot or no overlapping ids.
|
||||
pass
|
||||
else:
|
||||
parts = []
|
||||
if restored_n:
|
||||
parts.append(f"{restored_n} job(s) had skill links restored")
|
||||
if skipped_n:
|
||||
parts.append(f"{skipped_n} backed-up job(s) no longer exist (skipped)")
|
||||
if cron_report.get("unchanged"):
|
||||
parts.append(f"{cron_report['unchanged']} already matched")
|
||||
summary_bits.append("cron links: " + ", ".join(parts))
|
||||
|
||||
logger.info("Curator rollback: restored from %s (cron_report=%s)",
|
||||
target.name, cron_report)
|
||||
return (True, "; ".join(summary_bits), target)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Human-readable summary for CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def format_size(n: int) -> str:
|
||||
for unit in ("B", "KB", "MB", "GB"):
|
||||
if n < 1024 or unit == "GB":
|
||||
return f"{n:.1f} {unit}" if unit != "B" else f"{n} B"
|
||||
n /= 1024
|
||||
return f"{n:.1f} GB"
|
||||
|
||||
|
||||
def summarize_backups() -> str:
|
||||
rows = list_backups()
|
||||
if not rows:
|
||||
return "No curator snapshots yet."
|
||||
lines = [f"{'id':<24} {'reason':<40} {'skills':>6} {'size':>8}"]
|
||||
lines.append("─" * len(lines[0]))
|
||||
for r in rows:
|
||||
lines.append(
|
||||
f"{r.get('id','?'):<24} "
|
||||
f"{(r.get('reason','?') or '?')[:40]:<40} "
|
||||
f"{r.get('skill_files', 0):>6} "
|
||||
f"{format_size(int(r.get('archive_bytes', 0))):>8}"
|
||||
)
|
||||
return "\n".join(lines)
|
||||
273
agent/display.py
273
agent/display.py
@@ -4,6 +4,7 @@ Pure display functions and classes with no AIAgent dependency.
|
||||
Used by AIAgent._execute_tool_calls for CLI feedback.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
@@ -13,9 +14,6 @@ from dataclasses import dataclass, field
|
||||
from difflib import unified_diff
|
||||
from pathlib import Path
|
||||
|
||||
from utils import safe_json_loads
|
||||
from agent.tool_result_classification import file_mutation_result_landed
|
||||
|
||||
# ANSI escape codes for coloring tool failure indicators
|
||||
_RED = "\033[31m"
|
||||
_RESET = "\033[0m"
|
||||
@@ -23,67 +21,11 @@ _RESET = "\033[0m"
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_ANSI_RESET = "\033[0m"
|
||||
|
||||
# Diff colors — resolved lazily from the skin engine so they adapt
|
||||
# to light/dark themes. Falls back to sensible defaults on import
|
||||
# failure. We cache after first resolution for performance.
|
||||
_diff_colors_cached: dict[str, str] | None = None
|
||||
|
||||
|
||||
def _diff_ansi() -> dict[str, str]:
|
||||
"""Return ANSI escapes for diff display, resolved from the active skin."""
|
||||
global _diff_colors_cached
|
||||
if _diff_colors_cached is not None:
|
||||
return _diff_colors_cached
|
||||
|
||||
# Defaults that work on dark terminals
|
||||
dim = "\033[38;2;150;150;150m"
|
||||
file_c = "\033[38;2;180;160;255m"
|
||||
hunk = "\033[38;2;120;120;140m"
|
||||
minus = "\033[38;2;255;255;255;48;2;120;20;20m"
|
||||
plus = "\033[38;2;255;255;255;48;2;20;90;20m"
|
||||
|
||||
try:
|
||||
from hermes_cli.skin_engine import get_active_skin
|
||||
skin = get_active_skin()
|
||||
|
||||
def _hex_fg(key: str, fallback_rgb: tuple[int, int, int]) -> str:
|
||||
h = skin.get_color(key, "")
|
||||
if h and len(h) == 7 and h[0] == "#":
|
||||
r, g, b = int(h[1:3], 16), int(h[3:5], 16), int(h[5:7], 16)
|
||||
return f"\033[38;2;{r};{g};{b}m"
|
||||
r, g, b = fallback_rgb
|
||||
return f"\033[38;2;{r};{g};{b}m"
|
||||
|
||||
dim = _hex_fg("banner_dim", (150, 150, 150))
|
||||
file_c = _hex_fg("session_label", (180, 160, 255))
|
||||
hunk = _hex_fg("session_border", (120, 120, 140))
|
||||
# minus/plus use background colors — derive from ui_error/ui_ok
|
||||
err_h = skin.get_color("ui_error", "#ef5350")
|
||||
ok_h = skin.get_color("ui_ok", "#4caf50")
|
||||
if err_h and len(err_h) == 7:
|
||||
er, eg, eb = int(err_h[1:3], 16), int(err_h[3:5], 16), int(err_h[5:7], 16)
|
||||
# Use a dark tinted version as background
|
||||
minus = f"\033[38;2;255;255;255;48;2;{max(er//2,20)};{max(eg//4,10)};{max(eb//4,10)}m"
|
||||
if ok_h and len(ok_h) == 7:
|
||||
or_, og, ob = int(ok_h[1:3], 16), int(ok_h[3:5], 16), int(ok_h[5:7], 16)
|
||||
plus = f"\033[38;2;255;255;255;48;2;{max(or_//4,10)};{max(og//2,20)};{max(ob//4,10)}m"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_diff_colors_cached = {
|
||||
"dim": dim, "file": file_c, "hunk": hunk,
|
||||
"minus": minus, "plus": plus,
|
||||
}
|
||||
return _diff_colors_cached
|
||||
|
||||
|
||||
# Module-level helpers — each call resolves from the active skin lazily.
|
||||
def _diff_dim(): return _diff_ansi()["dim"]
|
||||
def _diff_file(): return _diff_ansi()["file"]
|
||||
def _diff_hunk(): return _diff_ansi()["hunk"]
|
||||
def _diff_minus(): return _diff_ansi()["minus"]
|
||||
def _diff_plus(): return _diff_ansi()["plus"]
|
||||
_ANSI_DIM = "\033[38;2;150;150;150m"
|
||||
_ANSI_FILE = "\033[38;2;180;160;255m"
|
||||
_ANSI_HUNK = "\033[38;2;120;120;140m"
|
||||
_ANSI_MINUS = "\033[38;2;255;255;255;48;2;120;20;20m"
|
||||
_ANSI_PLUS = "\033[38;2;255;255;255;48;2;20;90;20m"
|
||||
_MAX_INLINE_DIFF_FILES = 6
|
||||
_MAX_INLINE_DIFF_LINES = 80
|
||||
|
||||
@@ -226,11 +168,9 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
|
||||
content = _oneline(args.get("content", ""))
|
||||
return f"+{target}: \"{content[:25]}{'...' if len(content) > 25 else ''}\""
|
||||
elif action == "replace":
|
||||
old = _oneline(args.get("old_text") or "") or "<missing old_text>"
|
||||
return f"~{target}: \"{old[:20]}\""
|
||||
return f"~{target}: \"{_oneline(args.get('old_text', '')[:20])}\""
|
||||
elif action == "remove":
|
||||
old = _oneline(args.get("old_text") or "") or "<missing old_text>"
|
||||
return f"-{target}: \"{old[:20]}\""
|
||||
return f"-{target}: \"{_oneline(args.get('old_text', '')[:20])}\""
|
||||
return action
|
||||
|
||||
if tool_name == "send_message":
|
||||
@@ -240,6 +180,21 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
|
||||
msg = msg[:17] + "..."
|
||||
return f"to {target}: \"{msg}\""
|
||||
|
||||
if tool_name.startswith("rl_"):
|
||||
rl_previews = {
|
||||
"rl_list_environments": "listing envs",
|
||||
"rl_select_environment": args.get("name", ""),
|
||||
"rl_get_current_config": "reading config",
|
||||
"rl_edit_config": f"{args.get('field', '')}={args.get('value', '')}",
|
||||
"rl_start_training": "starting",
|
||||
"rl_check_status": args.get("run_id", "")[:16],
|
||||
"rl_stop_training": f"stopping {args.get('run_id', '')[:16]}",
|
||||
"rl_get_results": args.get("run_id", "")[:16],
|
||||
"rl_list_runs": "listing runs",
|
||||
"rl_test_inference": f"{args.get('num_steps', 3)} steps",
|
||||
}
|
||||
return rl_previews.get(tool_name)
|
||||
|
||||
key = primary_args.get(tool_name)
|
||||
if not key:
|
||||
for fallback_key in ("query", "text", "command", "path", "name", "prompt", "code", "goal"):
|
||||
@@ -355,8 +310,9 @@ def _result_succeeded(result: str | None) -> bool:
|
||||
"""Conservatively detect whether a tool result represents success."""
|
||||
if not result:
|
||||
return False
|
||||
data = safe_json_loads(result)
|
||||
if data is None:
|
||||
try:
|
||||
data = json.loads(result)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
return False
|
||||
if not isinstance(data, dict):
|
||||
return False
|
||||
@@ -405,7 +361,10 @@ def extract_edit_diff(
|
||||
) -> str | None:
|
||||
"""Extract a unified diff from a file-edit tool result."""
|
||||
if tool_name == "patch" and result:
|
||||
data = safe_json_loads(result)
|
||||
try:
|
||||
data = json.loads(result)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
data = None
|
||||
if isinstance(data, dict):
|
||||
diff = data.get("diff")
|
||||
if isinstance(diff, str) and diff.strip():
|
||||
@@ -444,19 +403,19 @@ def _render_inline_unified_diff(diff: str) -> list[str]:
|
||||
if raw_line.startswith("+++ "):
|
||||
to_file = raw_line[4:].strip()
|
||||
if from_file or to_file:
|
||||
rendered.append(f"{_diff_file()}{from_file or 'a/?'} → {to_file or 'b/?'}{_ANSI_RESET}")
|
||||
rendered.append(f"{_ANSI_FILE}{from_file or 'a/?'} → {to_file or 'b/?'}{_ANSI_RESET}")
|
||||
continue
|
||||
if raw_line.startswith("@@"):
|
||||
rendered.append(f"{_diff_hunk()}{raw_line}{_ANSI_RESET}")
|
||||
rendered.append(f"{_ANSI_HUNK}{raw_line}{_ANSI_RESET}")
|
||||
continue
|
||||
if raw_line.startswith("-"):
|
||||
rendered.append(f"{_diff_minus()}{raw_line}{_ANSI_RESET}")
|
||||
rendered.append(f"{_ANSI_MINUS}{raw_line}{_ANSI_RESET}")
|
||||
continue
|
||||
if raw_line.startswith("+"):
|
||||
rendered.append(f"{_diff_plus()}{raw_line}{_ANSI_RESET}")
|
||||
rendered.append(f"{_ANSI_PLUS}{raw_line}{_ANSI_RESET}")
|
||||
continue
|
||||
if raw_line.startswith(" "):
|
||||
rendered.append(f"{_diff_dim()}{raw_line}{_ANSI_RESET}")
|
||||
rendered.append(f"{_ANSI_DIM}{raw_line}{_ANSI_RESET}")
|
||||
continue
|
||||
if raw_line:
|
||||
rendered.append(raw_line)
|
||||
@@ -522,7 +481,7 @@ def _summarize_rendered_diff_sections(
|
||||
summary = f"… omitted {omitted_lines} diff line(s)"
|
||||
if omitted_files:
|
||||
summary += f" across {omitted_files} additional file(s)/section(s)"
|
||||
rendered.append(f"{_diff_hunk()}{summary}{_ANSI_RESET}")
|
||||
rendered.append(f"{_ANSI_HUNK}{summary}{_ANSI_RESET}")
|
||||
|
||||
return rendered
|
||||
|
||||
@@ -588,45 +547,6 @@ class KawaiiSpinner:
|
||||
"analyzing", "computing", "synthesizing", "formulating", "brainstorming",
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def get_waiting_faces(cls) -> list:
|
||||
"""Return waiting faces from the active skin, falling back to KAWAII_WAITING."""
|
||||
try:
|
||||
skin = _get_skin()
|
||||
if skin:
|
||||
faces = skin.spinner.get("waiting_faces", [])
|
||||
if faces:
|
||||
return faces
|
||||
except Exception:
|
||||
pass
|
||||
return cls.KAWAII_WAITING
|
||||
|
||||
@classmethod
|
||||
def get_thinking_faces(cls) -> list:
|
||||
"""Return thinking faces from the active skin, falling back to KAWAII_THINKING."""
|
||||
try:
|
||||
skin = _get_skin()
|
||||
if skin:
|
||||
faces = skin.spinner.get("thinking_faces", [])
|
||||
if faces:
|
||||
return faces
|
||||
except Exception:
|
||||
pass
|
||||
return cls.KAWAII_THINKING
|
||||
|
||||
@classmethod
|
||||
def get_thinking_verbs(cls) -> list:
|
||||
"""Return thinking verbs from the active skin, falling back to THINKING_VERBS."""
|
||||
try:
|
||||
skin = _get_skin()
|
||||
if skin:
|
||||
verbs = skin.spinner.get("thinking_verbs", [])
|
||||
if verbs:
|
||||
return verbs
|
||||
except Exception:
|
||||
pass
|
||||
return cls.THINKING_VERBS
|
||||
|
||||
def __init__(self, message: str = "", spinner_type: str = 'dots', print_fn=None):
|
||||
self.message = message
|
||||
self.spinner_frames = self.SPINNERS.get(spinner_type, self.SPINNERS['dots'])
|
||||
@@ -796,29 +716,27 @@ def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]
|
||||
"""
|
||||
if result is None:
|
||||
return False, ""
|
||||
if file_mutation_result_landed(tool_name, result):
|
||||
return False, ""
|
||||
|
||||
if tool_name == "terminal":
|
||||
data = safe_json_loads(result)
|
||||
if isinstance(data, dict):
|
||||
try:
|
||||
data = json.loads(result)
|
||||
exit_code = data.get("exit_code")
|
||||
if exit_code is not None and exit_code != 0:
|
||||
return True, f" [exit {exit_code}]"
|
||||
except (json.JSONDecodeError, TypeError, AttributeError):
|
||||
logger.debug("Could not parse terminal result as JSON for exit code check")
|
||||
return False, ""
|
||||
|
||||
# Memory-specific: distinguish "full" from real errors
|
||||
if tool_name == "memory":
|
||||
data = safe_json_loads(result)
|
||||
if isinstance(data, dict):
|
||||
try:
|
||||
data = json.loads(result)
|
||||
if data.get("success") is False and "exceed the limit" in data.get("error", ""):
|
||||
return True, " [full]"
|
||||
except (json.JSONDecodeError, TypeError, AttributeError):
|
||||
logger.debug("Could not parse memory result as JSON for capacity check")
|
||||
|
||||
# Generic heuristic for non-terminal tools
|
||||
# Multimodal tool results (dicts with _multimodal=True) are not strings —
|
||||
# treat them as successes since failures would be JSON-encoded strings.
|
||||
if not isinstance(result, str):
|
||||
return False, ""
|
||||
lower = result[:500].lower()
|
||||
if '"error"' in lower or '"failed"' in lower or result.startswith("Error"):
|
||||
return True, " [error]"
|
||||
@@ -844,15 +762,13 @@ def get_cute_tool_message(
|
||||
s = str(s)
|
||||
if _tool_preview_max_len == 0:
|
||||
return s # no limit
|
||||
limit = _tool_preview_max_len
|
||||
return (s[:limit-3] + "...") if len(s) > limit else s
|
||||
return (s[:n-3] + "...") if len(s) > n else s
|
||||
|
||||
def _path(p, n=35):
|
||||
p = str(p)
|
||||
if _tool_preview_max_len == 0:
|
||||
return p # no limit
|
||||
limit = _tool_preview_max_len
|
||||
return ("..." + p[-(limit-3):]) if len(p) > limit else p
|
||||
return ("..." + p[-(n-3):]) if len(p) > n else p
|
||||
|
||||
def _wrap(line: str) -> str:
|
||||
"""Apply skin tool prefix and failure suffix."""
|
||||
@@ -935,13 +851,9 @@ def get_cute_tool_message(
|
||||
if action == "add":
|
||||
return _wrap(f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}")
|
||||
elif action == "replace":
|
||||
old = args.get("old_text") or ""
|
||||
old = old if old else "<missing old_text>"
|
||||
return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(old, 20)}\" {dur}")
|
||||
return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
|
||||
elif action == "remove":
|
||||
old = args.get("old_text") or ""
|
||||
old = old if old else "<missing old_text>"
|
||||
return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(old, 20)}\" {dur}")
|
||||
return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
|
||||
return _wrap(f"┊ 🧠 memory {action} {dur}")
|
||||
if tool_name == "skills_list":
|
||||
return _wrap(f"┊ 📚 skills list {args.get('category', 'all')} {dur}")
|
||||
@@ -966,6 +878,15 @@ def get_cute_tool_message(
|
||||
if action == "list":
|
||||
return _wrap(f"┊ ⏰ cron listing {dur}")
|
||||
return _wrap(f"┊ ⏰ cron {action} {args.get('job_id', '')} {dur}")
|
||||
if tool_name.startswith("rl_"):
|
||||
rl = {
|
||||
"rl_list_environments": "list envs", "rl_select_environment": f"select {args.get('name', '')}",
|
||||
"rl_get_current_config": "get config", "rl_edit_config": f"set {args.get('field', '?')}",
|
||||
"rl_start_training": "start training", "rl_check_status": f"status {args.get('run_id', '?')[:12]}",
|
||||
"rl_stop_training": f"stop {args.get('run_id', '?')[:12]}", "rl_get_results": f"results {args.get('run_id', '?')[:12]}",
|
||||
"rl_list_runs": "list runs", "rl_test_inference": "test inference",
|
||||
}
|
||||
return _wrap(f"┊ 🧪 rl {rl.get(tool_name, tool_name.replace('rl_', ''))} {dur}")
|
||||
if tool_name == "execute_code":
|
||||
code = args.get("code", "")
|
||||
first_line = code.strip().split("\n")[0] if code.strip() else ""
|
||||
@@ -984,4 +905,84 @@ def get_cute_tool_message(
|
||||
# Honcho session line (one-liner with clickable OSC 8 hyperlink)
|
||||
# =========================================================================
|
||||
|
||||
_DIM = "\033[2m"
|
||||
_SKY_BLUE = "\033[38;5;117m"
|
||||
_ANSI_RESET = "\033[0m"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Context pressure display (CLI user-facing warnings)
|
||||
# =========================================================================
|
||||
|
||||
# ANSI color codes for context pressure tiers
|
||||
_CYAN = "\033[36m"
|
||||
_YELLOW = "\033[33m"
|
||||
_BOLD = "\033[1m"
|
||||
_DIM_ANSI = "\033[2m"
|
||||
|
||||
# Bar characters
|
||||
_BAR_FILLED = "▰"
|
||||
_BAR_EMPTY = "▱"
|
||||
_BAR_WIDTH = 20
|
||||
|
||||
|
||||
def format_context_pressure(
|
||||
compaction_progress: float,
|
||||
threshold_tokens: int,
|
||||
threshold_percent: float,
|
||||
compression_enabled: bool = True,
|
||||
) -> str:
|
||||
"""Build a formatted context pressure line for CLI display.
|
||||
|
||||
The bar and percentage show progress toward the compaction threshold,
|
||||
NOT the raw context window. 100% = compaction fires.
|
||||
|
||||
Args:
|
||||
compaction_progress: How close to compaction (0.0–1.0, 1.0 = fires).
|
||||
threshold_tokens: Compaction threshold in tokens.
|
||||
threshold_percent: Compaction threshold as a fraction of context window.
|
||||
compression_enabled: Whether auto-compression is active.
|
||||
"""
|
||||
pct_int = min(int(compaction_progress * 100), 100)
|
||||
filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
|
||||
bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)
|
||||
|
||||
threshold_k = f"{threshold_tokens // 1000}k" if threshold_tokens >= 1000 else str(threshold_tokens)
|
||||
threshold_pct_int = int(threshold_percent * 100)
|
||||
|
||||
color = f"{_BOLD}{_YELLOW}"
|
||||
icon = "⚠"
|
||||
if compression_enabled:
|
||||
hint = "compaction approaching"
|
||||
else:
|
||||
hint = "no auto-compaction"
|
||||
|
||||
return (
|
||||
f" {color}{icon} context {bar} {pct_int}% to compaction{_ANSI_RESET}"
|
||||
f" {_DIM_ANSI}{threshold_k} threshold ({threshold_pct_int}%) · {hint}{_ANSI_RESET}"
|
||||
)
|
||||
|
||||
|
||||
def format_context_pressure_gateway(
|
||||
compaction_progress: float,
|
||||
threshold_percent: float,
|
||||
compression_enabled: bool = True,
|
||||
) -> str:
|
||||
"""Build a plain-text context pressure notification for messaging platforms.
|
||||
|
||||
No ANSI — just Unicode and plain text suitable for Telegram/Discord/etc.
|
||||
The percentage shows progress toward the compaction threshold.
|
||||
"""
|
||||
pct_int = min(int(compaction_progress * 100), 100)
|
||||
filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
|
||||
bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)
|
||||
|
||||
threshold_pct_int = int(threshold_percent * 100)
|
||||
|
||||
icon = "⚠️"
|
||||
if compression_enabled:
|
||||
hint = f"Context compaction approaching (threshold: {threshold_pct_int}% of window)."
|
||||
else:
|
||||
hint = "Auto-compaction is disabled — context may be truncated."
|
||||
|
||||
return f"{icon} Context: {bar} {pct_int}% to compaction\n{hint}"
|
||||
|
||||
@@ -13,6 +13,7 @@ from __future__ import annotations
|
||||
|
||||
import enum
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
@@ -42,11 +43,9 @@ class FailoverReason(enum.Enum):
|
||||
# Context / payload
|
||||
context_overflow = "context_overflow" # Context too large — compress, not failover
|
||||
payload_too_large = "payload_too_large" # 413 — compress payload
|
||||
image_too_large = "image_too_large" # Native image part exceeds provider's per-image limit — shrink and retry
|
||||
|
||||
# Model
|
||||
model_not_found = "model_not_found" # 404 or invalid model — fallback to different model
|
||||
provider_policy_blocked = "provider_policy_blocked" # Aggregator (e.g. OpenRouter) blocked the only endpoint due to account data/privacy policy
|
||||
|
||||
# Request format
|
||||
format_error = "format_error" # 400 bad request — abort or strip + retry
|
||||
@@ -54,8 +53,6 @@ class FailoverReason(enum.Enum):
|
||||
# Provider-specific
|
||||
thinking_signature = "thinking_signature" # Anthropic thinking block sig invalid
|
||||
long_context_tier = "long_context_tier" # Anthropic "extra usage" tier gate
|
||||
oauth_long_context_beta_forbidden = "oauth_long_context_beta_forbidden" # Anthropic OAuth subscription rejects 1M context beta — disable beta and retry
|
||||
llama_cpp_grammar_pattern = "llama_cpp_grammar_pattern" # llama.cpp json-schema-to-grammar rejects regex escapes in `pattern` / `format` — strip from tools and retry
|
||||
|
||||
# Catch-all
|
||||
unknown = "unknown" # Unclassifiable — retry with backoff
|
||||
@@ -83,7 +80,7 @@ class ClassifiedError:
|
||||
|
||||
@property
|
||||
def is_auth(self) -> bool:
|
||||
return self.reason in {FailoverReason.auth, FailoverReason.auth_permanent}
|
||||
return self.reason in (FailoverReason.auth, FailoverReason.auth_permanent)
|
||||
|
||||
|
||||
|
||||
@@ -93,7 +90,6 @@ class ClassifiedError:
|
||||
_BILLING_PATTERNS = [
|
||||
"insufficient credits",
|
||||
"insufficient_quota",
|
||||
"insufficient balance",
|
||||
"credit balance",
|
||||
"credits have been exhausted",
|
||||
"top up your credits",
|
||||
@@ -117,10 +113,6 @@ _RATE_LIMIT_PATTERNS = [
|
||||
"please retry after",
|
||||
"resource_exhausted",
|
||||
"rate increased too quickly", # Alibaba/DashScope throttling
|
||||
# AWS Bedrock throttling
|
||||
"throttlingexception",
|
||||
"too many concurrent requests",
|
||||
"servicequotaexceededexception",
|
||||
]
|
||||
|
||||
# Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
|
||||
@@ -151,20 +143,6 @@ _PAYLOAD_TOO_LARGE_PATTERNS = [
|
||||
"error code: 413",
|
||||
]
|
||||
|
||||
# Image-size patterns. Matched against 400 bodies (not 413) because most
|
||||
# providers return a 400 with a specific image-too-big message before the
|
||||
# whole request hits the 413 size limit. Anthropic's wording is the most
|
||||
# important here (hard 5 MB per image, returned as
|
||||
# "messages.N.content.K.image.source.base64: image exceeds 5 MB maximum").
|
||||
_IMAGE_TOO_LARGE_PATTERNS = [
|
||||
"image exceeds", # Anthropic: "image exceeds 5 MB maximum"
|
||||
"image too large", # generic
|
||||
"image_too_large", # error_code variant
|
||||
"image size exceeds", # variant
|
||||
# "request_too_large" on a request known to contain an image → image is
|
||||
# the likely culprit; we still try the shrink path before giving up.
|
||||
]
|
||||
|
||||
# Context overflow patterns
|
||||
_CONTEXT_OVERFLOW_PATTERNS = [
|
||||
"context length",
|
||||
@@ -179,26 +157,9 @@ _CONTEXT_OVERFLOW_PATTERNS = [
|
||||
"prompt exceeds max length",
|
||||
"max_tokens",
|
||||
"maximum number of tokens",
|
||||
# vLLM / local inference server patterns
|
||||
"exceeds the max_model_len",
|
||||
"max_model_len",
|
||||
"prompt length", # "engine prompt length X exceeds"
|
||||
"input is too long",
|
||||
"maximum model length",
|
||||
# Ollama patterns
|
||||
"context length exceeded",
|
||||
"truncating input",
|
||||
# llama.cpp / llama-server patterns
|
||||
"slot context", # "slot context: N tokens, prompt N tokens"
|
||||
"n_ctx_slot",
|
||||
# Chinese error messages (some providers return these)
|
||||
"超过最大长度",
|
||||
"上下文长度",
|
||||
# AWS Bedrock Converse API error patterns
|
||||
"input is too long",
|
||||
"max input token",
|
||||
"input token",
|
||||
"exceeds the maximum number of input tokens",
|
||||
]
|
||||
|
||||
# Model not found patterns
|
||||
@@ -213,29 +174,6 @@ _MODEL_NOT_FOUND_PATTERNS = [
|
||||
"unsupported model",
|
||||
]
|
||||
|
||||
# OpenRouter aggregator policy-block patterns.
|
||||
#
|
||||
# When a user's OpenRouter account privacy setting (or a per-request
|
||||
# `provider.data_collection: deny` preference) excludes the only endpoint
|
||||
# serving a model, OpenRouter returns 404 with a *specific* message that is
|
||||
# distinct from "model not found":
|
||||
#
|
||||
# "No endpoints available matching your guardrail restrictions and
|
||||
# data policy. Configure: https://openrouter.ai/settings/privacy"
|
||||
#
|
||||
# We classify this as `provider_policy_blocked` rather than
|
||||
# `model_not_found` because:
|
||||
# - The model *exists* — model_not_found is misleading in logs
|
||||
# - Provider fallback won't help: the account-level setting applies to
|
||||
# every call on the same OpenRouter account
|
||||
# - The error body already contains the fix URL, so the user gets
|
||||
# actionable guidance without us rewriting the message
|
||||
_PROVIDER_POLICY_BLOCKED_PATTERNS = [
|
||||
"no endpoints available matching your guardrail",
|
||||
"no endpoints available matching your data policy",
|
||||
"no endpoints found matching your data policy",
|
||||
]
|
||||
|
||||
# Auth patterns (non-status-code signals)
|
||||
_AUTH_PATTERNS = [
|
||||
"invalid api key",
|
||||
@@ -254,20 +192,6 @@ _THINKING_SIG_PATTERNS = [
|
||||
"signature", # Combined with "thinking" check
|
||||
]
|
||||
|
||||
# Message-string patterns that indicate a provider-side timeout even when
|
||||
# the exception type is generic (e.g. RuntimeError from a local shim that
|
||||
# wraps a subprocess timeout). Checked before the type-based transport
|
||||
# heuristics so custom-provider "timed out" errors don't fall through to
|
||||
# the unknown bucket and get misreported as empty responses.
|
||||
_TIMEOUT_MESSAGE_PATTERNS = [
|
||||
"timed out",
|
||||
"turn timed out",
|
||||
"request timed out",
|
||||
"deadline exceeded",
|
||||
"operation timed out",
|
||||
"upstream timed out",
|
||||
]
|
||||
|
||||
# Transport error type names
|
||||
_TRANSPORT_ERROR_TYPES = frozenset({
|
||||
"ReadTimeout", "ConnectTimeout", "PoolTimeout",
|
||||
@@ -276,25 +200,12 @@ _TRANSPORT_ERROR_TYPES = frozenset({
|
||||
"ConnectionAbortedError", "BrokenPipeError",
|
||||
"TimeoutError", "ReadError",
|
||||
"ServerDisconnectedError",
|
||||
# SSL/TLS transport errors — transient mid-stream handshake/record
|
||||
# failures that should retry rather than surface as a stalled session.
|
||||
# ssl.SSLError subclasses OSError (caught by isinstance) but we list
|
||||
# the type names here so provider-wrapped SSL errors (e.g. when the
|
||||
# SDK re-raises without preserving the exception chain) still classify
|
||||
# as transport rather than falling through to the unknown bucket.
|
||||
"SSLError", "SSLZeroReturnError", "SSLWantReadError",
|
||||
"SSLWantWriteError", "SSLEOFError", "SSLSyscallError",
|
||||
# OpenAI SDK errors (not subclasses of Python builtins)
|
||||
"APIConnectionError",
|
||||
"APITimeoutError",
|
||||
})
|
||||
|
||||
# Server disconnect patterns (no status code, but transport-level).
|
||||
# These are the "ambiguous" patterns — a plain connection close could be
|
||||
# transient transport hiccup OR server-side context overflow rejection
|
||||
# (common when the API gateway disconnects instead of returning an HTTP
|
||||
# error for oversized requests). A large session + one of these patterns
|
||||
# triggers the context-overflow-with-compression recovery path.
|
||||
# Server disconnect patterns (no status code, but transport-level)
|
||||
_SERVER_DISCONNECT_PATTERNS = [
|
||||
"server disconnected",
|
||||
"peer closed connection",
|
||||
@@ -305,40 +216,6 @@ _SERVER_DISCONNECT_PATTERNS = [
|
||||
"incomplete chunked read",
|
||||
]
|
||||
|
||||
# SSL/TLS transient failure patterns — intentionally distinct from
|
||||
# _SERVER_DISCONNECT_PATTERNS above.
|
||||
#
|
||||
# An SSL alert mid-stream is almost always a transport-layer hiccup
|
||||
# (flaky network, mid-session TLS renegotiation failure, load balancer
|
||||
# dropping the connection) — NOT a server-side context overflow signal.
|
||||
# So we want the retry path but NOT the compression path; lumping these
|
||||
# into _SERVER_DISCONNECT_PATTERNS would trigger unnecessary (and
|
||||
# expensive) context compression on any large-session SSL hiccup.
|
||||
#
|
||||
# The OpenSSL library constructs error codes by prepending a format string
|
||||
# to the uppercased alert reason; OpenSSL 3.x changed the separator
|
||||
# (e.g. `SSLV3_ALERT_BAD_RECORD_MAC` → `SSL/TLS_ALERT_BAD_RECORD_MAC`),
|
||||
# which silently stopped matching anything explicit. Matching on the
|
||||
# stable substrings (`bad record mac`, `ssl alert`, `tls alert`, etc.)
|
||||
# survives future OpenSSL format churn without code changes.
|
||||
_SSL_TRANSIENT_PATTERNS = [
|
||||
# Space-separated (human-readable form, Python ssl module, most SDKs)
|
||||
"bad record mac",
|
||||
"ssl alert",
|
||||
"tls alert",
|
||||
"ssl handshake failure",
|
||||
"tlsv1 alert",
|
||||
"sslv3 alert",
|
||||
# Underscore-separated (OpenSSL error code tokens, e.g.
|
||||
# `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC`, `SSLV3_ALERT_BAD_RECORD_MAC`)
|
||||
"bad_record_mac",
|
||||
"ssl_alert",
|
||||
"tls_alert",
|
||||
"tls_alert_internal_error",
|
||||
# Python ssl module prefix, e.g. "[SSL: BAD_RECORD_MAC]"
|
||||
"[ssl:",
|
||||
]
|
||||
|
||||
|
||||
# ── Classification pipeline ─────────────────────────────────────────────
|
||||
|
||||
@@ -358,10 +235,9 @@ def classify_api_error(
|
||||
2. HTTP status code + message-aware refinement
|
||||
3. Error code classification (from body)
|
||||
4. Message pattern matching (billing vs rate_limit vs context vs auth)
|
||||
5. SSL/TLS transient alert patterns → retry as timeout
|
||||
5. Transport error heuristics
|
||||
6. Server disconnect + large session → context overflow
|
||||
7. Transport error heuristics
|
||||
8. Fallback: unknown (retryable with backoff)
|
||||
7. Fallback: unknown (retryable with backoff)
|
||||
|
||||
Args:
|
||||
error: The exception from the API call.
|
||||
@@ -375,11 +251,6 @@ def classify_api_error(
|
||||
"""
|
||||
status_code = _extract_status_code(error)
|
||||
error_type = type(error).__name__
|
||||
# Copilot/GitHub Models RateLimitError may not set .status_code; force 429
|
||||
# so downstream rate-limit handling (classifier reason, pool rotation,
|
||||
# fallback gating) fires correctly instead of misclassifying as generic.
|
||||
if status_code is None and error_type == "RateLimitError":
|
||||
status_code = 429
|
||||
body = _extract_error_body(error)
|
||||
error_code = _extract_error_code(body)
|
||||
|
||||
@@ -399,7 +270,7 @@ def classify_api_error(
|
||||
if isinstance(body, dict):
|
||||
_err_obj = body.get("error", {})
|
||||
if isinstance(_err_obj, dict):
|
||||
_body_msg = str(_err_obj.get("message") or "").lower()
|
||||
_body_msg = (_err_obj.get("message") or "").lower()
|
||||
# Parse metadata.raw for wrapped provider errors
|
||||
_metadata = _err_obj.get("metadata", {})
|
||||
if isinstance(_metadata, dict):
|
||||
@@ -411,11 +282,11 @@ def classify_api_error(
|
||||
if isinstance(_inner, dict):
|
||||
_inner_err = _inner.get("error", {})
|
||||
if isinstance(_inner_err, dict):
|
||||
_metadata_msg = str(_inner_err.get("message") or "").lower()
|
||||
_metadata_msg = (_inner_err.get("message") or "").lower()
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
if not _body_msg:
|
||||
_body_msg = str(body.get("message") or "").lower()
|
||||
_body_msg = (body.get("message") or "").lower()
|
||||
# Combine all message sources for pattern matching
|
||||
parts = [_raw_msg]
|
||||
if _body_msg and _body_msg not in _raw_msg:
|
||||
@@ -466,79 +337,6 @@ def classify_api_error(
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Anthropic OAuth subscription rejects the 1M-context beta header.
|
||||
# Observed error body: "The long context beta is not yet available for
|
||||
# this subscription." Returned as HTTP 400 from native Anthropic when
|
||||
# the subscription doesn't include 1M context, even though the request
|
||||
# carries ``anthropic-beta: context-1m-2025-08-07``. The recovery path
|
||||
# in run_agent.py rebuilds the Anthropic client with the beta stripped
|
||||
# and retries once. Pattern is narrow enough that it won't collide with
|
||||
# the 429 tier-gate pattern above (different status, different phrase).
|
||||
if (
|
||||
status_code == 400
|
||||
and "long context beta" in error_msg
|
||||
and "not yet available" in error_msg
|
||||
):
|
||||
return _result(
|
||||
FailoverReason.oauth_long_context_beta_forbidden,
|
||||
retryable=True,
|
||||
should_compress=False,
|
||||
)
|
||||
|
||||
# llama.cpp's ``json-schema-to-grammar`` converter (used by its OAI
|
||||
# server to build GBNF tool-call parsers) rejects regex escape classes
|
||||
# like ``\d``/``\w``/``\s`` and most ``format`` values. MCP servers
|
||||
# routinely emit ``"pattern": "\\d{4}-\\d{2}-\\d{2}"`` for date/phone/
|
||||
# email params. llama.cpp surfaces this as HTTP 400 with one of a few
|
||||
# recognizable phrases; on match we strip ``pattern``/``format`` from
|
||||
# ``self.tools`` in the retry loop and retry once. Cloud providers are
|
||||
# unaffected — they accept these keywords and we never hit this branch.
|
||||
if (
|
||||
status_code == 400
|
||||
and (
|
||||
"error parsing grammar" in error_msg
|
||||
or "json-schema-to-grammar" in error_msg
|
||||
or (
|
||||
"unable to generate parser" in error_msg
|
||||
and "template" in error_msg
|
||||
)
|
||||
)
|
||||
):
|
||||
return _result(
|
||||
FailoverReason.llama_cpp_grammar_pattern,
|
||||
retryable=True,
|
||||
should_compress=False,
|
||||
)
|
||||
|
||||
# xAI Grok subscription entitlement errors.
|
||||
#
|
||||
# xAI returns "You have either run out of available resources or do not
|
||||
# have an active Grok subscription" through two distinct code paths:
|
||||
#
|
||||
# • HTTP 403 — status_code is set; _classify_by_status (step 2) routes
|
||||
# it to FailoverReason.auth correctly, and _is_entitlement_failure
|
||||
# then prevents the credential-refresh loop.
|
||||
#
|
||||
# • SSE ``type=error`` frame — surfaced as _StreamErrorEvent with
|
||||
# status_code=None. _classify_by_status is skipped entirely, and
|
||||
# "grok subscription" / "out of available resources" appear in none
|
||||
# of the message-pattern lists below. Without this guard the error
|
||||
# falls through to FailoverReason.unknown (retryable=True), burning
|
||||
# max_retries before the agent stops — and _is_entitlement_failure
|
||||
# is never called because it only runs under FailoverReason.auth.
|
||||
#
|
||||
# Both X Premium+ and SuperGrok subscribers hit this path when their
|
||||
# subscription tier does not cover the requested model or feature.
|
||||
if (
|
||||
"do not have an active grok subscription" in error_msg
|
||||
or ("out of available resources" in error_msg and "grok" in error_msg)
|
||||
):
|
||||
return _result(
|
||||
FailoverReason.auth,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# ── 2. HTTP status code classification ──────────────────────────
|
||||
|
||||
if status_code is not None:
|
||||
@@ -570,18 +368,7 @@ def classify_api_error(
|
||||
if classified is not None:
|
||||
return classified
|
||||
|
||||
# ── 5. SSL/TLS transient errors → retry as timeout (not compression) ──
|
||||
# SSL alerts mid-stream are transport hiccups, not server-side context
|
||||
# overflow signals. Classify before the disconnect check so a large
|
||||
# session doesn't incorrectly trigger context compression when the real
|
||||
# cause is a flaky TLS handshake. Also matches when the error is
|
||||
# wrapped in a generic exception whose message string carries the SSL
|
||||
# alert text but the type isn't ssl.SSLError (happens with some SDKs
|
||||
# that re-raise without chaining).
|
||||
if any(p in error_msg for p in _SSL_TRANSIENT_PATTERNS):
|
||||
return _result(FailoverReason.timeout, retryable=True)
|
||||
|
||||
# ── 6. Server disconnect + large session → context overflow ─────
|
||||
# ── 5. Server disconnect + large session → context overflow ─────
|
||||
# Must come BEFORE generic transport error catch — a disconnect on
|
||||
# a large session is more likely context overflow than a transient
|
||||
# transport hiccup. Without this ordering, RemoteProtocolError
|
||||
@@ -589,12 +376,7 @@ def classify_api_error(
|
||||
|
||||
is_disconnect = any(p in error_msg for p in _SERVER_DISCONNECT_PATTERNS)
|
||||
if is_disconnect and not status_code:
|
||||
# Absolute token/message-count thresholds are only a proxy for smaller
|
||||
# context windows. Large-context sessions can have hundreds of
|
||||
# messages while still being far below their actual token budget.
|
||||
is_large = approx_tokens > context_length * 0.6 or (
|
||||
context_length <= 256000 and (approx_tokens > 120000 or num_messages > 200)
|
||||
)
|
||||
is_large = approx_tokens > context_length * 0.6 or approx_tokens > 120000 or num_messages > 200
|
||||
if is_large:
|
||||
return _result(
|
||||
FailoverReason.context_overflow,
|
||||
@@ -603,12 +385,12 @@ def classify_api_error(
|
||||
)
|
||||
return _result(FailoverReason.timeout, retryable=True)
|
||||
|
||||
# ── 7. Transport / timeout heuristics ───────────────────────────
|
||||
# ── 6. Transport / timeout heuristics ───────────────────────────
|
||||
|
||||
if error_type in _TRANSPORT_ERROR_TYPES or isinstance(error, (TimeoutError, ConnectionError, OSError)):
|
||||
return _result(FailoverReason.timeout, retryable=True)
|
||||
|
||||
# ── 8. Fallback: unknown ────────────────────────────────────────
|
||||
# ── 7. Fallback: unknown ────────────────────────────────────────
|
||||
|
||||
return _result(FailoverReason.unknown, retryable=True)
|
||||
|
||||
@@ -662,33 +444,17 @@ def _classify_by_status(
|
||||
return _classify_402(error_msg, result_fn)
|
||||
|
||||
if status_code == 404:
|
||||
# OpenRouter policy-block 404 — distinct from "model not found".
|
||||
# The model exists; the user's account privacy setting excludes the
|
||||
# only endpoint serving it. Falling back to another provider won't
|
||||
# help (same account setting applies). The error body already
|
||||
# contains the fix URL, so just surface it.
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
# Generic 404 with no "model not found" signal — could be a wrong
|
||||
# endpoint path (common with local llama.cpp / Ollama / vLLM when
|
||||
# the URL is slightly misconfigured), a proxy routing glitch, or
|
||||
# a transient backend issue. Classifying these as model_not_found
|
||||
# silently falls back to a different provider and tells the model
|
||||
# the model is missing, which is wrong and wastes a turn. Treat
|
||||
# as unknown so the retry loop surfaces the real error instead.
|
||||
# Generic 404 — could be model or endpoint
|
||||
return result_fn(
|
||||
FailoverReason.unknown,
|
||||
retryable=True,
|
||||
FailoverReason.model_not_found,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
if status_code == 413:
|
||||
@@ -717,10 +483,10 @@ def _classify_by_status(
|
||||
result_fn=result_fn,
|
||||
)
|
||||
|
||||
if status_code in {500, 502}:
|
||||
if status_code in (500, 502):
|
||||
return result_fn(FailoverReason.server_error, retryable=True)
|
||||
|
||||
if status_code in {503, 529}:
|
||||
if status_code in (503, 529):
|
||||
return result_fn(FailoverReason.overloaded, retryable=True)
|
||||
|
||||
# Other 4xx — non-retryable
|
||||
@@ -781,15 +547,6 @@ def _classify_400(
|
||||
) -> ClassifiedError:
|
||||
"""Classify 400 Bad Request — context overflow, format error, or generic."""
|
||||
|
||||
# Image-too-large from 400 (Anthropic's 5 MB per-image check fires this way).
|
||||
# Must be checked BEFORE context_overflow because messages can trip both
|
||||
# patterns ("exceeds" + "image") and image-shrink is a cheaper recovery.
|
||||
if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.image_too_large,
|
||||
retryable=True,
|
||||
)
|
||||
|
||||
# Context overflow from 400
|
||||
if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
|
||||
return result_fn(
|
||||
@@ -799,12 +556,6 @@ def _classify_400(
|
||||
)
|
||||
|
||||
# Some providers return model-not-found as 400 instead of 404 (e.g. OpenRouter).
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
@@ -835,17 +586,12 @@ def _classify_400(
|
||||
if isinstance(body, dict):
|
||||
err_obj = body.get("error", {})
|
||||
if isinstance(err_obj, dict):
|
||||
err_body_msg = str(err_obj.get("message") or "").strip().lower()
|
||||
err_body_msg = (err_obj.get("message") or "").strip().lower()
|
||||
# Responses API (and some providers) use flat body: {"message": "..."}
|
||||
if not err_body_msg:
|
||||
err_body_msg = str(body.get("message") or "").strip().lower()
|
||||
is_generic = len(err_body_msg) < 30 or err_body_msg in {"error", ""}
|
||||
# Absolute token/message-count thresholds are only a proxy for smaller
|
||||
# context windows. Large-context sessions can have many messages while
|
||||
# still being far below their actual token budget.
|
||||
is_large = approx_tokens > context_length * 0.4 or (
|
||||
context_length <= 256000 and (approx_tokens > 80000 or num_messages > 80)
|
||||
)
|
||||
err_body_msg = (body.get("message") or "").strip().lower()
|
||||
is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "")
|
||||
is_large = approx_tokens > context_length * 0.4 or approx_tokens > 80000 or num_messages > 80
|
||||
|
||||
if is_generic and is_large:
|
||||
return result_fn(
|
||||
@@ -870,14 +616,14 @@ def _classify_by_error_code(
|
||||
"""Classify by structured error codes from the response body."""
|
||||
code_lower = error_code.lower()
|
||||
|
||||
if code_lower in {"resource_exhausted", "throttled", "rate_limit_exceeded"}:
|
||||
if code_lower in ("resource_exhausted", "throttled", "rate_limit_exceeded"):
|
||||
return result_fn(
|
||||
FailoverReason.rate_limit,
|
||||
retryable=True,
|
||||
should_rotate_credential=True,
|
||||
)
|
||||
|
||||
if code_lower in {"insufficient_quota", "billing_not_active", "payment_required"}:
|
||||
if code_lower in ("insufficient_quota", "billing_not_active", "payment_required"):
|
||||
return result_fn(
|
||||
FailoverReason.billing,
|
||||
retryable=False,
|
||||
@@ -885,14 +631,14 @@ def _classify_by_error_code(
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
if code_lower in {"model_not_found", "model_not_available", "invalid_model"}:
|
||||
if code_lower in ("model_not_found", "model_not_available", "invalid_model"):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
if code_lower in {"context_length_exceeded", "max_tokens_exceeded"}:
|
||||
if code_lower in ("context_length_exceeded", "max_tokens_exceeded"):
|
||||
return result_fn(
|
||||
FailoverReason.context_overflow,
|
||||
retryable=True,
|
||||
@@ -922,13 +668,6 @@ def _classify_by_message(
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Image-too-large patterns (from message text when no status_code)
|
||||
if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.image_too_large,
|
||||
retryable=True,
|
||||
)
|
||||
|
||||
# Usage-limit patterns need the same disambiguation as 402: some providers
|
||||
# surface "usage limit" errors without an HTTP status code. A transient
|
||||
# signal ("try again", "resets at", …) means it's a periodic quota, not
|
||||
@@ -989,15 +728,6 @@ def _classify_by_message(
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Provider policy-block (aggregator-side guardrail) — check before
|
||||
# model_not_found so we don't mis-label as a missing model.
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
|
||||
# Model not found patterns
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
@@ -1006,14 +736,6 @@ def _classify_by_message(
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Timeout message patterns — generic exception types (e.g. RuntimeError)
|
||||
# raised by local shims or custom providers that internally wrap a
|
||||
# subprocess/HTTP timeout. Classified as transport timeout so the retry
|
||||
# loop rebuilds the client instead of treating the turn as an empty
|
||||
# model response.
|
||||
if any(p in error_msg for p in _TIMEOUT_MESSAGE_PATTERNS):
|
||||
return result_fn(FailoverReason.timeout, retryable=True)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
"""Shared file safety rules used by both tools and ACP shims."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def _hermes_home_path() -> Path:
|
||||
"""Resolve the active HERMES_HOME (profile-aware) without circular imports."""
|
||||
try:
|
||||
from hermes_constants import get_hermes_home # local import to avoid cycles
|
||||
return get_hermes_home()
|
||||
except Exception:
|
||||
return Path(os.path.expanduser("~/.hermes"))
|
||||
|
||||
|
||||
def build_write_denied_paths(home: str) -> set[str]:
|
||||
"""Return exact sensitive paths that must never be written."""
|
||||
hermes_home = _hermes_home_path()
|
||||
return {
|
||||
os.path.realpath(p)
|
||||
for p in [
|
||||
os.path.join(home, ".ssh", "authorized_keys"),
|
||||
os.path.join(home, ".ssh", "id_rsa"),
|
||||
os.path.join(home, ".ssh", "id_ed25519"),
|
||||
os.path.join(home, ".ssh", "config"),
|
||||
str(hermes_home / ".env"),
|
||||
os.path.join(home, ".bashrc"),
|
||||
os.path.join(home, ".zshrc"),
|
||||
os.path.join(home, ".profile"),
|
||||
os.path.join(home, ".bash_profile"),
|
||||
os.path.join(home, ".zprofile"),
|
||||
os.path.join(home, ".netrc"),
|
||||
os.path.join(home, ".pgpass"),
|
||||
os.path.join(home, ".npmrc"),
|
||||
os.path.join(home, ".pypirc"),
|
||||
"/etc/sudoers",
|
||||
"/etc/passwd",
|
||||
"/etc/shadow",
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
def build_write_denied_prefixes(home: str) -> list[str]:
|
||||
"""Return sensitive directory prefixes that must never be written."""
|
||||
return [
|
||||
os.path.realpath(p) + os.sep
|
||||
for p in [
|
||||
os.path.join(home, ".ssh"),
|
||||
os.path.join(home, ".aws"),
|
||||
os.path.join(home, ".gnupg"),
|
||||
os.path.join(home, ".kube"),
|
||||
"/etc/sudoers.d",
|
||||
"/etc/systemd",
|
||||
os.path.join(home, ".docker"),
|
||||
os.path.join(home, ".azure"),
|
||||
os.path.join(home, ".config", "gh"),
|
||||
]
|
||||
]
|
||||
|
||||
|
||||
def get_safe_write_root() -> Optional[str]:
|
||||
"""Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset."""
|
||||
root = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
|
||||
if not root:
|
||||
return None
|
||||
try:
|
||||
return os.path.realpath(os.path.expanduser(root))
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def is_write_denied(path: str) -> bool:
|
||||
"""Return True if path is blocked by the write denylist or safe root."""
|
||||
home = os.path.realpath(os.path.expanduser("~"))
|
||||
resolved = os.path.realpath(os.path.expanduser(str(path)))
|
||||
|
||||
if resolved in build_write_denied_paths(home):
|
||||
return True
|
||||
for prefix in build_write_denied_prefixes(home):
|
||||
if resolved.startswith(prefix):
|
||||
return True
|
||||
|
||||
safe_root = get_safe_write_root()
|
||||
if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def get_read_block_error(path: str) -> Optional[str]:
|
||||
"""Return an error message when a read targets internal Hermes cache files."""
|
||||
resolved = Path(path).expanduser().resolve()
|
||||
hermes_home = _hermes_home_path().resolve()
|
||||
blocked_dirs = [
|
||||
hermes_home / "skills" / ".hub" / "index-cache",
|
||||
hermes_home / "skills" / ".hub",
|
||||
]
|
||||
for blocked in blocked_dirs:
|
||||
try:
|
||||
resolved.relative_to(blocked)
|
||||
except ValueError:
|
||||
continue
|
||||
return (
|
||||
f"Access denied: {path} is an internal Hermes cache file "
|
||||
"and cannot be read directly to prevent prompt injection. "
|
||||
"Use the skills_list or skill_view tools instead."
|
||||
)
|
||||
return None
|
||||
@@ -1,909 +0,0 @@
|
||||
"""OpenAI-compatible facade that talks to Google's Cloud Code Assist backend.
|
||||
|
||||
This adapter lets Hermes use the ``google-gemini-cli`` provider as if it were
|
||||
a standard OpenAI-shaped chat completion endpoint, while the underlying HTTP
|
||||
traffic goes to ``cloudcode-pa.googleapis.com/v1internal:{generateContent,
|
||||
streamGenerateContent}`` with a Bearer access token obtained via OAuth PKCE.
|
||||
|
||||
Architecture
|
||||
------------
|
||||
- ``GeminiCloudCodeClient`` exposes ``.chat.completions.create(**kwargs)``
|
||||
mirroring the subset of the OpenAI SDK that ``run_agent.py`` uses.
|
||||
- Incoming OpenAI ``messages[]`` / ``tools[]`` / ``tool_choice`` are translated
|
||||
to Gemini's native ``contents[]`` / ``tools[].functionDeclarations`` /
|
||||
``toolConfig`` / ``systemInstruction`` shape.
|
||||
- The request body is wrapped ``{project, model, user_prompt_id, request}``
|
||||
per Code Assist API expectations.
|
||||
- Responses (``candidates[].content.parts[]``) are converted back to
|
||||
OpenAI ``choices[0].message`` shape with ``content`` + ``tool_calls``.
|
||||
- Streaming uses SSE (``?alt=sse``) and yields OpenAI-shaped delta chunks.
|
||||
|
||||
Attribution
|
||||
-----------
|
||||
Translation semantics follow jenslys/opencode-gemini-auth (MIT) and the public
|
||||
Gemini API docs. Request envelope shape
|
||||
(``{project, model, user_prompt_id, request}``) is documented nowhere; it is
|
||||
reverse-engineered from the opencode-gemini-auth and clawdbot implementations.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from agent import google_oauth
|
||||
from agent.gemini_schema import sanitize_gemini_tool_parameters
|
||||
from agent.google_code_assist import (
|
||||
CODE_ASSIST_ENDPOINT,
|
||||
CodeAssistError,
|
||||
ProjectContext,
|
||||
resolve_project_context,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Request translation: OpenAI → Gemini
|
||||
# =============================================================================
|
||||
|
||||
_ROLE_MAP_OPENAI_TO_GEMINI = {
|
||||
"user": "user",
|
||||
"assistant": "model",
|
||||
"system": "user", # handled separately via systemInstruction
|
||||
"tool": "user", # functionResponse is wrapped in a user-role turn
|
||||
"function": "user",
|
||||
}
|
||||
|
||||
|
||||
def _coerce_content_to_text(content: Any) -> str:
|
||||
"""OpenAI content may be str or a list of parts; reduce to plain text."""
|
||||
if content is None:
|
||||
return ""
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
if isinstance(content, list):
|
||||
pieces: List[str] = []
|
||||
for p in content:
|
||||
if isinstance(p, str):
|
||||
pieces.append(p)
|
||||
elif isinstance(p, dict):
|
||||
if p.get("type") == "text" and isinstance(p.get("text"), str):
|
||||
pieces.append(p["text"])
|
||||
# Multimodal (image_url, etc.) — stub for now; log and skip
|
||||
elif p.get("type") in {"image_url", "input_audio"}:
|
||||
logger.debug("Dropping multimodal part (not yet supported): %s", p.get("type"))
|
||||
return "\n".join(pieces)
|
||||
return str(content)
|
||||
|
||||
|
||||
def _translate_tool_call_to_gemini(tool_call: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""OpenAI tool_call -> Gemini functionCall part."""
|
||||
fn = tool_call.get("function") or {}
|
||||
args_raw = fn.get("arguments", "")
|
||||
try:
|
||||
args = json.loads(args_raw) if isinstance(args_raw, str) and args_raw else {}
|
||||
except json.JSONDecodeError:
|
||||
args = {"_raw": args_raw}
|
||||
if not isinstance(args, dict):
|
||||
args = {"_value": args}
|
||||
return {
|
||||
"functionCall": {
|
||||
"name": fn.get("name") or "",
|
||||
"args": args,
|
||||
},
|
||||
# Sentinel signature — matches opencode-gemini-auth's approach.
|
||||
# Without this, Code Assist rejects function calls that originated
|
||||
# outside its own chain.
|
||||
"thoughtSignature": "skip_thought_signature_validator",
|
||||
}
|
||||
|
||||
|
||||
def _translate_tool_result_to_gemini(message: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""OpenAI tool-role message -> Gemini functionResponse part.
|
||||
|
||||
The function name isn't in the OpenAI tool message directly; it must be
|
||||
passed via the assistant message that issued the call. For simplicity we
|
||||
look up ``name`` on the message (OpenAI SDK copies it there) or on the
|
||||
``tool_call_id`` cross-reference.
|
||||
"""
|
||||
name = str(message.get("name") or message.get("tool_call_id") or "tool")
|
||||
content = _coerce_content_to_text(message.get("content"))
|
||||
# Gemini expects the response as a dict under `response`. We wrap plain
|
||||
# text in {"output": "..."}.
|
||||
try:
|
||||
parsed = json.loads(content) if content.strip().startswith(("{", "[")) else None
|
||||
except json.JSONDecodeError:
|
||||
parsed = None
|
||||
response = parsed if isinstance(parsed, dict) else {"output": content}
|
||||
return {
|
||||
"functionResponse": {
|
||||
"name": name,
|
||||
"response": response,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _build_gemini_contents(
|
||||
messages: List[Dict[str, Any]],
|
||||
) -> tuple[List[Dict[str, Any]], Optional[Dict[str, Any]]]:
|
||||
"""Convert OpenAI messages[] to Gemini contents[] + systemInstruction."""
|
||||
system_text_parts: List[str] = []
|
||||
contents: List[Dict[str, Any]] = []
|
||||
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
role = str(msg.get("role") or "user")
|
||||
|
||||
if role == "system":
|
||||
system_text_parts.append(_coerce_content_to_text(msg.get("content")))
|
||||
continue
|
||||
|
||||
# Tool result message — emit a user-role turn with functionResponse
|
||||
if role == "tool" or role == "function":
|
||||
contents.append({
|
||||
"role": "user",
|
||||
"parts": [_translate_tool_result_to_gemini(msg)],
|
||||
})
|
||||
continue
|
||||
|
||||
gemini_role = _ROLE_MAP_OPENAI_TO_GEMINI.get(role, "user")
|
||||
parts: List[Dict[str, Any]] = []
|
||||
|
||||
text = _coerce_content_to_text(msg.get("content"))
|
||||
if text:
|
||||
parts.append({"text": text})
|
||||
|
||||
# Assistant messages can carry tool_calls
|
||||
tool_calls = msg.get("tool_calls") or []
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
if isinstance(tc, dict):
|
||||
parts.append(_translate_tool_call_to_gemini(tc))
|
||||
|
||||
if not parts:
|
||||
# Gemini rejects empty parts; skip the turn entirely
|
||||
continue
|
||||
|
||||
contents.append({"role": gemini_role, "parts": parts})
|
||||
|
||||
system_instruction: Optional[Dict[str, Any]] = None
|
||||
joined_system = "\n".join(p for p in system_text_parts if p).strip()
|
||||
if joined_system:
|
||||
system_instruction = {
|
||||
"role": "system",
|
||||
"parts": [{"text": joined_system}],
|
||||
}
|
||||
|
||||
return contents, system_instruction
|
||||
|
||||
|
||||
def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]:
|
||||
"""OpenAI tools[] -> Gemini tools[].functionDeclarations[]."""
|
||||
if not isinstance(tools, list) or not tools:
|
||||
return []
|
||||
declarations: List[Dict[str, Any]] = []
|
||||
for t in tools:
|
||||
if not isinstance(t, dict):
|
||||
continue
|
||||
fn = t.get("function") or {}
|
||||
if not isinstance(fn, dict):
|
||||
continue
|
||||
name = fn.get("name")
|
||||
if not name:
|
||||
continue
|
||||
decl = {"name": str(name)}
|
||||
if fn.get("description"):
|
||||
decl["description"] = str(fn["description"])
|
||||
params = fn.get("parameters")
|
||||
if isinstance(params, dict):
|
||||
decl["parameters"] = sanitize_gemini_tool_parameters(params)
|
||||
declarations.append(decl)
|
||||
if not declarations:
|
||||
return []
|
||||
return [{"functionDeclarations": declarations}]
|
||||
|
||||
|
||||
def _translate_tool_choice_to_gemini(tool_choice: Any) -> Optional[Dict[str, Any]]:
|
||||
"""OpenAI tool_choice -> Gemini toolConfig.functionCallingConfig."""
|
||||
if tool_choice is None:
|
||||
return None
|
||||
if isinstance(tool_choice, str):
|
||||
if tool_choice == "auto":
|
||||
return {"functionCallingConfig": {"mode": "AUTO"}}
|
||||
if tool_choice == "required":
|
||||
return {"functionCallingConfig": {"mode": "ANY"}}
|
||||
if tool_choice == "none":
|
||||
return {"functionCallingConfig": {"mode": "NONE"}}
|
||||
if isinstance(tool_choice, dict):
|
||||
fn = tool_choice.get("function") or {}
|
||||
name = fn.get("name")
|
||||
if name:
|
||||
return {
|
||||
"functionCallingConfig": {
|
||||
"mode": "ANY",
|
||||
"allowedFunctionNames": [str(name)],
|
||||
},
|
||||
}
|
||||
return None
|
||||
|
||||
|
||||
def _normalize_thinking_config(config: Any) -> Optional[Dict[str, Any]]:
|
||||
"""Accept thinkingBudget / thinkingLevel / includeThoughts (+ snake_case)."""
|
||||
if not isinstance(config, dict) or not config:
|
||||
return None
|
||||
budget = config.get("thinkingBudget", config.get("thinking_budget"))
|
||||
level = config.get("thinkingLevel", config.get("thinking_level"))
|
||||
include = config.get("includeThoughts", config.get("include_thoughts"))
|
||||
normalized: Dict[str, Any] = {}
|
||||
if isinstance(budget, (int, float)):
|
||||
normalized["thinkingBudget"] = int(budget)
|
||||
if isinstance(level, str) and level.strip():
|
||||
normalized["thinkingLevel"] = level.strip().lower()
|
||||
if isinstance(include, bool):
|
||||
normalized["includeThoughts"] = include
|
||||
return normalized or None
|
||||
|
||||
|
||||
def build_gemini_request(
|
||||
*,
|
||||
messages: List[Dict[str, Any]],
|
||||
tools: Any = None,
|
||||
tool_choice: Any = None,
|
||||
temperature: Optional[float] = None,
|
||||
max_tokens: Optional[int] = None,
|
||||
top_p: Optional[float] = None,
|
||||
stop: Any = None,
|
||||
thinking_config: Any = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build the inner Gemini request body (goes inside ``request`` wrapper)."""
|
||||
contents, system_instruction = _build_gemini_contents(messages)
|
||||
|
||||
body: Dict[str, Any] = {"contents": contents}
|
||||
if system_instruction is not None:
|
||||
body["systemInstruction"] = system_instruction
|
||||
|
||||
gemini_tools = _translate_tools_to_gemini(tools)
|
||||
if gemini_tools:
|
||||
body["tools"] = gemini_tools
|
||||
tool_cfg = _translate_tool_choice_to_gemini(tool_choice)
|
||||
if tool_cfg is not None:
|
||||
body["toolConfig"] = tool_cfg
|
||||
|
||||
generation_config: Dict[str, Any] = {}
|
||||
if isinstance(temperature, (int, float)):
|
||||
generation_config["temperature"] = float(temperature)
|
||||
if isinstance(max_tokens, int) and max_tokens > 0:
|
||||
generation_config["maxOutputTokens"] = max_tokens
|
||||
if isinstance(top_p, (int, float)):
|
||||
generation_config["topP"] = float(top_p)
|
||||
if isinstance(stop, str) and stop:
|
||||
generation_config["stopSequences"] = [stop]
|
||||
elif isinstance(stop, list) and stop:
|
||||
generation_config["stopSequences"] = [str(s) for s in stop if s]
|
||||
normalized_thinking = _normalize_thinking_config(thinking_config)
|
||||
if normalized_thinking:
|
||||
generation_config["thinkingConfig"] = normalized_thinking
|
||||
if generation_config:
|
||||
body["generationConfig"] = generation_config
|
||||
|
||||
return body
|
||||
|
||||
|
||||
def wrap_code_assist_request(
|
||||
*,
|
||||
project_id: str,
|
||||
model: str,
|
||||
inner_request: Dict[str, Any],
|
||||
user_prompt_id: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Wrap the inner Gemini request in the Code Assist envelope."""
|
||||
return {
|
||||
"project": project_id,
|
||||
"model": model,
|
||||
"user_prompt_id": user_prompt_id or str(uuid.uuid4()),
|
||||
"request": inner_request,
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Response translation: Gemini → OpenAI
|
||||
# =============================================================================
|
||||
|
||||
def _translate_gemini_response(
|
||||
resp: Dict[str, Any],
|
||||
model: str,
|
||||
) -> SimpleNamespace:
|
||||
"""Non-streaming Gemini response -> OpenAI-shaped SimpleNamespace.
|
||||
|
||||
Code Assist wraps the actual Gemini response inside ``response``, so we
|
||||
unwrap it first if present.
|
||||
"""
|
||||
inner = resp.get("response") if isinstance(resp.get("response"), dict) else resp
|
||||
|
||||
candidates = inner.get("candidates") or []
|
||||
if not isinstance(candidates, list) or not candidates:
|
||||
return _empty_response(model)
|
||||
|
||||
cand = candidates[0]
|
||||
content_obj = cand.get("content") if isinstance(cand, dict) else {}
|
||||
parts = content_obj.get("parts") if isinstance(content_obj, dict) else []
|
||||
|
||||
text_pieces: List[str] = []
|
||||
reasoning_pieces: List[str] = []
|
||||
tool_calls: List[SimpleNamespace] = []
|
||||
|
||||
for i, part in enumerate(parts or []):
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
# Thought parts are model's internal reasoning — surface as reasoning,
|
||||
# don't mix into content.
|
||||
if part.get("thought") is True:
|
||||
if isinstance(part.get("text"), str):
|
||||
reasoning_pieces.append(part["text"])
|
||||
continue
|
||||
if isinstance(part.get("text"), str):
|
||||
text_pieces.append(part["text"])
|
||||
continue
|
||||
fc = part.get("functionCall")
|
||||
if isinstance(fc, dict) and fc.get("name"):
|
||||
try:
|
||||
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
|
||||
except (TypeError, ValueError):
|
||||
args_str = "{}"
|
||||
tool_calls.append(SimpleNamespace(
|
||||
id=f"call_{uuid.uuid4().hex[:12]}",
|
||||
type="function",
|
||||
index=i,
|
||||
function=SimpleNamespace(name=str(fc["name"]), arguments=args_str),
|
||||
))
|
||||
|
||||
finish_reason = "tool_calls" if tool_calls else _map_gemini_finish_reason(
|
||||
str(cand.get("finishReason") or "")
|
||||
)
|
||||
|
||||
usage_meta = inner.get("usageMetadata") or {}
|
||||
usage = SimpleNamespace(
|
||||
prompt_tokens=int(usage_meta.get("promptTokenCount") or 0),
|
||||
completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0),
|
||||
total_tokens=int(usage_meta.get("totalTokenCount") or 0),
|
||||
prompt_tokens_details=SimpleNamespace(
|
||||
cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0),
|
||||
),
|
||||
)
|
||||
|
||||
message = SimpleNamespace(
|
||||
role="assistant",
|
||||
content="".join(text_pieces) if text_pieces else None,
|
||||
tool_calls=tool_calls or None,
|
||||
reasoning="".join(reasoning_pieces) or None,
|
||||
reasoning_content="".join(reasoning_pieces) or None,
|
||||
reasoning_details=None,
|
||||
)
|
||||
choice = SimpleNamespace(
|
||||
index=0,
|
||||
message=message,
|
||||
finish_reason=finish_reason,
|
||||
)
|
||||
return SimpleNamespace(
|
||||
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
|
||||
object="chat.completion",
|
||||
created=int(time.time()),
|
||||
model=model,
|
||||
choices=[choice],
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
|
||||
def _empty_response(model: str) -> SimpleNamespace:
|
||||
message = SimpleNamespace(
|
||||
role="assistant", content="", tool_calls=None,
|
||||
reasoning=None, reasoning_content=None, reasoning_details=None,
|
||||
)
|
||||
choice = SimpleNamespace(index=0, message=message, finish_reason="stop")
|
||||
usage = SimpleNamespace(
|
||||
prompt_tokens=0, completion_tokens=0, total_tokens=0,
|
||||
prompt_tokens_details=SimpleNamespace(cached_tokens=0),
|
||||
)
|
||||
return SimpleNamespace(
|
||||
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
|
||||
object="chat.completion",
|
||||
created=int(time.time()),
|
||||
model=model,
|
||||
choices=[choice],
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
|
||||
def _map_gemini_finish_reason(reason: str) -> str:
|
||||
mapping = {
|
||||
"STOP": "stop",
|
||||
"MAX_TOKENS": "length",
|
||||
"SAFETY": "content_filter",
|
||||
"RECITATION": "content_filter",
|
||||
"OTHER": "stop",
|
||||
}
|
||||
return mapping.get(reason.upper(), "stop")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Streaming SSE iterator
|
||||
# =============================================================================
|
||||
|
||||
class _GeminiStreamChunk(SimpleNamespace):
|
||||
"""Mimics an OpenAI ChatCompletionChunk with .choices[0].delta."""
|
||||
pass
|
||||
|
||||
|
||||
def _make_stream_chunk(
|
||||
*,
|
||||
model: str,
|
||||
content: str = "",
|
||||
tool_call_delta: Optional[Dict[str, Any]] = None,
|
||||
finish_reason: Optional[str] = None,
|
||||
reasoning: str = "",
|
||||
) -> _GeminiStreamChunk:
|
||||
delta_kwargs: Dict[str, Any] = {
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": None,
|
||||
"reasoning": None,
|
||||
"reasoning_content": None,
|
||||
}
|
||||
if content:
|
||||
delta_kwargs["content"] = content
|
||||
if tool_call_delta is not None:
|
||||
delta_kwargs["tool_calls"] = [SimpleNamespace(
|
||||
index=tool_call_delta.get("index", 0),
|
||||
id=tool_call_delta.get("id") or f"call_{uuid.uuid4().hex[:12]}",
|
||||
type="function",
|
||||
function=SimpleNamespace(
|
||||
name=tool_call_delta.get("name") or "",
|
||||
arguments=tool_call_delta.get("arguments") or "",
|
||||
),
|
||||
)]
|
||||
if reasoning:
|
||||
delta_kwargs["reasoning"] = reasoning
|
||||
delta_kwargs["reasoning_content"] = reasoning
|
||||
delta = SimpleNamespace(**delta_kwargs)
|
||||
choice = SimpleNamespace(index=0, delta=delta, finish_reason=finish_reason)
|
||||
return _GeminiStreamChunk(
|
||||
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
|
||||
object="chat.completion.chunk",
|
||||
created=int(time.time()),
|
||||
model=model,
|
||||
choices=[choice],
|
||||
usage=None,
|
||||
)
|
||||
|
||||
|
||||
def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]:
|
||||
"""Parse Server-Sent Events from an httpx streaming response."""
|
||||
buffer = ""
|
||||
for chunk in response.iter_text():
|
||||
if not chunk:
|
||||
continue
|
||||
buffer += chunk
|
||||
while "\n" in buffer:
|
||||
line, buffer = buffer.split("\n", 1)
|
||||
line = line.rstrip("\r")
|
||||
if not line:
|
||||
continue
|
||||
if line.startswith("data: "):
|
||||
data = line[6:]
|
||||
if data == "[DONE]":
|
||||
return
|
||||
try:
|
||||
yield json.loads(data)
|
||||
except json.JSONDecodeError:
|
||||
logger.debug("Non-JSON SSE line: %s", data[:200])
|
||||
|
||||
|
||||
def _translate_stream_event(
|
||||
event: Dict[str, Any],
|
||||
model: str,
|
||||
tool_call_counter: List[int],
|
||||
) -> List[_GeminiStreamChunk]:
|
||||
"""Unwrap Code Assist envelope and emit OpenAI-shaped chunk(s).
|
||||
|
||||
``tool_call_counter`` is a single-element list used as a mutable counter
|
||||
across events in the same stream. Each ``functionCall`` part gets a
|
||||
fresh, unique OpenAI ``index`` — keying by function name would collide
|
||||
whenever the model issues parallel calls to the same tool (e.g. reading
|
||||
three files in one turn).
|
||||
"""
|
||||
inner = event.get("response") if isinstance(event.get("response"), dict) else event
|
||||
candidates = inner.get("candidates") or []
|
||||
if not candidates:
|
||||
return []
|
||||
cand = candidates[0]
|
||||
if not isinstance(cand, dict):
|
||||
return []
|
||||
|
||||
chunks: List[_GeminiStreamChunk] = []
|
||||
|
||||
content = cand.get("content") or {}
|
||||
parts = content.get("parts") if isinstance(content, dict) else []
|
||||
for part in parts or []:
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
if part.get("thought") is True and isinstance(part.get("text"), str):
|
||||
chunks.append(_make_stream_chunk(
|
||||
model=model, reasoning=part["text"],
|
||||
))
|
||||
continue
|
||||
if isinstance(part.get("text"), str) and part["text"]:
|
||||
chunks.append(_make_stream_chunk(model=model, content=part["text"]))
|
||||
fc = part.get("functionCall")
|
||||
if isinstance(fc, dict) and fc.get("name"):
|
||||
name = str(fc["name"])
|
||||
idx = tool_call_counter[0]
|
||||
tool_call_counter[0] += 1
|
||||
try:
|
||||
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
|
||||
except (TypeError, ValueError):
|
||||
args_str = "{}"
|
||||
chunks.append(_make_stream_chunk(
|
||||
model=model,
|
||||
tool_call_delta={
|
||||
"index": idx,
|
||||
"name": name,
|
||||
"arguments": args_str,
|
||||
},
|
||||
))
|
||||
|
||||
finish_reason_raw = str(cand.get("finishReason") or "")
|
||||
if finish_reason_raw:
|
||||
mapped = _map_gemini_finish_reason(finish_reason_raw)
|
||||
if tool_call_counter[0] > 0:
|
||||
mapped = "tool_calls"
|
||||
chunks.append(_make_stream_chunk(model=model, finish_reason=mapped))
|
||||
return chunks
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# GeminiCloudCodeClient — OpenAI-compatible facade
|
||||
# =============================================================================
|
||||
|
||||
MARKER_BASE_URL = "cloudcode-pa://google"
|
||||
|
||||
|
||||
class _GeminiChatCompletions:
|
||||
def __init__(self, client: "GeminiCloudCodeClient"):
|
||||
self._client = client
|
||||
|
||||
def create(self, **kwargs: Any) -> Any:
|
||||
return self._client._create_chat_completion(**kwargs)
|
||||
|
||||
|
||||
class _GeminiChatNamespace:
|
||||
def __init__(self, client: "GeminiCloudCodeClient"):
|
||||
self.completions = _GeminiChatCompletions(client)
|
||||
|
||||
|
||||
class GeminiCloudCodeClient:
|
||||
"""Minimal OpenAI-SDK-compatible facade over Code Assist v1internal."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
api_key: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
default_headers: Optional[Dict[str, str]] = None,
|
||||
project_id: str = "",
|
||||
**_: Any,
|
||||
):
|
||||
# `api_key` here is a dummy — real auth is the OAuth access token
|
||||
# fetched on every call via agent.google_oauth.get_valid_access_token().
|
||||
# We accept the kwarg for openai.OpenAI interface parity.
|
||||
self.api_key = api_key or "google-oauth"
|
||||
self.base_url = base_url or MARKER_BASE_URL
|
||||
self._default_headers = dict(default_headers or {})
|
||||
self._configured_project_id = project_id
|
||||
self._project_context: Optional[ProjectContext] = None
|
||||
self._project_context_lock = False # simple single-thread guard
|
||||
self.chat = _GeminiChatNamespace(self)
|
||||
self.is_closed = False
|
||||
self._http = httpx.Client(timeout=httpx.Timeout(connect=15.0, read=600.0, write=30.0, pool=30.0))
|
||||
|
||||
def close(self) -> None:
|
||||
self.is_closed = True
|
||||
try:
|
||||
self._http.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Implement the OpenAI SDK's context-manager-ish closure check
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
self.close()
|
||||
|
||||
def _ensure_project_context(self, access_token: str, model: str) -> ProjectContext:
|
||||
"""Lazily resolve and cache the project context for this client."""
|
||||
if self._project_context is not None:
|
||||
return self._project_context
|
||||
|
||||
env_project = google_oauth.resolve_project_id_from_env()
|
||||
creds = google_oauth.load_credentials()
|
||||
stored_project = creds.project_id if creds else ""
|
||||
|
||||
# Prefer what's already baked into the creds
|
||||
if stored_project:
|
||||
self._project_context = ProjectContext(
|
||||
project_id=stored_project,
|
||||
managed_project_id=creds.managed_project_id if creds else "",
|
||||
tier_id="",
|
||||
source="stored",
|
||||
)
|
||||
return self._project_context
|
||||
|
||||
ctx = resolve_project_context(
|
||||
access_token,
|
||||
configured_project_id=self._configured_project_id,
|
||||
env_project_id=env_project,
|
||||
user_agent_model=model,
|
||||
)
|
||||
# Persist discovered project back to the creds file so the next
|
||||
# session doesn't re-run the discovery.
|
||||
if ctx.project_id or ctx.managed_project_id:
|
||||
google_oauth.update_project_ids(
|
||||
project_id=ctx.project_id,
|
||||
managed_project_id=ctx.managed_project_id,
|
||||
)
|
||||
self._project_context = ctx
|
||||
return ctx
|
||||
|
||||
def _create_chat_completion(
|
||||
self,
|
||||
*,
|
||||
model: str = "gemini-2.5-flash",
|
||||
messages: Optional[List[Dict[str, Any]]] = None,
|
||||
stream: bool = False,
|
||||
tools: Any = None,
|
||||
tool_choice: Any = None,
|
||||
temperature: Optional[float] = None,
|
||||
max_tokens: Optional[int] = None,
|
||||
top_p: Optional[float] = None,
|
||||
stop: Any = None,
|
||||
extra_body: Optional[Dict[str, Any]] = None,
|
||||
timeout: Any = None,
|
||||
**_: Any,
|
||||
) -> Any:
|
||||
access_token = google_oauth.get_valid_access_token()
|
||||
ctx = self._ensure_project_context(access_token, model)
|
||||
|
||||
thinking_config = None
|
||||
if isinstance(extra_body, dict):
|
||||
thinking_config = extra_body.get("thinking_config") or extra_body.get("thinkingConfig")
|
||||
|
||||
inner = build_gemini_request(
|
||||
messages=messages or [],
|
||||
tools=tools,
|
||||
tool_choice=tool_choice,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
top_p=top_p,
|
||||
stop=stop,
|
||||
thinking_config=thinking_config,
|
||||
)
|
||||
wrapped = wrap_code_assist_request(
|
||||
project_id=ctx.project_id,
|
||||
model=model,
|
||||
inner_request=inner,
|
||||
)
|
||||
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"Accept": "application/json",
|
||||
"Authorization": f"Bearer {access_token}",
|
||||
"User-Agent": "hermes-agent (gemini-cli-compat)",
|
||||
"X-Goog-Api-Client": "gl-python/hermes",
|
||||
"x-activity-request-id": str(uuid.uuid4()),
|
||||
}
|
||||
headers.update(self._default_headers)
|
||||
|
||||
if stream:
|
||||
return self._stream_completion(model=model, wrapped=wrapped, headers=headers)
|
||||
|
||||
url = f"{CODE_ASSIST_ENDPOINT}/v1internal:generateContent"
|
||||
response = self._http.post(url, json=wrapped, headers=headers)
|
||||
if response.status_code != 200:
|
||||
raise _gemini_http_error(response)
|
||||
try:
|
||||
payload = response.json()
|
||||
except ValueError as exc:
|
||||
raise CodeAssistError(
|
||||
f"Invalid JSON from Code Assist: {exc}",
|
||||
code="code_assist_invalid_json",
|
||||
) from exc
|
||||
return _translate_gemini_response(payload, model=model)
|
||||
|
||||
def _stream_completion(
|
||||
self,
|
||||
*,
|
||||
model: str,
|
||||
wrapped: Dict[str, Any],
|
||||
headers: Dict[str, str],
|
||||
) -> Iterator[_GeminiStreamChunk]:
|
||||
"""Generator that yields OpenAI-shaped streaming chunks."""
|
||||
url = f"{CODE_ASSIST_ENDPOINT}/v1internal:streamGenerateContent?alt=sse"
|
||||
stream_headers = dict(headers)
|
||||
stream_headers["Accept"] = "text/event-stream"
|
||||
|
||||
def _generator() -> Iterator[_GeminiStreamChunk]:
|
||||
try:
|
||||
with self._http.stream("POST", url, json=wrapped, headers=stream_headers) as response:
|
||||
if response.status_code != 200:
|
||||
# Materialize error body for better diagnostics
|
||||
response.read()
|
||||
raise _gemini_http_error(response)
|
||||
tool_call_counter: List[int] = [0]
|
||||
for event in _iter_sse_events(response):
|
||||
for chunk in _translate_stream_event(event, model, tool_call_counter):
|
||||
yield chunk
|
||||
except httpx.HTTPError as exc:
|
||||
raise CodeAssistError(
|
||||
f"Streaming request failed: {exc}",
|
||||
code="code_assist_stream_error",
|
||||
) from exc
|
||||
|
||||
return _generator()
|
||||
|
||||
|
||||
def _gemini_http_error(response: httpx.Response) -> CodeAssistError:
|
||||
"""Translate an httpx response into a CodeAssistError with rich metadata.
|
||||
|
||||
Parses Google's error envelope (``{"error": {"code", "message", "status",
|
||||
"details": [...]}}``) so the agent's error classifier can reason about
|
||||
the failure — ``status_code`` enables the rate_limit / auth classification
|
||||
paths, and ``response`` lets the main loop honor ``Retry-After`` just
|
||||
like it does for OpenAI SDK exceptions.
|
||||
|
||||
Also lifts a few recognizable Google conditions into human-readable
|
||||
messages so the user sees something better than a 500-char JSON dump:
|
||||
|
||||
MODEL_CAPACITY_EXHAUSTED → "Gemini model capacity exhausted for
|
||||
<model>. This is a Google-side throttle..."
|
||||
RESOURCE_EXHAUSTED w/o reason → quota-style message
|
||||
404 → "Model <name> not found at cloudcode-pa..."
|
||||
"""
|
||||
status = response.status_code
|
||||
|
||||
# Parse the body once, surviving any weird encodings.
|
||||
body_text = ""
|
||||
body_json: Dict[str, Any] = {}
|
||||
try:
|
||||
body_text = response.text
|
||||
except Exception:
|
||||
body_text = ""
|
||||
if body_text:
|
||||
try:
|
||||
parsed = json.loads(body_text)
|
||||
if isinstance(parsed, dict):
|
||||
body_json = parsed
|
||||
except (ValueError, TypeError):
|
||||
body_json = {}
|
||||
|
||||
# Dig into Google's error envelope. Shape is:
|
||||
# {"error": {"code": 429, "message": "...", "status": "RESOURCE_EXHAUSTED",
|
||||
# "details": [{"@type": ".../ErrorInfo", "reason": "MODEL_CAPACITY_EXHAUSTED",
|
||||
# "metadata": {...}},
|
||||
# {"@type": ".../RetryInfo", "retryDelay": "30s"}]}}
|
||||
err_obj = body_json.get("error") if isinstance(body_json, dict) else None
|
||||
if not isinstance(err_obj, dict):
|
||||
err_obj = {}
|
||||
err_status = str(err_obj.get("status") or "").strip()
|
||||
err_message = str(err_obj.get("message") or "").strip()
|
||||
_raw_details = err_obj.get("details")
|
||||
err_details_list = _raw_details if isinstance(_raw_details, list) else []
|
||||
|
||||
# Extract google.rpc.ErrorInfo reason + metadata. There may be more
|
||||
# than one ErrorInfo (rare), so we pick the first one with a reason.
|
||||
error_reason = ""
|
||||
error_metadata: Dict[str, Any] = {}
|
||||
retry_delay_seconds: Optional[float] = None
|
||||
for detail in err_details_list:
|
||||
if not isinstance(detail, dict):
|
||||
continue
|
||||
type_url = str(detail.get("@type") or "")
|
||||
if not error_reason and type_url.endswith("/google.rpc.ErrorInfo"):
|
||||
reason = detail.get("reason")
|
||||
if isinstance(reason, str) and reason:
|
||||
error_reason = reason
|
||||
md = detail.get("metadata")
|
||||
if isinstance(md, dict):
|
||||
error_metadata = md
|
||||
elif retry_delay_seconds is None and type_url.endswith("/google.rpc.RetryInfo"):
|
||||
# retryDelay is a google.protobuf.Duration string like "30s" or "1.5s".
|
||||
delay_raw = detail.get("retryDelay")
|
||||
if isinstance(delay_raw, str) and delay_raw.endswith("s"):
|
||||
try:
|
||||
retry_delay_seconds = float(delay_raw[:-1])
|
||||
except ValueError:
|
||||
pass
|
||||
elif isinstance(delay_raw, (int, float)):
|
||||
retry_delay_seconds = float(delay_raw)
|
||||
|
||||
# Fall back to the Retry-After header if the body didn't include RetryInfo.
|
||||
if retry_delay_seconds is None:
|
||||
try:
|
||||
header_val = response.headers.get("Retry-After") or response.headers.get("retry-after")
|
||||
except Exception:
|
||||
header_val = None
|
||||
if header_val:
|
||||
try:
|
||||
retry_delay_seconds = float(header_val)
|
||||
except (TypeError, ValueError):
|
||||
retry_delay_seconds = None
|
||||
|
||||
# Classify the error code. ``code_assist_rate_limited`` stays the default
|
||||
# for 429s; a more specific reason tag helps downstream callers (e.g. tests,
|
||||
# logs) without changing the rate_limit classification path.
|
||||
code = f"code_assist_http_{status}"
|
||||
if status == 401:
|
||||
code = "code_assist_unauthorized"
|
||||
elif status == 429:
|
||||
code = "code_assist_rate_limited"
|
||||
if error_reason == "MODEL_CAPACITY_EXHAUSTED":
|
||||
code = "code_assist_capacity_exhausted"
|
||||
|
||||
# Build a human-readable message. Keep the status + a raw-body tail for
|
||||
# debugging, but lead with a friendlier summary when we recognize the
|
||||
# Google signal.
|
||||
model_hint = ""
|
||||
if isinstance(error_metadata, dict):
|
||||
model_hint = str(error_metadata.get("model") or error_metadata.get("modelId") or "").strip()
|
||||
|
||||
if status == 429 and error_reason == "MODEL_CAPACITY_EXHAUSTED":
|
||||
target = model_hint or "this Gemini model"
|
||||
message = (
|
||||
f"Gemini capacity exhausted for {target} (Google-side throttle, "
|
||||
f"not a Hermes issue). Try a different Gemini model or set a "
|
||||
f"fallback_providers entry to a non-Gemini provider."
|
||||
)
|
||||
if retry_delay_seconds is not None:
|
||||
message += f" Google suggests retrying in {retry_delay_seconds:g}s."
|
||||
elif status == 429 and err_status == "RESOURCE_EXHAUSTED":
|
||||
message = (
|
||||
f"Gemini quota exhausted ({err_message or 'RESOURCE_EXHAUSTED'}). "
|
||||
f"Check /gquota for remaining daily requests."
|
||||
)
|
||||
if retry_delay_seconds is not None:
|
||||
message += f" Retry suggested in {retry_delay_seconds:g}s."
|
||||
elif status == 404:
|
||||
# Google returns 404 when a model has been retired or renamed.
|
||||
target = model_hint or (err_message or "model")
|
||||
message = (
|
||||
f"Code Assist 404: {target} is not available at "
|
||||
f"cloudcode-pa.googleapis.com. It may have been renamed or "
|
||||
f"retired. Check hermes_cli/models.py for the current list."
|
||||
)
|
||||
elif err_message:
|
||||
# Generic fallback with the parsed message.
|
||||
message = f"Code Assist HTTP {status} ({err_status or 'error'}): {err_message}"
|
||||
else:
|
||||
# Last-ditch fallback — raw body snippet.
|
||||
message = f"Code Assist returned HTTP {status}: {body_text[:500]}"
|
||||
|
||||
return CodeAssistError(
|
||||
message,
|
||||
code=code,
|
||||
status_code=status,
|
||||
response=response,
|
||||
retry_after=retry_delay_seconds,
|
||||
details={
|
||||
"status": err_status,
|
||||
"reason": error_reason,
|
||||
"metadata": error_metadata,
|
||||
"message": err_message,
|
||||
},
|
||||
)
|
||||
@@ -1,971 +0,0 @@
|
||||
"""OpenAI-compatible facade over Google AI Studio's native Gemini API.
|
||||
|
||||
Hermes keeps ``api_mode='chat_completions'`` for the ``gemini`` provider so the
|
||||
main agent loop can keep using its existing OpenAI-shaped message flow.
|
||||
This adapter is the transport shim that converts those OpenAI-style
|
||||
``messages[]`` / ``tools[]`` requests into Gemini's native
|
||||
``models/{model}:generateContent`` schema and converts the responses back.
|
||||
|
||||
Why this exists
|
||||
---------------
|
||||
Google's OpenAI-compatible endpoint has been brittle for Hermes's multi-turn
|
||||
agent/tool loop (auth churn, tool-call replay quirks, thought-signature
|
||||
requirements). The native Gemini API is the canonical path and avoids the
|
||||
OpenAI-compat layer entirely.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from agent.gemini_schema import sanitize_gemini_tool_parameters
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
|
||||
|
||||
|
||||
def is_native_gemini_base_url(base_url: str) -> bool:
|
||||
"""Return True when the endpoint speaks Gemini's native REST API."""
|
||||
normalized = str(base_url or "").strip().rstrip("/").lower()
|
||||
if not normalized:
|
||||
return False
|
||||
if "generativelanguage.googleapis.com" not in normalized:
|
||||
return False
|
||||
return not normalized.endswith("/openai")
|
||||
|
||||
|
||||
def probe_gemini_tier(
|
||||
api_key: str,
|
||||
base_url: str = DEFAULT_GEMINI_BASE_URL,
|
||||
*,
|
||||
model: str = "gemini-2.5-flash",
|
||||
timeout: float = 10.0,
|
||||
) -> str:
|
||||
"""Probe a Google AI Studio API key and return its tier.
|
||||
|
||||
Returns one of:
|
||||
|
||||
- ``"free"`` -- key is on the free tier (unusable with Hermes)
|
||||
- ``"paid"`` -- key is on a paid tier
|
||||
- ``"unknown"`` -- probe failed; callers should proceed without blocking.
|
||||
"""
|
||||
key = (api_key or "").strip()
|
||||
if not key:
|
||||
return "unknown"
|
||||
|
||||
normalized_base = str(base_url or DEFAULT_GEMINI_BASE_URL).strip().rstrip("/")
|
||||
if not normalized_base:
|
||||
normalized_base = DEFAULT_GEMINI_BASE_URL
|
||||
if normalized_base.lower().endswith("/openai"):
|
||||
normalized_base = normalized_base[: -len("/openai")]
|
||||
|
||||
url = f"{normalized_base}/models/{model}:generateContent"
|
||||
payload = {
|
||||
"contents": [{"role": "user", "parts": [{"text": "hi"}]}],
|
||||
"generationConfig": {"maxOutputTokens": 1},
|
||||
}
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=timeout) as client:
|
||||
resp = client.post(
|
||||
url,
|
||||
params={"key": key},
|
||||
json=payload,
|
||||
headers={"Content-Type": "application/json"},
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("probe_gemini_tier: network error: %s", exc)
|
||||
return "unknown"
|
||||
|
||||
headers_lower = {k.lower(): v for k, v in resp.headers.items()}
|
||||
rpd_header = headers_lower.get("x-ratelimit-limit-requests-per-day")
|
||||
if rpd_header:
|
||||
try:
|
||||
rpd_val = int(rpd_header)
|
||||
except (TypeError, ValueError):
|
||||
rpd_val = None
|
||||
# Published free-tier daily caps (Dec 2025):
|
||||
# gemini-2.5-pro: 100, gemini-2.5-flash: 250, flash-lite: 1000
|
||||
# Tier 1 starts at ~1500+ for Flash. We treat <= 1000 as free.
|
||||
if rpd_val is not None and rpd_val <= 1000:
|
||||
return "free"
|
||||
if rpd_val is not None and rpd_val > 1000:
|
||||
return "paid"
|
||||
|
||||
if resp.status_code == 429:
|
||||
body_text = ""
|
||||
try:
|
||||
body_text = resp.text or ""
|
||||
except Exception:
|
||||
body_text = ""
|
||||
if "free_tier" in body_text.lower():
|
||||
return "free"
|
||||
return "paid"
|
||||
|
||||
if 200 <= resp.status_code < 300:
|
||||
return "paid"
|
||||
|
||||
return "unknown"
|
||||
|
||||
|
||||
def is_free_tier_quota_error(error_message: str) -> bool:
|
||||
"""Return True when a Gemini 429 message indicates free-tier exhaustion."""
|
||||
if not error_message:
|
||||
return False
|
||||
return "free_tier" in error_message.lower()
|
||||
|
||||
|
||||
_FREE_TIER_GUIDANCE = (
|
||||
"\n\nYour Google API key is on the free tier (<= 250 requests/day for "
|
||||
"gemini-2.5-flash). Hermes typically makes 3-10 API calls per user turn, "
|
||||
"so the free tier is exhausted in a handful of messages and cannot sustain "
|
||||
"an agent session. Enable billing on your Google Cloud project and "
|
||||
"regenerate the key in a billing-enabled project: "
|
||||
"https://aistudio.google.com/apikey"
|
||||
)
|
||||
|
||||
|
||||
class GeminiAPIError(Exception):
|
||||
"""Error shape compatible with Hermes retry/error classification."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
message: str,
|
||||
*,
|
||||
code: str = "gemini_api_error",
|
||||
status_code: Optional[int] = None,
|
||||
response: Optional[httpx.Response] = None,
|
||||
retry_after: Optional[float] = None,
|
||||
details: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
super().__init__(message)
|
||||
self.code = code
|
||||
self.status_code = status_code
|
||||
self.response = response
|
||||
self.retry_after = retry_after
|
||||
self.details = details or {}
|
||||
|
||||
|
||||
def _coerce_content_to_text(content: Any) -> str:
|
||||
if content is None:
|
||||
return ""
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
if isinstance(content, list):
|
||||
pieces: List[str] = []
|
||||
for part in content:
|
||||
if isinstance(part, str):
|
||||
pieces.append(part)
|
||||
elif isinstance(part, dict) and part.get("type") == "text":
|
||||
text = part.get("text")
|
||||
if isinstance(text, str):
|
||||
pieces.append(text)
|
||||
return "\n".join(pieces)
|
||||
return str(content)
|
||||
|
||||
|
||||
def _extract_multimodal_parts(content: Any) -> List[Dict[str, Any]]:
|
||||
if not isinstance(content, list):
|
||||
text = _coerce_content_to_text(content)
|
||||
return [{"text": text}] if text else []
|
||||
|
||||
parts: List[Dict[str, Any]] = []
|
||||
for item in content:
|
||||
if isinstance(item, str):
|
||||
parts.append({"text": item})
|
||||
continue
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
ptype = item.get("type")
|
||||
if ptype == "text":
|
||||
text = item.get("text")
|
||||
if isinstance(text, str) and text:
|
||||
parts.append({"text": text})
|
||||
elif ptype == "image_url":
|
||||
url = ((item.get("image_url") or {}).get("url") or "")
|
||||
if not isinstance(url, str) or not url.startswith("data:"):
|
||||
continue
|
||||
try:
|
||||
header, encoded = url.split(",", 1)
|
||||
mime = header.split(":", 1)[1].split(";", 1)[0]
|
||||
raw = base64.b64decode(encoded)
|
||||
except Exception:
|
||||
continue
|
||||
parts.append(
|
||||
{
|
||||
"inlineData": {
|
||||
"mimeType": mime,
|
||||
"data": base64.b64encode(raw).decode("ascii"),
|
||||
}
|
||||
}
|
||||
)
|
||||
return parts
|
||||
|
||||
|
||||
def _tool_call_extra_signature(tool_call: Dict[str, Any]) -> Optional[str]:
|
||||
extra = tool_call.get("extra_content") or {}
|
||||
if not isinstance(extra, dict):
|
||||
return None
|
||||
google = extra.get("google") or extra.get("thought_signature")
|
||||
if isinstance(google, dict):
|
||||
sig = google.get("thought_signature") or google.get("thoughtSignature")
|
||||
return str(sig) if isinstance(sig, str) and sig else None
|
||||
if isinstance(google, str) and google:
|
||||
return google
|
||||
return None
|
||||
|
||||
|
||||
def _translate_tool_call_to_gemini(tool_call: Dict[str, Any]) -> Dict[str, Any]:
|
||||
fn = tool_call.get("function") or {}
|
||||
args_raw = fn.get("arguments", "")
|
||||
try:
|
||||
args = json.loads(args_raw) if isinstance(args_raw, str) and args_raw else {}
|
||||
except json.JSONDecodeError:
|
||||
args = {"_raw": args_raw}
|
||||
if not isinstance(args, dict):
|
||||
args = {"_value": args}
|
||||
|
||||
part: Dict[str, Any] = {
|
||||
"functionCall": {
|
||||
"name": str(fn.get("name") or ""),
|
||||
"args": args,
|
||||
}
|
||||
}
|
||||
thought_signature = _tool_call_extra_signature(tool_call)
|
||||
if thought_signature:
|
||||
part["thoughtSignature"] = thought_signature
|
||||
return part
|
||||
|
||||
|
||||
def _translate_tool_result_to_gemini(
|
||||
message: Dict[str, Any],
|
||||
tool_name_by_call_id: Optional[Dict[str, str]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
tool_name_by_call_id = tool_name_by_call_id or {}
|
||||
tool_call_id = str(message.get("tool_call_id") or "")
|
||||
name = str(
|
||||
message.get("name")
|
||||
or tool_name_by_call_id.get(tool_call_id)
|
||||
or tool_call_id
|
||||
or "tool"
|
||||
)
|
||||
content = _coerce_content_to_text(message.get("content"))
|
||||
try:
|
||||
parsed = json.loads(content) if content.strip().startswith(("{", "[")) else None
|
||||
except json.JSONDecodeError:
|
||||
parsed = None
|
||||
response = parsed if isinstance(parsed, dict) else {"output": content}
|
||||
return {
|
||||
"functionResponse": {
|
||||
"name": name,
|
||||
"response": response,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _build_gemini_contents(messages: List[Dict[str, Any]]) -> tuple[List[Dict[str, Any]], Optional[Dict[str, Any]]]:
|
||||
system_text_parts: List[str] = []
|
||||
contents: List[Dict[str, Any]] = []
|
||||
tool_name_by_call_id: Dict[str, str] = {}
|
||||
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
role = str(msg.get("role") or "user")
|
||||
|
||||
if role == "system":
|
||||
system_text_parts.append(_coerce_content_to_text(msg.get("content")))
|
||||
continue
|
||||
|
||||
if role in {"tool", "function"}:
|
||||
contents.append(
|
||||
{
|
||||
"role": "user",
|
||||
"parts": [
|
||||
_translate_tool_result_to_gemini(
|
||||
msg,
|
||||
tool_name_by_call_id=tool_name_by_call_id,
|
||||
)
|
||||
],
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
gemini_role = "model" if role == "assistant" else "user"
|
||||
parts: List[Dict[str, Any]] = []
|
||||
|
||||
content_parts = _extract_multimodal_parts(msg.get("content"))
|
||||
parts.extend(content_parts)
|
||||
|
||||
tool_calls = msg.get("tool_calls") or []
|
||||
if isinstance(tool_calls, list):
|
||||
for tool_call in tool_calls:
|
||||
if isinstance(tool_call, dict):
|
||||
tool_call_id = str(tool_call.get("id") or tool_call.get("call_id") or "")
|
||||
tool_name = str(((tool_call.get("function") or {}).get("name") or ""))
|
||||
if tool_call_id and tool_name:
|
||||
tool_name_by_call_id[tool_call_id] = tool_name
|
||||
parts.append(_translate_tool_call_to_gemini(tool_call))
|
||||
|
||||
if parts:
|
||||
contents.append({"role": gemini_role, "parts": parts})
|
||||
|
||||
system_instruction = None
|
||||
joined_system = "\n".join(part for part in system_text_parts if part).strip()
|
||||
if joined_system:
|
||||
system_instruction = {"parts": [{"text": joined_system}]}
|
||||
return contents, system_instruction
|
||||
|
||||
|
||||
def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]:
|
||||
if not isinstance(tools, list):
|
||||
return []
|
||||
declarations: List[Dict[str, Any]] = []
|
||||
for tool in tools:
|
||||
if not isinstance(tool, dict):
|
||||
continue
|
||||
fn = tool.get("function") or {}
|
||||
if not isinstance(fn, dict):
|
||||
continue
|
||||
name = fn.get("name")
|
||||
if not isinstance(name, str) or not name:
|
||||
continue
|
||||
decl: Dict[str, Any] = {"name": name}
|
||||
description = fn.get("description")
|
||||
if isinstance(description, str) and description:
|
||||
decl["description"] = description
|
||||
parameters = fn.get("parameters")
|
||||
if isinstance(parameters, dict):
|
||||
decl["parameters"] = sanitize_gemini_tool_parameters(parameters)
|
||||
declarations.append(decl)
|
||||
return [{"functionDeclarations": declarations}] if declarations else []
|
||||
|
||||
|
||||
def _translate_tool_choice_to_gemini(tool_choice: Any) -> Optional[Dict[str, Any]]:
|
||||
if tool_choice is None:
|
||||
return None
|
||||
if isinstance(tool_choice, str):
|
||||
if tool_choice == "auto":
|
||||
return {"functionCallingConfig": {"mode": "AUTO"}}
|
||||
if tool_choice == "required":
|
||||
return {"functionCallingConfig": {"mode": "ANY"}}
|
||||
if tool_choice == "none":
|
||||
return {"functionCallingConfig": {"mode": "NONE"}}
|
||||
if isinstance(tool_choice, dict):
|
||||
fn = tool_choice.get("function") or {}
|
||||
name = fn.get("name")
|
||||
if isinstance(name, str) and name:
|
||||
return {"functionCallingConfig": {"mode": "ANY", "allowedFunctionNames": [name]}}
|
||||
return None
|
||||
|
||||
|
||||
def _normalize_thinking_config(config: Any) -> Optional[Dict[str, Any]]:
|
||||
if not isinstance(config, dict) or not config:
|
||||
return None
|
||||
budget = config.get("thinkingBudget", config.get("thinking_budget"))
|
||||
include = config.get("includeThoughts", config.get("include_thoughts"))
|
||||
level = config.get("thinkingLevel", config.get("thinking_level"))
|
||||
normalized: Dict[str, Any] = {}
|
||||
if isinstance(budget, (int, float)):
|
||||
normalized["thinkingBudget"] = int(budget)
|
||||
if isinstance(include, bool):
|
||||
normalized["includeThoughts"] = include
|
||||
if isinstance(level, str) and level.strip():
|
||||
normalized["thinkingLevel"] = level.strip().lower()
|
||||
return normalized or None
|
||||
|
||||
|
||||
def build_gemini_request(
|
||||
*,
|
||||
messages: List[Dict[str, Any]],
|
||||
tools: Any = None,
|
||||
tool_choice: Any = None,
|
||||
temperature: Optional[float] = None,
|
||||
max_tokens: Optional[int] = None,
|
||||
top_p: Optional[float] = None,
|
||||
stop: Any = None,
|
||||
thinking_config: Any = None,
|
||||
) -> Dict[str, Any]:
|
||||
contents, system_instruction = _build_gemini_contents(messages)
|
||||
request: Dict[str, Any] = {"contents": contents}
|
||||
if system_instruction:
|
||||
request["systemInstruction"] = system_instruction
|
||||
|
||||
gemini_tools = _translate_tools_to_gemini(tools)
|
||||
if gemini_tools:
|
||||
request["tools"] = gemini_tools
|
||||
|
||||
tool_config = _translate_tool_choice_to_gemini(tool_choice)
|
||||
if tool_config:
|
||||
request["toolConfig"] = tool_config
|
||||
|
||||
generation_config: Dict[str, Any] = {}
|
||||
if temperature is not None:
|
||||
generation_config["temperature"] = temperature
|
||||
if max_tokens is not None:
|
||||
generation_config["maxOutputTokens"] = max_tokens
|
||||
if top_p is not None:
|
||||
generation_config["topP"] = top_p
|
||||
if stop:
|
||||
generation_config["stopSequences"] = stop if isinstance(stop, list) else [str(stop)]
|
||||
normalized_thinking = _normalize_thinking_config(thinking_config)
|
||||
if normalized_thinking:
|
||||
generation_config["thinkingConfig"] = normalized_thinking
|
||||
if generation_config:
|
||||
request["generationConfig"] = generation_config
|
||||
|
||||
return request
|
||||
|
||||
|
||||
def _map_gemini_finish_reason(reason: str) -> str:
|
||||
mapping = {
|
||||
"STOP": "stop",
|
||||
"MAX_TOKENS": "length",
|
||||
"SAFETY": "content_filter",
|
||||
"RECITATION": "content_filter",
|
||||
"OTHER": "stop",
|
||||
}
|
||||
return mapping.get(str(reason or "").upper(), "stop")
|
||||
|
||||
|
||||
def _tool_call_extra_from_part(part: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
sig = part.get("thoughtSignature")
|
||||
if isinstance(sig, str) and sig:
|
||||
return {"google": {"thought_signature": sig}}
|
||||
return None
|
||||
|
||||
|
||||
def _empty_response(model: str) -> SimpleNamespace:
|
||||
message = SimpleNamespace(
|
||||
role="assistant",
|
||||
content="",
|
||||
tool_calls=None,
|
||||
reasoning=None,
|
||||
reasoning_content=None,
|
||||
reasoning_details=None,
|
||||
)
|
||||
choice = SimpleNamespace(index=0, message=message, finish_reason="stop")
|
||||
usage = SimpleNamespace(
|
||||
prompt_tokens=0,
|
||||
completion_tokens=0,
|
||||
total_tokens=0,
|
||||
prompt_tokens_details=SimpleNamespace(cached_tokens=0),
|
||||
)
|
||||
return SimpleNamespace(
|
||||
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
|
||||
object="chat.completion",
|
||||
created=int(time.time()),
|
||||
model=model,
|
||||
choices=[choice],
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
|
||||
def translate_gemini_response(resp: Dict[str, Any], model: str) -> SimpleNamespace:
|
||||
candidates = resp.get("candidates") or []
|
||||
if not isinstance(candidates, list) or not candidates:
|
||||
return _empty_response(model)
|
||||
|
||||
cand = candidates[0] if isinstance(candidates[0], dict) else {}
|
||||
content_obj = cand.get("content") if isinstance(cand, dict) else {}
|
||||
parts = content_obj.get("parts") if isinstance(content_obj, dict) else []
|
||||
|
||||
text_pieces: List[str] = []
|
||||
reasoning_pieces: List[str] = []
|
||||
tool_calls: List[SimpleNamespace] = []
|
||||
|
||||
for index, part in enumerate(parts or []):
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
if part.get("thought") is True and isinstance(part.get("text"), str):
|
||||
reasoning_pieces.append(part["text"])
|
||||
continue
|
||||
if isinstance(part.get("text"), str):
|
||||
text_pieces.append(part["text"])
|
||||
continue
|
||||
fc = part.get("functionCall")
|
||||
if isinstance(fc, dict) and fc.get("name"):
|
||||
try:
|
||||
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
|
||||
except (TypeError, ValueError):
|
||||
args_str = "{}"
|
||||
tool_call = SimpleNamespace(
|
||||
id=f"call_{uuid.uuid4().hex[:12]}",
|
||||
type="function",
|
||||
index=index,
|
||||
function=SimpleNamespace(name=str(fc["name"]), arguments=args_str),
|
||||
)
|
||||
extra_content = _tool_call_extra_from_part(part)
|
||||
if extra_content:
|
||||
tool_call.extra_content = extra_content
|
||||
tool_calls.append(tool_call)
|
||||
|
||||
finish_reason = "tool_calls" if tool_calls else _map_gemini_finish_reason(str(cand.get("finishReason") or ""))
|
||||
usage_meta = resp.get("usageMetadata") or {}
|
||||
usage = SimpleNamespace(
|
||||
prompt_tokens=int(usage_meta.get("promptTokenCount") or 0),
|
||||
completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0),
|
||||
total_tokens=int(usage_meta.get("totalTokenCount") or 0),
|
||||
prompt_tokens_details=SimpleNamespace(
|
||||
cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0),
|
||||
),
|
||||
)
|
||||
reasoning = "".join(reasoning_pieces) or None
|
||||
message = SimpleNamespace(
|
||||
role="assistant",
|
||||
content="".join(text_pieces) if text_pieces else None,
|
||||
tool_calls=tool_calls or None,
|
||||
reasoning=reasoning,
|
||||
reasoning_content=reasoning,
|
||||
reasoning_details=None,
|
||||
)
|
||||
choice = SimpleNamespace(index=0, message=message, finish_reason=finish_reason)
|
||||
return SimpleNamespace(
|
||||
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
|
||||
object="chat.completion",
|
||||
created=int(time.time()),
|
||||
model=model,
|
||||
choices=[choice],
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
|
||||
class _GeminiStreamChunk(SimpleNamespace):
|
||||
pass
|
||||
|
||||
|
||||
def _make_stream_chunk(
|
||||
*,
|
||||
model: str,
|
||||
content: str = "",
|
||||
tool_call_delta: Optional[Dict[str, Any]] = None,
|
||||
finish_reason: Optional[str] = None,
|
||||
reasoning: str = "",
|
||||
) -> _GeminiStreamChunk:
|
||||
delta_kwargs: Dict[str, Any] = {
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": None,
|
||||
"reasoning": None,
|
||||
"reasoning_content": None,
|
||||
}
|
||||
if content:
|
||||
delta_kwargs["content"] = content
|
||||
if tool_call_delta is not None:
|
||||
tool_delta = SimpleNamespace(
|
||||
index=tool_call_delta.get("index", 0),
|
||||
id=tool_call_delta.get("id") or f"call_{uuid.uuid4().hex[:12]}",
|
||||
type="function",
|
||||
function=SimpleNamespace(
|
||||
name=tool_call_delta.get("name") or "",
|
||||
arguments=tool_call_delta.get("arguments") or "",
|
||||
),
|
||||
)
|
||||
extra_content = tool_call_delta.get("extra_content")
|
||||
if isinstance(extra_content, dict):
|
||||
tool_delta.extra_content = extra_content
|
||||
delta_kwargs["tool_calls"] = [tool_delta]
|
||||
if reasoning:
|
||||
delta_kwargs["reasoning"] = reasoning
|
||||
delta_kwargs["reasoning_content"] = reasoning
|
||||
delta = SimpleNamespace(**delta_kwargs)
|
||||
choice = SimpleNamespace(index=0, delta=delta, finish_reason=finish_reason)
|
||||
return _GeminiStreamChunk(
|
||||
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
|
||||
object="chat.completion.chunk",
|
||||
created=int(time.time()),
|
||||
model=model,
|
||||
choices=[choice],
|
||||
usage=None,
|
||||
)
|
||||
|
||||
|
||||
def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]:
|
||||
buffer = ""
|
||||
for chunk in response.iter_text():
|
||||
if not chunk:
|
||||
continue
|
||||
buffer += chunk
|
||||
while "\n" in buffer:
|
||||
line, buffer = buffer.split("\n", 1)
|
||||
line = line.rstrip("\r")
|
||||
if not line:
|
||||
continue
|
||||
if not line.startswith("data: "):
|
||||
continue
|
||||
data = line[6:]
|
||||
if data == "[DONE]":
|
||||
return
|
||||
try:
|
||||
payload = json.loads(data)
|
||||
except json.JSONDecodeError:
|
||||
logger.debug("Non-JSON Gemini SSE line: %s", data[:200])
|
||||
continue
|
||||
if isinstance(payload, dict):
|
||||
yield payload
|
||||
|
||||
|
||||
def translate_stream_event(event: Dict[str, Any], model: str, tool_call_indices: Dict[str, Dict[str, Any]]) -> List[_GeminiStreamChunk]:
|
||||
candidates = event.get("candidates") or []
|
||||
if not candidates:
|
||||
return []
|
||||
cand = candidates[0] if isinstance(candidates[0], dict) else {}
|
||||
parts = ((cand.get("content") or {}).get("parts") or []) if isinstance(cand, dict) else []
|
||||
chunks: List[_GeminiStreamChunk] = []
|
||||
|
||||
for part_index, part in enumerate(parts):
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
if part.get("thought") is True and isinstance(part.get("text"), str):
|
||||
chunks.append(_make_stream_chunk(model=model, reasoning=part["text"]))
|
||||
continue
|
||||
if isinstance(part.get("text"), str) and part["text"]:
|
||||
chunks.append(_make_stream_chunk(model=model, content=part["text"]))
|
||||
fc = part.get("functionCall")
|
||||
if isinstance(fc, dict) and fc.get("name"):
|
||||
name = str(fc["name"])
|
||||
try:
|
||||
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False, sort_keys=True)
|
||||
except (TypeError, ValueError):
|
||||
args_str = "{}"
|
||||
thought_signature = part.get("thoughtSignature") if isinstance(part.get("thoughtSignature"), str) else ""
|
||||
call_key = json.dumps(
|
||||
{
|
||||
"part_index": part_index,
|
||||
"name": name,
|
||||
"thought_signature": thought_signature,
|
||||
},
|
||||
sort_keys=True,
|
||||
)
|
||||
slot = tool_call_indices.get(call_key)
|
||||
if slot is None:
|
||||
slot = {
|
||||
"index": len(tool_call_indices),
|
||||
"id": f"call_{uuid.uuid4().hex[:12]}",
|
||||
"last_arguments": "",
|
||||
}
|
||||
tool_call_indices[call_key] = slot
|
||||
emitted_arguments = args_str
|
||||
last_arguments = str(slot.get("last_arguments") or "")
|
||||
if last_arguments:
|
||||
if args_str == last_arguments:
|
||||
emitted_arguments = ""
|
||||
elif args_str.startswith(last_arguments):
|
||||
emitted_arguments = args_str[len(last_arguments):]
|
||||
slot["last_arguments"] = args_str
|
||||
chunks.append(
|
||||
_make_stream_chunk(
|
||||
model=model,
|
||||
tool_call_delta={
|
||||
"index": slot["index"],
|
||||
"id": slot["id"],
|
||||
"name": name,
|
||||
"arguments": emitted_arguments,
|
||||
"extra_content": _tool_call_extra_from_part(part),
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
finish_reason_raw = str(cand.get("finishReason") or "")
|
||||
if finish_reason_raw:
|
||||
mapped = "tool_calls" if tool_call_indices else _map_gemini_finish_reason(finish_reason_raw)
|
||||
finish_chunk = _make_stream_chunk(model=model, finish_reason=mapped)
|
||||
# Attach usage from this event's usageMetadata so the streaming
|
||||
# loop in run_agent.py can record token counts (mirrors the
|
||||
# non-streaming path in translate_gemini_response).
|
||||
usage_meta = event.get("usageMetadata") or {}
|
||||
if usage_meta:
|
||||
finish_chunk.usage = SimpleNamespace(
|
||||
prompt_tokens=int(usage_meta.get("promptTokenCount") or 0),
|
||||
completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0),
|
||||
total_tokens=int(usage_meta.get("totalTokenCount") or 0),
|
||||
prompt_tokens_details=SimpleNamespace(
|
||||
cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0),
|
||||
),
|
||||
)
|
||||
chunks.append(finish_chunk)
|
||||
return chunks
|
||||
|
||||
|
||||
def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
|
||||
status = response.status_code
|
||||
body_text = ""
|
||||
body_json: Dict[str, Any] = {}
|
||||
try:
|
||||
body_text = response.text
|
||||
except Exception:
|
||||
body_text = ""
|
||||
if body_text:
|
||||
try:
|
||||
parsed = json.loads(body_text)
|
||||
if isinstance(parsed, dict):
|
||||
body_json = parsed
|
||||
except (ValueError, TypeError):
|
||||
body_json = {}
|
||||
|
||||
err_obj = body_json.get("error") if isinstance(body_json, dict) else None
|
||||
if not isinstance(err_obj, dict):
|
||||
err_obj = {}
|
||||
err_status = str(err_obj.get("status") or "").strip()
|
||||
err_message = str(err_obj.get("message") or "").strip()
|
||||
_raw_details = err_obj.get("details")
|
||||
details_list = _raw_details if isinstance(_raw_details, list) else []
|
||||
|
||||
reason = ""
|
||||
retry_after: Optional[float] = None
|
||||
metadata: Dict[str, Any] = {}
|
||||
for detail in details_list:
|
||||
if not isinstance(detail, dict):
|
||||
continue
|
||||
type_url = str(detail.get("@type") or "")
|
||||
if not reason and type_url.endswith("/google.rpc.ErrorInfo"):
|
||||
reason_value = detail.get("reason")
|
||||
if isinstance(reason_value, str):
|
||||
reason = reason_value
|
||||
md = detail.get("metadata")
|
||||
if isinstance(md, dict):
|
||||
metadata = md
|
||||
header_retry = response.headers.get("Retry-After") or response.headers.get("retry-after")
|
||||
if header_retry:
|
||||
try:
|
||||
retry_after = float(header_retry)
|
||||
except (TypeError, ValueError):
|
||||
retry_after = None
|
||||
|
||||
code = f"gemini_http_{status}"
|
||||
if status == 401:
|
||||
code = "gemini_unauthorized"
|
||||
elif status == 429:
|
||||
code = "gemini_rate_limited"
|
||||
elif status == 404:
|
||||
code = "gemini_model_not_found"
|
||||
|
||||
if err_message:
|
||||
message = f"Gemini HTTP {status} ({err_status or 'error'}): {err_message}"
|
||||
else:
|
||||
message = f"Gemini returned HTTP {status}: {body_text[:500]}"
|
||||
|
||||
# Free-tier quota exhaustion -> append actionable guidance so users who
|
||||
# bypassed the setup wizard (direct GOOGLE_API_KEY in .env) still learn
|
||||
# that the free tier cannot sustain an agent session.
|
||||
if status == 429 and is_free_tier_quota_error(err_message or body_text):
|
||||
message = message + _FREE_TIER_GUIDANCE
|
||||
|
||||
return GeminiAPIError(
|
||||
message,
|
||||
code=code,
|
||||
status_code=status,
|
||||
response=response,
|
||||
retry_after=retry_after,
|
||||
details={
|
||||
"status": err_status,
|
||||
"reason": reason,
|
||||
"metadata": metadata,
|
||||
"message": err_message,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class _GeminiChatCompletions:
|
||||
def __init__(self, client: "GeminiNativeClient"):
|
||||
self._client = client
|
||||
|
||||
def create(self, **kwargs: Any) -> Any:
|
||||
return self._client._create_chat_completion(**kwargs)
|
||||
|
||||
|
||||
class _AsyncGeminiChatCompletions:
|
||||
def __init__(self, client: "AsyncGeminiNativeClient"):
|
||||
self._client = client
|
||||
|
||||
async def create(self, **kwargs: Any) -> Any:
|
||||
return await self._client._create_chat_completion(**kwargs)
|
||||
|
||||
|
||||
class _GeminiChatNamespace:
|
||||
def __init__(self, client: "GeminiNativeClient"):
|
||||
self.completions = _GeminiChatCompletions(client)
|
||||
|
||||
|
||||
class _AsyncGeminiChatNamespace:
|
||||
def __init__(self, client: "AsyncGeminiNativeClient"):
|
||||
self.completions = _AsyncGeminiChatCompletions(client)
|
||||
|
||||
|
||||
class GeminiNativeClient:
|
||||
"""Minimal OpenAI-SDK-compatible facade over Gemini's native REST API."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
api_key: str,
|
||||
base_url: Optional[str] = None,
|
||||
default_headers: Optional[Dict[str, str]] = None,
|
||||
timeout: Any = None,
|
||||
http_client: Optional[httpx.Client] = None,
|
||||
**_: Any,
|
||||
) -> None:
|
||||
if not (api_key or "").strip():
|
||||
raise RuntimeError(
|
||||
"Gemini native client requires an API key, but none was provided. "
|
||||
"Set GOOGLE_API_KEY or GEMINI_API_KEY in your environment / ~/.hermes/.env "
|
||||
"(get one at https://aistudio.google.com/app/apikey), or run `hermes setup` "
|
||||
"to configure the Google provider."
|
||||
)
|
||||
self.api_key = api_key
|
||||
normalized_base = (base_url or DEFAULT_GEMINI_BASE_URL).rstrip("/")
|
||||
if normalized_base.endswith("/openai"):
|
||||
normalized_base = normalized_base[: -len("/openai")]
|
||||
self.base_url = normalized_base
|
||||
self._default_headers = dict(default_headers or {})
|
||||
self.chat = _GeminiChatNamespace(self)
|
||||
self.is_closed = False
|
||||
self._http = http_client or httpx.Client(
|
||||
timeout=timeout or httpx.Timeout(connect=15.0, read=600.0, write=30.0, pool=30.0)
|
||||
)
|
||||
|
||||
def close(self) -> None:
|
||||
self.is_closed = True
|
||||
try:
|
||||
self._http.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
self.close()
|
||||
|
||||
def _headers(self) -> Dict[str, str]:
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"Accept": "application/json",
|
||||
"x-goog-api-key": self.api_key,
|
||||
"User-Agent": "hermes-agent (gemini-native)",
|
||||
}
|
||||
headers.update(self._default_headers)
|
||||
return headers
|
||||
|
||||
@staticmethod
|
||||
def _advance_stream_iterator(iterator: Iterator[_GeminiStreamChunk]) -> tuple[bool, Optional[_GeminiStreamChunk]]:
|
||||
try:
|
||||
return False, next(iterator)
|
||||
except StopIteration:
|
||||
return True, None
|
||||
|
||||
def _create_chat_completion(
|
||||
self,
|
||||
*,
|
||||
model: str = "gemini-2.5-flash",
|
||||
messages: Optional[List[Dict[str, Any]]] = None,
|
||||
stream: bool = False,
|
||||
tools: Any = None,
|
||||
tool_choice: Any = None,
|
||||
temperature: Optional[float] = None,
|
||||
max_tokens: Optional[int] = None,
|
||||
top_p: Optional[float] = None,
|
||||
stop: Any = None,
|
||||
extra_body: Optional[Dict[str, Any]] = None,
|
||||
timeout: Any = None,
|
||||
**_: Any,
|
||||
) -> Any:
|
||||
thinking_config = None
|
||||
if isinstance(extra_body, dict):
|
||||
thinking_config = extra_body.get("thinking_config") or extra_body.get("thinkingConfig")
|
||||
|
||||
request = build_gemini_request(
|
||||
messages=messages or [],
|
||||
tools=tools,
|
||||
tool_choice=tool_choice,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
top_p=top_p,
|
||||
stop=stop,
|
||||
thinking_config=thinking_config,
|
||||
)
|
||||
|
||||
if stream:
|
||||
return self._stream_completion(model=model, request=request, timeout=timeout)
|
||||
|
||||
url = f"{self.base_url}/models/{model}:generateContent"
|
||||
response = self._http.post(url, json=request, headers=self._headers(), timeout=timeout)
|
||||
if response.status_code != 200:
|
||||
raise gemini_http_error(response)
|
||||
try:
|
||||
payload = response.json()
|
||||
except ValueError as exc:
|
||||
raise GeminiAPIError(
|
||||
f"Invalid JSON from Gemini native API: {exc}",
|
||||
code="gemini_invalid_json",
|
||||
status_code=response.status_code,
|
||||
response=response,
|
||||
) from exc
|
||||
return translate_gemini_response(payload, model=model)
|
||||
|
||||
def _stream_completion(self, *, model: str, request: Dict[str, Any], timeout: Any = None) -> Iterator[_GeminiStreamChunk]:
|
||||
url = f"{self.base_url}/models/{model}:streamGenerateContent?alt=sse"
|
||||
stream_headers = dict(self._headers())
|
||||
stream_headers["Accept"] = "text/event-stream"
|
||||
|
||||
def _generator() -> Iterator[_GeminiStreamChunk]:
|
||||
try:
|
||||
with self._http.stream("POST", url, json=request, headers=stream_headers, timeout=timeout) as response:
|
||||
if response.status_code != 200:
|
||||
response.read()
|
||||
raise gemini_http_error(response)
|
||||
tool_call_indices: Dict[str, Dict[str, Any]] = {}
|
||||
for event in _iter_sse_events(response):
|
||||
for chunk in translate_stream_event(event, model, tool_call_indices):
|
||||
yield chunk
|
||||
except httpx.HTTPError as exc:
|
||||
raise GeminiAPIError(
|
||||
f"Gemini streaming request failed: {exc}",
|
||||
code="gemini_stream_error",
|
||||
) from exc
|
||||
|
||||
return _generator()
|
||||
|
||||
|
||||
class AsyncGeminiNativeClient:
|
||||
"""Async wrapper used by auxiliary_client for native Gemini calls."""
|
||||
|
||||
def __init__(self, sync_client: GeminiNativeClient):
|
||||
self._sync = sync_client
|
||||
self.api_key = sync_client.api_key
|
||||
self.base_url = sync_client.base_url
|
||||
self.chat = _AsyncGeminiChatNamespace(self)
|
||||
# Expose the underlying sync client as _real_client so the auxiliary
|
||||
# cache's eviction-by-leaf-client helper (#23482) can find and drop
|
||||
# this async entry when the sync GeminiNativeClient is poisoned.
|
||||
# GeminiNativeClient is itself the leaf (no OpenAI client beneath
|
||||
# it), so we point at the sync_client directly.
|
||||
self._real_client = sync_client
|
||||
|
||||
async def _create_chat_completion(self, **kwargs: Any) -> Any:
|
||||
stream = bool(kwargs.get("stream"))
|
||||
result = await asyncio.to_thread(self._sync.chat.completions.create, **kwargs)
|
||||
if not stream:
|
||||
return result
|
||||
|
||||
async def _async_stream() -> Any:
|
||||
while True:
|
||||
done, chunk = await asyncio.to_thread(self._sync._advance_stream_iterator, result)
|
||||
if done:
|
||||
break
|
||||
yield chunk
|
||||
|
||||
return _async_stream()
|
||||
|
||||
async def close(self) -> None:
|
||||
await asyncio.to_thread(self._sync.close)
|
||||
@@ -1,99 +0,0 @@
|
||||
"""Helpers for translating OpenAI-style tool schemas to Gemini's schema subset."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict
|
||||
|
||||
# Gemini's ``FunctionDeclaration.parameters`` field accepts the ``Schema``
|
||||
# object, which is only a subset of OpenAPI 3.0 / JSON Schema. Strip fields
|
||||
# outside that subset before sending Hermes tool schemas to Google.
|
||||
_GEMINI_SCHEMA_ALLOWED_KEYS = {
|
||||
"type",
|
||||
"format",
|
||||
"title",
|
||||
"description",
|
||||
"nullable",
|
||||
"enum",
|
||||
"maxItems",
|
||||
"minItems",
|
||||
"properties",
|
||||
"required",
|
||||
"minProperties",
|
||||
"maxProperties",
|
||||
"minLength",
|
||||
"maxLength",
|
||||
"pattern",
|
||||
"example",
|
||||
"anyOf",
|
||||
"propertyOrdering",
|
||||
"default",
|
||||
"items",
|
||||
"minimum",
|
||||
"maximum",
|
||||
}
|
||||
|
||||
|
||||
def sanitize_gemini_schema(schema: Any) -> Dict[str, Any]:
|
||||
"""Return a Gemini-compatible copy of a tool parameter schema.
|
||||
|
||||
Hermes tool schemas are OpenAI-flavored JSON Schema and may contain keys
|
||||
such as ``$schema`` or ``additionalProperties`` that Google's Gemini
|
||||
``Schema`` object rejects. This helper preserves the documented Gemini
|
||||
subset and recursively sanitizes nested ``properties`` / ``items`` /
|
||||
``anyOf`` definitions.
|
||||
"""
|
||||
|
||||
if not isinstance(schema, dict):
|
||||
return {}
|
||||
|
||||
cleaned: Dict[str, Any] = {}
|
||||
for key, value in schema.items():
|
||||
if key not in _GEMINI_SCHEMA_ALLOWED_KEYS:
|
||||
continue
|
||||
if key == "properties":
|
||||
if not isinstance(value, dict):
|
||||
continue
|
||||
props: Dict[str, Any] = {}
|
||||
for prop_name, prop_schema in value.items():
|
||||
if not isinstance(prop_name, str):
|
||||
continue
|
||||
props[prop_name] = sanitize_gemini_schema(prop_schema)
|
||||
cleaned[key] = props
|
||||
continue
|
||||
if key == "items":
|
||||
cleaned[key] = sanitize_gemini_schema(value)
|
||||
continue
|
||||
if key == "anyOf":
|
||||
if not isinstance(value, list):
|
||||
continue
|
||||
cleaned[key] = [
|
||||
sanitize_gemini_schema(item)
|
||||
for item in value
|
||||
if isinstance(item, dict)
|
||||
]
|
||||
continue
|
||||
cleaned[key] = value
|
||||
|
||||
# Gemini's Schema validator requires every ``enum`` entry to be a string,
|
||||
# even when the parent ``type`` is ``integer`` / ``number`` / ``boolean``.
|
||||
# OpenAI / OpenRouter / Anthropic accept typed enums (e.g. Discord's
|
||||
# ``auto_archive_duration: {type: integer, enum: [60, 1440, 4320, 10080]}``),
|
||||
# so we only drop the ``enum`` when it would collide with Gemini's rule.
|
||||
# Keeping ``type: integer`` plus the human-readable description gives the
|
||||
# model enough guidance; the tool handler still validates the value.
|
||||
enum_val = cleaned.get("enum")
|
||||
type_val = cleaned.get("type")
|
||||
if isinstance(enum_val, list) and type_val in {"integer", "number", "boolean"}:
|
||||
if any(not isinstance(item, str) for item in enum_val):
|
||||
cleaned.pop("enum", None)
|
||||
|
||||
return cleaned
|
||||
|
||||
|
||||
def sanitize_gemini_tool_parameters(parameters: Any) -> Dict[str, Any]:
|
||||
"""Normalize tool parameters to a valid Gemini object schema."""
|
||||
|
||||
cleaned = sanitize_gemini_schema(parameters)
|
||||
if not cleaned:
|
||||
return {"type": "object", "properties": {}}
|
||||
return cleaned
|
||||
@@ -1,452 +0,0 @@
|
||||
"""Google Code Assist API client — project discovery, onboarding, quota.
|
||||
|
||||
The Code Assist API powers Google's official gemini-cli. It sits at
|
||||
``cloudcode-pa.googleapis.com`` and provides:
|
||||
|
||||
- Free tier access (generous daily quota) for personal Google accounts
|
||||
- Paid tier access via GCP projects with billing / Workspace / Standard / Enterprise
|
||||
|
||||
This module handles the control-plane dance needed before inference:
|
||||
|
||||
1. ``load_code_assist()`` — probe the user's account to learn what tier they're on
|
||||
and whether a ``cloudaicompanionProject`` is already assigned.
|
||||
2. ``onboard_user()`` — if the user hasn't been onboarded yet (new account, fresh
|
||||
free tier, etc.), call this with the chosen tier + project id. Supports LRO
|
||||
polling for slow provisioning.
|
||||
3. ``retrieve_user_quota()`` — fetch the ``buckets[]`` array showing remaining
|
||||
quota per model, used by the ``/gquota`` slash command.
|
||||
|
||||
VPC-SC handling: enterprise accounts under a VPC Service Controls perimeter
|
||||
will get ``SECURITY_POLICY_VIOLATED`` on ``load_code_assist``. We catch this
|
||||
and force the account to ``standard-tier`` so the call chain still succeeds.
|
||||
|
||||
Derived from opencode-gemini-auth (MIT) and clawdbot/extensions/google. The
|
||||
request/response shapes are specific to Google's internal Code Assist API,
|
||||
documented nowhere public — we copy them from the reference implementations.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Constants
|
||||
# =============================================================================
|
||||
|
||||
CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com"
|
||||
|
||||
# Fallback endpoints tried when prod returns an error during project discovery
|
||||
FALLBACK_ENDPOINTS = [
|
||||
"https://daily-cloudcode-pa.sandbox.googleapis.com",
|
||||
"https://autopush-cloudcode-pa.sandbox.googleapis.com",
|
||||
]
|
||||
|
||||
# Tier identifiers that Google's API uses
|
||||
FREE_TIER_ID = "free-tier"
|
||||
LEGACY_TIER_ID = "legacy-tier"
|
||||
STANDARD_TIER_ID = "standard-tier"
|
||||
|
||||
# Default HTTP headers matching gemini-cli's fingerprint.
|
||||
# Google may reject unrecognized User-Agents on these internal endpoints.
|
||||
_GEMINI_CLI_USER_AGENT = "google-api-nodejs-client/9.15.1 (gzip)"
|
||||
_X_GOOG_API_CLIENT = "gl-node/24.0.0"
|
||||
_DEFAULT_REQUEST_TIMEOUT = 30.0
|
||||
_ONBOARDING_POLL_ATTEMPTS = 12
|
||||
_ONBOARDING_POLL_INTERVAL_SECONDS = 5.0
|
||||
|
||||
|
||||
class CodeAssistError(RuntimeError):
|
||||
"""Exception raised by the Code Assist (``cloudcode-pa``) integration.
|
||||
|
||||
Carries HTTP status / response / retry-after metadata so the agent's
|
||||
``error_classifier._extract_status_code`` and the main loop's Retry-After
|
||||
handling (which walks ``error.response.headers``) pick up the right
|
||||
signals. Without these, 429s from the OAuth path look like opaque
|
||||
``RuntimeError`` and skip the rate-limit path.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
message: str,
|
||||
*,
|
||||
code: str = "code_assist_error",
|
||||
status_code: Optional[int] = None,
|
||||
response: Any = None,
|
||||
retry_after: Optional[float] = None,
|
||||
details: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
super().__init__(message)
|
||||
self.code = code
|
||||
# ``status_code`` is picked up by ``agent.error_classifier._extract_status_code``
|
||||
# so a 429 from Code Assist classifies as FailoverReason.rate_limit and
|
||||
# triggers the main loop's fallback_providers chain the same way SDK
|
||||
# errors do.
|
||||
self.status_code = status_code
|
||||
# ``response`` is the underlying ``httpx.Response`` (or a shim with a
|
||||
# ``.headers`` mapping and ``.json()`` method). The main loop reads
|
||||
# ``error.response.headers["Retry-After"]`` to honor Google's retry
|
||||
# hints when the backend throttles us.
|
||||
self.response = response
|
||||
# Parsed ``Retry-After`` seconds (kept separately for convenience —
|
||||
# Google returns retry hints in both the header and the error body's
|
||||
# ``google.rpc.RetryInfo`` details, and we pick whichever we found).
|
||||
self.retry_after = retry_after
|
||||
# Parsed structured error details from the Google error envelope
|
||||
# (e.g. ``{"reason": "MODEL_CAPACITY_EXHAUSTED", "status": "RESOURCE_EXHAUSTED"}``).
|
||||
# Useful for logging and for tests that want to assert on specifics.
|
||||
self.details = details or {}
|
||||
|
||||
|
||||
class ProjectIdRequiredError(CodeAssistError):
|
||||
def __init__(self, message: str = "GCP project id required for this tier") -> None:
|
||||
super().__init__(message, code="code_assist_project_id_required")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# HTTP primitive (auth via Bearer token passed per-call)
|
||||
# =============================================================================
|
||||
|
||||
def _build_headers(access_token: str, *, user_agent_model: str = "") -> Dict[str, str]:
|
||||
ua = _GEMINI_CLI_USER_AGENT
|
||||
if user_agent_model:
|
||||
ua = f"{ua} model/{user_agent_model}"
|
||||
return {
|
||||
"Content-Type": "application/json",
|
||||
"Accept": "application/json",
|
||||
"Authorization": f"Bearer {access_token}",
|
||||
"User-Agent": ua,
|
||||
"X-Goog-Api-Client": _X_GOOG_API_CLIENT,
|
||||
"x-activity-request-id": str(uuid.uuid4()),
|
||||
}
|
||||
|
||||
|
||||
def _client_metadata() -> Dict[str, str]:
|
||||
"""Match Google's gemini-cli exactly — unrecognized metadata may be rejected."""
|
||||
return {
|
||||
"ideType": "IDE_UNSPECIFIED",
|
||||
"platform": "PLATFORM_UNSPECIFIED",
|
||||
"pluginType": "GEMINI",
|
||||
}
|
||||
|
||||
|
||||
def _post_json(
|
||||
url: str,
|
||||
body: Dict[str, Any],
|
||||
access_token: str,
|
||||
*,
|
||||
timeout: float = _DEFAULT_REQUEST_TIMEOUT,
|
||||
user_agent_model: str = "",
|
||||
) -> Dict[str, Any]:
|
||||
data = json.dumps(body).encode("utf-8")
|
||||
request = urllib.request.Request(
|
||||
url, data=data, method="POST",
|
||||
headers=_build_headers(access_token, user_agent_model=user_agent_model),
|
||||
)
|
||||
try:
|
||||
with urllib.request.urlopen(request, timeout=timeout) as response:
|
||||
raw = response.read().decode("utf-8", errors="replace")
|
||||
return json.loads(raw) if raw else {}
|
||||
except urllib.error.HTTPError as exc:
|
||||
detail = ""
|
||||
try:
|
||||
detail = exc.read().decode("utf-8", errors="replace")
|
||||
except Exception:
|
||||
pass
|
||||
# Special case: VPC-SC violation should be distinguishable
|
||||
if _is_vpc_sc_violation(detail):
|
||||
raise CodeAssistError(
|
||||
f"VPC-SC policy violation: {detail}",
|
||||
code="code_assist_vpc_sc",
|
||||
) from exc
|
||||
raise CodeAssistError(
|
||||
f"Code Assist HTTP {exc.code}: {detail or exc.reason}",
|
||||
code=f"code_assist_http_{exc.code}",
|
||||
) from exc
|
||||
except urllib.error.URLError as exc:
|
||||
raise CodeAssistError(
|
||||
f"Code Assist request failed: {exc}",
|
||||
code="code_assist_network_error",
|
||||
) from exc
|
||||
|
||||
|
||||
def _is_vpc_sc_violation(body: str) -> bool:
|
||||
"""Detect a VPC Service Controls violation from a response body."""
|
||||
if not body:
|
||||
return False
|
||||
try:
|
||||
parsed = json.loads(body)
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
return "SECURITY_POLICY_VIOLATED" in body
|
||||
# Walk the nested error structure Google uses
|
||||
error = parsed.get("error") if isinstance(parsed, dict) else None
|
||||
if not isinstance(error, dict):
|
||||
return False
|
||||
details = error.get("details") or []
|
||||
if isinstance(details, list):
|
||||
for item in details:
|
||||
if isinstance(item, dict):
|
||||
reason = item.get("reason") or ""
|
||||
if reason == "SECURITY_POLICY_VIOLATED":
|
||||
return True
|
||||
msg = str(error.get("message", ""))
|
||||
return "SECURITY_POLICY_VIOLATED" in msg
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# load_code_assist — discovers current tier + assigned project
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
|
||||
class CodeAssistProjectInfo:
|
||||
"""Result from ``load_code_assist``."""
|
||||
current_tier_id: str = ""
|
||||
cloudaicompanion_project: str = "" # Google-managed project (free tier)
|
||||
allowed_tiers: List[str] = field(default_factory=list)
|
||||
raw: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
def load_code_assist(
|
||||
access_token: str,
|
||||
*,
|
||||
project_id: str = "",
|
||||
user_agent_model: str = "",
|
||||
) -> CodeAssistProjectInfo:
|
||||
"""Call ``POST /v1internal:loadCodeAssist`` with prod → sandbox fallback.
|
||||
|
||||
Returns whatever tier + project info Google reports. On VPC-SC violations,
|
||||
returns a synthetic ``standard-tier`` result so the chain can continue.
|
||||
"""
|
||||
body: Dict[str, Any] = {
|
||||
"metadata": {
|
||||
"duetProject": project_id,
|
||||
**_client_metadata(),
|
||||
},
|
||||
}
|
||||
if project_id:
|
||||
body["cloudaicompanionProject"] = project_id
|
||||
|
||||
endpoints = [CODE_ASSIST_ENDPOINT] + FALLBACK_ENDPOINTS
|
||||
last_err: Optional[Exception] = None
|
||||
for endpoint in endpoints:
|
||||
url = f"{endpoint}/v1internal:loadCodeAssist"
|
||||
try:
|
||||
resp = _post_json(url, body, access_token, user_agent_model=user_agent_model)
|
||||
return _parse_load_response(resp)
|
||||
except CodeAssistError as exc:
|
||||
if exc.code == "code_assist_vpc_sc":
|
||||
logger.info("VPC-SC violation on %s — defaulting to standard-tier", endpoint)
|
||||
return CodeAssistProjectInfo(
|
||||
current_tier_id=STANDARD_TIER_ID,
|
||||
cloudaicompanion_project=project_id,
|
||||
)
|
||||
last_err = exc
|
||||
logger.warning("loadCodeAssist failed on %s: %s", endpoint, exc)
|
||||
continue
|
||||
if last_err:
|
||||
raise last_err
|
||||
return CodeAssistProjectInfo()
|
||||
|
||||
|
||||
def _parse_load_response(resp: Dict[str, Any]) -> CodeAssistProjectInfo:
|
||||
current_tier = resp.get("currentTier") or {}
|
||||
tier_id = str(current_tier.get("id") or "") if isinstance(current_tier, dict) else ""
|
||||
project = str(resp.get("cloudaicompanionProject") or "")
|
||||
allowed = resp.get("allowedTiers") or []
|
||||
allowed_ids: List[str] = []
|
||||
if isinstance(allowed, list):
|
||||
for t in allowed:
|
||||
if isinstance(t, dict):
|
||||
tid = str(t.get("id") or "")
|
||||
if tid:
|
||||
allowed_ids.append(tid)
|
||||
return CodeAssistProjectInfo(
|
||||
current_tier_id=tier_id,
|
||||
cloudaicompanion_project=project,
|
||||
allowed_tiers=allowed_ids,
|
||||
raw=resp,
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# onboard_user — provisions a new user on a tier (with LRO polling)
|
||||
# =============================================================================
|
||||
|
||||
def onboard_user(
|
||||
access_token: str,
|
||||
*,
|
||||
tier_id: str,
|
||||
project_id: str = "",
|
||||
user_agent_model: str = "",
|
||||
) -> Dict[str, Any]:
|
||||
"""Call ``POST /v1internal:onboardUser`` to provision the user.
|
||||
|
||||
For paid tiers, ``project_id`` is REQUIRED (raises ProjectIdRequiredError).
|
||||
For free tiers, ``project_id`` is optional — Google will assign one.
|
||||
|
||||
Returns the final operation response. Polls ``/v1internal/<name>`` for up
|
||||
to ``_ONBOARDING_POLL_ATTEMPTS`` × ``_ONBOARDING_POLL_INTERVAL_SECONDS``
|
||||
(default: 12 × 5s = 1 min).
|
||||
"""
|
||||
if tier_id != FREE_TIER_ID and tier_id != LEGACY_TIER_ID and not project_id:
|
||||
raise ProjectIdRequiredError(
|
||||
f"Tier {tier_id!r} requires a GCP project id. "
|
||||
"Set HERMES_GEMINI_PROJECT_ID or GOOGLE_CLOUD_PROJECT."
|
||||
)
|
||||
|
||||
body: Dict[str, Any] = {
|
||||
"tierId": tier_id,
|
||||
"metadata": _client_metadata(),
|
||||
}
|
||||
if project_id:
|
||||
body["cloudaicompanionProject"] = project_id
|
||||
|
||||
endpoint = CODE_ASSIST_ENDPOINT
|
||||
url = f"{endpoint}/v1internal:onboardUser"
|
||||
resp = _post_json(url, body, access_token, user_agent_model=user_agent_model)
|
||||
|
||||
# Poll if LRO (long-running operation)
|
||||
if not resp.get("done"):
|
||||
op_name = resp.get("name", "")
|
||||
if not op_name:
|
||||
return resp
|
||||
for attempt in range(_ONBOARDING_POLL_ATTEMPTS):
|
||||
time.sleep(_ONBOARDING_POLL_INTERVAL_SECONDS)
|
||||
poll_url = f"{endpoint}/v1internal/{op_name}"
|
||||
try:
|
||||
poll_resp = _post_json(poll_url, {}, access_token, user_agent_model=user_agent_model)
|
||||
except CodeAssistError as exc:
|
||||
logger.warning("Onboarding poll attempt %d failed: %s", attempt + 1, exc)
|
||||
continue
|
||||
if poll_resp.get("done"):
|
||||
return poll_resp
|
||||
logger.warning("Onboarding did not complete within %d attempts", _ONBOARDING_POLL_ATTEMPTS)
|
||||
return resp
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# retrieve_user_quota — for /gquota
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
|
||||
class QuotaBucket:
|
||||
model_id: str
|
||||
token_type: str = ""
|
||||
remaining_fraction: float = 0.0
|
||||
reset_time_iso: str = ""
|
||||
raw: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
def retrieve_user_quota(
|
||||
access_token: str,
|
||||
*,
|
||||
project_id: str = "",
|
||||
user_agent_model: str = "",
|
||||
) -> List[QuotaBucket]:
|
||||
"""Call ``POST /v1internal:retrieveUserQuota`` and parse ``buckets[]``."""
|
||||
body: Dict[str, Any] = {}
|
||||
if project_id:
|
||||
body["project"] = project_id
|
||||
url = f"{CODE_ASSIST_ENDPOINT}/v1internal:retrieveUserQuota"
|
||||
resp = _post_json(url, body, access_token, user_agent_model=user_agent_model)
|
||||
raw_buckets = resp.get("buckets") or []
|
||||
buckets: List[QuotaBucket] = []
|
||||
if not isinstance(raw_buckets, list):
|
||||
return buckets
|
||||
for b in raw_buckets:
|
||||
if not isinstance(b, dict):
|
||||
continue
|
||||
buckets.append(QuotaBucket(
|
||||
model_id=str(b.get("modelId") or ""),
|
||||
token_type=str(b.get("tokenType") or ""),
|
||||
remaining_fraction=float(b.get("remainingFraction") or 0.0),
|
||||
reset_time_iso=str(b.get("resetTime") or ""),
|
||||
raw=b,
|
||||
))
|
||||
return buckets
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Project context resolution
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
|
||||
class ProjectContext:
|
||||
"""Resolved state for a given OAuth session."""
|
||||
project_id: str = "" # effective project id sent on requests
|
||||
managed_project_id: str = "" # Google-assigned project (free tier)
|
||||
tier_id: str = ""
|
||||
source: str = "" # "env", "config", "discovered", "onboarded"
|
||||
|
||||
|
||||
def resolve_project_context(
|
||||
access_token: str,
|
||||
*,
|
||||
configured_project_id: str = "",
|
||||
env_project_id: str = "",
|
||||
user_agent_model: str = "",
|
||||
) -> ProjectContext:
|
||||
"""Figure out what project id + tier to use for requests.
|
||||
|
||||
Priority:
|
||||
1. If configured_project_id or env_project_id is set, use that directly
|
||||
and short-circuit (no discovery needed).
|
||||
2. Otherwise call loadCodeAssist to see what Google says.
|
||||
3. If no tier assigned yet, onboard the user (free tier default).
|
||||
"""
|
||||
# Short-circuit: caller provided a project id
|
||||
if configured_project_id:
|
||||
return ProjectContext(
|
||||
project_id=configured_project_id,
|
||||
tier_id=STANDARD_TIER_ID, # assume paid since they specified one
|
||||
source="config",
|
||||
)
|
||||
if env_project_id:
|
||||
return ProjectContext(
|
||||
project_id=env_project_id,
|
||||
tier_id=STANDARD_TIER_ID,
|
||||
source="env",
|
||||
)
|
||||
|
||||
# Discover via loadCodeAssist
|
||||
info = load_code_assist(access_token, user_agent_model=user_agent_model)
|
||||
|
||||
effective_project = info.cloudaicompanion_project
|
||||
tier = info.current_tier_id
|
||||
|
||||
if not tier:
|
||||
# User hasn't been onboarded — provision them on free tier
|
||||
onboard_resp = onboard_user(
|
||||
access_token,
|
||||
tier_id=FREE_TIER_ID,
|
||||
project_id="",
|
||||
user_agent_model=user_agent_model,
|
||||
)
|
||||
# Re-parse from the onboard response
|
||||
response_body = onboard_resp.get("response") or {}
|
||||
if isinstance(response_body, dict):
|
||||
effective_project = (
|
||||
effective_project
|
||||
or str(response_body.get("cloudaicompanionProject") or "")
|
||||
)
|
||||
tier = FREE_TIER_ID
|
||||
source = "onboarded"
|
||||
else:
|
||||
source = "discovered"
|
||||
|
||||
return ProjectContext(
|
||||
project_id=effective_project,
|
||||
managed_project_id=effective_project if tier == FREE_TIER_ID else "",
|
||||
tier_id=tier,
|
||||
source=source,
|
||||
)
|
||||
File diff suppressed because it is too large
Load Diff
258
agent/i18n.py
258
agent/i18n.py
@@ -1,258 +0,0 @@
|
||||
"""Lightweight internationalization (i18n) for Hermes static user-facing messages.
|
||||
|
||||
Scope (thin slice, by design): only the highest-impact static strings shown
|
||||
to the user by Hermes itself -- approval prompts, a handful of gateway slash
|
||||
command replies, restart-drain notices. Agent-generated output, log lines,
|
||||
error tracebacks, tool outputs, and slash-command descriptions all stay in
|
||||
English.
|
||||
|
||||
Catalog files live under ``locales/<lang>.yaml`` at the repo root. Each
|
||||
catalog is a flat dict keyed by dotted paths (e.g. ``approval.choose`` or
|
||||
``gateway.approval_expired``). Missing keys fall back to English; if English
|
||||
is missing too, the key path itself is returned so a broken catalog never
|
||||
crashes the agent.
|
||||
|
||||
Usage::
|
||||
|
||||
from agent.i18n import t
|
||||
print(t("approval.choose_long")) # current lang
|
||||
print(t("gateway.draining", count=3)) # {count} formatted
|
||||
print(t("approval.choose_long", lang="zh")) # explicit override
|
||||
|
||||
Language resolution order:
|
||||
1. Explicit ``lang=`` argument passed to :func:`t`
|
||||
2. ``HERMES_LANGUAGE`` environment variable (for tests / quick override)
|
||||
3. ``display.language`` from config.yaml
|
||||
4. ``"en"`` (baseline)
|
||||
|
||||
Supported languages: en, zh, ja, de, es, fr, tr, uk. Unknown values fall back to en.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SUPPORTED_LANGUAGES: tuple[str, ...] = (
|
||||
"en", "zh", "zh-hant", "ja", "de", "es", "fr", "tr", "uk",
|
||||
"af", "ko", "it", "ga", "pt", "ru", "hu",
|
||||
)
|
||||
DEFAULT_LANGUAGE = "en"
|
||||
|
||||
# Accept a few natural aliases so users who type "chinese" / "zh-CN" / "jp"
|
||||
# get the right catalog instead of silently falling back to English.
|
||||
_LANGUAGE_ALIASES: dict[str, str] = {
|
||||
"english": "en", "en-us": "en", "en-gb": "en",
|
||||
# Simplified Chinese — explicit codes route here; bare "chinese" / "mandarin"
|
||||
# also default to Simplified since that's the larger user base.
|
||||
"chinese": "zh", "mandarin": "zh", "zh-cn": "zh", "zh-hans": "zh", "zh-sg": "zh",
|
||||
# Traditional Chinese — distinct catalog. Cover Taiwan / Hong Kong / Macau
|
||||
# locale tags plus the common "traditional" alias.
|
||||
"traditional-chinese": "zh-hant", "traditional_chinese": "zh-hant",
|
||||
"zh-tw": "zh-hant", "zh-hk": "zh-hant", "zh-mo": "zh-hant",
|
||||
"japanese": "ja", "jp": "ja", "ja-jp": "ja",
|
||||
"german": "de", "deutsch": "de", "de-de": "de", "de-at": "de", "de-ch": "de",
|
||||
"spanish": "es", "español": "es", "espanol": "es", "es-es": "es", "es-mx": "es", "es-ar": "es",
|
||||
"french": "fr", "français": "fr", "france": "fr", "fr-fr": "fr", "fr-be": "fr", "fr-ca": "fr", "fr-ch": "fr",
|
||||
"ukrainian": "uk", "ukrainisch": "uk", "українська": "uk", "uk-ua": "uk", "ua": "uk",
|
||||
"turkish": "tr", "türkçe": "tr", "tr-tr": "tr",
|
||||
# Afrikaans — South African Dutch-derived language; "af-ZA" is the common BCP-47 tag.
|
||||
"afrikaans": "af", "af-za": "af",
|
||||
# Korean
|
||||
"korean": "ko", "한국어": "ko", "ko-kr": "ko",
|
||||
# Italian
|
||||
"italian": "it", "italiano": "it", "it-it": "it", "it-ch": "it",
|
||||
# Irish (Gaeilge) — ga is the BCP-47 code
|
||||
"irish": "ga", "gaeilge": "ga", "ga-ie": "ga",
|
||||
# Portuguese — bare "portuguese" routes to European Portuguese; pt-br
|
||||
# is in the same family but rendered identically here (no separate br catalog).
|
||||
"portuguese": "pt", "português": "pt", "portugues": "pt",
|
||||
"pt-pt": "pt", "pt-br": "pt", "brazilian": "pt", "brasileiro": "pt",
|
||||
# Russian
|
||||
"russian": "ru", "русский": "ru", "ru-ru": "ru",
|
||||
# Hungarian
|
||||
"hungarian": "hu", "magyar": "hu", "hu-hu": "hu",
|
||||
}
|
||||
|
||||
_catalog_cache: dict[str, dict[str, str]] = {}
|
||||
_catalog_lock = threading.Lock()
|
||||
|
||||
|
||||
def _locales_dir() -> Path:
|
||||
"""Return the directory containing locale YAML files.
|
||||
|
||||
Lives next to the repo root so both the bundled install and editable
|
||||
checkouts find it without PYTHONPATH gymnastics.
|
||||
"""
|
||||
# agent/i18n.py -> agent/ -> repo root
|
||||
return Path(__file__).resolve().parent.parent / "locales"
|
||||
|
||||
|
||||
def _normalize_lang(value: Any) -> str:
|
||||
"""Normalize a user-supplied language value to a supported code.
|
||||
|
||||
Accepts supported codes directly, common aliases (``chinese`` -> ``zh``),
|
||||
and case-insensitive regional tags (``zh-CN`` -> ``zh``). Returns the
|
||||
default language for unknown values.
|
||||
"""
|
||||
if not isinstance(value, str):
|
||||
return DEFAULT_LANGUAGE
|
||||
key = value.strip().lower()
|
||||
if not key:
|
||||
return DEFAULT_LANGUAGE
|
||||
if key in SUPPORTED_LANGUAGES:
|
||||
return key
|
||||
if key in _LANGUAGE_ALIASES:
|
||||
return _LANGUAGE_ALIASES[key]
|
||||
# Try stripping a region suffix (e.g. "pt-br" -> "pt" won't be supported,
|
||||
# but "zh-CN" -> "zh" will).
|
||||
base = key.split("-", 1)[0]
|
||||
if base in SUPPORTED_LANGUAGES:
|
||||
return base
|
||||
return DEFAULT_LANGUAGE
|
||||
|
||||
|
||||
def _load_catalog(lang: str) -> dict[str, str]:
|
||||
"""Load and flatten one locale YAML file into a dotted-key dict.
|
||||
|
||||
YAML files can be nested for human readability; this produces the flat
|
||||
key space :func:`t` expects. Cached per-language for the process.
|
||||
"""
|
||||
with _catalog_lock:
|
||||
cached = _catalog_cache.get(lang)
|
||||
if cached is not None:
|
||||
return cached
|
||||
|
||||
path = _locales_dir() / f"{lang}.yaml"
|
||||
if not path.is_file():
|
||||
logger.debug("i18n catalog missing for %s at %s", lang, path)
|
||||
with _catalog_lock:
|
||||
_catalog_cache[lang] = {}
|
||||
return {}
|
||||
|
||||
try:
|
||||
import yaml # PyYAML is already a hermes dependency
|
||||
with path.open("r", encoding="utf-8") as f:
|
||||
raw = yaml.safe_load(f) or {}
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to load i18n catalog %s: %s", path, exc)
|
||||
with _catalog_lock:
|
||||
_catalog_cache[lang] = {}
|
||||
return {}
|
||||
|
||||
flat: dict[str, str] = {}
|
||||
_flatten_into(raw, "", flat)
|
||||
with _catalog_lock:
|
||||
_catalog_cache[lang] = flat
|
||||
return flat
|
||||
|
||||
|
||||
def _flatten_into(node: Any, prefix: str, out: dict[str, str]) -> None:
|
||||
if isinstance(node, dict):
|
||||
for key, value in node.items():
|
||||
child_key = f"{prefix}.{key}" if prefix else str(key)
|
||||
_flatten_into(value, child_key, out)
|
||||
elif isinstance(node, str):
|
||||
out[prefix] = node
|
||||
# Non-string, non-dict leaves are ignored -- catalogs are text-only.
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def _config_language_cached() -> str | None:
|
||||
"""Read ``display.language`` from config.yaml once per process.
|
||||
|
||||
Cached because ``t()`` is called in hot paths (every approval prompt,
|
||||
every gateway reply) and re-reading YAML each call would be wasteful.
|
||||
``reset_language_cache()`` clears this when config changes at runtime
|
||||
(e.g. after the setup wizard).
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
lang = (cfg.get("display") or {}).get("language")
|
||||
if lang:
|
||||
return _normalize_lang(lang)
|
||||
except Exception as exc:
|
||||
logger.debug("Could not read display.language from config: %s", exc)
|
||||
return None
|
||||
|
||||
|
||||
def reset_language_cache() -> None:
|
||||
"""Invalidate cached language resolution and catalogs.
|
||||
|
||||
Call after :func:`hermes_cli.config.save_config` if a running process
|
||||
needs to pick up a changed ``display.language`` without restart.
|
||||
"""
|
||||
_config_language_cached.cache_clear()
|
||||
with _catalog_lock:
|
||||
_catalog_cache.clear()
|
||||
|
||||
|
||||
def get_language() -> str:
|
||||
"""Resolve the active language using env > config > default order."""
|
||||
env_lang = os.environ.get("HERMES_LANGUAGE")
|
||||
if env_lang:
|
||||
return _normalize_lang(env_lang)
|
||||
cfg_lang = _config_language_cached()
|
||||
if cfg_lang:
|
||||
return cfg_lang
|
||||
return DEFAULT_LANGUAGE
|
||||
|
||||
|
||||
def t(key: str, lang: str | None = None, **format_kwargs: Any) -> str:
|
||||
"""Translate a dotted key to the active language.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
key
|
||||
Dotted path into the catalog, e.g. ``"approval.choose_long"``.
|
||||
lang
|
||||
Explicit language override. Takes precedence over env + config.
|
||||
**format_kwargs
|
||||
``str.format`` substitution arguments (``t("gateway.drain", count=3)``
|
||||
expects a catalog entry with a ``{count}`` placeholder).
|
||||
|
||||
Returns
|
||||
-------
|
||||
The translated string, or the English fallback if the key is missing in
|
||||
the target language, or the bare key if English is also missing.
|
||||
"""
|
||||
target = _normalize_lang(lang) if lang else get_language()
|
||||
catalog = _load_catalog(target)
|
||||
value = catalog.get(key)
|
||||
|
||||
if value is None and target != DEFAULT_LANGUAGE:
|
||||
# Fall through to English rather than showing a key path to the user.
|
||||
value = _load_catalog(DEFAULT_LANGUAGE).get(key)
|
||||
|
||||
if value is None:
|
||||
# Last-ditch: return the key itself. A broken catalog should not
|
||||
# crash anything; it just looks ugly until someone fixes it.
|
||||
logger.debug("i18n miss: key=%r lang=%r", key, target)
|
||||
value = key
|
||||
|
||||
if format_kwargs:
|
||||
try:
|
||||
return value.format(**format_kwargs)
|
||||
except (KeyError, IndexError, ValueError) as exc:
|
||||
logger.warning(
|
||||
"i18n format failed for key=%r lang=%r kwargs=%r: %s",
|
||||
key, target, format_kwargs, exc,
|
||||
)
|
||||
return value
|
||||
return value
|
||||
|
||||
|
||||
__all__ = [
|
||||
"SUPPORTED_LANGUAGES",
|
||||
"DEFAULT_LANGUAGE",
|
||||
"t",
|
||||
"get_language",
|
||||
"reset_language_cache",
|
||||
]
|
||||
@@ -1,242 +0,0 @@
|
||||
"""
|
||||
Image Generation Provider ABC
|
||||
=============================
|
||||
|
||||
Defines the pluggable-backend interface for image generation. Providers register
|
||||
instances via ``PluginContext.register_image_gen_provider()``; the active one
|
||||
(selected via ``image_gen.provider`` in ``config.yaml``) services every
|
||||
``image_generate`` tool call.
|
||||
|
||||
Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
|
||||
as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
|
||||
via ``plugins.enabled``).
|
||||
|
||||
Response shape
|
||||
--------------
|
||||
All providers return a dict that :func:`success_response` / :func:`error_response`
|
||||
produce. The tool wrapper JSON-serializes it. Keys:
|
||||
|
||||
success bool
|
||||
image str | None URL or absolute file path
|
||||
model str provider-specific model identifier
|
||||
prompt str echoed prompt
|
||||
aspect_ratio str "landscape" | "square" | "portrait"
|
||||
provider str provider name (for diagnostics)
|
||||
error str only when success=False
|
||||
error_type str only when success=False
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
import base64
|
||||
import datetime
|
||||
import logging
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
VALID_ASPECT_RATIOS: Tuple[str, ...] = ("landscape", "square", "portrait")
|
||||
DEFAULT_ASPECT_RATIO = "landscape"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ABC
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class ImageGenProvider(abc.ABC):
|
||||
"""Abstract base class for an image generation backend.
|
||||
|
||||
Subclasses must implement :meth:`generate`. Everything else has sane
|
||||
defaults — override only what your provider needs.
|
||||
"""
|
||||
|
||||
@property
|
||||
@abc.abstractmethod
|
||||
def name(self) -> str:
|
||||
"""Stable short identifier used in ``image_gen.provider`` config.
|
||||
|
||||
Lowercase, no spaces. Examples: ``fal``, ``openai``, ``replicate``.
|
||||
"""
|
||||
|
||||
@property
|
||||
def display_name(self) -> str:
|
||||
"""Human-readable label shown in ``hermes tools``. Defaults to ``name.title()``."""
|
||||
return self.name.title()
|
||||
|
||||
def is_available(self) -> bool:
|
||||
"""Return True when this provider can service calls.
|
||||
|
||||
Typically checks for a required API key. Default: True
|
||||
(providers with no external dependencies are always available).
|
||||
"""
|
||||
return True
|
||||
|
||||
def list_models(self) -> List[Dict[str, Any]]:
|
||||
"""Return catalog entries for ``hermes tools`` model picker.
|
||||
|
||||
Each entry::
|
||||
|
||||
{
|
||||
"id": "gpt-image-1.5", # required
|
||||
"display": "GPT Image 1.5", # optional; defaults to id
|
||||
"speed": "~10s", # optional
|
||||
"strengths": "...", # optional
|
||||
"price": "$...", # optional
|
||||
}
|
||||
|
||||
Default: empty list (provider has no user-selectable models).
|
||||
"""
|
||||
return []
|
||||
|
||||
def get_setup_schema(self) -> Dict[str, Any]:
|
||||
"""Return provider metadata for the ``hermes tools`` picker.
|
||||
|
||||
Used by ``tools_config.py`` to inject this provider as a row in
|
||||
the Image Generation provider list. Shape::
|
||||
|
||||
{
|
||||
"name": "OpenAI", # picker label
|
||||
"badge": "paid", # optional short tag
|
||||
"tag": "One-line description...", # optional subtitle
|
||||
"env_vars": [ # keys to prompt for
|
||||
{"key": "OPENAI_API_KEY",
|
||||
"prompt": "OpenAI API key",
|
||||
"url": "https://platform.openai.com/api-keys"},
|
||||
],
|
||||
}
|
||||
|
||||
Default: minimal entry derived from ``display_name``. Override to
|
||||
expose API key prompts and custom badges.
|
||||
"""
|
||||
return {
|
||||
"name": self.display_name,
|
||||
"badge": "",
|
||||
"tag": "",
|
||||
"env_vars": [],
|
||||
}
|
||||
|
||||
def default_model(self) -> Optional[str]:
|
||||
"""Return the default model id, or None if not applicable."""
|
||||
models = self.list_models()
|
||||
if models:
|
||||
return models[0].get("id")
|
||||
return None
|
||||
|
||||
@abc.abstractmethod
|
||||
def generate(
|
||||
self,
|
||||
prompt: str,
|
||||
aspect_ratio: str = DEFAULT_ASPECT_RATIO,
|
||||
**kwargs: Any,
|
||||
) -> Dict[str, Any]:
|
||||
"""Generate an image.
|
||||
|
||||
Implementations should return the dict from :func:`success_response`
|
||||
or :func:`error_response`. ``kwargs`` may contain forward-compat
|
||||
parameters future versions of the schema will expose — implementations
|
||||
should ignore unknown keys.
|
||||
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def resolve_aspect_ratio(value: Optional[str]) -> str:
|
||||
"""Clamp an aspect_ratio value to the valid set, defaulting to landscape.
|
||||
|
||||
Invalid values are coerced rather than rejected so the tool surface is
|
||||
forgiving of agent mistakes.
|
||||
"""
|
||||
if not isinstance(value, str):
|
||||
return DEFAULT_ASPECT_RATIO
|
||||
v = value.strip().lower()
|
||||
if v in VALID_ASPECT_RATIOS:
|
||||
return v
|
||||
return DEFAULT_ASPECT_RATIO
|
||||
|
||||
|
||||
def _images_cache_dir() -> Path:
|
||||
"""Return ``$HERMES_HOME/cache/images/``, creating parents as needed."""
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
path = get_hermes_home() / "cache" / "images"
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
return path
|
||||
|
||||
|
||||
def save_b64_image(
|
||||
b64_data: str,
|
||||
*,
|
||||
prefix: str = "image",
|
||||
extension: str = "png",
|
||||
) -> Path:
|
||||
"""Decode base64 image data and write it under ``$HERMES_HOME/cache/images/``.
|
||||
|
||||
Returns the absolute :class:`Path` to the saved file.
|
||||
|
||||
Filename format: ``<prefix>_<YYYYMMDD_HHMMSS>_<short-uuid>.<ext>``.
|
||||
"""
|
||||
raw = base64.b64decode(b64_data)
|
||||
ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
short = uuid.uuid4().hex[:8]
|
||||
path = _images_cache_dir() / f"{prefix}_{ts}_{short}.{extension}"
|
||||
path.write_bytes(raw)
|
||||
return path
|
||||
|
||||
|
||||
def success_response(
|
||||
*,
|
||||
image: str,
|
||||
model: str,
|
||||
prompt: str,
|
||||
aspect_ratio: str,
|
||||
provider: str,
|
||||
extra: Optional[Dict[str, Any]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build a uniform success response dict.
|
||||
|
||||
``image`` may be an HTTP URL or an absolute filesystem path (for b64
|
||||
providers like OpenAI). Callers that need to pass through additional
|
||||
backend-specific fields can supply ``extra``.
|
||||
"""
|
||||
payload: Dict[str, Any] = {
|
||||
"success": True,
|
||||
"image": image,
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"aspect_ratio": aspect_ratio,
|
||||
"provider": provider,
|
||||
}
|
||||
if extra:
|
||||
for k, v in extra.items():
|
||||
payload.setdefault(k, v)
|
||||
return payload
|
||||
|
||||
|
||||
def error_response(
|
||||
*,
|
||||
error: str,
|
||||
error_type: str = "provider_error",
|
||||
provider: str = "",
|
||||
model: str = "",
|
||||
prompt: str = "",
|
||||
aspect_ratio: str = DEFAULT_ASPECT_RATIO,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build a uniform error response dict."""
|
||||
return {
|
||||
"success": False,
|
||||
"image": None,
|
||||
"error": error,
|
||||
"error_type": error_type,
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"aspect_ratio": aspect_ratio,
|
||||
"provider": provider,
|
||||
}
|
||||
@@ -1,145 +0,0 @@
|
||||
"""
|
||||
Image Generation Provider Registry
|
||||
==================================
|
||||
|
||||
Central map of registered providers. Populated by plugins at import-time via
|
||||
``PluginContext.register_image_gen_provider()``; consumed by the
|
||||
``image_generate`` tool to dispatch each call to the active backend.
|
||||
|
||||
Active selection
|
||||
----------------
|
||||
The active provider is chosen by ``image_gen.provider`` in ``config.yaml``.
|
||||
If unset, :func:`get_active_provider` applies fallback logic:
|
||||
|
||||
1. If exactly one provider is registered, use it.
|
||||
2. Otherwise if a provider named ``fal`` is registered, use it (legacy
|
||||
default — matches pre-plugin behavior).
|
||||
3. Otherwise return ``None`` (the tool surfaces a helpful error pointing
|
||||
the user at ``hermes tools``).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from agent.image_gen_provider import ImageGenProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_providers: Dict[str, ImageGenProvider] = {}
|
||||
_lock = threading.Lock()
|
||||
|
||||
|
||||
def register_provider(provider: ImageGenProvider) -> None:
|
||||
"""Register an image generation provider.
|
||||
|
||||
Re-registration (same ``name``) overwrites the previous entry and logs
|
||||
a debug message — this makes hot-reload scenarios (tests, dev loops)
|
||||
behave predictably.
|
||||
"""
|
||||
if not isinstance(provider, ImageGenProvider):
|
||||
raise TypeError(
|
||||
f"register_provider() expects an ImageGenProvider instance, "
|
||||
f"got {type(provider).__name__}"
|
||||
)
|
||||
name = provider.name
|
||||
if not isinstance(name, str) or not name.strip():
|
||||
raise ValueError("Image gen provider .name must be a non-empty string")
|
||||
with _lock:
|
||||
existing = _providers.get(name)
|
||||
_providers[name] = provider
|
||||
if existing is not None:
|
||||
logger.debug("Image gen provider '%s' re-registered (was %r)", name, type(existing).__name__)
|
||||
else:
|
||||
logger.debug("Registered image gen provider '%s' (%s)", name, type(provider).__name__)
|
||||
|
||||
|
||||
def list_providers() -> List[ImageGenProvider]:
|
||||
"""Return all registered providers, sorted by name."""
|
||||
with _lock:
|
||||
items = list(_providers.values())
|
||||
return sorted(items, key=lambda p: p.name)
|
||||
|
||||
|
||||
def get_provider(name: str) -> Optional[ImageGenProvider]:
|
||||
"""Return the provider registered under *name*, or None."""
|
||||
if not isinstance(name, str):
|
||||
return None
|
||||
with _lock:
|
||||
return _providers.get(name.strip())
|
||||
|
||||
|
||||
def get_active_provider() -> Optional[ImageGenProvider]:
|
||||
"""Resolve the currently-active provider.
|
||||
|
||||
Reads ``image_gen.provider`` from config.yaml; falls back per the
|
||||
module docstring.
|
||||
|
||||
**Availability semantics** (mirrors :mod:`agent.web_search_registry`):
|
||||
|
||||
- When ``image_gen.provider`` is explicitly set, the configured
|
||||
provider is returned even if :meth:`ImageGenProvider.is_available`
|
||||
reports False — the dispatcher surfaces a precise "X_API_KEY is not
|
||||
set" error rather than silently switching backends.
|
||||
- When ``image_gen.provider`` is unset, the fallback path (single-
|
||||
provider shortcut and the FAL legacy preference) is filtered by
|
||||
``is_available()`` so we don't pick a provider the user has no
|
||||
credentials for.
|
||||
"""
|
||||
configured: Optional[str] = None
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
cfg = load_config()
|
||||
section = cfg.get("image_gen") if isinstance(cfg, dict) else None
|
||||
if isinstance(section, dict):
|
||||
raw = section.get("provider")
|
||||
if isinstance(raw, str) and raw.strip():
|
||||
configured = raw.strip()
|
||||
except Exception as exc:
|
||||
logger.debug("Could not read image_gen.provider from config: %s", exc)
|
||||
|
||||
with _lock:
|
||||
snapshot = dict(_providers)
|
||||
|
||||
def _is_available_safe(p: ImageGenProvider) -> bool:
|
||||
"""Wrap ``is_available()`` so a buggy provider doesn't kill resolution."""
|
||||
try:
|
||||
return bool(p.is_available())
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.debug("image_gen provider %s.is_available() raised %s", p.name, exc)
|
||||
return False
|
||||
|
||||
# 1. Explicit config wins — return regardless of is_available() so the
|
||||
# user gets a precise downstream error message rather than a silent
|
||||
# backend switch.
|
||||
if configured:
|
||||
provider = snapshot.get(configured)
|
||||
if provider is not None:
|
||||
return provider
|
||||
logger.debug(
|
||||
"image_gen.provider='%s' configured but not registered; falling back",
|
||||
configured,
|
||||
)
|
||||
|
||||
# 2. Fallback: single registered provider — but only if it's actually
|
||||
# available (no credentials = don't surface it as "active").
|
||||
available = [p for p in snapshot.values() if _is_available_safe(p)]
|
||||
if len(available) == 1:
|
||||
return available[0]
|
||||
|
||||
# 3. Fallback: prefer legacy FAL for backward compat, when available.
|
||||
fal = snapshot.get("fal")
|
||||
if fal is not None and _is_available_safe(fal):
|
||||
return fal
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _reset_for_tests() -> None:
|
||||
"""Clear the registry. **Test-only.**"""
|
||||
with _lock:
|
||||
_providers.clear()
|
||||
@@ -1,301 +0,0 @@
|
||||
"""Routing helpers for inbound user-attached images.
|
||||
|
||||
Two modes:
|
||||
|
||||
native — attach images as OpenAI-style ``image_url`` content parts on the
|
||||
user turn. Provider adapters (Anthropic, Gemini, Bedrock, Codex,
|
||||
OpenAI chat.completions) already translate these into their
|
||||
vendor-specific multimodal formats.
|
||||
|
||||
text — run ``vision_analyze`` on each image up-front and prepend the
|
||||
description to the user's text. The model never sees the pixels;
|
||||
it only sees a lossy text summary. This is the pre-existing
|
||||
behaviour and still the right choice for non-vision models.
|
||||
|
||||
The decision is made once per message turn by :func:`decide_image_input_mode`.
|
||||
It reads ``agent.image_input_mode`` from config.yaml (``auto`` | ``native``
|
||||
| ``text``, default ``auto``) and the active model's capability metadata.
|
||||
|
||||
In ``auto`` mode:
|
||||
- If the user has explicitly configured ``auxiliary.vision.provider``
|
||||
(i.e. not ``auto`` and not empty), we assume they want the text pipeline
|
||||
regardless of the main model — they've opted in to a specific vision
|
||||
backend for a reason (cost, quality, local-only, etc.).
|
||||
- Otherwise, if the active model reports ``supports_vision=True`` in its
|
||||
models.dev metadata, we attach natively.
|
||||
- Otherwise (non-vision model, no explicit override), we fall back to text.
|
||||
|
||||
This keeps ``vision_analyze`` surfaced as a tool in every session — skills
|
||||
and agent flows that chain it (browser screenshots, deeper inspection of
|
||||
URL-referenced images, style-gating loops) keep working. The routing only
|
||||
affects *how user-attached images on the current turn* are presented to the
|
||||
main model.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import logging
|
||||
import mimetypes
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_VALID_MODES = frozenset({"auto", "native", "text"})
|
||||
|
||||
|
||||
def _coerce_mode(raw: Any) -> str:
|
||||
"""Normalize a config value into one of the valid modes."""
|
||||
if not isinstance(raw, str):
|
||||
return "auto"
|
||||
val = raw.strip().lower()
|
||||
if val in _VALID_MODES:
|
||||
return val
|
||||
return "auto"
|
||||
|
||||
|
||||
def _explicit_aux_vision_override(cfg: Optional[Dict[str, Any]]) -> bool:
|
||||
"""True when the user configured a specific auxiliary vision backend.
|
||||
|
||||
An explicit override means the user *wants* the text pipeline (they're
|
||||
paying for a dedicated vision model), so we don't silently bypass it.
|
||||
"""
|
||||
if not isinstance(cfg, dict):
|
||||
return False
|
||||
aux = cfg.get("auxiliary") or {}
|
||||
if not isinstance(aux, dict):
|
||||
return False
|
||||
vision = aux.get("vision") or {}
|
||||
if not isinstance(vision, dict):
|
||||
return False
|
||||
|
||||
provider = str(vision.get("provider") or "").strip().lower()
|
||||
model = str(vision.get("model") or "").strip()
|
||||
base_url = str(vision.get("base_url") or "").strip()
|
||||
|
||||
# "auto" / "" / blank = not explicit
|
||||
if provider in {"", "auto"} and not model and not base_url:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _lookup_supports_vision(provider: str, model: str) -> Optional[bool]:
|
||||
"""Return True/False if we can resolve caps, None if unknown."""
|
||||
if not provider or not model:
|
||||
return None
|
||||
try:
|
||||
from agent.models_dev import get_model_capabilities
|
||||
caps = get_model_capabilities(provider, model)
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
logger.debug("image_routing: caps lookup failed for %s:%s — %s", provider, model, exc)
|
||||
return None
|
||||
if caps is None:
|
||||
return None
|
||||
return bool(caps.supports_vision)
|
||||
|
||||
|
||||
def decide_image_input_mode(
|
||||
provider: str,
|
||||
model: str,
|
||||
cfg: Optional[Dict[str, Any]],
|
||||
) -> str:
|
||||
"""Return ``"native"`` or ``"text"`` for the given turn.
|
||||
|
||||
Args:
|
||||
provider: active inference provider ID (e.g. ``"anthropic"``, ``"openrouter"``).
|
||||
model: active model slug as it would be sent to the provider.
|
||||
cfg: loaded config.yaml dict, or None. When None, behaves as auto.
|
||||
"""
|
||||
mode_cfg = "auto"
|
||||
if isinstance(cfg, dict):
|
||||
agent_cfg = cfg.get("agent") or {}
|
||||
if isinstance(agent_cfg, dict):
|
||||
mode_cfg = _coerce_mode(agent_cfg.get("image_input_mode"))
|
||||
|
||||
if mode_cfg == "native":
|
||||
return "native"
|
||||
if mode_cfg == "text":
|
||||
return "text"
|
||||
|
||||
# auto
|
||||
if _explicit_aux_vision_override(cfg):
|
||||
return "text"
|
||||
|
||||
supports = _lookup_supports_vision(provider, model)
|
||||
if supports is True:
|
||||
return "native"
|
||||
return "text"
|
||||
|
||||
|
||||
# Image size handling is REACTIVE rather than proactive: we attempt native
|
||||
# attachment at full size regardless of provider, and rely on
|
||||
# ``run_agent._try_shrink_image_parts_in_messages`` to shrink + retry if
|
||||
# the provider rejects the request (e.g. Anthropic's hard 5 MB per-image
|
||||
# ceiling returned as HTTP 400 "image exceeds 5 MB maximum").
|
||||
#
|
||||
# Why reactive: our knowledge of provider ceilings is partial and evolving
|
||||
# (OpenAI accepts 49 MB+, Anthropic 5 MB, Gemini 100 MB, others unknown).
|
||||
# A proactive per-provider table would be stale the moment a provider raises
|
||||
# or lowers its limit, and silently degrading quality for users on providers
|
||||
# that would have accepted the full image is the worse failure mode.
|
||||
# The shrink-on-reject path loses 1 API call + maybe 1s of Pillow work when
|
||||
# it fires, which is cheaper than permanent quality loss.
|
||||
|
||||
|
||||
def _sniff_mime_from_bytes(raw: bytes) -> Optional[str]:
|
||||
"""Detect image MIME from magic bytes. Returns None if unrecognised.
|
||||
|
||||
Filename-based detection (``mimetypes.guess_type``) is unreliable when
|
||||
upstream platforms lie about content-type. Discord, for example, can
|
||||
serve a PNG with ``content_type=image/webp`` for proxied/animated
|
||||
stickers, custom emoji previews, or images uploaded via certain bots.
|
||||
Anthropic strictly validates that declared media_type matches the
|
||||
actual bytes and returns HTTP 400 on mismatch, so we sniff to be safe.
|
||||
"""
|
||||
if not raw:
|
||||
return None
|
||||
# PNG: 89 50 4E 47 0D 0A 1A 0A
|
||||
if raw.startswith(b"\x89PNG\r\n\x1a\n"):
|
||||
return "image/png"
|
||||
# JPEG: FF D8 FF
|
||||
if raw.startswith(b"\xff\xd8\xff"):
|
||||
return "image/jpeg"
|
||||
# GIF87a / GIF89a
|
||||
if raw[:6] in {b"GIF87a", b"GIF89a"}:
|
||||
return "image/gif"
|
||||
# WEBP: "RIFF" .... "WEBP"
|
||||
if len(raw) >= 12 and raw[:4] == b"RIFF" and raw[8:12] == b"WEBP":
|
||||
return "image/webp"
|
||||
# BMP: "BM"
|
||||
if raw.startswith(b"BM"):
|
||||
return "image/bmp"
|
||||
# HEIC/HEIF: ftypheic / ftypheix / ftypmif1 / ftypmsf1 etc.
|
||||
if len(raw) >= 12 and raw[4:8] == b"ftyp" and raw[8:12] in {
|
||||
b"heic", b"heix", b"hevc", b"hevx", b"mif1", b"msf1", b"heim", b"heis",
|
||||
}:
|
||||
return "image/heic"
|
||||
return None
|
||||
|
||||
|
||||
def _guess_mime(path: Path, raw: Optional[bytes] = None) -> str:
|
||||
"""Return image MIME type for *path*.
|
||||
|
||||
If *raw* bytes are provided, magic-byte sniffing wins (authoritative).
|
||||
Otherwise we fall back to ``mimetypes`` then suffix-based defaults.
|
||||
"""
|
||||
if raw is not None:
|
||||
sniffed = _sniff_mime_from_bytes(raw)
|
||||
if sniffed:
|
||||
return sniffed
|
||||
mime, _ = mimetypes.guess_type(str(path))
|
||||
if mime and mime.startswith("image/"):
|
||||
return mime
|
||||
# mimetypes on some Linux distros mis-maps .jpg; default to jpeg when
|
||||
# the suffix looks imagey.
|
||||
suffix = path.suffix.lower()
|
||||
return {
|
||||
".jpg": "image/jpeg",
|
||||
".jpeg": "image/jpeg",
|
||||
".png": "image/png",
|
||||
".gif": "image/gif",
|
||||
".webp": "image/webp",
|
||||
".bmp": "image/bmp",
|
||||
}.get(suffix, "image/jpeg")
|
||||
|
||||
|
||||
def _file_to_data_url(path: Path) -> Optional[str]:
|
||||
"""Encode a local image as a base64 data URL at its native size.
|
||||
|
||||
Size limits are NOT enforced here — the agent retry loop
|
||||
(``run_agent._try_shrink_image_parts_in_messages``) shrinks on the
|
||||
provider's first rejection. Keeping this simple means providers that
|
||||
accept large images (OpenAI 49 MB+, Gemini 100 MB) don't pay a silent
|
||||
quality tax just because one other provider is stricter.
|
||||
|
||||
Returns None only if the file can't be read (missing, permission
|
||||
denied, etc.); the caller reports those paths in ``skipped``.
|
||||
"""
|
||||
try:
|
||||
raw = path.read_bytes()
|
||||
except Exception as exc:
|
||||
logger.warning("image_routing: failed to read %s — %s", path, exc)
|
||||
return None
|
||||
mime = _guess_mime(path, raw=raw)
|
||||
b64 = base64.b64encode(raw).decode("ascii")
|
||||
return f"data:{mime};base64,{b64}"
|
||||
|
||||
|
||||
def build_native_content_parts(
|
||||
user_text: str,
|
||||
image_paths: List[str],
|
||||
) -> Tuple[List[Dict[str, Any]], List[str]]:
|
||||
"""Build an OpenAI-style ``content`` list for a user turn.
|
||||
|
||||
Shape:
|
||||
[{"type": "text", "text": "...\\n\\n[Image attached at: /local/path]"},
|
||||
{"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
|
||||
...]
|
||||
|
||||
The local path of each successfully attached image is appended to the
|
||||
text part as ``[Image attached at: <path>]``. The model still sees the
|
||||
pixels via the ``image_url`` part (full native vision); the path note
|
||||
just gives it a string handle so MCP/skill tools that take an image
|
||||
path or URL argument can be invoked on the same image without an
|
||||
extra round-trip. This parallels the text-mode hint produced by
|
||||
``Runner._enrich_message_with_vision`` (``vision_analyze using image_url:
|
||||
<path>``) so behaviour is consistent across both image input modes.
|
||||
|
||||
Images are attached at their native size. If a provider rejects the
|
||||
request because an image is too large (e.g. Anthropic's 5 MB per-image
|
||||
ceiling), the agent's retry loop transparently shrinks and retries
|
||||
once — see ``run_agent._try_shrink_image_parts_in_messages``.
|
||||
|
||||
Returns (content_parts, skipped_paths). Skipped paths are files that
|
||||
couldn't be read from disk and are NOT advertised in the path hints.
|
||||
"""
|
||||
skipped: List[str] = []
|
||||
image_parts: List[Dict[str, Any]] = []
|
||||
attached_paths: List[str] = []
|
||||
|
||||
for raw_path in image_paths:
|
||||
p = Path(raw_path)
|
||||
if not p.exists() or not p.is_file():
|
||||
skipped.append(str(raw_path))
|
||||
continue
|
||||
data_url = _file_to_data_url(p)
|
||||
if not data_url:
|
||||
skipped.append(str(raw_path))
|
||||
continue
|
||||
image_parts.append({
|
||||
"type": "image_url",
|
||||
"image_url": {"url": data_url},
|
||||
})
|
||||
attached_paths.append(str(raw_path))
|
||||
|
||||
text = (user_text or "").strip()
|
||||
|
||||
# If at least one image attached, build a single text part that combines
|
||||
# the user's caption (or a neutral default) with one path hint per image.
|
||||
if attached_paths:
|
||||
base_text = text or "What do you see in this image?"
|
||||
path_hints = "\n".join(
|
||||
f"[Image attached at: {p}]" for p in attached_paths
|
||||
)
|
||||
combined_text = f"{base_text}\n\n{path_hints}"
|
||||
parts: List[Dict[str, Any]] = [{"type": "text", "text": combined_text}]
|
||||
parts.extend(image_parts)
|
||||
return parts, skipped
|
||||
|
||||
# No images successfully attached — fall back to plain text-only behaviour.
|
||||
parts = []
|
||||
if text:
|
||||
parts.append({"type": "text", "text": text})
|
||||
return parts, skipped
|
||||
|
||||
|
||||
__all__ = [
|
||||
"decide_image_input_mode",
|
||||
"build_native_content_parts",
|
||||
]
|
||||
@@ -27,6 +27,7 @@ from agent.usage_pricing import (
|
||||
DEFAULT_PRICING,
|
||||
estimate_usage_cost,
|
||||
format_duration_compact,
|
||||
get_pricing,
|
||||
has_known_pricing,
|
||||
)
|
||||
|
||||
@@ -124,7 +125,6 @@ class InsightsEngine:
|
||||
# Gather raw data
|
||||
sessions = self._get_sessions(cutoff, source)
|
||||
tool_usage = self._get_tool_usage(cutoff, source)
|
||||
skill_usage = self._get_skill_usage(cutoff, source)
|
||||
message_stats = self._get_message_stats(cutoff, source)
|
||||
|
||||
if not sessions:
|
||||
@@ -136,15 +136,6 @@ class InsightsEngine:
|
||||
"models": [],
|
||||
"platforms": [],
|
||||
"tools": [],
|
||||
"skills": {
|
||||
"summary": {
|
||||
"total_skill_loads": 0,
|
||||
"total_skill_edits": 0,
|
||||
"total_skill_actions": 0,
|
||||
"distinct_skills_used": 0,
|
||||
},
|
||||
"top_skills": [],
|
||||
},
|
||||
"activity": {},
|
||||
"top_sessions": [],
|
||||
}
|
||||
@@ -154,7 +145,6 @@ class InsightsEngine:
|
||||
models = self._compute_model_breakdown(sessions)
|
||||
platforms = self._compute_platform_breakdown(sessions)
|
||||
tools = self._compute_tool_breakdown(tool_usage)
|
||||
skills = self._compute_skill_breakdown(skill_usage)
|
||||
activity = self._compute_activity_patterns(sessions)
|
||||
top_sessions = self._compute_top_sessions(sessions)
|
||||
|
||||
@@ -167,7 +157,6 @@ class InsightsEngine:
|
||||
"models": models,
|
||||
"platforms": platforms,
|
||||
"tools": tools,
|
||||
"skills": skills,
|
||||
"activity": activity,
|
||||
"top_sessions": top_sessions,
|
||||
}
|
||||
@@ -296,82 +285,6 @@ class InsightsEngine:
|
||||
for name, count in tool_counts.most_common()
|
||||
]
|
||||
|
||||
def _get_skill_usage(self, cutoff: float, source: str = None) -> List[Dict]:
|
||||
"""Extract per-skill usage from assistant tool calls."""
|
||||
skill_counts: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
if source:
|
||||
cursor = self._conn.execute(
|
||||
"""SELECT m.tool_calls, m.timestamp
|
||||
FROM messages m
|
||||
JOIN sessions s ON s.id = m.session_id
|
||||
WHERE s.started_at >= ? AND s.source = ?
|
||||
AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
|
||||
(cutoff, source),
|
||||
)
|
||||
else:
|
||||
cursor = self._conn.execute(
|
||||
"""SELECT m.tool_calls, m.timestamp
|
||||
FROM messages m
|
||||
JOIN sessions s ON s.id = m.session_id
|
||||
WHERE s.started_at >= ?
|
||||
AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
|
||||
(cutoff,),
|
||||
)
|
||||
|
||||
for row in cursor.fetchall():
|
||||
try:
|
||||
calls = row["tool_calls"]
|
||||
if isinstance(calls, str):
|
||||
calls = json.loads(calls)
|
||||
if not isinstance(calls, list):
|
||||
continue
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
continue
|
||||
|
||||
timestamp = row["timestamp"]
|
||||
for call in calls:
|
||||
if not isinstance(call, dict):
|
||||
continue
|
||||
func = call.get("function", {})
|
||||
tool_name = func.get("name")
|
||||
if tool_name not in {"skill_view", "skill_manage"}:
|
||||
continue
|
||||
|
||||
args = func.get("arguments")
|
||||
if isinstance(args, str):
|
||||
try:
|
||||
args = json.loads(args)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
continue
|
||||
if not isinstance(args, dict):
|
||||
continue
|
||||
|
||||
skill_name = args.get("name")
|
||||
if not isinstance(skill_name, str) or not skill_name.strip():
|
||||
continue
|
||||
|
||||
entry = skill_counts.setdefault(
|
||||
skill_name,
|
||||
{
|
||||
"skill": skill_name,
|
||||
"view_count": 0,
|
||||
"manage_count": 0,
|
||||
"last_used_at": None,
|
||||
},
|
||||
)
|
||||
if tool_name == "skill_view":
|
||||
entry["view_count"] += 1
|
||||
else:
|
||||
entry["manage_count"] += 1
|
||||
|
||||
if timestamp is not None and (
|
||||
entry["last_used_at"] is None or timestamp > entry["last_used_at"]
|
||||
):
|
||||
entry["last_used_at"] = timestamp
|
||||
|
||||
return list(skill_counts.values())
|
||||
|
||||
def _get_message_stats(self, cutoff: float, source: str = None) -> Dict:
|
||||
"""Get aggregate message statistics."""
|
||||
if source:
|
||||
@@ -563,46 +476,6 @@ class InsightsEngine:
|
||||
})
|
||||
return result
|
||||
|
||||
def _compute_skill_breakdown(self, skill_usage: List[Dict]) -> Dict[str, Any]:
|
||||
"""Process per-skill usage into summary + ranked list."""
|
||||
total_skill_loads = sum(s["view_count"] for s in skill_usage) if skill_usage else 0
|
||||
total_skill_edits = sum(s["manage_count"] for s in skill_usage) if skill_usage else 0
|
||||
total_skill_actions = total_skill_loads + total_skill_edits
|
||||
|
||||
top_skills = []
|
||||
for skill in skill_usage:
|
||||
total_count = skill["view_count"] + skill["manage_count"]
|
||||
percentage = (total_count / total_skill_actions * 100) if total_skill_actions else 0
|
||||
top_skills.append({
|
||||
"skill": skill["skill"],
|
||||
"view_count": skill["view_count"],
|
||||
"manage_count": skill["manage_count"],
|
||||
"total_count": total_count,
|
||||
"percentage": percentage,
|
||||
"last_used_at": skill.get("last_used_at"),
|
||||
})
|
||||
|
||||
top_skills.sort(
|
||||
key=lambda s: (
|
||||
s["total_count"],
|
||||
s["view_count"],
|
||||
s["manage_count"],
|
||||
s["last_used_at"] or 0,
|
||||
s["skill"],
|
||||
),
|
||||
reverse=True,
|
||||
)
|
||||
|
||||
return {
|
||||
"summary": {
|
||||
"total_skill_loads": total_skill_loads,
|
||||
"total_skill_edits": total_skill_edits,
|
||||
"total_skill_actions": total_skill_actions,
|
||||
"distinct_skills_used": len(skill_usage),
|
||||
},
|
||||
"top_skills": top_skills,
|
||||
}
|
||||
|
||||
def _compute_activity_patterns(self, sessions: List[Dict]) -> Dict:
|
||||
"""Analyze activity patterns by day of week and hour."""
|
||||
day_counts = Counter() # 0=Monday ... 6=Sunday
|
||||
@@ -762,7 +635,13 @@ class InsightsEngine:
|
||||
lines.append(f" Sessions: {o['total_sessions']:<12} Messages: {o['total_messages']:,}")
|
||||
lines.append(f" Tool calls: {o['total_tool_calls']:<12,} User messages: {o['user_messages']:,}")
|
||||
lines.append(f" Input tokens: {o['total_input_tokens']:<12,} Output tokens: {o['total_output_tokens']:,}")
|
||||
lines.append(f" Total tokens: {o['total_tokens']:,}")
|
||||
cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
|
||||
if cache_total > 0:
|
||||
lines.append(f" Cache read: {o['total_cache_read_tokens']:<12,} Cache write: {o['total_cache_write_tokens']:,}")
|
||||
cost_str = f"${o['estimated_cost']:.2f}"
|
||||
if o.get("models_without_pricing"):
|
||||
cost_str += " *"
|
||||
lines.append(f" Total tokens: {o['total_tokens']:<12,} Est. cost: {cost_str}")
|
||||
if o["total_hours"] > 0:
|
||||
lines.append(f" Active time: ~{_format_duration(o['total_hours'] * 3600):<11} Avg session: ~{_format_duration(o['avg_session_duration'])}")
|
||||
lines.append(f" Avg msgs/session: {o['avg_messages_per_session']:.1f}")
|
||||
@@ -772,10 +651,16 @@ class InsightsEngine:
|
||||
if report["models"]:
|
||||
lines.append(" 🤖 Models Used")
|
||||
lines.append(" " + "─" * 56)
|
||||
lines.append(f" {'Model':<30} {'Sessions':>8} {'Tokens':>12}")
|
||||
lines.append(f" {'Model':<30} {'Sessions':>8} {'Tokens':>12} {'Cost':>8}")
|
||||
for m in report["models"]:
|
||||
model_name = m["model"][:28]
|
||||
lines.append(f" {model_name:<30} {m['sessions']:>8} {m['total_tokens']:>12,}")
|
||||
if m.get("has_pricing"):
|
||||
cost_cell = f"${m['cost']:>6.2f}"
|
||||
else:
|
||||
cost_cell = " N/A"
|
||||
lines.append(f" {model_name:<30} {m['sessions']:>8} {m['total_tokens']:>12,} {cost_cell}")
|
||||
if o.get("models_without_pricing"):
|
||||
lines.append(" * Cost N/A for custom/self-hosted models")
|
||||
lines.append("")
|
||||
|
||||
# Platform breakdown
|
||||
@@ -798,28 +683,6 @@ class InsightsEngine:
|
||||
lines.append(f" ... and {len(report['tools']) - 15} more tools")
|
||||
lines.append("")
|
||||
|
||||
# Skill usage
|
||||
skills = report.get("skills", {})
|
||||
top_skills = skills.get("top_skills", [])
|
||||
if top_skills:
|
||||
lines.append(" 🧠 Top Skills")
|
||||
lines.append(" " + "─" * 56)
|
||||
lines.append(f" {'Skill':<28} {'Loads':>7} {'Edits':>7} {'Last used':>11}")
|
||||
for skill in top_skills[:10]:
|
||||
last_used = "—"
|
||||
if skill.get("last_used_at"):
|
||||
last_used = datetime.fromtimestamp(skill["last_used_at"]).strftime("%b %d")
|
||||
lines.append(
|
||||
f" {skill['skill'][:28]:<28} {skill['view_count']:>7,} {skill['manage_count']:>7,} {last_used:>11}"
|
||||
)
|
||||
summary = skills.get("summary", {})
|
||||
lines.append(
|
||||
f" Distinct skills: {summary.get('distinct_skills_used', 0)} "
|
||||
f"Loads: {summary.get('total_skill_loads', 0):,} "
|
||||
f"Edits: {summary.get('total_skill_edits', 0):,}"
|
||||
)
|
||||
lines.append("")
|
||||
|
||||
# Activity patterns
|
||||
act = report.get("activity", {})
|
||||
if act.get("by_day"):
|
||||
@@ -877,7 +740,15 @@ class InsightsEngine:
|
||||
|
||||
# Overview
|
||||
lines.append(f"**Sessions:** {o['total_sessions']} | **Messages:** {o['total_messages']:,} | **Tool calls:** {o['total_tool_calls']:,}")
|
||||
lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
|
||||
cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
|
||||
if cache_total > 0:
|
||||
lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,} / cache: {cache_total:,})")
|
||||
else:
|
||||
lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
|
||||
cost_note = ""
|
||||
if o.get("models_without_pricing"):
|
||||
cost_note = " _(excludes custom/self-hosted models)_"
|
||||
lines.append(f"**Est. cost:** ${o['estimated_cost']:.2f}{cost_note}")
|
||||
if o["total_hours"] > 0:
|
||||
lines.append(f"**Active time:** ~{_format_duration(o['total_hours'] * 3600)} | **Avg session:** ~{_format_duration(o['avg_session_duration'])}")
|
||||
lines.append("")
|
||||
@@ -886,7 +757,8 @@ class InsightsEngine:
|
||||
if report["models"]:
|
||||
lines.append("**🤖 Models:**")
|
||||
for m in report["models"][:5]:
|
||||
lines.append(f" {m['model'][:25]} — {m['sessions']} sessions, {m['total_tokens']:,} tokens")
|
||||
cost_str = f"${m['cost']:.2f}" if m.get("has_pricing") else "N/A"
|
||||
lines.append(f" {m['model'][:25]} — {m['sessions']} sessions, {m['total_tokens']:,} tokens, {cost_str}")
|
||||
lines.append("")
|
||||
|
||||
# Platforms (if multi-platform)
|
||||
@@ -903,18 +775,6 @@ class InsightsEngine:
|
||||
lines.append(f" {t['tool']} — {t['count']:,} calls ({t['percentage']:.1f}%)")
|
||||
lines.append("")
|
||||
|
||||
skills = report.get("skills", {})
|
||||
if skills.get("top_skills"):
|
||||
lines.append("**🧠 Top Skills:**")
|
||||
for skill in skills["top_skills"][:5]:
|
||||
suffix = ""
|
||||
if skill.get("last_used_at"):
|
||||
suffix = f", last used {datetime.fromtimestamp(skill['last_used_at']).strftime('%b %d')}"
|
||||
lines.append(
|
||||
f" {skill['skill']} — {skill['view_count']:,} loads, {skill['manage_count']:,} edits{suffix}"
|
||||
)
|
||||
lines.append("")
|
||||
|
||||
# Activity summary
|
||||
act = report.get("activity", {})
|
||||
if act.get("busiest_day") and act.get("busiest_hour"):
|
||||
|
||||
@@ -1,62 +0,0 @@
|
||||
"""Per-agent iteration budget — thread-safe consume/refund counter.
|
||||
|
||||
Extracted from ``run_agent.py``. Each ``AIAgent`` instance (parent or
|
||||
subagent) holds an :class:`IterationBudget`; the parent's cap comes from
|
||||
``max_iterations`` (default 90), each subagent's cap comes from
|
||||
``delegation.max_iterations`` (default 50).
|
||||
|
||||
``run_agent`` re-exports ``IterationBudget`` so existing
|
||||
``from run_agent import IterationBudget`` imports keep working unchanged.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
|
||||
|
||||
class IterationBudget:
|
||||
"""Thread-safe iteration counter for an agent.
|
||||
|
||||
Each agent (parent or subagent) gets its own ``IterationBudget``.
|
||||
The parent's budget is capped at ``max_iterations`` (default 90).
|
||||
Each subagent gets an independent budget capped at
|
||||
``delegation.max_iterations`` (default 50) — this means total
|
||||
iterations across parent + subagents can exceed the parent's cap.
|
||||
Users control the per-subagent limit via ``delegation.max_iterations``
|
||||
in config.yaml.
|
||||
|
||||
``execute_code`` (programmatic tool calling) iterations are refunded via
|
||||
:meth:`refund` so they don't eat into the budget.
|
||||
"""
|
||||
|
||||
def __init__(self, max_total: int):
|
||||
self.max_total = max_total
|
||||
self._used = 0
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def consume(self) -> bool:
|
||||
"""Try to consume one iteration. Returns True if allowed."""
|
||||
with self._lock:
|
||||
if self._used >= self.max_total:
|
||||
return False
|
||||
self._used += 1
|
||||
return True
|
||||
|
||||
def refund(self) -> None:
|
||||
"""Give back one iteration (e.g. for execute_code turns)."""
|
||||
with self._lock:
|
||||
if self._used > 0:
|
||||
self._used -= 1
|
||||
|
||||
@property
|
||||
def used(self) -> int:
|
||||
with self._lock:
|
||||
return self._used
|
||||
|
||||
@property
|
||||
def remaining(self) -> int:
|
||||
with self._lock:
|
||||
return max(0, self.max_total - self._used)
|
||||
|
||||
|
||||
__all__ = ["IterationBudget"]
|
||||
@@ -1,48 +0,0 @@
|
||||
"""LM Studio reasoning-effort resolution shared by the chat-completions
|
||||
transport and run_agent's iteration-limit summary path.
|
||||
|
||||
LM Studio publishes per-model ``capabilities.reasoning.allowed_options`` (e.g.
|
||||
``["off","on"]`` for toggle-style models, ``["off","minimal","low"]`` for
|
||||
graduated models). We map the user's ``reasoning_config`` onto LM Studio's
|
||||
OpenAI-compatible vocabulary, then clamp against the model's allowed set so
|
||||
the server doesn't 400 on an unsupported effort.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import List, Optional
|
||||
|
||||
# LM Studio accepts these top-level reasoning_effort values via its
|
||||
# OpenAI-compatible chat.completions endpoint.
|
||||
_LM_VALID_EFFORTS = {"none", "minimal", "low", "medium", "high", "xhigh"}
|
||||
|
||||
# Toggle-style models publish allowed_options as ["off","on"] in /api/v1/models.
|
||||
# Map them onto the OpenAI-compatible request vocabulary.
|
||||
_LM_EFFORT_ALIASES = {"off": "none", "on": "medium"}
|
||||
|
||||
|
||||
def resolve_lmstudio_effort(
|
||||
reasoning_config: Optional[dict],
|
||||
allowed_options: Optional[List[str]],
|
||||
) -> Optional[str]:
|
||||
"""Return the ``reasoning_effort`` string to send to LM Studio, or ``None``.
|
||||
|
||||
``None`` means "omit the field": the user picked a level the model can't
|
||||
honor, so let LM Studio fall back to the model's declared default rather
|
||||
than silently substituting a different effort. When ``allowed_options`` is
|
||||
falsy (probe failed), skip clamping and send the resolved effort anyway.
|
||||
"""
|
||||
effort = "medium"
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
if reasoning_config.get("enabled") is False:
|
||||
effort = "none"
|
||||
else:
|
||||
raw = (reasoning_config.get("effort") or "").strip().lower()
|
||||
raw = _LM_EFFORT_ALIASES.get(raw, raw)
|
||||
if raw in _LM_VALID_EFFORTS:
|
||||
effort = raw
|
||||
if allowed_options:
|
||||
allowed = {_LM_EFFORT_ALIASES.get(opt, opt) for opt in allowed_options}
|
||||
if effort not in allowed:
|
||||
return None
|
||||
return effort
|
||||
@@ -1,106 +0,0 @@
|
||||
"""Language Server Protocol (LSP) integration for Hermes Agent.
|
||||
|
||||
Hermes runs full language servers (pyright, gopls, rust-analyzer,
|
||||
typescript-language-server, etc.) as subprocesses and pipes their
|
||||
``textDocument/publishDiagnostics`` output into the post-write lint
|
||||
delta filter used by ``write_file`` and ``patch``.
|
||||
|
||||
LSP is **gated on git workspace detection** — if the agent's cwd is
|
||||
inside a git repository, LSP runs against that workspace; otherwise the
|
||||
file_operations layer falls back to its existing in-process syntax
|
||||
checks. This keeps users on user-home cwd's (e.g. Telegram gateway
|
||||
chats) from spawning daemons they don't need.
|
||||
|
||||
Public API:
|
||||
|
||||
from agent.lsp import get_service
|
||||
|
||||
svc = get_service()
|
||||
if svc and svc.enabled_for(path):
|
||||
await svc.touch_file(path)
|
||||
diags = svc.diagnostics_for(path)
|
||||
|
||||
The bulk of the wiring is internal — most callers only need the layer
|
||||
in :func:`tools.file_operations.FileOperations._check_lint_delta`,
|
||||
which is already wired (see that module).
|
||||
|
||||
Architecture is documented in ``website/docs/user-guide/features/lsp.md``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import atexit
|
||||
import logging
|
||||
import threading
|
||||
from typing import Optional
|
||||
|
||||
from agent.lsp.manager import LSPService
|
||||
|
||||
logger = logging.getLogger("agent.lsp")
|
||||
|
||||
_service: Optional[LSPService] = None
|
||||
_atexit_registered = False
|
||||
_service_lock = threading.Lock()
|
||||
|
||||
|
||||
def get_service() -> Optional[LSPService]:
|
||||
"""Return the process-wide LSP service singleton, or None when disabled.
|
||||
|
||||
The service is created lazily on first call. ``None`` is returned
|
||||
when LSP is disabled in config, when no workspace can be detected,
|
||||
or when the platform doesn't support subprocess-based LSP servers.
|
||||
|
||||
On first creation, registers an :mod:`atexit` handler that tears
|
||||
down spawned language servers on Python exit so a long-running
|
||||
CLI or gateway session doesn't leak pyright/gopls/etc. processes
|
||||
when it terminates.
|
||||
"""
|
||||
global _service, _atexit_registered
|
||||
if _service is not None:
|
||||
return _service if _service.is_active() else None
|
||||
with _service_lock:
|
||||
if _service is not None:
|
||||
return _service if _service.is_active() else None
|
||||
_service = LSPService.create_from_config()
|
||||
if not _atexit_registered:
|
||||
# ``atexit`` handlers run in LIFO order on normal Python
|
||||
# exit and on SystemExit, but NOT on os._exit() or
|
||||
# uncaught signals. Language servers are stateless
|
||||
# subprocesses — losing them on SIGKILL is fine; they'll
|
||||
# be reaped by the kernel along with their parent. We
|
||||
# care about clean exits where Python flushes stdio
|
||||
# before terminating; without this hook every
|
||||
# ``hermes chat`` exit would leak pyright processes that
|
||||
# outlive the parent for a few seconds while their
|
||||
# stdout buffers drain.
|
||||
atexit.register(_atexit_shutdown)
|
||||
_atexit_registered = True
|
||||
return _service if (_service is not None and _service.is_active()) else None
|
||||
|
||||
|
||||
def shutdown_service() -> None:
|
||||
"""Tear down the LSP service if one was started.
|
||||
|
||||
Safe to call multiple times; safe to call when no service was created.
|
||||
"""
|
||||
global _service
|
||||
with _service_lock:
|
||||
svc = _service
|
||||
_service = None
|
||||
if svc is not None:
|
||||
try:
|
||||
svc.shutdown()
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.debug("LSP shutdown error: %s", e)
|
||||
|
||||
|
||||
def _atexit_shutdown() -> None:
|
||||
"""atexit-registered wrapper. Logs at debug because by the time
|
||||
atexit fires the user has already seen the agent's final output —
|
||||
a noisy shutdown line on top of that is just clutter."""
|
||||
try:
|
||||
shutdown_service()
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.debug("atexit LSP shutdown failed: %s", e)
|
||||
|
||||
|
||||
__all__ = ["get_service", "shutdown_service", "LSPService"]
|
||||
308
agent/lsp/cli.py
308
agent/lsp/cli.py
@@ -1,308 +0,0 @@
|
||||
"""``hermes lsp`` CLI subcommand.
|
||||
|
||||
Subcommands:
|
||||
|
||||
- ``status`` — show service state, configured servers, install status.
|
||||
- ``install <server_id>`` — eagerly install one server's binary.
|
||||
- ``install-all`` — try to install every server with a known recipe.
|
||||
- ``restart`` — tear down running clients so the next edit re-spawns.
|
||||
- ``which <server_id>`` — print the resolved binary path for one server.
|
||||
- ``list`` — print the registry of supported servers.
|
||||
|
||||
The handlers are kept here (rather than in
|
||||
``hermes_cli/main.py``) so the LSP module ships self-contained.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def register_subparser(subparsers: argparse._SubParsersAction) -> None:
|
||||
"""Wire the ``hermes lsp`` subcommand tree into the main argparse."""
|
||||
parser = subparsers.add_parser(
|
||||
"lsp",
|
||||
help="Language Server Protocol management",
|
||||
description=(
|
||||
"Manage the LSP layer that powers post-write semantic "
|
||||
"diagnostics in write_file/patch."
|
||||
),
|
||||
)
|
||||
sub = parser.add_subparsers(dest="lsp_command")
|
||||
|
||||
sub_status = sub.add_parser("status", help="Show LSP service status")
|
||||
sub_status.add_argument(
|
||||
"--json", action="store_true", help="Emit machine-readable JSON"
|
||||
)
|
||||
|
||||
sub_list = sub.add_parser("list", help="List supported language servers")
|
||||
sub_list.add_argument(
|
||||
"--installed-only",
|
||||
action="store_true",
|
||||
help="Only show servers whose binary is currently available",
|
||||
)
|
||||
|
||||
sub_install = sub.add_parser("install", help="Install a server binary")
|
||||
sub_install.add_argument("server", help="Server id (e.g. pyright, gopls)")
|
||||
|
||||
sub_install_all = sub.add_parser(
|
||||
"install-all",
|
||||
help="Install every server with a known auto-install recipe",
|
||||
)
|
||||
sub_install_all.add_argument(
|
||||
"--include-manual",
|
||||
action="store_true",
|
||||
help="Even attempt servers marked manual-install (best effort)",
|
||||
)
|
||||
|
||||
sub_restart = sub.add_parser(
|
||||
"restart",
|
||||
help="Tear down running LSP clients (next edit re-spawns)",
|
||||
)
|
||||
|
||||
sub_which = sub.add_parser("which", help="Print binary path for a server")
|
||||
sub_which.add_argument("server", help="Server id")
|
||||
|
||||
parser.set_defaults(func=run_lsp_command)
|
||||
|
||||
|
||||
def run_lsp_command(args: argparse.Namespace) -> int:
|
||||
"""Top-level dispatcher for ``hermes lsp <subcommand>``."""
|
||||
sub = getattr(args, "lsp_command", None) or "status"
|
||||
try:
|
||||
if sub == "status":
|
||||
return _cmd_status(getattr(args, "json", False))
|
||||
if sub == "list":
|
||||
return _cmd_list(getattr(args, "installed_only", False))
|
||||
if sub == "install":
|
||||
return _cmd_install(args.server)
|
||||
if sub == "install-all":
|
||||
return _cmd_install_all(getattr(args, "include_manual", False))
|
||||
if sub == "restart":
|
||||
return _cmd_restart()
|
||||
if sub == "which":
|
||||
return _cmd_which(args.server)
|
||||
sys.stderr.write(f"unknown lsp subcommand: {sub}\n")
|
||||
return 2
|
||||
except KeyboardInterrupt:
|
||||
return 130
|
||||
|
||||
|
||||
def _cmd_status(emit_json: bool) -> int:
|
||||
from agent.lsp import get_service
|
||||
from agent.lsp.servers import SERVERS
|
||||
from agent.lsp.install import detect_status
|
||||
|
||||
svc = get_service()
|
||||
service_active = svc is not None
|
||||
info = svc.get_status() if svc is not None else {"enabled": False}
|
||||
|
||||
if emit_json:
|
||||
import json
|
||||
payload = {
|
||||
"service": info,
|
||||
"registry": [
|
||||
{
|
||||
"server_id": s.server_id,
|
||||
"extensions": list(s.extensions),
|
||||
"description": s.description,
|
||||
"binary_status": detect_status(_recipe_pkg_for(s.server_id)),
|
||||
}
|
||||
for s in SERVERS
|
||||
],
|
||||
}
|
||||
sys.stdout.write(json.dumps(payload, indent=2) + "\n")
|
||||
return 0
|
||||
|
||||
out = []
|
||||
out.append("LSP Service")
|
||||
out.append("===========")
|
||||
out.append(f" enabled: {info.get('enabled', False)}")
|
||||
if service_active:
|
||||
out.append(f" wait_mode: {info.get('wait_mode')}")
|
||||
out.append(f" wait_timeout: {info.get('wait_timeout')}s")
|
||||
out.append(f" install_strategy:{info.get('install_strategy')}")
|
||||
clients = info.get("clients") or []
|
||||
if clients:
|
||||
out.append(f" active clients: {len(clients)}")
|
||||
for c in clients:
|
||||
out.append(
|
||||
f" - {c['server_id']:20s} state={c['state']:10s} root={c['workspace_root']}"
|
||||
)
|
||||
else:
|
||||
out.append(" active clients: none")
|
||||
broken = info.get("broken") or []
|
||||
if broken:
|
||||
out.append(f" broken pairs: {len(broken)}")
|
||||
for b in broken:
|
||||
out.append(f" - {b}")
|
||||
disabled = info.get("disabled_servers") or []
|
||||
if disabled:
|
||||
out.append(f" disabled in cfg: {', '.join(disabled)}")
|
||||
|
||||
# Surface backend-tool gaps that aren't visible in the registry table:
|
||||
# some servers spawn fine but emit no diagnostics without a sidecar
|
||||
# binary (bash-language-server -> shellcheck).
|
||||
backend_warnings = _backend_warnings()
|
||||
if backend_warnings:
|
||||
out.append("")
|
||||
out.append("Backend warnings")
|
||||
out.append("================")
|
||||
for line in backend_warnings:
|
||||
out.append(f" ! {line}")
|
||||
out.append("")
|
||||
out.append("Registered Servers")
|
||||
out.append("==================")
|
||||
for s in SERVERS:
|
||||
pkg = _recipe_pkg_for(s.server_id)
|
||||
status = detect_status(pkg)
|
||||
marker = {
|
||||
"installed": "✓",
|
||||
"missing": "·",
|
||||
"manual-only": "?",
|
||||
}.get(status, " ")
|
||||
ext_summary = ", ".join(list(s.extensions)[:5])
|
||||
if len(s.extensions) > 5:
|
||||
ext_summary += f", … (+{len(s.extensions) - 5})"
|
||||
out.append(
|
||||
f" {marker} {s.server_id:24s} [{status:11s}] {ext_summary}"
|
||||
)
|
||||
if s.description:
|
||||
out.append(f" {s.description}")
|
||||
sys.stdout.write("\n".join(out) + "\n")
|
||||
return 0
|
||||
|
||||
|
||||
def _cmd_list(installed_only: bool) -> int:
|
||||
from agent.lsp.servers import SERVERS
|
||||
from agent.lsp.install import detect_status
|
||||
|
||||
for s in SERVERS:
|
||||
pkg = _recipe_pkg_for(s.server_id)
|
||||
status = detect_status(pkg)
|
||||
if installed_only and status != "installed":
|
||||
continue
|
||||
sys.stdout.write(
|
||||
f"{s.server_id:24s} [{status:11s}] {','.join(s.extensions)}\n"
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
def _cmd_install(server_id: str) -> int:
|
||||
from agent.lsp.install import try_install, INSTALL_RECIPES, detect_status
|
||||
pkg = _recipe_pkg_for(server_id)
|
||||
pre_status = detect_status(pkg)
|
||||
if pre_status == "installed":
|
||||
sys.stdout.write(f"{server_id} already installed\n")
|
||||
return 0
|
||||
sys.stdout.write(f"installing {server_id} (pkg={pkg}) ...\n")
|
||||
sys.stdout.flush()
|
||||
bin_path = try_install(pkg, "auto")
|
||||
if bin_path is None:
|
||||
recipe = INSTALL_RECIPES.get(pkg)
|
||||
if recipe and recipe.get("strategy") == "manual":
|
||||
sys.stderr.write(
|
||||
f"{server_id}: this server requires a manual install. "
|
||||
f"See documentation.\n"
|
||||
)
|
||||
else:
|
||||
sys.stderr.write(f"{server_id}: install failed (see logs).\n")
|
||||
return 1
|
||||
sys.stdout.write(f"installed: {bin_path}\n")
|
||||
return 0
|
||||
|
||||
|
||||
def _cmd_install_all(include_manual: bool) -> int:
|
||||
from agent.lsp.servers import SERVERS
|
||||
from agent.lsp.install import try_install, INSTALL_RECIPES, detect_status
|
||||
|
||||
rc = 0
|
||||
for s in SERVERS:
|
||||
pkg = _recipe_pkg_for(s.server_id)
|
||||
recipe = INSTALL_RECIPES.get(pkg)
|
||||
if recipe is None:
|
||||
continue
|
||||
if recipe.get("strategy") == "manual" and not include_manual:
|
||||
continue
|
||||
if detect_status(pkg) == "installed":
|
||||
sys.stdout.write(f" {s.server_id:24s} already installed\n")
|
||||
continue
|
||||
sys.stdout.write(f" installing {s.server_id} (pkg={pkg}) ... ")
|
||||
sys.stdout.flush()
|
||||
path = try_install(pkg, "auto")
|
||||
if path:
|
||||
sys.stdout.write(f"ok ({path})\n")
|
||||
else:
|
||||
sys.stdout.write("FAILED\n")
|
||||
rc = 1
|
||||
return rc
|
||||
|
||||
|
||||
def _cmd_restart() -> int:
|
||||
from agent.lsp import shutdown_service
|
||||
|
||||
shutdown_service()
|
||||
sys.stdout.write("LSP service shut down. Next edit will respawn clients.\n")
|
||||
return 0
|
||||
|
||||
|
||||
def _cmd_which(server_id: str) -> int:
|
||||
from agent.lsp.install import INSTALL_RECIPES, hermes_lsp_bin_dir
|
||||
import os
|
||||
import shutil as _shutil
|
||||
|
||||
recipe = INSTALL_RECIPES.get(server_id)
|
||||
bin_name = (recipe or {}).get("bin", server_id)
|
||||
staged = hermes_lsp_bin_dir() / bin_name
|
||||
if staged.exists():
|
||||
sys.stdout.write(str(staged) + "\n")
|
||||
return 0
|
||||
on_path = _shutil.which(bin_name)
|
||||
if on_path:
|
||||
sys.stdout.write(on_path + "\n")
|
||||
return 0
|
||||
sys.stderr.write(f"{server_id}: not installed\n")
|
||||
return 1
|
||||
|
||||
|
||||
def _recipe_pkg_for(server_id: str) -> str:
|
||||
"""Map a registry ``server_id`` to its install-recipe package key."""
|
||||
# The mapping lives here (not in install.py) because it's a CLI
|
||||
# convenience layer. Most server_ids are also their own recipe
|
||||
# key, but a few differ (e.g. ``vue-language-server`` →
|
||||
# ``@vue/language-server``).
|
||||
aliases = {
|
||||
"vue-language-server": "@vue/language-server",
|
||||
"astro-language-server": "@astrojs/language-server",
|
||||
"dockerfile-ls": "dockerfile-language-server-nodejs",
|
||||
"typescript": "typescript-language-server",
|
||||
}
|
||||
return aliases.get(server_id, server_id)
|
||||
|
||||
|
||||
def _backend_warnings() -> list:
|
||||
"""Return human-readable notes about LSP backend tools that are missing
|
||||
in a way that won't surface elsewhere.
|
||||
|
||||
Some language servers ship as thin wrappers around an external CLI for
|
||||
actual diagnostics — they spawn cleanly but never emit any errors when
|
||||
the sidecar binary isn't on PATH. bash-language-server / shellcheck
|
||||
is the load-bearing example.
|
||||
|
||||
Returned strings are short, actionable, and include the install
|
||||
suggestion across common platforms.
|
||||
"""
|
||||
import shutil as _shutil
|
||||
from agent.lsp.install import hermes_lsp_bin_dir
|
||||
notes: list = []
|
||||
bash_installed = _shutil.which("bash-language-server") is not None or (
|
||||
(hermes_lsp_bin_dir() / "bash-language-server").exists()
|
||||
)
|
||||
if bash_installed and _shutil.which("shellcheck") is None:
|
||||
notes.append(
|
||||
"bash-language-server is installed but shellcheck is missing — "
|
||||
"diagnostics will be empty (apt: shellcheck, brew: shellcheck, "
|
||||
"scoop: shellcheck)."
|
||||
)
|
||||
return notes
|
||||
@@ -1,930 +0,0 @@
|
||||
"""Async LSP client over stdin/stdout.
|
||||
|
||||
One :class:`LSPClient` corresponds to one ``(language_server, workspace_root)``
|
||||
pair — exactly what OpenCode keys clients on, and the same shape Claude
|
||||
Code uses. The client owns a child process, drives the JSON-RPC
|
||||
exchange, and exposes:
|
||||
|
||||
- :meth:`open_file` / :meth:`change_file` — text document sync
|
||||
- :meth:`wait_for_diagnostics` — block until the server emits fresh
|
||||
diagnostics for a specific file (or a timeout fires)
|
||||
- :meth:`diagnostics_for` — read the current per-file diagnostic store
|
||||
- :meth:`shutdown` — graceful close + SIGTERM/SIGKILL fallback
|
||||
|
||||
The class is designed for async use from a single asyncio event loop.
|
||||
The :class:`agent.lsp.manager.LSPService` runs an event loop in a
|
||||
background thread so the synchronous file_operations layer can call
|
||||
into it via :func:`agent.lsp.manager.LSPService.touch_file`.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- Push diagnostics are stored per-URI in :attr:`_push_diagnostics` from
|
||||
``textDocument/publishDiagnostics`` notifications. Pull diagnostics
|
||||
go in :attr:`_pull_diagnostics`. The merged view dedupes by content.
|
||||
|
||||
- Whole-document sync. Even when the server advertises incremental
|
||||
sync, we send a single ``contentChanges`` entry replacing the
|
||||
entire document. Pretending to be incremental while sending a
|
||||
full replacement is well-tolerated by every major server and saves
|
||||
range bookkeeping. See OpenCode's ``client.ts:584-659`` for the
|
||||
same trick.
|
||||
|
||||
- The "touch-file dance": every ``open_file`` call also fires a
|
||||
``workspace/didChangeWatchedFiles`` notification (CREATED on the
|
||||
first open, CHANGED thereafter). Some servers (clangd, eslint)
|
||||
only re-scan when this notification fires, even though the LSP spec
|
||||
doesn't strictly require it.
|
||||
|
||||
- ``ContentModified`` (-32801) errors get retried with exponential
|
||||
backoff up to 3 times. This matches Claude Code's
|
||||
``LSPServerInstance.sendRequest``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Awaitable, Callable, Dict, List, Optional, Set
|
||||
from urllib.parse import quote, unquote
|
||||
|
||||
from agent.lsp.protocol import (
|
||||
ERROR_CONTENT_MODIFIED,
|
||||
ERROR_METHOD_NOT_FOUND,
|
||||
LSPProtocolError,
|
||||
LSPRequestError,
|
||||
classify_message,
|
||||
encode_message,
|
||||
make_error_response,
|
||||
make_notification,
|
||||
make_request,
|
||||
make_response,
|
||||
read_message,
|
||||
)
|
||||
|
||||
logger = logging.getLogger("agent.lsp.client")
|
||||
|
||||
# Timeouts (seconds) — mirror OpenCode's constants, scaled to seconds.
|
||||
INITIALIZE_TIMEOUT = 45.0
|
||||
DIAGNOSTICS_DOCUMENT_WAIT = 5.0
|
||||
DIAGNOSTICS_FULL_WAIT = 10.0
|
||||
DIAGNOSTICS_REQUEST_TIMEOUT = 3.0
|
||||
PUSH_DEBOUNCE = 0.15
|
||||
SHUTDOWN_GRACE = 1.0 # seconds between SIGTERM and SIGKILL
|
||||
|
||||
# Retry policy for transient ContentModified errors.
|
||||
MAX_CONTENT_MODIFIED_RETRIES = 3
|
||||
RETRY_BASE_DELAY = 0.5 # 0.5, 1.0, 2.0 — exponential
|
||||
|
||||
|
||||
def file_uri(path: str) -> str:
|
||||
"""Return ``file://`` URI for an absolute filesystem path.
|
||||
|
||||
Mirrors Node's ``pathToFileURL`` — handles spaces, unicode, and
|
||||
Windows drive letters (``C:\\foo`` → ``file:///C:/foo``).
|
||||
"""
|
||||
abs_path = os.path.abspath(path)
|
||||
if os.name == "nt":
|
||||
# Windows: backslash → forward slash, prepend extra slash so
|
||||
# the drive letter shows up as part of the path component.
|
||||
abs_path = abs_path.replace("\\", "/")
|
||||
if not abs_path.startswith("/"):
|
||||
abs_path = "/" + abs_path
|
||||
return "file://" + quote(abs_path, safe="/:")
|
||||
|
||||
|
||||
def uri_to_path(uri: str) -> str:
|
||||
"""Inverse of :func:`file_uri`."""
|
||||
if not uri.startswith("file://"):
|
||||
return uri
|
||||
raw = uri[len("file://"):]
|
||||
if os.name == "nt" and raw.startswith("/") and len(raw) > 2 and raw[2] == ":":
|
||||
raw = raw[1:] # strip leading slash before drive letter
|
||||
return os.path.normpath(unquote(raw))
|
||||
|
||||
|
||||
def _end_position(text: str) -> Dict[str, int]:
|
||||
"""Return the LSP Position at the end of ``text``.
|
||||
|
||||
Used to construct a single-range "replace whole document" change
|
||||
for ``textDocument/didChange`` regardless of the server's declared
|
||||
sync mode.
|
||||
"""
|
||||
if not text:
|
||||
return {"line": 0, "character": 0}
|
||||
lines = text.splitlines(keepends=False)
|
||||
last_line = len(lines) - 1
|
||||
last_col = len(lines[-1]) if lines else 0
|
||||
# If the text ends with a trailing newline, ``splitlines`` won't
|
||||
# represent it. The end position is then the start of the next
|
||||
# (empty) line — line index is len(lines), column 0.
|
||||
if text.endswith(("\n", "\r")):
|
||||
return {"line": last_line + 1, "character": 0}
|
||||
return {"line": last_line, "character": last_col}
|
||||
|
||||
|
||||
class LSPClient:
|
||||
"""Async LSP client tied to one server process and one workspace root.
|
||||
|
||||
Lifecycle:
|
||||
|
||||
c = LSPClient(server_id, workspace_root, command, args, init_options)
|
||||
await c.start() # spawn + initialize
|
||||
ver = await c.open_file("/path/to/foo.py")
|
||||
await c.wait_for_diagnostics("/path/to/foo.py", ver)
|
||||
diags = c.diagnostics_for("/path/to/foo.py")
|
||||
await c.shutdown()
|
||||
"""
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# construction + lifecycle
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
server_id: str,
|
||||
workspace_root: str,
|
||||
command: List[str],
|
||||
env: Optional[Dict[str, str]] = None,
|
||||
cwd: Optional[str] = None,
|
||||
initialization_options: Optional[Dict[str, Any]] = None,
|
||||
seed_diagnostics_on_first_push: bool = False,
|
||||
) -> None:
|
||||
self.server_id = server_id
|
||||
self.workspace_root = workspace_root
|
||||
self._command = list(command)
|
||||
self._env = env
|
||||
self._cwd = cwd or workspace_root
|
||||
self._init_options = initialization_options or {}
|
||||
self._seed_first_push = seed_diagnostics_on_first_push
|
||||
|
||||
# Process + streams
|
||||
self._proc: Optional[asyncio.subprocess.Process] = None
|
||||
self._stderr_task: Optional[asyncio.Task] = None
|
||||
self._reader_task: Optional[asyncio.Task] = None
|
||||
|
||||
# Request/response correlation
|
||||
self._next_id: int = 0
|
||||
self._pending: Dict[int, asyncio.Future] = {}
|
||||
|
||||
# Server-side request handlers (server → client requests).
|
||||
# Kept small and explicit; everything else returns method-not-found.
|
||||
self._request_handlers: Dict[str, Callable[[Any], Awaitable[Any]]] = {
|
||||
"window/workDoneProgress/create": self._handle_work_done_create,
|
||||
"workspace/configuration": self._handle_workspace_configuration,
|
||||
"client/registerCapability": self._handle_register_capability,
|
||||
"client/unregisterCapability": self._handle_unregister_capability,
|
||||
"workspace/workspaceFolders": self._handle_workspace_folders,
|
||||
"workspace/diagnostic/refresh": self._handle_diagnostic_refresh,
|
||||
}
|
||||
# Notifications (server → client) we care about.
|
||||
self._notification_handlers: Dict[str, Callable[[Any], None]] = {
|
||||
"textDocument/publishDiagnostics": self._handle_publish_diagnostics,
|
||||
# Everything else (window/showMessage, $/progress, etc.)
|
||||
# is silently dropped by default.
|
||||
}
|
||||
|
||||
# Tracked file state — required for didChange version bumps.
|
||||
self._files: Dict[str, Dict[str, Any]] = {}
|
||||
# Diagnostic stores, keyed by file path (NOT URI).
|
||||
self._push_diagnostics: Dict[str, List[Dict[str, Any]]] = {}
|
||||
self._pull_diagnostics: Dict[str, List[Dict[str, Any]]] = {}
|
||||
# Per-path "last published" time so wait-for-fresh logic works.
|
||||
self._published: Dict[str, float] = {}
|
||||
# Per-path version of the latest push (matches our didChange
|
||||
# version when the server respects it).
|
||||
self._published_version: Dict[str, int] = {}
|
||||
# First-push seen flag, for typescript-style seed-on-first-push.
|
||||
self._first_push_seen: Set[str] = set()
|
||||
# Capability registrations — only diagnostic ones are tracked.
|
||||
self._diagnostic_registrations: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
# State machine
|
||||
self._state: str = "stopped"
|
||||
self._initialize_result: Optional[Dict[str, Any]] = None
|
||||
self._sync_kind: int = 1 # 1=Full, 2=Incremental
|
||||
self._stopping: bool = False
|
||||
|
||||
# Push event for waiters.
|
||||
self._push_event = asyncio.Event()
|
||||
# Monotonic counter incremented on every publishDiagnostics push.
|
||||
# Waiters snapshot it on entry and treat any increase as
|
||||
# "something happened, recheck the predicate". Avoids the
|
||||
# asyncio.Event sticky-state trap.
|
||||
self._push_counter = 0
|
||||
# Registration change event so wait_for_diagnostics can re-loop
|
||||
# when the server announces a new dynamic provider.
|
||||
self._registration_event = asyncio.Event()
|
||||
|
||||
@property
|
||||
def is_running(self) -> bool:
|
||||
return self._state == "running" and self._proc is not None and self._proc.returncode is None
|
||||
|
||||
@property
|
||||
def state(self) -> str:
|
||||
return self._state
|
||||
|
||||
async def start(self) -> None:
|
||||
"""Spawn the server and complete the initialize handshake.
|
||||
|
||||
Raises any exception encountered during spawn/init. On failure
|
||||
the process is killed and the client is left in state
|
||||
``"error"`` — re-call ``start()`` to retry.
|
||||
"""
|
||||
if self._state in {"running", "starting"}:
|
||||
return
|
||||
self._state = "starting"
|
||||
try:
|
||||
await self._spawn()
|
||||
await self._initialize()
|
||||
self._state = "running"
|
||||
except Exception:
|
||||
self._state = "error"
|
||||
await self._cleanup_process()
|
||||
raise
|
||||
|
||||
async def _spawn(self) -> None:
|
||||
env = dict(os.environ)
|
||||
if self._env:
|
||||
env.update(self._env)
|
||||
|
||||
try:
|
||||
self._proc = await asyncio.create_subprocess_exec(
|
||||
self._command[0],
|
||||
*self._command[1:],
|
||||
stdin=asyncio.subprocess.PIPE,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
env=env,
|
||||
cwd=self._cwd,
|
||||
)
|
||||
except FileNotFoundError as e:
|
||||
raise LSPProtocolError(
|
||||
f"LSP server binary not found: {self._command[0]} ({e})"
|
||||
) from e
|
||||
|
||||
# Drain stderr at debug level — if we don't, the pipe buffer
|
||||
# fills and the server hangs.
|
||||
self._stderr_task = asyncio.create_task(self._drain_stderr())
|
||||
# Start the reader loop.
|
||||
self._reader_task = asyncio.create_task(self._reader_loop())
|
||||
|
||||
async def _drain_stderr(self) -> None:
|
||||
if self._proc is None or self._proc.stderr is None:
|
||||
return
|
||||
try:
|
||||
while True:
|
||||
line = await self._proc.stderr.readline()
|
||||
if not line:
|
||||
break
|
||||
text = line.decode("utf-8", errors="replace").rstrip()
|
||||
if text:
|
||||
logger.debug("[%s] stderr: %s", self.server_id, text[:1000])
|
||||
except (asyncio.CancelledError, OSError):
|
||||
pass
|
||||
|
||||
async def _reader_loop(self) -> None:
|
||||
if self._proc is None or self._proc.stdout is None:
|
||||
return
|
||||
try:
|
||||
while True:
|
||||
msg = await read_message(self._proc.stdout)
|
||||
if msg is None:
|
||||
logger.debug("[%s] server closed stdout cleanly", self.server_id)
|
||||
break
|
||||
kind, key = classify_message(msg)
|
||||
if kind == "response":
|
||||
self._dispatch_response(key, msg)
|
||||
elif kind == "request":
|
||||
asyncio.create_task(self._dispatch_request(key, msg))
|
||||
elif kind == "notification":
|
||||
self._dispatch_notification(key, msg)
|
||||
else:
|
||||
logger.warning("[%s] dropping invalid message: %r", self.server_id, msg)
|
||||
except LSPProtocolError as e:
|
||||
logger.warning("[%s] protocol error in reader loop: %s", self.server_id, e)
|
||||
except (asyncio.CancelledError, OSError):
|
||||
pass
|
||||
finally:
|
||||
# Wake up any pending requests so they can fail fast.
|
||||
for fut in list(self._pending.values()):
|
||||
if not fut.done():
|
||||
fut.set_exception(LSPProtocolError("server connection closed"))
|
||||
self._pending.clear()
|
||||
|
||||
async def _initialize(self) -> None:
|
||||
params = {
|
||||
"rootUri": file_uri(self.workspace_root),
|
||||
"rootPath": self.workspace_root,
|
||||
"processId": os.getpid(),
|
||||
"workspaceFolders": [
|
||||
{"name": "workspace", "uri": file_uri(self.workspace_root)}
|
||||
],
|
||||
"initializationOptions": self._init_options,
|
||||
"capabilities": {
|
||||
"window": {"workDoneProgress": True},
|
||||
"workspace": {
|
||||
"configuration": True,
|
||||
"workspaceFolders": True,
|
||||
"didChangeWatchedFiles": {"dynamicRegistration": True},
|
||||
"diagnostics": {"refreshSupport": False},
|
||||
},
|
||||
"textDocument": {
|
||||
"synchronization": {
|
||||
"dynamicRegistration": False,
|
||||
"didOpen": True,
|
||||
"didChange": True,
|
||||
"didSave": True,
|
||||
"willSave": False,
|
||||
"willSaveWaitUntil": False,
|
||||
},
|
||||
"diagnostic": {
|
||||
"dynamicRegistration": True,
|
||||
"relatedDocumentSupport": True,
|
||||
},
|
||||
"publishDiagnostics": {
|
||||
"relatedInformation": True,
|
||||
"tagSupport": {"valueSet": [1, 2]},
|
||||
"versionSupport": True,
|
||||
"codeDescriptionSupport": True,
|
||||
"dataSupport": False,
|
||||
},
|
||||
"hover": {"contentFormat": ["markdown", "plaintext"]},
|
||||
"definition": {"linkSupport": True},
|
||||
"references": {},
|
||||
"documentSymbol": {"hierarchicalDocumentSymbolSupport": True},
|
||||
},
|
||||
"general": {"positionEncodings": ["utf-16"]},
|
||||
},
|
||||
}
|
||||
|
||||
result = await asyncio.wait_for(
|
||||
self._send_request("initialize", params),
|
||||
timeout=INITIALIZE_TIMEOUT,
|
||||
)
|
||||
self._initialize_result = result
|
||||
self._sync_kind = self._extract_sync_kind(result.get("capabilities") or {})
|
||||
|
||||
await self._send_notification("initialized", {})
|
||||
if self._init_options:
|
||||
# Some servers (vtsls, eslint) want config pushed via
|
||||
# didChangeConfiguration even if it was sent in
|
||||
# initializationOptions.
|
||||
await self._send_notification(
|
||||
"workspace/didChangeConfiguration",
|
||||
{"settings": self._init_options},
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _extract_sync_kind(capabilities: dict) -> int:
|
||||
sync = capabilities.get("textDocumentSync")
|
||||
if isinstance(sync, int):
|
||||
return sync
|
||||
if isinstance(sync, dict):
|
||||
change = sync.get("change")
|
||||
if isinstance(change, int):
|
||||
return change
|
||||
return 1 # default to Full
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
"""Best-effort graceful shutdown.
|
||||
|
||||
Sends ``shutdown`` + ``exit``, then SIGTERMs/SIGKILLs the
|
||||
process if it doesn't exit cleanly. Idempotent.
|
||||
"""
|
||||
if self._stopping:
|
||||
return
|
||||
self._stopping = True
|
||||
try:
|
||||
if self.is_running:
|
||||
try:
|
||||
await asyncio.wait_for(self._send_request("shutdown", None), timeout=2.0)
|
||||
except (asyncio.TimeoutError, LSPRequestError, LSPProtocolError):
|
||||
pass
|
||||
try:
|
||||
await self._send_notification("exit", None)
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
self._state = "stopped"
|
||||
await self._cleanup_process()
|
||||
|
||||
async def _cleanup_process(self) -> None:
|
||||
if self._reader_task is not None and not self._reader_task.done():
|
||||
self._reader_task.cancel()
|
||||
try:
|
||||
await self._reader_task
|
||||
except (asyncio.CancelledError, Exception): # noqa: BLE001
|
||||
pass
|
||||
if self._stderr_task is not None and not self._stderr_task.done():
|
||||
self._stderr_task.cancel()
|
||||
try:
|
||||
await self._stderr_task
|
||||
except (asyncio.CancelledError, Exception): # noqa: BLE001
|
||||
pass
|
||||
proc = self._proc
|
||||
self._proc = None
|
||||
if proc is None:
|
||||
return
|
||||
if proc.returncode is None:
|
||||
try:
|
||||
proc.terminate()
|
||||
try:
|
||||
await asyncio.wait_for(proc.wait(), timeout=SHUTDOWN_GRACE)
|
||||
except asyncio.TimeoutError:
|
||||
try:
|
||||
proc.kill()
|
||||
await proc.wait()
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# request / notification plumbing
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _send_request(self, method: str, params: Any) -> Any:
|
||||
if self._proc is None or self._proc.stdin is None or self._proc.stdin.is_closing():
|
||||
raise LSPProtocolError(f"cannot send {method!r}: stdin closed")
|
||||
loop = asyncio.get_running_loop()
|
||||
req_id = self._next_id
|
||||
self._next_id += 1
|
||||
fut: asyncio.Future = loop.create_future()
|
||||
self._pending[req_id] = fut
|
||||
try:
|
||||
self._proc.stdin.write(encode_message(make_request(req_id, method, params)))
|
||||
await self._proc.stdin.drain()
|
||||
except (BrokenPipeError, ConnectionResetError, OSError) as e:
|
||||
self._pending.pop(req_id, None)
|
||||
raise LSPProtocolError(f"send failed for {method!r}: {e}") from e
|
||||
try:
|
||||
return await fut
|
||||
finally:
|
||||
self._pending.pop(req_id, None)
|
||||
|
||||
async def _send_request_with_retry(self, method: str, params: Any, *, timeout: float) -> Any:
|
||||
"""Send a request, retrying on ``ContentModified`` (-32801).
|
||||
|
||||
Other errors propagate. The retry policy matches Claude Code's
|
||||
``LSPServerInstance.sendRequest`` — 3 attempts with delays
|
||||
0.5s, 1.0s, 2.0s.
|
||||
"""
|
||||
for attempt in range(MAX_CONTENT_MODIFIED_RETRIES + 1):
|
||||
try:
|
||||
return await asyncio.wait_for(self._send_request(method, params), timeout=timeout)
|
||||
except LSPRequestError as e:
|
||||
if e.code == ERROR_CONTENT_MODIFIED and attempt < MAX_CONTENT_MODIFIED_RETRIES:
|
||||
await asyncio.sleep(RETRY_BASE_DELAY * (2 ** attempt))
|
||||
continue
|
||||
raise
|
||||
|
||||
async def _send_notification(self, method: str, params: Any) -> None:
|
||||
if self._proc is None or self._proc.stdin is None or self._proc.stdin.is_closing():
|
||||
return
|
||||
try:
|
||||
self._proc.stdin.write(encode_message(make_notification(method, params)))
|
||||
await self._proc.stdin.drain()
|
||||
except (BrokenPipeError, ConnectionResetError, OSError) as e:
|
||||
logger.debug("[%s] notify %s failed: %s", self.server_id, method, e)
|
||||
|
||||
async def _send_response(self, req_id: Any, result: Any) -> None:
|
||||
if self._proc is None or self._proc.stdin is None or self._proc.stdin.is_closing():
|
||||
return
|
||||
try:
|
||||
self._proc.stdin.write(encode_message(make_response(req_id, result)))
|
||||
await self._proc.stdin.drain()
|
||||
except (BrokenPipeError, ConnectionResetError, OSError):
|
||||
pass
|
||||
|
||||
async def _send_error_response(self, req_id: Any, code: int, message: str) -> None:
|
||||
if self._proc is None or self._proc.stdin is None or self._proc.stdin.is_closing():
|
||||
return
|
||||
try:
|
||||
self._proc.stdin.write(encode_message(make_error_response(req_id, code, message)))
|
||||
await self._proc.stdin.drain()
|
||||
except (BrokenPipeError, ConnectionResetError, OSError):
|
||||
pass
|
||||
|
||||
def _dispatch_response(self, req_id: int, msg: dict) -> None:
|
||||
fut = self._pending.get(req_id)
|
||||
if fut is None or fut.done():
|
||||
return
|
||||
if "error" in msg:
|
||||
err = msg["error"] or {}
|
||||
fut.set_exception(
|
||||
LSPRequestError(
|
||||
code=int(err.get("code", -32000)),
|
||||
message=str(err.get("message", "unknown")),
|
||||
data=err.get("data"),
|
||||
)
|
||||
)
|
||||
else:
|
||||
fut.set_result(msg.get("result"))
|
||||
|
||||
async def _dispatch_request(self, req_id: Any, msg: dict) -> None:
|
||||
method = msg.get("method", "")
|
||||
params = msg.get("params")
|
||||
handler = self._request_handlers.get(method)
|
||||
if handler is None:
|
||||
await self._send_error_response(req_id, ERROR_METHOD_NOT_FOUND, f"method not found: {method}")
|
||||
return
|
||||
try:
|
||||
result = await handler(params)
|
||||
except Exception as e: # noqa: BLE001 — protocol must not blow up
|
||||
logger.warning("[%s] request handler %s failed: %s", self.server_id, method, e)
|
||||
await self._send_error_response(req_id, -32000, f"handler failed: {e}")
|
||||
return
|
||||
await self._send_response(req_id, result)
|
||||
|
||||
def _dispatch_notification(self, method: str, msg: dict) -> None:
|
||||
handler = self._notification_handlers.get(method)
|
||||
if handler is None:
|
||||
return
|
||||
try:
|
||||
handler(msg.get("params"))
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.debug("[%s] notification handler %s failed: %s", self.server_id, method, e)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# built-in server-→-client request handlers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _handle_work_done_create(self, params: Any) -> Any:
|
||||
# Acknowledge progress tokens — required by some servers.
|
||||
return None
|
||||
|
||||
async def _handle_workspace_configuration(self, params: Any) -> Any:
|
||||
# Walk dotted sections through initializationOptions. Mirrors
|
||||
# OpenCode's `client.ts:198-220` — return null when missing.
|
||||
if not isinstance(params, dict):
|
||||
return [None]
|
||||
items = params.get("items") or []
|
||||
out: List[Any] = []
|
||||
for item in items:
|
||||
if not isinstance(item, dict):
|
||||
out.append(None)
|
||||
continue
|
||||
section = item.get("section")
|
||||
if not section or not self._init_options:
|
||||
out.append(self._init_options or None)
|
||||
continue
|
||||
cur: Any = self._init_options
|
||||
for part in str(section).split("."):
|
||||
if isinstance(cur, dict) and part in cur:
|
||||
cur = cur[part]
|
||||
else:
|
||||
cur = None
|
||||
break
|
||||
out.append(cur)
|
||||
return out
|
||||
|
||||
async def _handle_register_capability(self, params: Any) -> Any:
|
||||
if not isinstance(params, dict):
|
||||
return None
|
||||
for reg in params.get("registrations") or []:
|
||||
if not isinstance(reg, dict):
|
||||
continue
|
||||
method = reg.get("method")
|
||||
reg_id = reg.get("id")
|
||||
if method == "textDocument/diagnostic" and reg_id:
|
||||
self._diagnostic_registrations[str(reg_id)] = reg
|
||||
self._registration_event.set()
|
||||
return None
|
||||
|
||||
async def _handle_unregister_capability(self, params: Any) -> Any:
|
||||
if not isinstance(params, dict):
|
||||
return None
|
||||
for unreg in params.get("unregisterations") or []:
|
||||
if not isinstance(unreg, dict):
|
||||
continue
|
||||
reg_id = unreg.get("id")
|
||||
if reg_id:
|
||||
self._diagnostic_registrations.pop(str(reg_id), None)
|
||||
return None
|
||||
|
||||
async def _handle_workspace_folders(self, params: Any) -> Any:
|
||||
return [{"name": "workspace", "uri": file_uri(self.workspace_root)}]
|
||||
|
||||
async def _handle_diagnostic_refresh(self, params: Any) -> Any:
|
||||
# We don't honour refresh — we re-pull on every touchFile.
|
||||
return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# publishDiagnostics handler
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _handle_publish_diagnostics(self, params: Any) -> None:
|
||||
if not isinstance(params, dict):
|
||||
return
|
||||
uri = params.get("uri")
|
||||
if not isinstance(uri, str):
|
||||
return
|
||||
path = uri_to_path(uri)
|
||||
diagnostics = params.get("diagnostics") or []
|
||||
if not isinstance(diagnostics, list):
|
||||
diagnostics = []
|
||||
version = params.get("version")
|
||||
loop_time = asyncio.get_event_loop().time()
|
||||
|
||||
if self._seed_first_push and path not in self._first_push_seen:
|
||||
# First push: seed without firing the event so a waiter
|
||||
# doesn't resolve on the very first push (which arrives
|
||||
# before the user-triggered didChange could've produced
|
||||
# fresh diagnostics).
|
||||
self._first_push_seen.add(path)
|
||||
self._push_diagnostics[path] = diagnostics
|
||||
self._published[path] = loop_time
|
||||
if isinstance(version, int):
|
||||
self._published_version[path] = version
|
||||
return
|
||||
|
||||
self._push_diagnostics[path] = diagnostics
|
||||
self._published[path] = loop_time
|
||||
if isinstance(version, int):
|
||||
self._published_version[path] = version
|
||||
self._first_push_seen.add(path)
|
||||
# Bump the monotonic push counter and wake every waiter. We
|
||||
# keep the Event sticky-set so any wait already in progress
|
||||
# resolves; waiters re-check their predicate after waking and
|
||||
# decide whether to keep waiting. ``_push_counter`` is what
|
||||
# they actually compare against to detect a fresh event.
|
||||
self._push_counter += 1
|
||||
self._push_event.set()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# public file-sync API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def open_file(self, path: str, *, language_id: str = "plaintext") -> int:
|
||||
"""Send didOpen (first time) or didChange (subsequent) for ``path``.
|
||||
|
||||
Returns the new document version number that the agent's
|
||||
``wait_for_diagnostics`` should match against.
|
||||
"""
|
||||
if not self.is_running:
|
||||
raise LSPProtocolError("client not running")
|
||||
|
||||
abs_path = os.path.abspath(path)
|
||||
try:
|
||||
text = Path(abs_path).read_text(encoding="utf-8", errors="replace")
|
||||
except OSError as e:
|
||||
raise LSPProtocolError(f"cannot read {abs_path}: {e}") from e
|
||||
|
||||
uri = file_uri(abs_path)
|
||||
existing = self._files.get(abs_path)
|
||||
|
||||
if existing is not None:
|
||||
# Re-open: bump version, fire didChangeWatchedFiles + didChange.
|
||||
await self._send_notification(
|
||||
"workspace/didChangeWatchedFiles",
|
||||
{"changes": [{"uri": uri, "type": 2}]}, # 2 = CHANGED
|
||||
)
|
||||
new_version = existing["version"] + 1
|
||||
old_text = existing["text"]
|
||||
content_changes: List[Dict[str, Any]]
|
||||
if self._sync_kind == 2:
|
||||
content_changes = [
|
||||
{
|
||||
"range": {
|
||||
"start": {"line": 0, "character": 0},
|
||||
"end": _end_position(old_text),
|
||||
},
|
||||
"text": text,
|
||||
}
|
||||
]
|
||||
else:
|
||||
content_changes = [{"text": text}]
|
||||
await self._send_notification(
|
||||
"textDocument/didChange",
|
||||
{
|
||||
"textDocument": {"uri": uri, "version": new_version},
|
||||
"contentChanges": content_changes,
|
||||
},
|
||||
)
|
||||
self._files[abs_path] = {"version": new_version, "text": text}
|
||||
return new_version
|
||||
|
||||
# First open: didChangeWatchedFiles CREATED + didOpen.
|
||||
await self._send_notification(
|
||||
"workspace/didChangeWatchedFiles",
|
||||
{"changes": [{"uri": uri, "type": 1}]}, # 1 = CREATED
|
||||
)
|
||||
# Clear any stale push/pull entries — fresh open should start
|
||||
# from scratch.
|
||||
self._push_diagnostics.pop(abs_path, None)
|
||||
self._pull_diagnostics.pop(abs_path, None)
|
||||
self._published.pop(abs_path, None)
|
||||
self._published_version.pop(abs_path, None)
|
||||
await self._send_notification(
|
||||
"textDocument/didOpen",
|
||||
{
|
||||
"textDocument": {
|
||||
"uri": uri,
|
||||
"languageId": language_id,
|
||||
"version": 0,
|
||||
"text": text,
|
||||
}
|
||||
},
|
||||
)
|
||||
self._files[abs_path] = {"version": 0, "text": text}
|
||||
return 0
|
||||
|
||||
async def save_file(self, path: str) -> None:
|
||||
"""Send didSave for ``path``. Some linters re-scan only on save."""
|
||||
if not self.is_running:
|
||||
return
|
||||
abs_path = os.path.abspath(path)
|
||||
await self._send_notification(
|
||||
"textDocument/didSave",
|
||||
{"textDocument": {"uri": file_uri(abs_path)}},
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# diagnostics: pull + wait
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _pull_document_diagnostics(self, path: str) -> None:
|
||||
"""Send ``textDocument/diagnostic`` for one file.
|
||||
|
||||
Stores results into :attr:`_pull_diagnostics`. Silently
|
||||
no-ops on errors (server may not support the pull endpoint).
|
||||
"""
|
||||
try:
|
||||
params: Dict[str, Any] = {
|
||||
"textDocument": {"uri": file_uri(os.path.abspath(path))}
|
||||
}
|
||||
result = await self._send_request_with_retry(
|
||||
"textDocument/diagnostic",
|
||||
params,
|
||||
timeout=DIAGNOSTICS_REQUEST_TIMEOUT,
|
||||
)
|
||||
except (LSPRequestError, LSPProtocolError, asyncio.TimeoutError) as e:
|
||||
logger.debug("[%s] document diagnostic pull failed: %s", self.server_id, e)
|
||||
return
|
||||
if not isinstance(result, dict):
|
||||
return
|
||||
items = result.get("items")
|
||||
if isinstance(items, list):
|
||||
self._pull_diagnostics[os.path.abspath(path)] = items
|
||||
related = result.get("relatedDocuments")
|
||||
if isinstance(related, dict):
|
||||
for uri, sub in related.items():
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
sub_items = sub.get("items")
|
||||
if isinstance(sub_items, list):
|
||||
self._pull_diagnostics[uri_to_path(uri)] = sub_items
|
||||
|
||||
async def wait_for_diagnostics(
|
||||
self,
|
||||
path: str,
|
||||
version: int,
|
||||
*,
|
||||
mode: str = "document",
|
||||
) -> None:
|
||||
"""Wait for the server to publish diagnostics for ``path`` at ``version``.
|
||||
|
||||
``mode`` is ``"document"`` (5s budget, document pulls) or
|
||||
``"full"`` (10s budget, also workspace pulls). Best-effort —
|
||||
returns silently on timeout. Does NOT throw if the server
|
||||
doesn't support pull diagnostics; we still get the push side.
|
||||
"""
|
||||
budget = DIAGNOSTICS_FULL_WAIT if mode == "full" else DIAGNOSTICS_DOCUMENT_WAIT
|
||||
deadline = asyncio.get_event_loop().time() + budget
|
||||
abs_path = os.path.abspath(path)
|
||||
|
||||
while True:
|
||||
remaining = deadline - asyncio.get_event_loop().time()
|
||||
if remaining <= 0:
|
||||
return
|
||||
|
||||
# Concurrent: document pull + push wait.
|
||||
pull_task = asyncio.create_task(self._pull_document_diagnostics(abs_path))
|
||||
push_task = asyncio.create_task(self._wait_for_fresh_push(abs_path, version, remaining))
|
||||
done, pending = await asyncio.wait(
|
||||
{pull_task, push_task},
|
||||
timeout=remaining,
|
||||
return_when=asyncio.FIRST_COMPLETED,
|
||||
)
|
||||
for t in pending:
|
||||
t.cancel()
|
||||
for t in pending:
|
||||
try:
|
||||
await t
|
||||
except (asyncio.CancelledError, Exception): # noqa: BLE001
|
||||
pass
|
||||
|
||||
# If we got a fresh push for our version, we're done.
|
||||
current_v = self._published_version.get(abs_path)
|
||||
if abs_path in self._published and (
|
||||
current_v is None or current_v >= version
|
||||
):
|
||||
return
|
||||
|
||||
# Pull may have populated _pull_diagnostics — that's also
|
||||
# success.
|
||||
if abs_path in self._pull_diagnostics:
|
||||
return
|
||||
|
||||
# Loop until budget runs out.
|
||||
|
||||
async def _wait_for_fresh_push(self, path: str, version: int, timeout: float) -> None:
|
||||
"""Wait until a publishDiagnostics arrives for ``path`` at ``version``+."""
|
||||
deadline = asyncio.get_event_loop().time() + timeout
|
||||
baseline = self._push_counter
|
||||
while True:
|
||||
current_v = self._published_version.get(path)
|
||||
if path in self._published and (current_v is None or current_v >= version):
|
||||
# Debounce — wait a tick in case more diagnostics arrive
|
||||
# immediately after. TS often emits in pairs. We
|
||||
# snapshot the counter so we wake on a *new* push, not
|
||||
# on the one that satisfied us a moment ago.
|
||||
debounce_baseline = self._push_counter
|
||||
debounce_deadline = asyncio.get_event_loop().time() + PUSH_DEBOUNCE
|
||||
while self._push_counter == debounce_baseline:
|
||||
remaining = debounce_deadline - asyncio.get_event_loop().time()
|
||||
if remaining <= 0:
|
||||
break
|
||||
self._push_event.clear()
|
||||
try:
|
||||
await asyncio.wait_for(self._push_event.wait(), timeout=remaining)
|
||||
except asyncio.TimeoutError:
|
||||
break
|
||||
return
|
||||
remaining = deadline - asyncio.get_event_loop().time()
|
||||
if remaining <= 0:
|
||||
return
|
||||
if self._push_counter > baseline:
|
||||
# New event arrived but predicate still false — re-check
|
||||
# immediately without waiting again.
|
||||
baseline = self._push_counter
|
||||
continue
|
||||
self._push_event.clear()
|
||||
try:
|
||||
await asyncio.wait_for(self._push_event.wait(), timeout=min(remaining, 0.5))
|
||||
except asyncio.TimeoutError:
|
||||
continue
|
||||
|
||||
def diagnostics_for(self, path: str) -> List[Dict[str, Any]]:
|
||||
"""Return current merged + deduped diagnostics for one file.
|
||||
|
||||
Diagnostics from push and pull stores are concatenated and
|
||||
deduplicated by ``(severity, code, message, range)`` content
|
||||
key. Empty list if the server hasn't published anything.
|
||||
"""
|
||||
abs_path = os.path.abspath(path)
|
||||
push = self._push_diagnostics.get(abs_path) or []
|
||||
pull = self._pull_diagnostics.get(abs_path) or []
|
||||
return _dedupe(push, pull)
|
||||
|
||||
|
||||
def _dedupe(*lists: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
seen: Set[str] = set()
|
||||
out: List[Dict[str, Any]] = []
|
||||
for lst in lists:
|
||||
for d in lst:
|
||||
if not isinstance(d, dict):
|
||||
continue
|
||||
key = _diagnostic_key(d)
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
out.append(d)
|
||||
return out
|
||||
|
||||
|
||||
def _diagnostic_key(d: Dict[str, Any]) -> str:
|
||||
"""Content-equality key for a diagnostic.
|
||||
|
||||
Matches the structural-equality used in claude-code's
|
||||
``areDiagnosticsEqual`` — message + severity + source + code +
|
||||
range coords. The range is reduced to a tuple to keep the key
|
||||
stable across dict orderings.
|
||||
"""
|
||||
rng = d.get("range") or {}
|
||||
start = rng.get("start") or {}
|
||||
end = rng.get("end") or {}
|
||||
code = d.get("code")
|
||||
if code is not None and not isinstance(code, str):
|
||||
code = str(code)
|
||||
return "\x00".join(
|
||||
[
|
||||
str(d.get("severity") or 1),
|
||||
str(code or ""),
|
||||
str(d.get("source") or ""),
|
||||
str(d.get("message") or "").strip(),
|
||||
f"{start.get('line', 0)}:{start.get('character', 0)}-{end.get('line', 0)}:{end.get('character', 0)}",
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"LSPClient",
|
||||
"file_uri",
|
||||
"uri_to_path",
|
||||
"INITIALIZE_TIMEOUT",
|
||||
"DIAGNOSTICS_DOCUMENT_WAIT",
|
||||
"DIAGNOSTICS_FULL_WAIT",
|
||||
]
|
||||
@@ -1,213 +0,0 @@
|
||||
"""Structured logging with steady-state silence for the LSP layer.
|
||||
|
||||
The LSP layer fires on every write_file/patch. In a busy session
|
||||
that's hundreds of events. We want users to be able to ``rg`` the
|
||||
log for "did LSP fire on that edit?" without drowning in noise.
|
||||
|
||||
The level model:
|
||||
|
||||
- ``DEBUG`` for steady-state events that have no novel signal:
|
||||
``clean``, ``feature off``, ``extension not mapped``, ``no project
|
||||
root for already-announced file``, ``server unavailable for
|
||||
already-announced binary``. These never reach ``agent.log`` at the
|
||||
default INFO threshold.
|
||||
|
||||
- ``INFO`` for state transitions worth surfacing exactly once per
|
||||
session: ``active for <root>`` the first time a (server_id,
|
||||
workspace_root) client starts, ``no project root for <path>``
|
||||
the first time we see that file. Plus every diagnostic event
|
||||
(those are inherently rare and per-edit, exactly what users grep
|
||||
for).
|
||||
|
||||
- ``WARNING`` for action-required failures: ``server unavailable``
|
||||
(binary not on PATH) the first time per (server_id, binary),
|
||||
``no server configured`` once per language. Per-call WARNING for
|
||||
timeouts and unexpected bridge exceptions.
|
||||
|
||||
The dedup is in-process module-level sets. Each set grows at most by
|
||||
the number of distinct (server_id, root) and (server_id, binary)
|
||||
pairs touched in one Python process — bytes of memory in even an
|
||||
aggressive monorepo session. Bounded LRU was rejected: evicting an
|
||||
entry would risk re-firing the WARNING/INFO line we explicitly want
|
||||
to suppress.
|
||||
|
||||
Grep recipe::
|
||||
|
||||
tail -f ~/.hermes/logs/agent.log | rg 'lsp\\['
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from typing import Tuple
|
||||
|
||||
# Dedicated logger name so the documented grep recipe survives a
|
||||
# ``logging.getLogger(__name__)`` rename of any internal module.
|
||||
event_log = logging.getLogger("hermes.lint.lsp")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Once-per-X dedup sets
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_announce_lock = threading.Lock()
|
||||
_announced_active: set = set() # keys: (server_id, workspace_root)
|
||||
_announced_unavailable: set = set() # keys: (server_id, binary_path_or_name)
|
||||
_announced_no_root: set = set() # keys: (server_id, file_path)
|
||||
_announced_no_server: set = set() # keys: (server_id,)
|
||||
|
||||
|
||||
def _short_path(file_path: str) -> str:
|
||||
"""Render *file_path* relative to the cwd when sensible, else absolute.
|
||||
|
||||
Keeps log lines readable for the common case (the user is inside
|
||||
the project they're editing) without emitting brittle ``../../..``
|
||||
chains for the cross-tree case.
|
||||
"""
|
||||
if not file_path:
|
||||
return file_path
|
||||
try:
|
||||
rel = os.path.relpath(file_path)
|
||||
except ValueError:
|
||||
return file_path
|
||||
if rel.startswith(".." + os.sep) or rel == "..":
|
||||
return file_path
|
||||
return rel
|
||||
|
||||
|
||||
def _emit(server_id: str, level: int, message: str) -> None:
|
||||
event_log.log(level, "lsp[%s] %s", server_id, message)
|
||||
|
||||
|
||||
def _announce_once(bucket: set, key: Tuple) -> bool:
|
||||
"""Return True if *key* has not been announced for *bucket* yet.
|
||||
|
||||
Atomically marks the key as announced so concurrent callers
|
||||
cannot both win the race and double-log.
|
||||
"""
|
||||
with _announce_lock:
|
||||
if key in bucket:
|
||||
return False
|
||||
bucket.add(key)
|
||||
return True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public event helpers — call these from the LSP layer.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def log_clean(server_id: str, file_path: str) -> None:
|
||||
"""No diagnostics emitted for *file_path*. DEBUG (silent at default)."""
|
||||
_emit(server_id, logging.DEBUG, f"clean ({_short_path(file_path)})")
|
||||
|
||||
|
||||
def log_disabled(server_id: str, file_path: str, reason: str) -> None:
|
||||
"""LSP intentionally skipped for this file (feature off, ext unmapped,
|
||||
backend not local, etc.). DEBUG."""
|
||||
_emit(server_id, logging.DEBUG, f"skipped: {reason} ({_short_path(file_path)})")
|
||||
|
||||
|
||||
def log_active(server_id: str, workspace_root: str) -> None:
|
||||
"""A new LSP client started for (server_id, workspace_root).
|
||||
|
||||
INFO once per (server_id, workspace_root); DEBUG thereafter.
|
||||
Lets users verify "is LSP actually running?" with a single grep.
|
||||
"""
|
||||
key = (server_id, workspace_root)
|
||||
if _announce_once(_announced_active, key):
|
||||
_emit(server_id, logging.INFO, f"active for {workspace_root}")
|
||||
else:
|
||||
_emit(server_id, logging.DEBUG, f"reused client for {workspace_root}")
|
||||
|
||||
|
||||
def log_diagnostics(server_id: str, file_path: str, count: int) -> None:
|
||||
"""Diagnostics arrived for a file. INFO every time — these are the
|
||||
failure signals users actually want to grep for, and they are
|
||||
inherently rare per edit."""
|
||||
_emit(server_id, logging.INFO, f"{count} diags ({_short_path(file_path)})")
|
||||
|
||||
|
||||
def log_no_project_root(server_id: str, file_path: str) -> None:
|
||||
"""File had no recognised project marker. INFO once per file,
|
||||
DEBUG thereafter."""
|
||||
key = (server_id, file_path)
|
||||
if _announce_once(_announced_no_root, key):
|
||||
_emit(server_id, logging.INFO, f"no project root for {_short_path(file_path)}")
|
||||
else:
|
||||
_emit(server_id, logging.DEBUG, f"no project root for {_short_path(file_path)}")
|
||||
|
||||
|
||||
def log_server_unavailable(server_id: str, binary_or_pkg: str) -> None:
|
||||
"""The server binary couldn't be resolved. WARNING once per
|
||||
(server_id, binary), DEBUG thereafter so a hundred subsequent
|
||||
.py edits don't spam the log."""
|
||||
key = (server_id, binary_or_pkg)
|
||||
if _announce_once(_announced_unavailable, key):
|
||||
_emit(
|
||||
server_id,
|
||||
logging.WARNING,
|
||||
f"server unavailable: {binary_or_pkg} not found "
|
||||
"(install via `hermes lsp install <id>` or set lsp.servers.<id>.command)",
|
||||
)
|
||||
else:
|
||||
_emit(server_id, logging.DEBUG, f"server still unavailable: {binary_or_pkg}")
|
||||
|
||||
|
||||
def log_no_server_configured(server_id: str) -> None:
|
||||
"""No spawn recipe for this language. WARNING once."""
|
||||
if _announce_once(_announced_no_server, (server_id,)):
|
||||
_emit(server_id, logging.WARNING, "no server configured")
|
||||
|
||||
|
||||
def log_timeout(server_id: str, file_path: str, kind: str = "diagnostics") -> None:
|
||||
"""A request to the server timed out. WARNING every time — these are
|
||||
inherently novel events worth surfacing on each occurrence."""
|
||||
_emit(
|
||||
server_id,
|
||||
logging.WARNING,
|
||||
f"{kind} timed out for {_short_path(file_path)}",
|
||||
)
|
||||
|
||||
|
||||
def log_server_error(server_id: str, file_path: str, exc: BaseException) -> None:
|
||||
"""An unexpected exception bubbled out of the LSP layer. WARNING."""
|
||||
_emit(
|
||||
server_id,
|
||||
logging.WARNING,
|
||||
f"unexpected error for {_short_path(file_path)}: {type(exc).__name__}: {exc}",
|
||||
)
|
||||
|
||||
|
||||
def log_spawn_failed(server_id: str, workspace_root: str, exc: BaseException) -> None:
|
||||
"""The LSP server failed to spawn or initialize. WARNING."""
|
||||
_emit(
|
||||
server_id,
|
||||
logging.WARNING,
|
||||
f"spawn/initialize failed for {workspace_root}: {type(exc).__name__}: {exc}",
|
||||
)
|
||||
|
||||
|
||||
def reset_announce_caches() -> None:
|
||||
"""Test-only: clear the dedup caches. Production code never calls this."""
|
||||
with _announce_lock:
|
||||
_announced_active.clear()
|
||||
_announced_unavailable.clear()
|
||||
_announced_no_root.clear()
|
||||
_announced_no_server.clear()
|
||||
|
||||
|
||||
__all__ = [
|
||||
"event_log",
|
||||
"log_clean",
|
||||
"log_disabled",
|
||||
"log_active",
|
||||
"log_diagnostics",
|
||||
"log_no_project_root",
|
||||
"log_server_unavailable",
|
||||
"log_no_server_configured",
|
||||
"log_timeout",
|
||||
"log_server_error",
|
||||
"log_spawn_failed",
|
||||
"reset_announce_caches",
|
||||
]
|
||||
@@ -1,376 +0,0 @@
|
||||
"""Auto-installation of LSP server binaries.
|
||||
|
||||
Tries to install missing servers using whatever package manager is
|
||||
appropriate. All installs go to a Hermes-owned bin staging dir,
|
||||
``<HERMES_HOME>/lsp/bin/``, so we don't pollute the user's global
|
||||
toolchain.
|
||||
|
||||
Strategies:
|
||||
|
||||
- ``auto`` — attempt to install with the best available package
|
||||
manager. This is the default.
|
||||
- ``manual`` — never install; if a binary is missing, the server is
|
||||
silently skipped and the user is told about it via ``hermes lsp
|
||||
status``.
|
||||
- ``off`` — same as ``manual`` for now (kept distinct so we can
|
||||
evolve behavior later, e.g. logging differently).
|
||||
|
||||
The actual installs happen synchronously the first time a server is
|
||||
needed and concurrent calls to :func:`try_install` for the same
|
||||
package are deduplicated via a per-package lock.
|
||||
|
||||
Failure modes are non-fatal: every install path is wrapped in
|
||||
try/except and returns ``None`` on failure. The tool layer then
|
||||
falls back to its in-process syntax checker, exactly as if the user
|
||||
hadn't enabled LSP at all.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger("agent.lsp.install")
|
||||
|
||||
# Package-name → install-strategy hint registry. Each entry is a
|
||||
# tuple of strategy name + package name + executable name. When the
|
||||
# install completes, we look for the executable in
|
||||
# ``<HERMES_HOME>/lsp/bin/`` first, then on PATH.
|
||||
#
|
||||
# Optional fields:
|
||||
# - ``extra_pkgs``: list of sibling packages to install alongside
|
||||
# ``pkg`` in the same node_modules tree. Used when an LSP server
|
||||
# has a runtime peer dependency that npm doesn't auto-pull (e.g.
|
||||
# typescript-language-server needs ``typescript``).
|
||||
INSTALL_RECIPES: Dict[str, Dict[str, Any]] = {
|
||||
# Python
|
||||
"pyright": {"strategy": "npm", "pkg": "pyright", "bin": "pyright-langserver"},
|
||||
# JS/TS family
|
||||
"typescript-language-server": {
|
||||
"strategy": "npm",
|
||||
"pkg": "typescript-language-server",
|
||||
"bin": "typescript-language-server",
|
||||
# typescript-language-server requires the `typescript` SDK
|
||||
# (tsserver) to be importable from the same node_modules tree;
|
||||
# otherwise initialize() fails with "Could not find a valid
|
||||
# TypeScript installation". Install them together.
|
||||
"extra_pkgs": ["typescript"],
|
||||
},
|
||||
"@vue/language-server": {
|
||||
"strategy": "npm",
|
||||
"pkg": "@vue/language-server",
|
||||
"bin": "vue-language-server",
|
||||
},
|
||||
"svelte-language-server": {
|
||||
"strategy": "npm",
|
||||
"pkg": "svelte-language-server",
|
||||
"bin": "svelteserver",
|
||||
},
|
||||
"@astrojs/language-server": {
|
||||
"strategy": "npm",
|
||||
"pkg": "@astrojs/language-server",
|
||||
"bin": "astro-ls",
|
||||
},
|
||||
"yaml-language-server": {
|
||||
"strategy": "npm",
|
||||
"pkg": "yaml-language-server",
|
||||
"bin": "yaml-language-server",
|
||||
},
|
||||
"bash-language-server": {
|
||||
"strategy": "npm",
|
||||
"pkg": "bash-language-server",
|
||||
"bin": "bash-language-server",
|
||||
},
|
||||
"intelephense": {"strategy": "npm", "pkg": "intelephense", "bin": "intelephense"},
|
||||
"dockerfile-language-server-nodejs": {
|
||||
"strategy": "npm",
|
||||
"pkg": "dockerfile-language-server-nodejs",
|
||||
"bin": "docker-langserver",
|
||||
},
|
||||
# Go
|
||||
"gopls": {"strategy": "go", "pkg": "golang.org/x/tools/gopls@latest", "bin": "gopls"},
|
||||
# Rust — too heavy (hundreds of MB to bootstrap). We do NOT
|
||||
# auto-install rust-analyzer; users install via rustup.
|
||||
"rust-analyzer": {"strategy": "manual", "pkg": "", "bin": "rust-analyzer"},
|
||||
# C/C++ — manual (clangd ships with LLVM, very heavy)
|
||||
"clangd": {"strategy": "manual", "pkg": "", "bin": "clangd"},
|
||||
# Lua — manual (LuaLS is platform-specific binaries from GitHub
|
||||
# releases; complex enough that we punt to the user)
|
||||
"lua-language-server": {"strategy": "manual", "pkg": "", "bin": "lua-language-server"},
|
||||
}
|
||||
|
||||
|
||||
_install_locks: Dict[str, threading.Lock] = {}
|
||||
_install_results: Dict[str, Optional[str]] = {}
|
||||
_install_lock_meta = threading.Lock()
|
||||
|
||||
|
||||
def hermes_lsp_bin_dir() -> Path:
|
||||
"""Return the Hermes-owned bin staging dir for LSP servers."""
|
||||
home = os.environ.get("HERMES_HOME")
|
||||
if home is None:
|
||||
home = os.path.join(os.path.expanduser("~"), ".hermes")
|
||||
p = Path(home) / "lsp" / "bin"
|
||||
p.mkdir(parents=True, exist_ok=True)
|
||||
return p
|
||||
|
||||
|
||||
def _existing_binary(name: str) -> Optional[str]:
|
||||
"""Probe the staging dir + PATH for a binary named ``name``."""
|
||||
staged = hermes_lsp_bin_dir() / name
|
||||
if staged.exists() and os.access(staged, os.X_OK):
|
||||
return str(staged)
|
||||
on_path = shutil.which(name)
|
||||
if on_path:
|
||||
return on_path
|
||||
return None
|
||||
|
||||
|
||||
def _get_lock(pkg: str) -> threading.Lock:
|
||||
with _install_lock_meta:
|
||||
lock = _install_locks.get(pkg)
|
||||
if lock is None:
|
||||
lock = threading.Lock()
|
||||
_install_locks[pkg] = lock
|
||||
return lock
|
||||
|
||||
|
||||
def try_install(pkg: str, strategy: str = "auto") -> Optional[str]:
|
||||
"""Try to install ``pkg`` and return the binary path if successful.
|
||||
|
||||
``strategy`` is ``"auto"``, ``"manual"``, or ``"off"``. In
|
||||
``manual``/``off`` mode, this function only probes for an
|
||||
existing binary and returns ``None`` if not found.
|
||||
|
||||
The install is cached per-package — a second call returns the
|
||||
same path (or ``None``) without reinstalling. Concurrent calls
|
||||
are serialized.
|
||||
"""
|
||||
if strategy not in {"auto",}:
|
||||
# Only ``auto`` triggers an actual install. In manual/off,
|
||||
# we still check whether the binary already exists.
|
||||
recipe = INSTALL_RECIPES.get(pkg, {})
|
||||
bin_name = recipe.get("bin", pkg)
|
||||
return _existing_binary(bin_name)
|
||||
|
||||
if pkg in _install_results:
|
||||
return _install_results[pkg]
|
||||
|
||||
lock = _get_lock(pkg)
|
||||
with lock:
|
||||
# Double-check after acquiring lock.
|
||||
if pkg in _install_results:
|
||||
return _install_results[pkg]
|
||||
result = _do_install(pkg)
|
||||
_install_results[pkg] = result
|
||||
return result
|
||||
|
||||
|
||||
def _do_install(pkg: str) -> Optional[str]:
|
||||
recipe = INSTALL_RECIPES.get(pkg)
|
||||
if recipe is None:
|
||||
# Not in our registry — best-effort: just probe PATH.
|
||||
return shutil.which(pkg)
|
||||
|
||||
strategy = recipe.get("strategy", "manual")
|
||||
bin_name = recipe.get("bin", pkg)
|
||||
|
||||
# Check if already present (shutil.which or staging dir)
|
||||
existing = _existing_binary(bin_name)
|
||||
if existing:
|
||||
return existing
|
||||
|
||||
if strategy == "manual":
|
||||
logger.debug("[install] %s requires manual install (recipe=%s)", pkg, recipe)
|
||||
return None
|
||||
|
||||
if strategy == "npm":
|
||||
return _install_npm(
|
||||
recipe.get("pkg", pkg),
|
||||
bin_name,
|
||||
extra_pkgs=recipe.get("extra_pkgs") or [],
|
||||
)
|
||||
if strategy == "go":
|
||||
return _install_go(recipe.get("pkg", pkg), bin_name)
|
||||
if strategy == "pip":
|
||||
return _install_pip(recipe.get("pkg", pkg), bin_name)
|
||||
|
||||
logger.warning("[install] unknown strategy %r for %s", strategy, pkg)
|
||||
return None
|
||||
|
||||
|
||||
def _install_npm(
|
||||
pkg: str,
|
||||
bin_name: str,
|
||||
extra_pkgs: Optional[list] = None,
|
||||
) -> Optional[str]:
|
||||
"""Install an npm package into our staging dir.
|
||||
|
||||
Uses ``npm install --prefix`` so the binaries land in
|
||||
``<staging>/node_modules/.bin/<bin_name>`` and we symlink them up
|
||||
one level for direct PATH-style access.
|
||||
|
||||
``extra_pkgs`` is a list of sibling packages to install in the
|
||||
same ``node_modules`` tree. Used for LSP servers with runtime
|
||||
peer deps that npm doesn't auto-pull (typescript-language-server
|
||||
needs ``typescript`` next to it; intelephense ships standalone).
|
||||
"""
|
||||
npm = shutil.which("npm")
|
||||
if npm is None:
|
||||
logger.info("[install] cannot install %s: npm not on PATH", pkg)
|
||||
return None
|
||||
staging = hermes_lsp_bin_dir().parent # <HERMES_HOME>/lsp/
|
||||
install_targets = [pkg] + list(extra_pkgs or [])
|
||||
try:
|
||||
logger.info(
|
||||
"[install] npm install --prefix %s %s",
|
||||
staging,
|
||||
" ".join(install_targets),
|
||||
)
|
||||
proc = subprocess.run(
|
||||
[npm, "install", "--prefix", str(staging), "--silent", "--no-fund", "--no-audit", *install_targets],
|
||||
check=False,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=300,
|
||||
)
|
||||
if proc.returncode != 0:
|
||||
logger.warning(
|
||||
"[install] npm install failed for %s: %s", pkg, proc.stderr.strip()[:500]
|
||||
)
|
||||
return None
|
||||
except (subprocess.TimeoutExpired, OSError) as e:
|
||||
logger.warning("[install] npm install errored for %s: %s", pkg, e)
|
||||
return None
|
||||
|
||||
# Find the bin
|
||||
nm_bin = staging / "node_modules" / ".bin" / bin_name
|
||||
if os.name == "nt":
|
||||
# On Windows npm sometimes drops `.cmd` shims
|
||||
candidates = [nm_bin, nm_bin.with_suffix(".cmd")]
|
||||
else:
|
||||
candidates = [nm_bin]
|
||||
for c in candidates:
|
||||
if c.exists():
|
||||
# Symlink into our `lsp/bin/` for stable PATH access.
|
||||
link = hermes_lsp_bin_dir() / c.name
|
||||
if not link.exists():
|
||||
try:
|
||||
link.symlink_to(c)
|
||||
except (OSError, NotImplementedError):
|
||||
# Symlinks fail on some Windows setups — copy instead.
|
||||
try:
|
||||
shutil.copy2(c, link)
|
||||
except OSError:
|
||||
return str(c)
|
||||
return str(link if link.exists() else c)
|
||||
logger.warning("[install] npm install for %s succeeded but bin %s not found", pkg, bin_name)
|
||||
return None
|
||||
|
||||
|
||||
def _install_go(pkg: str, bin_name: str) -> Optional[str]:
|
||||
"""Install a Go module to GOBIN=<staging>."""
|
||||
go = shutil.which("go")
|
||||
if go is None:
|
||||
logger.info("[install] cannot install %s: go not on PATH", pkg)
|
||||
return None
|
||||
staging = hermes_lsp_bin_dir()
|
||||
env = dict(os.environ)
|
||||
env["GOBIN"] = str(staging)
|
||||
try:
|
||||
logger.info("[install] go install %s (GOBIN=%s)", pkg, staging)
|
||||
proc = subprocess.run(
|
||||
[go, "install", pkg],
|
||||
check=False,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=600,
|
||||
env=env,
|
||||
)
|
||||
if proc.returncode != 0:
|
||||
logger.warning(
|
||||
"[install] go install failed for %s: %s", pkg, proc.stderr.strip()[:500]
|
||||
)
|
||||
return None
|
||||
except (subprocess.TimeoutExpired, OSError) as e:
|
||||
logger.warning("[install] go install errored for %s: %s", pkg, e)
|
||||
return None
|
||||
bin_path = staging / bin_name
|
||||
if os.name == "nt":
|
||||
bin_path = bin_path.with_suffix(".exe")
|
||||
if bin_path.exists():
|
||||
return str(bin_path)
|
||||
logger.warning("[install] go install for %s succeeded but bin %s not found", pkg, bin_name)
|
||||
return None
|
||||
|
||||
|
||||
def _install_pip(pkg: str, bin_name: str) -> Optional[str]:
|
||||
"""Install a Python package into a hermes-owned target dir.
|
||||
|
||||
We avoid polluting the user's site-packages by using
|
||||
``pip install --target``. Bins go into
|
||||
``<staging>/python-packages/bin/`` which we symlink into
|
||||
``<staging>/bin``. Note: this only works for packages that ship a
|
||||
console script.
|
||||
"""
|
||||
pip_target = hermes_lsp_bin_dir().parent / "python-packages"
|
||||
pip_target.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
logger.info("[install] pip install --target %s %s", pip_target, pkg)
|
||||
proc = subprocess.run(
|
||||
[sys.executable, "-m", "pip", "install", "--target", str(pip_target), "--quiet", pkg],
|
||||
check=False,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=300,
|
||||
)
|
||||
if proc.returncode != 0:
|
||||
logger.warning(
|
||||
"[install] pip install failed for %s: %s", pkg, proc.stderr.strip()[:500]
|
||||
)
|
||||
return None
|
||||
except (subprocess.TimeoutExpired, OSError) as e:
|
||||
logger.warning("[install] pip install errored for %s: %s", pkg, e)
|
||||
return None
|
||||
# Look for the script
|
||||
bin_path = pip_target / "bin" / bin_name
|
||||
if bin_path.exists():
|
||||
link = hermes_lsp_bin_dir() / bin_name
|
||||
if not link.exists():
|
||||
try:
|
||||
link.symlink_to(bin_path)
|
||||
except (OSError, NotImplementedError):
|
||||
try:
|
||||
shutil.copy2(bin_path, link)
|
||||
except OSError:
|
||||
return str(bin_path)
|
||||
return str(link if link.exists() else bin_path)
|
||||
return None
|
||||
|
||||
|
||||
def detect_status(pkg: str) -> str:
|
||||
"""Return ``installed``, ``missing``, or ``manual-only`` for a package.
|
||||
|
||||
Used by the ``hermes lsp status`` CLI to give users a quick
|
||||
overview of what's available without spawning anything.
|
||||
"""
|
||||
recipe = INSTALL_RECIPES.get(pkg)
|
||||
bin_name = recipe.get("bin", pkg) if recipe else pkg
|
||||
if _existing_binary(bin_name):
|
||||
return "installed"
|
||||
if recipe and recipe.get("strategy") == "manual":
|
||||
return "manual-only"
|
||||
return "missing"
|
||||
|
||||
|
||||
__all__ = [
|
||||
"INSTALL_RECIPES",
|
||||
"try_install",
|
||||
"detect_status",
|
||||
"hermes_lsp_bin_dir",
|
||||
]
|
||||
@@ -1,644 +0,0 @@
|
||||
"""Service-level orchestration for LSP clients.
|
||||
|
||||
The :class:`LSPService` is the bridge between the synchronous
|
||||
file_operations layer and the async :class:`agent.lsp.client.LSPClient`.
|
||||
|
||||
Design choices:
|
||||
|
||||
- A **single asyncio event loop** runs in a background thread. All
|
||||
client work happens on that loop. Synchronous callers from
|
||||
``tools/file_operations.py`` use :meth:`get_diagnostics_sync` to
|
||||
open + wait + drain in one blocking call.
|
||||
|
||||
- One client per ``(server_id, workspace_root)`` key. Lazy spawn:
|
||||
the first request for a key spawns the client; subsequent requests
|
||||
re-use it.
|
||||
|
||||
- A **broken-set** records ``(server_id, workspace_root)`` pairs that
|
||||
failed to spawn or initialize. These are never retried for the
|
||||
life of the service. Mirrors OpenCode's design.
|
||||
|
||||
- A **delta baseline** map keeps "diagnostics-as-of-the-last-snapshot"
|
||||
per file. ``snapshot_baseline()`` is called BEFORE a write; the
|
||||
next ``get_diagnostics_sync()`` returns only diagnostics that
|
||||
weren't in the baseline. This is the lift from Claude Code's
|
||||
``beforeFileEdited`` / ``getNewDiagnostics`` pattern, except wired
|
||||
to the local LSP layer instead of MCP IDE RPC.
|
||||
|
||||
The service is **off by default** — call :meth:`is_active` to check
|
||||
whether it's actually doing anything. When LSP is disabled in
|
||||
config, when no git workspace can be detected, when all configured
|
||||
servers are missing binaries and auto-install is off, ``is_active``
|
||||
returns False and the file_operations layer falls through to the
|
||||
in-process syntax check.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from concurrent.futures import Future as ConcurrentFuture
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple
|
||||
|
||||
from agent.lsp import eventlog
|
||||
from agent.lsp.client import (
|
||||
DIAGNOSTICS_DOCUMENT_WAIT,
|
||||
LSPClient,
|
||||
file_uri,
|
||||
)
|
||||
from agent.lsp.servers import (
|
||||
ServerContext,
|
||||
ServerDef,
|
||||
SpawnSpec,
|
||||
find_server_for_file,
|
||||
language_id_for,
|
||||
)
|
||||
from agent.lsp.workspace import (
|
||||
clear_cache,
|
||||
is_inside_workspace,
|
||||
resolve_workspace_for_file,
|
||||
)
|
||||
|
||||
logger = logging.getLogger("agent.lsp.manager")
|
||||
|
||||
DEFAULT_IDLE_TIMEOUT = 600 # seconds; servers idle for >10min get reaped
|
||||
|
||||
|
||||
class _BackgroundLoop:
|
||||
"""A daemon thread that owns one asyncio event loop.
|
||||
|
||||
Provides :meth:`run` for synchronous callers — submits a coroutine
|
||||
to the loop and blocks until it finishes (or a timeout fires).
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._loop: Optional[asyncio.AbstractEventLoop] = None
|
||||
self._thread: Optional[threading.Thread] = None
|
||||
self._ready = threading.Event()
|
||||
|
||||
def start(self) -> None:
|
||||
if self._thread is not None:
|
||||
return
|
||||
self._thread = threading.Thread(
|
||||
target=self._run_forever,
|
||||
name="hermes-lsp-loop",
|
||||
daemon=True,
|
||||
)
|
||||
self._thread.start()
|
||||
self._ready.wait(timeout=5.0)
|
||||
|
||||
def _run_forever(self) -> None:
|
||||
loop = asyncio.new_event_loop()
|
||||
self._loop = loop
|
||||
asyncio.set_event_loop(loop)
|
||||
self._ready.set()
|
||||
try:
|
||||
loop.run_forever()
|
||||
finally:
|
||||
try:
|
||||
loop.close()
|
||||
except Exception: # noqa: BLE001
|
||||
pass
|
||||
|
||||
def run(self, coro, *, timeout: Optional[float] = None) -> Any:
|
||||
"""Submit a coroutine to the loop and block until done.
|
||||
|
||||
Returns the coroutine's result, or raises its exception.
|
||||
"""
|
||||
from agent.async_utils import safe_schedule_threadsafe
|
||||
if self._loop is None:
|
||||
if asyncio.iscoroutine(coro):
|
||||
coro.close()
|
||||
raise RuntimeError("background loop not started")
|
||||
fut = safe_schedule_threadsafe(coro, self._loop)
|
||||
if fut is None:
|
||||
raise RuntimeError("background loop not running")
|
||||
try:
|
||||
return fut.result(timeout=timeout)
|
||||
except Exception:
|
||||
fut.cancel()
|
||||
raise
|
||||
|
||||
def stop(self) -> None:
|
||||
loop = self._loop
|
||||
if loop is None:
|
||||
return
|
||||
try:
|
||||
loop.call_soon_threadsafe(loop.stop)
|
||||
except RuntimeError:
|
||||
pass
|
||||
if self._thread is not None:
|
||||
self._thread.join(timeout=2.0)
|
||||
self._loop = None
|
||||
self._thread = None
|
||||
|
||||
|
||||
class LSPService:
|
||||
"""The process-wide LSP service.
|
||||
|
||||
Created once via :meth:`create_from_config`; the
|
||||
:func:`agent.lsp.get_service` accessor manages the singleton.
|
||||
Most callers should use that accessor rather than constructing
|
||||
:class:`LSPService` directly.
|
||||
"""
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# construction + factory
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
enabled: bool,
|
||||
wait_mode: str,
|
||||
wait_timeout: float,
|
||||
install_strategy: str,
|
||||
binary_overrides: Optional[Dict[str, List[str]]] = None,
|
||||
env_overrides: Optional[Dict[str, Dict[str, str]]] = None,
|
||||
init_overrides: Optional[Dict[str, Dict[str, Any]]] = None,
|
||||
disabled_servers: Optional[List[str]] = None,
|
||||
idle_timeout: float = DEFAULT_IDLE_TIMEOUT,
|
||||
) -> None:
|
||||
self._enabled = enabled
|
||||
self._wait_mode = wait_mode if wait_mode in {"document", "full"} else "document"
|
||||
self._wait_timeout = wait_timeout
|
||||
self._install_strategy = install_strategy
|
||||
self._binary_overrides = binary_overrides or {}
|
||||
self._env_overrides = env_overrides or {}
|
||||
self._init_overrides = init_overrides or {}
|
||||
self._disabled_servers = set(disabled_servers or [])
|
||||
self._idle_timeout = idle_timeout
|
||||
|
||||
self._loop = _BackgroundLoop()
|
||||
if self._enabled:
|
||||
self._loop.start()
|
||||
|
||||
# Per-(server_id, workspace_root) state
|
||||
self._clients: Dict[Tuple[str, str], LSPClient] = {}
|
||||
self._broken: set = set()
|
||||
self._spawning: Dict[Tuple[str, str], asyncio.Future] = {}
|
||||
self._last_used: Dict[Tuple[str, str], float] = {}
|
||||
self._state_lock = threading.Lock()
|
||||
|
||||
# Delta baseline: file path → snapshot of diagnostics taken
|
||||
# immediately before a write. ``get_diagnostics_sync`` filters
|
||||
# out anything in the baseline so the agent only sees errors
|
||||
# introduced by the current edit.
|
||||
self._delta_baseline: Dict[str, List[Dict[str, Any]]] = {}
|
||||
|
||||
@classmethod
|
||||
def create_from_config(cls) -> Optional["LSPService"]:
|
||||
"""Build a service from ``hermes_cli.config`` settings.
|
||||
|
||||
Returns ``None`` if the config can't be loaded. The service
|
||||
itself returns ``is_active()`` False when LSP is disabled.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.debug("LSP config load failed: %s", e)
|
||||
return None
|
||||
|
||||
lsp_cfg = (cfg.get("lsp") or {}) if isinstance(cfg, dict) else {}
|
||||
if not isinstance(lsp_cfg, dict):
|
||||
lsp_cfg = {}
|
||||
|
||||
enabled = bool(lsp_cfg.get("enabled", True))
|
||||
wait_mode = lsp_cfg.get("wait_mode", "document")
|
||||
wait_timeout = float(lsp_cfg.get("wait_timeout", DIAGNOSTICS_DOCUMENT_WAIT))
|
||||
install_strategy = lsp_cfg.get("install_strategy", "auto")
|
||||
servers_cfg = lsp_cfg.get("servers") or {}
|
||||
disabled = []
|
||||
binary_overrides: Dict[str, List[str]] = {}
|
||||
env_overrides: Dict[str, Dict[str, str]] = {}
|
||||
init_overrides: Dict[str, Dict[str, Any]] = {}
|
||||
if isinstance(servers_cfg, dict):
|
||||
for name, sub in servers_cfg.items():
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
if sub.get("disabled"):
|
||||
disabled.append(name)
|
||||
cmd = sub.get("command")
|
||||
if isinstance(cmd, list) and cmd:
|
||||
binary_overrides[name] = cmd
|
||||
env = sub.get("env")
|
||||
if isinstance(env, dict):
|
||||
env_overrides[name] = {k: str(v) for k, v in env.items()}
|
||||
init = sub.get("initialization_options")
|
||||
if isinstance(init, dict):
|
||||
init_overrides[name] = init
|
||||
|
||||
return cls(
|
||||
enabled=enabled,
|
||||
wait_mode=wait_mode,
|
||||
wait_timeout=wait_timeout,
|
||||
install_strategy=install_strategy,
|
||||
binary_overrides=binary_overrides,
|
||||
env_overrides=env_overrides,
|
||||
init_overrides=init_overrides,
|
||||
disabled_servers=disabled,
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# public API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def is_active(self) -> bool:
|
||||
"""Return True iff this service should be consulted at all."""
|
||||
return self._enabled
|
||||
|
||||
def enabled_for(self, file_path: str) -> bool:
|
||||
"""Return True iff LSP should run for this specific file.
|
||||
|
||||
Gates on workspace detection (file or cwd inside a git worktree),
|
||||
on whether any registered server matches the extension, and
|
||||
on whether the (server_id, workspace_root) pair is in the
|
||||
broken-set from a previous spawn failure.
|
||||
|
||||
Files in already-broken pairs return False so the file_operations
|
||||
layer skips the LSP path entirely — no spawn attempts, no
|
||||
timeout cost — until the service is restarted (``hermes lsp
|
||||
restart``) or the process exits.
|
||||
"""
|
||||
if not self._enabled:
|
||||
return False
|
||||
srv = find_server_for_file(file_path)
|
||||
if srv is None or srv.server_id in self._disabled_servers:
|
||||
return False
|
||||
ws_root, gated_in = resolve_workspace_for_file(file_path)
|
||||
if not (ws_root and gated_in):
|
||||
return False
|
||||
# Broken-set short-circuit. Use the per-server root if we can
|
||||
# compute one cheaply; otherwise fall back to the workspace
|
||||
# root as the broken key (which is what _get_or_spawn would
|
||||
# have used anyway when it failed).
|
||||
try:
|
||||
per_server_root = srv.resolve_root(file_path, ws_root) or ws_root
|
||||
except Exception: # noqa: BLE001
|
||||
per_server_root = ws_root
|
||||
if (srv.server_id, per_server_root) in self._broken:
|
||||
return False
|
||||
return True
|
||||
|
||||
def snapshot_baseline(self, file_path: str) -> None:
|
||||
"""Snapshot current diagnostics for ``file_path`` as the delta baseline.
|
||||
|
||||
Called BEFORE a write so the next ``get_diagnostics_sync()``
|
||||
can filter out pre-existing errors. Best-effort — failures
|
||||
are silently swallowed so a flaky server can't break a write.
|
||||
|
||||
Outer timeouts (e.g. server hangs during initialize) mark the
|
||||
(server_id, workspace_root) pair as broken so subsequent edits
|
||||
skip it instantly instead of re-paying the timeout cost.
|
||||
"""
|
||||
if not self.enabled_for(file_path):
|
||||
return
|
||||
try:
|
||||
diags = self._loop.run(self._snapshot_async(file_path), timeout=8.0)
|
||||
self._delta_baseline[os.path.abspath(file_path)] = diags or []
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.debug("baseline snapshot failed for %s: %s", file_path, e)
|
||||
self._mark_broken_for_file(file_path, e)
|
||||
self._delta_baseline[os.path.abspath(file_path)] = []
|
||||
|
||||
def get_diagnostics_sync(
|
||||
self,
|
||||
file_path: str,
|
||||
*,
|
||||
delta: bool = True,
|
||||
timeout: Optional[float] = None,
|
||||
line_shift: Optional[Callable[[int], Optional[int]]] = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Synchronously open ``file_path`` in the right server, wait for
|
||||
diagnostics, return them.
|
||||
|
||||
If ``delta`` is True (default), the result is filtered against
|
||||
any baseline previously captured via :meth:`snapshot_baseline`.
|
||||
Diagnostics present in the baseline are removed so the caller
|
||||
only sees errors introduced by the current edit.
|
||||
|
||||
When ``line_shift`` is provided, baseline diagnostics are
|
||||
remapped through it before the set-difference. This handles
|
||||
the case where the edit deleted or inserted lines, causing
|
||||
pre-existing diagnostics below the edit point to surface at
|
||||
different line numbers in the post-edit snapshot — without
|
||||
the shift, they'd all look "introduced by this edit". Pass
|
||||
a callable built by
|
||||
:func:`agent.lsp.range_shift.build_line_shift` (pre_text,
|
||||
post_text). Omit when pre/post content isn't available;
|
||||
the unshifted comparison still catches diagnostics that
|
||||
didn't move.
|
||||
|
||||
Returns an empty list when LSP is disabled, when no workspace
|
||||
can be detected, when no server matches, or when the server
|
||||
can't be spawned. Never raises.
|
||||
"""
|
||||
if not self.enabled_for(file_path):
|
||||
return []
|
||||
|
||||
# Resolve server_id eagerly so we can emit structured logs even
|
||||
# when the request errors out below.
|
||||
srv = find_server_for_file(file_path)
|
||||
server_id = srv.server_id if srv else "?"
|
||||
|
||||
try:
|
||||
t = timeout if timeout is not None else self._wait_timeout + 2.0
|
||||
diags = self._loop.run(self._open_and_wait_async(file_path), timeout=t) or []
|
||||
except asyncio.TimeoutError as e:
|
||||
eventlog.log_timeout(server_id, file_path)
|
||||
logger.debug("LSP diagnostics timeout for %s: %s", file_path, e)
|
||||
self._mark_broken_for_file(file_path, e)
|
||||
return []
|
||||
except Exception as e: # noqa: BLE001
|
||||
eventlog.log_server_error(server_id, file_path, e)
|
||||
logger.debug("LSP diagnostics fetch failed for %s: %s", file_path, e)
|
||||
self._mark_broken_for_file(file_path, e)
|
||||
return []
|
||||
|
||||
abs_path = os.path.abspath(file_path)
|
||||
if delta:
|
||||
baseline = self._delta_baseline.get(abs_path) or []
|
||||
if baseline:
|
||||
if line_shift is not None:
|
||||
# Remap baseline diagnostics into post-edit
|
||||
# coordinates so shifted-but-otherwise-identical
|
||||
# entries hash equal under _diag_key. Entries
|
||||
# that mapped into a deleted region drop out
|
||||
# silently — they no longer apply.
|
||||
from agent.lsp.range_shift import shift_baseline
|
||||
baseline = shift_baseline(baseline, line_shift)
|
||||
seen = {_diag_key(d) for d in baseline}
|
||||
diags = [d for d in diags if _diag_key(d) not in seen]
|
||||
# Roll baseline forward — next call returns deltas relative
|
||||
# to the just-emitted state, mirroring claude-code's
|
||||
# diagnosticTracking.
|
||||
try:
|
||||
fresh = self._loop.run(self._current_diags_async(file_path), timeout=2.0) or []
|
||||
except Exception: # noqa: BLE001
|
||||
fresh = []
|
||||
if fresh:
|
||||
self._delta_baseline[abs_path] = fresh
|
||||
|
||||
if diags:
|
||||
eventlog.log_diagnostics(server_id, file_path, len(diags))
|
||||
else:
|
||||
eventlog.log_clean(server_id, file_path)
|
||||
return diags
|
||||
|
||||
def _mark_broken_for_file(self, file_path: str, exc: BaseException) -> None:
|
||||
"""Mark the (server_id, workspace_root) pair as broken so subsequent
|
||||
edits skip it instantly instead of re-paying timeout cost.
|
||||
|
||||
Called when the outer ``_loop.run`` timeout cancels an in-flight
|
||||
spawn/initialize that the inner ``_get_or_spawn`` task was still
|
||||
holding open. Without this, every subsequent write would re-enter
|
||||
the spawn path and re-pay the full ``snapshot_baseline``
|
||||
timeout (8s) until the binary is fixed.
|
||||
|
||||
Also kills any orphan client process that survived the cancelled
|
||||
future, and emits a single eventlog WARNING so the user knows
|
||||
which server gave up.
|
||||
|
||||
``exc`` is whatever exception the outer wrapper caught — used
|
||||
only for logging, never re-raised.
|
||||
"""
|
||||
srv = find_server_for_file(file_path)
|
||||
if srv is None:
|
||||
return
|
||||
ws_root, gated = resolve_workspace_for_file(file_path)
|
||||
if not (ws_root and gated):
|
||||
return
|
||||
try:
|
||||
per_server_root = srv.resolve_root(file_path, ws_root) or ws_root
|
||||
except Exception: # noqa: BLE001
|
||||
per_server_root = ws_root
|
||||
key = (srv.server_id, per_server_root)
|
||||
already_broken = key in self._broken
|
||||
self._broken.add(key)
|
||||
|
||||
# Kill any client we managed to spawn before the timeout. The
|
||||
# cancelled future never reached the broken-set add inside
|
||||
# ``_get_or_spawn`` so the client may still be hanging in
|
||||
# ``_clients`` with a half-initialized state.
|
||||
with self._state_lock:
|
||||
client = self._clients.pop(key, None)
|
||||
if client is not None:
|
||||
try:
|
||||
# Fire-and-forget shutdown — give it a second to cleanup,
|
||||
# but don't block. We're already on a slow path.
|
||||
self._loop.run(client.shutdown(), timeout=1.0)
|
||||
except Exception: # noqa: BLE001
|
||||
pass
|
||||
|
||||
if not already_broken:
|
||||
eventlog.log_spawn_failed(srv.server_id, per_server_root, exc)
|
||||
|
||||
def shutdown(self) -> None:
|
||||
"""Tear down all clients and stop the background loop."""
|
||||
if not self._enabled:
|
||||
return
|
||||
try:
|
||||
self._loop.run(self._shutdown_async(), timeout=10.0)
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.debug("LSP shutdown error: %s", e)
|
||||
self._loop.stop()
|
||||
clear_cache()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# async internals
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _snapshot_async(self, file_path: str) -> List[Dict[str, Any]]:
|
||||
client = await self._get_or_spawn(file_path)
|
||||
if client is None:
|
||||
return []
|
||||
try:
|
||||
version = await client.open_file(file_path, language_id=language_id_for(file_path))
|
||||
await client.wait_for_diagnostics(file_path, version, mode=self._wait_mode)
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.debug("snapshot open/wait failed: %s", e)
|
||||
return []
|
||||
self._last_used[(client.server_id, client.workspace_root)] = time.time()
|
||||
return list(client.diagnostics_for(file_path))
|
||||
|
||||
async def _open_and_wait_async(self, file_path: str) -> List[Dict[str, Any]]:
|
||||
client = await self._get_or_spawn(file_path)
|
||||
if client is None:
|
||||
return []
|
||||
try:
|
||||
version = await client.open_file(file_path, language_id=language_id_for(file_path))
|
||||
await client.save_file(file_path)
|
||||
await client.wait_for_diagnostics(file_path, version, mode=self._wait_mode)
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.debug("open/wait failed for %s: %s", file_path, e)
|
||||
return []
|
||||
self._last_used[(client.server_id, client.workspace_root)] = time.time()
|
||||
return list(client.diagnostics_for(file_path))
|
||||
|
||||
async def _current_diags_async(self, file_path: str) -> List[Dict[str, Any]]:
|
||||
ws, gated = resolve_workspace_for_file(file_path)
|
||||
srv = find_server_for_file(file_path)
|
||||
if not (ws and gated and srv):
|
||||
return []
|
||||
with self._state_lock:
|
||||
client = self._clients.get((srv.server_id, ws))
|
||||
if client is None:
|
||||
return []
|
||||
return list(client.diagnostics_for(file_path))
|
||||
|
||||
async def _get_or_spawn(self, file_path: str) -> Optional[LSPClient]:
|
||||
srv = find_server_for_file(file_path)
|
||||
if srv is None:
|
||||
return None
|
||||
if srv.server_id in self._disabled_servers:
|
||||
eventlog.log_disabled(srv.server_id, file_path, "disabled in config")
|
||||
return None
|
||||
ws_root, gated = resolve_workspace_for_file(file_path)
|
||||
if not (ws_root and gated):
|
||||
eventlog.log_no_project_root(srv.server_id, file_path)
|
||||
return None
|
||||
per_server_root = srv.resolve_root(file_path, ws_root)
|
||||
if per_server_root is None:
|
||||
eventlog.log_disabled(
|
||||
srv.server_id, file_path, "exclude marker hit (server gated off)"
|
||||
)
|
||||
return None # exclude marker hit, server gated off
|
||||
|
||||
key = (srv.server_id, per_server_root)
|
||||
if key in self._broken:
|
||||
return None
|
||||
with self._state_lock:
|
||||
client = self._clients.get(key)
|
||||
if client is not None and client.is_running:
|
||||
eventlog.log_active(srv.server_id, per_server_root)
|
||||
return client
|
||||
spawning = self._spawning.get(key)
|
||||
if spawning is not None:
|
||||
try:
|
||||
return await spawning
|
||||
except Exception: # noqa: BLE001
|
||||
return None
|
||||
|
||||
# Begin spawn
|
||||
loop = asyncio.get_running_loop()
|
||||
spawn_future: asyncio.Future = loop.create_future()
|
||||
with self._state_lock:
|
||||
self._spawning[key] = spawn_future
|
||||
try:
|
||||
ctx = ServerContext(
|
||||
workspace_root=per_server_root,
|
||||
install_strategy=self._install_strategy,
|
||||
binary_overrides=self._binary_overrides,
|
||||
env_overrides=self._env_overrides,
|
||||
init_overrides=self._init_overrides,
|
||||
)
|
||||
spec = srv.build_spawn(per_server_root, ctx)
|
||||
if spec is None:
|
||||
# ``build_spawn`` returns None when the binary can't be
|
||||
# located (auto-install disabled, manual-only server,
|
||||
# or install attempt failed). Surface this once via
|
||||
# the structured logger so the user can act on it.
|
||||
eventlog.log_server_unavailable(srv.server_id, srv.server_id)
|
||||
self._broken.add(key)
|
||||
spawn_future.set_result(None)
|
||||
return None
|
||||
client = LSPClient(
|
||||
server_id=srv.server_id,
|
||||
workspace_root=spec.workspace_root,
|
||||
command=spec.command,
|
||||
env=spec.env,
|
||||
cwd=spec.cwd,
|
||||
initialization_options=spec.initialization_options,
|
||||
seed_diagnostics_on_first_push=spec.seed_diagnostics_on_first_push or srv.seed_first_push,
|
||||
)
|
||||
try:
|
||||
await client.start()
|
||||
except Exception as e: # noqa: BLE001
|
||||
eventlog.log_spawn_failed(srv.server_id, per_server_root, e)
|
||||
self._broken.add(key)
|
||||
spawn_future.set_result(None)
|
||||
return None
|
||||
with self._state_lock:
|
||||
self._clients[key] = client
|
||||
self._last_used[key] = time.time()
|
||||
eventlog.log_active(srv.server_id, per_server_root)
|
||||
spawn_future.set_result(client)
|
||||
return client
|
||||
finally:
|
||||
with self._state_lock:
|
||||
self._spawning.pop(key, None)
|
||||
|
||||
async def _shutdown_async(self) -> None:
|
||||
with self._state_lock:
|
||||
clients = list(self._clients.values())
|
||||
self._clients.clear()
|
||||
self._broken.clear()
|
||||
self._last_used.clear()
|
||||
await asyncio.gather(
|
||||
*(c.shutdown() for c in clients),
|
||||
return_exceptions=True,
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# status / introspection (used by ``hermes lsp status``)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get_status(self) -> Dict[str, Any]:
|
||||
"""Return a snapshot of the service for the CLI status command."""
|
||||
with self._state_lock:
|
||||
clients = [
|
||||
{
|
||||
"server_id": k[0],
|
||||
"workspace_root": k[1],
|
||||
"state": c.state,
|
||||
"running": c.is_running,
|
||||
}
|
||||
for k, c in self._clients.items()
|
||||
]
|
||||
broken = list(self._broken)
|
||||
return {
|
||||
"enabled": self._enabled,
|
||||
"wait_mode": self._wait_mode,
|
||||
"wait_timeout": self._wait_timeout,
|
||||
"install_strategy": self._install_strategy,
|
||||
"clients": clients,
|
||||
"broken": broken,
|
||||
"disabled_servers": sorted(self._disabled_servers),
|
||||
}
|
||||
|
||||
|
||||
def _diag_key(d: Dict[str, Any]) -> str:
|
||||
"""Content equality key used for cross-edit delta filtering.
|
||||
|
||||
Includes the diagnostic's position range — when used together
|
||||
with :func:`agent.lsp.range_shift.shift_baseline`, the baseline
|
||||
is line-shifted into post-edit coordinates BEFORE this key is
|
||||
computed, so identical-but-shifted diagnostics hash equal. Two
|
||||
genuinely distinct diagnostics at different lines (e.g. the same
|
||||
error class introduced at a second site) hash differently and
|
||||
are surfaced as new.
|
||||
|
||||
Mirrors :func:`agent.lsp.client._diagnostic_key`; intentionally
|
||||
identical so the two layers agree on diagnostic identity.
|
||||
"""
|
||||
rng = d.get("range") or {}
|
||||
start = rng.get("start") or {}
|
||||
end = rng.get("end") or {}
|
||||
code = d.get("code")
|
||||
if code is not None and not isinstance(code, str):
|
||||
code = str(code)
|
||||
return "\x00".join(
|
||||
[
|
||||
str(d.get("severity") or 1),
|
||||
str(code or ""),
|
||||
str(d.get("source") or ""),
|
||||
str(d.get("message") or "").strip(),
|
||||
f"{start.get('line', 0)}:{start.get('character', 0)}-{end.get('line', 0)}:{end.get('character', 0)}",
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
__all__ = ["LSPService"]
|
||||
@@ -1,196 +0,0 @@
|
||||
"""Minimal LSP JSON-RPC 2.0 framer over async streams.
|
||||
|
||||
LSP wire format:
|
||||
|
||||
Content-Length: <bytes>\\r\\n
|
||||
\\r\\n
|
||||
<utf-8 JSON body>
|
||||
|
||||
The body is a JSON-RPC 2.0 envelope: request, response, or notification.
|
||||
|
||||
This module replaces what ``vscode-jsonrpc/node`` would do in a
|
||||
TypeScript implementation. We keep it deliberately small — just the
|
||||
framer + envelope helpers — so :class:`agent.lsp.client.LSPClient` can
|
||||
focus on protocol semantics.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger("agent.lsp.protocol")
|
||||
|
||||
# LSP error codes we care about. Full list in
|
||||
# https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#errorCodes
|
||||
ERROR_CONTENT_MODIFIED = -32801
|
||||
ERROR_REQUEST_CANCELLED = -32800
|
||||
ERROR_METHOD_NOT_FOUND = -32601
|
||||
|
||||
|
||||
class LSPProtocolError(Exception):
|
||||
"""Raised when the wire protocol is violated.
|
||||
|
||||
Distinct from :class:`LSPRequestError` which represents a server
|
||||
returning a JSON-RPC error response — that's protocol-conformant.
|
||||
This exception means the framing or envelope itself is broken.
|
||||
"""
|
||||
|
||||
|
||||
class LSPRequestError(Exception):
|
||||
"""Raised when an LSP request returns an error response.
|
||||
|
||||
Carries the JSON-RPC ``code``, ``message``, and optional ``data``.
|
||||
"""
|
||||
|
||||
def __init__(self, code: int, message: str, data: Any = None) -> None:
|
||||
super().__init__(f"LSP error {code}: {message}")
|
||||
self.code = code
|
||||
self.message = message
|
||||
self.data = data
|
||||
|
||||
|
||||
def encode_message(obj: dict) -> bytes:
|
||||
"""Encode a JSON-RPC envelope as a Content-Length framed byte string.
|
||||
|
||||
The body is encoded as compact UTF-8 JSON (no spaces between
|
||||
separators) — matches what ``vscode-jsonrpc`` emits and keeps the
|
||||
Content-Length count exact.
|
||||
"""
|
||||
body = json.dumps(obj, separators=(",", ":"), ensure_ascii=False).encode("utf-8")
|
||||
header = f"Content-Length: {len(body)}\r\n\r\n".encode("ascii")
|
||||
return header + body
|
||||
|
||||
|
||||
async def read_message(reader: asyncio.StreamReader) -> Optional[dict]:
|
||||
"""Read one Content-Length framed JSON-RPC message from the stream.
|
||||
|
||||
Returns ``None`` on clean EOF (server closed stdout cleanly between
|
||||
messages — typical shutdown). Raises :class:`LSPProtocolError` on
|
||||
malformed framing.
|
||||
|
||||
The reader is advanced to just past the JSON body on success.
|
||||
"""
|
||||
headers: dict = {}
|
||||
header_bytes = 0
|
||||
while True:
|
||||
try:
|
||||
line = await reader.readuntil(b"\r\n")
|
||||
except asyncio.IncompleteReadError as e:
|
||||
# EOF while reading headers. If we hadn't started a header
|
||||
# block, treat as clean EOF; otherwise the framing is bad.
|
||||
if not e.partial and not headers:
|
||||
return None
|
||||
raise LSPProtocolError(
|
||||
f"unexpected EOF while reading LSP headers (partial={e.partial!r})"
|
||||
) from e
|
||||
# Defensive cap against a server streaming headers without ever
|
||||
# emitting CRLF-CRLF. Caps total header bytes at 8 KiB — a
|
||||
# well-behaved server fits in well under 200 bytes.
|
||||
header_bytes += len(line)
|
||||
if header_bytes > 8192:
|
||||
raise LSPProtocolError(
|
||||
f"LSP header block exceeded 8 KiB without terminator"
|
||||
)
|
||||
line = line[:-2] # strip CRLF
|
||||
if not line:
|
||||
break # blank line ends header block
|
||||
try:
|
||||
key, _, value = line.decode("ascii").partition(":")
|
||||
except UnicodeDecodeError as e:
|
||||
raise LSPProtocolError(f"non-ASCII LSP header: {line!r}") from e
|
||||
if not key:
|
||||
raise LSPProtocolError(f"malformed LSP header line: {line!r}")
|
||||
headers[key.strip().lower()] = value.strip()
|
||||
|
||||
cl = headers.get("content-length")
|
||||
if cl is None:
|
||||
raise LSPProtocolError(f"LSP message missing Content-Length: {headers!r}")
|
||||
try:
|
||||
n = int(cl)
|
||||
except ValueError as e:
|
||||
raise LSPProtocolError(f"non-integer Content-Length: {cl!r}") from e
|
||||
if n < 0 or n > 64 * 1024 * 1024: # 64 MiB sanity cap
|
||||
raise LSPProtocolError(f"unreasonable Content-Length: {n}")
|
||||
|
||||
try:
|
||||
body = await reader.readexactly(n)
|
||||
except asyncio.IncompleteReadError as e:
|
||||
raise LSPProtocolError(
|
||||
f"truncated LSP body: expected {n} bytes, got {len(e.partial)}"
|
||||
) from e
|
||||
|
||||
try:
|
||||
return json.loads(body.decode("utf-8"))
|
||||
except json.JSONDecodeError as e:
|
||||
raise LSPProtocolError(f"invalid JSON in LSP body: {e}") from e
|
||||
except UnicodeDecodeError as e:
|
||||
raise LSPProtocolError(f"non-UTF-8 LSP body: {e}") from e
|
||||
|
||||
|
||||
def make_request(req_id: int, method: str, params: Any) -> dict:
|
||||
"""Build a JSON-RPC 2.0 request envelope."""
|
||||
msg: dict = {"jsonrpc": "2.0", "id": req_id, "method": method}
|
||||
if params is not None:
|
||||
msg["params"] = params
|
||||
return msg
|
||||
|
||||
|
||||
def make_notification(method: str, params: Any) -> dict:
|
||||
"""Build a JSON-RPC 2.0 notification envelope (no ``id``)."""
|
||||
msg: dict = {"jsonrpc": "2.0", "method": method}
|
||||
if params is not None:
|
||||
msg["params"] = params
|
||||
return msg
|
||||
|
||||
|
||||
def make_response(req_id: Any, result: Any) -> dict:
|
||||
"""Build a JSON-RPC 2.0 success response envelope."""
|
||||
return {"jsonrpc": "2.0", "id": req_id, "result": result}
|
||||
|
||||
|
||||
def make_error_response(req_id: Any, code: int, message: str, data: Any = None) -> dict:
|
||||
"""Build a JSON-RPC 2.0 error response envelope."""
|
||||
err: dict = {"code": code, "message": message}
|
||||
if data is not None:
|
||||
err["data"] = data
|
||||
return {"jsonrpc": "2.0", "id": req_id, "error": err}
|
||||
|
||||
|
||||
def classify_message(msg: dict) -> Tuple[str, Any]:
|
||||
"""Return ``(kind, key)`` where kind is one of ``request``,
|
||||
``response``, ``notification``, ``invalid``.
|
||||
|
||||
The key is the request id for request/response, the method name
|
||||
for notifications, and ``None`` for invalid messages.
|
||||
"""
|
||||
if not isinstance(msg, dict):
|
||||
return "invalid", None
|
||||
if msg.get("jsonrpc") != "2.0":
|
||||
return "invalid", None
|
||||
has_id = "id" in msg
|
||||
has_method = "method" in msg
|
||||
if has_id and has_method:
|
||||
return "request", msg["id"]
|
||||
if has_id and ("result" in msg or "error" in msg):
|
||||
return "response", msg["id"]
|
||||
if has_method and not has_id:
|
||||
return "notification", msg["method"]
|
||||
return "invalid", None
|
||||
|
||||
|
||||
__all__ = [
|
||||
"ERROR_CONTENT_MODIFIED",
|
||||
"ERROR_REQUEST_CANCELLED",
|
||||
"ERROR_METHOD_NOT_FOUND",
|
||||
"LSPProtocolError",
|
||||
"LSPRequestError",
|
||||
"encode_message",
|
||||
"read_message",
|
||||
"make_request",
|
||||
"make_notification",
|
||||
"make_response",
|
||||
"make_error_response",
|
||||
"classify_message",
|
||||
]
|
||||
@@ -1,149 +0,0 @@
|
||||
"""Diff-aware line-shift map for cross-edit LSP delta filtering.
|
||||
|
||||
When an edit deletes or inserts lines in the middle of a file, every
|
||||
diagnostic below the edit point shifts to a new line number. The
|
||||
LSPService delta filter subtracts the pre-edit baseline from the
|
||||
post-edit diagnostics keyed on ``(severity, code, source, message,
|
||||
range)`` — without an adjustment, the shifted-but-otherwise-identical
|
||||
diagnostics look brand-new and the agent gets flooded with noise.
|
||||
|
||||
The fix used here is the same trick git's blame and unified diff use:
|
||||
build a piecewise-linear map from pre-edit line numbers to post-edit
|
||||
line numbers, then apply that map to baseline diagnostics before the
|
||||
set-difference. Diagnostics whose pre-edit line is in a region the
|
||||
edit deleted return ``None`` and are dropped from the baseline (they
|
||||
genuinely no longer apply).
|
||||
|
||||
Trade-off vs. dropping range from the key entirely (the previous
|
||||
fix): preserves the "new instance of an identical error at a
|
||||
different line" signal — if the model introduces a second instance
|
||||
of the same error class at a different location, that one will be
|
||||
surfaced as new instead of swallowed by content-only dedup.
|
||||
|
||||
The map is derived from ``difflib.SequenceMatcher.get_opcodes()`` and
|
||||
exposed as a single callable so callers don't have to reason about
|
||||
diff regions.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import difflib
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
|
||||
def build_line_shift(pre_text: str, post_text: str) -> Callable[[int], Optional[int]]:
|
||||
"""Build a function mapping pre-edit line numbers to post-edit line numbers.
|
||||
|
||||
Lines are 0-indexed to match the LSP wire format
|
||||
(``range.start.line`` is 0-indexed).
|
||||
|
||||
The returned callable takes a pre-edit 0-indexed line number and
|
||||
returns the corresponding post-edit 0-indexed line number, or
|
||||
``None`` if that line was deleted by the edit (no post-edit
|
||||
counterpart exists).
|
||||
|
||||
Cost: one ``SequenceMatcher.get_opcodes()`` call up front; the
|
||||
returned closure is O(log n) per call (binary search over opcode
|
||||
regions). Cheap enough to call once per write/patch and apply to
|
||||
every baseline diagnostic.
|
||||
"""
|
||||
pre_lines = pre_text.splitlines() if pre_text else []
|
||||
post_lines = post_text.splitlines() if post_text else []
|
||||
|
||||
# Trivial case: identical content or no content — identity map.
|
||||
if pre_lines == post_lines:
|
||||
return lambda line: line
|
||||
|
||||
# SequenceMatcher.get_opcodes() returns a list of
|
||||
# (tag, i1, i2, j1, j2) where tag is 'equal', 'replace', 'delete',
|
||||
# or 'insert'. i1:i2 is the range in pre, j1:j2 is the range in
|
||||
# post. We build a list of (i1, i2, j1, j2, tag) tuples and
|
||||
# binary-search by i for each lookup.
|
||||
sm = difflib.SequenceMatcher(a=pre_lines, b=post_lines, autojunk=False)
|
||||
opcodes = sm.get_opcodes()
|
||||
|
||||
def shift(line: int) -> Optional[int]:
|
||||
# Find the opcode region whose i1 <= line < i2.
|
||||
# Linear scan is fine — typical opcode count is small (single
|
||||
# digits for a typical patch-tool edit).
|
||||
for tag, i1, i2, j1, j2 in opcodes:
|
||||
if i1 <= line < i2:
|
||||
if tag == "equal":
|
||||
# Pre-line N → post-line (N - i1 + j1).
|
||||
return line - i1 + j1
|
||||
if tag == "delete":
|
||||
# Pre-line is in a deleted region — no post counterpart.
|
||||
return None
|
||||
if tag == "replace":
|
||||
# Replace == delete + insert; the pre-line has no
|
||||
# post counterpart in any meaningful sense. Drop.
|
||||
return None
|
||||
# 'insert' has i1 == i2 so line < i2 can't be hit.
|
||||
if line < i1:
|
||||
# Past the relevant region — handled in earlier iteration.
|
||||
break
|
||||
# Past the last opcode region (line >= len(pre_lines)).
|
||||
# Anchor at end of post.
|
||||
return max(0, len(post_lines) - 1) if post_lines else None
|
||||
|
||||
return shift
|
||||
|
||||
|
||||
def shift_diagnostic_range(diag: Dict[str, Any],
|
||||
shift: Callable[[int], Optional[int]]) -> Optional[Dict[str, Any]]:
|
||||
"""Return a copy of ``diag`` with its line range remapped through ``shift``.
|
||||
|
||||
Returns ``None`` if the diagnostic's start line maps to ``None``
|
||||
(the line was deleted by the edit) — caller drops it from the
|
||||
baseline since the diagnostic no longer applies.
|
||||
|
||||
Both ``start.line`` and ``end.line`` are remapped independently;
|
||||
when only the end maps to ``None`` (rare, multi-line diagnostic
|
||||
straddling the edit boundary) we collapse to a single-line range
|
||||
at the shifted start to keep the diagnostic in the baseline.
|
||||
|
||||
The original ``diag`` is not mutated.
|
||||
"""
|
||||
rng = diag.get("range") or {}
|
||||
start = rng.get("start") or {}
|
||||
end = rng.get("end") or {}
|
||||
|
||||
pre_start_line = int(start.get("line", 0))
|
||||
pre_end_line = int(end.get("line", pre_start_line))
|
||||
|
||||
new_start_line = shift(pre_start_line)
|
||||
if new_start_line is None:
|
||||
return None
|
||||
|
||||
new_end_line = shift(pre_end_line)
|
||||
if new_end_line is None:
|
||||
# Diagnostic straddled the deletion — collapse to start.
|
||||
new_end_line = new_start_line
|
||||
|
||||
shifted = dict(diag)
|
||||
shifted["range"] = {
|
||||
"start": {
|
||||
"line": new_start_line,
|
||||
"character": int(start.get("character", 0)),
|
||||
},
|
||||
"end": {
|
||||
"line": new_end_line,
|
||||
"character": int(end.get("character", 0)),
|
||||
},
|
||||
}
|
||||
return shifted
|
||||
|
||||
|
||||
def shift_baseline(baseline: List[Dict[str, Any]],
|
||||
shift: Callable[[int], Optional[int]]) -> List[Dict[str, Any]]:
|
||||
"""Apply ``shift`` to every diagnostic in ``baseline``, dropping deleted entries."""
|
||||
out: List[Dict[str, Any]] = []
|
||||
for d in baseline:
|
||||
if not isinstance(d, dict):
|
||||
continue
|
||||
shifted = shift_diagnostic_range(d, shift)
|
||||
if shifted is not None:
|
||||
out.append(shifted)
|
||||
return out
|
||||
|
||||
|
||||
__all__ = ["build_line_shift", "shift_diagnostic_range", "shift_baseline"]
|
||||
@@ -1,78 +0,0 @@
|
||||
"""Format LSP diagnostics for inclusion in tool output.
|
||||
|
||||
The model sees a compact, severity-filtered, line-bounded summary of
|
||||
diagnostics introduced by the latest edit. Format matches what
|
||||
OpenCode's ``lsp/diagnostic.ts`` and Claude Code's
|
||||
``formatDiagnosticsSummary`` produce — ``<diagnostics>`` blocks with
|
||||
1-indexed line/column, capped at ``MAX_PER_FILE`` errors.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# Severity-1 only by default — warnings/info/hints would flood the
|
||||
# agent. Lift this in config under ``lsp.severities`` if needed.
|
||||
SEVERITY_NAMES = {1: "ERROR", 2: "WARN", 3: "INFO", 4: "HINT"}
|
||||
DEFAULT_SEVERITIES = frozenset({1}) # ERROR only
|
||||
|
||||
MAX_PER_FILE = 20
|
||||
MAX_TOTAL_CHARS = 4000
|
||||
|
||||
|
||||
def format_diagnostic(d: Dict[str, Any]) -> str:
|
||||
"""One-line representation of a single diagnostic."""
|
||||
sev = SEVERITY_NAMES.get(d.get("severity") or 1, "ERROR")
|
||||
rng = d.get("range") or {}
|
||||
start = rng.get("start") or {}
|
||||
line = int(start.get("line", 0)) + 1
|
||||
col = int(start.get("character", 0)) + 1
|
||||
msg = str(d.get("message") or "").rstrip()
|
||||
code = d.get("code")
|
||||
code_part = f" [{code}]" if code not in {None, ""} else ""
|
||||
source = d.get("source")
|
||||
source_part = f" ({source})" if source else ""
|
||||
return f"{sev} [{line}:{col}] {msg}{code_part}{source_part}"
|
||||
|
||||
|
||||
def report_for_file(
|
||||
file_path: str,
|
||||
diagnostics: List[Dict[str, Any]],
|
||||
*,
|
||||
severities: frozenset = DEFAULT_SEVERITIES,
|
||||
max_per_file: int = MAX_PER_FILE,
|
||||
) -> str:
|
||||
"""Build a ``<diagnostics file=...>`` block for one file.
|
||||
|
||||
Returns an empty string when no diagnostics pass the severity
|
||||
filter, so callers can do ``if block:`` to skip empty cases.
|
||||
"""
|
||||
if not diagnostics:
|
||||
return ""
|
||||
filtered = [d for d in diagnostics if (d.get("severity") or 1) in severities]
|
||||
if not filtered:
|
||||
return ""
|
||||
limited = filtered[:max_per_file]
|
||||
extra = len(filtered) - len(limited)
|
||||
lines = [format_diagnostic(d) for d in limited]
|
||||
body = "\n".join(lines)
|
||||
if extra > 0:
|
||||
body += f"\n... and {extra} more"
|
||||
return f"<diagnostics file=\"{file_path}\">\n{body}\n</diagnostics>"
|
||||
|
||||
|
||||
def truncate(s: str, *, limit: int = MAX_TOTAL_CHARS) -> str:
|
||||
"""Hard-cap a formatted summary string."""
|
||||
if len(s) <= limit:
|
||||
return s
|
||||
marker = "\n…[truncated]"
|
||||
return s[: limit - len(marker)] + marker
|
||||
|
||||
|
||||
__all__ = [
|
||||
"SEVERITY_NAMES",
|
||||
"DEFAULT_SEVERITIES",
|
||||
"MAX_PER_FILE",
|
||||
"format_diagnostic",
|
||||
"report_for_file",
|
||||
"truncate",
|
||||
]
|
||||
1040
agent/lsp/servers.py
1040
agent/lsp/servers.py
File diff suppressed because it is too large
Load Diff
@@ -1,223 +0,0 @@
|
||||
"""Workspace and project-root resolution for LSP.
|
||||
|
||||
Two concerns live here:
|
||||
|
||||
1. **Workspace gate** — the upper-level "is this directory a project?"
|
||||
check. Hermes only runs LSP when the cwd (or the file being edited)
|
||||
sits inside a git worktree. Files outside any git root never
|
||||
trigger LSP, even if a server is configured. This keeps Telegram
|
||||
gateway users on user-home cwd's from spawning daemons.
|
||||
|
||||
2. **NearestRoot** — the per-server project-root walk. Each language
|
||||
server cares about a different marker (``pyproject.toml`` for
|
||||
Python, ``Cargo.toml`` for Rust, ``go.mod`` for Go, etc.) and
|
||||
wants the directory containing that marker. ``nearest_root()``
|
||||
walks up from a starting path looking for any of a list of marker
|
||||
files, optionally bailing if an exclude marker shows up first.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger("agent.lsp.workspace")
|
||||
|
||||
# Cache: cwd → (worktree_root, is_git) so repeated calls don't re-stat.
|
||||
# Cleared on shutdown. Keyed by absolute resolved path so symlink
|
||||
# folds collapse to one entry.
|
||||
_workspace_cache: dict = {}
|
||||
|
||||
|
||||
def normalize_path(path: str) -> str:
|
||||
"""Normalize a path for use as a stable map key.
|
||||
|
||||
Resolves ``~``, makes absolute, and collapses ``.``/``..``. We do
|
||||
NOT resolve symlinks here — symlink stability matters for some
|
||||
LSP servers (rust-analyzer cares about Cargo workspace identity)
|
||||
and we want the canonical path the user typed when possible.
|
||||
"""
|
||||
return os.path.abspath(os.path.expanduser(path))
|
||||
|
||||
|
||||
def find_git_worktree(start: str) -> Optional[str]:
|
||||
"""Walk up from ``start`` looking for a ``.git`` entry (file or dir).
|
||||
|
||||
Returns the directory containing ``.git``, or ``None`` if no git
|
||||
root is found before hitting the filesystem root.
|
||||
|
||||
A ``.git`` *file* (not directory) means we're inside a git
|
||||
worktree set up via ``git worktree add`` — both forms count.
|
||||
"""
|
||||
try:
|
||||
start_path = Path(normalize_path(start))
|
||||
if start_path.is_file():
|
||||
start_path = start_path.parent
|
||||
except (OSError, RuntimeError, ValueError):
|
||||
# Pathological input (loop in symlinks, encoding error, etc.) —
|
||||
# bail out rather than crash the lint hook.
|
||||
return None
|
||||
|
||||
# Cache check
|
||||
cached = _workspace_cache.get(str(start_path))
|
||||
if cached is not None:
|
||||
root, _is_git = cached
|
||||
return root
|
||||
|
||||
cur = start_path
|
||||
# Defensive cap: the deepest reasonable monorepo is well under 64
|
||||
# levels. Caps the walk so a pathological cwd or a symlink cycle
|
||||
# we somehow traverse can't keep us looping.
|
||||
for _ in range(64):
|
||||
git_marker = cur / ".git"
|
||||
try:
|
||||
if git_marker.exists():
|
||||
resolved = str(cur)
|
||||
_workspace_cache[str(start_path)] = (resolved, True)
|
||||
return resolved
|
||||
except OSError:
|
||||
# Permission error on a parent dir — bail out cleanly.
|
||||
break
|
||||
parent = cur.parent
|
||||
if parent == cur:
|
||||
break
|
||||
cur = parent
|
||||
|
||||
_workspace_cache[str(start_path)] = (None, False)
|
||||
return None
|
||||
|
||||
|
||||
def is_inside_workspace(path: str, workspace_root: str) -> bool:
|
||||
"""Return True iff ``path`` is inside (or equal to) ``workspace_root``.
|
||||
|
||||
Uses absolute paths but does not resolve symlinks — a file accessed
|
||||
via a symlink that points outside the workspace still counts as
|
||||
outside. This is the conservative interpretation; matches LSP
|
||||
behaviour where servers reject didOpen for unrelated files.
|
||||
"""
|
||||
p = normalize_path(path)
|
||||
root = normalize_path(workspace_root)
|
||||
if p == root:
|
||||
return True
|
||||
# Use os.path.commonpath to handle case-insensitive filesystems
|
||||
# correctly on macOS/Windows.
|
||||
try:
|
||||
common = os.path.commonpath([p, root])
|
||||
except ValueError:
|
||||
# Different drives on Windows.
|
||||
return False
|
||||
return common == root
|
||||
|
||||
|
||||
def nearest_root(
|
||||
start: str,
|
||||
markers: Iterable[str],
|
||||
*,
|
||||
excludes: Optional[Iterable[str]] = None,
|
||||
ceiling: Optional[str] = None,
|
||||
) -> Optional[str]:
|
||||
"""Walk up from ``start`` looking for any of the given marker files.
|
||||
|
||||
Returns the **directory containing** the first matched marker, or
|
||||
``None`` if no marker is found before hitting ``ceiling`` (or the
|
||||
filesystem root if no ceiling).
|
||||
|
||||
If ``excludes`` is provided and an exclude marker matches *first*
|
||||
in the upward walk, returns ``None`` — the server is gated off
|
||||
for that file. Mirrors OpenCode's NearestRoot exclude semantics
|
||||
(e.g. typescript skips deno projects when ``deno.json`` is found
|
||||
before ``package.json``).
|
||||
"""
|
||||
start_path = Path(normalize_path(start))
|
||||
try:
|
||||
if start_path.is_file():
|
||||
start_path = start_path.parent
|
||||
except (OSError, RuntimeError, ValueError):
|
||||
return None
|
||||
ceiling_path = Path(normalize_path(ceiling)) if ceiling else None
|
||||
|
||||
markers_list = list(markers)
|
||||
excludes_list = list(excludes) if excludes else []
|
||||
|
||||
cur = start_path
|
||||
# Defensive cap matching ``find_git_worktree``. Bounded walk
|
||||
# protects against pathological inputs even though the
|
||||
# parent-equality stop normally terminates within ~10 steps.
|
||||
for _ in range(64):
|
||||
# Check excludes first — if an exclude is found at this level,
|
||||
# the server is gated off for this file.
|
||||
for exc in excludes_list:
|
||||
try:
|
||||
if (cur / exc).exists():
|
||||
return None
|
||||
except OSError:
|
||||
continue
|
||||
# Then check markers.
|
||||
for marker in markers_list:
|
||||
try:
|
||||
if (cur / marker).exists():
|
||||
return str(cur)
|
||||
except OSError:
|
||||
continue
|
||||
# Stop conditions.
|
||||
if ceiling_path is not None and cur == ceiling_path:
|
||||
return None
|
||||
parent = cur.parent
|
||||
if parent == cur:
|
||||
return None
|
||||
cur = parent
|
||||
return None
|
||||
|
||||
|
||||
def resolve_workspace_for_file(
|
||||
file_path: str,
|
||||
*,
|
||||
cwd: Optional[str] = None,
|
||||
) -> Tuple[Optional[str], bool]:
|
||||
"""Resolve the workspace root for a file.
|
||||
|
||||
Returns ``(workspace_root, gated_in)`` where ``gated_in`` is True
|
||||
iff LSP should run for this file at all. Currently the gate is
|
||||
"file is inside a git worktree found by walking up from cwd OR
|
||||
from the file itself".
|
||||
|
||||
The cwd path takes precedence — if the agent was launched in a
|
||||
git project, that worktree is the workspace, and any edit inside
|
||||
it (regardless of where the file lives) is in-scope. If the cwd
|
||||
isn't in a git worktree, we try the file's own location as a
|
||||
fallback.
|
||||
|
||||
Returns ``(None, False)`` when neither path is in a git worktree.
|
||||
"""
|
||||
cwd = cwd or os.getcwd()
|
||||
cwd_root = find_git_worktree(cwd)
|
||||
if cwd_root is not None:
|
||||
if is_inside_workspace(file_path, cwd_root):
|
||||
return cwd_root, True
|
||||
# File is outside the cwd's worktree — try the file's own
|
||||
# location as a secondary anchor. Useful for monorepos where
|
||||
# the user opens an unrelated checkout.
|
||||
file_root = find_git_worktree(file_path)
|
||||
if file_root is not None:
|
||||
return file_root, True
|
||||
return None, False
|
||||
|
||||
|
||||
def clear_cache() -> None:
|
||||
"""Clear the workspace-resolution cache.
|
||||
|
||||
Called on service shutdown so a subsequent re-init doesn't pick
|
||||
up stale results from a previous session.
|
||||
"""
|
||||
_workspace_cache.clear()
|
||||
|
||||
|
||||
__all__ = [
|
||||
"find_git_worktree",
|
||||
"is_inside_workspace",
|
||||
"nearest_root",
|
||||
"normalize_path",
|
||||
"resolve_workspace_for_file",
|
||||
"clear_cache",
|
||||
]
|
||||
@@ -1,49 +0,0 @@
|
||||
"""User-facing summaries for manual compression commands."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Sequence
|
||||
|
||||
|
||||
def summarize_manual_compression(
|
||||
before_messages: Sequence[dict[str, Any]],
|
||||
after_messages: Sequence[dict[str, Any]],
|
||||
before_tokens: int,
|
||||
after_tokens: int,
|
||||
) -> dict[str, Any]:
|
||||
"""Return consistent user-facing feedback for manual compression."""
|
||||
before_count = len(before_messages)
|
||||
after_count = len(after_messages)
|
||||
noop = list(after_messages) == list(before_messages)
|
||||
|
||||
if noop:
|
||||
headline = f"No changes from compression: {before_count} messages"
|
||||
if after_tokens == before_tokens:
|
||||
token_line = (
|
||||
f"Approx request size: ~{before_tokens:,} tokens (unchanged)"
|
||||
)
|
||||
else:
|
||||
token_line = (
|
||||
f"Approx request size: ~{before_tokens:,} → "
|
||||
f"~{after_tokens:,} tokens"
|
||||
)
|
||||
else:
|
||||
headline = f"Compressed: {before_count} → {after_count} messages"
|
||||
token_line = (
|
||||
f"Approx request size: ~{before_tokens:,} → "
|
||||
f"~{after_tokens:,} tokens"
|
||||
)
|
||||
|
||||
note = None
|
||||
if not noop and after_count < before_count and after_tokens > before_tokens:
|
||||
note = (
|
||||
"Note: fewer messages can still raise this estimate when "
|
||||
"compression rewrites the transcript into denser summaries."
|
||||
)
|
||||
|
||||
return {
|
||||
"noop": noop,
|
||||
"headline": headline,
|
||||
"token_line": token_line,
|
||||
"note": note,
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user