mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-10 04:08:28 +08:00
Compare commits
7 Commits
bb/cron-hi
...
asyncio
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7d9a1e119d | ||
|
|
e91d9e839a | ||
|
|
98321be8b0 | ||
|
|
a219e178a1 | ||
|
|
e06a15b3ab | ||
|
|
349e37de0a | ||
|
|
31c733383b |
23
.cursorrules
Normal file
23
.cursorrules
Normal file
@@ -0,0 +1,23 @@
|
||||
Hermes-Agent is an agent harness for LLMs.
|
||||
|
||||
When building, the tool functionality is in the tools/ directory, where each specific tool (or in some cases, tools that are built for the same execution category or api) are placed in a script each their own.
|
||||
|
||||
Each tool is then consolidated in the model_tools.py file in the repo root.
|
||||
|
||||
There is also a way to consolidate sets of tools in toolsets.py for the agent to use.
|
||||
|
||||
The primary agent runner code is in run_agent, but other runners could be developed using the tools and framework.
|
||||
|
||||
Always ensure consistency between tools, the model_tools.py and toolsets.py when changing any of them, otherwise they could become desynced in a way that is detrimental to functionality.
|
||||
|
||||
The expected pathway for using API keys is to setup and place them in a .env file in the repo root.
|
||||
|
||||
Test scripts will be placed in tests/
|
||||
|
||||
The run_agent loop is setup to:
|
||||
- Process the enabled toolsets to provide to the model,
|
||||
- Pipe in a prompt or problem from the input to the agent,
|
||||
- Loop the LLM each time it calls a tool, until the model decides no more tools are needed and provides a natural language response,
|
||||
- Return that response.
|
||||
|
||||
There are additional caveats for logging, where we restructure the "tools" as a system prompt for storage later into a format that can be used and handled properly later.
|
||||
@@ -1,65 +0,0 @@
|
||||
# Git
|
||||
.git
|
||||
.gitignore
|
||||
.gitmodules
|
||||
|
||||
# Python
|
||||
__pycache__
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
*.egg-info/
|
||||
dist/
|
||||
build/
|
||||
|
||||
# Virtual environments
|
||||
venv/
|
||||
env/
|
||||
ENV/
|
||||
|
||||
# Dependencies
|
||||
node_modules
|
||||
**/node_modules
|
||||
.venv
|
||||
**/.venv
|
||||
.notebooklm-cli-venv/
|
||||
.notebooklm-playwright/
|
||||
.pip-cache/
|
||||
.uv-cache/
|
||||
|
||||
# Built artifacts that are regenerated inside the image. Excluded so local
|
||||
# rebuilds on the developer's machine don't invalidate the npm-install layer
|
||||
# that now depends on the full ui-tui/packages/hermes-ink/ tree being present.
|
||||
ui-tui/dist/
|
||||
ui-tui/packages/hermes-ink/dist/
|
||||
|
||||
# CI/CD
|
||||
.github
|
||||
|
||||
# Environment files
|
||||
.env
|
||||
.env.*
|
||||
|
||||
# IDE
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# Testing
|
||||
.pytest_cache/
|
||||
.coverage
|
||||
htmlcov/
|
||||
|
||||
# Documentation
|
||||
*.md
|
||||
|
||||
# Runtime data (bind-mounted at /opt/data; must not leak into build context)
|
||||
data/
|
||||
.hermes-docker/
|
||||
.notebooklm-home/
|
||||
|
||||
# Compose/profile runtime state (bind-mounted; avoid ownership/secret issues)
|
||||
hermes-config/
|
||||
runtime/
|
||||
471
.env.example
471
.env.example
@@ -1,472 +1,49 @@
|
||||
# Hermes Agent Environment Configuration
|
||||
# Copy this file to .env and fill in your API keys
|
||||
# Get API keys from the URLs listed below
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (OpenRouter)
|
||||
# =============================================================================
|
||||
# OpenRouter provides access to many models through one API
|
||||
# All LLM calls go through OpenRouter - no direct provider keys needed
|
||||
# Get your key at: https://openrouter.ai/keys
|
||||
# OPENROUTER_API_KEY=
|
||||
|
||||
# Default model is configured in ~/.hermes/config.yaml (model.default).
|
||||
# Use 'hermes model' or 'hermes setup' to change it.
|
||||
# LLM_MODEL is no longer read from .env — this line is kept for reference only.
|
||||
# LLM_MODEL=anthropic/claude-opus-4.6
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (NovitaAI)
|
||||
# =============================================================================
|
||||
# NovitaAI — 90+ models, pay-per-use
|
||||
# Get your key at: https://novita.ai/settings/key-management
|
||||
# NOVITA_API_KEY=
|
||||
# NOVITA_BASE_URL=https://api.novita.ai/openai/v1 # Override default base URL
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (Google AI Studio / Gemini)
|
||||
# =============================================================================
|
||||
# Native Gemini API via Google's OpenAI-compatible endpoint.
|
||||
# Get your key at: https://aistudio.google.com/app/apikey
|
||||
# GOOGLE_API_KEY=your_google_ai_studio_key_here
|
||||
# GEMINI_API_KEY=your_gemini_key_here # alias for GOOGLE_API_KEY
|
||||
# Optional base URL override (default: Google's OpenAI-compatible endpoint)
|
||||
# GEMINI_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (Ollama Cloud)
|
||||
# =============================================================================
|
||||
# Cloud-hosted open models via Ollama's OpenAI-compatible endpoint.
|
||||
# Get your key at: https://ollama.com/settings
|
||||
# OLLAMA_API_KEY=your_ollama_key_here
|
||||
# Optional base URL override (default: https://ollama.com/v1)
|
||||
# OLLAMA_BASE_URL=https://ollama.com/v1
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (z.ai / GLM)
|
||||
# =============================================================================
|
||||
# z.ai provides access to ZhipuAI GLM models (GLM-4-Plus, etc.)
|
||||
# Get your key at: https://z.ai or https://open.bigmodel.cn
|
||||
# GLM_API_KEY=
|
||||
# GLM_BASE_URL=https://api.z.ai/api/paas/v4 # Override default base URL
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (Kimi / Moonshot)
|
||||
# =============================================================================
|
||||
# Kimi Code provides access to Moonshot AI coding models (kimi-k2.5, etc.)
|
||||
# Get your key at: https://platform.kimi.ai (Kimi Code console)
|
||||
# Keys prefixed sk-kimi- use the Kimi Code API (api.kimi.com) by default.
|
||||
# Legacy keys from platform.moonshot.ai need KIMI_BASE_URL override below.
|
||||
# KIMI_API_KEY=
|
||||
# KIMI_BASE_URL=https://api.kimi.com/coding/v1 # Default for sk-kimi- keys
|
||||
# KIMI_BASE_URL=https://api.moonshot.ai/v1 # For legacy Moonshot keys
|
||||
# KIMI_BASE_URL=https://api.moonshot.cn/v1 # For Moonshot China keys
|
||||
# KIMI_CN_API_KEY= # Dedicated Moonshot China key
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (Arcee AI)
|
||||
# =============================================================================
|
||||
# Arcee AI provides access to Trinity models (trinity-mini, trinity-large-*)
|
||||
# Get an Arcee key at: https://chat.arcee.ai/
|
||||
# ARCEEAI_API_KEY=
|
||||
# ARCEE_BASE_URL= # Override default base URL
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (MiniMax)
|
||||
# =============================================================================
|
||||
# MiniMax provides access to MiniMax models (global endpoint)
|
||||
# Get your key at: https://www.minimax.io
|
||||
# MINIMAX_API_KEY=
|
||||
# MINIMAX_BASE_URL=https://api.minimax.io/v1 # Override default base URL
|
||||
|
||||
# MiniMax China endpoint (for users in mainland China)
|
||||
# MINIMAX_CN_API_KEY=
|
||||
# MINIMAX_CN_BASE_URL=https://api.minimaxi.com/v1 # Override default base URL
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (OpenCode Zen)
|
||||
# =============================================================================
|
||||
# OpenCode Zen provides curated, tested models (GPT, Claude, Gemini, MiniMax, GLM, Kimi)
|
||||
# Pay-as-you-go pricing. Get your key at: https://opencode.ai/auth
|
||||
# OPENCODE_ZEN_API_KEY=
|
||||
# OPENCODE_ZEN_BASE_URL=https://opencode.ai/zen/v1 # Override default base URL
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (OpenCode Go)
|
||||
# =============================================================================
|
||||
# OpenCode Go provides access to open models (GLM-5, Kimi K2.5, MiniMax M2.5)
|
||||
# $10/month subscription. Get your key at: https://opencode.ai/auth
|
||||
# OPENCODE_GO_API_KEY=
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (Hugging Face Inference Providers)
|
||||
# =============================================================================
|
||||
# Hugging Face routes to 20+ open models via unified OpenAI-compatible endpoint.
|
||||
# Free tier included ($0.10/month), no markup on provider rates.
|
||||
# Get your token at: https://huggingface.co/settings/tokens
|
||||
# Required permission: "Make calls to Inference Providers"
|
||||
# HF_TOKEN=
|
||||
# OPENCODE_GO_BASE_URL=https://opencode.ai/zen/go/v1 # Override default base URL
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (Qwen OAuth)
|
||||
# =============================================================================
|
||||
# Qwen OAuth reuses your local Qwen CLI login (qwen auth qwen-oauth).
|
||||
# No API key needed — credentials come from ~/.qwen/oauth_creds.json.
|
||||
# Optional base URL override:
|
||||
# HERMES_QWEN_BASE_URL=https://portal.qwen.ai/v1
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (Xiaomi MiMo)
|
||||
# =============================================================================
|
||||
# Xiaomi MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash).
|
||||
# Get your key at: https://platform.xiaomimimo.com
|
||||
# XIAOMI_API_KEY=your_key_here
|
||||
# Optional base URL override:
|
||||
# XIAOMI_BASE_URL=https://api.xiaomimimo.com/v1
|
||||
|
||||
# =============================================================================
|
||||
# TOOL API KEYS
|
||||
# REQUIRED API KEYS
|
||||
# =============================================================================
|
||||
|
||||
# Exa API Key - AI-native web search and contents
|
||||
# Get at: https://exa.ai
|
||||
# EXA_API_KEY=
|
||||
|
||||
# Parallel API Key - AI-native web search and extract
|
||||
# Get at: https://parallel.ai
|
||||
# PARALLEL_API_KEY=
|
||||
# Anthropic API Key - Main agent model
|
||||
# Get at: https://console.anthropic.com/
|
||||
ANTHROPIC_API_KEY=
|
||||
|
||||
# Firecrawl API Key - Web search, extract, and crawl
|
||||
# Get at: https://firecrawl.dev/
|
||||
# FIRECRAWL_API_KEY=
|
||||
FIRECRAWL_API_KEY=
|
||||
|
||||
# Nous Research API Key - Vision analysis and multi-model reasoning
|
||||
# Get at: https://inference-api.nousresearch.com/
|
||||
NOUS_API_KEY=
|
||||
|
||||
# Morph API Key - Terminal/command execution tools
|
||||
# Get at: https://morph.so/
|
||||
MORPH_API_KEY=
|
||||
|
||||
# FAL.ai API Key - Image generation
|
||||
# Get at: https://fal.ai/
|
||||
# FAL_KEY=
|
||||
|
||||
# Honcho - Cross-session AI-native user modeling (optional)
|
||||
# Builds a persistent understanding of the user across sessions and tools.
|
||||
# Get at: https://app.honcho.dev
|
||||
# Also requires ~/.honcho/config.json with enabled=true (see README).
|
||||
# HONCHO_API_KEY=
|
||||
FAL_KEY=
|
||||
|
||||
# =============================================================================
|
||||
# HYPERLIQUID OPTIONAL SKILL
|
||||
# OPTIONAL API KEYS
|
||||
# =============================================================================
|
||||
# Optional defaults for the Hyperliquid skill in optional-skills/blockchain/hyperliquid
|
||||
#
|
||||
# Hyperliquid API base URL override
|
||||
# Default: https://api.hyperliquid.xyz
|
||||
# HYPERLIQUID_API_URL=https://api.hyperliquid-testnet.xyz
|
||||
#
|
||||
# Default address for account-level commands like state, fills, orders, and review
|
||||
# HYPERLIQUID_USER_ADDRESS=0x0000000000000000000000000000000000000000
|
||||
|
||||
# OpenAI API Key - Optional, for enhanced Hecate features
|
||||
# Get at: https://platform.openai.com/
|
||||
OPENAI_API_KEY=
|
||||
|
||||
# =============================================================================
|
||||
# TERMINAL TOOL CONFIGURATION
|
||||
# OPTIONAL CONFIGURATION
|
||||
# =============================================================================
|
||||
# Backend type: "local", "singularity", "docker", "modal", or "ssh"
|
||||
# Terminal backend is configured in ~/.hermes/config.yaml (terminal.backend).
|
||||
# Use 'hermes setup' or 'hermes config set terminal.backend docker' to change.
|
||||
# Supported: local, docker, singularity, modal, ssh
|
||||
#
|
||||
# Only override here if you need to force a backend without touching config.yaml:
|
||||
# TERMINAL_ENV=local
|
||||
|
||||
# Override the container runtime binary (e.g. to use Podman instead of Docker).
|
||||
# Useful on systems where Docker's storage driver is broken or unavailable.
|
||||
# HERMES_DOCKER_BINARY=/usr/local/bin/podman
|
||||
# Terminal Tool Settings
|
||||
HECATE_VM_LIFETIME_SECONDS=300
|
||||
HECATE_DEFAULT_SNAPSHOT_ID=snapshot_p5294qxt
|
||||
|
||||
# Container images (for singularity/docker/modal backends)
|
||||
# TERMINAL_DOCKER_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
|
||||
# TERMINAL_SINGULARITY_IMAGE=docker://nikolaik/python-nodejs:python3.11-nodejs20
|
||||
TERMINAL_MODAL_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
|
||||
|
||||
|
||||
# Working directory for terminal commands
|
||||
# For local backend: "." means current directory (resolved automatically)
|
||||
# For remote backends (ssh/docker/modal/singularity): use an absolute path
|
||||
# INSIDE the target environment, or leave unset for the backend's default
|
||||
# (/root for modal, / for docker, ~ for ssh). Do NOT use a host-local path.
|
||||
# Usually managed by config.yaml (terminal.cwd) — uncomment to override
|
||||
# TERMINAL_CWD=.
|
||||
|
||||
# Default command timeout in seconds
|
||||
TERMINAL_TIMEOUT=60
|
||||
|
||||
# Cleanup inactive environments after this many seconds
|
||||
TERMINAL_LIFETIME_SECONDS=300
|
||||
|
||||
# =============================================================================
|
||||
# SSH REMOTE EXECUTION (for TERMINAL_ENV=ssh)
|
||||
# =============================================================================
|
||||
# Run terminal commands on a remote server via SSH.
|
||||
# Agent code stays on your machine, commands execute remotely.
|
||||
#
|
||||
# SECURITY BENEFITS:
|
||||
# - Agent cannot read your .env file (API keys protected)
|
||||
# - Agent cannot modify its own code
|
||||
# - Remote server acts as isolated sandbox
|
||||
# - Can safely configure passwordless sudo on remote
|
||||
#
|
||||
# TERMINAL_SSH_HOST=192.168.1.100
|
||||
# TERMINAL_SSH_USER=agent
|
||||
# TERMINAL_SSH_PORT=22
|
||||
# TERMINAL_SSH_KEY=~/.ssh/id_rsa
|
||||
|
||||
# =============================================================================
|
||||
# SUDO SUPPORT (works with ALL terminal backends)
|
||||
# =============================================================================
|
||||
# If set, enables sudo commands by piping password via `sudo -S`.
|
||||
# Works with: local, docker, singularity, modal, and ssh backends.
|
||||
#
|
||||
# SECURITY WARNING: Password stored in plaintext. Only use on trusted machines.
|
||||
#
|
||||
# ALTERNATIVES:
|
||||
# - For SSH backend: Configure passwordless sudo on the remote server
|
||||
# - For containers: Run as root inside the container (no sudo needed)
|
||||
# - For local: Configure /etc/sudoers for specific commands
|
||||
# - For CLI: Leave unset - you'll be prompted interactively with 45s timeout
|
||||
#
|
||||
# SUDO_PASSWORD=your_password_here
|
||||
|
||||
# =============================================================================
|
||||
# MODAL CLOUD BACKEND (Optional - for TERMINAL_ENV=modal)
|
||||
# =============================================================================
|
||||
# Modal uses CLI authentication, not environment variables.
|
||||
# Run: pip install modal && modal setup
|
||||
# This will authenticate via browser and store credentials locally.
|
||||
# No API key needed in .env - Modal handles auth automatically.
|
||||
|
||||
# =============================================================================
|
||||
# BROWSER TOOL CONFIGURATION (agent-browser + Browserbase)
|
||||
# =============================================================================
|
||||
# Browser automation requires Browserbase cloud service for remote browser execution.
|
||||
# This allows the agent to navigate websites, fill forms, and extract information.
|
||||
#
|
||||
# STEALTH MODES:
|
||||
# - Basic Stealth: ALWAYS active (random fingerprints, auto CAPTCHA solving)
|
||||
# - Advanced Stealth: Requires BROWSERBASE_ADVANCED_STEALTH=true (Scale Plan only)
|
||||
|
||||
# Browserbase API Key - Cloud browser execution
|
||||
# Get at: https://browserbase.com/
|
||||
# BROWSERBASE_API_KEY=
|
||||
|
||||
# Browserbase Project ID - From your Browserbase dashboard
|
||||
# BROWSERBASE_PROJECT_ID=
|
||||
|
||||
# Enable residential proxies for better CAPTCHA solving (default: true)
|
||||
# Routes traffic through residential IPs, significantly improves success rate
|
||||
BROWSERBASE_PROXIES=true
|
||||
|
||||
# Enable advanced stealth mode (default: false, requires Scale Plan)
|
||||
# Uses custom Chromium build to avoid bot detection altogether
|
||||
BROWSERBASE_ADVANCED_STEALTH=false
|
||||
|
||||
# Browser engine for local mode (default: auto = Chrome)
|
||||
# "auto" — use Chrome (don't pass --engine flag)
|
||||
# "lightpanda" — use Lightpanda (1.3-5.8x faster navigation, no screenshots)
|
||||
# "chrome" — explicitly request Chrome
|
||||
# Requires agent-browser v0.25.3+. Lightpanda commands that fail or return
|
||||
# empty results are automatically retried with Chrome.
|
||||
# Also configurable via browser.engine in config.yaml.
|
||||
# AGENT_BROWSER_ENGINE=auto
|
||||
|
||||
# Browser session timeout in seconds (default: 300)
|
||||
# Sessions are cleaned up after this duration of inactivity
|
||||
BROWSER_SESSION_TIMEOUT=300
|
||||
|
||||
# Browser inactivity timeout - auto-cleanup inactive sessions (default: 120 = 2 min)
|
||||
# Browser sessions are automatically closed after this period of no activity
|
||||
BROWSER_INACTIVITY_TIMEOUT=120
|
||||
|
||||
# Extra Chromium launch flags passed to agent-browser, comma- or newline-separated.
|
||||
# Hermes auto-injects "--no-sandbox,--disable-dev-shm-usage" when it detects root
|
||||
# or AppArmor-restricted unprivileged user namespaces (Ubuntu 23.10+, DGX Spark,
|
||||
# many container images), so leave this unset unless you need extra flags.
|
||||
# Setting this disables the auto-injection.
|
||||
# AGENT_BROWSER_ARGS=--no-sandbox
|
||||
|
||||
# Camofox local anti-detection browser (Camoufox-based Firefox).
|
||||
# Set CAMOFOX_URL to route the browser tools through a local Camofox server
|
||||
# instead of agent-browser/Browserbase. See docs/user-guide/features/browser.md.
|
||||
# CAMOFOX_URL=http://localhost:9377
|
||||
|
||||
# Externally managed Camofox sessions — when another app owns the visible
|
||||
# Camofox browser, set these so Hermes shares the same userId/profile instead
|
||||
# of creating its own isolated session.
|
||||
# CAMOFOX_USER_ID=
|
||||
# CAMOFOX_SESSION_KEY=
|
||||
# Set to true to reuse an already-open Camofox tab for this identity before
|
||||
# creating a new one (useful for gateway restarts).
|
||||
# CAMOFOX_ADOPT_EXISTING_TAB=false
|
||||
|
||||
# =============================================================================
|
||||
# SESSION LOGGING
|
||||
# =============================================================================
|
||||
# Session trajectories are automatically saved to logs/ directory
|
||||
# Format: logs/session_YYYYMMDD_HHMMSS_UUID.json
|
||||
# Contains full conversation history in trajectory format for debugging/replay
|
||||
|
||||
# =============================================================================
|
||||
# VOICE TRANSCRIPTION & OPENAI TTS
|
||||
# =============================================================================
|
||||
# Required for voice message transcription (Whisper) and OpenAI TTS voices.
|
||||
# Uses OpenAI's API directly (not via OpenRouter).
|
||||
# Named VOICE_TOOLS_OPENAI_KEY to avoid interference with OpenRouter.
|
||||
# Get at: https://platform.openai.com/api-keys
|
||||
# VOICE_TOOLS_OPENAI_KEY=
|
||||
|
||||
# =============================================================================
|
||||
# SLACK INTEGRATION
|
||||
# =============================================================================
|
||||
# Slack Bot Token - From Slack App settings (OAuth & Permissions)
|
||||
# Get at: https://api.slack.com/apps
|
||||
# SLACK_BOT_TOKEN=xoxb-...
|
||||
|
||||
# Slack App Token - For Socket Mode (App-Level Tokens in Slack App settings)
|
||||
# SLACK_APP_TOKEN=xapp-...
|
||||
|
||||
# Slack allowed users (comma-separated Slack user IDs)
|
||||
# SLACK_ALLOWED_USERS=
|
||||
|
||||
# =============================================================================
|
||||
# TELEGRAM INTEGRATION
|
||||
# =============================================================================
|
||||
# Telegram Bot Token - From @BotFather (https://t.me/BotFather)
|
||||
# TELEGRAM_BOT_TOKEN=
|
||||
# TELEGRAM_ALLOWED_USERS= # Comma-separated user IDs
|
||||
# TELEGRAM_HOME_CHANNEL= # Default chat for cron delivery
|
||||
# TELEGRAM_HOME_CHANNEL_NAME= # Display name for home channel
|
||||
# TELEGRAM_CRON_THREAD_ID= # Forum topic ID for cron deliveries; overrides TELEGRAM_HOME_CHANNEL_THREAD_ID for cron so replies work in topic mode
|
||||
|
||||
# Webhook mode (optional — for cloud deployments like Fly.io/Railway)
|
||||
# Default is long polling. Setting TELEGRAM_WEBHOOK_URL switches to webhook mode.
|
||||
# TELEGRAM_WEBHOOK_URL=https://my-app.fly.dev/telegram
|
||||
# TELEGRAM_WEBHOOK_PORT=8443
|
||||
# TELEGRAM_WEBHOOK_SECRET= # Recommended for production
|
||||
|
||||
# WhatsApp (built-in Baileys bridge — run `hermes whatsapp` to pair)
|
||||
# WHATSAPP_ENABLED=false
|
||||
# WHATSAPP_ALLOWED_USERS=15551234567
|
||||
|
||||
# Email (IMAP/SMTP — send and receive emails as Hermes)
|
||||
# For Gmail: enable 2FA → create App Password at https://myaccount.google.com/apppasswords
|
||||
# EMAIL_ADDRESS=hermes@gmail.com
|
||||
# EMAIL_PASSWORD=xxxx xxxx xxxx xxxx
|
||||
# EMAIL_IMAP_HOST=imap.gmail.com
|
||||
# EMAIL_IMAP_PORT=993
|
||||
# EMAIL_SMTP_HOST=smtp.gmail.com
|
||||
# EMAIL_SMTP_PORT=587
|
||||
# EMAIL_POLL_INTERVAL=15
|
||||
# EMAIL_ALLOWED_USERS=your@email.com
|
||||
# EMAIL_HOME_ADDRESS=your@email.com
|
||||
|
||||
# Gateway-wide: allow ALL users without an allowlist (default: false = deny)
|
||||
# Only set to true if you intentionally want open access.
|
||||
# GATEWAY_ALLOW_ALL_USERS=false
|
||||
|
||||
# =============================================================================
|
||||
# RESPONSE PACING
|
||||
# =============================================================================
|
||||
# Human-like delays between message chunks on messaging platforms.
|
||||
# Makes the bot feel less robotic.
|
||||
# HERMES_HUMAN_DELAY_MODE=off # off | natural | custom
|
||||
# HERMES_HUMAN_DELAY_MIN_MS=800 # Min delay in ms (custom mode)
|
||||
# HERMES_HUMAN_DELAY_MAX_MS=2500 # Max delay in ms (custom mode)
|
||||
|
||||
# =============================================================================
|
||||
# DEBUG OPTIONS
|
||||
# =============================================================================
|
||||
# Debug Logging (set to "true" to enable, logs saved to ./logs/)
|
||||
WEB_TOOLS_DEBUG=false
|
||||
VISION_TOOLS_DEBUG=false
|
||||
MOA_TOOLS_DEBUG=false
|
||||
IMAGE_TOOLS_DEBUG=false
|
||||
|
||||
# =============================================================================
|
||||
# CONTEXT COMPRESSION (Auto-shrinks long conversations)
|
||||
# =============================================================================
|
||||
# When conversation approaches model's context limit, middle turns are
|
||||
# automatically summarized to free up space.
|
||||
#
|
||||
# Context compression is configured in ~/.hermes/config.yaml under compression:
|
||||
# CONTEXT_COMPRESSION_ENABLED=true # Enable auto-compression (default: true)
|
||||
# CONTEXT_COMPRESSION_THRESHOLD=0.85 # Compress at 85% of context limit
|
||||
# Model is set via compression.summary_model in config.yaml (default: google/gemini-3-flash-preview)
|
||||
|
||||
# =============================================================================
|
||||
# SKILLS HUB (GitHub integration for skill search/install/publish)
|
||||
# =============================================================================
|
||||
|
||||
# GitHub Personal Access Token — for higher API rate limits on skill search/install
|
||||
# Get at: https://github.com/settings/tokens (Fine-grained recommended)
|
||||
# GITHUB_TOKEN=ghp_xxxxxxxxxxxxxxxxxxxx
|
||||
|
||||
# GitHub App credentials (optional — for bot identity on PRs)
|
||||
# GITHUB_APP_ID=
|
||||
# GITHUB_APP_PRIVATE_KEY_PATH=
|
||||
# GITHUB_APP_INSTALLATION_ID=
|
||||
|
||||
# Groq API key (free tier — used for Whisper STT in voice mode)
|
||||
# GROQ_API_KEY=
|
||||
|
||||
# =============================================================================
|
||||
# STT PROVIDER SELECTION
|
||||
# =============================================================================
|
||||
# Default STT provider is "local" (faster-whisper) — runs on your machine, no API key needed.
|
||||
# Install with: pip install faster-whisper
|
||||
# Model downloads automatically on first use (~150 MB for "base").
|
||||
# To use cloud providers instead, set GROQ_API_KEY, VOICE_TOOLS_OPENAI_KEY, or ELEVENLABS_API_KEY above.
|
||||
# Provider priority: local > groq > openai > mistral > xai > elevenlabs
|
||||
# Configure in config.yaml: stt.provider: local | groq | openai | mistral | xai | elevenlabs
|
||||
|
||||
# =============================================================================
|
||||
# STT ADVANCED OVERRIDES (optional)
|
||||
# =============================================================================
|
||||
# Override default STT models per provider (normally set via stt.model in config.yaml)
|
||||
# STT_GROQ_MODEL=whisper-large-v3-turbo
|
||||
# STT_OPENAI_MODEL=whisper-1
|
||||
# STT_ELEVENLABS_MODEL=scribe_v2
|
||||
|
||||
# Override STT provider endpoints (for proxies or self-hosted instances)
|
||||
# GROQ_BASE_URL=https://api.groq.com/openai/v1
|
||||
# STT_OPENAI_BASE_URL=https://api.openai.com/v1
|
||||
# ELEVENLABS_STT_BASE_URL=https://api.elevenlabs.io/v1
|
||||
|
||||
# =============================================================================
|
||||
# MICROSOFT TEAMS INTEGRATION
|
||||
# =============================================================================
|
||||
# Register a Bot in Azure: https://dev.botframework.com/ → "Register a bot"
|
||||
# Or use Azure Portal: Azure Active Directory → App registrations → New registration
|
||||
# Then add the bot to Teams via the Bot Framework or App Studio.
|
||||
#
|
||||
# TEAMS_CLIENT_ID= # Azure AD App (client) ID
|
||||
# TEAMS_CLIENT_SECRET= # Azure AD client secret value
|
||||
# TEAMS_TENANT_ID= # Azure AD tenant ID (or "common" for multi-tenant)
|
||||
# TEAMS_ALLOWED_USERS= # Comma-separated AAD object IDs or UPNs
|
||||
# TEAMS_ALLOW_ALL_USERS=false # Set true to skip the allowlist
|
||||
# TEAMS_HOME_CHANNEL= # Default channel/chat ID for cron delivery
|
||||
# TEAMS_HOME_CHANNEL_NAME= # Display name for the home channel
|
||||
# TEAMS_PORT=3978 # Webhook listen port (Bot Framework default)
|
||||
|
||||
# =============================================================================
|
||||
# GOOGLE CHAT INTEGRATION
|
||||
# =============================================================================
|
||||
# Connects via Cloud Pub/Sub pull subscription (no public URL required).
|
||||
# Setup walkthrough: website/docs/user-guide/messaging/google_chat.md.
|
||||
# 1. Create a GCP project, enable the Google Chat API and Cloud Pub/Sub.
|
||||
# 2. Create a Service Account with roles/pubsub.subscriber on the
|
||||
# subscription (NOT project-wide); download the JSON key.
|
||||
# 3. Configure your Chat app at console.cloud.google.com/apis/credentials
|
||||
# → Google Chat API → Configuration → Cloud Pub/Sub topic.
|
||||
# 4. (Optional, for native attachment delivery) Each user runs
|
||||
# `/setup-files` once in their own DM after Pub/Sub is wired up.
|
||||
#
|
||||
# GOOGLE_CHAT_PROJECT_ID= # GCP project hosting the topic (or set GOOGLE_CLOUD_PROJECT)
|
||||
# GOOGLE_CHAT_SUBSCRIPTION_NAME= # Full path: projects/<id>/subscriptions/<name>
|
||||
# GOOGLE_CHAT_SERVICE_ACCOUNT_JSON= # Path to SA JSON (or set GOOGLE_APPLICATION_CREDENTIALS)
|
||||
# GOOGLE_CHAT_ALLOWED_USERS= # Comma-separated emails allowed to talk to the bot
|
||||
# GOOGLE_CHAT_ALLOW_ALL_USERS=false # Set true to skip the allowlist
|
||||
# GOOGLE_CHAT_HOME_CHANNEL= # Default space (spaces/XXXX) for cron delivery
|
||||
# GOOGLE_CHAT_HOME_CHANNEL_NAME= # Display name for the home channel
|
||||
|
||||
5
.envrc
5
.envrc
@@ -1,5 +0,0 @@
|
||||
watch_file pyproject.toml uv.lock
|
||||
watch_file package-lock.json package.json web/package.json ui-tui/package.json website/package.json apps/shared/package.json apps/desktop/package.json ui-tui/packages/hermes-ink/package.json
|
||||
watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix
|
||||
|
||||
use flake
|
||||
10
.gitattributes
vendored
10
.gitattributes
vendored
@@ -1,10 +0,0 @@
|
||||
# Auto-generated files — collapse diffs and exclude from language stats
|
||||
web/package-lock.json linguist-generated=true
|
||||
|
||||
# Enforce LF for scripts that run inside Linux containers.
|
||||
# Without this, Windows checkout converts to CRLF and breaks `exec` in the
|
||||
# container entrypoint with "no such file or directory".
|
||||
*.sh text eol=lf
|
||||
Dockerfile text eol=lf
|
||||
*.dockerfile text eol=lf
|
||||
docker/entrypoint.sh text eol=lf
|
||||
162
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
162
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
@@ -1,162 +0,0 @@
|
||||
name: "🐛 Bug Report"
|
||||
description: Report a bug — something that's broken, crashes, or behaves incorrectly.
|
||||
title: "[Bug]: "
|
||||
labels: ["bug"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for reporting a bug! Please fill out the sections below so we can reproduce and fix it quickly.
|
||||
|
||||
**Before submitting**, please:
|
||||
- [ ] Search [existing issues](https://github.com/NousResearch/hermes-agent/issues) to avoid duplicates
|
||||
- [ ] Update to the latest version (`hermes update`) and confirm the bug still exists
|
||||
- [ ] Run `hermes debug share` and paste the links below (see Debug Report section)
|
||||
|
||||
- type: textarea
|
||||
id: description
|
||||
attributes:
|
||||
label: Bug Description
|
||||
description: A clear description of what's broken. Include error messages, tracebacks, or screenshots if relevant.
|
||||
placeholder: |
|
||||
What happened? What did you expect to happen instead?
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: reproduction
|
||||
attributes:
|
||||
label: Steps to Reproduce
|
||||
description: Minimal steps to trigger the bug. The more specific, the faster we can fix it.
|
||||
placeholder: |
|
||||
1. Run `hermes chat`
|
||||
2. Send the message "..."
|
||||
3. Agent calls tool X
|
||||
4. Error appears: ...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: expected
|
||||
attributes:
|
||||
label: Expected Behavior
|
||||
description: What should have happened instead?
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: actual
|
||||
attributes:
|
||||
label: Actual Behavior
|
||||
description: What actually happened? Include full error output if available.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: component
|
||||
attributes:
|
||||
label: Affected Component
|
||||
description: Which part of Hermes is affected?
|
||||
multiple: true
|
||||
options:
|
||||
- CLI (interactive chat)
|
||||
- Gateway (Telegram/Discord/Slack/WhatsApp)
|
||||
- Setup / Installation
|
||||
- Tools (terminal, file ops, web, code execution, etc.)
|
||||
- Skills (skill loading, skill hub, skill guard)
|
||||
- Agent Core (conversation loop, context compression, memory)
|
||||
- Configuration (config.yaml, .env, hermes setup)
|
||||
- Other
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: platform
|
||||
attributes:
|
||||
label: Messaging Platform (if gateway-related)
|
||||
description: Which platform adapter is affected?
|
||||
multiple: true
|
||||
options:
|
||||
- N/A (CLI only)
|
||||
- Telegram
|
||||
- Discord
|
||||
- Slack
|
||||
- WhatsApp
|
||||
|
||||
- type: textarea
|
||||
id: debug-report
|
||||
attributes:
|
||||
label: Debug Report
|
||||
description: |
|
||||
Run `hermes debug share` from your terminal and paste the links it prints here.
|
||||
This uploads your system info, config, and recent logs to a paste service automatically.
|
||||
|
||||
If you're in an interactive chat session, you can also use the `/debug` slash command — it does the same thing.
|
||||
|
||||
If the upload fails, run `hermes debug share --local` and paste the output directly.
|
||||
placeholder: |
|
||||
Report https://paste.rs/abc123
|
||||
agent.log https://paste.rs/def456
|
||||
gateway.log https://paste.rs/ghi789
|
||||
render: shell
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: os
|
||||
attributes:
|
||||
label: Operating System
|
||||
description: e.g. Ubuntu 24.04, macOS 15.2, Windows 11
|
||||
placeholder: Ubuntu 24.04
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: python-version
|
||||
attributes:
|
||||
label: Python Version
|
||||
description: Output of `python --version`
|
||||
placeholder: "3.11.9"
|
||||
|
||||
- type: input
|
||||
id: hermes-version
|
||||
attributes:
|
||||
label: Hermes Version
|
||||
description: Output of `hermes version`
|
||||
placeholder: "2.1.0"
|
||||
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Additional Logs / Traceback (optional)
|
||||
description: |
|
||||
The debug report above covers most logs. Use this field for any extra error output,
|
||||
tracebacks, or screenshots not captured by `hermes debug share`.
|
||||
render: shell
|
||||
|
||||
- type: textarea
|
||||
id: root-cause
|
||||
attributes:
|
||||
label: Root Cause Analysis (optional)
|
||||
description: |
|
||||
If you've dug into the code and identified the root cause, share it here.
|
||||
Include file paths, line numbers, and code snippets if possible. This massively speeds up fixes.
|
||||
placeholder: |
|
||||
The bug is in `gateway/run.py` line 949. `len(history)` counts session_meta entries
|
||||
but `agent_messages` was built from filtered history...
|
||||
|
||||
- type: textarea
|
||||
id: proposed-fix
|
||||
attributes:
|
||||
label: Proposed Fix (optional)
|
||||
description: If you have a fix in mind (or a PR ready), describe it here.
|
||||
placeholder: |
|
||||
Replace `.get()` with `.pop()` on line 289 of `gateway/platforms/base.py`
|
||||
to actually clear the pending message after retrieval.
|
||||
|
||||
- type: checkboxes
|
||||
id: pr-ready
|
||||
attributes:
|
||||
label: Are you willing to submit a PR for this?
|
||||
options:
|
||||
- label: I'd like to fix this myself and submit a PR
|
||||
11
.github/ISSUE_TEMPLATE/config.yml
vendored
11
.github/ISSUE_TEMPLATE/config.yml
vendored
@@ -1,11 +0,0 @@
|
||||
blank_issues_enabled: true
|
||||
contact_links:
|
||||
- name: 💬 Nous Research Discord
|
||||
url: https://discord.gg/NousResearch
|
||||
about: For quick questions, showcasing projects, sharing skills, and community chat.
|
||||
- name: 📖 Documentation
|
||||
url: https://github.com/NousResearch/hermes-agent/blob/main/README.md
|
||||
about: Check the README and docs before opening an issue.
|
||||
- name: 🤝 Contributing Guide
|
||||
url: https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md
|
||||
about: Read this before submitting a PR.
|
||||
85
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
85
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
@@ -1,85 +0,0 @@
|
||||
name: "✨ Feature Request"
|
||||
description: Suggest a new feature or improvement.
|
||||
title: "[Feature]: "
|
||||
labels: ["enhancement"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for the suggestion! Before submitting, please consider:
|
||||
|
||||
- **Is this a new skill?** Most capabilities should be [skills, not tools](https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md#should-it-be-a-skill-or-a-tool). If it's a specialized integration (crypto, NFT, niche SaaS), it belongs on the Skills Hub, not bundled.
|
||||
- **Search [existing issues](https://github.com/NousResearch/hermes-agent/issues)** — someone may have already proposed this.
|
||||
|
||||
- type: textarea
|
||||
id: problem
|
||||
attributes:
|
||||
label: Problem or Use Case
|
||||
description: What problem does this solve? What are you trying to do that you can't today?
|
||||
placeholder: |
|
||||
I'm trying to use Hermes with [provider/platform/workflow] but currently
|
||||
there's no way to...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: solution
|
||||
attributes:
|
||||
label: Proposed Solution
|
||||
description: How do you think this should work? Be as specific as you can — CLI flags, config options, UI behavior.
|
||||
placeholder: |
|
||||
Add a `--foo` flag to `hermes chat` that enables...
|
||||
Or: Add a config key `bar.baz` that controls...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: alternatives
|
||||
attributes:
|
||||
label: Alternatives Considered
|
||||
description: What other approaches did you consider? Why is the proposed solution better?
|
||||
|
||||
- type: dropdown
|
||||
id: type
|
||||
attributes:
|
||||
label: Feature Type
|
||||
options:
|
||||
- New tool
|
||||
- New bundled skill
|
||||
- CLI improvement
|
||||
- Gateway / messaging improvement
|
||||
- Configuration option
|
||||
- Performance / reliability
|
||||
- Developer experience (tests, docs, CI)
|
||||
- Other
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: scope
|
||||
attributes:
|
||||
label: Scope
|
||||
description: How big is this change?
|
||||
options:
|
||||
- Small (single file, < 50 lines)
|
||||
- Medium (few files, < 300 lines)
|
||||
- Large (new module or significant refactor)
|
||||
|
||||
- type: checkboxes
|
||||
id: pr-ready
|
||||
attributes:
|
||||
label: Contribution
|
||||
options:
|
||||
- label: I'd like to implement this myself and submit a PR
|
||||
|
||||
- type: textarea
|
||||
id: debug-report
|
||||
attributes:
|
||||
label: Debug Report (optional)
|
||||
description: |
|
||||
If this feature request is related to a problem you're experiencing, run `hermes debug share` and paste the links here.
|
||||
In an interactive chat session, you can use `/debug` instead.
|
||||
This helps us understand your environment and any related logs.
|
||||
placeholder: |
|
||||
Report https://paste.rs/abc123
|
||||
render: shell
|
||||
112
.github/ISSUE_TEMPLATE/setup_help.yml
vendored
112
.github/ISSUE_TEMPLATE/setup_help.yml
vendored
@@ -1,112 +0,0 @@
|
||||
name: "🔧 Setup / Installation Help"
|
||||
description: Having trouble installing or configuring Hermes? Ask here.
|
||||
title: "[Setup]: "
|
||||
labels: ["setup"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Sorry you're having trouble! Please fill out the details below so we can help.
|
||||
|
||||
**Quick checks first:**
|
||||
- Run `hermes debug share` and paste the links in the Debug Report section below
|
||||
- If you're in a chat session, you can use `/debug` instead — it does the same thing
|
||||
- Try `hermes update` to get the latest version
|
||||
- Check the [README troubleshooting section](https://github.com/NousResearch/hermes-agent#troubleshooting)
|
||||
- For general questions, consider the [Nous Research Discord](https://discord.gg/NousResearch) for faster help
|
||||
|
||||
- type: textarea
|
||||
id: description
|
||||
attributes:
|
||||
label: What's Going Wrong?
|
||||
description: Describe what you're trying to do and where it fails.
|
||||
placeholder: |
|
||||
I ran `hermes setup` and selected Nous Portal, but when I try to
|
||||
start the gateway I get...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: steps
|
||||
attributes:
|
||||
label: Steps Taken
|
||||
description: What did you do? Include the exact commands you ran.
|
||||
placeholder: |
|
||||
1. Ran the install script: `curl -fsSL ... | bash`
|
||||
2. Ran `hermes setup` and chose "Quick setup"
|
||||
3. Selected OpenRouter, entered API key
|
||||
4. Ran `hermes chat` and got error...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: install-method
|
||||
attributes:
|
||||
label: Installation Method
|
||||
options:
|
||||
- Install script (curl | bash)
|
||||
- Manual clone + pip/uv install
|
||||
- PowerShell installer (Windows)
|
||||
- Docker
|
||||
- Other
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: os
|
||||
attributes:
|
||||
label: Operating System
|
||||
placeholder: Ubuntu 24.04 / macOS 15.2 / Windows 11
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: python-version
|
||||
attributes:
|
||||
label: Python Version
|
||||
description: Output of `python --version` (or `python3 --version`)
|
||||
placeholder: "3.11.9"
|
||||
|
||||
- type: input
|
||||
id: hermes-version
|
||||
attributes:
|
||||
label: Hermes Version
|
||||
description: Output of `hermes version` (if install got that far)
|
||||
placeholder: "2.1.0"
|
||||
|
||||
- type: textarea
|
||||
id: debug-report
|
||||
attributes:
|
||||
label: Debug Report
|
||||
description: |
|
||||
Run `hermes debug share` from your terminal and paste the links it prints here.
|
||||
This uploads your system info, config, and recent logs to a paste service automatically.
|
||||
|
||||
If you're in an interactive chat session, you can also use the `/debug` slash command — it does the same thing.
|
||||
|
||||
If the upload fails or install didn't get that far, run `hermes debug share --local` and paste the output directly.
|
||||
If even that doesn't work, run `hermes doctor` and paste that output instead.
|
||||
placeholder: |
|
||||
Report https://paste.rs/abc123
|
||||
agent.log https://paste.rs/def456
|
||||
gateway.log https://paste.rs/ghi789
|
||||
render: shell
|
||||
|
||||
- type: textarea
|
||||
id: error-output
|
||||
attributes:
|
||||
label: Full Error Output
|
||||
description: Paste the complete error message or traceback. This will be auto-formatted.
|
||||
render: shell
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: tried
|
||||
attributes:
|
||||
label: What I've Already Tried
|
||||
description: List any fixes or workarounds you've already attempted.
|
||||
placeholder: |
|
||||
- Ran `hermes update`
|
||||
- Tried reinstalling with `pip install -e ".[all]"`
|
||||
- Checked that OPENROUTER_API_KEY is set in ~/.hermes/.env
|
||||
75
.github/PULL_REQUEST_TEMPLATE.md
vendored
75
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -1,75 +0,0 @@
|
||||
## What does this PR do?
|
||||
|
||||
<!-- Describe the change clearly. What problem does it solve? Why is this approach the right one? -->
|
||||
|
||||
|
||||
|
||||
## Related Issue
|
||||
|
||||
<!-- Link the issue this PR addresses. If no issue exists, consider creating one first. -->
|
||||
|
||||
Fixes #
|
||||
|
||||
## Type of Change
|
||||
|
||||
<!-- Check the one that applies. -->
|
||||
|
||||
- [ ] 🐛 Bug fix (non-breaking change that fixes an issue)
|
||||
- [ ] ✨ New feature (non-breaking change that adds functionality)
|
||||
- [ ] 🔒 Security fix
|
||||
- [ ] 📝 Documentation update
|
||||
- [ ] ✅ Tests (adding or improving test coverage)
|
||||
- [ ] ♻️ Refactor (no behavior change)
|
||||
- [ ] 🎯 New skill (bundled or hub)
|
||||
|
||||
## Changes Made
|
||||
|
||||
<!-- List the specific changes. Include file paths for code changes. -->
|
||||
|
||||
-
|
||||
|
||||
## How to Test
|
||||
|
||||
<!-- Steps to verify this change works. For bugs: reproduction steps + proof that the fix works. -->
|
||||
|
||||
1.
|
||||
2.
|
||||
3.
|
||||
|
||||
## Checklist
|
||||
|
||||
<!-- Complete these before requesting review. -->
|
||||
|
||||
### Code
|
||||
|
||||
- [ ] I've read the [Contributing Guide](https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md)
|
||||
- [ ] My commit messages follow [Conventional Commits](https://www.conventionalcommits.org/) (`fix(scope):`, `feat(scope):`, etc.)
|
||||
- [ ] I searched for [existing PRs](https://github.com/NousResearch/hermes-agent/pulls) to make sure this isn't a duplicate
|
||||
- [ ] My PR contains **only** changes related to this fix/feature (no unrelated commits)
|
||||
- [ ] I've run `pytest tests/ -q` and all tests pass
|
||||
- [ ] I've added tests for my changes (required for bug fixes, strongly encouraged for features)
|
||||
- [ ] I've tested on my platform: <!-- e.g. Ubuntu 24.04, macOS 15.2, Windows 11 -->
|
||||
|
||||
### Documentation & Housekeeping
|
||||
|
||||
<!-- Check all that apply. It's OK to check "N/A" if a category doesn't apply to your change. -->
|
||||
|
||||
- [ ] I've updated relevant documentation (README, `docs/`, docstrings) — or N/A
|
||||
- [ ] I've updated `cli-config.yaml.example` if I added/changed config keys — or N/A
|
||||
- [ ] I've updated `CONTRIBUTING.md` or `AGENTS.md` if I changed architecture or workflows — or N/A
|
||||
- [ ] I've considered cross-platform impact (Windows, macOS) per the [compatibility guide](https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md#cross-platform-compatibility) — or N/A
|
||||
- [ ] I've updated tool descriptions/schemas if I changed tool behavior — or N/A
|
||||
|
||||
## For New Skills
|
||||
|
||||
<!-- Only fill this out if you're adding a skill. Delete this section otherwise. -->
|
||||
|
||||
- [ ] This skill is **broadly useful** to most users (if bundled) — see [Contributing Guide](https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md#should-the-skill-be-bundled)
|
||||
- [ ] SKILL.md follows the [standard format](https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md#skillmd-format) (frontmatter, trigger conditions, steps, pitfalls)
|
||||
- [ ] No external dependencies that aren't already available (prefer stdlib, curl, existing Hermes tools)
|
||||
- [ ] I've tested the skill end-to-end: `hermes --toolsets skills -q "Use the X skill to do Y"`
|
||||
|
||||
## Screenshots / Logs
|
||||
|
||||
<!-- If applicable, add screenshots or log output showing the fix/feature in action. -->
|
||||
|
||||
50
.github/actions/hermes-smoke-test/action.yml
vendored
50
.github/actions/hermes-smoke-test/action.yml
vendored
@@ -1,50 +0,0 @@
|
||||
name: Hermes smoke test
|
||||
description: >
|
||||
Run the image's built-in entrypoint against `--help` and `dashboard --help`
|
||||
to catch basic runtime regressions before publishing. Requires the image
|
||||
to already be loaded into the local Docker daemon under `image`.
|
||||
|
||||
Works identically on amd64 and arm64 runners.
|
||||
|
||||
inputs:
|
||||
image:
|
||||
description: Fully-qualified image tag (e.g. nousresearch/hermes-agent:test)
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- name: Ensure /tmp/hermes-test is hermes-writable
|
||||
shell: bash
|
||||
run: |
|
||||
# The image runs as the hermes user (UID 10000). GitHub Actions
|
||||
# creates /tmp/hermes-test root-owned by default, which hermes
|
||||
# can't write to — chown it to match the in-container UID before
|
||||
# bind-mounting. Real users doing `docker run -v ~/.hermes:...`
|
||||
# with their own UID hit the same issue and have their own
|
||||
# remediations (HERMES_UID env var, or chown locally).
|
||||
mkdir -p /tmp/hermes-test
|
||||
sudo chown -R 10000:10000 /tmp/hermes-test
|
||||
|
||||
- name: hermes --help
|
||||
shell: bash
|
||||
run: |
|
||||
# Use the image's real ENTRYPOINT (/init + main-wrapper.sh) so
|
||||
# this exercises the actual production startup path. PR #30136
|
||||
# review caught that an --entrypoint override here had been
|
||||
# silently neutered by the s6-overlay migration — stage2-hook
|
||||
# ignores its CMD args, so the smoke test was a no-op.
|
||||
docker run --rm \
|
||||
-v /tmp/hermes-test:/opt/data \
|
||||
"${{ inputs.image }}" --help
|
||||
|
||||
- name: hermes dashboard --help
|
||||
shell: bash
|
||||
run: |
|
||||
# Regression guard for #9153: dashboard was present in source but
|
||||
# missing from the published image. If this fails, something in
|
||||
# the Dockerfile is excluding the dashboard subcommand from the
|
||||
# installed package.
|
||||
docker run --rm \
|
||||
-v /tmp/hermes-test:/opt/data \
|
||||
"${{ inputs.image }}" dashboard --help
|
||||
18
.github/actions/nix-setup/action.yml
vendored
18
.github/actions/nix-setup/action.yml
vendored
@@ -1,18 +0,0 @@
|
||||
name: 'Setup Nix'
|
||||
description: 'Install Nix and configure Cachix binary cache'
|
||||
|
||||
inputs:
|
||||
cachix-auth-token:
|
||||
description: 'Cachix auth token (enables push). Omit for read-only.'
|
||||
required: false
|
||||
default: ''
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
|
||||
- uses: cachix/cachix-action@1eb2ef646ac0255473d23a5907ad7b04ce94065c # v17
|
||||
with:
|
||||
name: hermes-agent
|
||||
authToken: ${{ inputs.cachix-auth-token }}
|
||||
continue-on-error: true
|
||||
44
.github/dependabot.yml
vendored
44
.github/dependabot.yml
vendored
@@ -1,44 +0,0 @@
|
||||
# Dependabot configuration for hermes-agent.
|
||||
#
|
||||
# Deliberately scoped to github-actions only.
|
||||
#
|
||||
# We do NOT enable Dependabot for pip / npm / any source-dependency ecosystem
|
||||
# because we pin source dependencies exactly (uv.lock, package-lock.json) as
|
||||
# part of our supply-chain posture. Automatic version-bump PRs against those
|
||||
# pins would undermine the strategy — pins are moved deliberately, after
|
||||
# review, not on a schedule.
|
||||
#
|
||||
# github-actions is the exception: action pins (we use full commit SHAs per
|
||||
# supply-chain policy) must be updated when upstream actions publish
|
||||
# patches — usually themselves security fixes. Dependabot opens a PR with
|
||||
# the new SHA and release notes; we review and merge like any other PR.
|
||||
#
|
||||
# Security-update PRs for source dependencies (opened ONLY when a CVE is
|
||||
# published affecting a currently-pinned version) are enabled separately
|
||||
# via the repo's Dependabot security updates setting
|
||||
# (Settings → Code security → Dependabot → Dependabot security updates).
|
||||
# Those are CVE-only, not schedule-driven, and do not conflict with our
|
||||
# pinning strategy — they fire when a pinned version becomes known-bad,
|
||||
# which is exactly when we want to move the pin.
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "monday"
|
||||
open-pull-requests-limit: 5
|
||||
labels:
|
||||
- "dependencies"
|
||||
- "github-actions"
|
||||
commit-message:
|
||||
prefix: "chore(actions)"
|
||||
include: "scope"
|
||||
groups:
|
||||
# Batch routine action bumps into one PR per week to reduce noise.
|
||||
# Security updates still open individually and bypass grouping.
|
||||
actions-minor-patch:
|
||||
update-types:
|
||||
- "minor"
|
||||
- "patch"
|
||||
BIN
.github/pr-screenshots/39327/providers-collapsed.png
vendored
BIN
.github/pr-screenshots/39327/providers-collapsed.png
vendored
Binary file not shown.
|
Before Width: | Height: | Size: 36 KiB |
BIN
.github/pr-screenshots/39327/providers-expanded.png
vendored
BIN
.github/pr-screenshots/39327/providers-expanded.png
vendored
Binary file not shown.
|
Before Width: | Height: | Size: 40 KiB |
BIN
.github/pr-screenshots/39327/tools-collapsed.png
vendored
BIN
.github/pr-screenshots/39327/tools-collapsed.png
vendored
Binary file not shown.
|
Before Width: | Height: | Size: 33 KiB |
BIN
.github/pr-screenshots/39327/tools-expanded.png
vendored
BIN
.github/pr-screenshots/39327/tools-expanded.png
vendored
Binary file not shown.
|
Before Width: | Height: | Size: 36 KiB |
100
.github/workflows/build-windows-installer.yml
vendored
100
.github/workflows/build-windows-installer.yml
vendored
@@ -1,100 +0,0 @@
|
||||
name: Build Windows Installer
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
# Gate: workflow_dispatch is already restricted to users with write access,
|
||||
# but we want ADMIN-only. Explicitly check the triggering actor's repo
|
||||
# permission via the API and fail fast for anyone below admin.
|
||||
authorize:
|
||||
name: Authorize (admins only)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Check actor is a repo admin
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
ACTOR: ${{ github.actor }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
perm=$(gh api \
|
||||
"repos/${{ github.repository }}/collaborators/${ACTOR}/permission" \
|
||||
--jq '.permission')
|
||||
echo "Actor '${ACTOR}' has permission: ${perm}"
|
||||
if [ "${perm}" != "admin" ]; then
|
||||
echo "::error::'${ACTOR}' is not a repo admin (permission=${perm}). Refusing to build/sign."
|
||||
exit 1
|
||||
fi
|
||||
echo "Authorized: '${ACTOR}' is an admin."
|
||||
|
||||
build:
|
||||
name: Hermes-Setup.exe
|
||||
needs: authorize
|
||||
runs-on: windows-latest
|
||||
timeout-minutes: 30
|
||||
permissions:
|
||||
contents: read
|
||||
# Required for OIDC auth to Azure (azure/login federated credentials).
|
||||
id-token: write
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
with:
|
||||
node-version: 22
|
||||
cache: npm
|
||||
|
||||
- name: Install npm dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Setup Rust
|
||||
uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable
|
||||
|
||||
- name: Cache Rust targets
|
||||
uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2
|
||||
with:
|
||||
workspaces: apps/bootstrap-installer/src-tauri
|
||||
|
||||
- name: Build installer
|
||||
run: npm run tauri:build
|
||||
working-directory: apps/bootstrap-installer
|
||||
|
||||
- name: Azure login (OIDC)
|
||||
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2
|
||||
with:
|
||||
client-id: ${{ secrets.AZURE_CLIENT_ID }}
|
||||
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
|
||||
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
|
||||
|
||||
- name: Sign Hermes-Setup.exe with Azure Artifact Signing
|
||||
uses: azure/artifact-signing-action@c7ab2a863ab5f9a846ddb8265964877ef296ee82 # v2
|
||||
with:
|
||||
endpoint: ${{ vars.AZURE_SIGNING_ENDPOINT }}
|
||||
signing-account-name: ${{ vars.AZURE_SIGNING_ACCOUNT_NAME }}
|
||||
certificate-profile-name: ${{ vars.AZURE_SIGNING_CERTIFICATE_PROFILE }}
|
||||
# Sign both the raw exe and the bundled NSIS installer.
|
||||
files-folder: ${{ github.workspace }}\apps\bootstrap-installer\src-tauri\target\release
|
||||
files-folder-filter: exe
|
||||
files-folder-recurse: true
|
||||
file-digest: SHA256
|
||||
timestamp-rfc3161: http://timestamp.acs.microsoft.com
|
||||
timestamp-digest: SHA256
|
||||
|
||||
- name: Upload NSIS installer
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: Hermes-Setup-installer
|
||||
path: apps/bootstrap-installer/src-tauri/target/release/bundle/nsis/*.exe
|
||||
|
||||
- name: Upload raw exe
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: Hermes-Setup-exe
|
||||
path: apps/bootstrap-installer/src-tauri/target/release/Hermes-Setup.exe
|
||||
85
.github/workflows/contributor-check.yml
vendored
85
.github/workflows/contributor-check.yml
vendored
@@ -1,85 +0,0 @@
|
||||
name: Contributor Attribution Check
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
# No paths filter — the job must always run so the required check
|
||||
# reports a status (path-gated workflows leave checks "pending" forever
|
||||
# when no matching files change, which blocks merge).
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
check-attribution:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0 # Full history needed for git log
|
||||
|
||||
- name: Check if relevant files changed
|
||||
id: filter
|
||||
run: |
|
||||
BASE="${{ github.event.pull_request.base.sha }}"
|
||||
HEAD="${{ github.event.pull_request.head.sha }}"
|
||||
CHANGED=$(git diff --name-only "$BASE"..."$HEAD" -- '*.py' '**/*.py' '.github/workflows/contributor-check.yml' || true)
|
||||
if [ -n "$CHANGED" ]; then
|
||||
echo "run=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "run=false" >> "$GITHUB_OUTPUT"
|
||||
echo "No Python files changed, skipping attribution check."
|
||||
fi
|
||||
|
||||
- name: Check for unmapped contributor emails
|
||||
if: steps.filter.outputs.run == 'true'
|
||||
run: |
|
||||
# Get the merge base between this PR and main
|
||||
MERGE_BASE=$(git merge-base origin/main HEAD)
|
||||
|
||||
# Find any new author emails in this PR's commits
|
||||
NEW_EMAILS=$(git log ${MERGE_BASE}..HEAD --format='%ae' --no-merges | sort -u)
|
||||
|
||||
if [ -z "$NEW_EMAILS" ]; then
|
||||
echo "No new commits to check."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Check each email against AUTHOR_MAP in release.py
|
||||
MISSING=""
|
||||
while IFS= read -r email; do
|
||||
# Skip teknium and bot emails
|
||||
case "$email" in
|
||||
*teknium*|*noreply@github.com*|*dependabot*|*github-actions*|*anthropic.com*|*cursor.com*)
|
||||
continue ;;
|
||||
esac
|
||||
|
||||
# Check if email is in AUTHOR_MAP (either as a key or matches noreply pattern)
|
||||
if echo "$email" | grep -qP '\+.*@users\.noreply\.github\.com'; then
|
||||
continue # GitHub noreply emails auto-resolve
|
||||
fi
|
||||
|
||||
if ! grep -qF "\"${email}\"" scripts/release.py 2>/dev/null; then
|
||||
AUTHOR=$(git log --author="$email" --format='%an' -1)
|
||||
MISSING="${MISSING}\n ${email} (${AUTHOR})"
|
||||
fi
|
||||
done <<< "$NEW_EMAILS"
|
||||
|
||||
if [ -n "$MISSING" ]; then
|
||||
echo ""
|
||||
echo "⚠️ New contributor email(s) not in AUTHOR_MAP:"
|
||||
echo -e "$MISSING"
|
||||
echo ""
|
||||
echo "Please add mappings to scripts/release.py AUTHOR_MAP:"
|
||||
echo -e "$MISSING" | while read -r line; do
|
||||
email=$(echo "$line" | sed 's/^ *//' | cut -d' ' -f1)
|
||||
[ -z "$email" ] && continue
|
||||
echo " \"${email}\": \"<github-username>\","
|
||||
done
|
||||
echo ""
|
||||
echo "To find the GitHub username for an email:"
|
||||
echo " gh api 'search/users?q=EMAIL+in:email' --jq '.items[0].login'"
|
||||
exit 1
|
||||
else
|
||||
echo "✅ All contributor emails are mapped in AUTHOR_MAP."
|
||||
fi
|
||||
105
.github/workflows/deploy-site.yml
vendored
105
.github/workflows/deploy-site.yml
vendored
@@ -1,105 +0,0 @@
|
||||
name: Deploy Site
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [published]
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'website/**'
|
||||
- 'skills/**'
|
||||
- 'optional-skills/**'
|
||||
- '.github/workflows/deploy-site.yml'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
pages: write
|
||||
id-token: write
|
||||
|
||||
concurrency:
|
||||
group: pages
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
deploy-vercel:
|
||||
# Triggered automatically on release publish (production cuts) and
|
||||
# manually via `gh workflow run deploy-site.yml` when an out-of-band
|
||||
# main commit needs to ship live before the next release tag — e.g.
|
||||
# a skills-index PR that doesn't touch website/** paths and so
|
||||
# doesn't auto-deploy via the deploy-docs path.
|
||||
if: github.event_name == 'release' || github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Trigger Vercel Deploy
|
||||
run: curl -X POST "${{ secrets.VERCEL_DEPLOY_HOOK }}"
|
||||
|
||||
deploy-docs:
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
runs-on: ubuntu-latest
|
||||
environment:
|
||||
name: github-pages
|
||||
url: ${{ steps.deploy.outputs.page_url }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
with:
|
||||
node-version: 20
|
||||
cache: npm
|
||||
cache-dependency-path: website/package-lock.json
|
||||
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install PyYAML for skill extraction
|
||||
run: pip install pyyaml==6.0.2 httpx==0.28.1
|
||||
|
||||
- name: Build skills index (unified multi-source catalog)
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
# Always rebuild — the file isn't committed (gitignored), so a
|
||||
# fresh checkout starts without it and we want the freshest crawl
|
||||
# in every deploy. Failure is non-fatal: extract-skills.py will
|
||||
# fall back to the legacy snapshot cache and the Skills Hub page
|
||||
# still renders, just without the latest community catalog.
|
||||
python3 scripts/build_skills_index.py || echo "Skills index build failed (non-fatal)"
|
||||
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
- name: Regenerate per-skill docs pages + catalogs
|
||||
run: python3 website/scripts/generate-skill-docs.py
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
working-directory: website
|
||||
|
||||
- name: Build Docusaurus
|
||||
run: npm run build
|
||||
working-directory: website
|
||||
|
||||
- name: Stage deployment
|
||||
run: |
|
||||
mkdir -p _site/docs
|
||||
cp -r website/build/* _site/docs/
|
||||
# llms.txt / llms-full.txt are also published at the site root
|
||||
# (https://hermes-agent.nousresearch.com/llms.txt) because some
|
||||
# agents and IDE plugins probe the classic root-level path rather
|
||||
# than /docs/llms.txt. Same file, two URLs, one source of truth.
|
||||
if [ -f website/build/llms.txt ]; then
|
||||
cp website/build/llms.txt _site/llms.txt
|
||||
fi
|
||||
if [ -f website/build/llms-full.txt ]; then
|
||||
cp website/build/llms-full.txt _site/llms-full.txt
|
||||
fi
|
||||
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3
|
||||
with:
|
||||
path: _site
|
||||
|
||||
- name: Deploy to GitHub Pages
|
||||
id: deploy
|
||||
uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4
|
||||
68
.github/workflows/docker-lint.yml
vendored
68
.github/workflows/docker-lint.yml
vendored
@@ -1,68 +0,0 @@
|
||||
name: Docker / shell lint
|
||||
|
||||
# Lints the container build inputs: Dockerfile (via hadolint) and any shell
|
||||
# scripts under docker/ (via shellcheck). These catch the class of regression
|
||||
# the behavioral docker-publish smoke test can't — unquoted variable
|
||||
# expansions, silently-failing RUN commands, etc.
|
||||
#
|
||||
# Rules and ignores are documented in .hadolint.yaml at the repo root.
|
||||
# shellcheck severity is pinned to `error` so SC1091-style "can't follow
|
||||
# sourced script" info-level warnings don't fail the job — the .venv
|
||||
# activate script doesn't exist at lint time.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- Dockerfile
|
||||
- docker/**
|
||||
- .hadolint.yaml
|
||||
- .github/workflows/docker-lint.yml
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- Dockerfile
|
||||
- docker/**
|
||||
- .hadolint.yaml
|
||||
- .github/workflows/docker-lint.yml
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: docker-lint-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
hadolint:
|
||||
name: Lint Dockerfile (hadolint)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: hadolint
|
||||
uses: hadolint/hadolint-action@54c9adbab1582c2ef04b2016b760714a4bfde3cf # v3.1.0
|
||||
with:
|
||||
dockerfile: Dockerfile
|
||||
config: .hadolint.yaml
|
||||
failure-threshold: warning
|
||||
|
||||
shellcheck:
|
||||
name: Lint docker/ shell scripts (shellcheck)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: shellcheck
|
||||
uses: ludeeus/action-shellcheck@00cae500b08a931fb5698e11e79bfbd38e612a38 # v2.0.0
|
||||
env:
|
||||
# Severity = error: SC1091 (can't follow sourced script) is info-
|
||||
# level and would otherwise fail when the venv activate script
|
||||
# doesn't exist at lint time.
|
||||
SHELLCHECK_OPTS: --severity=error
|
||||
with:
|
||||
scandir: ./docker
|
||||
360
.github/workflows/docker-publish.yml
vendored
360
.github/workflows/docker-publish.yml
vendored
@@ -1,360 +0,0 @@
|
||||
name: Docker Build and Publish
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- '**/*.py'
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
- 'Dockerfile'
|
||||
- 'docker/**'
|
||||
- '.github/workflows/docker-publish.yml'
|
||||
- '.github/actions/hermes-smoke-test/**'
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- '**/*.py'
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
- 'Dockerfile'
|
||||
- 'docker/**'
|
||||
- '.github/workflows/docker-publish.yml'
|
||||
- '.github/actions/hermes-smoke-test/**'
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
# Needed so the arm64 job can push/pull its registry-backed build cache
|
||||
# to ghcr.io (cache-to/cache-from type=registry). See the build-arm64
|
||||
# job for why registry cache replaced the gha cache on that arch.
|
||||
packages: write
|
||||
|
||||
# Concurrency: push/release runs are NEVER cancelled so every merge gets
|
||||
# its own image. PR runs reuse a PR-scoped group with
|
||||
# cancel-in-progress: true so rapid pushes to the same PR collapse to the
|
||||
# latest commit.
|
||||
concurrency:
|
||||
group: docker-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
env:
|
||||
IMAGE_NAME: nousresearch/hermes-agent
|
||||
|
||||
jobs:
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build amd64 natively. This job also runs the smoke tests (basic --help
|
||||
# and the dashboard subcommand regression guard from #9153), because amd64
|
||||
# is the only arch we can `load` into the local daemon on an amd64 runner.
|
||||
# ---------------------------------------------------------------------------
|
||||
build-amd64:
|
||||
# Only run on the upstream repository, not on forks
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 45
|
||||
outputs:
|
||||
digest: ${{ steps.push.outputs.digest }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
# Build once, load into the local daemon for smoke testing. Cached
|
||||
# to gha with a per-arch scope; the push step below reuses every
|
||||
# layer from this build.
|
||||
- name: Build image (amd64, smoke test)
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
load: true
|
||||
platforms: linux/amd64
|
||||
tags: ${{ env.IMAGE_NAME }}:test
|
||||
build-args: |
|
||||
HERMES_GIT_SHA=${{ github.sha }}
|
||||
cache-from: type=gha,scope=docker-amd64
|
||||
cache-to: type=gha,mode=max,scope=docker-amd64
|
||||
|
||||
- name: Smoke test image
|
||||
uses: ./.github/actions/hermes-smoke-test
|
||||
with:
|
||||
image: ${{ env.IMAGE_NAME }}:test
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Run the docker-integration test suite against the freshly-built
|
||||
# image already loaded into the local daemon (`:test`). These tests
|
||||
# are excluded from the sharded `tests.yml :: test` matrix on purpose
|
||||
# (see `_SKIP_PARTS` in scripts/run_tests_parallel.py) because each
|
||||
# shard would otherwise reach the session-scoped ``built_image``
|
||||
# fixture in ``tests/docker/conftest.py`` and start a 3-7min
|
||||
# ``docker build`` under a 180s pytest-timeout cap — guaranteed to
|
||||
# die in fixture setup.
|
||||
#
|
||||
# Piggybacking here avoids a second image build: the smoke test
|
||||
# already proved the image loads + runs, so the daemon has it under
|
||||
# `${IMAGE_NAME}:test` and we just point ``HERMES_TEST_IMAGE`` at
|
||||
# that. The fixture's ``HERMES_TEST_IMAGE`` branch (see
|
||||
# tests/docker/conftest.py:62-63) short-circuits the rebuild.
|
||||
#
|
||||
# Why this job and not a standalone one: the image is 5GB+; passing
|
||||
# it between jobs via ``docker save``/``upload-artifact`` is slower
|
||||
# than the build itself. Reusing the existing daemon state is the
|
||||
# cheapest path to coverage on every PR that touches docker code.
|
||||
# ---------------------------------------------------------------------
|
||||
- name: Install uv (for docker tests)
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
- name: Set up Python 3.11 (for docker tests)
|
||||
run: uv python install 3.11
|
||||
|
||||
- name: Install Python dependencies (for docker tests)
|
||||
run: |
|
||||
uv venv .venv --python 3.11
|
||||
source .venv/bin/activate
|
||||
# ``dev`` extra pulls in pytest, pytest-asyncio, pytest-timeout —
|
||||
# everything tests/docker/ needs. We deliberately avoid ``all``
|
||||
# here because the docker tests only drive the container via
|
||||
# subprocess and don't import hermes_agent's optional deps.
|
||||
uv pip install -e ".[dev]"
|
||||
|
||||
- name: Run docker integration tests
|
||||
env:
|
||||
# Skip rebuild; use the image already loaded by the build step.
|
||||
HERMES_TEST_IMAGE: ${{ env.IMAGE_NAME }}:test
|
||||
# Match the policy in tests.yml :: test job — no accidental
|
||||
# real-API calls from inside the harness.
|
||||
OPENROUTER_API_KEY: ""
|
||||
OPENAI_API_KEY: ""
|
||||
NOUS_API_KEY: ""
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pytest tests/docker/ -v --tb=short
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
# Push amd64 by digest only (no tag). The merge job assembles the
|
||||
# tagged manifest list. `push-by-digest=true` is docker's recommended
|
||||
# pattern for multi-runner multi-platform builds.
|
||||
- name: Push amd64 by digest
|
||||
id: push
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
platforms: linux/amd64
|
||||
labels: |
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
build-args: |
|
||||
HERMES_GIT_SHA=${{ github.sha }}
|
||||
outputs: type=image,name=${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
|
||||
cache-from: type=gha,scope=docker-amd64
|
||||
cache-to: type=gha,mode=max,scope=docker-amd64
|
||||
|
||||
# Write the digest to a file and upload it as an artifact so the
|
||||
# merge job can stitch both per-arch digests into a manifest list.
|
||||
- name: Export digest
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
run: |
|
||||
mkdir -p /tmp/digests
|
||||
digest="${{ steps.push.outputs.digest }}"
|
||||
touch "/tmp/digests/${digest#sha256:}"
|
||||
|
||||
- name: Upload digest artifact
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: digest-amd64
|
||||
path: /tmp/digests/*
|
||||
if-no-files-found: error
|
||||
retention-days: 1
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build arm64 natively on GitHub's free arm64 runner. This replaces the
|
||||
# previous QEMU-emulated arm64 build, which was ~5-10x slower and shared
|
||||
# a cache scope with amd64. Matches the amd64 job's shape: build+load,
|
||||
# smoke test, then on push/release push by digest.
|
||||
# ---------------------------------------------------------------------------
|
||||
build-arm64:
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
runs-on: ubuntu-24.04-arm
|
||||
timeout-minutes: 45
|
||||
outputs:
|
||||
digest: ${{ steps.push.outputs.digest }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
# Log in to ghcr.io so the registry-backed build cache below can be
|
||||
# read (cache-from) on every event and written (cache-to) on
|
||||
# push/release. Uses the workflow's GITHUB_TOKEN, which is valid for
|
||||
# the whole job — unlike the gha cache backend's short-lived Azure SAS
|
||||
# token, which expired mid-build on slow cold-cache arm64 runs and
|
||||
# crashed the build before the smoke test (the reason the gha cache
|
||||
# was removed from arm64 PRs in the first place).
|
||||
- name: Log in to ghcr.io (build cache)
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# Build once, load into the local daemon for smoke testing.
|
||||
#
|
||||
# PR builds use the registry-backed cache READ-ONLY (cache-from only):
|
||||
# they pull warm layers pushed by the most recent main build but never
|
||||
# write, so rapid PR pushes don't race on cache writes or pollute the
|
||||
# cache ref. This restores warm-cache speed to arm64 PR builds (which
|
||||
# were running fully uncached and were ~45% slower than amd64, making
|
||||
# them the job most often cancelled on supersede).
|
||||
#
|
||||
# Registry cache (type=registry on ghcr.io) is used instead of the gha
|
||||
# cache that previously broke here: its credential is the job-lifetime
|
||||
# GITHUB_TOKEN, not a short-lived SAS token, so the cold-build-outlives-
|
||||
# token failure mode cannot recur.
|
||||
- name: Build image (arm64, smoke test, cache read-only PR)
|
||||
if: github.event_name == 'pull_request'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
load: true
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.IMAGE_NAME }}:test
|
||||
build-args: |
|
||||
HERMES_GIT_SHA=${{ github.sha }}
|
||||
cache-from: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64
|
||||
|
||||
# Main/release builds read AND write the registry cache so the digest
|
||||
# push below reuses layers from this smoke-test build, and so the next
|
||||
# PR/main build starts warm.
|
||||
- name: Build image (arm64, smoke test, cached publish)
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
load: true
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.IMAGE_NAME }}:test
|
||||
build-args: |
|
||||
HERMES_GIT_SHA=${{ github.sha }}
|
||||
cache-from: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64
|
||||
cache-to: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64,mode=max
|
||||
|
||||
- name: Smoke test image
|
||||
uses: ./.github/actions/hermes-smoke-test
|
||||
with:
|
||||
image: ${{ env.IMAGE_NAME }}:test
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Push arm64 by digest
|
||||
id: push
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
platforms: linux/arm64
|
||||
labels: |
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
build-args: |
|
||||
HERMES_GIT_SHA=${{ github.sha }}
|
||||
outputs: type=image,name=${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
|
||||
cache-from: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64
|
||||
cache-to: type=registry,ref=ghcr.io/nousresearch/hermes-agent:buildcache-arm64,mode=max
|
||||
|
||||
- name: Export digest
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
run: |
|
||||
mkdir -p /tmp/digests
|
||||
digest="${{ steps.push.outputs.digest }}"
|
||||
touch "/tmp/digests/${digest#sha256:}"
|
||||
|
||||
- name: Upload digest artifact
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: digest-arm64
|
||||
path: /tmp/digests/*
|
||||
if-no-files-found: error
|
||||
retention-days: 1
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stitch both per-arch digests into a single tagged multi-arch manifest.
|
||||
# This is a registry-side operation — no building, no layer re-push —
|
||||
# so it runs in ~30 seconds.
|
||||
#
|
||||
# On main pushes: tags both :main and :latest.
|
||||
# On releases: tags :<release_tag_name>.
|
||||
# ---------------------------------------------------------------------------
|
||||
merge:
|
||||
if: github.repository == 'NousResearch/hermes-agent' && (github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release')
|
||||
runs-on: ubuntu-latest
|
||||
needs: [build-amd64, build-arm64]
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
path: /tmp/digests
|
||||
pattern: digest-*
|
||||
merge-multiple: true
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Create manifest list and push
|
||||
working-directory: /tmp/digests
|
||||
run: |
|
||||
set -euo pipefail
|
||||
args=()
|
||||
for digest_file in *; do
|
||||
args+=("${IMAGE_NAME}@sha256:${digest_file}")
|
||||
done
|
||||
if [ "${{ github.event_name }}" = "release" ]; then
|
||||
TAG="${{ github.event.release.tag_name }}"
|
||||
docker buildx imagetools create \
|
||||
-t "${IMAGE_NAME}:${TAG}" \
|
||||
"${args[@]}"
|
||||
else
|
||||
docker buildx imagetools create \
|
||||
-t "${IMAGE_NAME}:main" \
|
||||
-t "${IMAGE_NAME}:latest" \
|
||||
"${args[@]}"
|
||||
fi
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
|
||||
- name: Inspect image
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "release" ]; then
|
||||
docker buildx imagetools inspect "${IMAGE_NAME}:${{ github.event.release.tag_name }}"
|
||||
else
|
||||
docker buildx imagetools inspect "${IMAGE_NAME}:main"
|
||||
fi
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
48
.github/workflows/docs-site-checks.yml
vendored
48
.github/workflows/docs-site-checks.yml
vendored
@@ -1,48 +0,0 @@
|
||||
name: Docs Site Checks
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- 'website/**'
|
||||
- '.github/workflows/docs-site-checks.yml'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
docs-site-checks:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
with:
|
||||
node-version: 20
|
||||
cache: npm
|
||||
cache-dependency-path: website/package-lock.json
|
||||
|
||||
- name: Install website dependencies
|
||||
run: npm ci
|
||||
working-directory: website
|
||||
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install ascii-guard
|
||||
run: python -m pip install ascii-guard==2.3.0 pyyaml==6.0.3
|
||||
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
- name: Regenerate per-skill docs pages + catalogs
|
||||
run: python3 website/scripts/generate-skill-docs.py
|
||||
|
||||
- name: Lint docs diagrams
|
||||
run: npm run lint:diagrams
|
||||
working-directory: website
|
||||
|
||||
- name: Build Docusaurus
|
||||
run: npm run build
|
||||
working-directory: website
|
||||
58
.github/workflows/history-check.yml
vendored
58
.github/workflows/history-check.yml
vendored
@@ -1,58 +0,0 @@
|
||||
name: History Check
|
||||
|
||||
# Rejects PRs whose branch has no common ancestor with main.
|
||||
#
|
||||
# In May 2026 PR #25045 was merged from a branch that had been disconnected
|
||||
# from main's history (likely an accidental `git checkout --orphan` or
|
||||
# `.git/` re-init). GitHub's merge UI does not refuse merges of unrelated
|
||||
# histories, so the PR landed cleanly with the intended one-file change —
|
||||
# but its parent-less root commit (413990c94) got grafted into main as a
|
||||
# second root, and ~1500 files' worth of `git blame` history collapsed
|
||||
# onto that single commit.
|
||||
#
|
||||
# This check catches the failure mode by requiring `git merge-base` between
|
||||
# the PR head and main to be non-empty.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
check-common-ancestor:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0 # full history both sides for merge-base
|
||||
|
||||
- name: Reject PRs with no common ancestor on main
|
||||
run: |
|
||||
# `git merge-base` exits non-zero AND prints nothing when the two
|
||||
# commits share no ancestor. We check both conditions explicitly
|
||||
# so the failure message is clear regardless of which signal fires
|
||||
# first.
|
||||
if ! BASE=$(git merge-base origin/main HEAD 2>/dev/null) || [ -z "$BASE" ]; then
|
||||
echo ""
|
||||
echo "::error::This PR has no common ancestor with main."
|
||||
echo ""
|
||||
echo "Your branch's history is disconnected from main. Common causes:"
|
||||
echo " - the branch was created with 'git checkout --orphan'"
|
||||
echo " - '.git/' was re-initialized at some point during the work"
|
||||
echo " - the branch was force-pushed from an unrelated repository"
|
||||
echo ""
|
||||
echo "Merging an unrelated-history PR grafts a parent-less root commit"
|
||||
echo "into main and collapses git blame for every file in that snapshot."
|
||||
echo "Reference: PR #25045 caused this and re-rooted blame on ~1500"
|
||||
echo "files to a single orphan commit."
|
||||
echo ""
|
||||
echo "To fix, rebase your changes onto current main:"
|
||||
echo " git fetch origin main"
|
||||
echo " git checkout -b fix-branch origin/main"
|
||||
echo " # re-apply your changes (cherry-pick, copy files, etc.)"
|
||||
echo " git push -f origin fix-branch"
|
||||
exit 1
|
||||
fi
|
||||
echo "::notice::Common ancestor with main: $BASE"
|
||||
202
.github/workflows/lint.yml
vendored
202
.github/workflows/lint.yml
vendored
@@ -1,202 +0,0 @@
|
||||
name: Lint (ruff + ty)
|
||||
|
||||
# Two things here:
|
||||
# 1. Advisory diff — ruff + ty diagnostics as a diff vs the target branch.
|
||||
# Posts a Markdown summary and a PR comment. Exit zero always.
|
||||
# 2. Blocking ``ruff check .`` — enforces the explicit rules in
|
||||
# ``[tool.ruff.lint.select]`` (currently PLW1514). Failure blocks merge.
|
||||
# Separate job so the advisory diff still runs and posts even when
|
||||
# enforcement fails.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- "**/*.md"
|
||||
- "docs/**"
|
||||
- "website/**"
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- "**/*.md"
|
||||
- "docs/**"
|
||||
- "website/**"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write # needed to post/update PR comments
|
||||
|
||||
concurrency:
|
||||
group: lint-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
lint-diff:
|
||||
name: ruff + ty diff
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0 # need full history for merge-base + worktree
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
- name: Install ruff + ty
|
||||
run: |
|
||||
uv tool install ruff
|
||||
uv tool install ty
|
||||
|
||||
- name: Determine base ref
|
||||
id: base
|
||||
run: |
|
||||
# For PRs, diff against the merge base with the target branch.
|
||||
# For pushes to main, diff against the previous commit on main.
|
||||
if [ "${{ github.event_name }}" = "pull_request" ]; then
|
||||
BASE_SHA=$(git merge-base "origin/${{ github.base_ref }}" HEAD)
|
||||
BASE_REF="origin/${{ github.base_ref }}"
|
||||
else
|
||||
BASE_SHA=$(git rev-parse HEAD~1 2>/dev/null || git rev-parse HEAD)
|
||||
BASE_REF="HEAD~1"
|
||||
fi
|
||||
echo "sha=${BASE_SHA}" >> "$GITHUB_OUTPUT"
|
||||
echo "ref=${BASE_REF}" >> "$GITHUB_OUTPUT"
|
||||
echo "Base SHA: ${BASE_SHA}"
|
||||
echo "Base ref: ${BASE_REF}"
|
||||
|
||||
- name: Run ruff + ty on HEAD
|
||||
run: |
|
||||
mkdir -p .lint-reports/head
|
||||
ruff check --output-format json --exit-zero \
|
||||
> .lint-reports/head/ruff.json || true
|
||||
ty check --output-format gitlab --exit-zero \
|
||||
> .lint-reports/head/ty.json || true
|
||||
echo "HEAD ruff: $(wc -c < .lint-reports/head/ruff.json) bytes"
|
||||
echo "HEAD ty: $(wc -c < .lint-reports/head/ty.json) bytes"
|
||||
|
||||
- name: Run ruff + ty on base (via git worktree)
|
||||
run: |
|
||||
mkdir -p .lint-reports/base
|
||||
# Use a worktree so we don't clobber the main checkout. If the basex
|
||||
# SHA is identical to HEAD (e.g. first commit), skip and leave the
|
||||
# base reports empty — the diff script handles missing files.
|
||||
HEAD_SHA=$(git rev-parse HEAD)
|
||||
BASE_SHA="${{ steps.base.outputs.sha }}"
|
||||
if [ "$BASE_SHA" = "$HEAD_SHA" ]; then
|
||||
echo "Base SHA == HEAD SHA, skipping base scan."
|
||||
echo '[]' > .lint-reports/base/ruff.json
|
||||
echo '[]' > .lint-reports/base/ty.json
|
||||
else
|
||||
git worktree add --detach /tmp/lint-base "$BASE_SHA"
|
||||
(
|
||||
cd /tmp/lint-base
|
||||
ruff check --output-format json --exit-zero \
|
||||
> "$GITHUB_WORKSPACE/.lint-reports/base/ruff.json" || true
|
||||
ty check --output-format gitlab --exit-zero \
|
||||
> "$GITHUB_WORKSPACE/.lint-reports/base/ty.json" || true
|
||||
)
|
||||
git worktree remove --force /tmp/lint-base
|
||||
fi
|
||||
echo "base ruff: $(wc -c < .lint-reports/base/ruff.json) bytes"
|
||||
echo "base ty: $(wc -c < .lint-reports/base/ty.json) bytes"
|
||||
|
||||
- name: Generate diff summary
|
||||
run: |
|
||||
python scripts/lint_diff.py \
|
||||
--base-ruff .lint-reports/base/ruff.json \
|
||||
--head-ruff .lint-reports/head/ruff.json \
|
||||
--base-ty .lint-reports/base/ty.json \
|
||||
--head-ty .lint-reports/head/ty.json \
|
||||
--base-ref "${{ steps.base.outputs.ref }}" \
|
||||
--head-ref "${{ github.event_name == 'pull_request' && github.head_ref || github.ref_name }}" \
|
||||
--output .lint-reports/summary.md
|
||||
cat .lint-reports/summary.md >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
- name: Upload reports as artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: lint-reports
|
||||
path: .lint-reports/
|
||||
retention-days: 14
|
||||
|
||||
- name: Post / update PR comment
|
||||
if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
|
||||
continue-on-error: true
|
||||
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7
|
||||
with:
|
||||
script: |
|
||||
const fs = require('fs');
|
||||
const body = fs.readFileSync('.lint-reports/summary.md', 'utf8');
|
||||
const marker = '<!-- lint-diff-summary -->';
|
||||
const fullBody = marker + '\n' + body;
|
||||
|
||||
const { data: comments } = await github.rest.issues.listComments({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
});
|
||||
const existing = comments.find(c => c.body && c.body.includes(marker));
|
||||
if (existing) {
|
||||
await github.rest.issues.updateComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
comment_id: existing.id,
|
||||
body: fullBody,
|
||||
});
|
||||
} else {
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
body: fullBody,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
ruff-blocking:
|
||||
# Enforce the rules in pyproject.toml [tool.ruff.lint.select]. Currently
|
||||
# PLW1514 (unspecified-encoding) — catches bare ``open()`` /
|
||||
# ``read_text()`` / ``write_text()`` calls that default to locale
|
||||
# encoding on Windows. Failure here blocks merge; the advisory
|
||||
# ``lint-diff`` job above runs independently so reviewers still get
|
||||
# the diff comment even when enforcement fails.
|
||||
name: ruff enforcement (blocking)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
- name: Install ruff
|
||||
run: uv tool install ruff
|
||||
|
||||
- name: ruff check .
|
||||
# No --exit-zero, no || true. Exit code propagates to the job,
|
||||
# which propagates to the required-check gate.
|
||||
run: |
|
||||
ruff check .
|
||||
|
||||
windows-footguns:
|
||||
# Static guardrails on Windows-unsafe Python primitives — os.kill(pid, 0),
|
||||
# os.killpg, os.setsid, signal.SIGKILL without getattr fallback,
|
||||
# shebang scripts via subprocess, bare open() without encoding=, etc.
|
||||
# See scripts/check-windows-footguns.py for the full rule list.
|
||||
name: Windows footguns (blocking)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Run footgun checker
|
||||
run: python scripts/check-windows-footguns.py --all
|
||||
254
.github/workflows/nix-lockfile-fix.yml
vendored
254
.github/workflows/nix-lockfile-fix.yml
vendored
@@ -1,254 +0,0 @@
|
||||
name: Nix Lockfile Fix
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'package-lock.json'
|
||||
- 'package.json'
|
||||
- 'ui-tui/package.json'
|
||||
- 'apps/desktop/package.json'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
pr_number:
|
||||
description: 'PR number to fix (leave empty to run on the selected branch)'
|
||||
required: false
|
||||
type: string
|
||||
issue_comment:
|
||||
types: [edited]
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
|
||||
concurrency:
|
||||
group: nix-lockfile-fix-${{ github.event.issue.number || github.event.inputs.pr_number || github.ref }}
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
# ── Auto-fix on main ───────────────────────────────────────────────
|
||||
# Fires when a push to main touches package.json or package-lock.json.
|
||||
# Runs fix-lockfiles and pushes the hash update commit directly to main
|
||||
# so Nix builds never stay broken.
|
||||
#
|
||||
# Safety invariants:
|
||||
# 1. The fix commit only touches nix/*.nix files, which are NOT in
|
||||
# the paths filter above, so this cannot re-trigger itself.
|
||||
# 2. An explicit file-whitelist check before commit aborts if
|
||||
# fix-lockfiles ever modifies unexpected files.
|
||||
# 3. Job-level concurrency with cancel-in-progress: true ensures
|
||||
# back-to-back pushes collapse to the newest; ref: main checkout
|
||||
# always operates on the latest branch state.
|
||||
# 4. Uses a GitHub App token (not GITHUB_TOKEN) so the fix commit
|
||||
# triggers downstream nix.yml verification.
|
||||
auto-fix-main:
|
||||
if: github.event_name == 'push'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 25
|
||||
concurrency:
|
||||
group: auto-fix-main
|
||||
cancel-in-progress: true
|
||||
steps:
|
||||
- name: Generate GitHub App token
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@7bfa3a4717ef143a604ee0a99d859b8886a96d00 # v1.9.3
|
||||
with:
|
||||
app-id: ${{ secrets.APP_ID }}
|
||||
private-key: ${{ secrets.APP_PRIVATE_KEY }}
|
||||
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
ref: main
|
||||
token: ${{ steps.app-token.outputs.token }}
|
||||
|
||||
- uses: ./.github/actions/nix-setup
|
||||
with:
|
||||
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
|
||||
|
||||
- name: Apply lockfile hashes
|
||||
id: apply
|
||||
run: nix run .#fix-lockfiles -- --apply
|
||||
|
||||
- name: Commit & push
|
||||
if: steps.apply.outputs.changed == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Ensure only nix files were modified — prevents accidental
|
||||
# self-triggering if fix-lockfiles ever touches package files.
|
||||
unexpected="$(git diff --name-only | grep -Ev '^nix/(tui|web)\.nix$' || true)"
|
||||
if [ -n "$unexpected" ]; then
|
||||
echo "::error::Unexpected modified files: $unexpected"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Record the base SHA before committing — used to detect package
|
||||
# file changes if we need to rebase after a non-fast-forward push.
|
||||
BASE_SHA="$(git rev-parse HEAD)"
|
||||
|
||||
git config user.name 'github-actions[bot]'
|
||||
git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
|
||||
git add nix/tui.nix nix/web.nix
|
||||
git commit -m "fix(nix): auto-refresh npm lockfile hashes" \
|
||||
-m "Source: $GITHUB_SHA" \
|
||||
-m "Run: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
|
||||
|
||||
# Retry push with rebase in case main advanced with an unrelated
|
||||
# commit during the nix build. Without this, a non-fast-forward
|
||||
# rejection silently loses the fix. If package files changed during
|
||||
# the rebase, abort — a fresh auto-fix run will handle the new state.
|
||||
for attempt in 1 2 3; do
|
||||
if git push origin HEAD:main; then
|
||||
exit 0
|
||||
fi
|
||||
echo "::warning::Push attempt $attempt failed (non-fast-forward?), rebasing…"
|
||||
git fetch origin main
|
||||
|
||||
# If package files changed between our base and the new main,
|
||||
# our computed hashes are stale. Abort and let the next triggered
|
||||
# run recompute from the correct package-lock state.
|
||||
pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
|
||||
'package-lock.json' 'package.json' \
|
||||
'ui-tui/package.json' 'apps/desktop/package.json' || true)"
|
||||
if [ -n "$pkg_changed" ]; then
|
||||
echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
git rebase origin/main
|
||||
done
|
||||
echo "::error::Failed to push after 3 rebase attempts"
|
||||
exit 1
|
||||
|
||||
# ── PR fix (manual / checkbox) ─────────────────────────────────────
|
||||
# Existing behavior: run on manual dispatch OR when a task-list
|
||||
# checkbox in the sticky lockfile-check comment flips from [ ] to [x].
|
||||
fix:
|
||||
if: |
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'issue_comment'
|
||||
&& github.event.issue.pull_request != null
|
||||
&& contains(github.event.comment.body, '[x] **Apply lockfile fix**')
|
||||
&& !contains(github.event.changes.body.from, '[x] **Apply lockfile fix**'))
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 25
|
||||
steps:
|
||||
- name: Authorize & resolve PR
|
||||
id: resolve
|
||||
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
|
||||
with:
|
||||
script: |
|
||||
// 1. Verify the actor has write access — applies to both checkbox
|
||||
// clicks and manual dispatch.
|
||||
const { data: perm } =
|
||||
await github.rest.repos.getCollaboratorPermissionLevel({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
username: context.actor,
|
||||
});
|
||||
if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
|
||||
core.setFailed(
|
||||
`${context.actor} lacks write access (has: ${perm.permission})`
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// 2. Resolve which ref to check out.
|
||||
let prNumber = '';
|
||||
if (context.eventName === 'issue_comment') {
|
||||
prNumber = String(context.payload.issue.number);
|
||||
} else if (context.eventName === 'workflow_dispatch') {
|
||||
prNumber = context.payload.inputs.pr_number || '';
|
||||
}
|
||||
|
||||
if (!prNumber) {
|
||||
core.setOutput('ref', context.ref.replace(/^refs\/heads\//, ''));
|
||||
core.setOutput('repo', context.repo.repo);
|
||||
core.setOutput('owner', context.repo.owner);
|
||||
core.setOutput('pr', '');
|
||||
return;
|
||||
}
|
||||
|
||||
const { data: pr } = await github.rest.pulls.get({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
pull_number: Number(prNumber),
|
||||
});
|
||||
core.setOutput('ref', pr.head.ref);
|
||||
core.setOutput('repo', pr.head.repo.name);
|
||||
core.setOutput('owner', pr.head.repo.owner.login);
|
||||
core.setOutput('pr', String(pr.number));
|
||||
|
||||
# Wipe the sticky lockfile-check comment to a "running" state as soon
|
||||
# as the job is authorized, so the user sees their click was picked up
|
||||
# before the ~minute of nix build work.
|
||||
- name: Mark sticky as running
|
||||
if: steps.resolve.outputs.pr != ''
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
number: ${{ steps.resolve.outputs.pr }}
|
||||
message: |
|
||||
### 🔄 Applying lockfile fix…
|
||||
|
||||
Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
|
||||
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
|
||||
ref: ${{ steps.resolve.outputs.ref }}
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: ./.github/actions/nix-setup
|
||||
with:
|
||||
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
|
||||
|
||||
- name: Apply lockfile hashes
|
||||
id: apply
|
||||
run: nix run .#fix-lockfiles
|
||||
|
||||
- name: Commit & push
|
||||
if: steps.apply.outputs.changed == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
git config user.name 'github-actions[bot]'
|
||||
git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
|
||||
git add nix/tui.nix nix/web.nix
|
||||
git commit -m "fix(nix): refresh npm lockfile hashes"
|
||||
git push
|
||||
|
||||
- name: Update sticky (applied)
|
||||
if: steps.apply.outputs.changed == 'true' && steps.resolve.outputs.pr != ''
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
number: ${{ steps.resolve.outputs.pr }}
|
||||
message: |
|
||||
### ✅ Lockfile fix applied
|
||||
|
||||
Pushed a commit refreshing the npm lockfile hashes — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
|
||||
|
||||
- name: Update sticky (already current)
|
||||
if: steps.apply.outputs.changed == 'false' && steps.resolve.outputs.pr != ''
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
number: ${{ steps.resolve.outputs.pr }}
|
||||
message: |
|
||||
### ✅ Lockfile hashes already current
|
||||
|
||||
Nothing to commit — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
|
||||
|
||||
- name: Update sticky (failed)
|
||||
if: failure() && steps.resolve.outputs.pr != ''
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
number: ${{ steps.resolve.outputs.pr }}
|
||||
message: |
|
||||
### ❌ Lockfile fix failed
|
||||
|
||||
See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for logs.
|
||||
105
.github/workflows/nix.yml
vendored
105
.github/workflows/nix.yml
vendored
@@ -1,105 +0,0 @@
|
||||
name: Nix
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
|
||||
concurrency:
|
||||
group: nix-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
nix:
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest]
|
||||
runs-on: ${{ matrix.os }}
|
||||
timeout-minutes: 30
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: ./.github/actions/nix-setup
|
||||
with:
|
||||
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
|
||||
|
||||
- name: Resolve head SHA
|
||||
if: github.event_name == 'pull_request'
|
||||
id: sha
|
||||
shell: bash
|
||||
run: |
|
||||
FULL="${{ github.event.pull_request.head.sha || github.sha }}"
|
||||
echo "full=$FULL" >> "$GITHUB_OUTPUT"
|
||||
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Check flake
|
||||
id: flake
|
||||
continue-on-error: true
|
||||
run: nix flake check --print-build-logs
|
||||
|
||||
# When the flake check fails, run a targeted diagnostic to see if
|
||||
# the failure is specifically a stale npm lockfile hash in one of the
|
||||
# known npm subpackages (tui / web). This avoids surfacing a generic
|
||||
# "build failed" message when the fix is a single known command.
|
||||
- name: Diagnose npm lockfile hashes
|
||||
id: hash_check
|
||||
if: steps.flake.outcome == 'failure' && runner.os == 'Linux'
|
||||
continue-on-error: true
|
||||
env:
|
||||
LINK_SHA: ${{ steps.sha.outputs.full }}
|
||||
run: nix run .#fix-lockfiles -- --check
|
||||
|
||||
# If fix-lockfiles itself crashes (infrastructure blip, cache throttle,
|
||||
# etc.) it won't set stale=true/false. Treat that as a distinct failure
|
||||
# mode rather than silently ignoring it.
|
||||
- name: Fail if hash check crashed without reporting
|
||||
if: steps.hash_check.outcome == 'failure' && steps.hash_check.outputs.stale != 'true' && steps.hash_check.outputs.stale != 'false'
|
||||
run: |
|
||||
echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
|
||||
exit 1
|
||||
|
||||
- name: Post sticky PR comment (stale hashes)
|
||||
if: steps.hash_check.outputs.stale == 'true' && github.event_name == 'pull_request'
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
message: |
|
||||
### ⚠️ npm lockfile hash out of date
|
||||
|
||||
Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
|
||||
|
||||
The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
|
||||
|
||||
${{ steps.hash_check.outputs.report }}
|
||||
|
||||
#### Apply the fix
|
||||
|
||||
- [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
|
||||
- Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
|
||||
- Or locally: `nix run .#fix-lockfiles` and commit the diff
|
||||
|
||||
# Clear the sticky comment when either the flake check passed outright (no
|
||||
# hash check needed) or the hash check explicitly returned stale=false
|
||||
# (check failed for a non-hash reason).
|
||||
- name: Clear sticky PR comment (resolved)
|
||||
if: |
|
||||
github.event_name == 'pull_request' &&
|
||||
(steps.hash_check.outputs.stale == 'false' ||
|
||||
steps.flake.outcome == 'success')
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
delete: true
|
||||
|
||||
- name: Final fail if flake check failed
|
||||
if: steps.flake.outcome == 'failure'
|
||||
run: |
|
||||
if [ "${{ steps.hash_check.outputs.stale }}" == "true" ]; then
|
||||
echo "::error::Nix build failed due to stale npm lockfile hash. Run: nix run .#fix-lockfiles"
|
||||
else
|
||||
echo "::error::Nix flake check failed. See logs above."
|
||||
fi
|
||||
exit 1
|
||||
65
.github/workflows/osv-scanner.yml
vendored
65
.github/workflows/osv-scanner.yml
vendored
@@ -1,65 +0,0 @@
|
||||
name: OSV-Scanner
|
||||
|
||||
# Scans lockfiles (uv.lock, package-lock.json) against the OSV vulnerability
|
||||
# database. Runs on every PR that touches a lockfile and on a weekly schedule
|
||||
# against main.
|
||||
#
|
||||
# This is detection-only — OSV-Scanner does NOT open PRs or modify pins.
|
||||
# It reports known CVEs in currently-pinned dependency versions so we can
|
||||
# decide when and how to patch on our own schedule. Our pinning strategy
|
||||
# (full SHA / exact version) is preserved; only the notification signal
|
||||
# is added.
|
||||
#
|
||||
# Complements the existing supply-chain-audit.yml workflow (which scans
|
||||
# for malicious code patterns in PR diffs) by covering the orthogonal
|
||||
# "currently-pinned dep became known-vulnerable" case.
|
||||
#
|
||||
# Uses Google's officially-recommended reusable workflow, pinned by SHA.
|
||||
# Findings land in the repo's Security tab (Code Scanning > OSV-Scanner).
|
||||
# fail-on-vuln is disabled so the job does not block merges on pre-existing
|
||||
# vulnerabilities in pinned deps that we may need to patch deliberately.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'uv.lock'
|
||||
- 'pyproject.toml'
|
||||
- 'package.json'
|
||||
- 'package-lock.json'
|
||||
- 'ui-tui/package.json'
|
||||
- 'website/package.json'
|
||||
- 'website/package-lock.json'
|
||||
- '.github/workflows/osv-scanner.yml'
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'uv.lock'
|
||||
- 'pyproject.toml'
|
||||
- 'package.json'
|
||||
- 'package-lock.json'
|
||||
- 'website/package-lock.json'
|
||||
schedule:
|
||||
# Weekly scan against main — catches CVEs published after merge for
|
||||
# deps that haven't changed since.
|
||||
- cron: '0 9 * * 1'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
# Required by the reusable workflow to upload SARIF to the Security tab.
|
||||
actions: read
|
||||
contents: read
|
||||
security-events: write
|
||||
|
||||
jobs:
|
||||
scan:
|
||||
name: Scan lockfiles
|
||||
uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@9a498708959aeaef5ef730655706c5a1df1edbc2 # v2.3.8
|
||||
with:
|
||||
# Scan explicit lockfiles rather than recursing, so we only look at
|
||||
# the three sources of truth and skip vendored / test / worktree dirs.
|
||||
scan-args: |-
|
||||
--lockfile=uv.lock
|
||||
--lockfile=package-lock.json
|
||||
--lockfile=website/package-lock.json
|
||||
fail-on-vuln: false
|
||||
149
.github/workflows/skills-index-freshness.yml
vendored
149
.github/workflows/skills-index-freshness.yml
vendored
@@ -1,149 +0,0 @@
|
||||
name: Skills Index Freshness Check
|
||||
|
||||
# Belt-and-suspenders for the twice-daily build_skills_index pipeline.
|
||||
# If the live /docs/api/skills-index.json ever goes more than 26 hours
|
||||
# stale OR the file disappears entirely OR a major source has collapsed,
|
||||
# this workflow opens a GitHub issue so we hear about it before users do.
|
||||
#
|
||||
# Triggered every 4 hours so we catch a stuck cron within one tick.
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 */4 * * *'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
|
||||
jobs:
|
||||
check-freshness:
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Probe live index
|
||||
id: probe
|
||||
run: |
|
||||
set -e
|
||||
URL="https://hermes-agent.nousresearch.com/docs/api/skills-index.json"
|
||||
echo "Probing $URL"
|
||||
# -L follows redirects; -f fails on HTTP errors; -s suppresses progress
|
||||
if ! curl -fsSL -o /tmp/skills-index.json "$URL"; then
|
||||
echo "status=fetch-failed" >> "$GITHUB_OUTPUT"
|
||||
echo "detail=Could not download $URL" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
# Validate + extract generated_at and per-source counts
|
||||
python3 <<'PY' >> "$GITHUB_OUTPUT"
|
||||
import json, sys
|
||||
from datetime import datetime, timezone
|
||||
|
||||
try:
|
||||
with open("/tmp/skills-index.json") as f:
|
||||
data = json.load(f)
|
||||
except Exception as e:
|
||||
print(f"status=parse-failed")
|
||||
print(f"detail=JSON decode error: {e}")
|
||||
sys.exit(0)
|
||||
|
||||
generated_at = data.get("generated_at", "")
|
||||
total = data.get("skill_count", 0)
|
||||
skills = data.get("skills", [])
|
||||
if not isinstance(skills, list):
|
||||
print("status=invalid-shape")
|
||||
print(f"detail=skills field is not a list (got {type(skills).__name__})")
|
||||
sys.exit(0)
|
||||
|
||||
# Per-source counts
|
||||
from collections import Counter
|
||||
by_src = Counter(s.get("source", "") for s in skills)
|
||||
|
||||
# Freshness
|
||||
age_hours = None
|
||||
try:
|
||||
ts = datetime.fromisoformat(generated_at.replace("Z", "+00:00"))
|
||||
age_hours = (datetime.now(timezone.utc) - ts).total_seconds() / 3600
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Floors — same as build_skills_index.py EXPECTED_FLOORS.
|
||||
floors = {
|
||||
"skills.sh": 100,
|
||||
"lobehub": 100,
|
||||
"clawhub": 50,
|
||||
"official": 50,
|
||||
"github": 30,
|
||||
"browse-sh": 50,
|
||||
}
|
||||
issues = []
|
||||
if age_hours is not None and age_hours > 26:
|
||||
issues.append(f"Index is {age_hours:.1f}h old (limit 26h)")
|
||||
for src, floor in floors.items():
|
||||
count = by_src.get(src, 0)
|
||||
if src == "skills.sh":
|
||||
count = by_src.get("skills.sh", 0) + by_src.get("skills-sh", 0)
|
||||
if count < floor:
|
||||
issues.append(f"{src}: {count} < {floor}")
|
||||
if total < 1500:
|
||||
issues.append(f"total skills: {total} < 1500")
|
||||
|
||||
if issues:
|
||||
detail = "; ".join(issues)
|
||||
print("status=degraded")
|
||||
# GITHUB_OUTPUT doesn't allow newlines without explicit delimiter
|
||||
print(f"detail={detail}")
|
||||
else:
|
||||
print("status=ok")
|
||||
print(f"detail=Index OK — {total} skills, generated {generated_at}")
|
||||
by_summary = ", ".join(f"{k}={v}" for k, v in by_src.most_common(8))
|
||||
print(f"summary={by_summary}")
|
||||
PY
|
||||
|
||||
- name: Report status
|
||||
run: |
|
||||
echo "Probe status: ${{ steps.probe.outputs.status }}"
|
||||
echo "Detail: ${{ steps.probe.outputs.detail }}"
|
||||
if [ -n "${{ steps.probe.outputs.summary }}" ]; then
|
||||
echo "Summary: ${{ steps.probe.outputs.summary }}"
|
||||
fi
|
||||
|
||||
- name: Open issue on degraded / failed probe
|
||||
if: steps.probe.outputs.status != 'ok'
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
STATUS: ${{ steps.probe.outputs.status }}
|
||||
DETAIL: ${{ steps.probe.outputs.detail }}
|
||||
run: |
|
||||
# Find existing open issue by title prefix so we don't spam — we
|
||||
# append a comment instead of opening a new one each tick.
|
||||
TITLE_PREFIX="[skills-index-watchdog]"
|
||||
existing=$(gh issue list \
|
||||
--repo "${{ github.repository }}" \
|
||||
--state open \
|
||||
--search "in:title \"$TITLE_PREFIX\"" \
|
||||
--json number,title \
|
||||
--jq '.[] | select(.title | startswith("'"$TITLE_PREFIX"'")) | .number' \
|
||||
| head -1)
|
||||
BODY="Automated freshness probe failed.
|
||||
|
||||
**Status:** \`$STATUS\`
|
||||
**Detail:** $DETAIL
|
||||
|
||||
The Skills Hub at /docs/skills depends on \`/docs/api/skills-index.json\`.
|
||||
The unified index is rebuilt by \`.github/workflows/skills-index.yml\` (cron 6/18 UTC)
|
||||
and \`.github/workflows/deploy-site.yml\` (on every push affecting website/skills).
|
||||
If this issue keeps reopening, check the latest runs:
|
||||
|
||||
- https://github.com/${{ github.repository }}/actions/workflows/skills-index.yml
|
||||
- https://github.com/${{ github.repository }}/actions/workflows/deploy-site.yml
|
||||
|
||||
This issue was opened by \`.github/workflows/skills-index-freshness.yml\`. Close it once the underlying problem is fixed; the next probe will reopen if it's still broken."
|
||||
if [ -n "$existing" ]; then
|
||||
echo "Appending to existing issue #$existing"
|
||||
gh issue comment "$existing" --repo "${{ github.repository }}" --body "Probe still failing at $(date -u +%FT%TZ): \`$STATUS\` — $DETAIL"
|
||||
else
|
||||
echo "Opening new watchdog issue"
|
||||
gh issue create --repo "${{ github.repository }}" \
|
||||
--title "$TITLE_PREFIX Skills index is stale or degraded ($STATUS)" \
|
||||
--body "$BODY"
|
||||
fi
|
||||
56
.github/workflows/skills-index.yml
vendored
56
.github/workflows/skills-index.yml
vendored
@@ -1,56 +0,0 @@
|
||||
name: Build Skills Index
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Run twice daily: 6 AM and 6 PM UTC
|
||||
- cron: '0 6,18 * * *'
|
||||
workflow_dispatch: # Manual trigger
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'scripts/build_skills_index.py'
|
||||
- '.github/workflows/skills-index.yml'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
actions: write # to trigger deploy-site.yml on schedule
|
||||
|
||||
jobs:
|
||||
build-index:
|
||||
# Only run on the upstream repository, not on forks
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install dependencies
|
||||
run: pip install httpx==0.28.1 pyyaml==6.0.2
|
||||
|
||||
- name: Build skills index
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: python scripts/build_skills_index.py
|
||||
|
||||
- name: Upload index artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: skills-index
|
||||
path: website/static/api/skills-index.json
|
||||
retention-days: 7
|
||||
|
||||
# Re-trigger the docs deploy so the refreshed index lands on the live site.
|
||||
# The deploy itself is owned by deploy-site.yml (which crawls and deploys
|
||||
# everything in one pipeline); we just kick it on a schedule.
|
||||
trigger-deploy:
|
||||
needs: build-index
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Trigger Deploy Site workflow
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: gh workflow run deploy-site.yml --repo ${{ github.repository }}
|
||||
270
.github/workflows/supply-chain-audit.yml
vendored
270
.github/workflows/supply-chain-audit.yml
vendored
@@ -1,270 +0,0 @@
|
||||
name: Supply Chain Audit
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
# No paths filter — the jobs must always run so required checks
|
||||
# report a status (path-gated workflows leave checks "pending" forever
|
||||
# when no matching files change, which blocks merge).
|
||||
|
||||
permissions:
|
||||
pull-requests: write
|
||||
contents: read
|
||||
|
||||
# Narrow, high-signal scanner. Only fires on critical indicators of supply
|
||||
# chain attacks (e.g. the litellm-style payloads). Low-signal heuristics
|
||||
# (plain base64, plain exec/eval, dependency/Dockerfile/workflow edits,
|
||||
# Actions version unpinning, outbound POST/PUT) were intentionally
|
||||
# removed — they fired on nearly every PR and trained reviewers to ignore
|
||||
# the scanner. Keep this file's checks ruthlessly narrow: if you find
|
||||
# yourself adding WARNING-tier patterns here again, make a separate
|
||||
# advisory-only workflow instead.
|
||||
|
||||
jobs:
|
||||
# ── Path filter (shared by both scan and dep-bounds) ───────────────
|
||||
changes:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
# True when any file the scanner cares about changed in this PR
|
||||
scan: ${{ steps.filter.outputs.scan }}
|
||||
# True when pyproject.toml changed in this PR
|
||||
deps: ${{ steps.filter.outputs.deps }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Check for relevant file changes
|
||||
id: filter
|
||||
run: |
|
||||
BASE="${{ github.event.pull_request.base.sha }}"
|
||||
HEAD="${{ github.event.pull_request.head.sha }}"
|
||||
SCAN_FILES=$(git diff --name-only "$BASE"..."$HEAD" -- \
|
||||
'*.py' '**/*.py' '*.pth' '**/*.pth' \
|
||||
'setup.py' 'setup.cfg' \
|
||||
'sitecustomize.py' 'usercustomize.py' '__init__.pth' \
|
||||
'pyproject.toml' || true)
|
||||
if [ -n "$SCAN_FILES" ]; then
|
||||
echo "scan=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "scan=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
DEPS_FILES=$(git diff --name-only "$BASE"..."$HEAD" -- 'pyproject.toml' || true)
|
||||
if [ -n "$DEPS_FILES" ]; then
|
||||
echo "deps=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "deps=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
scan:
|
||||
name: Scan PR for critical supply chain risks
|
||||
needs: changes
|
||||
if: needs.changes.outputs.scan == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Scan diff for critical patterns
|
||||
id: scan
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
BASE="${{ github.event.pull_request.base.sha }}"
|
||||
HEAD="${{ github.event.pull_request.head.sha }}"
|
||||
|
||||
# Added lines only, excluding lockfiles.
|
||||
# Three-dot diff (base...head) diffs from the merge base to HEAD,
|
||||
# so only changes introduced by this PR are included — not changes
|
||||
# that landed on main after the PR branched off.
|
||||
DIFF=$(git diff "$BASE"..."$HEAD" -- . ':!uv.lock' ':!*.lock' ':!package-lock.json' ':!yarn.lock' || true)
|
||||
|
||||
FINDINGS=""
|
||||
|
||||
# --- .pth files (auto-execute on Python startup) ---
|
||||
# The exact mechanism used in the litellm supply chain attack:
|
||||
# https://github.com/BerriAI/litellm/issues/24512
|
||||
PTH_FILES=$(git diff --name-only "$BASE"..."$HEAD" | grep '\.pth$' || true)
|
||||
if [ -n "$PTH_FILES" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### 🚨 CRITICAL: .pth file added or modified
|
||||
Python \`.pth\` files in \`site-packages/\` execute automatically when the interpreter starts — no import required.
|
||||
|
||||
**Files:**
|
||||
\`\`\`
|
||||
${PTH_FILES}
|
||||
\`\`\`
|
||||
"
|
||||
fi
|
||||
|
||||
# --- base64 decode + exec/eval on the same line (the litellm attack pattern) ---
|
||||
B64_EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|decodebytes|urlsafe_b64decode)' | grep -iE 'exec\(|eval\(' | head -10 || true)
|
||||
if [ -n "$B64_EXEC_HITS" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### 🚨 CRITICAL: base64 decode + exec/eval combo
|
||||
Base64-decoded strings passed directly to exec/eval — the signature of hidden credential-stealing payloads.
|
||||
|
||||
**Matches:**
|
||||
\`\`\`
|
||||
${B64_EXEC_HITS}
|
||||
\`\`\`
|
||||
"
|
||||
fi
|
||||
|
||||
# --- subprocess with encoded/obfuscated command argument ---
|
||||
PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|\\x[0-9a-f]{2}|chr\(' | head -10 || true)
|
||||
if [ -n "$PROC_HITS" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### 🚨 CRITICAL: subprocess with encoded/obfuscated command
|
||||
Subprocess calls whose command strings are base64- or hex-encoded are a strong indicator of payload execution.
|
||||
|
||||
**Matches:**
|
||||
\`\`\`
|
||||
${PROC_HITS}
|
||||
\`\`\`
|
||||
"
|
||||
fi
|
||||
|
||||
# --- Install-hook files (setup.py/sitecustomize/usercustomize/__init__.pth) ---
|
||||
# These execute during pip install or interpreter startup.
|
||||
# Anchored at repo root: only the top-level setup.py/setup.cfg run during
|
||||
# `pip install`, and only top-level sitecustomize.py/usercustomize.py are
|
||||
# auto-loaded by the interpreter via site.py. Any nested file with the
|
||||
# same name (e.g. hermes_cli/setup.py — the CLI setup wizard) is unrelated
|
||||
# and produced false positives that trained reviewers to ignore the scanner.
|
||||
SETUP_HITS=$(git diff --name-only "$BASE"..."$HEAD" | grep -E '^(setup\.py|setup\.cfg|sitecustomize\.py|usercustomize\.py|__init__\.pth)$' || true)
|
||||
if [ -n "$SETUP_HITS" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### 🚨 CRITICAL: Install-hook file added or modified
|
||||
These files can execute code during package installation or interpreter startup.
|
||||
|
||||
**Files:**
|
||||
\`\`\`
|
||||
${SETUP_HITS}
|
||||
\`\`\`
|
||||
"
|
||||
fi
|
||||
|
||||
if [ -n "$FINDINGS" ]; then
|
||||
echo "found=true" >> "$GITHUB_OUTPUT"
|
||||
echo "$FINDINGS" > /tmp/findings.md
|
||||
else
|
||||
echo "found=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Post critical finding comment
|
||||
if: steps.scan.outputs.found == 'true'
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
BODY="## 🚨 CRITICAL Supply Chain Risk Detected
|
||||
|
||||
This PR contains a pattern that has been used in real supply chain attacks. A maintainer must review the flagged code carefully before merging.
|
||||
|
||||
$(cat /tmp/findings.md)
|
||||
|
||||
---
|
||||
*Scanner only fires on high-signal indicators: .pth files, base64+exec/eval combos, subprocess with encoded commands, or install-hook files. Low-signal warnings were removed intentionally — if you're seeing this comment, the finding is worth inspecting.*"
|
||||
|
||||
gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs — GITHUB_TOKEN is read-only)"
|
||||
|
||||
- name: Fail on critical findings
|
||||
if: steps.scan.outputs.found == 'true'
|
||||
run: |
|
||||
echo "::error::CRITICAL supply chain risk patterns detected in this PR. See the PR comment for details."
|
||||
exit 1
|
||||
|
||||
# Gate: reports success when scan was skipped (no relevant files changed).
|
||||
# This ensures the required check always gets a status.
|
||||
scan-gate:
|
||||
name: Scan PR for critical supply chain risks
|
||||
needs: changes
|
||||
# always() so the gate still reports SUCCESS even if `changes` fails/is
|
||||
# skipped — without it, a failed dependency would leave the required
|
||||
# check unreported (i.e. "pending"), the exact failure mode this fixes.
|
||||
if: always() && needs.changes.outputs.scan != 'true'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- run: echo "No supply-chain-relevant files changed, skipping scan."
|
||||
|
||||
dep-bounds:
|
||||
name: Check PyPI dependency upper bounds
|
||||
needs: changes
|
||||
if: needs.changes.outputs.deps == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check for unbounded PyPI deps
|
||||
id: bounds
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
BASE="${{ github.event.pull_request.base.sha }}"
|
||||
HEAD="${{ github.event.pull_request.head.sha }}"
|
||||
|
||||
# Only check added lines in pyproject.toml
|
||||
ADDED=$(git diff "$BASE"..."$HEAD" -- pyproject.toml | grep '^+' | grep -v '^+++' || true)
|
||||
|
||||
if [ -z "$ADDED" ]; then
|
||||
echo "found=false" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Match PyPI dep specs that have >= but no < ceiling.
|
||||
# Pattern: "package>=version" without a following ",<" bound.
|
||||
# Excludes git+ URLs (which use commit SHAs) and comments.
|
||||
UNBOUNDED=$(echo "$ADDED" | grep -oE '"[a-zA-Z0-9_-]+(\[[^\]]*\])?>=[ 0-9.]+"' | grep -v ',<' || true)
|
||||
|
||||
if [ -n "$UNBOUNDED" ]; then
|
||||
echo "found=true" >> "$GITHUB_OUTPUT"
|
||||
echo "$UNBOUNDED" > /tmp/unbounded.txt
|
||||
else
|
||||
echo "found=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Post unbounded dep warning
|
||||
if: steps.bounds.outputs.found == 'true'
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
BODY="## ⚠️ Unbounded PyPI Dependency Detected
|
||||
|
||||
This PR adds PyPI dependencies without a \`<next_major\` upper bound. Per our [supply chain policy](../blob/main/CONTRIBUTING.md#dependency-pinning-policy-supply-chain-hardening), all PyPI deps must be pinned as \`>=floor,<next_major\`.
|
||||
|
||||
**Unbounded specs found:**
|
||||
\`\`\`
|
||||
$(cat /tmp/unbounded.txt)
|
||||
\`\`\`
|
||||
|
||||
**Fix:** Add an upper bound, e.g. \`\"package>=1.2.0,<2\"\`
|
||||
|
||||
---
|
||||
*See PR #2810 and CONTRIBUTING.md for the full policy rationale.*"
|
||||
|
||||
gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs)"
|
||||
|
||||
- name: Fail on unbounded deps
|
||||
if: steps.bounds.outputs.found == 'true'
|
||||
run: |
|
||||
echo "::error::PyPI dependencies without upper bounds detected. Add <next_major ceiling per CONTRIBUTING.md policy."
|
||||
exit 1
|
||||
|
||||
# Gate: reports success when dep-bounds was skipped (no pyproject.toml changed).
|
||||
# This ensures the required check always gets a status.
|
||||
dep-bounds-gate:
|
||||
name: Check PyPI dependency upper bounds
|
||||
needs: changes
|
||||
# always() so the gate still reports SUCCESS even if `changes` fails/is
|
||||
# skipped — without it, a failed dependency would leave the required
|
||||
# check unreported (i.e. "pending"), the exact failure mode this fixes.
|
||||
if: always() && needs.changes.outputs.deps != 'true'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- run: echo "No pyproject.toml changes, skipping dependency bounds check."
|
||||
186
.github/workflows/tests.yml
vendored
186
.github/workflows/tests.yml
vendored
@@ -1,186 +0,0 @@
|
||||
name: Tests
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- '**/*.md'
|
||||
- 'docs/**'
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- '**/*.md'
|
||||
- 'docs/**'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# Cancel in-progress runs for the same PR/branch
|
||||
concurrency:
|
||||
group: tests-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
slice: [1, 2, 3, 4, 5, 6]
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Restore duration cache
|
||||
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: test_durations.json
|
||||
# Single stable key. main always overwrites, PRs always find it.
|
||||
key: test-durations
|
||||
|
||||
- name: Install ripgrep (prebuilt binary)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
RG_VERSION=15.1.0
|
||||
RG_SHA256=1c9297be4a084eea7ecaedf93eb03d058d6faae29bbc57ecdaf5063921491599
|
||||
RG_TARBALL=ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl.tar.gz
|
||||
curl -sSfL -o "$RG_TARBALL" \
|
||||
"https://github.com/BurntSushi/ripgrep/releases/download/${RG_VERSION}/${RG_TARBALL}"
|
||||
echo "${RG_SHA256} ${RG_TARBALL}" | sha256sum -c -
|
||||
tar -xzf "$RG_TARBALL"
|
||||
sudo mv "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl/rg" /usr/local/bin/rg
|
||||
rm -rf "$RG_TARBALL" "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl"
|
||||
rg --version
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
- name: Set up Python 3.11
|
||||
run: uv python install 3.11
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
uv venv .venv --python 3.11
|
||||
source .venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
|
||||
- name: Run tests (slice ${{ matrix.slice }}/6)
|
||||
# Per-file isolation via scripts/run_tests_parallel.py: discovers
|
||||
# every test_*.py file under tests/ (excluding integration/ + e2e/),
|
||||
# then runs `python -m pytest <file>` in a freshly-spawned subprocess
|
||||
# with bounded parallelism. No xdist, no shared workers, no
|
||||
# module-level state leakage between files.
|
||||
#
|
||||
# Why per-file (not per-test): per-test spawn cost (~250ms × 17k
|
||||
# tests = 70min CPU minimum) blew the wall-clock budget. Per-file
|
||||
# spawn (~250ms × ~850 files = ~3.5min) fits while still giving
|
||||
# every file a fresh interpreter — the only isolation boundary
|
||||
# that matters in practice (cross-file leakage was the original
|
||||
# flake source; intra-file is the test author's responsibility).
|
||||
#
|
||||
# Why drop xdist entirely: xdist's persistent workers accumulate
|
||||
# state across files, which is exactly the leakage we wanted to
|
||||
# fix. ThreadPoolExecutor + subprocess.run is ~60 lines and does
|
||||
# the job with cleaner semantics.
|
||||
#
|
||||
# Matrix slicing (--slice I/N): files are distributed across 6
|
||||
# jobs by cached duration (LPT algorithm) so each job gets
|
||||
# roughly equal wall time. Without a cache, files default to 2s
|
||||
# estimate and get split roughly evenly by count — still correct,
|
||||
# just not perfectly balanced.
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python scripts/run_tests_parallel.py --slice ${{ matrix.slice }}/6
|
||||
env:
|
||||
# Ensure tests don't accidentally call real APIs
|
||||
OPENROUTER_API_KEY: ""
|
||||
OPENAI_API_KEY: ""
|
||||
NOUS_API_KEY: ""
|
||||
|
||||
- name: Upload per-slice durations
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: test-durations-slice-${{ matrix.slice }}
|
||||
path: test_durations.json
|
||||
retention-days: 1
|
||||
|
||||
# Merge per-slice duration data into a single cache, so future runs
|
||||
# (including PRs) get balanced slicing.
|
||||
save-durations:
|
||||
needs: test
|
||||
if: always() && github.ref == 'refs/heads/main'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Download all slice durations
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
|
||||
with:
|
||||
pattern: test-durations-slice-*
|
||||
path: durations
|
||||
merge-multiple: true
|
||||
|
||||
- name: Merge into single durations file
|
||||
run: |
|
||||
python3 -c "
|
||||
import json, glob, os
|
||||
merged = {}
|
||||
for f in glob.glob('durations/*test_durations.json'):
|
||||
with open(f) as fh:
|
||||
merged.update(json.load(fh))
|
||||
with open('test_durations.json', 'w') as fh:
|
||||
json.dump(merged, fh, indent=2, sort_keys=True)
|
||||
print(f'Merged {len(merged)} file durations')
|
||||
"
|
||||
|
||||
- name: Save merged duration cache
|
||||
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: test_durations.json
|
||||
key: test-durations
|
||||
|
||||
e2e:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 15
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install ripgrep (prebuilt binary)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
RG_VERSION=15.1.0
|
||||
RG_SHA256=1c9297be4a084eea7ecaedf93eb03d058d6faae29bbc57ecdaf5063921491599
|
||||
RG_TARBALL=ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl.tar.gz
|
||||
curl -sSfL -o "$RG_TARBALL" \
|
||||
"https://github.com/BurntSushi/ripgrep/releases/download/${RG_VERSION}/${RG_TARBALL}"
|
||||
echo "${RG_SHA256} ${RG_TARBALL}" | sha256sum -c -
|
||||
tar -xzf "$RG_TARBALL"
|
||||
sudo mv "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl/rg" /usr/local/bin/rg
|
||||
rm -rf "$RG_TARBALL" "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl"
|
||||
rg --version
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
- name: Set up Python 3.11
|
||||
run: uv python install 3.11
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
uv venv .venv --python 3.11
|
||||
source .venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
|
||||
- name: Packaged-wheel i18n smoke test
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pytest -m integration tests/test_wheel_locales_e2e.py -v
|
||||
|
||||
- name: Run e2e tests
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pytest tests/e2e/ -v --tb=short
|
||||
env:
|
||||
OPENROUTER_API_KEY: ""
|
||||
OPENAI_API_KEY: ""
|
||||
NOUS_API_KEY: ""
|
||||
164
.github/workflows/upload_to_pypi.yml
vendored
164
.github/workflows/upload_to_pypi.yml
vendored
@@ -1,164 +0,0 @@
|
||||
name: Publish to PyPI
|
||||
|
||||
# Triggered by CalVer tag pushes from scripts/release.py (e.g. v2026.5.15)
|
||||
# Can also be triggered manually from the Actions tab as an escape hatch.
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'v20*' # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
confirm_tag:
|
||||
description: 'Tag to publish (e.g. v2026.5.15). Must already exist.'
|
||||
required: true
|
||||
type: string
|
||||
|
||||
# Restrict default token to read-only; each job escalates as needed.
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# Prevent overlapping publishes (e.g. two same-day tags pushed quickly).
|
||||
concurrency:
|
||||
group: pypi-publish
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Build distribution 📦
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
# On workflow_dispatch, check out the confirmed tag.
|
||||
ref: ${{ inputs.confirm_tag || github.ref }}
|
||||
fetch-tags: true
|
||||
|
||||
- name: Validate tag exists
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
run: |
|
||||
if ! git tag -l "${{ inputs.confirm_tag }}" | grep -q .; then
|
||||
echo "::error::Tag '${{ inputs.confirm_tag }}' does not exist in the repo"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.13'
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6
|
||||
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
with:
|
||||
node-version: '22'
|
||||
|
||||
- name: Build web dashboard
|
||||
run: cd web && npm ci && npm run build
|
||||
|
||||
- name: Build TUI bundle
|
||||
run: cd ui-tui && npm ci && npm run build
|
||||
|
||||
- name: Bundle TUI into hermes_cli
|
||||
run: |
|
||||
mkdir -p hermes_cli/tui_dist
|
||||
cp ui-tui/dist/entry.js hermes_cli/tui_dist/entry.js
|
||||
|
||||
- name: Verify frontend assets exist
|
||||
run: |
|
||||
test -f hermes_cli/web_dist/index.html || { echo "ERROR: web_dist not built"; exit 1; }
|
||||
test -f hermes_cli/tui_dist/entry.js || { echo "ERROR: tui_dist not built"; exit 1; }
|
||||
|
||||
- name: Bundle install scripts into wheel
|
||||
run: |
|
||||
mkdir -p hermes_cli/scripts
|
||||
cp scripts/install.sh hermes_cli/scripts/install.sh
|
||||
cp scripts/install.ps1 hermes_cli/scripts/install.ps1
|
||||
|
||||
- name: Build wheel and sdist
|
||||
run: uv build --sdist --wheel
|
||||
|
||||
- name: Upload distribution artifacts
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
publish:
|
||||
name: Publish to PyPI
|
||||
needs: build
|
||||
runs-on: ubuntu-latest
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/p/hermes-agent
|
||||
permissions:
|
||||
id-token: write # OIDC trusted publishing
|
||||
|
||||
steps:
|
||||
- name: Download distribution artifacts
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
- name: Publish to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
|
||||
with:
|
||||
skip-existing: true
|
||||
|
||||
sign:
|
||||
name: Sign and attach to GitHub Release
|
||||
# Only runs on tag pushes — release.py creates the GitHub Release,
|
||||
# and workflow_dispatch won't have a matching release to attach to.
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
needs: publish
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write # attach assets to the existing release
|
||||
id-token: write # sigstore signing
|
||||
|
||||
steps:
|
||||
- name: Download distribution artifacts
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
- name: Wait for GitHub Release to exist
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ github.token }}
|
||||
# release.py creates the GitHub Release after pushing the tag,
|
||||
# but this workflow starts from the tag push — wait for it.
|
||||
run: |
|
||||
for i in $(seq 1 30); do
|
||||
if gh release view "$GITHUB_REF_NAME" --repo "$GITHUB_REPOSITORY" >/dev/null 2>&1; then
|
||||
echo "Release $GITHUB_REF_NAME found"
|
||||
exit 0
|
||||
fi
|
||||
echo "Waiting for release... ($i/30)"
|
||||
sleep 10
|
||||
done
|
||||
echo "::warning::Release $GITHUB_REF_NAME not found after 5 minutes — skipping signature upload"
|
||||
echo "skip_sign=true" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Sign with Sigstore
|
||||
if: env.skip_sign != 'true'
|
||||
uses: sigstore/gh-action-sigstore-python@04cffa1d795717b140764e8b640de88853c92acc # v3.3.0
|
||||
with:
|
||||
inputs: >-
|
||||
./dist/*.tar.gz
|
||||
./dist/*.whl
|
||||
|
||||
- name: Attach signed artifacts to GitHub Release
|
||||
if: env.skip_sign != 'true'
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ github.token }}
|
||||
# release.py already created the GitHub Release — just upload
|
||||
# the Sigstore signatures alongside the existing assets.
|
||||
run: >-
|
||||
gh release upload
|
||||
"$GITHUB_REF_NAME" dist/*.sigstore.json
|
||||
--repo "$GITHUB_REPOSITORY"
|
||||
--clobber
|
||||
119
.github/workflows/uv-lockfile-check.yml
vendored
119
.github/workflows/uv-lockfile-check.yml
vendored
@@ -1,119 +0,0 @@
|
||||
name: uv.lock check
|
||||
|
||||
# Verify uv.lock is in sync with pyproject.toml. Blocking check — PRs
|
||||
# that modify pyproject.toml without regenerating uv.lock (or vice versa)
|
||||
# must not merge, because the Docker build's `uv sync --frozen` step will
|
||||
# fail on a stale lockfile and we'd rather catch it here than in the
|
||||
# docker-publish workflow on main.
|
||||
#
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
# IMPORTANT: this check runs against the MERGED state, not just your branch
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# For `pull_request` events, GitHub checks out `refs/pull/<N>/merge` by
|
||||
# default — a synthetic commit that merges your PR branch into the CURRENT
|
||||
# state of `main`. That means the pyproject.toml evaluated here is
|
||||
# `main's pyproject.toml + your PR's changes to pyproject.toml`, not just
|
||||
# what's on your branch.
|
||||
#
|
||||
# Failure mode this creates: if `main` has advanced since you branched
|
||||
# (e.g. someone merged a PR that added a dep to pyproject.toml + its
|
||||
# corresponding uv.lock entries), your branch's uv.lock is missing those
|
||||
# new entries. `uv lock --check` resolves against the merged pyproject
|
||||
# and sees a lockfile that doesn't cover all the current deps → fails
|
||||
# with "The lockfile at uv.lock needs to be updated."
|
||||
#
|
||||
# This can be confusing: `uv lock --check` passes locally (your branch
|
||||
# is internally consistent) but fails in CI (merged state isn't).
|
||||
#
|
||||
# Fix is to sync your branch with main and regenerate the lockfile:
|
||||
#
|
||||
# git fetch origin main
|
||||
# git rebase origin/main # or merge, whatever the repo prefers
|
||||
# uv lock # regenerates uv.lock against new pyproject.toml
|
||||
# git add uv.lock
|
||||
# git commit -m "chore: refresh uv.lock after rebase onto main"
|
||||
# git push --force-with-lease # if you rebased
|
||||
#
|
||||
# If you also changed pyproject.toml in your PR, `uv lock` handles that
|
||||
# at the same time — one regeneration covers both your changes and the
|
||||
# drift from main.
|
||||
#
|
||||
# This is the correct behavior! The check is protecting main's Docker
|
||||
# build: a post-merge build would see the same merged state and fail
|
||||
# the same way. Better to catch it here than after merge.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
- '.github/workflows/uv-lockfile-check.yml'
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
- '.github/workflows/uv-lockfile-check.yml'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: uv-lockfile-check-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
jobs:
|
||||
check:
|
||||
name: uv lock --check
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
# `uv lock --check` re-resolves the project from pyproject.toml and
|
||||
# compares the result to uv.lock, exiting non-zero if they disagree.
|
||||
# No network writes, no file modifications.
|
||||
#
|
||||
# On PRs this runs against the merge commit (see comment at the top
|
||||
# of this file) — failures often mean "your branch is behind main,
|
||||
# rebase and regenerate uv.lock."
|
||||
- name: Verify uv.lock is up-to-date
|
||||
run: |
|
||||
if ! uv lock --check; then
|
||||
cat <<'EOF' >> "$GITHUB_STEP_SUMMARY"
|
||||
## ❌ uv.lock is out of sync with pyproject.toml
|
||||
|
||||
**If this is a PR:** this check runs against the merged state
|
||||
(your branch + current `main`), not just your branch. If
|
||||
`uv lock --check` passes locally, your branch is likely behind
|
||||
`main` — recent changes to `pyproject.toml` on `main` aren't
|
||||
reflected in your branch's `uv.lock` yet.
|
||||
|
||||
To fix, sync with main and regenerate the lockfile:
|
||||
|
||||
```bash
|
||||
git fetch origin main
|
||||
git rebase origin/main # or `git merge origin/main`
|
||||
uv lock # regenerate against new pyproject.toml
|
||||
git add uv.lock
|
||||
git commit -m "chore: refresh uv.lock after syncing with main"
|
||||
git push --force-with-lease # drop --force-with-lease if you merged
|
||||
```
|
||||
|
||||
**If you only changed pyproject.toml:** run `uv lock` locally
|
||||
and commit the result.
|
||||
|
||||
This check is blocking because the Docker image build uses
|
||||
`uv sync --frozen --extra all`, which rejects stale lockfiles
|
||||
— catching it here avoids a ~15 min failed docker-publish run
|
||||
on `main` post-merge.
|
||||
EOF
|
||||
echo "::error title=uv.lock out of sync::Run \`uv lock\` locally and commit the result. If on a PR, sync with main first."
|
||||
exit 1
|
||||
fi
|
||||
157
.gitignore
vendored
157
.gitignore
vendored
@@ -1,125 +1,32 @@
|
||||
.DS_Store
|
||||
/venv/
|
||||
/venv.old/
|
||||
/_pycache/
|
||||
*.pyc*
|
||||
__pycache__/
|
||||
.venv/
|
||||
.vscode/
|
||||
.env
|
||||
.env.local
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
.env.development
|
||||
.env.test
|
||||
.hermes-docker/
|
||||
.notebooklm-home/
|
||||
.notebooklm-cli-venv/
|
||||
.notebooklm-playwright/
|
||||
.pip-cache/
|
||||
.uv-cache/
|
||||
compose.hermes.local.yml
|
||||
export*
|
||||
__pycache__/model_tools.cpython-310.pyc
|
||||
__pycache__/web_tools.cpython-310.pyc
|
||||
logs/
|
||||
data/
|
||||
.pytest_cache/
|
||||
test_durations.json
|
||||
.pytest-cache/
|
||||
tmp/
|
||||
temp_vision_images/
|
||||
hermes-*/*
|
||||
examples/
|
||||
tests/quick_test_dataset.jsonl
|
||||
tests/sample_dataset.jsonl
|
||||
run_datagen_kimik2-thinking.sh
|
||||
run_datagen_megascience_glm4-6.sh
|
||||
run_datagen_sonnet.sh
|
||||
source-data/*
|
||||
run_datagen_megascience_glm4-6.sh
|
||||
data/*
|
||||
node_modules/
|
||||
browser-use/
|
||||
agent-browser/
|
||||
# Private keys
|
||||
*.ppk
|
||||
*.pem
|
||||
privvy*
|
||||
images/
|
||||
__pycache__/
|
||||
hermes_agent.egg-info/
|
||||
wandb/
|
||||
testlogs
|
||||
|
||||
# CLI config (may contain sensitive SSH paths)
|
||||
cli-config.yaml
|
||||
|
||||
# Skills Hub state (lives in ~/.hermes/skills/.hub/ at runtime, but just in case)
|
||||
skills/.hub/
|
||||
ignored/
|
||||
.worktrees/
|
||||
environments/benchmarks/evals/
|
||||
|
||||
# Web UI build output
|
||||
hermes_cli/web_dist/
|
||||
apps/desktop/build/
|
||||
apps/desktop/dist/
|
||||
apps/desktop/release/
|
||||
apps/desktop/*.tsbuildinfo
|
||||
|
||||
# Web UI assets — synced from @nous-research/ui at build time via
|
||||
# `npm run sync-assets` (see web/package.json).
|
||||
web/public/fonts/
|
||||
web/public/ds-assets/
|
||||
|
||||
# Release script temp files
|
||||
.release_notes.md
|
||||
mini-swe-agent/
|
||||
|
||||
# Nix
|
||||
.direnv/
|
||||
.nix-stamps/
|
||||
result
|
||||
website/static/api/skills-index.json
|
||||
# skills.json + skills-meta.json are build artifacts emitted by
|
||||
# website/scripts/extract-skills.py during prebuild — keep them out of
|
||||
# git for the same reason as skills-index.json (large, generated, change
|
||||
# every build).
|
||||
website/static/api/skills.json
|
||||
website/static/api/skills-meta.json
|
||||
models-dev-upstream/
|
||||
|
||||
# Local editor / agent tooling (machine-specific; keep in global config, not the repo)
|
||||
.codex/
|
||||
.cursor/
|
||||
.gemini/
|
||||
.zed/
|
||||
.mcp.json
|
||||
opencode.json
|
||||
config/mcporter.json
|
||||
|
||||
hermes_cli/tui_dist/*
|
||||
hermes_cli/scripts/
|
||||
docs/superpowers/*
|
||||
# Working directory for the Hermes Agent's session state (~/.hermes/ at runtime;
|
||||
# also created in-repo when an agent operates in this checkout). Plans, audit
|
||||
# logs, and per-session caches are never artifacts of the codebase.
|
||||
.hermes/
|
||||
|
||||
# Desktop/bootstrap install marker written into the managed checkout root by the
|
||||
# bootstrap installer. It is Hermes-managed runtime state, never a code change —
|
||||
# ignore it so `hermes update`'s `git stash push --include-untracked` does not
|
||||
# treat it as a local edit and autostash it on every run (#38529).
|
||||
.hermes-bootstrap-complete
|
||||
|
||||
# Tool Search live-test harness output — non-deterministic model transcripts,
|
||||
# regenerated by scripts/tool_search_livetest.py. Never an artifact of the repo.
|
||||
scripts/out/
|
||||
|
||||
# Per-release changelog drafts. These exist only transiently during a release
|
||||
# cut (passed to `gh release create --notes-file`); the GitHub Release itself
|
||||
# stores the published notes. They are not a build artifact and must never be
|
||||
# committed to the repo root. See the hermes-release skill.
|
||||
RELEASE_v*.md
|
||||
/venv/
|
||||
/_pycache/
|
||||
hecate/
|
||||
hecate-lib/
|
||||
*.pyc*
|
||||
__pycache__/
|
||||
.venv/
|
||||
.vscode/
|
||||
.env
|
||||
.env.local
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
.env.development
|
||||
.env.test
|
||||
export*
|
||||
__pycache__/model_tools.cpython-310.pyc
|
||||
__pycache__/web_tools.cpython-310.pyc
|
||||
logs/
|
||||
data/
|
||||
.pytest_cache/
|
||||
tmp/
|
||||
temp_vision_images/
|
||||
hermes-*/*
|
||||
examples/
|
||||
tests/quick_test_dataset.jsonl
|
||||
tests/sample_dataset.jsonl
|
||||
run_datagen_kimik2-thinking.sh
|
||||
run_datagen_megascience_glm4-6.sh
|
||||
run_datagen_sonnet.sh
|
||||
source-data/*
|
||||
run_datagen_megascience_glm4-6.sh
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
# hadolint configuration for the Hermes Agent Dockerfile.
|
||||
# See https://github.com/hadolint/hadolint#configure for rules.
|
||||
#
|
||||
# We want hadolint to surface NEW Dockerfile lint regressions, but we
|
||||
# don't want to rewrite the existing image to silence rules that are
|
||||
# either intentional or pragmatic tradeoffs for this project. Each
|
||||
# ignore below has a one-line justification.
|
||||
failure-threshold: warning
|
||||
|
||||
ignored:
|
||||
# Pin versions in apt get install. We intentionally don't pin common
|
||||
# tools (curl, git, openssh-client, etc.) — security updates flow in
|
||||
# via the periodic base-image rebuild, and pinning would lock us to
|
||||
# superseded patch releases. Same rationale as nearly every distro-
|
||||
# base official image (python, node, debian).
|
||||
- DL3008
|
||||
# Use WORKDIR to switch to a directory. The image uses `(cd web && …)`
|
||||
# / `(cd ../ui-tui && …)` inline subshells for one-off build steps
|
||||
# because they don't affect later RUN commands; promoting them to
|
||||
# full WORKDIR switches with restores would obscure intent.
|
||||
- DL3003
|
||||
# Multiple consecutive RUN instructions. The `touch README.md` + `uv
|
||||
# sync` split is intentional — `touch` is cheap, `uv sync` is the
|
||||
# expensive layer-cached step we want isolated, and merging them
|
||||
# would invalidate the cache for trivial changes.
|
||||
- DL3059
|
||||
# Last USER should not be root. /init (s6-overlay) runs as root so the
|
||||
# stage2 hook can usermod/groupmod and chown the data volume per
|
||||
# HERMES_UID at runtime; each supervised service then drops to the
|
||||
# hermes user via `s6-setuidgid`.
|
||||
- DL3002
|
||||
|
||||
# Require explicit base-image pins (SHA256) — we already do this.
|
||||
trustedRegistries:
|
||||
- docker.io
|
||||
- ghcr.io
|
||||
108
.mailmap
108
.mailmap
@@ -1,108 +0,0 @@
|
||||
# .mailmap — canonical author mapping for git shortlog / git log / GitHub
|
||||
# Format: Canonical Name <canonical@email> <commit@email>
|
||||
# See: https://git-scm.com/docs/gitmailmap
|
||||
#
|
||||
# This maps commit emails to GitHub noreply addresses so that:
|
||||
# 1. `git shortlog -sn` shows deduplicated contributor counts
|
||||
# 2. GitHub's contributor graph can attribute commits correctly
|
||||
# 3. Contributors with personal/work emails get proper credit
|
||||
#
|
||||
# When adding entries: use the contributor's GitHub noreply email as canonical
|
||||
# so GitHub can link commits to their profile.
|
||||
|
||||
# === Teknium (multiple emails) ===
|
||||
Teknium <127238744+teknium1@users.noreply.github.com> <teknium1@gmail.com>
|
||||
Teknium <127238744+teknium1@users.noreply.github.com> <teknium@nousresearch.com>
|
||||
|
||||
# === Contributors — personal/work emails mapped to GitHub noreply ===
|
||||
# Format: Canonical Name <GH-noreply> <commit-email>
|
||||
|
||||
# Verified via GH API email search
|
||||
luyao618 <364939526@qq.com> <364939526@qq.com>
|
||||
ethernet8023 <arilotter@gmail.com> <arilotter@gmail.com>
|
||||
nicoloboschi <boschi1997@gmail.com> <boschi1997@gmail.com>
|
||||
cherifya <chef.ya@gmail.com> <chef.ya@gmail.com>
|
||||
BongSuCHOI <chlqhdtn98@gmail.com> <chlqhdtn98@gmail.com>
|
||||
dsocolobsky <dsocolobsky@gmail.com> <dsocolobsky@gmail.com>
|
||||
pefontana <fontana.pedro93@gmail.com> <fontana.pedro93@gmail.com>
|
||||
Helmi <frank@helmschrott.de> <frank@helmschrott.de>
|
||||
hata1234 <hata1234@gmail.com> <hata1234@gmail.com>
|
||||
|
||||
# Verified via PR investigation / salvage PR bodies
|
||||
DeployFaith <agents@kylefrench.dev> <agents@kylefrench.dev>
|
||||
flobo3 <floptopbot33@gmail.com> <floptopbot33@gmail.com>
|
||||
gaixianggeng <gaixg94@gmail.com> <gaixg94@gmail.com>
|
||||
KUSH42 <xush@xush.org> <xush@xush.org>
|
||||
konsisumer <der@konsi.org> <der@konsi.org>
|
||||
WorldInnovationsDepartment <vorvul.danylo@gmail.com> <vorvul.danylo@gmail.com>
|
||||
m0n5t3r <iacobs@m0n5t3r.info> <iacobs@m0n5t3r.info>
|
||||
sprmn24 <oncuevtv@gmail.com> <oncuevtv@gmail.com>
|
||||
fancydirty <fancydirty@gmail.com> <fancydirty@gmail.com>
|
||||
fxfitz <francis.x.fitzpatrick@gmail.com> <francis.x.fitzpatrick@gmail.com>
|
||||
limars874 <limars874@gmail.com> <limars874@gmail.com>
|
||||
AaronWong1999 <aaronwong1999@icloud.com> <aaronwong1999@icloud.com>
|
||||
dippwho <dipp.who@gmail.com> <dipp.who@gmail.com>
|
||||
duerzy <duerzy@gmail.com> <duerzy@gmail.com>
|
||||
geoffwellman <geoff.wellman@gmail.com> <geoff.wellman@gmail.com>
|
||||
hcshen0111 <shenhaocheng19990111@gmail.com> <shenhaocheng19990111@gmail.com>
|
||||
jamesarch <han.shan@live.cn> <han.shan@live.cn>
|
||||
stephenschoettler <stephenschoettler@gmail.com> <stephenschoettler@gmail.com>
|
||||
Tranquil-Flow <tranquil_flow@protonmail.com> <tranquil_flow@protonmail.com>
|
||||
Dusk1e <yusufalweshdemir@gmail.com> <yusufalweshdemir@gmail.com>
|
||||
Awsh1 <ysfalweshcan@gmail.com> <ysfalweshcan@gmail.com>
|
||||
WAXLYY <ysfwaxlycan@gmail.com> <ysfwaxlycan@gmail.com>
|
||||
donrhmexe <don.rhm@gmail.com> <don.rhm@gmail.com>
|
||||
hqhq1025 <1506751656@qq.com> <1506751656@qq.com>
|
||||
BlackishGreen33 <s5460703@gmail.com> <s5460703@gmail.com>
|
||||
tomqiaozc <zqiao@microsoft.com> <zqiao@microsoft.com>
|
||||
MagicRay1217 <mingjwan@microsoft.com> <mingjwan@microsoft.com>
|
||||
aaronagent <1115117931@qq.com> <1115117931@qq.com>
|
||||
YoungYang963 <young@YoungdeMacBook-Pro.local> <young@YoungdeMacBook-Pro.local>
|
||||
LongOddCode <haolong@microsoft.com> <haolong@microsoft.com>
|
||||
Cafexss <coffeemjj@gmail.com> <coffeemjj@gmail.com>
|
||||
Cygra <sjtuwbh@gmail.com> <sjtuwbh@gmail.com>
|
||||
DomGrieco <dgrieco@redhat.com> <dgrieco@redhat.com>
|
||||
|
||||
# Duplicate email mapping (same person, multiple emails)
|
||||
Sertug17 <104278804+Sertug17@users.noreply.github.com> <srhtsrht17@gmail.com>
|
||||
yyovil <birdiegyal@gmail.com> <tanishq231003@gmail.com>
|
||||
DomGrieco <dgrieco@redhat.com> <dgrieco@redhat.com>
|
||||
dsocolobsky <dsocolobsky@gmail.com> <dylan.socolobsky@lambdaclass.com>
|
||||
olafthiele <programming@olafthiele.com> <olafthiele@gmail.com>
|
||||
|
||||
# Verified via git display name matching GH contributor username
|
||||
cokemine <aptx4561@gmail.com> <aptx4561@gmail.com>
|
||||
dalianmao000 <dalianmao0107@gmail.com> <dalianmao0107@gmail.com>
|
||||
emozilla <emozilla@nousresearch.com> <emozilla@nousresearch.com>
|
||||
jjovalle99 <juan.ovalle@mistral.ai> <juan.ovalle@mistral.ai>
|
||||
kagura-agent <kagura.chen28@gmail.com> <kagura.chen28@gmail.com>
|
||||
spniyant <niyant@spicefi.xyz> <niyant@spicefi.xyz>
|
||||
olafthiele <programming@olafthiele.com> <programming@olafthiele.com>
|
||||
r266-tech <r2668940489@gmail.com> <r2668940489@gmail.com>
|
||||
xingkongliang <tianliangjay@gmail.com> <tianliangjay@gmail.com>
|
||||
win4r <win4r@outlook.com> <win4r@outlook.com>
|
||||
zhouboli <zhouboli@gmail.com> <zhouboli@gmail.com>
|
||||
yongtenglei <yongtenglei@gmail.com> <yongtenglei@gmail.com>
|
||||
|
||||
# Nous Research team
|
||||
benbarclay <ben@nousresearch.com> <ben@nousresearch.com>
|
||||
jquesnelle <jonny@nousresearch.com> <jonny@nousresearch.com>
|
||||
|
||||
# GH contributor list verified
|
||||
spideystreet <dhicham.pro@gmail.com> <dhicham.pro@gmail.com>
|
||||
dorukardahan <dorukardahan@hotmail.com> <dorukardahan@hotmail.com>
|
||||
MustafaKara7 <karamusti912@gmail.com> <karamusti912@gmail.com>
|
||||
Hmbown <hmbown@gmail.com> <hmbown@gmail.com>
|
||||
kamil-gwozdz <kamil@gwozdz.me> <kamil@gwozdz.me>
|
||||
kira-ariaki <kira@ariaki.me> <kira@ariaki.me>
|
||||
knopki <knopki@duck.com> <knopki@duck.com>
|
||||
Unayung <unayung@gmail.com> <unayung@gmail.com>
|
||||
SeeYangZhi <yangzhi.see@gmail.com> <yangzhi.see@gmail.com>
|
||||
Julientalbot <julien.talbot@ergonomia.re> <julien.talbot@ergonomia.re>
|
||||
lesterli <lisicheng168@gmail.com> <lisicheng168@gmail.com>
|
||||
JiayuuWang <jiayuw794@gmail.com> <jiayuw794@gmail.com>
|
||||
tesseracttars-creator <tesseracttars@gmail.com> <tesseracttars@gmail.com>
|
||||
xinbenlv <zzn+pa@zzn.im> <zzn+pa@zzn.im>
|
||||
SaulJWu <saul.jj.wu@gmail.com> <saul.jj.wu@gmail.com>
|
||||
angelos <angelos@oikos.lan.home.malaiwah.com> <angelos@oikos.lan.home.malaiwah.com>
|
||||
MestreY0d4-Uninter <241404605+MestreY0d4-Uninter@users.noreply.github.com> <MestreY0d4-Uninter@users.noreply.github.com>
|
||||
@@ -1,291 +0,0 @@
|
||||
# OpenAI-Compatible API Server for Hermes Agent
|
||||
|
||||
## Motivation
|
||||
|
||||
Every major chat frontend (Open WebUI 126k★, LobeChat 73k★, LibreChat 34k★,
|
||||
AnythingLLM 56k★, NextChat 87k★, ChatBox 39k★, Jan 26k★, HF Chat-UI 8k★,
|
||||
big-AGI 7k★) connects to backends via the OpenAI-compatible REST API with
|
||||
SSE streaming. By exposing this endpoint, hermes-agent becomes instantly
|
||||
usable as a backend for all of them — no custom adapters needed.
|
||||
|
||||
## What It Enables
|
||||
|
||||
```
|
||||
┌──────────────────┐
|
||||
│ Open WebUI │──┐
|
||||
│ LobeChat │ │ POST /v1/chat/completions
|
||||
│ LibreChat │ ├──► Authorization: Bearer <key> ┌─────────────────┐
|
||||
│ AnythingLLM │ │ {"messages": [...]} │ hermes-agent │
|
||||
│ NextChat │ │ │ gateway │
|
||||
│ Any OAI client │──┘ ◄── SSE streaming response │ (API server) │
|
||||
└──────────────────┘ └─────────────────┘
|
||||
```
|
||||
|
||||
A user would:
|
||||
1. Set `API_SERVER_ENABLED=true` in `~/.hermes/.env`
|
||||
2. Run `hermes gateway` (API server starts alongside Telegram/Discord/etc.)
|
||||
3. Point Open WebUI (or any frontend) at `http://localhost:8642/v1`
|
||||
4. Chat with hermes-agent through any OpenAI-compatible UI
|
||||
|
||||
## Endpoints
|
||||
|
||||
| Method | Path | Purpose |
|
||||
|--------|------|---------|
|
||||
| POST | `/v1/chat/completions` | Chat with the agent (streaming + non-streaming) |
|
||||
| GET | `/v1/models` | List available "models" (returns hermes-agent as a model) |
|
||||
| GET | `/health` | Health check |
|
||||
|
||||
## Architecture
|
||||
|
||||
### Option A: Gateway Platform Adapter (recommended)
|
||||
|
||||
Create `gateway/platforms/api_server.py` as a new platform adapter that
|
||||
extends `BasePlatformAdapter`. This is the cleanest approach because:
|
||||
|
||||
- Reuses all gateway infrastructure (session management, auth, context building)
|
||||
- Runs in the same async loop as other adapters
|
||||
- Gets message handling, interrupt support, and session persistence for free
|
||||
- Follows the established pattern (like Telegram, Discord, etc.)
|
||||
- Uses `aiohttp.web` (already a dependency) for the HTTP server
|
||||
|
||||
The adapter would start an `aiohttp.web.Application` server in `connect()`
|
||||
and route incoming HTTP requests through the standard `handle_message()` pipeline.
|
||||
|
||||
### Option B: Standalone Component
|
||||
|
||||
A separate HTTP server class in `gateway/api_server.py` that creates its own
|
||||
AIAgent instances directly. Simpler but duplicates session/auth logic.
|
||||
|
||||
**Recommendation: Option A** — fits the existing architecture, less code to
|
||||
maintain, gets all gateway features for free.
|
||||
|
||||
## Request/Response Format
|
||||
|
||||
### Chat Completions (non-streaming)
|
||||
|
||||
```
|
||||
POST /v1/chat/completions
|
||||
Authorization: Bearer hermes-api-key-here
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "hermes-agent",
|
||||
"messages": [
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{"role": "user", "content": "What files are in the current directory?"}
|
||||
],
|
||||
"stream": false,
|
||||
"temperature": 0.7
|
||||
}
|
||||
```
|
||||
|
||||
Response:
|
||||
```json
|
||||
{
|
||||
"id": "chatcmpl-abc123",
|
||||
"object": "chat.completion",
|
||||
"created": 1710000000,
|
||||
"model": "hermes-agent",
|
||||
"choices": [{
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "Here are the files in the current directory:\n..."
|
||||
},
|
||||
"finish_reason": "stop"
|
||||
}],
|
||||
"usage": {
|
||||
"prompt_tokens": 50,
|
||||
"completion_tokens": 200,
|
||||
"total_tokens": 250
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Chat Completions (streaming)
|
||||
|
||||
Same request with `"stream": true`. Response is SSE:
|
||||
|
||||
```
|
||||
data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
|
||||
|
||||
data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Here "},"finish_reason":null}]}
|
||||
|
||||
data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"are "},"finish_reason":null}]}
|
||||
|
||||
data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}
|
||||
|
||||
data: [DONE]
|
||||
```
|
||||
|
||||
### Models List
|
||||
|
||||
```
|
||||
GET /v1/models
|
||||
Authorization: Bearer hermes-api-key-here
|
||||
```
|
||||
|
||||
Response:
|
||||
```json
|
||||
{
|
||||
"object": "list",
|
||||
"data": [{
|
||||
"id": "hermes-agent",
|
||||
"object": "model",
|
||||
"created": 1710000000,
|
||||
"owned_by": "hermes-agent"
|
||||
}]
|
||||
}
|
||||
```
|
||||
|
||||
## Key Design Decisions
|
||||
|
||||
### 1. Session Management
|
||||
|
||||
The OpenAI API is stateless — each request includes the full conversation.
|
||||
But hermes-agent sessions have persistent state (memory, skills, tool context).
|
||||
|
||||
**Approach: Hybrid**
|
||||
- Default: Stateless. Each request is independent. The `messages` array IS
|
||||
the conversation. No session persistence between requests.
|
||||
- Opt-in persistent sessions via `X-Session-ID` header. When provided, the
|
||||
server maintains session state across requests (conversation history,
|
||||
memory context, tool state). This enables richer agent behavior.
|
||||
- The session ID also enables interrupt support — a subsequent request with
|
||||
the same session ID while one is running triggers an interrupt.
|
||||
|
||||
### 2. Streaming
|
||||
|
||||
The agent's `run_conversation()` is synchronous and returns the full response.
|
||||
For real SSE streaming, we need to emit chunks as they're generated.
|
||||
|
||||
**Phase 1 (MVP):** Run agent in a thread, return the complete response as
|
||||
a single SSE chunk + `[DONE]`. This works with all frontends — they just see
|
||||
a fast single-chunk response. Not true streaming but functional.
|
||||
|
||||
**Phase 2:** Add a response callback to AIAgent that emits text chunks as the
|
||||
LLM generates them. The API server captures these via a queue and streams them
|
||||
as SSE events. This gives real token-by-token streaming.
|
||||
|
||||
**Phase 3:** Stream tool execution progress too — emit tool call/result events
|
||||
as the agent works, giving frontends visibility into what the agent is doing.
|
||||
|
||||
### 3. Tool Transparency
|
||||
|
||||
Two modes:
|
||||
- **Opaque (default):** Frontends see only the final response. Tool calls
|
||||
happen server-side and are invisible. Best for general-purpose UIs.
|
||||
- **Transparent (opt-in via header):** Tool calls are emitted as OpenAI-format
|
||||
tool_call/tool_result messages in the stream. Useful for agent-aware frontends.
|
||||
|
||||
### 4. Authentication
|
||||
|
||||
- Bearer token via `Authorization: Bearer <key>` header
|
||||
- Token configured via `API_SERVER_KEY` env var
|
||||
- Optional: allow unauthenticated local-only access (127.0.0.1 bind)
|
||||
- Follows the same pattern as other platform adapters
|
||||
|
||||
### 5. Model Mapping
|
||||
|
||||
Frontends send `"model": "hermes-agent"` (or whatever). The actual LLM model
|
||||
used is configured server-side in config.yaml. The API server maps any
|
||||
requested model name to the configured hermes-agent model.
|
||||
|
||||
Optionally, allow model passthrough: if the frontend sends
|
||||
`"model": "anthropic/claude-sonnet-4"`, the agent uses that model. Controlled
|
||||
by a config flag.
|
||||
|
||||
## Configuration
|
||||
|
||||
```yaml
|
||||
# In config.yaml
|
||||
api_server:
|
||||
enabled: true
|
||||
port: 8642
|
||||
host: "127.0.0.1" # localhost only by default
|
||||
key: "your-secret-key" # or via API_SERVER_KEY env var
|
||||
allow_model_override: false # let clients choose the model
|
||||
max_concurrent: 5 # max simultaneous requests
|
||||
```
|
||||
|
||||
Environment variables:
|
||||
```bash
|
||||
API_SERVER_ENABLED=true
|
||||
API_SERVER_PORT=8642
|
||||
API_SERVER_HOST=127.0.0.1
|
||||
API_SERVER_KEY=your-secret-key
|
||||
```
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
### Phase 1: MVP (non-streaming) — PR
|
||||
|
||||
1. `gateway/platforms/api_server.py` — new adapter
|
||||
- aiohttp.web server with endpoints:
|
||||
- `POST /v1/chat/completions` — Chat Completions API (universal compat)
|
||||
- `POST /v1/responses` — Responses API (server-side state, tool preservation)
|
||||
- `GET /v1/models` — list available models
|
||||
- `GET /health` — health check
|
||||
- Bearer token auth middleware
|
||||
- Non-streaming responses (run agent, return full result)
|
||||
- Chat Completions: stateless, messages array is the conversation
|
||||
- Responses API: server-side conversation storage via previous_response_id
|
||||
- Store full internal conversation (including tool calls) keyed by response ID
|
||||
- On subsequent requests, reconstruct full context from stored chain
|
||||
- Frontend system prompt layered on top of hermes-agent's core prompt
|
||||
|
||||
2. `gateway/config.py` — add `Platform.API_SERVER` enum + config
|
||||
|
||||
3. `gateway/run.py` — register adapter in `_create_adapter()`
|
||||
|
||||
4. Tests in `tests/gateway/test_api_server.py`
|
||||
|
||||
### Phase 2: SSE Streaming
|
||||
|
||||
1. Add response streaming to both endpoints
|
||||
- Chat Completions: `choices[0].delta.content` SSE format
|
||||
- Responses API: semantic events (response.output_text.delta, etc.)
|
||||
- Run agent in thread, collect output via callback queue
|
||||
- Handle client disconnect (cancel agent)
|
||||
|
||||
2. Add `stream_callback` parameter to `AIAgent.run_conversation()`
|
||||
|
||||
### Phase 3: Enhanced Features
|
||||
|
||||
1. Tool call transparency mode (opt-in)
|
||||
2. Model passthrough/override
|
||||
3. Concurrent request limiting
|
||||
4. Usage tracking / rate limiting
|
||||
5. CORS headers for browser-based frontends
|
||||
6. GET /v1/responses/{id} — retrieve stored response
|
||||
7. DELETE /v1/responses/{id} — delete stored response
|
||||
|
||||
## Files Changed
|
||||
|
||||
| File | Change |
|
||||
|------|--------|
|
||||
| `gateway/platforms/api_server.py` | NEW — main adapter (~300 lines) |
|
||||
| `gateway/config.py` | Add Platform.API_SERVER + config (~20 lines) |
|
||||
| `gateway/run.py` | Register adapter in _create_adapter() (~10 lines) |
|
||||
| `tests/gateway/test_api_server.py` | NEW — tests (~200 lines) |
|
||||
| `cli-config.yaml.example` | Add api_server section |
|
||||
| `README.md` | Mention API server in platform list |
|
||||
|
||||
## Compatibility Matrix
|
||||
|
||||
Once implemented, hermes-agent works as a drop-in backend for:
|
||||
|
||||
| Frontend | Stars | How to Connect |
|
||||
|----------|-------|---------------|
|
||||
| Open WebUI | 126k | Settings → Connections → Add OpenAI API, URL: `http://localhost:8642/v1` |
|
||||
| NextChat | 87k | BASE_URL env var |
|
||||
| LobeChat | 73k | Custom provider endpoint |
|
||||
| AnythingLLM | 56k | LLM Provider → Generic OpenAI |
|
||||
| Oobabooga | 42k | Already a backend, not a frontend |
|
||||
| ChatBox | 39k | API Host setting |
|
||||
| LibreChat | 34k | librechat.yaml custom endpoint |
|
||||
| Chatbot UI | 29k | Custom API endpoint |
|
||||
| Jan | 26k | Remote model config |
|
||||
| AionUI | 18k | Custom API endpoint |
|
||||
| HF Chat-UI | 8k | OPENAI_BASE_URL env var |
|
||||
| big-AGI | 7k | Custom endpoint |
|
||||
@@ -1,705 +0,0 @@
|
||||
# Streaming LLM Response Support for Hermes Agent
|
||||
|
||||
## Overview
|
||||
|
||||
Add token-by-token streaming of LLM responses across all platforms. When enabled,
|
||||
users see the response typing out live instead of waiting for the full generation.
|
||||
Streaming is opt-in via config, defaults to off, and all existing non-streaming
|
||||
code paths remain intact as the default.
|
||||
|
||||
## Design Principles
|
||||
|
||||
1. **Feature-flagged**: `streaming.enabled: true` in config.yaml. Off by default.
|
||||
When off, all existing code paths are unchanged — zero risk to current behavior.
|
||||
2. **Callback-based**: A simple `stream_callback(text_delta: str)` function injected
|
||||
into AIAgent. The agent doesn't know or care what the consumer does with tokens.
|
||||
3. **Graceful degradation**: If the provider doesn't support streaming, or streaming
|
||||
fails for any reason, silently fall back to the non-streaming path.
|
||||
4. **Platform-agnostic core**: The streaming mechanism in AIAgent works the same
|
||||
regardless of whether the consumer is CLI, Telegram, Discord, or the API server.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
stream_callback(delta)
|
||||
│
|
||||
┌─────────────┐ ┌─────────────▼──────────────┐
|
||||
│ LLM API │ │ queue.Queue() │
|
||||
│ (stream) │───►│ thread-safe bridge between │
|
||||
│ │ │ agent thread & consumer │
|
||||
└─────────────┘ └─────────────┬──────────────┘
|
||||
│
|
||||
┌──────────────┼──────────────┐
|
||||
│ │ │
|
||||
┌─────▼─────┐ ┌─────▼─────┐ ┌─────▼─────┐
|
||||
│ CLI │ │ Gateway │ │ API Server│
|
||||
│ print to │ │ edit msg │ │ SSE event │
|
||||
│ terminal │ │ on Tg/Dc │ │ to client │
|
||||
└───────────┘ └───────────┘ └───────────┘
|
||||
```
|
||||
|
||||
The agent runs in a thread. The callback puts tokens into a thread-safe queue.
|
||||
Each consumer reads the queue in its own context (async task, main thread, etc.).
|
||||
|
||||
---
|
||||
|
||||
## Configuration
|
||||
|
||||
### config.yaml
|
||||
|
||||
```yaml
|
||||
streaming:
|
||||
enabled: false # Master switch. Default off.
|
||||
# Per-platform overrides (optional):
|
||||
# cli: true # Override for CLI only
|
||||
# telegram: true # Override for Telegram only
|
||||
# discord: false # Keep Discord non-streaming
|
||||
# api_server: true # Override for API server
|
||||
```
|
||||
|
||||
### Environment variables
|
||||
|
||||
```
|
||||
HERMES_STREAMING_ENABLED=true # Master switch via env
|
||||
```
|
||||
|
||||
### How the flag is read
|
||||
|
||||
- **CLI**: `load_cli_config()` reads `streaming.enabled`, sets env var. AIAgent
|
||||
checks at init time.
|
||||
- **Gateway**: `_run_agent()` reads config, decides whether to pass
|
||||
`stream_callback` to the AIAgent constructor.
|
||||
- **API server**: For Chat Completions `stream=true` requests, always uses streaming
|
||||
regardless of config (the client is explicitly requesting it). For non-stream
|
||||
requests, uses config.
|
||||
|
||||
### Precedence
|
||||
|
||||
1. API server: client's `stream` field overrides everything
|
||||
2. Per-platform config override (e.g., `streaming.telegram: true`)
|
||||
3. Master `streaming.enabled` flag
|
||||
4. Default: off
|
||||
|
||||
---
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
### Phase 1: Core streaming infrastructure in AIAgent
|
||||
|
||||
**File: run_agent.py**
|
||||
|
||||
#### 1a. Add stream_callback parameter to __init__ (~5 lines)
|
||||
|
||||
```python
|
||||
def __init__(self, ..., stream_callback: callable = None, ...):
|
||||
self.stream_callback = stream_callback
|
||||
```
|
||||
|
||||
No other init changes. The callback is optional — when None, everything
|
||||
works exactly as before.
|
||||
|
||||
#### 1b. Add _run_streaming_chat_completion() method (~65 lines)
|
||||
|
||||
New method for Chat Completions API streaming:
|
||||
|
||||
```python
|
||||
def _run_streaming_chat_completion(self, api_kwargs: dict):
|
||||
"""Stream a chat completion, emitting text tokens via stream_callback.
|
||||
|
||||
Returns a fake response object compatible with the non-streaming code path.
|
||||
Falls back to non-streaming on any error.
|
||||
"""
|
||||
stream_kwargs = dict(api_kwargs)
|
||||
stream_kwargs["stream"] = True
|
||||
stream_kwargs["stream_options"] = {"include_usage": True}
|
||||
|
||||
accumulated_content = []
|
||||
accumulated_tool_calls = {} # index -> {id, name, arguments}
|
||||
final_usage = None
|
||||
|
||||
try:
|
||||
stream = self.client.chat.completions.create(**stream_kwargs)
|
||||
|
||||
for chunk in stream:
|
||||
if not chunk.choices:
|
||||
# Usage-only chunk (final)
|
||||
if chunk.usage:
|
||||
final_usage = chunk.usage
|
||||
continue
|
||||
|
||||
delta = chunk.choices[0].delta
|
||||
|
||||
# Text content — emit via callback
|
||||
if delta.content:
|
||||
accumulated_content.append(delta.content)
|
||||
if self.stream_callback:
|
||||
try:
|
||||
self.stream_callback(delta.content)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Tool call deltas — accumulate silently
|
||||
if delta.tool_calls:
|
||||
for tc_delta in delta.tool_calls:
|
||||
idx = tc_delta.index
|
||||
if idx not in accumulated_tool_calls:
|
||||
accumulated_tool_calls[idx] = {
|
||||
"id": tc_delta.id or "",
|
||||
"name": "", "arguments": ""
|
||||
}
|
||||
if tc_delta.function:
|
||||
if tc_delta.function.name:
|
||||
accumulated_tool_calls[idx]["name"] = tc_delta.function.name
|
||||
if tc_delta.function.arguments:
|
||||
accumulated_tool_calls[idx]["arguments"] += tc_delta.function.arguments
|
||||
|
||||
# Build fake response compatible with existing code
|
||||
tool_calls = []
|
||||
for idx in sorted(accumulated_tool_calls):
|
||||
tc = accumulated_tool_calls[idx]
|
||||
if tc["name"]:
|
||||
tool_calls.append(SimpleNamespace(
|
||||
id=tc["id"], type="function",
|
||||
function=SimpleNamespace(name=tc["name"], arguments=tc["arguments"]),
|
||||
))
|
||||
|
||||
return SimpleNamespace(
|
||||
choices=[SimpleNamespace(
|
||||
message=SimpleNamespace(
|
||||
content="".join(accumulated_content) or "",
|
||||
tool_calls=tool_calls or None,
|
||||
role="assistant",
|
||||
),
|
||||
finish_reason="tool_calls" if tool_calls else "stop",
|
||||
)],
|
||||
usage=final_usage,
|
||||
model=self.model,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.debug("Streaming failed, falling back to non-streaming: %s", e)
|
||||
return self.client.chat.completions.create(**api_kwargs)
|
||||
```
|
||||
|
||||
#### 1c. Modify _run_codex_stream() for Responses API (~10 lines)
|
||||
|
||||
The method already iterates the stream. Add callback emission:
|
||||
|
||||
```python
|
||||
def _run_codex_stream(self, api_kwargs: dict):
|
||||
with self.client.responses.stream(**api_kwargs) as stream:
|
||||
for event in stream:
|
||||
# Emit text deltas if streaming callback is set
|
||||
if self.stream_callback and hasattr(event, 'type'):
|
||||
if event.type == 'response.output_text.delta':
|
||||
try:
|
||||
self.stream_callback(event.delta)
|
||||
except Exception:
|
||||
pass
|
||||
return stream.get_final_response()
|
||||
```
|
||||
|
||||
#### 1d. Modify _interruptible_api_call() (~5 lines)
|
||||
|
||||
Add the streaming branch:
|
||||
|
||||
```python
|
||||
def _call():
|
||||
try:
|
||||
if self.api_mode == "codex_responses":
|
||||
result["response"] = self._run_codex_stream(api_kwargs)
|
||||
elif self.stream_callback is not None:
|
||||
result["response"] = self._run_streaming_chat_completion(api_kwargs)
|
||||
else:
|
||||
result["response"] = self.client.chat.completions.create(**api_kwargs)
|
||||
except Exception as e:
|
||||
result["error"] = e
|
||||
```
|
||||
|
||||
#### 1e. Signal end-of-stream to consumers (~5 lines)
|
||||
|
||||
After the API call returns, signal the callback that streaming is done
|
||||
so consumers can finalize (remove cursor, close SSE, etc.):
|
||||
|
||||
```python
|
||||
# In run_conversation(), after _interruptible_api_call returns:
|
||||
if self.stream_callback:
|
||||
try:
|
||||
self.stream_callback(None) # None = end of stream signal
|
||||
except Exception:
|
||||
pass
|
||||
```
|
||||
|
||||
Consumers check: `if delta is None: finalize()`
|
||||
|
||||
**Tests for Phase 1:** (~150 lines)
|
||||
- Test _run_streaming_chat_completion with mocked stream
|
||||
- Test fallback to non-streaming on error
|
||||
- Test tool_call accumulation during streaming
|
||||
- Test stream_callback receives correct deltas
|
||||
- Test None signal at end of stream
|
||||
- Test streaming disabled when callback is None
|
||||
|
||||
---
|
||||
|
||||
### Phase 2: Gateway consumers (Telegram, Discord, etc.)
|
||||
|
||||
**File: gateway/run.py**
|
||||
|
||||
#### 2a. Read streaming config (~15 lines)
|
||||
|
||||
In `_run_agent()`, before creating the AIAgent:
|
||||
|
||||
```python
|
||||
# Read streaming config
|
||||
_streaming_enabled = False
|
||||
try:
|
||||
# Check per-platform override first
|
||||
platform_key = source.platform.value if source.platform else ""
|
||||
_stream_cfg = {} # loaded from config.yaml streaming section
|
||||
if _stream_cfg.get(platform_key) is not None:
|
||||
_streaming_enabled = bool(_stream_cfg[platform_key])
|
||||
else:
|
||||
_streaming_enabled = bool(_stream_cfg.get("enabled", False))
|
||||
except Exception:
|
||||
pass
|
||||
# Env var override
|
||||
if os.getenv("HERMES_STREAMING_ENABLED", "").lower() in ("true", "1", "yes"):
|
||||
_streaming_enabled = True
|
||||
```
|
||||
|
||||
#### 2b. Set up queue + callback (~15 lines)
|
||||
|
||||
```python
|
||||
_stream_q = None
|
||||
_stream_done = None
|
||||
_stream_msg_id = [None] # mutable ref for the async task
|
||||
|
||||
if _streaming_enabled:
|
||||
import queue as _q
|
||||
_stream_q = _q.Queue()
|
||||
_stream_done = threading.Event()
|
||||
|
||||
def _on_token(delta):
|
||||
if delta is None:
|
||||
_stream_done.set()
|
||||
else:
|
||||
_stream_q.put(delta)
|
||||
```
|
||||
|
||||
Pass `stream_callback=_on_token` to the AIAgent constructor.
|
||||
|
||||
#### 2c. Telegram/Discord stream preview task (~50 lines)
|
||||
|
||||
```python
|
||||
async def stream_preview():
|
||||
"""Progressively edit a message with streaming tokens."""
|
||||
if not _stream_q:
|
||||
return
|
||||
adapter = self.adapters.get(source.platform)
|
||||
if not adapter:
|
||||
return
|
||||
|
||||
accumulated = []
|
||||
token_count = 0
|
||||
last_edit = 0.0
|
||||
MIN_TOKENS = 20 # Don't show until enough context
|
||||
EDIT_INTERVAL = 1.5 # Respect Telegram rate limits
|
||||
|
||||
try:
|
||||
while not _stream_done.is_set():
|
||||
try:
|
||||
chunk = _stream_q.get(timeout=0.1)
|
||||
accumulated.append(chunk)
|
||||
token_count += 1
|
||||
except queue.Empty:
|
||||
continue
|
||||
|
||||
now = time.monotonic()
|
||||
if token_count >= MIN_TOKENS and (now - last_edit) >= EDIT_INTERVAL:
|
||||
preview = "".join(accumulated) + " ▌"
|
||||
if _stream_msg_id[0] is None:
|
||||
r = await adapter.send(
|
||||
chat_id=source.chat_id,
|
||||
content=preview,
|
||||
metadata=_thread_metadata,
|
||||
)
|
||||
if r.success and r.message_id:
|
||||
_stream_msg_id[0] = r.message_id
|
||||
else:
|
||||
await adapter.edit_message(
|
||||
chat_id=source.chat_id,
|
||||
message_id=_stream_msg_id[0],
|
||||
content=preview,
|
||||
)
|
||||
last_edit = now
|
||||
|
||||
# Drain remaining tokens
|
||||
while not _stream_q.empty():
|
||||
accumulated.append(_stream_q.get_nowait())
|
||||
|
||||
# Final edit — remove cursor, show complete text
|
||||
if _stream_msg_id[0] and accumulated:
|
||||
await adapter.edit_message(
|
||||
chat_id=source.chat_id,
|
||||
message_id=_stream_msg_id[0],
|
||||
content="".join(accumulated),
|
||||
)
|
||||
|
||||
except asyncio.CancelledError:
|
||||
# Clean up on cancel
|
||||
if _stream_msg_id[0] and accumulated:
|
||||
try:
|
||||
await adapter.edit_message(
|
||||
chat_id=source.chat_id,
|
||||
message_id=_stream_msg_id[0],
|
||||
content="".join(accumulated),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.debug("stream_preview error: %s", e)
|
||||
```
|
||||
|
||||
#### 2d. Skip final send if already streamed (~10 lines)
|
||||
|
||||
In `_process_message_background()` (base.py), after getting the response,
|
||||
if streaming was active and `_stream_msg_id[0]` is set, the final response
|
||||
was already delivered via progressive edits. Skip the normal `self.send()`
|
||||
call to avoid duplicating the message.
|
||||
|
||||
This is the most delicate integration point — we need to communicate from
|
||||
the gateway's `_run_agent` back to the base adapter's response sender that
|
||||
the response was already delivered. Options:
|
||||
|
||||
- **Option A**: Return a special marker in the result dict:
|
||||
`result["_streamed_msg_id"] = _stream_msg_id[0]`
|
||||
The base adapter checks this and skips `send()`.
|
||||
|
||||
- **Option B**: Edit the already-sent message with the final response
|
||||
(which may differ slightly from accumulated tokens due to think-block
|
||||
stripping, etc.) and don't send a new one.
|
||||
|
||||
- **Option C**: The stream preview task handles the FULL final response
|
||||
(including any post-processing), and the handler returns None to skip
|
||||
the normal send path.
|
||||
|
||||
Recommended: **Option A** — cleanest separation. The result dict already
|
||||
carries metadata; adding one more field is low-risk.
|
||||
|
||||
**Platform-specific considerations:**
|
||||
|
||||
| Platform | Edit support | Rate limits | Streaming approach |
|
||||
|----------|-------------|-------------|-------------------|
|
||||
| Telegram | ✅ edit_message_text | ~20 edits/min | Edit every 1.5s |
|
||||
| Discord | ✅ message.edit | 5 edits/5s per message | Edit every 1.2s |
|
||||
| Slack | ✅ chat.update | Tier 3 (~50/min) | Edit every 1.5s |
|
||||
| WhatsApp | ❌ no edit support | N/A | Skip streaming, use normal path |
|
||||
| HomeAssistant | ❌ no edit | N/A | Skip streaming |
|
||||
| API Server | ✅ SSE native | No limit | Real SSE events |
|
||||
|
||||
WhatsApp and HomeAssistant fall back to non-streaming automatically because
|
||||
they don't support message editing.
|
||||
|
||||
**Tests for Phase 2:** (~100 lines)
|
||||
- Test stream_preview sends/edits correctly
|
||||
- Test skip-final-send when streaming delivered
|
||||
- Test WhatsApp/HA graceful fallback
|
||||
- Test streaming disabled per-platform config
|
||||
- Test thread_id metadata forwarded in stream messages
|
||||
|
||||
---
|
||||
|
||||
### Phase 3: CLI streaming
|
||||
|
||||
**File: cli.py**
|
||||
|
||||
#### 3a. Set up callback in the CLI chat loop (~20 lines)
|
||||
|
||||
In `_chat_once()` or wherever the agent is invoked:
|
||||
|
||||
```python
|
||||
if streaming_enabled:
|
||||
_stream_q = queue.Queue()
|
||||
_stream_done = threading.Event()
|
||||
|
||||
def _cli_stream_callback(delta):
|
||||
if delta is None:
|
||||
_stream_done.set()
|
||||
else:
|
||||
_stream_q.put(delta)
|
||||
|
||||
agent.stream_callback = _cli_stream_callback
|
||||
```
|
||||
|
||||
#### 3b. Token display thread/task (~30 lines)
|
||||
|
||||
Start a thread that reads the queue and prints tokens:
|
||||
|
||||
```python
|
||||
def _stream_display():
|
||||
"""Print tokens to terminal as they arrive."""
|
||||
first_token = True
|
||||
while not _stream_done.is_set():
|
||||
try:
|
||||
delta = _stream_q.get(timeout=0.1)
|
||||
except queue.Empty:
|
||||
continue
|
||||
if first_token:
|
||||
# Print response box top border
|
||||
_cprint(f"\n{top}")
|
||||
first_token = False
|
||||
sys.stdout.write(delta)
|
||||
sys.stdout.flush()
|
||||
# Drain remaining
|
||||
while not _stream_q.empty():
|
||||
sys.stdout.write(_stream_q.get_nowait())
|
||||
sys.stdout.flush()
|
||||
# Print bottom border
|
||||
_cprint(f"\n\n{bot}")
|
||||
```
|
||||
|
||||
**Integration challenge: prompt_toolkit**
|
||||
|
||||
The CLI uses prompt_toolkit which controls the terminal. Writing directly
|
||||
to stdout while prompt_toolkit is active can cause display corruption.
|
||||
The existing KawaiiSpinner already solves this by using prompt_toolkit's
|
||||
`patch_stdout` context. The streaming display would need to do the same.
|
||||
|
||||
Alternative: use `_cprint()` for each token chunk (routes through
|
||||
prompt_toolkit's renderer). But this might be slow for individual tokens.
|
||||
|
||||
Recommended approach: accumulate tokens in small batches (e.g., every 50ms)
|
||||
and `_cprint()` the batch. This balances display responsiveness with
|
||||
prompt_toolkit compatibility.
|
||||
|
||||
**Tests for Phase 3:** (~50 lines)
|
||||
- Test CLI streaming callback setup
|
||||
- Test response box borders with streaming
|
||||
- Test fallback when streaming disabled
|
||||
|
||||
---
|
||||
|
||||
### Phase 4: API Server real streaming
|
||||
|
||||
**File: gateway/platforms/api_server.py**
|
||||
|
||||
Replace the pseudo-streaming `_write_sse_chat_completion()` with real
|
||||
token-by-token SSE when the agent supports it.
|
||||
|
||||
#### 4a. Wire streaming callback for stream=true requests (~20 lines)
|
||||
|
||||
```python
|
||||
if stream:
|
||||
_stream_q = queue.Queue()
|
||||
|
||||
def _api_stream_callback(delta):
|
||||
_stream_q.put(delta) # None = done
|
||||
|
||||
# Pass callback to _run_agent
|
||||
result, usage = await self._run_agent(
|
||||
..., stream_callback=_api_stream_callback,
|
||||
)
|
||||
```
|
||||
|
||||
#### 4b. Real SSE writer (~40 lines)
|
||||
|
||||
```python
|
||||
async def _write_real_sse(self, request, completion_id, model, stream_q):
|
||||
response = web.StreamResponse(
|
||||
headers={"Content-Type": "text/event-stream", "Cache-Control": "no-cache"},
|
||||
)
|
||||
await response.prepare(request)
|
||||
|
||||
# Role chunk
|
||||
await response.write(...)
|
||||
|
||||
# Stream content chunks as they arrive
|
||||
while True:
|
||||
try:
|
||||
delta = await asyncio.get_event_loop().run_in_executor(
|
||||
None, lambda: stream_q.get(timeout=0.1)
|
||||
)
|
||||
except queue.Empty:
|
||||
continue
|
||||
|
||||
if delta is None: # End of stream
|
||||
break
|
||||
|
||||
chunk = {"id": completion_id, "object": "chat.completion.chunk", ...
|
||||
"choices": [{"delta": {"content": delta}, ...}]}
|
||||
await response.write(f"data: {json.dumps(chunk)}\n\n".encode())
|
||||
|
||||
# Finish + [DONE]
|
||||
await response.write(...)
|
||||
await response.write(b"data: [DONE]\n\n")
|
||||
return response
|
||||
```
|
||||
|
||||
**Challenge: concurrent execution**
|
||||
|
||||
The agent runs in a thread executor. SSE writing happens in the async event
|
||||
loop. The queue bridges them. But `_run_agent()` currently awaits the full
|
||||
result before returning. For real streaming, we need to start the agent in
|
||||
the background and stream tokens while it runs:
|
||||
|
||||
```python
|
||||
# Start agent in background
|
||||
agent_task = asyncio.create_task(self._run_agent_async(...))
|
||||
|
||||
# Stream tokens while agent runs
|
||||
await self._write_real_sse(request, ..., stream_q)
|
||||
|
||||
# Agent is done by now (stream_q received None)
|
||||
result, usage = await agent_task
|
||||
```
|
||||
|
||||
This requires splitting `_run_agent` into an async version that doesn't
|
||||
block waiting for the result, or running it in a separate task.
|
||||
|
||||
**Responses API SSE format:**
|
||||
|
||||
For `/v1/responses` with `stream=true`, the SSE events are different:
|
||||
|
||||
```
|
||||
event: response.output_text.delta
|
||||
data: {"type":"response.output_text.delta","delta":"Hello"}
|
||||
|
||||
event: response.completed
|
||||
data: {"type":"response.completed","response":{...}}
|
||||
```
|
||||
|
||||
This needs a separate SSE writer that emits Responses API format events.
|
||||
|
||||
**Tests for Phase 4:** (~80 lines)
|
||||
- Test real SSE streaming with mocked agent
|
||||
- Test SSE event format (Chat Completions vs Responses)
|
||||
- Test client disconnect during streaming
|
||||
- Test fallback to pseudo-streaming when callback not available
|
||||
|
||||
---
|
||||
|
||||
## Integration Issues & Edge Cases
|
||||
|
||||
### 1. Tool calls during streaming
|
||||
|
||||
When the model returns tool calls instead of text, no text tokens are emitted.
|
||||
The stream_callback is simply never called with text. After tools execute, the
|
||||
next API call may produce the final text response — streaming picks up again.
|
||||
|
||||
The stream preview task needs to handle this: if no tokens arrive during a
|
||||
tool-call round, don't send/edit any message. The tool progress messages
|
||||
continue working as before.
|
||||
|
||||
### 2. Duplicate messages
|
||||
|
||||
The biggest risk: the agent sends the final response normally (via the
|
||||
existing send path) AND the stream preview already showed it. The user
|
||||
sees the response twice.
|
||||
|
||||
Prevention: when streaming is active and tokens were delivered, the final
|
||||
response send must be suppressed. The `result["_streamed_msg_id"]` marker
|
||||
tells the base adapter to skip its normal send.
|
||||
|
||||
### 3. Response post-processing
|
||||
|
||||
The final response may differ from the accumulated streamed tokens:
|
||||
- Think block stripping (`<think>...</think>` removed)
|
||||
- Trailing whitespace cleanup
|
||||
- Tool result media tag appending
|
||||
|
||||
The stream preview shows raw tokens. The final edit should use the
|
||||
post-processed version. This means the final edit (removing the cursor)
|
||||
should use the post-processed `final_response`, not just the accumulated
|
||||
stream text.
|
||||
|
||||
### 4. Context compression during streaming
|
||||
|
||||
If the agent triggers context compression mid-conversation, the streaming
|
||||
tokens from BEFORE compression are from a different context than those
|
||||
after. This isn't a problem in practice — compression happens between
|
||||
API calls, not during streaming.
|
||||
|
||||
### 5. Interrupt during streaming
|
||||
|
||||
User sends a new message while streaming → interrupt. The stream is killed
|
||||
(HTTP connection closed), accumulated tokens are shown as-is (no cursor),
|
||||
and the interrupt message is processed normally. This is already handled by
|
||||
`_interruptible_api_call` closing the client.
|
||||
|
||||
### 6. Multi-model / fallback
|
||||
|
||||
If the primary model fails and the agent falls back to a different model,
|
||||
streaming state resets. The fallback call may or may not support streaming.
|
||||
The graceful fallback in `_run_streaming_chat_completion` handles this.
|
||||
|
||||
### 7. Rate limiting on edits
|
||||
|
||||
Telegram: ~20 edits/minute (~1 every 3 seconds to be safe)
|
||||
Discord: 5 edits per 5 seconds per message
|
||||
Slack: ~50 API calls/minute
|
||||
|
||||
The 1.5s edit interval is conservative enough for all platforms. If we get
|
||||
429 rate limit errors on edits, just skip that edit cycle and try next time.
|
||||
|
||||
---
|
||||
|
||||
## Files Changed Summary
|
||||
|
||||
| File | Phase | Changes |
|
||||
|------|-------|---------|
|
||||
| `run_agent.py` | 1 | +stream_callback param, +_run_streaming_chat_completion(), modify _run_codex_stream(), modify _interruptible_api_call() |
|
||||
| `gateway/run.py` | 2 | +streaming config reader, +queue/callback setup, +stream_preview task, +skip-final-send logic |
|
||||
| `gateway/platforms/base.py` | 2 | +check for _streamed_msg_id in response handler |
|
||||
| `cli.py` | 3 | +streaming setup, +token display, +response box integration |
|
||||
| `gateway/platforms/api_server.py` | 4 | +real SSE writer, +streaming callback wiring |
|
||||
| `hermes_cli/config.py` | 1 | +streaming config defaults |
|
||||
| `cli-config.yaml.example` | 1 | +streaming section |
|
||||
| `tests/test_streaming.py` | 1-4 | NEW — ~380 lines of tests |
|
||||
|
||||
**Total new code**: ~500 lines across all phases
|
||||
**Total test code**: ~380 lines
|
||||
|
||||
---
|
||||
|
||||
## Rollout Plan
|
||||
|
||||
1. **Phase 1** (core): Merge to main. Streaming disabled by default.
|
||||
Zero impact on existing behavior. Can be tested with env var.
|
||||
|
||||
2. **Phase 2** (gateway): Merge to main. Test on Telegram manually.
|
||||
Enable per-platform: `streaming.telegram: true` in config.
|
||||
|
||||
3. **Phase 3** (CLI): Merge to main. Test in terminal.
|
||||
Enable: `streaming.cli: true` or `streaming.enabled: true`.
|
||||
|
||||
4. **Phase 4** (API server): Merge to main. Test with Open WebUI.
|
||||
Auto-enabled when client sends `stream: true`.
|
||||
|
||||
Each phase is independently mergeable and testable. Streaming stays
|
||||
off by default throughout. Once all phases are stable, consider
|
||||
changing the default to enabled.
|
||||
|
||||
---
|
||||
|
||||
## Config Reference (final state)
|
||||
|
||||
```yaml
|
||||
# config.yaml
|
||||
streaming:
|
||||
enabled: false # Master switch (default: off)
|
||||
cli: true # Per-platform override
|
||||
telegram: true
|
||||
discord: true
|
||||
slack: true
|
||||
api_server: true # API server always streams when client requests it
|
||||
edit_interval: 1.5 # Seconds between message edits (default: 1.5)
|
||||
min_tokens: 20 # Tokens before first display (default: 20)
|
||||
```
|
||||
|
||||
```bash
|
||||
# Environment variable override
|
||||
HERMES_STREAMING_ENABLED=true
|
||||
```
|
||||
922
CONTRIBUTING.md
922
CONTRIBUTING.md
@@ -1,922 +0,0 @@
|
||||
# Contributing to Hermes Agent
|
||||
|
||||
Thank you for contributing to Hermes Agent! This guide covers everything you need: setting up your dev environment, understanding the architecture, deciding what to build, and getting your PR merged.
|
||||
|
||||
---
|
||||
|
||||
## Contribution Priorities
|
||||
|
||||
We value contributions in this order:
|
||||
|
||||
1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority.
|
||||
2. **Cross-platform compatibility** — macOS, different Linux distros, and WSL2 on Windows. We want Hermes to work everywhere.
|
||||
3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations).
|
||||
4. **Performance and robustness** — retry logic, error handling, graceful degradation.
|
||||
5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool)
|
||||
6. **New tools** — rarely needed. Most capabilities should be skills. See below.
|
||||
7. **Documentation** — fixes, clarifications, new examples.
|
||||
|
||||
---
|
||||
|
||||
## Should it be a Skill or a Tool?
|
||||
|
||||
This is the most common question for new contributors. The answer is almost always **skill**.
|
||||
|
||||
### Make it a Skill when:
|
||||
|
||||
- The capability can be expressed as instructions + shell commands + existing tools
|
||||
- It wraps an external CLI or API that the agent can call via `terminal` or `web_extract`
|
||||
- It doesn't need custom Python integration or API key management baked into the agent
|
||||
- Examples: arXiv search, git workflows, Docker management, PDF processing, email via CLI tools
|
||||
|
||||
### Make it a Tool when:
|
||||
|
||||
- It requires end-to-end integration with API keys, auth flows, or multi-component configuration managed by the agent harness
|
||||
- It needs custom processing logic that must execute precisely every time (not "best effort" from LLM interpretation)
|
||||
- It handles binary data, streaming, or real-time events that can't go through the terminal
|
||||
- Examples: browser automation (Browserbase session management), TTS (audio encoding + platform delivery), vision analysis (base64 image handling)
|
||||
|
||||
### Should the Skill be bundled?
|
||||
|
||||
Bundled skills (in `skills/`) ship with every Hermes install. They should be **broadly useful to most users**:
|
||||
|
||||
- Document handling, web research, common dev workflows, system administration
|
||||
- Used regularly by a wide range of people
|
||||
|
||||
If your skill is official and useful but not universally needed (e.g., a paid service integration, a heavyweight dependency), put it in **`optional-skills/`** — it ships with the repo but isn't activated by default. Users can discover it via `hermes skills browse` (labeled "official") and install it with `hermes skills install` (no third-party warning, built-in trust).
|
||||
|
||||
If your skill is specialized, community-contributed, or niche, it's better suited for a **Skills Hub** — upload it to a skills registry and share it in the [Nous Research Discord](https://discord.gg/NousResearch). Users can install it with `hermes skills install`.
|
||||
|
||||
---
|
||||
|
||||
## Memory Providers: Ship as a Standalone Plugin
|
||||
|
||||
**We are no longer accepting new memory providers into this repo.** The set of built-in providers under `plugins/memory/` (honcho, mem0, supermemory, byterover, hindsight, holographic, openviking, retaindb) is closed. If you want to add a new memory backend, publish it as a **standalone plugin repo** that users install into `~/.hermes/plugins/` (or via a pip entry point).
|
||||
|
||||
Standalone memory plugins:
|
||||
|
||||
- Implement the same `MemoryProvider` ABC (`agent/memory_provider.py`) — `sync_turn`, `prefetch`, `shutdown`, and optionally `post_setup(hermes_home, config)` for setup-wizard integration
|
||||
- Use the same discovery system — `discover_memory_providers()` picks them up from user/project plugin directories and pip entry points
|
||||
- Integrate with `hermes memory setup` via `post_setup()` — no need to touch core code
|
||||
- Can register their own CLI subcommands via `register_cli(subparser)` in a `cli.py` file
|
||||
- Get all the same lifecycle hooks and config plumbing as in-tree providers
|
||||
|
||||
PRs that add a new directory under `plugins/memory/` will be closed with a pointer to publish the provider as its own repo. Existing in-tree providers stay; bug fixes to them are welcome.
|
||||
|
||||
This isn't a quality bar — it's a coupling-and-maintenance decision. Memory providers are the most common plugin type and they shouldn't all live in this tree.
|
||||
|
||||
---
|
||||
|
||||
## Development Setup
|
||||
|
||||
### Prerequisites
|
||||
|
||||
| Requirement | Notes |
|
||||
|-------------|-------|
|
||||
| **Git** | With the `git-lfs` extension installed |
|
||||
| **Python 3.11+** | uv will install it if missing |
|
||||
| **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
|
||||
| **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |
|
||||
|
||||
### Clone and install
|
||||
|
||||
```bash
|
||||
git clone https://github.com/NousResearch/hermes-agent.git
|
||||
cd hermes-agent
|
||||
|
||||
# Create venv with Python 3.11
|
||||
uv venv venv --python 3.11
|
||||
export VIRTUAL_ENV="$(pwd)/venv"
|
||||
|
||||
# Install with all extras (messaging, cron, CLI menus, dev tools)
|
||||
uv pip install -e ".[all,dev]"
|
||||
|
||||
# Optional: browser tools
|
||||
npm install
|
||||
```
|
||||
|
||||
### Configure for development
|
||||
|
||||
```bash
|
||||
mkdir -p ~/.hermes/{cron,sessions,logs,memories,skills}
|
||||
cp cli-config.yaml.example ~/.hermes/config.yaml
|
||||
touch ~/.hermes/.env
|
||||
|
||||
# Add at minimum an LLM provider key:
|
||||
echo "OPENROUTER_API_KEY=***" >> ~/.hermes/.env
|
||||
```
|
||||
|
||||
### Run
|
||||
|
||||
```bash
|
||||
# Symlink for global access
|
||||
mkdir -p ~/.local/bin
|
||||
ln -sf "$(pwd)/venv/bin/hermes" ~/.local/bin/hermes
|
||||
|
||||
# Verify
|
||||
hermes doctor
|
||||
hermes chat -q "Hello"
|
||||
```
|
||||
|
||||
### Run tests
|
||||
|
||||
```bash
|
||||
# Preferred — matches CI (hermetic env, 4 xdist workers); see AGENTS.md
|
||||
scripts/run_tests.sh
|
||||
|
||||
# Alternative (activate the venv first). The wrapper is still recommended
|
||||
# for parity with GitHub Actions before you open a PR:
|
||||
pytest tests/ -v
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
hermes-agent/
|
||||
├── run_agent.py # AIAgent class — core conversation loop, tool dispatch, session persistence
|
||||
├── cli.py # HermesCLI class — interactive TUI, prompt_toolkit integration
|
||||
├── model_tools.py # Tool orchestration (thin layer over tools/registry.py)
|
||||
├── toolsets.py # Tool groupings and presets (hermes-cli, hermes-telegram, etc.)
|
||||
├── hermes_state.py # SQLite session database with FTS5 full-text search, session titles
|
||||
├── batch_runner.py # Parallel batch processing for trajectory generation
|
||||
│
|
||||
├── agent/ # Agent internals (extracted modules)
|
||||
│ ├── prompt_builder.py # System prompt assembly (identity, skills, context files, memory)
|
||||
│ ├── context_compressor.py # Auto-summarization when approaching context limits
|
||||
│ ├── auxiliary_client.py # Resolves auxiliary OpenAI clients (summarization, vision)
|
||||
│ ├── display.py # KawaiiSpinner, tool progress formatting
|
||||
│ ├── model_metadata.py # Model context lengths, token estimation
|
||||
│ └── trajectory.py # Trajectory saving helpers
|
||||
│
|
||||
├── hermes_cli/ # CLI command implementations
|
||||
│ ├── main.py # Entry point, argument parsing, command dispatch
|
||||
│ ├── config.py # Config management, migration, env var definitions
|
||||
│ ├── setup.py # Interactive setup wizard
|
||||
│ ├── auth.py # Provider resolution, OAuth, Nous Portal
|
||||
│ ├── models.py # OpenRouter model selection lists
|
||||
│ ├── banner.py # Welcome banner, ASCII art
|
||||
│ ├── commands.py # Central slash command registry (CommandDef), autocomplete, gateway helpers
|
||||
│ ├── callbacks.py # Interactive callbacks (clarify, sudo, approval)
|
||||
│ ├── doctor.py # Diagnostics
|
||||
│ ├── skills_hub.py # Skills Hub CLI + /skills slash command
|
||||
│ └── skin_engine.py # Skin/theme engine — data-driven CLI visual customization
|
||||
│
|
||||
├── tools/ # Tool implementations (self-registering)
|
||||
│ ├── registry.py # Central tool registry (schemas, handlers, dispatch)
|
||||
│ ├── approval.py # Dangerous command detection + per-session approval
|
||||
│ ├── terminal_tool.py # Terminal orchestration (sudo, env lifecycle, backends)
|
||||
│ ├── file_operations.py # read_file, write_file, search, patch, etc.
|
||||
│ ├── web_tools.py # web_search, web_extract (Parallel/Firecrawl + Gemini summarization)
|
||||
│ ├── vision_tools.py # Image analysis via multimodal models
|
||||
│ ├── delegate_tool.py # Subagent spawning and parallel task execution
|
||||
│ ├── code_execution_tool.py # Sandboxed Python with RPC tool access
|
||||
│ ├── session_search_tool.py # Search past conversations with FTS5 + anchored windows
|
||||
│ ├── cronjob_tools.py # Scheduled task management
|
||||
│ ├── skill_tools.py # Skill search, load, manage
|
||||
│ └── environments/ # Terminal execution backends
|
||||
│ ├── base.py # BaseEnvironment ABC
|
||||
│ ├── local.py, docker.py, ssh.py, singularity.py, modal.py, daytona.py
|
||||
│
|
||||
├── gateway/ # Messaging gateway
|
||||
│ ├── run.py # GatewayRunner — platform lifecycle, message routing, cron
|
||||
│ ├── config.py # Platform configuration resolution
|
||||
│ ├── session.py # Session store, context prompts, reset policies
|
||||
│ └── platforms/ # Platform adapters
|
||||
│ ├── telegram.py, discord_adapter.py, slack.py, whatsapp.py
|
||||
│
|
||||
├── scripts/ # Installer and bridge scripts
|
||||
│ ├── install.sh # Linux/macOS installer
|
||||
│ ├── install.ps1 # Windows PowerShell installer
|
||||
│ └── whatsapp-bridge/ # Node.js WhatsApp bridge (Baileys)
|
||||
│
|
||||
├── skills/ # Bundled skills (copied to ~/.hermes/skills/ on install)
|
||||
├── optional-skills/ # Official optional skills (discoverable via hub, not activated by default)
|
||||
├── tests/ # Test suite
|
||||
├── website/ # Documentation site (hermes-agent.nousresearch.com)
|
||||
│
|
||||
├── cli-config.yaml.example # Example configuration (copied to ~/.hermes/config.yaml)
|
||||
└── AGENTS.md # Development guide for AI coding assistants
|
||||
```
|
||||
|
||||
### User configuration (stored in `~/.hermes/`)
|
||||
|
||||
| Path | Purpose |
|
||||
|------|---------|
|
||||
| `~/.hermes/config.yaml` | Settings (model, terminal, toolsets, compression, etc.) |
|
||||
| `~/.hermes/.env` | API keys and secrets |
|
||||
| `~/.hermes/auth.json` | OAuth credentials (Nous Portal) |
|
||||
| `~/.hermes/skills/` | All active skills (bundled + hub-installed + agent-created) |
|
||||
| `~/.hermes/memories/` | Persistent memory (MEMORY.md, USER.md) |
|
||||
| `~/.hermes/state.db` | SQLite session database |
|
||||
| `~/.hermes/sessions/` | Gateway routing index (`sessions.json`), request-dump breadcrumbs, gateway `*.jsonl` transcripts, and (optionally) per-session JSON snapshots when `sessions.write_json_snapshots: true` is set. The per-session snapshots are off by default; state.db is canonical. |
|
||||
| `~/.hermes/cron/` | Scheduled job data |
|
||||
| `~/.hermes/whatsapp/session/` | WhatsApp bridge credentials |
|
||||
|
||||
---
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
### Core Loop
|
||||
|
||||
```
|
||||
User message → AIAgent._run_agent_loop()
|
||||
├── Build system prompt (prompt_builder.py)
|
||||
├── Build API kwargs (model, messages, tools, reasoning config)
|
||||
├── Call LLM (OpenAI-compatible API)
|
||||
├── If tool_calls in response:
|
||||
│ ├── Execute each tool via registry dispatch
|
||||
│ ├── Add tool results to conversation
|
||||
│ └── Loop back to LLM call
|
||||
├── If text response:
|
||||
│ ├── Persist session to DB
|
||||
│ └── Return final_response
|
||||
└── Context compression if approaching token limit
|
||||
```
|
||||
|
||||
### Key Design Patterns
|
||||
|
||||
- **Self-registering tools**: Each tool file calls `registry.register()` at import time. `model_tools.py` triggers discovery by importing all tool modules.
|
||||
- **Toolset grouping**: Tools are grouped into toolsets (`web`, `terminal`, `file`, `browser`, etc.) that can be enabled/disabled per platform.
|
||||
- **Session persistence**: All conversations are stored in SQLite (`hermes_state.py`) with full-text search and unique session titles. Per-session JSON snapshots in `~/.hermes/sessions/` were superseded by the SQLite store and are off by default; opt back in with `sessions.write_json_snapshots: true` if you have external tooling that consumes the JSON files directly.
|
||||
- **Ephemeral injection**: System prompts and prefill messages are injected at API call time, never persisted to the database or logs.
|
||||
- **Provider abstraction**: The agent works with any OpenAI-compatible API. Provider resolution happens at init time (Nous Portal OAuth, OpenRouter API key, or custom endpoint).
|
||||
- **Provider routing**: When using OpenRouter, `provider_routing` in config.yaml controls provider selection (sort by throughput/latency/price, allow/ignore specific providers, data retention policies). These are injected as `extra_body.provider` in API requests.
|
||||
|
||||
---
|
||||
|
||||
## Code Style
|
||||
|
||||
- **PEP 8** with practical exceptions (we don't enforce strict line length)
|
||||
- **Comments**: Only when explaining non-obvious intent, trade-offs, or API quirks. Don't narrate what the code does — `# increment counter` adds nothing
|
||||
- **Error handling**: Catch specific exceptions. Log with `logger.warning()`/`logger.error()` — use `exc_info=True` for unexpected errors so stack traces appear in logs
|
||||
- **Cross-platform**: Never assume Unix. See [Cross-Platform Compatibility](#cross-platform-compatibility)
|
||||
|
||||
---
|
||||
|
||||
## Adding a New Tool
|
||||
|
||||
Before writing a tool, ask: [should this be a skill instead?](#should-it-be-a-skill-or-a-tool)
|
||||
|
||||
Tools self-register with the central registry. Each tool file co-locates its schema, handler, and registration:
|
||||
|
||||
```python
|
||||
"""my_tool — Brief description of what this tool does."""
|
||||
|
||||
import json
|
||||
from tools.registry import registry
|
||||
|
||||
|
||||
def my_tool(param1: str, param2: int = 10, **kwargs) -> str:
|
||||
"""Handler. Returns a string result (often JSON)."""
|
||||
result = do_work(param1, param2)
|
||||
return json.dumps(result)
|
||||
|
||||
|
||||
MY_TOOL_SCHEMA = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "my_tool",
|
||||
"description": "What this tool does and when the agent should use it.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"param1": {"type": "string", "description": "What param1 is"},
|
||||
"param2": {"type": "integer", "description": "What param2 is", "default": 10},
|
||||
},
|
||||
"required": ["param1"],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _check_requirements() -> bool:
|
||||
"""Return True if this tool's dependencies are available."""
|
||||
return True
|
||||
|
||||
|
||||
registry.register(
|
||||
name="my_tool",
|
||||
toolset="my_toolset",
|
||||
schema=MY_TOOL_SCHEMA,
|
||||
handler=lambda args, **kw: my_tool(**args, **kw),
|
||||
check_fn=_check_requirements,
|
||||
)
|
||||
```
|
||||
|
||||
**Wire into a toolset (required):** Built-in tools are auto-discovered: any
|
||||
`tools/*.py` file that contains a top-level `registry.register(...)` call is
|
||||
imported by `discover_builtin_tools()` in `tools/registry.py` when `model_tools`
|
||||
loads. There is **no** manual import list in `model_tools.py` to maintain.
|
||||
|
||||
You must still add the tool name to the appropriate list in `toolsets.py`
|
||||
(for example `_HERMES_CORE_TOOLS` or a dedicated toolset); otherwise the tool
|
||||
registers but is never exposed to the agent. If you introduce a new toolset,
|
||||
add it in `toolsets.py` and wire it into the relevant platform presets.
|
||||
|
||||
See `AGENTS.md` (section **Adding New Tools**) for profile-aware paths and
|
||||
plugin vs core guidance.
|
||||
|
||||
---
|
||||
|
||||
## Adding a Skill
|
||||
|
||||
Bundled skills live in `skills/` organized by category. Official optional skills use the same structure in `optional-skills/`:
|
||||
|
||||
```
|
||||
skills/
|
||||
├── research/
|
||||
│ └── arxiv/
|
||||
│ ├── SKILL.md # Required: main instructions
|
||||
│ └── scripts/ # Optional: helper scripts
|
||||
│ └── search_arxiv.py
|
||||
├── productivity/
|
||||
│ └── ocr-and-documents/
|
||||
│ ├── SKILL.md
|
||||
│ ├── scripts/
|
||||
│ └── references/
|
||||
└── ...
|
||||
```
|
||||
|
||||
### SKILL.md format
|
||||
|
||||
```markdown
|
||||
---
|
||||
name: my-skill
|
||||
description: Brief description (shown in skill search results)
|
||||
version: 1.0.0
|
||||
author: Your Name
|
||||
license: MIT
|
||||
platforms: [macos, linux] # Optional — restrict to specific OS platforms
|
||||
# Valid: macos, linux, windows
|
||||
# Omit to load on all platforms (default)
|
||||
required_environment_variables: # Optional — secure setup-on-load metadata
|
||||
- name: MY_API_KEY
|
||||
prompt: API key
|
||||
help: Where to get it
|
||||
required_for: full functionality
|
||||
prerequisites: # Optional legacy runtime requirements
|
||||
env_vars: [MY_API_KEY] # Backward-compatible alias for required env vars
|
||||
commands: [curl, jq] # Advisory only; does not hide the skill
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [Category, Subcategory, Keywords]
|
||||
related_skills: [other-skill-name]
|
||||
fallback_for_toolsets: [web] # Optional — show only when toolset is unavailable
|
||||
requires_toolsets: [terminal] # Optional — show only when toolset is available
|
||||
---
|
||||
|
||||
# Skill Title
|
||||
|
||||
Brief intro.
|
||||
|
||||
## When to Use
|
||||
Trigger conditions — when should the agent load this skill?
|
||||
|
||||
## Quick Reference
|
||||
Table of common commands or API calls.
|
||||
|
||||
## Procedure
|
||||
Step-by-step instructions the agent follows.
|
||||
|
||||
## Pitfalls
|
||||
Known failure modes and how to handle them.
|
||||
|
||||
## Verification
|
||||
How the agent confirms it worked.
|
||||
```
|
||||
|
||||
### Platform-specific skills
|
||||
|
||||
Skills can declare which OS platforms they support via the `platforms` frontmatter field. Skills with this field are automatically hidden from the system prompt, `skills_list()`, and slash commands on incompatible platforms.
|
||||
|
||||
```yaml
|
||||
platforms: [macos] # macOS only (e.g., iMessage, Apple Reminders)
|
||||
platforms: [macos, linux] # macOS and Linux
|
||||
platforms: [windows] # Windows only
|
||||
```
|
||||
|
||||
If the field is omitted or empty, the skill loads on all platforms (backward compatible). See `skills/apple/` for examples of macOS-only skills.
|
||||
|
||||
### Conditional skill activation
|
||||
|
||||
Skills can declare conditions that control when they appear in the system prompt, based on which tools and toolsets are available in the current session. This is primarily used for **fallback skills** — alternatives that should only be shown when a primary tool is unavailable.
|
||||
|
||||
Four fields are supported under `metadata.hermes`:
|
||||
|
||||
```yaml
|
||||
metadata:
|
||||
hermes:
|
||||
fallback_for_toolsets: [web] # Show ONLY when these toolsets are unavailable
|
||||
requires_toolsets: [terminal] # Show ONLY when these toolsets are available
|
||||
fallback_for_tools: [web_search] # Show ONLY when these specific tools are unavailable
|
||||
requires_tools: [terminal] # Show ONLY when these specific tools are available
|
||||
```
|
||||
|
||||
**Semantics:**
|
||||
- `fallback_for_*`: The skill is a backup. It is **hidden** when the listed tools/toolsets are available, and **shown** when they are unavailable. Use this for free alternatives to premium tools.
|
||||
- `requires_*`: The skill needs certain tools to function. It is **hidden** when the listed tools/toolsets are unavailable. Use this for skills that depend on specific capabilities (e.g., a skill that only makes sense with terminal access).
|
||||
- If both are specified, both conditions must be satisfied for the skill to appear.
|
||||
- If neither is specified, the skill is always shown (backward compatible).
|
||||
|
||||
**Examples:**
|
||||
|
||||
```yaml
|
||||
# DuckDuckGo search — shown when Firecrawl (web toolset) is unavailable
|
||||
metadata:
|
||||
hermes:
|
||||
fallback_for_toolsets: [web]
|
||||
|
||||
# Smart home skill — only useful when terminal is available
|
||||
metadata:
|
||||
hermes:
|
||||
requires_toolsets: [terminal]
|
||||
|
||||
# Local browser fallback — shown when Browserbase is unavailable
|
||||
metadata:
|
||||
hermes:
|
||||
fallback_for_toolsets: [browser]
|
||||
```
|
||||
|
||||
The filtering happens at prompt build time in `agent/prompt_builder.py`. The `build_skills_system_prompt()` function receives the set of available tools and toolsets from the agent and uses `_skill_should_show()` to evaluate each skill's conditions.
|
||||
|
||||
### Skill setup metadata
|
||||
|
||||
Skills can declare secure setup-on-load metadata via the `required_environment_variables` frontmatter field. Missing values do not hide the skill from discovery; they trigger a CLI-only secure prompt when the skill is actually loaded.
|
||||
|
||||
```yaml
|
||||
required_environment_variables:
|
||||
- name: TENOR_API_KEY
|
||||
prompt: Tenor API key
|
||||
help: Get a key from https://developers.google.com/tenor
|
||||
required_for: full functionality
|
||||
```
|
||||
|
||||
The user may skip setup and keep loading the skill. Hermes only exposes metadata (`stored_as`, `skipped`, `validated`) to the model — never the secret value.
|
||||
|
||||
Legacy `prerequisites.env_vars` remains supported and is normalized into the new representation.
|
||||
|
||||
```yaml
|
||||
prerequisites:
|
||||
env_vars: [TENOR_API_KEY] # Legacy alias for required_environment_variables
|
||||
commands: [curl, jq] # Advisory CLI checks
|
||||
```
|
||||
|
||||
Gateway and messaging sessions never collect secrets in-band; they instruct the user to run `hermes setup` or update `~/.hermes/.env` locally.
|
||||
|
||||
**When to declare required environment variables:**
|
||||
- The skill uses an API key or token that should be collected securely at load time
|
||||
- The skill can still be useful if the user skips setup, but may degrade gracefully
|
||||
|
||||
**When to declare command prerequisites:**
|
||||
- The skill relies on a CLI tool that may not be installed (e.g., `himalaya`, `openhue`, `ddgs`)
|
||||
- Treat command checks as guidance, not discovery-time hiding
|
||||
|
||||
See `skills/gifs/gif-search/` and `skills/email/himalaya/` for examples.
|
||||
|
||||
### Skill authoring standards (HARDLINE)
|
||||
|
||||
Every new or modernized skill — bundled, optional, or contributed — must meet these standards before merge. Reviewers reject PRs that violate them.
|
||||
|
||||
1. **`description` ≤ 60 characters, one sentence, ends with a period.** Long descriptions bloat the skill listing UI and dilute the model's attention when many skills are loaded. State the capability, not the implementation. No marketing words ("powerful", "comprehensive", "seamless", "advanced"). Don't repeat the skill name. Verify with:
|
||||
```python
|
||||
import re, pathlib
|
||||
m = re.search(r'^description: (.*)$',
|
||||
pathlib.Path('skills/<cat>/<name>/SKILL.md').read_text(),
|
||||
re.MULTILINE)
|
||||
assert len(m.group(1)) <= 60, len(m.group(1))
|
||||
```
|
||||
|
||||
Good: `Search arXiv papers by keyword, author, category, or ID.`
|
||||
Bad: `A powerful and comprehensive skill that allows the agent to search arXiv for relevant academic papers using various criteria including keywords, authors, and categories.`
|
||||
|
||||
2. **Tools referenced in SKILL.md prose must be native Hermes tools or MCP servers the skill explicitly expects.** When the skill needs a capability, point at the proper tool by name in backticks: `` `terminal` ``, `` `web_extract` ``, `` `web_search` ``, `` `read_file` ``, `` `write_file` ``, `` `patch` ``, `` `search_files` ``, `` `vision_analyze` ``, `` `browser_navigate` ``, `` `delegate_task` ``, `` `image_generate` ``, `` `text_to_speech` ``, `` `cronjob` ``, `` `memory` ``, `` `skill_view` ``, `` `todo` ``, `` `execute_code` ``.
|
||||
|
||||
Do NOT name shell utilities the agent already has wrapped:
|
||||
|
||||
| Don't say | Say |
|
||||
|---|---|
|
||||
| `grep`, `rg` | `search_files` |
|
||||
| `cat`, `head`, `tail` | `read_file` |
|
||||
| `sed`, `awk` | `patch` |
|
||||
| `find`, `ls` | `search_files` (with `target='files'`) |
|
||||
| `curl` for content extraction | `web_extract` |
|
||||
| `echo > file`, `cat <<EOF` | `write_file` |
|
||||
|
||||
If the skill depends on an MCP server, name the MCP server and document its setup in `## Prerequisites`. Third-party CLIs (e.g. `ffmpeg`, `gh`, a specific SDK) are fine to invoke from inside script files, but the prose should frame the interaction as "invoke through the `terminal` tool", not as a manual shell session.
|
||||
|
||||
3. **`platforms:` gating audited against actual script imports.** Skills that use POSIX-only primitives (`fcntl`, `termios`, `os.setsid`, `os.kill(pid, 0)` for liveness, `/proc`, hardcoded `/tmp` paths, `signal.SIGKILL`, bash heredocs, `osascript`, `apt`, `systemctl`) must declare their supported platforms via the `platforms:` frontmatter. Default posture is to fix it cross-platform first — `tempfile.gettempdir()`, `pathlib.Path`, `psutil.pid_exists()`, Python-level filtering instead of `grep`. Gate to a narrower set only when the dependency is genuinely platform-bound (e.g. `osascript` is macOS-only, `/proc` is Linux-only).
|
||||
|
||||
4. **`author` credits the human contributor first.** For external contributions, the contributor's real name + GitHub handle goes first (`Jane Doe (jane-doe)`); "Hermes Agent" is the secondary collaborator. If the contributor's commit shows "Hermes Agent" as author because they used Hermes to draft the skill, replace it with their actual name — credit the human, not the tool.
|
||||
|
||||
5. **SKILL.md body uses the modern section order.** `# <Skill> Skill` title, 2-3 sentence intro stating what it does and what it doesn't do, then:
|
||||
- `## When to Use` — trigger conditions
|
||||
- `## Prerequisites` — env vars, install steps, MCP setup, API key sourcing
|
||||
- `## How to Run` — canonical invocation through the `terminal` tool
|
||||
- `## Quick Reference` — flat command/API reference
|
||||
- `## Procedure` — numbered steps with copy-paste commands
|
||||
- `## Pitfalls` — known limits, rate limits, things that look broken but aren't
|
||||
- `## Verification` — single command that proves the skill works
|
||||
|
||||
Target ~200 lines for a complex skill, ~100 lines for a simple one. Cut redundant intro fluff, marketing prose, and re-explanations of env vars already documented in `## Prerequisites`.
|
||||
|
||||
6. **Scripts go in `scripts/`, references in `references/`, templates in `templates/`.** Don't expect the model to inline-write parsers, XML walkers, or non-trivial logic every call — ship a helper script. Reference scripts from SKILL.md by path relative to the skill directory.
|
||||
|
||||
7. **Tests live at `tests/skills/test_<skill>_skill.py`** and use only stdlib + pytest + `unittest.mock`. No live network calls. Run via `scripts/run_tests.sh tests/skills/test_<skill>_skill.py -q`. Must pass under the hermetic CI env (no API keys leaking through). Use `monkeypatch` and `tmp_path` for any env-var or filesystem dependencies.
|
||||
|
||||
8. **`.env.example` additions are isolated to a clearly delimited block.** Don't touch the surrounding file — contributor-supplied `.env.example` versions are usually stale, and edits outside the skill's own block will be dropped during salvage. Comment all values with `#` (it's documentation, not live config).
|
||||
|
||||
### Skill guidelines
|
||||
|
||||
- **No external dependencies unless absolutely necessary.** Prefer stdlib Python, curl, and existing Hermes tools (`web_extract`, `terminal`, `read_file`).
|
||||
- **Progressive disclosure.** Put the most common workflow first. Edge cases and advanced usage go at the bottom.
|
||||
- **Include helper scripts** for XML/JSON parsing or complex logic — don't expect the LLM to write parsers inline every time.
|
||||
- **Test it.** Run `hermes --toolsets skills -q "Use the X skill to do Y"` and verify the agent follows the instructions correctly.
|
||||
|
||||
---
|
||||
|
||||
## Adding a Skin / Theme
|
||||
|
||||
Hermes uses a data-driven skin system — no code changes needed to add a new skin.
|
||||
|
||||
**Option A: User skin (YAML file)**
|
||||
|
||||
Create `~/.hermes/skins/<name>.yaml`:
|
||||
|
||||
```yaml
|
||||
name: mytheme
|
||||
description: Short description of the theme
|
||||
|
||||
colors:
|
||||
banner_border: "#HEX" # Panel border color
|
||||
banner_title: "#HEX" # Panel title color
|
||||
banner_accent: "#HEX" # Section header color
|
||||
banner_dim: "#HEX" # Muted/dim text color
|
||||
banner_text: "#HEX" # Body text color
|
||||
response_border: "#HEX" # Response box border
|
||||
|
||||
spinner:
|
||||
waiting_faces: ["(⚔)", "(⛨)"]
|
||||
thinking_faces: ["(⚔)", "(⌁)"]
|
||||
thinking_verbs: ["forging", "plotting"]
|
||||
wings: # Optional left/right decorations
|
||||
- ["⟪⚔", "⚔⟫"]
|
||||
|
||||
branding:
|
||||
agent_name: "My Agent"
|
||||
welcome: "Welcome message"
|
||||
response_label: " ⚔ Agent "
|
||||
prompt_symbol: "⚔"
|
||||
|
||||
tool_prefix: "╎" # Tool output line prefix
|
||||
```
|
||||
|
||||
All fields are optional — missing values inherit from the default skin.
|
||||
|
||||
**Option B: Built-in skin**
|
||||
|
||||
Add to `_BUILTIN_SKINS` dict in `hermes_cli/skin_engine.py`. Use the same schema as above but as a Python dict. Built-in skins ship with the package and are always available.
|
||||
|
||||
**Activating:**
|
||||
- CLI: `/skin mytheme` or set `display.skin: mytheme` in config.yaml
|
||||
- Config: `display: { skin: mytheme }`
|
||||
|
||||
See `hermes_cli/skin_engine.py` for the full schema and existing skins as examples.
|
||||
|
||||
---
|
||||
|
||||
## Cross-Platform Compatibility
|
||||
|
||||
Hermes runs on Linux, macOS, and native Windows (plus WSL2). When writing code
|
||||
that touches the OS, assume *any* platform can hit your code path.
|
||||
|
||||
> **Before you PR:** run `scripts/check-windows-footguns.py` to catch the
|
||||
> common Windows-unsafe patterns in your diff. It's grep-based and cheap;
|
||||
> CI runs it on every PR too.
|
||||
|
||||
### Critical rules
|
||||
|
||||
1. **Never call `os.kill(pid, 0)` for liveness checks.** `os.kill(pid, 0)`
|
||||
is a standard POSIX idiom to check "is this PID alive" — the signal 0
|
||||
is a no-op permission check. **On Windows it is NOT a no-op.** Python's
|
||||
Windows `os.kill` maps `sig=0` to `CTRL_C_EVENT` (they collide at the
|
||||
integer value 0) and routes it through `GenerateConsoleCtrlEvent(0, pid)`,
|
||||
which broadcasts Ctrl+C to the **entire console process group** containing
|
||||
the target PID. "Probe if alive" silently becomes "kill the target and
|
||||
often unrelated processes sharing its console." See [bpo-14484](https://bugs.python.org/issue14484)
|
||||
(open since 2012 — will never be fixed for compat reasons).
|
||||
|
||||
**Preferred:** use `psutil` (a core dependency — always available):
|
||||
|
||||
```python
|
||||
import psutil
|
||||
if psutil.pid_exists(pid):
|
||||
# process is alive — safe on every platform
|
||||
...
|
||||
```
|
||||
|
||||
If you specifically need the hermes wrapper (it has a stdlib fallback
|
||||
for scaffold-phase imports before pip install finishes), use
|
||||
`gateway.status._pid_exists(pid)`. It calls `psutil.pid_exists` first
|
||||
and falls back to a hand-rolled `OpenProcess + WaitForSingleObject`
|
||||
dance on Windows only when psutil is somehow missing.
|
||||
|
||||
Audit grep for new callsites: `rg "os\.kill\([^,]+,\s*0\s*\)"`. Any hit
|
||||
in non-test code is presumptively a Windows silent-kill bug.
|
||||
|
||||
2. **Use `shutil.which()` before shelling out — don't assume Windows has
|
||||
tools Linux has.** `wmic` was removed in Windows 10 21H1 and later. `ps`,
|
||||
`kill`, `grep`, `awk`, `fuser`, `lsof`, `pgrep`, and most POSIX CLI tools
|
||||
simply don't exist on Windows. Test availability with
|
||||
`shutil.which("tool")` and fall back to a Windows-native equivalent —
|
||||
usually PowerShell via `subprocess.run(["powershell", "-NoProfile",
|
||||
"-Command", ...])`.
|
||||
|
||||
For process enumeration: PowerShell's `Get-CimInstance Win32_Process` is
|
||||
the modern replacement for `wmic process`. See
|
||||
`hermes_cli/gateway.py::_scan_gateway_pids` for the pattern.
|
||||
|
||||
3. **`termios` and `fcntl` are Unix-only.** Always catch both `ImportError`
|
||||
and `NotImplementedError`:
|
||||
```python
|
||||
try:
|
||||
from simple_term_menu import TerminalMenu
|
||||
menu = TerminalMenu(options)
|
||||
idx = menu.show()
|
||||
except (ImportError, NotImplementedError):
|
||||
# Fallback: numbered menu for Windows
|
||||
for i, opt in enumerate(options):
|
||||
print(f" {i+1}. {opt}")
|
||||
idx = int(input("Choice: ")) - 1
|
||||
```
|
||||
|
||||
4. **File encoding.** Windows may save `.env` files in `cp1252`. Always
|
||||
handle encoding errors:
|
||||
```python
|
||||
try:
|
||||
load_dotenv(env_path)
|
||||
except UnicodeDecodeError:
|
||||
load_dotenv(env_path, encoding="latin-1")
|
||||
```
|
||||
Config files (`config.yaml`) may be saved with a UTF-8 BOM by Notepad and
|
||||
similar editors — use `encoding="utf-8-sig"` when reading files that
|
||||
could have been touched by a Windows GUI editor.
|
||||
|
||||
5. **Process management.** `os.setsid()`, `os.killpg()`, `os.fork()`,
|
||||
`os.getuid()`, and POSIX signal handling differ on Windows. Guard with
|
||||
`platform.system()`, `sys.platform`, or `hasattr(os, "setsid")`:
|
||||
```python
|
||||
if platform.system() != "Windows":
|
||||
kwargs["preexec_fn"] = os.setsid
|
||||
else:
|
||||
kwargs["creationflags"] = subprocess.CREATE_NEW_PROCESS_GROUP
|
||||
```
|
||||
|
||||
**Preferred:** for killing a process AND its children (what `os.killpg`
|
||||
does on POSIX), use `psutil` — it works on every platform:
|
||||
```python
|
||||
import psutil
|
||||
try:
|
||||
parent = psutil.Process(pid)
|
||||
# Kill children first (leaf-up), then the parent.
|
||||
for child in parent.children(recursive=True):
|
||||
child.kill()
|
||||
parent.kill()
|
||||
except psutil.NoSuchProcess:
|
||||
pass
|
||||
```
|
||||
|
||||
6. **Signals that don't exist on Windows: `SIGALRM`, `SIGCHLD`, `SIGHUP`,
|
||||
`SIGUSR1`, `SIGUSR2`, `SIGPIPE`, `SIGQUIT`, `SIGKILL`.** Python's
|
||||
`signal` module raises `AttributeError` at import time if you reference
|
||||
them on Windows. Use `getattr(signal, "SIGKILL", signal.SIGTERM)` or
|
||||
gate the whole block behind a platform check. `loop.add_signal_handler`
|
||||
raises `NotImplementedError` on Windows — always catch it.
|
||||
|
||||
7. **Path separators.** Use `pathlib.Path` instead of string concatenation
|
||||
with `/`. Forward slashes work almost everywhere on Windows, but
|
||||
`subprocess.run(["cmd.exe", "/c", ...])` and other shell contexts can
|
||||
require backslashes — convert with `str(path)` at the subprocess boundary,
|
||||
not inside Python logic.
|
||||
|
||||
8. **Symlinks need elevated privileges on Windows** (unless Developer Mode is
|
||||
on). Tests that create symlinks need `@pytest.mark.skipif(sys.platform ==
|
||||
"win32", reason="Symlinks require elevated privileges on Windows")`.
|
||||
|
||||
9. **POSIX file modes (0o600, 0o644, etc.) are NOT enforced on NTFS** by
|
||||
default. Tests that assert on `stat().st_mode & 0o777` must skip on
|
||||
Windows — the concept doesn't translate. Use ACLs (`icacls`, `pywin32`)
|
||||
for Windows secret-file protection if needed.
|
||||
|
||||
10. **Detached background daemons on Windows need `pythonw.exe`, NOT
|
||||
`python.exe`.** `python.exe` always allocates or attaches to a console,
|
||||
which makes it vulnerable to `CTRL_C_EVENT` broadcasts from any sibling
|
||||
process. `pythonw.exe` is the no-console variant. Combine with
|
||||
`CREATE_NO_WINDOW | DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP |
|
||||
CREATE_BREAKAWAY_FROM_JOB` in `subprocess.Popen(creationflags=...)`.
|
||||
See `hermes_cli/gateway_windows.py::_spawn_detached` for the reference
|
||||
implementation.
|
||||
|
||||
11. **`subprocess.Popen` with `.cmd` or `.bat` shims needs `shutil.which`
|
||||
to resolve.** Passing `"agent-browser"` to `Popen` on Windows finds
|
||||
the extensionless POSIX shebang shim in `node_modules/.bin/`, which
|
||||
`CreateProcessW` can't execute — you'll get `WinError 193 "not a valid
|
||||
Win32 application"`. Use `shutil.which("agent-browser", path=local_bin)`
|
||||
which honors PATHEXT and picks the `.CMD` variant on Windows.
|
||||
|
||||
12. **Don't use shell shebangs as a way to run Python.** `#!/usr/bin/env
|
||||
python` only works when the file is executed through a Unix shell.
|
||||
`subprocess.run(["./myscript.py"])` on Windows fails even if the file
|
||||
has a shebang line. Always invoke Python explicitly:
|
||||
`[sys.executable, "myscript.py"]`.
|
||||
|
||||
13. **Shell commands in installers.** If you change `scripts/install.sh`,
|
||||
make the equivalent change in `scripts/install.ps1`. The two scripts
|
||||
are the canonical example of "works on Linux does not mean works on
|
||||
Windows" and have drifted multiple times — keep them in lockstep.
|
||||
|
||||
14. **Known paths that are OneDrive-redirected on Windows:** Desktop,
|
||||
Documents, Pictures, Videos. The "real" path when OneDrive Backup is
|
||||
enabled is `%USERPROFILE%\OneDrive\Desktop` (etc.), NOT
|
||||
`%USERPROFILE%\Desktop` (which exists as an empty husk). Resolve the
|
||||
real location via `ctypes` + `SHGetKnownFolderPath` or by reading the
|
||||
`Shell Folders` registry key — never assume `~/Desktop`.
|
||||
|
||||
15. **CRLF vs LF in generated scripts.** Windows `cmd.exe` and `schtasks`
|
||||
parse line-by-line; mixed or LF-only line endings can break multi-line
|
||||
`.cmd` / `.bat` files. Use `open(path, "w", encoding="utf-8",
|
||||
newline="\r\n")` — or `open(path, "wb")` + explicit bytes — when
|
||||
generating scripts Windows will execute.
|
||||
|
||||
16. **Two different quoting schemes in one command line.** `subprocess.run
|
||||
(["schtasks", "/TR", some_cmd])` → schtasks itself parses `/TR`, AND
|
||||
the `some_cmd` string is re-parsed by `cmd.exe` when the task fires.
|
||||
Different parsers, different escape rules. Use two separate quoting
|
||||
helpers and never cross them. See `hermes_cli/gateway_windows.py::
|
||||
_quote_cmd_script_arg` and `_quote_schtasks_arg` for the reference
|
||||
pair.
|
||||
|
||||
### Testing cross-platform
|
||||
|
||||
Tests that use POSIX-only syscalls need a skip marker. Common ones:
|
||||
- Symlinks → `@pytest.mark.skipif(sys.platform == "win32", ...)`
|
||||
- `0o600` file modes → `@pytest.mark.skipif(sys.platform.startswith("win"), ...)`
|
||||
- `signal.SIGALRM` → Unix-only (see `tests/conftest.py::_enforce_test_timeout`)
|
||||
- `os.setsid` / `os.fork` → Unix-only
|
||||
- Live Winsock / Windows-specific regression tests →
|
||||
`@pytest.mark.skipif(sys.platform != "win32", reason="Windows-specific regression")`
|
||||
|
||||
If you monkeypatch `sys.platform` for cross-platform tests, also patch
|
||||
`platform.system()` / `platform.release()` / `platform.mac_ver()` — each
|
||||
re-reads the real OS independently, so half-patched tests still route
|
||||
through the wrong branch on a Windows runner.
|
||||
|
||||
---
|
||||
|
||||
## Security Considerations
|
||||
|
||||
Hermes has terminal access. Security matters.
|
||||
|
||||
### Existing protections
|
||||
|
||||
| Layer | Implementation |
|
||||
|-------|---------------|
|
||||
| **Sudo password piping** | Uses `shlex.quote()` to prevent shell injection |
|
||||
| **Dangerous command detection** | Regex patterns in `tools/approval.py` with user approval flow |
|
||||
| **Cron prompt injection** | Scanner in `tools/cronjob_tools.py` blocks instruction-override patterns |
|
||||
| **Write deny list** | Protected paths (`~/.ssh/authorized_keys`, `/etc/shadow`) resolved via `os.path.realpath()` to prevent symlink bypass |
|
||||
| **Skills guard** | Security scanner for hub-installed skills (`tools/skills_guard.py`) |
|
||||
| **Code execution sandbox** | `execute_code` child process runs with API keys stripped from environment |
|
||||
| **Container hardening** | Docker: all capabilities dropped, no privilege escalation, PID limits, size-limited tmpfs |
|
||||
|
||||
### When contributing security-sensitive code
|
||||
|
||||
- **Always use `shlex.quote()`** when interpolating user input into shell commands
|
||||
- **Resolve symlinks** with `os.path.realpath()` before path-based access control checks
|
||||
- **Don't log secrets.** API keys, tokens, and passwords should never appear in log output
|
||||
- **Catch broad exceptions** around tool execution so a single failure doesn't crash the agent loop
|
||||
- **Test on all platforms** if your change touches file paths, process management, or shell commands
|
||||
|
||||
If your PR affects security, note it explicitly in the description.
|
||||
|
||||
### Dependency pinning policy (supply chain hardening)
|
||||
|
||||
After the [litellm supply chain compromise](https://github.com/BerriAI/litellm/issues/24512) in March 2026 and the [Mini Shai-Hulud worm campaign](https://socket.dev/blog/tanstack-npm-packages-compromised-mini-shai-hulud-supply-chain-attack) in May 2026, all dependencies must follow these rules:
|
||||
|
||||
| Source type | Required treatment | Rationale |
|
||||
|---|---|---|
|
||||
| **PyPI package** | `>=floor,<next_major` | PyPI versions are immutable once published, but new versions can be pushed into your range. A `<next_major` ceiling stops a 1.x install from upgrading to a malicious 2.0.0. |
|
||||
| **Git URL** (atroposlib, tinker, yc-bench, Baileys) | Full commit SHA | Branches and tags are mutable refs; SHA is content-addressed. |
|
||||
| **GitHub Actions** | Full commit SHA + version comment | Action tags are mutable refs (e.g. tj-actions/changed-files March 2025). Pin as `uses: owner/action@<sha> # vX.Y.Z` |
|
||||
| **CI-only pip installs** | `==exact` | Hermetic CI builds; churn is acceptable. |
|
||||
|
||||
**Every new PyPI dependency in a PR must have a `<next_major` upper bound.** PRs adding unbounded `>=X.Y.Z` specs will be rejected by reviewers. The `supply-chain-audit.yml` CI workflow also flags dependency manifest changes for manual review.
|
||||
|
||||
**How to determine the ceiling:**
|
||||
- If the package is at version `1.x.y`, use `<2`.
|
||||
- If the package is at version `0.x.y` (pre-1.0), use `<0.(current_minor + 2)` — e.g. if current is `0.29.x`, use `<0.32`. This gives ~2 minor versions of headroom while keeping the window small enough that a hostile takeover version is unlikely to land inside it.
|
||||
- Exception: packages with very stable APIs (e.g. `aiohttp-socks`) can use `<1` at reviewer discretion.
|
||||
|
||||
**Examples:**
|
||||
```toml
|
||||
# ✅ Correct — post-1.0
|
||||
"openai>=2.21.0,<3"
|
||||
"pydantic>=2.12.5,<3"
|
||||
|
||||
# ✅ Correct — pre-1.0 (tight minor window)
|
||||
"asyncpg>=0.29,<0.32"
|
||||
"aiosqlite>=0.20,<0.23"
|
||||
"hindsight-client>=0.4.22,<0.5"
|
||||
|
||||
# ❌ Rejected — no upper bound
|
||||
"some-package>=1.2.3"
|
||||
|
||||
# ❌ Rejected — too tight (blocks legitimate patches)
|
||||
"some-package==1.2.3"
|
||||
|
||||
# ❌ Rejected — too loose for pre-1.0 (allows 80 minor versions)
|
||||
"some-package>=0.20,<1"
|
||||
```
|
||||
|
||||
**Reference PRs:** #2796 (litellm removal), #2810 (upper bounds pass), #9801 (SHA pinning + supply-chain-audit CI).
|
||||
|
||||
---
|
||||
|
||||
## Pull Request Process
|
||||
|
||||
### Branch naming
|
||||
|
||||
```
|
||||
fix/description # Bug fixes
|
||||
feat/description # New features
|
||||
docs/description # Documentation
|
||||
test/description # Tests
|
||||
refactor/description # Code restructuring
|
||||
```
|
||||
|
||||
### Before submitting
|
||||
|
||||
1. **Run tests**: `scripts/run_tests.sh` (recommended; same as CI) or `pytest tests/ -v` with the project venv activated
|
||||
2. **Test manually**: Run `hermes` and exercise the code path you changed
|
||||
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2
|
||||
4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature.
|
||||
|
||||
### PR description
|
||||
|
||||
Include:
|
||||
- **What** changed and **why**
|
||||
- **How to test** it (reproduction steps for bugs, usage examples for features)
|
||||
- **What platforms** you tested on
|
||||
- Reference any related issues
|
||||
|
||||
### Commit messages
|
||||
|
||||
We use [Conventional Commits](https://www.conventionalcommits.org/):
|
||||
|
||||
```
|
||||
<type>(<scope>): <description>
|
||||
```
|
||||
|
||||
| Type | Use for |
|
||||
|------|---------|
|
||||
| `fix` | Bug fixes |
|
||||
| `feat` | New features |
|
||||
| `docs` | Documentation |
|
||||
| `test` | Tests |
|
||||
| `refactor` | Code restructuring (no behavior change) |
|
||||
| `chore` | Build, CI, dependency updates |
|
||||
|
||||
Scopes: `cli`, `gateway`, `tools`, `skills`, `agent`, `install`, `whatsapp`, `security`, etc.
|
||||
|
||||
Examples:
|
||||
```
|
||||
fix(cli): prevent crash in save_config_value when model is a string
|
||||
feat(gateway): add WhatsApp multi-user session isolation
|
||||
fix(security): prevent shell injection in sudo password piping
|
||||
test(tools): add unit tests for file_operations
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Reporting Issues
|
||||
|
||||
- Use [GitHub Issues](https://github.com/NousResearch/hermes-agent/issues)
|
||||
- Include: OS, Python version, Hermes version (`hermes version`), full error traceback
|
||||
- Include steps to reproduce
|
||||
- Check existing issues before creating duplicates
|
||||
- For security vulnerabilities, please report privately
|
||||
|
||||
---
|
||||
|
||||
## Community
|
||||
|
||||
- **Discord**: [discord.gg/NousResearch](https://discord.gg/NousResearch) — for questions, showcasing projects, and sharing skills
|
||||
- **GitHub Discussions**: For design proposals and architecture discussions
|
||||
- **Skills Hub**: Upload specialized skills to a registry and share them with the community
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
By contributing, you agree that your contributions will be licensed under the [MIT License](LICENSE).
|
||||
326
Dockerfile
326
Dockerfile
@@ -1,326 +0,0 @@
|
||||
FROM ghcr.io/astral-sh/uv:0.11.6-python3.13-trixie@sha256:b3c543b6c4f23a5f2df22866bd7857e5d304b67a564f4feab6ac22044dde719b AS uv_source
|
||||
# Node 22 LTS source stage. Debian trixie's bundled nodejs is pinned to 20.x
|
||||
# which reached EOL in April 2026 — we copy node + npm + corepack from the
|
||||
# upstream node:22 image instead so we can stay on a supported LTS without
|
||||
# waiting for Debian 14 (forky, ~mid-2027). Bookworm-based slim image used
|
||||
# so the produced binary links against glibc 2.36, which runs cleanly on
|
||||
# our Debian 13 (trixie, glibc 2.41) runtime. Bumping to a new Node major
|
||||
# is a one-line ARG change; see #4977.
|
||||
FROM node:22-bookworm-slim@sha256:7af03b14a13c8cdd38e45058fd957bf00a72bbe17feac43b1c15a689c029c732 AS node_source
|
||||
FROM debian:13.4
|
||||
|
||||
# Disable Python stdout buffering to ensure logs are printed immediately
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
# Store Playwright browsers outside the volume mount so the build-time
|
||||
# install survives the /opt/data volume overlay at runtime.
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
|
||||
|
||||
# Install system dependencies in one layer, clear APT cache.
|
||||
# tini was previously PID 1 to reap orphaned zombie processes (MCP stdio
|
||||
# subprocesses, git, bun, etc.) that would otherwise accumulate when hermes
|
||||
# ran as PID 1. See #15012. Phase 2 of the s6-overlay supervision plan
|
||||
# replaces tini with s6-overlay's /init (PID 1 = s6-svscan), which reaps
|
||||
# zombies non-blockingly on SIGCHLD and additionally supervises the main
|
||||
# hermes process, the dashboard, and per-profile gateways.
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ca-certificates curl iputils-ping python3 python-is-python3 ripgrep ffmpeg gcc python3-dev python3-venv libffi-dev libolm-dev procps git openssh-client docker-cli xz-utils && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# ---------- s6-overlay install ----------
|
||||
# s6-overlay provides supervision for the main hermes process, the dashboard,
|
||||
# and per-profile gateways. /init becomes PID 1 below — see ENTRYPOINT.
|
||||
#
|
||||
# Multi-arch: BuildKit auto-populates TARGETARCH (amd64 / arm64). s6-overlay
|
||||
# uses tarball names keyed on the kernel arch string (x86_64 / aarch64), so
|
||||
# we map between them inline. The noarch + symlinks tarballs are
|
||||
# architecture-independent and reused as-is.
|
||||
#
|
||||
# We use `curl` instead of `ADD` for the per-arch tarball because `ADD`
|
||||
# evaluates its URL at parse time, before any ARG / TARGETARCH substitution
|
||||
# — splitting one URL per arch into two ADDs would download both on every
|
||||
# build and leave dead bytes in the cache. A single curl + arch-keyed URL
|
||||
# is simpler and cache-friendlier.
|
||||
#
|
||||
# Supply-chain integrity: every tarball is checksum-verified against the
|
||||
# upstream-published SHA256. To bump S6_OVERLAY_VERSION, fetch the four
|
||||
# `.sha256` files from the corresponding release and update the ARGs. The
|
||||
# checksum lookup happens during build, so a compromised release artifact
|
||||
# fails the build loudly instead of silently producing a tampered image.
|
||||
ARG TARGETARCH
|
||||
ARG S6_OVERLAY_VERSION=3.2.3.0
|
||||
ARG S6_OVERLAY_NOARCH_SHA256=b720f9d9340efc8bb07528b9743813c836e4b02f8693d90241f047998b4c53cf
|
||||
ARG S6_OVERLAY_X86_64_SHA256=a93f02882c6ed46b21e7adb5c0add86154f01236c93cd82c7d682722e8840563
|
||||
ARG S6_OVERLAY_AARCH64_SHA256=0952056ff913482163cc30e35b2e944b507ba1025d78f5becbb89367bf344581
|
||||
ARG S6_OVERLAY_SYMLINKS_SHA256=a60dc5235de3ecbcf874b9c1f18d73263ab99b289b9329aa950e8729c4789f0e
|
||||
ADD https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}/s6-overlay-noarch.tar.xz /tmp/
|
||||
ADD https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}/s6-overlay-symlinks-noarch.tar.xz /tmp/
|
||||
RUN set -eu; \
|
||||
case "${TARGETARCH:-amd64}" in \
|
||||
amd64) s6_arch="x86_64"; s6_arch_sha="${S6_OVERLAY_X86_64_SHA256}" ;; \
|
||||
arm64) s6_arch="aarch64"; s6_arch_sha="${S6_OVERLAY_AARCH64_SHA256}" ;; \
|
||||
*) echo "Unsupported TARGETARCH=${TARGETARCH} for s6-overlay" >&2; exit 1 ;; \
|
||||
esac; \
|
||||
curl -fsSL --retry 3 -o /tmp/s6-overlay-arch.tar.xz \
|
||||
"https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}/s6-overlay-${s6_arch}.tar.xz"; \
|
||||
{ \
|
||||
printf '%s %s\n' "${S6_OVERLAY_NOARCH_SHA256}" /tmp/s6-overlay-noarch.tar.xz; \
|
||||
printf '%s %s\n' "${s6_arch_sha}" /tmp/s6-overlay-arch.tar.xz; \
|
||||
printf '%s %s\n' "${S6_OVERLAY_SYMLINKS_SHA256}" /tmp/s6-overlay-symlinks-noarch.tar.xz; \
|
||||
} > /tmp/s6-overlay.sha256; \
|
||||
sha256sum -c /tmp/s6-overlay.sha256; \
|
||||
tar -C / -Jxpf /tmp/s6-overlay-noarch.tar.xz; \
|
||||
tar -C / -Jxpf /tmp/s6-overlay-arch.tar.xz; \
|
||||
tar -C / -Jxpf /tmp/s6-overlay-symlinks-noarch.tar.xz; \
|
||||
rm /tmp/s6-overlay-*.tar.xz /tmp/s6-overlay.sha256; \
|
||||
# #34192: backward-compat shim for orchestration templates that still\
|
||||
# reference the legacy /usr/bin/tini entrypoint (e.g. Hostinger's\
|
||||
# 'Hermes WebUI' catalog). The image has moved to s6-overlay /init\
|
||||
# as PID 1 (see ENTRYPOINT below + the migration comment at the top\
|
||||
# of this file), but external wrappers pinned to /usr/bin/tini will\
|
||||
# crash with 'tini: No such file or directory' on startup. The shim\
|
||||
# symlinks /usr/bin/tini -> /init so legacy wrappers exec the right\
|
||||
# PID-1 reaper without behavior change for users on the current\
|
||||
# ENTRYPOINT. Safe to drop once the affected catalogs are updated.\
|
||||
ln -sf /init /usr/bin/tini
|
||||
|
||||
# Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
|
||||
RUN useradd -u 10000 -m -d /opt/data hermes
|
||||
|
||||
COPY --chmod=0755 --from=uv_source /usr/local/bin/uv /usr/local/bin/uvx /usr/local/bin/
|
||||
|
||||
# Node 22 LTS: copy the node binary plus the bundled npm + corepack JS
|
||||
# installs from the upstream image. npm and npx are recreated as symlinks
|
||||
# because they're symlinks in the source image (and need to live on PATH).
|
||||
# See node_source stage at the top of the file for the version-bump
|
||||
# rationale (#4977).
|
||||
COPY --chmod=0755 --from=node_source /usr/local/bin/node /usr/local/bin/
|
||||
COPY --from=node_source /usr/local/lib/node_modules/npm /usr/local/lib/node_modules/npm
|
||||
COPY --from=node_source /usr/local/lib/node_modules/corepack /usr/local/lib/node_modules/corepack
|
||||
RUN ln -sf /usr/local/lib/node_modules/npm/bin/npm-cli.js /usr/local/bin/npm && \
|
||||
ln -sf /usr/local/lib/node_modules/npm/bin/npx-cli.js /usr/local/bin/npx && \
|
||||
ln -sf /usr/local/lib/node_modules/corepack/dist/corepack.js /usr/local/bin/corepack
|
||||
|
||||
WORKDIR /opt/hermes
|
||||
|
||||
# ---------- Layer-cached dependency install ----------
|
||||
# Copy only package manifests first so npm install + Playwright are cached
|
||||
# unless the lockfiles themselves change.
|
||||
#
|
||||
# ui-tui/packages/hermes-ink/ is copied IN FULL (not just its manifests)
|
||||
# because it is referenced as a `file:` workspace dependency from
|
||||
# ui-tui/package.json. Copying the tree up front lets npm resolve the
|
||||
# workspace to real content instead of stopping at a bare package.json.
|
||||
COPY package.json package-lock.json ./
|
||||
COPY web/package.json web/
|
||||
COPY ui-tui/package.json ui-tui/
|
||||
COPY ui-tui/packages/hermes-ink/ ui-tui/packages/hermes-ink/
|
||||
|
||||
# `npm_config_install_links=false` forces npm to install `file:` deps as
|
||||
# symlinks instead of copies. This is the default since npm 10+, which is
|
||||
# what the image ships now (via the node:22 source stage). We set it
|
||||
# explicitly anyway as defense-in-depth: the previous Debian-bundled npm
|
||||
# 9.x defaulted to install-as-copy, which produced a hidden
|
||||
# node_modules/.package-lock.json that permanently disagreed with the root
|
||||
# lock on the @hermes/ink entry, tripped the TUI launcher's
|
||||
# `_tui_need_npm_install()` check on every startup, and triggered a
|
||||
# runtime `npm install` that then failed with EACCES. Keeping the env
|
||||
# guards against a future regression if the source npm version changes.
|
||||
ENV npm_config_install_links=false
|
||||
|
||||
RUN npm install --prefer-offline --no-audit && \
|
||||
npx playwright install --with-deps chromium --only-shell && \
|
||||
npm cache clean --force
|
||||
|
||||
# ---------- Layer-cached Python dependency install ----------
|
||||
# Copy only pyproject.toml + uv.lock so the Python dep resolve + wheel
|
||||
# download + native-extension compile layer is cached unless those inputs
|
||||
# change. Before this split the Python install sat after `COPY . .`, so
|
||||
# every source-only commit re-did ~4-5 min of dep work on cold builds.
|
||||
#
|
||||
# README.md is referenced by pyproject.toml's `readme =` field, but it's
|
||||
# excluded from the build context by .dockerignore's `*.md`. uv's build
|
||||
# frontend stats the readme path during dep resolution, so we `touch` an
|
||||
# empty placeholder — the real README is restored by `COPY . .` below.
|
||||
#
|
||||
# `uv sync --frozen --no-install-project --extra all --extra messaging`
|
||||
# installs the deps reachable through the composite `[all]` extra
|
||||
# (handpicked set intended for the production image), plus gateway
|
||||
# messaging adapters that should work in the published image without a
|
||||
# first-boot lazy install. We do NOT use `--all-extras`:
|
||||
# that would pull in `[rl]` (atroposlib + tinker + torch + wandb from
|
||||
# git), `[yc-bench]` (another git dep), and `[termux-all]` (Android
|
||||
# redundancy), none of which belong in the published container.
|
||||
#
|
||||
# Provider packages (anthropic, bedrock, azure-identity) are included
|
||||
# so Docker users can use these providers without requiring runtime
|
||||
# lazy-install access to PyPI (often blocked in containerized envs).
|
||||
#
|
||||
# The hindsight memory provider's client (hindsight-client) is baked in
|
||||
# for the same reason: it lazy-installs into /opt/hermes/.venv at first
|
||||
# use, which lives inside the (immutable) image layer rather than the
|
||||
# mounted /opt/data volume, so it is lost on every container recreate /
|
||||
# image update and recall/retain then fails with
|
||||
# `ModuleNotFoundError: No module named 'hindsight_client'` (#38128).
|
||||
#
|
||||
# The editable link is created after the source copy below.
|
||||
COPY pyproject.toml uv.lock ./
|
||||
RUN touch ./README.md
|
||||
RUN uv sync --frozen --no-install-project --extra all --extra messaging --extra anthropic --extra bedrock --extra azure-identity --extra hindsight
|
||||
|
||||
# ---------- Source code ----------
|
||||
# .dockerignore excludes node_modules, so the installs above survive.
|
||||
COPY --chown=hermes:hermes . .
|
||||
|
||||
# Build browser dashboard and terminal UI assets.
|
||||
RUN cd web && npm run build && \
|
||||
cd ../ui-tui && npm run build
|
||||
|
||||
# ---------- Permissions ----------
|
||||
# Make install dir world-readable so any HERMES_UID can read it at runtime.
|
||||
# The venv needs to be traversable too.
|
||||
# node_modules trees additionally need to be writable by the hermes user
|
||||
# so the runtime `npm install` triggered by _tui_need_npm_install() in
|
||||
# hermes_cli/main.py succeeds (see #18800). /opt/hermes/web is build-time
|
||||
# only (HERMES_WEB_DIST points at hermes_cli/web_dist) and is intentionally
|
||||
# not chowned here.
|
||||
# /opt/hermes/gateway is runtime-writable: Python may create __pycache__ and
|
||||
# gateway state artifacts beneath the package after services drop privileges,
|
||||
# especially when the hermes UID is remapped at boot (#27221).
|
||||
# The .venv MUST remain hermes-writable so lazy_deps.py can install
|
||||
# remaining optional platform packages and future pin bumps at first use.
|
||||
# Without this, `uv pip install` fails with EACCES and adapters silently
|
||||
# fail to load. See tools/lazy_deps.py.
|
||||
USER root
|
||||
RUN chmod -R a+rX /opt/hermes && \
|
||||
chown -R hermes:hermes /opt/hermes/.venv /opt/hermes/ui-tui /opt/hermes/gateway /opt/hermes/node_modules
|
||||
# Start as root so the s6-overlay stage2 hook can usermod/groupmod and chown
|
||||
# the data volume. Each supervised service then drops to the hermes user via
|
||||
# `s6-setuidgid hermes` in its run script. If HERMES_UID is unset, services
|
||||
# run as the default hermes user (UID 10000).
|
||||
|
||||
# ---------- Link hermes-agent itself (editable) ----------
|
||||
# Deps are already installed in the cached layer above; `--no-deps` makes
|
||||
# this a fast (~1s) egg-link creation with no resolution or downloads.
|
||||
RUN uv pip install --no-cache-dir --no-deps -e "."
|
||||
|
||||
# ---------- Bake build-time git revision ----------
|
||||
# .dockerignore excludes .git, so `git rev-parse HEAD` from inside the
|
||||
# container always returns nothing — meaning `hermes dump` reports
|
||||
# "(unknown)" and the startup banner drops its `· upstream <sha>` suffix.
|
||||
# That makes support triage from container bug reports impossible:
|
||||
# we can't tell which commit the user is actually running.
|
||||
#
|
||||
# Fix: write the commit SHA passed via the HERMES_GIT_SHA build-arg to
|
||||
# /opt/hermes/.hermes_build_sha at build time, and have
|
||||
# hermes_cli/build_info.py read it at runtime. Both `hermes dump` and
|
||||
# banner.get_git_banner_state() try the baked SHA first, then fall back
|
||||
# to live `git rev-parse` for source installs (unchanged behaviour).
|
||||
#
|
||||
# The arg is optional — local `docker build` without --build-arg simply
|
||||
# omits the file, and the runtime falls back to live-git lookup. CI
|
||||
# (.github/workflows/docker-publish.yml) passes ${{ github.sha }} so
|
||||
# every published image has it.
|
||||
ARG HERMES_GIT_SHA=
|
||||
RUN if [ -n "${HERMES_GIT_SHA}" ]; then \
|
||||
printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha && \
|
||||
chown hermes:hermes /opt/hermes/.hermes_build_sha; \
|
||||
fi
|
||||
|
||||
# ---------- s6-overlay service wiring ----------
|
||||
# Static services declared at build time: main-hermes + dashboard.
|
||||
# Per-profile gateway services are registered dynamically at runtime by
|
||||
# the profile create/delete hooks (Phase 4); they live under
|
||||
# /run/service/ (tmpfs) and are reconciled on container restart by
|
||||
# /etc/cont-init.d/02-reconcile-profiles (Phase 4 Task 4.0).
|
||||
COPY docker/s6-rc.d/ /etc/s6-overlay/s6-rc.d/
|
||||
|
||||
# stage2-hook handles UID/GID remap, volume chown, config seeding,
|
||||
# skills sync — all the work the old entrypoint.sh did before
|
||||
# `exec hermes`. Wired in as cont-init.d/01- so it
|
||||
# runs before user services start.
|
||||
#
|
||||
# 02-reconcile-profiles re-creates per-profile gateway s6 service
|
||||
# slots from $HERMES_HOME/profiles/<name>/ after a container restart
|
||||
# (the /run/service/ scandir is tmpfs and wiped on restart). Phase 4.
|
||||
RUN mkdir -p /etc/cont-init.d && \
|
||||
printf '#!/command/with-contenv sh\nexec /opt/hermes/docker/stage2-hook.sh\n' \
|
||||
> /etc/cont-init.d/01-hermes-setup && \
|
||||
chmod +x /etc/cont-init.d/01-hermes-setup
|
||||
COPY --chmod=0755 docker/cont-init.d/015-supervise-perms /etc/cont-init.d/015-supervise-perms
|
||||
COPY --chmod=0755 docker/cont-init.d/02-reconcile-profiles /etc/cont-init.d/02-reconcile-profiles
|
||||
|
||||
# ---------- Runtime ----------
|
||||
ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
|
||||
# Point the TUI launcher at the prebuilt bundle baked at build time (Layer 8:
|
||||
# `ui-tui && npm run build`). This makes _make_tui_argv take the prebuilt-bundle
|
||||
# fast path (`node --expose-gc /opt/hermes/ui-tui/dist/entry.js`) and skip the
|
||||
# _tui_need_npm_install / runtime `npm install` branch entirely — exactly the
|
||||
# nix/packaged-release path the launcher was designed for.
|
||||
#
|
||||
# Why this is required (not just an optimization): the root package-lock.json
|
||||
# describes the WHOLE monorepo workspace set (root + web + ui-tui + apps/*),
|
||||
# but the image only installs root/web/ui-tui (apps/* — the desktop app — is
|
||||
# never `npm install`ed here). So the actualized node_modules permanently
|
||||
# disagrees with the canonical lock, _tui_need_npm_install() returns True on
|
||||
# every launch, and the runtime `npm install` it triggers (a) can never
|
||||
# converge against the partial monorepo and (b) races itself across concurrent
|
||||
# embedded-chat (/api/pty) connections → ENOTEMPTY → the chat tab dies with a
|
||||
# 502 / "[session ended]". Pointing at the prebuilt bundle sidesteps the whole
|
||||
# check. (A separate launcher hardening is tracked independently.)
|
||||
ENV HERMES_TUI_DIR=/opt/hermes/ui-tui
|
||||
ENV HERMES_HOME=/opt/data
|
||||
|
||||
# `docker exec` privilege-drop shim. When operators run
|
||||
# `docker exec <c> hermes ...` they default to root, and any file the
|
||||
# command writes under $HERMES_HOME (auth.json, .env, config.yaml) ends
|
||||
# up root-owned and unreadable to the supervised gateway (UID 10000).
|
||||
# The shim lives at /opt/hermes/bin/hermes, sits earliest on PATH, and
|
||||
# transparently re-exec's the real venv binary via `s6-setuidgid hermes`
|
||||
# when invoked as root. Non-root callers (supervised processes,
|
||||
# `--user hermes`, etc.) hit the short-circuit path with no overhead.
|
||||
# Recursion is impossible because the shim exec's the venv binary by
|
||||
# absolute path (/opt/hermes/.venv/bin/hermes). See the shim source for
|
||||
# the opt-out env var (HERMES_DOCKER_EXEC_AS_ROOT=1).
|
||||
COPY --chmod=0755 docker/hermes-exec-shim.sh /opt/hermes/bin/hermes
|
||||
|
||||
# Pre-s6 entrypoint.sh did `source .venv/bin/activate` which exported
|
||||
# the venv bin onto PATH; Architecture B's main-wrapper.sh does the
|
||||
# same for the container's main process, but `docker exec` and our
|
||||
# cont-init.d scripts don't pass through the wrapper. Expose the venv
|
||||
# bin globally so `docker exec <container> hermes ...` and any
|
||||
# subprocess that doesn't activate the venv first still find hermes.
|
||||
#
|
||||
# /opt/hermes/bin is prepended ahead of the venv so the privilege-drop
|
||||
# shim wins PATH resolution. The shim's last act is to exec the venv
|
||||
# binary by absolute path, so this PATH ordering is transparent to
|
||||
# every other consumer.
|
||||
ENV PATH="/opt/hermes/bin:/opt/hermes/.venv/bin:/opt/data/.local/bin:${PATH}"
|
||||
RUN mkdir -p /opt/data
|
||||
VOLUME [ "/opt/data" ]
|
||||
|
||||
# s6-overlay's /init is PID 1. It sets up the supervision tree, runs
|
||||
# /etc/cont-init.d/* (our stage2 hook), starts s6-rc services
|
||||
# declared in /etc/s6-overlay/s6-rc.d/, then exec's its remaining
|
||||
# argv as the container's "main program" with stdin/stdout/stderr
|
||||
# inherited (this is what makes interactive --tui work). When the
|
||||
# main program exits, /init begins stage 3 shutdown and the container
|
||||
# exits with the program's exit code. Replaces tini — see Phase 2 of
|
||||
# docs/plans/2026-05-07-s6-overlay-dynamic-subagent-gateways.md.
|
||||
#
|
||||
# We use the ENTRYPOINT+CMD split rather than CMD alone so the
|
||||
# wrapper is prepended to user-supplied args automatically:
|
||||
#
|
||||
# docker run <image> → /init main-wrapper.sh (CMD default)
|
||||
# docker run <image> chat -q "hi" → /init main-wrapper.sh chat -q hi
|
||||
# docker run <image> sleep infinity → /init main-wrapper.sh sleep infinity
|
||||
# docker run <image> --tui → /init main-wrapper.sh --tui
|
||||
#
|
||||
# main-wrapper.sh handles arg routing (bare-exec vs. hermes
|
||||
# subcommand vs. no-args), drops to the hermes user via s6-setuidgid,
|
||||
# and exec's the final program so its exit code becomes the container
|
||||
# exit code. Without the wrapper-as-ENTRYPOINT, leading-dash args
|
||||
# like `--version` would be intercepted by /init's POSIX shell.
|
||||
ENTRYPOINT [ "/init", "/opt/hermes/docker/main-wrapper.sh" ]
|
||||
CMD [ ]
|
||||
21
LICENSE
21
LICENSE
@@ -1,21 +0,0 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2025 Nous Research
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
10
MANIFEST.in
10
MANIFEST.in
@@ -1,10 +0,0 @@
|
||||
graft skills
|
||||
graft optional-skills
|
||||
graft locales
|
||||
# Bundled plugin manifests (plugin.yaml / plugin.yml). Without these the
|
||||
# PluginManager scan (hermes_cli/plugins.py) finds zero plugins on installs
|
||||
# built from the sdist (e.g. Homebrew, downstream packagers). package-data
|
||||
# below covers the wheel; this covers the sdist. See #34034 / #28149.
|
||||
recursive-include plugins plugin.yaml plugin.yml
|
||||
global-exclude __pycache__
|
||||
global-exclude *.py[cod]
|
||||
375
README.md
375
README.md
@@ -1,216 +1,243 @@
|
||||
<p align="center">
|
||||
<img src="assets/banner.png" alt="Hermes Agent" width="100%">
|
||||
</p>
|
||||
# Hermes Agent
|
||||
|
||||
# Hermes Agent ☤
|
||||
An AI agent with advanced tool-calling capabilities, featuring a flexible toolsets system for organizing and managing tools.
|
||||
|
||||
<p align="center">
|
||||
<a href="https://hermes-agent.nousresearch.com/docs/"><img src="https://img.shields.io/badge/Docs-hermes--agent.nousresearch.com-FFD700?style=for-the-badge" alt="Documentation"></a>
|
||||
<a href="https://discord.gg/NousResearch"><img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord"></a>
|
||||
<a href="https://github.com/NousResearch/hermes-agent/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-green?style=for-the-badge" alt="License: MIT"></a>
|
||||
<a href="https://nousresearch.com"><img src="https://img.shields.io/badge/Built%20by-Nous%20Research-blueviolet?style=for-the-badge" alt="Built by Nous Research"></a>
|
||||
<a href="README.zh-CN.md"><img src="https://img.shields.io/badge/Lang-中文-red?style=for-the-badge" alt="中文"></a>
|
||||
</p>
|
||||
## Features
|
||||
|
||||
**The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
|
||||
- **Web Tools**: Search, extract content, and crawl websites
|
||||
- **Terminal Tools**: Execute commands with interactive session support
|
||||
- **Vision Tools**: Analyze images from URLs
|
||||
- **Reasoning Tools**: Advanced multi-model reasoning (Mixture of Agents)
|
||||
- **Creative Tools**: Generate images from text prompts
|
||||
- **Toolsets System**: Organize tools into logical groups for different scenarios
|
||||
- **Batch Processing**: Process datasets in parallel with checkpointing and statistics tracking
|
||||
- **Ephemeral System Prompts**: Guide model behavior without polluting training datasets
|
||||
|
||||
Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NovitaAI](https://novita.ai) (AI-native cloud for Model API, Agent Sandbox, and GPU Cloud), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
|
||||
## Setup
|
||||
|
||||
<table>
|
||||
<tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>
|
||||
<tr><td><b>Lives where you do</b></td><td>Telegram, Discord, Slack, WhatsApp, Signal, and CLI — all from a single gateway process. Voice memo transcription, cross-platform conversation continuity.</td></tr>
|
||||
<tr><td><b>A closed learning loop</b></td><td>Agent-curated memory with periodic nudges. Autonomous skill creation after complex tasks. Skills self-improve during use. FTS5 session search with LLM summarization for cross-session recall. <a href="https://github.com/plastic-labs/honcho">Honcho</a> dialectic user modeling. Compatible with the <a href="https://agentskills.io">agentskills.io</a> open standard.</td></tr>
|
||||
<tr><td><b>Scheduled automations</b></td><td>Built-in cron scheduler with delivery to any platform. Daily reports, nightly backups, weekly audits — all in natural language, running unattended.</td></tr>
|
||||
<tr><td><b>Delegates and parallelizes</b></td><td>Spawn isolated subagents for parallel workstreams. Write Python scripts that call tools via RPC, collapsing multi-step pipelines into zero-context-cost turns.</td></tr>
|
||||
<tr><td><b>Runs anywhere, not just your laptop</b></td><td>Six terminal backends — local, Docker, SSH, Singularity, Modal, and Daytona. Daytona and Modal offer serverless persistence — your agent's environment hibernates when idle and wakes on demand, costing nearly nothing between sessions. Run it on a $5 VPS or a GPU cluster.</td></tr>
|
||||
<tr><td><b>Research-ready</b></td><td>Batch trajectory generation, trajectory compression for training the next generation of tool-calling models.</td></tr>
|
||||
</table>
|
||||
### 1. Install Dependencies
|
||||
```bash
|
||||
# Create and activate virtual environment (recommended)
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||
|
||||
---
|
||||
# Install required packages
|
||||
pip install -r requirements.txt
|
||||
|
||||
## Quick Install
|
||||
# Install Hecate for terminal tools
|
||||
git clone git@github.com:NousResearch/hecate.git
|
||||
cd hecate
|
||||
pip install -e .
|
||||
cd ..
|
||||
```
|
||||
|
||||
### Linux, macOS, WSL2, Termux
|
||||
### 2. Configure Environment Variables
|
||||
```bash
|
||||
# Copy the example environment file
|
||||
cp .env.example .env
|
||||
|
||||
# Edit .env and add your API keys
|
||||
nano .env # or use your preferred editor
|
||||
```
|
||||
|
||||
**Required API Keys:**
|
||||
- `ANTHROPIC_API_KEY` - Main agent model (get at: https://console.anthropic.com/)
|
||||
- `FIRECRAWL_API_KEY` - Web tools (get at: https://firecrawl.dev/)
|
||||
- `NOUS_API_KEY` - Vision & reasoning tools (get at: https://inference-api.nousresearch.com/)
|
||||
- `MORPH_API_KEY` - Terminal tools (get at: https://morph.so/)
|
||||
- `FAL_KEY` - Image generation (get at: https://fal.ai/)
|
||||
- `OPENAI_API_KEY` - Optional, for some Hecate features
|
||||
|
||||
See `.env.example` for all available configuration options including debug settings and terminal tool configuration.
|
||||
|
||||
## Toolsets System
|
||||
|
||||
The agent uses a toolsets system for organizing and managing tools. All tools must be part of a toolset to be accessible - individual tool selection is not supported. This ensures consistent and logical grouping of capabilities.
|
||||
|
||||
### Key Concepts
|
||||
|
||||
- **Toolsets**: Logical groups of tools for specific use cases (e.g., "research", "development", "debugging")
|
||||
- **Composition**: Toolsets can include other toolsets for powerful combinations
|
||||
- **Custom Toolsets**: Create your own toolsets at runtime or by editing `toolsets.py`
|
||||
- **Toolset-Only Access**: Tools are only accessible through toolsets, not individually
|
||||
|
||||
### Available Toolsets
|
||||
|
||||
See `toolsets.py` for the complete list of predefined toolsets including:
|
||||
- Basic toolsets (web, terminal, vision, creative, reasoning)
|
||||
- Composite toolsets (research, development, analysis, etc.)
|
||||
- Scenario-specific toolsets (debugging, documentation, API testing, etc.)
|
||||
- Special toolsets (safe mode without terminal, minimal, offline)
|
||||
|
||||
### Using Toolsets
|
||||
|
||||
```bash
|
||||
curl -fsSL https://hermes-agent.nousresearch.com/install.sh | bash
|
||||
# Use a predefined toolset
|
||||
python run_agent.py --enabled_toolsets=research --query "Find latest AI papers"
|
||||
|
||||
# Combine multiple toolsets
|
||||
python run_agent.py --enabled_toolsets=web,vision --query "Analyze this website"
|
||||
|
||||
# Enable all toolsets explicitly (same as omitting the flag)
|
||||
python run_agent.py --enabled_toolsets=all --query "Do web research and run commands if helpful"
|
||||
|
||||
# Safe mode (no terminal access)
|
||||
python run_agent.py --enabled_toolsets=safe --query "Help without running commands"
|
||||
|
||||
# List all available toolsets and tools
|
||||
python run_agent.py --list_tools
|
||||
```
|
||||
|
||||
### Windows (native, PowerShell)
|
||||
For detailed documentation on toolsets, see `TOOLSETS_README.md`.
|
||||
|
||||
> **Heads up:** Native Windows runs Hermes without WSL — CLI, gateway, TUI, and tools all work natively. If you'd rather use WSL2, the Linux/macOS one-liner above works there too. Found a bug? Please [file issues](https://github.com/NousResearch/hermes-agent/issues).
|
||||
## Basic Usage
|
||||
|
||||
Run this in PowerShell:
|
||||
|
||||
```powershell
|
||||
iex (irm https://hermes-agent.nousresearch.com/install.ps1)
|
||||
### Default (all tools enabled)
|
||||
```bash
|
||||
python run_agent.py \
|
||||
--query "search up the latest docs on jit in python 3.13 and write me basic example that's not in their docs. profile its perf" \
|
||||
--max_turns 20 \
|
||||
--model claude-sonnet-4-20250514 \
|
||||
--base_url https://api.anthropic.com/v1/ \
|
||||
--api_key $ANTHROPIC_API_KEY
|
||||
```
|
||||
|
||||
The installer handles everything: uv, Python 3.11, Node.js, ripgrep, ffmpeg, **and a portable Git Bash** (MinGit, unpacked to `%LOCALAPPDATA%\hermes\git` — no admin required, completely isolated from any system Git install). Hermes uses this bundled Git Bash to run shell commands.
|
||||
### With specific toolset
|
||||
```bash
|
||||
python run_agent.py \
|
||||
--query "Debug this Python error" \
|
||||
--enabled_toolsets=debugging \
|
||||
--model claude-sonnet-4-20250514 \
|
||||
--api_key $ANTHROPIC_API_KEY
|
||||
```
|
||||
|
||||
If you already have Git installed, the installer detects it and uses that instead. Otherwise a ~45MB MinGit download is all you need — it won't touch or interfere with any system Git.
|
||||
### Python API
|
||||
```python
|
||||
from run_agent import AIAgent
|
||||
|
||||
> **Android / Termux:** The tested manual path is documented in the [Termux guide](https://hermes-agent.nousresearch.com/docs/getting-started/termux). On Termux, Hermes installs a curated `.[termux]` extra because the full `.[all]` extra currently pulls Android-incompatible voice dependencies.
|
||||
>
|
||||
> **Windows:** Native Windows is fully supported — the PowerShell one-liner above installs everything. If you'd rather use WSL2, the Linux command works there too. Native Windows install lives under `%LOCALAPPDATA%\hermes`; WSL2 installs under `~/.hermes` as on Linux. The only Hermes feature that currently needs WSL2 specifically is the browser-based dashboard chat pane (it uses a POSIX PTY — classic CLI and gateway both run natively).
|
||||
# Use a specific toolset
|
||||
agent = AIAgent(
|
||||
model="claude-opus-4-20250514",
|
||||
enabled_toolsets=["research"]
|
||||
)
|
||||
response = agent.chat("Find information about quantum computing")
|
||||
|
||||
After installation:
|
||||
# Create custom toolset at runtime
|
||||
from toolsets import create_custom_toolset
|
||||
|
||||
create_custom_toolset(
|
||||
name="my_tools",
|
||||
description="My custom toolkit",
|
||||
tools=["web_search"],
|
||||
includes=["terminal", "vision"]
|
||||
)
|
||||
|
||||
agent = AIAgent(enabled_toolsets=["my_tools"])
|
||||
```
|
||||
|
||||
## Batch Processing
|
||||
|
||||
Process multiple prompts from a dataset in parallel with automatic checkpointing and statistics tracking:
|
||||
|
||||
```bash
|
||||
source ~/.bashrc # reload shell (or: source ~/.zshrc)
|
||||
hermes # start chatting!
|
||||
# Basic batch processing
|
||||
python batch_runner.py \
|
||||
--dataset_file=prompts.jsonl \
|
||||
--batch_size=20 \
|
||||
--run_name=my_run
|
||||
|
||||
# With specific distribution
|
||||
python batch_runner.py \
|
||||
--dataset_file=prompts.jsonl \
|
||||
--batch_size=20 \
|
||||
--run_name=image_run \
|
||||
--distribution=image_gen \
|
||||
--num_workers=4
|
||||
```
|
||||
|
||||
---
|
||||
**Key Features:**
|
||||
- Parallel processing with configurable workers
|
||||
- Toolset distributions for varied data generation
|
||||
- Automatic checkpointing and resume capability
|
||||
- Combined output in `data/<run_name>/trajectories.jsonl`
|
||||
- Tool usage statistics and success rates
|
||||
|
||||
## Getting Started
|
||||
**Quick Start:** See [QUICKSTART_BATCH.md](QUICKSTART_BATCH.md) for a 5-minute getting started guide.
|
||||
**Full Documentation:** See [BATCH_PROCESSING.md](BATCH_PROCESSING.md) for comprehensive documentation.
|
||||
|
||||
### Ephemeral System Prompts
|
||||
|
||||
The ephemeral system prompt feature allows you to guide the model's behavior during batch processing **without** saving that prompt to the training dataset trajectories. This is useful for:
|
||||
|
||||
- Guiding model behavior during data collection
|
||||
- Adding task-specific instructions
|
||||
- Keeping saved trajectories clean and focused on tool-calling format
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
hermes # Interactive CLI — start a conversation
|
||||
hermes model # Choose your LLM provider and model
|
||||
hermes tools # Configure which tools are enabled
|
||||
hermes config set # Set individual config values
|
||||
hermes gateway # Start the messaging gateway (Telegram, Discord, etc.)
|
||||
hermes setup # Run the full setup wizard (configures everything at once)
|
||||
hermes claw migrate # Migrate from OpenClaw (if coming from OpenClaw)
|
||||
hermes update # Update to the latest version
|
||||
hermes doctor # Diagnose any issues
|
||||
python batch_runner.py \
|
||||
--dataset_file=prompts.jsonl \
|
||||
--batch_size=10 \
|
||||
--run_name=my_run \
|
||||
--ephemeral_system_prompt="You are a helpful assistant focused on image generation."
|
||||
```
|
||||
|
||||
📖 **[Full documentation →](https://hermes-agent.nousresearch.com/docs/)**
|
||||
The ephemeral prompt will influence the model's behavior during execution, but **only the standard tool-calling system prompt** will be saved in the trajectory files.
|
||||
|
||||
---
|
||||
**Documentation:** See [docs/ephemeral_system_prompt.md](docs/ephemeral_system_prompt.md) for complete details.
|
||||
|
||||
## Skip the API-key collection — Nous Portal
|
||||
## Command Line Arguments
|
||||
|
||||
Hermes works with whatever provider you want — that's not changing. But if you'd rather not collect five separate API keys for the model, web search, image generation, TTS, and a cloud browser, **[Nous Portal](https://portal.nousresearch.com)** covers all of them under one subscription:
|
||||
**Single Agent (`run_agent.py`):**
|
||||
- `--query`: The question or task for the agent
|
||||
- `--model`: Model to use (default: claude-opus-4-20250514)
|
||||
- `--api_key`: API key for authentication
|
||||
- `--base_url`: API endpoint URL
|
||||
- `--max_turns`: Maximum number of tool-calling iterations
|
||||
- `--enabled_toolsets`: Comma-separated list of toolsets to enable. Use `all` (or `*`) to enable everything. If omitted, all toolsets are enabled by default.
|
||||
- `--disabled_toolsets`: Comma-separated list of toolsets to disable
|
||||
- `--list_tools`: List all available toolsets and tools
|
||||
- `--save_trajectories`: Save conversation trajectories to JSONL files
|
||||
|
||||
- **300+ models** — pick any of them with `/model <name>`
|
||||
- **Tool Gateway** — web search (Firecrawl), image generation (FAL), text-to-speech (OpenAI), cloud browser (Browser Use), all routed through your sub. No extra accounts.
|
||||
**Batch Processing (`batch_runner.py`):**
|
||||
- `--dataset_file`: Path to JSONL file with prompts
|
||||
- `--batch_size`: Number of prompts per batch
|
||||
- `--run_name`: Name for this run (for output/checkpointing)
|
||||
- `--distribution`: Toolset distribution to use (default: "default")
|
||||
- `--num_workers`: Number of parallel workers (default: 4)
|
||||
- `--resume`: Resume from checkpoint if interrupted
|
||||
- `--ephemeral_system_prompt`: System prompt used during execution but NOT saved to trajectories
|
||||
- `--list_distributions`: List available toolset distributions
|
||||
|
||||
One command from a fresh install:
|
||||
## Environment Variables
|
||||
|
||||
```bash
|
||||
hermes setup --portal
|
||||
```
|
||||
All environment variables can be configured in the `.env` file (copy from `.env.example`).
|
||||
|
||||
That logs you in via OAuth, sets Nous as your provider, and turns on the Tool Gateway. Check what's wired up any time with `hermes portal info`. Full details on the [Tool Gateway docs page](https://hermes-agent.nousresearch.com/docs/user-guide/features/tool-gateway).
|
||||
**Core API Keys:**
|
||||
- `ANTHROPIC_API_KEY`: Main agent model
|
||||
- `FIRECRAWL_API_KEY`: Web tools (search, extract, crawl)
|
||||
- `NOUS_API_KEY`: Vision and reasoning tools
|
||||
- `MORPH_API_KEY`: Terminal tools
|
||||
- `FAL_KEY`: Image generation tools
|
||||
- `OPENAI_API_KEY`: Optional, for some Hecate features
|
||||
|
||||
You can still bring your own keys per-tool whenever you want — the gateway is per-backend, not all-or-nothing.
|
||||
|
||||
---
|
||||
|
||||
## CLI vs Messaging Quick Reference
|
||||
|
||||
Hermes has two entry points: start the terminal UI with `hermes`, or run the gateway and talk to it from Telegram, Discord, Slack, WhatsApp, Signal, or Email. Once you're in a conversation, many slash commands are shared across both interfaces.
|
||||
|
||||
| Action | CLI | Messaging platforms |
|
||||
| ------------------------------ | --------------------------------------------- | -------------------------------------------------------------------------------- |
|
||||
| Start chatting | `hermes` | Run `hermes gateway setup` + `hermes gateway start`, then send the bot a message |
|
||||
| Start fresh conversation | `/new` or `/reset` | `/new` or `/reset` |
|
||||
| Change model | `/model [provider:model]` | `/model [provider:model]` |
|
||||
| Set a personality | `/personality [name]` | `/personality [name]` |
|
||||
| Retry or undo the last turn | `/retry`, `/undo` | `/retry`, `/undo` |
|
||||
| Compress context / check usage | `/compress`, `/usage`, `/insights [--days N]` | `/compress`, `/usage`, `/insights [days]` |
|
||||
| Browse skills | `/skills` or `/<skill-name>` | `/<skill-name>` |
|
||||
| Interrupt current work | `Ctrl+C` or send a new message | `/stop` or send a new message |
|
||||
| Platform-specific status | `/platforms` | `/status`, `/sethome` |
|
||||
|
||||
For the full command lists, see the [CLI guide](https://hermes-agent.nousresearch.com/docs/user-guide/cli) and the [Messaging Gateway guide](https://hermes-agent.nousresearch.com/docs/user-guide/messaging).
|
||||
|
||||
---
|
||||
**Configuration Options:**
|
||||
- `HECATE_VM_LIFETIME_SECONDS`: VM lifetime (default: 300)
|
||||
- `HECATE_DEFAULT_SNAPSHOT_ID`: Default snapshot (default: snapshot_p5294qxt)
|
||||
- `WEB_TOOLS_DEBUG`, `VISION_TOOLS_DEBUG`, `MOA_TOOLS_DEBUG`, `IMAGE_TOOLS_DEBUG`: Enable debug logging
|
||||
|
||||
## Documentation
|
||||
|
||||
All documentation lives at **[hermes-agent.nousresearch.com/docs](https://hermes-agent.nousresearch.com/docs/)**:
|
||||
**Single Agent Usage:**
|
||||
- `TOOLSETS_README.md`: Comprehensive guide to the toolsets system
|
||||
- `toolsets.py`: View and modify available toolsets
|
||||
- `model_tools.py`: Core tool definitions and handlers
|
||||
|
||||
| Section | What's Covered |
|
||||
| --------------------------------------------------------------------------------------------------- | ---------------------------------------------------------- |
|
||||
| [Quickstart](https://hermes-agent.nousresearch.com/docs/getting-started/quickstart) | Install → setup → first conversation in 2 minutes |
|
||||
| [CLI Usage](https://hermes-agent.nousresearch.com/docs/user-guide/cli) | Commands, keybindings, personalities, sessions |
|
||||
| [Configuration](https://hermes-agent.nousresearch.com/docs/user-guide/configuration) | Config file, providers, models, all options |
|
||||
| [Messaging Gateway](https://hermes-agent.nousresearch.com/docs/user-guide/messaging) | Telegram, Discord, Slack, WhatsApp, Signal, Home Assistant |
|
||||
| [Security](https://hermes-agent.nousresearch.com/docs/user-guide/security) | Command approval, DM pairing, container isolation |
|
||||
| [Tools & Toolsets](https://hermes-agent.nousresearch.com/docs/user-guide/features/tools) | 40+ tools, toolset system, terminal backends |
|
||||
| [Skills System](https://hermes-agent.nousresearch.com/docs/user-guide/features/skills) | Procedural memory, Skills Hub, creating skills |
|
||||
| [Memory](https://hermes-agent.nousresearch.com/docs/user-guide/features/memory) | Persistent memory, user profiles, best practices |
|
||||
| [MCP Integration](https://hermes-agent.nousresearch.com/docs/user-guide/features/mcp) | Connect any MCP server for extended capabilities |
|
||||
| [Cron Scheduling](https://hermes-agent.nousresearch.com/docs/user-guide/features/cron) | Scheduled tasks with platform delivery |
|
||||
| [Context Files](https://hermes-agent.nousresearch.com/docs/user-guide/features/context-files) | Project context that shapes every conversation |
|
||||
| [Architecture](https://hermes-agent.nousresearch.com/docs/developer-guide/architecture) | Project structure, agent loop, key classes |
|
||||
| [Contributing](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) | Development setup, PR process, code style |
|
||||
| [CLI Reference](https://hermes-agent.nousresearch.com/docs/reference/cli-commands) | All commands and flags |
|
||||
| [Environment Variables](https://hermes-agent.nousresearch.com/docs/reference/environment-variables) | Complete env var reference |
|
||||
**Batch Processing:**
|
||||
- `QUICKSTART_BATCH.md`: 5-minute quick start guide
|
||||
- `BATCH_PROCESSING.md`: Complete batch processing documentation
|
||||
- `toolset_distributions.py`: Toolset distributions for data generation
|
||||
|
||||
---
|
||||
## Examples
|
||||
|
||||
## Migrating from OpenClaw
|
||||
|
||||
If you're coming from OpenClaw, Hermes can automatically import your settings, memories, skills, and API keys.
|
||||
|
||||
**During first-time setup:** The setup wizard (`hermes setup`) automatically detects `~/.openclaw` and offers to migrate before configuration begins.
|
||||
|
||||
**Anytime after install:**
|
||||
|
||||
```bash
|
||||
hermes claw migrate # Interactive migration (full preset)
|
||||
hermes claw migrate --dry-run # Preview what would be migrated
|
||||
hermes claw migrate --preset user-data # Migrate without secrets
|
||||
hermes claw migrate --overwrite # Overwrite existing conflicts
|
||||
```
|
||||
|
||||
What gets imported:
|
||||
|
||||
- **SOUL.md** — persona file
|
||||
- **Memories** — MEMORY.md and USER.md entries
|
||||
- **Skills** — user-created skills → `~/.hermes/skills/openclaw-imports/`
|
||||
- **Command allowlist** — approval patterns
|
||||
- **Messaging settings** — platform configs, allowed users, working directory
|
||||
- **API keys** — allowlisted secrets (Telegram, OpenRouter, OpenAI, Anthropic, ElevenLabs)
|
||||
- **TTS assets** — workspace audio files
|
||||
- **Workspace instructions** — AGENTS.md (with `--workspace-target`)
|
||||
|
||||
See `hermes claw migrate --help` for all options, or use the `openclaw-migration` skill for an interactive agent-guided migration with dry-run previews.
|
||||
|
||||
---
|
||||
|
||||
## Contributing
|
||||
|
||||
We welcome contributions! See the [Contributing Guide](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) for development setup, code style, and PR process.
|
||||
|
||||
Quick start for contributors — clone and go with `setup-hermes.sh`:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/NousResearch/hermes-agent.git
|
||||
cd hermes-agent
|
||||
./setup-hermes.sh # installs uv, creates venv, installs .[all], symlinks ~/.local/bin/hermes
|
||||
./hermes # auto-detects the venv, no need to `source` first
|
||||
```
|
||||
|
||||
Manual path (equivalent to the above):
|
||||
|
||||
```bash
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uv venv .venv --python 3.11
|
||||
source .venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
scripts/run_tests.sh
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Community
|
||||
|
||||
- 💬 [Discord](https://discord.gg/NousResearch)
|
||||
- 📚 [Skills Hub](https://agentskills.io)
|
||||
- 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
|
||||
- 🔌 [computer-use-linux](https://github.com/avifenesh/computer-use-linux) — Linux desktop-control MCP server for Hermes and other MCP hosts, with AT-SPI accessibility trees, Wayland/X11 input, screenshots, and compositor window targeting.
|
||||
- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
MIT — see [LICENSE](LICENSE).
|
||||
|
||||
Built by [Nous Research](https://nousresearch.com).
|
||||
See `TOOLSETS_README.md` for extensive examples of using different toolsets for various scenarios.
|
||||
|
||||
201
README.zh-CN.md
201
README.zh-CN.md
@@ -1,201 +0,0 @@
|
||||
<p align="center">
|
||||
<img src="assets/banner.png" alt="Hermes Agent" width="100%">
|
||||
</p>
|
||||
|
||||
# Hermes Agent ☤
|
||||
|
||||
<p align="center">
|
||||
<a href="https://hermes-agent.nousresearch.com/docs/"><img src="https://img.shields.io/badge/Docs-hermes--agent.nousresearch.com-FFD700?style=for-the-badge" alt="Documentation"></a>
|
||||
<a href="https://discord.gg/NousResearch"><img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord"></a>
|
||||
<a href="https://github.com/NousResearch/hermes-agent/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-green?style=for-the-badge" alt="License: MIT"></a>
|
||||
<a href="https://nousresearch.com"><img src="https://img.shields.io/badge/Built%20by-Nous%20Research-blueviolet?style=for-the-badge" alt="Built by Nous Research"></a>
|
||||
<a href="README.md"><img src="https://img.shields.io/badge/Lang-English-lightgrey?style=for-the-badge" alt="English"></a>
|
||||
</p>
|
||||
|
||||
**由 [Nous Research](https://nousresearch.com) 构建的自进化 AI 代理。** 它是唯一内置学习闭环的智能代理——从经验中创建技能,在使用中改进技能,主动持久化知识,搜索过往对话,并在跨会话中逐步构建对你的深度理解。可以在 $5 的 VPS 上运行,也可以在 GPU 集群上运行,或者使用几乎零成本的 Serverless 基础设施。它不绑定你的笔记本——你可以在 Telegram 上与它对话,而它在云端 VM 上工作。
|
||||
|
||||
支持任意模型——[Nous Portal](https://portal.nousresearch.com)、[OpenRouter](https://openrouter.ai)(200+ 模型)、[NVIDIA NIM](https://build.nvidia.com)(Nemotron)、[小米 MiMo](https://platform.xiaomimimo.com)、[z.ai/GLM](https://z.ai)、[Kimi/Moonshot](https://platform.moonshot.ai)、[MiniMax](https://www.minimax.io)、[Hugging Face](https://huggingface.co)、OpenAI,或自定义端点。使用 `hermes model` 即可切换——无需改代码,无锁定。
|
||||
|
||||
<table>
|
||||
<tr><td><b>真正的终端界面</b></td><td>完整的 TUI,支持多行编辑、斜杠命令自动补全、对话历史、中断重定向和流式工具输出。</td></tr>
|
||||
<tr><td><b>随你所在</b></td><td>Telegram、Discord、Slack、WhatsApp、Signal 和 CLI——全部从单个网关进程运行。语音备忘录转写、跨平台对话连续性。</td></tr>
|
||||
<tr><td><b>闭环学习</b></td><td>代理管理记忆并定期自我提醒。复杂任务后自动创建技能。技能在使用中自我改进。FTS5 会话搜索配合 LLM 摘要实现跨会话回溯。<a href="https://github.com/plastic-labs/honcho">Honcho</a> 辩证式用户建模。兼容 <a href="https://agentskills.io">agentskills.io</a> 开放标准。</td></tr>
|
||||
<tr><td><b>定时自动化</b></td><td>内置 cron 调度器,支持向任何平台投递。日报、夜间备份、周审计——全部用自然语言描述,无人值守运行。</td></tr>
|
||||
<tr><td><b>委派与并行</b></td><td>生成隔离子代理处理并行工作流。编写 Python 脚本通过 RPC 调用工具,将多步管道压缩为零上下文开销的轮次。</td></tr>
|
||||
<tr><td><b>随处运行</b></td><td>六种终端后端——本地、Docker、SSH、Daytona、Singularity 和 Modal。Daytona 和 Modal 提供 Serverless 持久化——代理环境空闲时休眠、按需唤醒,空闲期间几乎零成本。$5 VPS 或 GPU 集群都能跑。</td></tr>
|
||||
<tr><td><b>研究就绪</b></td><td>批量轨迹生成、轨迹压缩——用于训练下一代工具调用模型。</td></tr>
|
||||
</table>
|
||||
|
||||
---
|
||||
|
||||
## 快速安装
|
||||
|
||||
```bash
|
||||
curl -fsSL https://hermes-agent.nousresearch.com/install.sh | bash
|
||||
```
|
||||
|
||||
支持 Linux、macOS、WSL2 和 Android (Termux)。安装程序会自动处理平台特定的配置。
|
||||
|
||||
> **Android / Termux:** 已测试的手动安装路径请参考 [Termux 指南](https://hermes-agent.nousresearch.com/docs/getting-started/termux)。在 Termux 上,Hermes 会安装精选的 `.[termux]` 扩展,因为完整的 `.[all]` 扩展会拉取 Android 不兼容的语音依赖。
|
||||
>
|
||||
> **Windows:** 原生 Windows 不受支持。请安装 [WSL2](https://learn.microsoft.com/zh-cn/windows/wsl/install) 并运行上述命令。
|
||||
|
||||
安装后:
|
||||
|
||||
```bash
|
||||
source ~/.bashrc # 重新加载 shell(或: source ~/.zshrc)
|
||||
hermes # 开始对话!
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 快速入门
|
||||
|
||||
```bash
|
||||
hermes # 交互式 CLI — 开始对话
|
||||
hermes model # 选择 LLM 提供商和模型
|
||||
hermes tools # 配置启用的工具
|
||||
hermes config set # 设置单个配置项
|
||||
hermes gateway # 启动消息网关(Telegram、Discord 等)
|
||||
hermes setup # 运行完整设置向导(一次性配置所有内容)
|
||||
hermes claw migrate # 从 OpenClaw 迁移(如果来自 OpenClaw)
|
||||
hermes update # 更新到最新版本
|
||||
hermes doctor # 诊断问题
|
||||
```
|
||||
|
||||
📖 **[完整文档 →](https://hermes-agent.nousresearch.com/docs/)**
|
||||
|
||||
---
|
||||
|
||||
## 省去到处收集 API Key — Nous Portal
|
||||
|
||||
Hermes 始终允许你使用任意服务商,这点不会改变。但如果你不想为模型、网页搜索、图像生成、TTS、云浏览器分别去申请五个不同的 API Key,**[Nous Portal](https://portal.nousresearch.com)** 用一个订阅就能覆盖全部:
|
||||
|
||||
- **300+ 模型** — 用 `/model <name>` 随时切换
|
||||
- **Tool Gateway** — 网页搜索(Firecrawl)、图像生成(FAL)、文本转语音(OpenAI)、云浏览器(Browser Use),全部通过订阅托管。无需额外注册任何账户。
|
||||
|
||||
全新安装时一条命令即可:
|
||||
|
||||
```bash
|
||||
hermes setup --portal
|
||||
```
|
||||
|
||||
它会通过 OAuth 登录、把 Nous 设为推理服务商,并启用 Tool Gateway。随时用 `hermes portal info` 查看路由状态。完整说明见 [Tool Gateway 文档](https://hermes-agent.nousresearch.com/docs/user-guide/features/tool-gateway)。
|
||||
|
||||
你随时可以按工具单独切回自己的 API Key — Gateway 是按工具粒度生效的,不是一刀切。
|
||||
|
||||
---
|
||||
|
||||
## CLI 与消息平台 快速对照
|
||||
|
||||
Hermes 有两种入口:用 `hermes` 启动终端 UI,或运行网关从 Telegram、Discord、Slack、WhatsApp、Signal 或 Email 与之对话。进入对话后,许多斜杠命令在两种界面中通用。
|
||||
|
||||
| 操作 | CLI | 消息平台 |
|
||||
|------|-----|----------|
|
||||
| 开始对话 | `hermes` | 运行 `hermes gateway setup` + `hermes gateway start`,然后给机器人发消息 |
|
||||
| 开始新对话 | `/new` 或 `/reset` | `/new` 或 `/reset` |
|
||||
| 更换模型 | `/model [provider:model]` | `/model [provider:model]` |
|
||||
| 设置人格 | `/personality [name]` | `/personality [name]` |
|
||||
| 重试或撤销上一轮 | `/retry`、`/undo` | `/retry`、`/undo` |
|
||||
| 压缩上下文 / 查看用量 | `/compress`、`/usage`、`/insights [--days N]` | `/compress`、`/usage`、`/insights [days]` |
|
||||
| 浏览技能 | `/skills` 或 `/<skill-name>` | `/skills` 或 `/<skill-name>` |
|
||||
| 中断当前工作 | `Ctrl+C` 或发送新消息 | `/stop` 或发送新消息 |
|
||||
| 平台特定状态 | `/platforms` | `/status`、`/sethome` |
|
||||
|
||||
完整命令列表请参阅 [CLI 指南](https://hermes-agent.nousresearch.com/docs/user-guide/cli) 和 [消息网关指南](https://hermes-agent.nousresearch.com/docs/user-guide/messaging)。
|
||||
|
||||
---
|
||||
|
||||
## 文档
|
||||
|
||||
所有文档位于 **[hermes-agent.nousresearch.com/docs](https://hermes-agent.nousresearch.com/docs/)**:
|
||||
|
||||
| 章节 | 内容 |
|
||||
|------|------|
|
||||
| [快速开始](https://hermes-agent.nousresearch.com/docs/getting-started/quickstart) | 安装 → 设置 → 2 分钟内开始首次对话 |
|
||||
| [CLI 使用](https://hermes-agent.nousresearch.com/docs/user-guide/cli) | 命令、快捷键、人格、会话 |
|
||||
| [配置](https://hermes-agent.nousresearch.com/docs/user-guide/configuration) | 配置文件、提供商、模型、所有选项 |
|
||||
| [消息网关](https://hermes-agent.nousresearch.com/docs/user-guide/messaging) | Telegram、Discord、Slack、WhatsApp、Signal、Home Assistant |
|
||||
| [安全](https://hermes-agent.nousresearch.com/docs/user-guide/security) | 命令审批、DM 配对、容器隔离 |
|
||||
| [工具与工具集](https://hermes-agent.nousresearch.com/docs/user-guide/features/tools) | 40+ 工具、工具集系统、终端后端 |
|
||||
| [技能系统](https://hermes-agent.nousresearch.com/docs/user-guide/features/skills) | 过程记忆、技能中心、创建技能 |
|
||||
| [记忆](https://hermes-agent.nousresearch.com/docs/user-guide/features/memory) | 持久记忆、用户画像、最佳实践 |
|
||||
| [MCP 集成](https://hermes-agent.nousresearch.com/docs/user-guide/features/mcp) | 连接任意 MCP 服务器扩展能力 |
|
||||
| [定时调度](https://hermes-agent.nousresearch.com/docs/user-guide/features/cron) | 定时任务与平台投递 |
|
||||
| [上下文文件](https://hermes-agent.nousresearch.com/docs/user-guide/features/context-files) | 影响每次对话的项目上下文 |
|
||||
| [架构](https://hermes-agent.nousresearch.com/docs/developer-guide/architecture) | 项目结构、代理循环、关键类 |
|
||||
| [贡献](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) | 开发设置、PR 流程、代码风格 |
|
||||
| [CLI 参考](https://hermes-agent.nousresearch.com/docs/reference/cli-commands) | 所有命令和标志 |
|
||||
| [环境变量](https://hermes-agent.nousresearch.com/docs/reference/environment-variables) | 完整环境变量参考 |
|
||||
|
||||
---
|
||||
|
||||
## 从 OpenClaw 迁移
|
||||
|
||||
如果你来自 OpenClaw,Hermes 可以自动导入你的设置、记忆、技能和 API 密钥。
|
||||
|
||||
**首次安装时:** 安装向导(`hermes setup`)会自动检测 `~/.openclaw` 并在配置开始前提供迁移选项。
|
||||
|
||||
**安装后任意时间:**
|
||||
|
||||
```bash
|
||||
hermes claw migrate # 交互式迁移(完整预设)
|
||||
hermes claw migrate --dry-run # 预览将要迁移的内容
|
||||
hermes claw migrate --preset user-data # 仅迁移用户数据,不含密钥
|
||||
hermes claw migrate --overwrite # 覆盖已有冲突
|
||||
```
|
||||
|
||||
导入内容:
|
||||
- **SOUL.md** — 人格文件
|
||||
- **记忆** — MEMORY.md 和 USER.md 条目
|
||||
- **技能** — 用户创建的技能 → `~/.hermes/skills/openclaw-imports/`
|
||||
- **命令白名单** — 审批模式
|
||||
- **消息设置** — 平台配置、允许用户、工作目录
|
||||
- **API 密钥** — 白名单中的密钥(Telegram、OpenRouter、OpenAI、Anthropic、ElevenLabs)
|
||||
- **TTS 资产** — 工作区音频文件
|
||||
- **工作区指令** — AGENTS.md(使用 `--workspace-target`)
|
||||
|
||||
使用 `hermes claw migrate --help` 查看所有选项,或使用 `openclaw-migration` 技能进行交互式代理引导迁移(含干运行预览)。
|
||||
|
||||
---
|
||||
|
||||
## 贡献
|
||||
|
||||
欢迎贡献!请参阅 [贡献指南](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) 了解开发设置、代码风格和 PR 流程。
|
||||
|
||||
贡献者快速开始——克隆并使用 `setup-hermes.sh`:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/NousResearch/hermes-agent.git
|
||||
cd hermes-agent
|
||||
./setup-hermes.sh # 安装 uv、创建 venv、安装 .[all]、创建符号链接 ~/.local/bin/hermes
|
||||
./hermes # 自动检测 venv,无需先 source
|
||||
```
|
||||
|
||||
手动安装(等效于上述命令):
|
||||
|
||||
```bash
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uv venv venv --python 3.11
|
||||
source venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
python -m pytest tests/ -q
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 社区
|
||||
|
||||
- 💬 [Discord](https://discord.gg/NousResearch)
|
||||
- 📚 [技能中心](https://agentskills.io)
|
||||
- 🐛 [问题反馈](https://github.com/NousResearch/hermes-agent/issues)
|
||||
- 💡 [讨论区](https://github.com/NousResearch/hermes-agent/discussions)
|
||||
- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — 社区微信桥接:在同一微信账号上运行 Hermes Agent 和 OpenClaw。
|
||||
|
||||
---
|
||||
|
||||
## 许可证
|
||||
|
||||
MIT — 详见 [LICENSE](LICENSE)。
|
||||
|
||||
由 [Nous Research](https://nousresearch.com) 构建。
|
||||
331
SECURITY.md
331
SECURITY.md
@@ -1,331 +0,0 @@
|
||||
# Hermes Agent Security Policy
|
||||
|
||||
This document describes Hermes Agent's trust model, names the one
|
||||
security boundary the project treats as load-bearing, and defines the
|
||||
scope for vulnerability reports.
|
||||
|
||||
## 1. Reporting a Vulnerability
|
||||
|
||||
Report privately via [GitHub Security Advisories](https://github.com/NousResearch/hermes-agent/security/advisories/new)
|
||||
or **security@nousresearch.com**. Do not open public issues for
|
||||
security vulnerabilities. **Hermes Agent does not operate a bug
|
||||
bounty program.**
|
||||
|
||||
A useful report includes:
|
||||
|
||||
- A concise description and severity assessment.
|
||||
- The affected component, identified by file path and line range
|
||||
(e.g. `path/to/file.py:120-145`).
|
||||
- Environment details (`hermes version`, commit SHA, OS, Python
|
||||
version).
|
||||
- A reproduction against `main` or the latest release.
|
||||
- A statement of which trust boundary in §2 is crossed.
|
||||
|
||||
Please read §2 and §3 before submitting. Reports that demonstrate
|
||||
limits of an in-process heuristic this policy does not treat as a
|
||||
boundary will be closed as out-of-scope under §3 — but see §3.2:
|
||||
they are still welcome as regular issues or pull requests, just not
|
||||
through the private security channel.
|
||||
|
||||
---
|
||||
|
||||
## 2. Trust Model
|
||||
|
||||
Hermes Agent is a single-tenant personal agent. Its posture is
|
||||
layered, and the layers are not equally load-bearing. Reporters and
|
||||
operators should reason about them in the same terms.
|
||||
|
||||
### 2.1 Definitions
|
||||
|
||||
- **Agent process.** The Python interpreter running Hermes Agent,
|
||||
including any Python modules it has loaded (skills, plugins,
|
||||
hook handlers).
|
||||
- **Terminal backend.** A pluggable execution target for the
|
||||
`terminal()` tool. The default runs commands directly on the host.
|
||||
Other backends run commands inside a container, cloud sandbox, or
|
||||
remote host.
|
||||
- **Input surface.** Any channel through which content enters the
|
||||
agent's context: operator input, web fetches, email, gateway
|
||||
messages, file reads, MCP server responses, tool results.
|
||||
- **Trust envelope.** The set of resources an operator has implicitly
|
||||
granted Hermes Agent access to by running it — typically, whatever
|
||||
the operator's own user account can reach on the host.
|
||||
- **Stance.** An explicit statement in Hermes Agent's documentation
|
||||
or code about how a consuming layer (adapter, UI, file writer,
|
||||
shell) should treat agent output — e.g. "the dashboard renders
|
||||
agent output as inert HTML."
|
||||
|
||||
### 2.2 The Boundary: OS-Level Isolation
|
||||
|
||||
**The only security boundary against an adversarial LLM is the
|
||||
operating system.** Nothing inside the agent process constitutes
|
||||
containment — not the approval gate, not output redaction, not any
|
||||
pattern scanner, not any tool allowlist. Any in-process component
|
||||
that screens LLM output is a heuristic operating on an
|
||||
attacker-influenced string, and this policy treats it as such.
|
||||
|
||||
Hermes Agent supports two OS-level isolation postures. They address
|
||||
different threats and an operator should choose deliberately.
|
||||
|
||||
#### Terminal-backend isolation
|
||||
|
||||
A non-default terminal backend runs LLM-emitted shell commands
|
||||
inside a container, remote host, or cloud sandbox. The file tools
|
||||
(`read_file`, `write_file`, `patch`) also run through this backend,
|
||||
since they are implemented on top of the shell contract — they
|
||||
cannot reach paths the backend doesn't expose.
|
||||
|
||||
What this confines: anything the agent does by issuing shell or
|
||||
file operations. What this does **not** confine: everything the
|
||||
agent does in its own Python process. That includes the
|
||||
code-execution tool (spawned as a host subprocess), MCP subprocesses
|
||||
(spawned from the agent's environment), plugin loading, hook
|
||||
dispatch, and skill loading (all imported into the agent
|
||||
interpreter).
|
||||
|
||||
Terminal-backend isolation is the right posture when the concern is
|
||||
LLM-emitted destructive shell or unwanted file-tool writes, and the
|
||||
operator is otherwise trusted.
|
||||
|
||||
#### Whole-process wrapping
|
||||
|
||||
Whole-process wrapping runs the entire agent process tree inside a
|
||||
sandbox. Every code path — shell, code-execution, MCP, file tools,
|
||||
plugins, hooks, skill loading — is subject to the same filesystem,
|
||||
network, process, and (where applicable) inference policy.
|
||||
|
||||
Hermes Agent supports this in two ways:
|
||||
|
||||
- **Hermes Agent's own Docker image and Compose setup.** Lighter-
|
||||
weight; the agent runs in a standard container with operator-
|
||||
configured mounts and network policy.
|
||||
- **[NVIDIA OpenShell](https://github.com/NVIDIA/OpenShell)**.
|
||||
OpenShell provides per-session sandboxes with declarative policy
|
||||
across filesystem, network (L7 egress), process/syscall, and
|
||||
inference-routing layers. Network and inference policies are
|
||||
hot-reloadable. Credentials are injected from a Provider store
|
||||
and never touch the sandbox filesystem.
|
||||
|
||||
Under a whole-process wrapper, Hermes Agent's in-process heuristics
|
||||
(§2.4) function as accident-prevention layered on top of a real
|
||||
boundary. This is the supported posture when the agent ingests
|
||||
content from surfaces the operator does not control — the open web,
|
||||
inbound email, multi-user channels, untrusted MCP servers — and for
|
||||
production or shared deployments.
|
||||
|
||||
Operators running the default local backend with untrusted input
|
||||
surfaces, or running a terminal-backend sandbox and expecting it to
|
||||
contain code paths that don't go through the shell, are operating
|
||||
outside the supported security posture.
|
||||
|
||||
### 2.3 Credential Scoping
|
||||
|
||||
Hermes Agent filters the environment it passes to its lower-trust
|
||||
in-process components: shell subprocesses, MCP subprocesses, and
|
||||
the code-execution child. Credentials like provider API keys and
|
||||
gateway tokens are stripped by default; variables explicitly
|
||||
declared by the operator or by a loaded skill are passed through.
|
||||
|
||||
This reduces casual exfiltration. It is not containment. Any
|
||||
component running inside the agent process (skills, plugins, hook
|
||||
handlers) can read whatever the agent itself can read, including
|
||||
in-memory credentials. The mitigation against a compromised
|
||||
in-process component is operator review before install (§2.4,
|
||||
§2.5), not environment scrubbing.
|
||||
|
||||
### 2.4 In-Process Heuristics
|
||||
|
||||
The following components screen or warn about LLM behavior. They
|
||||
are useful. They are not boundaries.
|
||||
|
||||
- The **approval gate** detects common destructive shell patterns
|
||||
and prompts the operator before execution. Shell is Turing-
|
||||
complete; a denylist over shell strings is structurally
|
||||
incomplete. The gate catches cooperative-mode mistakes, not
|
||||
adversarial output.
|
||||
- **Output redaction** strips secret-like patterns from display.
|
||||
A motivated output producer will defeat it.
|
||||
- **Skills Guard** scans installable skill content for injection
|
||||
patterns. It is a review aid; the boundary for third-party skills
|
||||
is operator review before install. Reviewing a skill means
|
||||
reading its Python code and scripts, not just its SKILL.md
|
||||
description — skills execute arbitrary Python at import time.
|
||||
|
||||
### 2.5 Plugin Trust Model
|
||||
|
||||
Plugins load into the agent process and run with full agent
|
||||
privileges: they can read the same credentials, call the same
|
||||
tools, register the same hooks, and import the same modules as
|
||||
anything shipped in-tree. The boundary for third-party plugins is
|
||||
operator review before install — the same rule as skills (§2.4),
|
||||
called out separately because plugins are architecturally heavier
|
||||
and often ship their own background services, network listeners,
|
||||
and dependencies.
|
||||
|
||||
A malicious or buggy plugin is not a vulnerability in Hermes Agent
|
||||
itself. Bugs in Hermes Agent's plugin-install or plugin-discovery
|
||||
path that prevent the operator from seeing what they're installing
|
||||
are in scope under §3.1.
|
||||
|
||||
### 2.6 External Surfaces
|
||||
|
||||
An **external surface** is any channel outside the local agent
|
||||
process through which a caller can dispatch agent work, resolve
|
||||
approvals, or receive agent output. Each surface has its own
|
||||
authorization model, but the rules below apply uniformly.
|
||||
|
||||
**Surfaces in Hermes Agent:**
|
||||
|
||||
- **Gateway platform adapters.** Messaging integrations in
|
||||
`gateway/platforms/` (Telegram, Discord, Slack, email, SMS, etc.)
|
||||
and analogous adapters shipped as plugins.
|
||||
- **Network-exposed HTTP surfaces.** The API server adapter, the
|
||||
dashboard plugin, the kanban plugin's HTTP endpoints, and any
|
||||
other plugin that binds a listening socket.
|
||||
- **Editor / IDE adapters.** The ACP adapter (`acp_adapter/`) and
|
||||
equivalent integrations that accept requests from a local client
|
||||
process.
|
||||
- **The TUI gateway (`tui_gateway/`).** JSON-RPC backend for the
|
||||
Ink terminal UI, reached over local IPC.
|
||||
|
||||
**Uniform rules:**
|
||||
|
||||
1. **Authorization is required at every surface that crosses a
|
||||
trust boundary.** For messaging and network HTTP surfaces, the
|
||||
boundary is the network: authorization means an operator-
|
||||
configured caller allowlist. For editor and local-IPC surfaces
|
||||
(ACP, TUI gateway), the boundary is the host's user account:
|
||||
authorization means relying on OS-level access control (file
|
||||
permissions, loopback-only binds) and not exposing the surface
|
||||
beyond the local user without an explicit network auth layer.
|
||||
2. **An allowlist is required for every enabled network-exposed
|
||||
adapter.** Adapters must refuse to dispatch agent work, resolve
|
||||
approvals, or relay output until an allowlist is set. Code paths
|
||||
that fail open when no allowlist is configured are code bugs in
|
||||
scope under §3.1.
|
||||
3. **Session identifiers are routing handles, not authorization
|
||||
boundaries.** Knowing another caller's session ID does not grant
|
||||
access to their approvals or output; authorization is always
|
||||
re-checked against the allowlist (or OS-level equivalent).
|
||||
4. **Within the authorized set, all callers are equally trusted.**
|
||||
Hermes Agent does not model per-caller capabilities inside a
|
||||
single adapter. Operators who need capability separation should
|
||||
run separate agent instances with separate allowlists.
|
||||
5. **Binding a local-only surface to a non-loopback interface is a
|
||||
break-glass operator decision (§3.2).** The dashboard and other
|
||||
plugin HTTP servers default to loopback; exposing them via
|
||||
`--host 0.0.0.0` or equivalent makes public-exposure hardening
|
||||
(§4) the operator's responsibility.
|
||||
|
||||
---
|
||||
|
||||
## 3. Scope
|
||||
|
||||
### 3.1 In Scope
|
||||
|
||||
- Escape from a declared OS-level isolation posture (§2.2): an
|
||||
attacker-controlled code path reaching state that the posture
|
||||
claimed to confine.
|
||||
- Unauthorized external-surface access: a caller outside the
|
||||
configured authorization set (allowlist, or OS-level equivalent
|
||||
for local-IPC surfaces) dispatching work, receiving output, or
|
||||
resolving approvals (§2.6).
|
||||
- Credential exfiltration: leakage of operator credentials or
|
||||
session authorization material to a destination outside the
|
||||
trust envelope, via a mechanism that should have prevented it
|
||||
(environment scrubbing bug, adapter logging, transport error
|
||||
that flushes credentials to an upstream, etc.).
|
||||
- Trust-model documentation violations: code behaving contrary to
|
||||
what this policy, Hermes Agent's own documentation, or reasonable
|
||||
operator expectations would predict — including cases where
|
||||
Hermes Agent has documented a stance about how its output should
|
||||
be rendered by a consuming layer (dashboard, gateway adapter,
|
||||
file writer, shell) and a code path breaks that stance.
|
||||
|
||||
### 3.2 Out of Scope
|
||||
|
||||
"Out of scope" here means "not a security vulnerability under this
|
||||
policy." It does not mean "not worth reporting." Improvements to the
|
||||
in-process heuristics, hardening ideas, and UX fixes are welcome as
|
||||
regular issues or pull requests — the approval gate can always catch
|
||||
more patterns, redaction can always get smarter, adapter behavior
|
||||
can always be tightened. These items just don't go through the
|
||||
private-disclosure channel and don't receive advisories.
|
||||
|
||||
- **Bypasses of in-process heuristics (§2.4)** — approval-gate regex
|
||||
bypasses, redaction bypasses, Skills Guard pattern bypasses, and
|
||||
analogous reports against future heuristics. These components are
|
||||
not boundaries; defeating them is not a vulnerability under this
|
||||
policy.
|
||||
- **Prompt injection per se.** Getting the LLM to emit unusual
|
||||
output — via injected content, hallucination, training artifacts,
|
||||
or any other cause — is not itself a vulnerability. "I achieved
|
||||
prompt injection" without a chained §3.1 outcome is not an
|
||||
actionable report under this policy.
|
||||
- **Consequences of a chosen isolation posture.** Reports that a
|
||||
code path operating within its posture's scope can do what that
|
||||
posture permits are not vulnerabilities. Examples: shell or file
|
||||
tools reaching host state under the local backend; code-execution
|
||||
or MCP subprocesses reaching host state under terminal-backend
|
||||
isolation that only sandboxes shell; reports whose preconditions
|
||||
require pre-existing write access to operator-owned configuration
|
||||
or credential files (those are already inside the trust envelope).
|
||||
- **Documented break-glass settings.** Operator-selected trade-offs
|
||||
that explicitly disable protections: `--insecure` and equivalent
|
||||
flags on the dashboard or other components, disabled approvals,
|
||||
local backend in production, development profiles that bypass
|
||||
hermes-home security, and similar. Reports against those
|
||||
configurations are not vulnerabilities — that's the flag's job.
|
||||
- **Community-contributed skills and plugins.** Third-party skills
|
||||
(including the community skills repository) and third-party
|
||||
plugins are in the operator's review surface, not Hermes Agent's
|
||||
trust surface (§2.4, §2.5). A skill or plugin doing something
|
||||
malicious is the expected failure mode of one that wasn't
|
||||
reviewed, not a vulnerability in Hermes Agent. Bugs in Hermes
|
||||
Agent's skill-install or plugin-install path that prevent the
|
||||
operator from seeing what they're installing are in scope under
|
||||
§3.1.
|
||||
- **Public exposure without external controls.** Exposing the
|
||||
gateway or API to the public internet without authentication,
|
||||
VPN, or firewall.
|
||||
- **Tool-level read/write restrictions on a posture where shell is
|
||||
permitted.** If a path is reachable via the terminal tool, reports
|
||||
that other file tools can reach it add nothing.
|
||||
|
||||
---
|
||||
|
||||
## 4. Deployment Hardening
|
||||
|
||||
The single most important hardening decision is matching isolation
|
||||
(§2.2) to the trust of the content the agent will ingest. Beyond
|
||||
that:
|
||||
|
||||
- Run the agent as a non-root user. The supplied container image
|
||||
does this by default.
|
||||
- Keep credentials in the operator credential file with tight
|
||||
permissions, never in the main config, never in version control.
|
||||
Under OpenShell, use the Provider store rather than an on-disk
|
||||
credential file.
|
||||
- Do not expose the gateway or API to the public internet without
|
||||
VPN, Tailscale, or firewall protection. Under OpenShell, use the
|
||||
network policy layer to restrict egress.
|
||||
- Configure a caller allowlist for every network-exposed adapter
|
||||
you enable (§2.6).
|
||||
- Review third-party skills and plugins before install (§2.4,
|
||||
§2.5). For skills, this means reading the Python and scripts,
|
||||
not just SKILL.md. Skills Guard reports and the install audit
|
||||
log are the review surface.
|
||||
- Hermes Agent includes supply-chain guards for MCP server
|
||||
launches and for dependency / bundled-package changes in CI; see
|
||||
`CONTRIBUTING.md` for specifics.
|
||||
|
||||
---
|
||||
|
||||
## 5. Disclosure
|
||||
|
||||
- **Coordinated disclosure window:** 90 days from report, or until a
|
||||
fix is released, whichever comes first.
|
||||
- **Channel:** the GHSA thread or email correspondence with
|
||||
security@nousresearch.com.
|
||||
- **Credit:** reporters are credited in release notes unless
|
||||
anonymity is requested.
|
||||
BIN
__pycache__/model_tools.cpython-310.pyc
Normal file
BIN
__pycache__/model_tools.cpython-310.pyc
Normal file
Binary file not shown.
BIN
__pycache__/web_tools.cpython-310.pyc
Normal file
BIN
__pycache__/web_tools.cpython-310.pyc
Normal file
Binary file not shown.
@@ -1 +0,0 @@
|
||||
"""ACP (Agent Communication Protocol) adapter for hermes-agent."""
|
||||
@@ -1,5 +0,0 @@
|
||||
"""Allow running the ACP adapter as ``python -m acp_adapter``."""
|
||||
|
||||
from .entry import main
|
||||
|
||||
main()
|
||||
@@ -1,79 +0,0 @@
|
||||
"""ACP auth helpers — detect and advertise Hermes authentication methods."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
|
||||
TERMINAL_SETUP_AUTH_METHOD_ID = "hermes-setup"
|
||||
|
||||
|
||||
def detect_provider() -> Optional[str]:
|
||||
"""Resolve the active Hermes runtime provider, or None if unavailable.
|
||||
|
||||
Treats a ``Callable`` ``api_key`` (Azure Foundry Entra ID bearer
|
||||
token provider — see :mod:`agent.azure_identity_adapter`) as a valid
|
||||
credential. Without this, ACP sessions for Entra-configured Foundry
|
||||
deployments silently default to ``"openrouter"`` and the ACP auth
|
||||
handshake rejects the legitimate provider.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
runtime = resolve_runtime_provider()
|
||||
api_key = runtime.get("api_key")
|
||||
provider = runtime.get("provider")
|
||||
if not isinstance(provider, str) or not provider.strip():
|
||||
return None
|
||||
is_string_key = isinstance(api_key, str) and api_key.strip()
|
||||
is_callable_provider = callable(api_key) and not isinstance(api_key, str)
|
||||
if is_string_key or is_callable_provider:
|
||||
return provider.strip().lower()
|
||||
except Exception:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def has_provider() -> bool:
|
||||
"""Return True if Hermes can resolve any runtime provider credentials."""
|
||||
return detect_provider() is not None
|
||||
|
||||
|
||||
def build_auth_methods() -> list[Any]:
|
||||
"""Return registry-compatible ACP auth methods for Hermes.
|
||||
|
||||
The official ACP registry validates that agents advertise at least one
|
||||
usable auth method during the initial handshake. A fresh Zed install may
|
||||
not have Hermes provider credentials configured yet, so Hermes always
|
||||
advertises a terminal setup method. When credentials are already present,
|
||||
it also advertises the resolved provider as the default agent-managed
|
||||
runtime credential method.
|
||||
"""
|
||||
from acp.schema import AuthMethodAgent, TerminalAuthMethod
|
||||
|
||||
methods: list[Any] = []
|
||||
provider = detect_provider()
|
||||
if provider:
|
||||
methods.append(
|
||||
AuthMethodAgent(
|
||||
id=provider,
|
||||
name=f"{provider} runtime credentials",
|
||||
description=(
|
||||
"Authenticate Hermes using the currently configured "
|
||||
f"{provider} runtime credentials."
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
methods.append(
|
||||
TerminalAuthMethod(
|
||||
id=TERMINAL_SETUP_AUTH_METHOD_ID,
|
||||
name="Configure Hermes provider",
|
||||
description=(
|
||||
"Open Hermes' interactive model/provider setup in a terminal. "
|
||||
"Use this when Hermes has not been configured on this machine yet."
|
||||
),
|
||||
type="terminal",
|
||||
args=["--setup"],
|
||||
)
|
||||
)
|
||||
return methods
|
||||
@@ -1,286 +0,0 @@
|
||||
"""Pre-execution ACP edit approval helpers.
|
||||
|
||||
This module is intentionally isolated from the generic tool registry. ACP binds
|
||||
an edit approval requester in a ContextVar for the duration of one ACP agent run;
|
||||
CLI, gateway, and other sessions leave it unset and therefore bypass this guard.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import tempfile
|
||||
from concurrent.futures import TimeoutError as FutureTimeout
|
||||
from contextvars import ContextVar, Token
|
||||
from dataclasses import dataclass
|
||||
from itertools import count
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class EditProposal:
|
||||
"""A proposed single-file edit that can be shown to an ACP client."""
|
||||
|
||||
tool_name: str
|
||||
path: str
|
||||
old_text: str | None
|
||||
new_text: str
|
||||
arguments: dict[str, Any]
|
||||
|
||||
|
||||
EditApprovalRequester = Callable[[EditProposal], bool]
|
||||
|
||||
_EDIT_APPROVAL_REQUESTER: ContextVar[EditApprovalRequester | None] = ContextVar(
|
||||
"ACP_EDIT_APPROVAL_REQUESTER",
|
||||
default=None,
|
||||
)
|
||||
_PERMISSION_REQUEST_IDS = count(1)
|
||||
|
||||
|
||||
SENSITIVE_AUTO_APPROVE_NAMES = {".env", ".env.local", ".env.production", "id_rsa", "id_ed25519"}
|
||||
AUTO_APPROVE_ASK = "ask"
|
||||
AUTO_APPROVE_WORKSPACE = "workspace_session"
|
||||
AUTO_APPROVE_SESSION = "session"
|
||||
|
||||
|
||||
def set_edit_approval_requester(requester: EditApprovalRequester | None) -> Token:
|
||||
"""Bind an ACP edit approval requester for the current context."""
|
||||
|
||||
return _EDIT_APPROVAL_REQUESTER.set(requester)
|
||||
|
||||
|
||||
def reset_edit_approval_requester(token: Token) -> None:
|
||||
"""Restore a previous edit approval requester binding."""
|
||||
|
||||
_EDIT_APPROVAL_REQUESTER.reset(token)
|
||||
|
||||
|
||||
def clear_edit_approval_requester() -> None:
|
||||
"""Clear the current requester; primarily used by tests."""
|
||||
|
||||
_EDIT_APPROVAL_REQUESTER.set(None)
|
||||
|
||||
|
||||
def get_edit_approval_requester() -> EditApprovalRequester | None:
|
||||
return _EDIT_APPROVAL_REQUESTER.get()
|
||||
|
||||
|
||||
def _read_text_if_exists(path: str) -> str | None:
|
||||
p = Path(path).expanduser()
|
||||
if not p.exists():
|
||||
return None
|
||||
if not p.is_file():
|
||||
raise OSError(f"Cannot edit non-file path: {path}")
|
||||
return p.read_text(encoding="utf-8", errors="replace")
|
||||
|
||||
|
||||
def _proposal_for_write_file(arguments: dict[str, Any]) -> EditProposal:
|
||||
path = str(arguments.get("path") or "")
|
||||
if not path:
|
||||
raise ValueError("path required")
|
||||
content = arguments.get("content")
|
||||
if content is None:
|
||||
raise ValueError("content required")
|
||||
return EditProposal(
|
||||
tool_name="write_file",
|
||||
path=path,
|
||||
old_text=_read_text_if_exists(path),
|
||||
new_text=str(content),
|
||||
arguments=dict(arguments),
|
||||
)
|
||||
|
||||
|
||||
def _proposal_for_patch_replace(arguments: dict[str, Any]) -> EditProposal:
|
||||
path = str(arguments.get("path") or "")
|
||||
if not path:
|
||||
raise ValueError("path required")
|
||||
old_string = arguments.get("old_string")
|
||||
new_string = arguments.get("new_string")
|
||||
if old_string is None or new_string is None:
|
||||
raise ValueError("old_string and new_string required")
|
||||
|
||||
old_text = _read_text_if_exists(path)
|
||||
if old_text is None:
|
||||
raise ValueError(f"Failed to read file: {path}")
|
||||
|
||||
from tools.fuzzy_match import fuzzy_find_and_replace
|
||||
|
||||
new_text, match_count, _strategy, error = fuzzy_find_and_replace(
|
||||
old_text,
|
||||
str(old_string),
|
||||
str(new_string),
|
||||
bool(arguments.get("replace_all", False)),
|
||||
)
|
||||
if error or match_count == 0:
|
||||
raise ValueError(error or f"Could not find match for old_string in {path}")
|
||||
|
||||
return EditProposal(
|
||||
tool_name="patch",
|
||||
path=path,
|
||||
old_text=old_text,
|
||||
new_text=new_text,
|
||||
arguments=dict(arguments),
|
||||
)
|
||||
|
||||
|
||||
def build_edit_proposal(tool_name: str, arguments: dict[str, Any]) -> EditProposal | None:
|
||||
"""Return an edit proposal for supported file mutation calls."""
|
||||
|
||||
if tool_name == "write_file":
|
||||
return _proposal_for_write_file(arguments)
|
||||
if tool_name == "patch" and arguments.get("mode", "replace") == "replace":
|
||||
return _proposal_for_patch_replace(arguments)
|
||||
return None
|
||||
|
||||
|
||||
def _is_sensitive_auto_approve_path(path: str) -> bool:
|
||||
parts = Path(path).expanduser().parts
|
||||
lowered = {part.lower() for part in parts}
|
||||
if ".git" in lowered or ".ssh" in lowered:
|
||||
return True
|
||||
return Path(path).name.lower() in SENSITIVE_AUTO_APPROVE_NAMES
|
||||
|
||||
|
||||
def should_auto_approve_edit(proposal: EditProposal, policy: str, cwd: str | None = None) -> bool:
|
||||
"""Return whether an ACP edit proposal may bypass the prompt for this session.
|
||||
|
||||
This is intentionally session-scoped and conservative: sensitive paths still
|
||||
ask even under autonomous policies.
|
||||
"""
|
||||
|
||||
policy = str(policy or AUTO_APPROVE_ASK).strip()
|
||||
if policy == AUTO_APPROVE_ASK or _is_sensitive_auto_approve_path(proposal.path):
|
||||
return False
|
||||
path = Path(proposal.path).expanduser().resolve(strict=False)
|
||||
if policy == AUTO_APPROVE_SESSION:
|
||||
return True
|
||||
if policy == AUTO_APPROVE_WORKSPACE:
|
||||
# `/tmp` is the POSIX path but tempfile.gettempdir() is the real one on
|
||||
# every platform: `/private/tmp` on macOS (because `/tmp` is a symlink
|
||||
# and Path.resolve() follows it) and the per-user Temp dir on Windows.
|
||||
tmp_root = Path(tempfile.gettempdir()).resolve(strict=False)
|
||||
try:
|
||||
path.relative_to(tmp_root)
|
||||
return True
|
||||
except ValueError:
|
||||
pass
|
||||
if cwd:
|
||||
root = Path(cwd).expanduser().resolve(strict=False)
|
||||
try:
|
||||
path.relative_to(root)
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
return False
|
||||
|
||||
|
||||
def maybe_require_edit_approval(tool_name: str, arguments: dict[str, Any]) -> str | None:
|
||||
"""Run ACP edit approval if bound.
|
||||
|
||||
Returns a JSON tool-error string when the edit must be blocked, otherwise
|
||||
``None`` so dispatch can continue. Requester exceptions deny by default.
|
||||
"""
|
||||
|
||||
requester = get_edit_approval_requester()
|
||||
if requester is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
proposal = build_edit_proposal(tool_name, arguments)
|
||||
except Exception as exc:
|
||||
logger.warning("Could not build ACP edit approval proposal for %s: %s", tool_name, exc)
|
||||
return json.dumps({"error": f"Edit approval denied: could not prepare diff ({exc})"}, ensure_ascii=False)
|
||||
|
||||
if proposal is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
approved = bool(requester(proposal))
|
||||
except Exception as exc:
|
||||
logger.warning("ACP edit approval requester failed: %s", exc)
|
||||
approved = False
|
||||
|
||||
if approved:
|
||||
return None
|
||||
return json.dumps({"error": "Edit approval denied by ACP client; file was not modified."}, ensure_ascii=False)
|
||||
|
||||
|
||||
def build_acp_edit_tool_call(proposal: EditProposal):
|
||||
"""Build the ToolCallUpdate payload for ACP request_permission."""
|
||||
|
||||
import acp
|
||||
|
||||
tool_call_id = f"edit-approval-{next(_PERMISSION_REQUEST_IDS)}"
|
||||
return acp.update_tool_call(
|
||||
tool_call_id,
|
||||
title=f"Approve edit: {proposal.path}",
|
||||
kind="edit",
|
||||
status="pending",
|
||||
content=[
|
||||
acp.tool_diff_content(
|
||||
path=proposal.path,
|
||||
old_text=proposal.old_text,
|
||||
new_text=proposal.new_text,
|
||||
)
|
||||
],
|
||||
raw_input={"tool": proposal.tool_name, "arguments": proposal.arguments},
|
||||
)
|
||||
|
||||
|
||||
def make_acp_edit_approval_requester(
|
||||
request_permission_fn: Callable,
|
||||
loop: asyncio.AbstractEventLoop,
|
||||
session_id: str,
|
||||
timeout: float = 60.0,
|
||||
auto_approve_getter: Callable[[], tuple[str, str | None]] | None = None,
|
||||
) -> EditApprovalRequester:
|
||||
"""Return a sync requester that bridges edit proposals to ACP permissions."""
|
||||
|
||||
def _requester(proposal: EditProposal) -> bool:
|
||||
from acp.schema import PermissionOption
|
||||
from agent.async_utils import safe_schedule_threadsafe
|
||||
|
||||
if auto_approve_getter is not None:
|
||||
try:
|
||||
policy, cwd = auto_approve_getter()
|
||||
if should_auto_approve_edit(proposal, policy, cwd):
|
||||
logger.info("Auto-approved ACP edit under policy %s: %s", policy, proposal.path)
|
||||
return True
|
||||
except Exception:
|
||||
logger.debug("ACP edit auto-approval policy check failed", exc_info=True)
|
||||
|
||||
options = [
|
||||
PermissionOption(option_id="allow_once", kind="allow_once", name="Allow edit"),
|
||||
PermissionOption(option_id="deny", kind="reject_once", name="Deny"),
|
||||
]
|
||||
tool_call = build_acp_edit_tool_call(proposal)
|
||||
coro = request_permission_fn(
|
||||
session_id=session_id,
|
||||
tool_call=tool_call,
|
||||
options=options,
|
||||
)
|
||||
future = safe_schedule_threadsafe(
|
||||
coro,
|
||||
loop,
|
||||
logger=logger,
|
||||
log_message="Edit approval request: failed to schedule on loop",
|
||||
)
|
||||
if future is None:
|
||||
return False
|
||||
try:
|
||||
response = future.result(timeout=timeout)
|
||||
except (FutureTimeout, Exception) as exc:
|
||||
future.cancel()
|
||||
logger.warning("Edit approval request timed out or failed: %s", exc)
|
||||
return False
|
||||
outcome = getattr(response, "outcome", None)
|
||||
return (
|
||||
getattr(outcome, "outcome", None) == "selected"
|
||||
and getattr(outcome, "option_id", None) == "allow_once"
|
||||
)
|
||||
|
||||
return _requester
|
||||
@@ -1,266 +0,0 @@
|
||||
"""CLI entry point for the hermes-agent ACP adapter.
|
||||
|
||||
Loads environment variables from ``~/.hermes/.env``, configures logging
|
||||
to write to stderr (so stdout is reserved for ACP JSON-RPC transport),
|
||||
and starts the ACP agent server.
|
||||
|
||||
Usage::
|
||||
|
||||
python -m acp_adapter.entry
|
||||
# or
|
||||
hermes acp
|
||||
# or
|
||||
hermes-acp
|
||||
"""
|
||||
|
||||
# IMPORTANT: hermes_bootstrap must be the very first import — UTF-8 stdio
|
||||
# on Windows. No-op on POSIX. See hermes_bootstrap.py for full rationale.
|
||||
try:
|
||||
import hermes_bootstrap # noqa: F401
|
||||
except ModuleNotFoundError:
|
||||
# Graceful fallback when hermes_bootstrap isn't registered in the venv
|
||||
# yet — happens during partial ``hermes update`` where git-reset landed
|
||||
# new code but ``uv pip install -e .`` didn't finish. Missing bootstrap
|
||||
# means UTF-8 stdio setup is skipped on Windows; POSIX is unaffected.
|
||||
pass
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
|
||||
# Methods clients send as periodic liveness probes. They are not part of the
|
||||
# ACP schema, so the acp router correctly returns JSON-RPC -32601 to the
|
||||
# caller — but the supervisor task that dispatches the request then surfaces
|
||||
# the raised RequestError via ``logging.exception("Background task failed")``,
|
||||
# which dumps a traceback to stderr every probe interval. Clients like
|
||||
# acp-bridge already treat the -32601 response as "agent alive", so the
|
||||
# traceback is pure noise. We keep the protocol response intact and only
|
||||
# silence the stderr noise for this specific benign case.
|
||||
_BENIGN_PROBE_METHODS = frozenset({"ping", "health", "healthcheck"})
|
||||
|
||||
|
||||
class _BenignProbeMethodFilter(logging.Filter):
|
||||
"""Suppress acp 'Background task failed' tracebacks caused by unknown
|
||||
liveness-probe methods (e.g. ``ping``) while leaving every other
|
||||
background-task error — including method_not_found for any non-probe
|
||||
method — visible in stderr.
|
||||
"""
|
||||
|
||||
def filter(self, record: logging.LogRecord) -> bool:
|
||||
if record.getMessage() != "Background task failed":
|
||||
return True
|
||||
exc_info = record.exc_info
|
||||
if not exc_info:
|
||||
return True
|
||||
exc = exc_info[1]
|
||||
# Imported lazily so this module stays importable when the optional
|
||||
# ``agent-client-protocol`` dependency is not installed.
|
||||
try:
|
||||
from acp.exceptions import RequestError
|
||||
except ImportError:
|
||||
return True
|
||||
if not isinstance(exc, RequestError):
|
||||
return True
|
||||
if getattr(exc, "code", None) != -32601:
|
||||
return True
|
||||
data = getattr(exc, "data", None)
|
||||
method = data.get("method") if isinstance(data, dict) else None
|
||||
return method not in _BENIGN_PROBE_METHODS
|
||||
|
||||
|
||||
def _setup_logging() -> None:
|
||||
"""Route all logging to stderr so stdout stays clean for ACP stdio."""
|
||||
handler = logging.StreamHandler(sys.stderr)
|
||||
handler.setFormatter(
|
||||
logging.Formatter(
|
||||
"%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
)
|
||||
)
|
||||
handler.addFilter(_BenignProbeMethodFilter())
|
||||
root = logging.getLogger()
|
||||
root.handlers.clear()
|
||||
root.addHandler(handler)
|
||||
root.setLevel(logging.INFO)
|
||||
|
||||
# Quiet down noisy libraries
|
||||
logging.getLogger("httpx").setLevel(logging.WARNING)
|
||||
logging.getLogger("httpcore").setLevel(logging.WARNING)
|
||||
logging.getLogger("openai").setLevel(logging.WARNING)
|
||||
|
||||
|
||||
def _load_env() -> None:
|
||||
"""Load .env from HERMES_HOME (default ``~/.hermes``)."""
|
||||
from hermes_cli.env_loader import load_hermes_dotenv
|
||||
|
||||
hermes_home = get_hermes_home()
|
||||
loaded = load_hermes_dotenv(hermes_home=hermes_home)
|
||||
if loaded:
|
||||
for env_file in loaded:
|
||||
logging.getLogger(__name__).info("Loaded env from %s", env_file)
|
||||
else:
|
||||
logging.getLogger(__name__).info(
|
||||
"No .env found at %s, using system env", hermes_home / ".env"
|
||||
)
|
||||
|
||||
|
||||
def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="hermes-acp",
|
||||
description="Run Hermes Agent as an ACP stdio server.",
|
||||
)
|
||||
parser.add_argument("--version", action="store_true", help="Print Hermes version and exit")
|
||||
parser.add_argument(
|
||||
"--check",
|
||||
action="store_true",
|
||||
help="Verify ACP dependencies and adapter imports, then exit",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--setup",
|
||||
action="store_true",
|
||||
help="Run interactive Hermes provider/model setup for ACP terminal auth",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--setup-browser",
|
||||
action="store_true",
|
||||
help="Install agent-browser + Playwright Chromium into ~/.hermes/node/ "
|
||||
"for browser tool support. Idempotent.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--yes",
|
||||
"-y",
|
||||
action="store_true",
|
||||
dest="assume_yes",
|
||||
help="Accept all prompts (currently used by --setup-browser to skip the "
|
||||
"~400 MB Chromium download confirmation).",
|
||||
)
|
||||
return parser.parse_args(argv)
|
||||
|
||||
|
||||
def _print_version() -> None:
|
||||
from hermes_cli import __version__ as hermes_version
|
||||
|
||||
print(hermes_version)
|
||||
|
||||
|
||||
def _run_check() -> None:
|
||||
import acp # noqa: F401
|
||||
from acp_adapter.server import HermesACPAgent # noqa: F401
|
||||
|
||||
print("Hermes ACP check OK")
|
||||
|
||||
|
||||
def _run_setup() -> None:
|
||||
from hermes_cli.main import main as hermes_main
|
||||
|
||||
old_argv = sys.argv[:]
|
||||
try:
|
||||
sys.argv = [old_argv[0] if old_argv else "hermes", "model"]
|
||||
hermes_main()
|
||||
finally:
|
||||
sys.argv = old_argv
|
||||
|
||||
# Offer browser-tools install as a follow-up. The terminal auth method
|
||||
# is the one supported first-run UX for registry installs, so this is
|
||||
# the natural moment to ask. Skip silently if stdin isn't a TTY (the
|
||||
# answer can't be collected anyway).
|
||||
if not sys.stdin.isatty():
|
||||
return
|
||||
try:
|
||||
reply = input(
|
||||
"\nInstall browser tools? Downloads agent-browser (npm) and "
|
||||
"optionally Playwright Chromium (~400 MB). [y/N] "
|
||||
).strip().lower()
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
return
|
||||
if reply in {"y", "yes"}:
|
||||
_run_setup_browser(assume_yes=False)
|
||||
|
||||
|
||||
def _run_setup_browser(assume_yes: bool = False) -> int:
|
||||
"""Bootstrap agent-browser + Chromium.
|
||||
|
||||
Routes through dep_ensure -> install.{sh,ps1} --ensure, sharing code
|
||||
with ``hermes postinstall`` and the runtime lazy installer.
|
||||
|
||||
Returns 0 on success, 1 on failure.
|
||||
"""
|
||||
from hermes_cli.dep_ensure import ensure_dependency
|
||||
|
||||
try:
|
||||
node_ok = ensure_dependency("node", interactive=not assume_yes)
|
||||
if not node_ok:
|
||||
print("Node.js installation failed — cannot proceed with browser tools.",
|
||||
file=sys.stderr)
|
||||
return 1
|
||||
|
||||
browser_ok = ensure_dependency("browser", interactive=not assume_yes)
|
||||
if not browser_ok:
|
||||
print("Browser tools installation failed.", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
return 0
|
||||
except OSError as exc:
|
||||
print(f"Browser bootstrap failed: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> None:
|
||||
"""Entry point: load env, configure logging, run the ACP agent."""
|
||||
args = _parse_args(argv)
|
||||
if args.version:
|
||||
_print_version()
|
||||
return
|
||||
if args.check:
|
||||
_run_check()
|
||||
return
|
||||
if args.setup:
|
||||
_run_setup()
|
||||
return
|
||||
if args.setup_browser:
|
||||
rc = _run_setup_browser(assume_yes=args.assume_yes)
|
||||
if rc != 0:
|
||||
sys.exit(rc)
|
||||
return
|
||||
|
||||
_setup_logging()
|
||||
_load_env()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.info("Starting hermes-agent ACP adapter")
|
||||
|
||||
# Ensure the project root is on sys.path so ``from run_agent import AIAgent`` works
|
||||
project_root = str(Path(__file__).resolve().parent.parent)
|
||||
if project_root not in sys.path:
|
||||
sys.path.insert(0, project_root)
|
||||
|
||||
import acp
|
||||
from .server import HermesACPAgent
|
||||
|
||||
# MCP tool discovery from config.yaml — run before asyncio.run() so
|
||||
# it's safe to use blocking waits. (ACP also registers per-session
|
||||
# MCP servers dynamically via asyncio.to_thread inside the event
|
||||
# loop; that path is unaffected.) Moved from model_tools.py module
|
||||
# scope to avoid freezing the gateway's loop on lazy import (#16856).
|
||||
try:
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
discover_mcp_tools()
|
||||
except Exception:
|
||||
logger.debug("MCP tool discovery failed at ACP startup", exc_info=True)
|
||||
|
||||
agent = HermesACPAgent()
|
||||
try:
|
||||
asyncio.run(acp.run_agent(agent, use_unstable_protocol=True))
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Shutting down (KeyboardInterrupt)")
|
||||
except Exception:
|
||||
logger.exception("ACP agent crashed")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,279 +0,0 @@
|
||||
"""Callback factories for bridging AIAgent events to ACP notifications.
|
||||
|
||||
Each factory returns a callable with the signature that AIAgent expects
|
||||
for its callbacks. Internally, the callbacks push ACP session updates
|
||||
to the client via ``conn.session_update()`` using
|
||||
``asyncio.run_coroutine_threadsafe()`` (since AIAgent runs in a worker
|
||||
thread while the event loop lives on the main thread).
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from collections import deque
|
||||
from typing import Any, Callable, Deque, Dict
|
||||
|
||||
import acp
|
||||
from acp.schema import AgentPlanUpdate, PlanEntry
|
||||
|
||||
from .tools import (
|
||||
build_tool_complete,
|
||||
build_tool_start,
|
||||
make_tool_call_id,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _json_loads_maybe_prefix(value: str) -> Any:
|
||||
"""Parse a JSON object even when Hermes appended a human hint after it."""
|
||||
text = value.strip()
|
||||
try:
|
||||
return json.loads(text)
|
||||
except Exception:
|
||||
decoder = json.JSONDecoder()
|
||||
data, _ = decoder.raw_decode(text)
|
||||
return data
|
||||
|
||||
|
||||
def _build_plan_update_from_todo_result(result: Any) -> AgentPlanUpdate | None:
|
||||
"""Translate Hermes' todo tool result into ACP's native plan update.
|
||||
|
||||
Zed renders ``sessionUpdate: plan`` as its first-class task/todo panel. The
|
||||
Hermes agent already maintains task state through the ``todo`` tool, so the
|
||||
ACP adapter should expose that state natively instead of only as a generic
|
||||
tool-call transcript block.
|
||||
"""
|
||||
if not isinstance(result, str) or not result.strip():
|
||||
return None
|
||||
|
||||
try:
|
||||
data = _json_loads_maybe_prefix(result)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
if not isinstance(data, dict) or not isinstance(data.get("todos"), list):
|
||||
return None
|
||||
|
||||
todos = data["todos"]
|
||||
if not todos:
|
||||
return AgentPlanUpdate(session_update="plan", entries=[])
|
||||
|
||||
status_map = {
|
||||
"pending": "pending",
|
||||
"in_progress": "in_progress",
|
||||
"completed": "completed",
|
||||
# ACP plans only support pending/in_progress/completed. Preserve
|
||||
# cancelled tasks as terminal entries instead of dropping them and
|
||||
# making the client's full-list replacement lose visible context.
|
||||
"cancelled": "completed",
|
||||
}
|
||||
entries: list[PlanEntry] = []
|
||||
for item in todos:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
content = str(item.get("content") or item.get("id") or "").strip()
|
||||
if not content:
|
||||
continue
|
||||
raw_status = str(item.get("status") or "pending").strip()
|
||||
status = status_map.get(raw_status, "pending")
|
||||
if raw_status == "cancelled":
|
||||
content = f"[cancelled] {content}"
|
||||
entries.append(PlanEntry(content=content, priority="medium", status=status))
|
||||
|
||||
return AgentPlanUpdate(session_update="plan", entries=entries)
|
||||
|
||||
|
||||
def _send_update(
|
||||
conn: acp.Client,
|
||||
session_id: str,
|
||||
loop: asyncio.AbstractEventLoop,
|
||||
update: Any,
|
||||
) -> None:
|
||||
"""Fire-and-forget an ACP session update from a worker thread."""
|
||||
from agent.async_utils import safe_schedule_threadsafe
|
||||
|
||||
future = safe_schedule_threadsafe(
|
||||
conn.session_update(session_id, update),
|
||||
loop,
|
||||
logger=logger,
|
||||
log_message="Failed to send ACP update",
|
||||
)
|
||||
if future is None:
|
||||
return
|
||||
try:
|
||||
future.result(timeout=5)
|
||||
except Exception:
|
||||
logger.debug("Failed to send ACP update", exc_info=True)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Tool progress callback
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def make_tool_progress_cb(
|
||||
conn: acp.Client,
|
||||
session_id: str,
|
||||
loop: asyncio.AbstractEventLoop,
|
||||
tool_call_ids: Dict[str, Deque[str]],
|
||||
tool_call_meta: Dict[str, Dict[str, Any]],
|
||||
edit_approval_policy_getter: Callable[[], tuple[str, str | None]] | None = None,
|
||||
) -> Callable:
|
||||
"""Create a ``tool_progress_callback`` for AIAgent.
|
||||
|
||||
Signature expected by AIAgent::
|
||||
|
||||
tool_progress_callback(event_type: str, name: str, preview: str, args: dict, **kwargs)
|
||||
|
||||
Emits ``ToolCallStart`` for ``tool.started`` events and tracks IDs in a FIFO
|
||||
queue per tool name so duplicate/parallel same-name calls still complete
|
||||
against the correct ACP tool call. Other event types (``tool.completed``,
|
||||
``reasoning.available``) are silently ignored.
|
||||
"""
|
||||
|
||||
def _tool_progress(event_type: str, name: str = None, preview: str = None, args: Any = None, **kwargs) -> None:
|
||||
# Only emit ACP ToolCallStart for tool.started; ignore other event types
|
||||
if event_type != "tool.started":
|
||||
return
|
||||
if isinstance(args, str):
|
||||
try:
|
||||
args = json.loads(args)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
args = {"raw": args}
|
||||
if not isinstance(args, dict):
|
||||
args = {}
|
||||
|
||||
tc_id = make_tool_call_id()
|
||||
queue = tool_call_ids.get(name)
|
||||
if queue is None:
|
||||
queue = deque()
|
||||
tool_call_ids[name] = queue
|
||||
elif isinstance(queue, str):
|
||||
queue = deque([queue])
|
||||
tool_call_ids[name] = queue
|
||||
queue.append(tc_id)
|
||||
|
||||
snapshot = None
|
||||
if name in {"write_file", "patch", "skill_manage"}:
|
||||
try:
|
||||
from agent.display import capture_local_edit_snapshot
|
||||
|
||||
snapshot = capture_local_edit_snapshot(name, args)
|
||||
except Exception:
|
||||
logger.debug("Failed to capture ACP edit snapshot for %s", name, exc_info=True)
|
||||
tool_call_meta[tc_id] = {"args": args, "snapshot": snapshot}
|
||||
|
||||
edit_diff = None
|
||||
if name in {"write_file", "patch"} and edit_approval_policy_getter is not None:
|
||||
try:
|
||||
from acp_adapter.edit_approval import build_edit_proposal, should_auto_approve_edit
|
||||
|
||||
proposal = build_edit_proposal(name, args)
|
||||
if proposal is not None:
|
||||
policy, cwd = edit_approval_policy_getter()
|
||||
if should_auto_approve_edit(proposal, policy, cwd):
|
||||
edit_diff = proposal
|
||||
except Exception:
|
||||
logger.debug("Failed to prepare auto-approved ACP edit diff for %s", name, exc_info=True)
|
||||
|
||||
update = build_tool_start(tc_id, name, args, edit_diff=edit_diff)
|
||||
_send_update(conn, session_id, loop, update)
|
||||
|
||||
return _tool_progress
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Thinking callback
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def make_thinking_cb(
|
||||
conn: acp.Client,
|
||||
session_id: str,
|
||||
loop: asyncio.AbstractEventLoop,
|
||||
) -> Callable:
|
||||
"""Create a ``thinking_callback`` for AIAgent."""
|
||||
|
||||
def _thinking(text: str) -> None:
|
||||
if not text:
|
||||
return
|
||||
update = acp.update_agent_thought_text(text)
|
||||
_send_update(conn, session_id, loop, update)
|
||||
|
||||
return _thinking
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Step callback
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def make_step_cb(
|
||||
conn: acp.Client,
|
||||
session_id: str,
|
||||
loop: asyncio.AbstractEventLoop,
|
||||
tool_call_ids: Dict[str, Deque[str]],
|
||||
tool_call_meta: Dict[str, Dict[str, Any]],
|
||||
) -> Callable:
|
||||
"""Create a ``step_callback`` for AIAgent.
|
||||
|
||||
Signature expected by AIAgent::
|
||||
|
||||
step_callback(api_call_count: int, prev_tools: list)
|
||||
"""
|
||||
|
||||
def _step(api_call_count: int, prev_tools: Any = None) -> None:
|
||||
if prev_tools and isinstance(prev_tools, list):
|
||||
for tool_info in prev_tools:
|
||||
tool_name = None
|
||||
result = None
|
||||
function_args = None
|
||||
|
||||
if isinstance(tool_info, dict):
|
||||
tool_name = tool_info.get("name") or tool_info.get("function_name")
|
||||
result = tool_info.get("result") or tool_info.get("output")
|
||||
function_args = tool_info.get("arguments") or tool_info.get("args")
|
||||
elif isinstance(tool_info, str):
|
||||
tool_name = tool_info
|
||||
|
||||
queue = tool_call_ids.get(tool_name or "")
|
||||
if isinstance(queue, str):
|
||||
queue = deque([queue])
|
||||
tool_call_ids[tool_name] = queue
|
||||
if tool_name and queue:
|
||||
tc_id = queue.popleft()
|
||||
meta = tool_call_meta.pop(tc_id, {})
|
||||
update = build_tool_complete(
|
||||
tc_id,
|
||||
tool_name,
|
||||
result=str(result) if result is not None else None,
|
||||
function_args=function_args or meta.get("args"),
|
||||
snapshot=meta.get("snapshot"),
|
||||
)
|
||||
_send_update(conn, session_id, loop, update)
|
||||
if tool_name == "todo":
|
||||
plan_update = _build_plan_update_from_todo_result(result)
|
||||
if plan_update is not None:
|
||||
_send_update(conn, session_id, loop, plan_update)
|
||||
if not queue:
|
||||
tool_call_ids.pop(tool_name, None)
|
||||
|
||||
return _step
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Agent message callback
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def make_message_cb(
|
||||
conn: acp.Client,
|
||||
session_id: str,
|
||||
loop: asyncio.AbstractEventLoop,
|
||||
) -> Callable:
|
||||
"""Create a callback that streams agent response text to the editor."""
|
||||
|
||||
def _message(text: str) -> None:
|
||||
if not text:
|
||||
return
|
||||
update = acp.update_agent_message_text(text)
|
||||
_send_update(conn, session_id, loop, update)
|
||||
|
||||
return _message
|
||||
@@ -1,168 +0,0 @@
|
||||
"""ACP permission bridging for Hermes dangerous-command approvals."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from concurrent.futures import TimeoutError as FutureTimeout
|
||||
from itertools import count
|
||||
from typing import Callable
|
||||
|
||||
from acp.schema import (
|
||||
AllowedOutcome,
|
||||
PermissionOption,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Maps ACP permission option ids to Hermes approval result strings.
|
||||
# Option ids are stable across both the ``allow_permanent=True`` and
|
||||
# ``allow_permanent=False`` paths even though the option list differs.
|
||||
_OPTION_ID_TO_HERMES = {
|
||||
"allow_once": "once",
|
||||
"allow_session": "session",
|
||||
"allow_always": "always",
|
||||
"deny": "deny",
|
||||
"deny_always": "deny",
|
||||
}
|
||||
|
||||
_PERMISSION_REQUEST_IDS = count(1)
|
||||
|
||||
|
||||
def _permission_option_supports_kind(kind: str) -> bool:
|
||||
"""Return whether the installed ACP SDK accepts a permission option kind."""
|
||||
try:
|
||||
PermissionOption(option_id="__probe__", kind=kind, name="probe")
|
||||
except Exception:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _build_permission_options(*, allow_permanent: bool) -> list[PermissionOption]:
|
||||
"""Return ACP options that match Hermes approval semantics."""
|
||||
options = [
|
||||
PermissionOption(option_id="allow_once", kind="allow_once", name="Allow once"),
|
||||
PermissionOption(
|
||||
option_id="allow_session",
|
||||
# ACP has no session-scoped kind, so use the closest persistent
|
||||
# hint while keeping Hermes semantics in the option id.
|
||||
kind="allow_always",
|
||||
name="Allow for session",
|
||||
),
|
||||
]
|
||||
if allow_permanent:
|
||||
options.append(
|
||||
PermissionOption(
|
||||
option_id="allow_always",
|
||||
kind="allow_always",
|
||||
name="Allow always",
|
||||
),
|
||||
)
|
||||
options.append(PermissionOption(option_id="deny", kind="reject_once", name="Deny"))
|
||||
if _permission_option_supports_kind("reject_always"):
|
||||
options.append(
|
||||
PermissionOption(
|
||||
option_id="deny_always",
|
||||
kind="reject_always",
|
||||
name="Deny always",
|
||||
),
|
||||
)
|
||||
return options
|
||||
|
||||
|
||||
def _build_permission_tool_call(command: str, description: str):
|
||||
"""Return the ACP tool-call update attached to a permission request.
|
||||
|
||||
``request_permission`` expects a ``ToolCallUpdate`` payload — produced
|
||||
by ``_acp.update_tool_call`` — not a ``ToolCallStart``. Each request
|
||||
gets a unique ``perm-check-N`` id so concurrent requests don't collide.
|
||||
"""
|
||||
import acp as _acp
|
||||
|
||||
tool_call_id = f"perm-check-{next(_PERMISSION_REQUEST_IDS)}"
|
||||
title = f"{description}: {command}" if description else command
|
||||
content_text = f"{description}\n$ {command}" if description else f"$ {command}"
|
||||
return _acp.update_tool_call(
|
||||
tool_call_id,
|
||||
title=title,
|
||||
kind="execute",
|
||||
status="pending",
|
||||
content=[_acp.tool_content(_acp.text_block(content_text))],
|
||||
raw_input={"command": command, "description": description},
|
||||
)
|
||||
|
||||
|
||||
def _map_outcome_to_hermes(outcome: object, *, allowed_option_ids: set[str]) -> str:
|
||||
"""Map an ACP permission outcome into Hermes approval strings."""
|
||||
if not isinstance(outcome, AllowedOutcome):
|
||||
return "deny"
|
||||
|
||||
option_id = outcome.option_id
|
||||
if option_id not in allowed_option_ids:
|
||||
logger.warning("Permission request returned unknown option_id: %s", option_id)
|
||||
return "deny"
|
||||
return _OPTION_ID_TO_HERMES.get(option_id, "deny")
|
||||
|
||||
|
||||
def make_approval_callback(
|
||||
request_permission_fn: Callable,
|
||||
loop: asyncio.AbstractEventLoop,
|
||||
session_id: str,
|
||||
timeout: float = 60.0,
|
||||
) -> Callable[..., str]:
|
||||
"""
|
||||
Return a Hermes-compatible approval callback that bridges to ACP.
|
||||
|
||||
The callback accepts ``command`` and ``description`` plus optional
|
||||
keyword arguments such as ``allow_permanent`` used by
|
||||
``tools.approval.prompt_dangerous_approval()``.
|
||||
|
||||
Args:
|
||||
request_permission_fn: The ACP connection's ``request_permission`` coroutine.
|
||||
loop: The event loop on which the ACP connection lives.
|
||||
session_id: Current ACP session id.
|
||||
timeout: Seconds to wait for a response before auto-denying.
|
||||
"""
|
||||
|
||||
def _callback(
|
||||
command: str,
|
||||
description: str,
|
||||
*,
|
||||
allow_permanent: bool = True,
|
||||
**_: object,
|
||||
) -> str:
|
||||
from agent.async_utils import safe_schedule_threadsafe
|
||||
|
||||
options = _build_permission_options(allow_permanent=allow_permanent)
|
||||
|
||||
tool_call = _build_permission_tool_call(command, description)
|
||||
coro = request_permission_fn(
|
||||
session_id=session_id,
|
||||
tool_call=tool_call,
|
||||
options=options,
|
||||
)
|
||||
future = safe_schedule_threadsafe(
|
||||
coro, loop,
|
||||
logger=logger,
|
||||
log_message="Permission request: failed to schedule on loop",
|
||||
)
|
||||
if future is None:
|
||||
return "deny"
|
||||
|
||||
try:
|
||||
response = future.result(timeout=timeout)
|
||||
except (FutureTimeout, Exception) as exc:
|
||||
future.cancel()
|
||||
logger.warning("Permission request timed out or failed: %s", exc)
|
||||
return "deny"
|
||||
|
||||
if response is None:
|
||||
return "deny"
|
||||
|
||||
allowed_option_ids = {option.option_id for option in options}
|
||||
return _map_outcome_to_hermes(
|
||||
response.outcome,
|
||||
allowed_option_ids=allowed_option_ids,
|
||||
)
|
||||
|
||||
return _callback
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,623 +0,0 @@
|
||||
"""ACP session manager — maps ACP sessions to Hermes AIAgent instances.
|
||||
|
||||
Sessions are persisted to the shared SessionDB (``~/.hermes/state.db``) so they
|
||||
survive process restarts and appear in ``session_search``. When the editor
|
||||
reconnects after idle/restart, the ``load_session`` / ``resume_session`` calls
|
||||
find the persisted session in the database and restore the full conversation
|
||||
history.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
import copy
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from dataclasses import dataclass, field
|
||||
from threading import Lock
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _win_path_to_wsl(path: str) -> str | None:
|
||||
"""Convert a Windows drive path to its WSL /mnt/<drive>/... equivalent."""
|
||||
match = re.match(r"^([A-Za-z]):[\\/](.*)$", path)
|
||||
if not match:
|
||||
return None
|
||||
drive = match.group(1).lower()
|
||||
tail = match.group(2).replace("\\", "/")
|
||||
return f"/mnt/{drive}/{tail}"
|
||||
|
||||
|
||||
def _translate_acp_cwd(cwd: str) -> str:
|
||||
"""Translate Windows ACP cwd values when Hermes itself is running in WSL.
|
||||
|
||||
Windows ACP clients can launch ``hermes acp`` inside WSL while still sending
|
||||
editor workspaces as Windows drive paths such as ``E:\\Projects``. Store
|
||||
and execute against the WSL mount path so agents, tools, and persisted ACP
|
||||
sessions all agree on the usable workspace. Native Linux/macOS keeps the
|
||||
original cwd unchanged.
|
||||
"""
|
||||
from hermes_constants import is_wsl
|
||||
|
||||
if not is_wsl():
|
||||
return cwd
|
||||
translated = _win_path_to_wsl(str(cwd))
|
||||
return translated if translated is not None else cwd
|
||||
|
||||
|
||||
def _normalize_cwd_for_compare(cwd: str | None) -> str:
|
||||
raw = str(cwd or ".").strip()
|
||||
if not raw:
|
||||
raw = "."
|
||||
expanded = os.path.expanduser(raw)
|
||||
|
||||
# Normalize Windows drive paths into the equivalent WSL mount form so
|
||||
# ACP history filters match the same workspace across Windows and WSL.
|
||||
translated = _win_path_to_wsl(expanded)
|
||||
if translated is not None:
|
||||
expanded = translated
|
||||
elif re.match(r"^/mnt/[A-Za-z]/", expanded):
|
||||
expanded = f"/mnt/{expanded[5].lower()}/{expanded[7:]}"
|
||||
|
||||
return os.path.normpath(expanded)
|
||||
|
||||
|
||||
def _build_session_title(title: Any, preview: Any, cwd: str | None) -> str:
|
||||
explicit = str(title or "").strip()
|
||||
if explicit:
|
||||
return explicit
|
||||
preview_text = str(preview or "").strip()
|
||||
if preview_text:
|
||||
return preview_text
|
||||
leaf = os.path.basename(str(cwd or "").rstrip("/\\"))
|
||||
return leaf or "New thread"
|
||||
|
||||
|
||||
def _format_updated_at(value: Any) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, str) and value.strip():
|
||||
return value
|
||||
try:
|
||||
return datetime.fromtimestamp(float(value), tz=timezone.utc).isoformat()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _updated_at_sort_key(value: Any) -> float:
|
||||
if value is None:
|
||||
return float("-inf")
|
||||
if isinstance(value, (int, float)):
|
||||
return float(value)
|
||||
raw = str(value).strip()
|
||||
if not raw:
|
||||
return float("-inf")
|
||||
try:
|
||||
return datetime.fromisoformat(raw.replace("Z", "+00:00")).timestamp()
|
||||
except Exception:
|
||||
try:
|
||||
return float(raw)
|
||||
except Exception:
|
||||
return float("-inf")
|
||||
|
||||
|
||||
def _acp_stderr_print(*args, **kwargs) -> None:
|
||||
"""Best-effort human-readable output sink for ACP stdio sessions.
|
||||
|
||||
ACP reserves stdout for JSON-RPC frames, so any incidental CLI/status output
|
||||
from AIAgent must be redirected away from stdout. Route it to stderr instead.
|
||||
"""
|
||||
kwargs = dict(kwargs)
|
||||
kwargs.setdefault("file", sys.stderr)
|
||||
print(*args, **kwargs)
|
||||
|
||||
|
||||
def _register_task_cwd(task_id: str, cwd: str) -> None:
|
||||
"""Bind a task/session id to the editor's working directory for tools.
|
||||
|
||||
Zed can launch Hermes from a Windows workspace while the ACP process runs
|
||||
inside WSL. In that case ACP sends cwd as e.g. ``E:\\Projects\\POTI``;
|
||||
local tools need the WSL mount equivalent or subprocess creation fails
|
||||
before the command can run.
|
||||
"""
|
||||
if not task_id:
|
||||
return
|
||||
try:
|
||||
from tools.terminal_tool import register_task_env_overrides
|
||||
register_task_env_overrides(task_id, {"cwd": _translate_acp_cwd(cwd)})
|
||||
except Exception:
|
||||
logger.debug("Failed to register ACP task cwd override", exc_info=True)
|
||||
|
||||
|
||||
def _expand_acp_enabled_toolsets(
|
||||
toolsets: List[str] | None = None,
|
||||
mcp_server_names: List[str] | None = None,
|
||||
) -> List[str]:
|
||||
"""Return ACP toolsets plus explicit MCP server toolsets for this session."""
|
||||
expanded: List[str] = []
|
||||
for name in list(toolsets or ["hermes-acp"]):
|
||||
if name and name not in expanded:
|
||||
expanded.append(name)
|
||||
|
||||
for server_name in list(mcp_server_names or []):
|
||||
toolset_name = f"mcp-{server_name}"
|
||||
if server_name and toolset_name not in expanded:
|
||||
expanded.append(toolset_name)
|
||||
|
||||
return expanded
|
||||
|
||||
|
||||
def _clear_task_cwd(task_id: str) -> None:
|
||||
"""Remove task-specific cwd overrides for an ACP session."""
|
||||
if not task_id:
|
||||
return
|
||||
try:
|
||||
from tools.terminal_tool import clear_task_env_overrides
|
||||
clear_task_env_overrides(task_id)
|
||||
except Exception:
|
||||
logger.debug("Failed to clear ACP task cwd override", exc_info=True)
|
||||
|
||||
|
||||
@dataclass
|
||||
class SessionState:
|
||||
"""Tracks per-session state for an ACP-managed Hermes agent."""
|
||||
|
||||
session_id: str
|
||||
agent: Any # AIAgent instance
|
||||
cwd: str = "."
|
||||
model: str = ""
|
||||
history: List[Dict[str, Any]] = field(default_factory=list)
|
||||
cancel_event: Any = None # threading.Event
|
||||
is_running: bool = False
|
||||
queued_prompts: List[str] = field(default_factory=list)
|
||||
runtime_lock: Any = field(default_factory=Lock)
|
||||
current_prompt_text: str = ""
|
||||
interrupted_prompt_text: str = ""
|
||||
|
||||
|
||||
class SessionManager:
|
||||
"""Thread-safe manager for ACP sessions backed by Hermes AIAgent instances.
|
||||
|
||||
Sessions are held in-memory for fast access **and** persisted to the
|
||||
shared SessionDB so they survive process restarts and are searchable
|
||||
via ``session_search``.
|
||||
"""
|
||||
|
||||
def __init__(self, agent_factory=None, db=None):
|
||||
"""
|
||||
Args:
|
||||
agent_factory: Optional callable that creates an AIAgent-like object.
|
||||
Used by tests. When omitted, a real AIAgent is created
|
||||
using the current Hermes runtime provider configuration.
|
||||
db: Optional SessionDB instance. When omitted, the default
|
||||
SessionDB (``~/.hermes/state.db``) is lazily created.
|
||||
"""
|
||||
self._sessions: Dict[str, SessionState] = {}
|
||||
self._lock = Lock()
|
||||
self._agent_factory = agent_factory
|
||||
self._db_instance = db # None → lazy-init on first use
|
||||
|
||||
# ---- public API ---------------------------------------------------------
|
||||
|
||||
def create_session(self, cwd: str = ".") -> SessionState:
|
||||
"""Create a new session with a unique ID and a fresh AIAgent."""
|
||||
import threading
|
||||
|
||||
cwd = _translate_acp_cwd(cwd)
|
||||
session_id = str(uuid.uuid4())
|
||||
agent = self._make_agent(session_id=session_id, cwd=cwd)
|
||||
state = SessionState(
|
||||
session_id=session_id,
|
||||
agent=agent,
|
||||
cwd=cwd,
|
||||
model=getattr(agent, "model", "") or "",
|
||||
cancel_event=threading.Event(),
|
||||
)
|
||||
with self._lock:
|
||||
self._sessions[session_id] = state
|
||||
_register_task_cwd(session_id, cwd)
|
||||
self._persist(state)
|
||||
logger.info("Created ACP session %s (cwd=%s)", session_id, cwd)
|
||||
return state
|
||||
|
||||
def get_session(self, session_id: str) -> Optional[SessionState]:
|
||||
"""Return the session for *session_id*, or ``None``.
|
||||
|
||||
If the session is not in memory but exists in the database (e.g. after
|
||||
a process restart), it is transparently restored.
|
||||
"""
|
||||
with self._lock:
|
||||
state = self._sessions.get(session_id)
|
||||
if state is not None:
|
||||
return state
|
||||
# Attempt to restore from database.
|
||||
return self._restore(session_id)
|
||||
|
||||
def remove_session(self, session_id: str) -> bool:
|
||||
"""Remove a session from memory and database. Returns True if it existed."""
|
||||
with self._lock:
|
||||
existed = self._sessions.pop(session_id, None) is not None
|
||||
db_existed = self._delete_persisted(session_id)
|
||||
if existed or db_existed:
|
||||
_clear_task_cwd(session_id)
|
||||
return existed or db_existed
|
||||
|
||||
def fork_session(self, session_id: str, cwd: str = ".") -> Optional[SessionState]:
|
||||
"""Deep-copy a session's history into a new session."""
|
||||
import threading
|
||||
|
||||
cwd = _translate_acp_cwd(cwd)
|
||||
original = self.get_session(session_id) # checks DB too
|
||||
if original is None:
|
||||
return None
|
||||
|
||||
new_id = str(uuid.uuid4())
|
||||
agent = self._make_agent(
|
||||
session_id=new_id,
|
||||
cwd=cwd,
|
||||
model=original.model or None,
|
||||
)
|
||||
state = SessionState(
|
||||
session_id=new_id,
|
||||
agent=agent,
|
||||
cwd=cwd,
|
||||
model=getattr(agent, "model", original.model) or original.model,
|
||||
history=copy.deepcopy(original.history),
|
||||
cancel_event=threading.Event(),
|
||||
)
|
||||
with self._lock:
|
||||
self._sessions[new_id] = state
|
||||
_register_task_cwd(new_id, cwd)
|
||||
self._persist(state)
|
||||
logger.info("Forked ACP session %s -> %s", session_id, new_id)
|
||||
return state
|
||||
|
||||
def list_sessions(self, cwd: str | None = None) -> List[Dict[str, Any]]:
|
||||
"""Return lightweight info dicts for all sessions (memory + database)."""
|
||||
normalized_cwd = _normalize_cwd_for_compare(cwd) if cwd else None
|
||||
db = self._get_db()
|
||||
persisted_rows: dict[str, dict[str, Any]] = {}
|
||||
|
||||
if db is not None:
|
||||
try:
|
||||
for row in db.list_sessions_rich(source="acp", limit=1000):
|
||||
persisted_rows[str(row["id"])] = dict(row)
|
||||
except Exception:
|
||||
logger.debug("Failed to load ACP sessions from DB", exc_info=True)
|
||||
|
||||
# Collect in-memory sessions first.
|
||||
with self._lock:
|
||||
seen_ids = set(self._sessions.keys())
|
||||
results = []
|
||||
for s in self._sessions.values():
|
||||
history_len = len(s.history)
|
||||
if history_len <= 0:
|
||||
continue
|
||||
if normalized_cwd and _normalize_cwd_for_compare(s.cwd) != normalized_cwd:
|
||||
continue
|
||||
persisted = persisted_rows.get(s.session_id, {})
|
||||
preview = next(
|
||||
(
|
||||
str(msg.get("content") or "").strip()
|
||||
for msg in s.history
|
||||
if msg.get("role") == "user" and str(msg.get("content") or "").strip()
|
||||
),
|
||||
persisted.get("preview") or "",
|
||||
)
|
||||
results.append(
|
||||
{
|
||||
"session_id": s.session_id,
|
||||
"cwd": s.cwd,
|
||||
"model": s.model,
|
||||
"history_len": history_len,
|
||||
"title": _build_session_title(persisted.get("title"), preview, s.cwd),
|
||||
"updated_at": _format_updated_at(
|
||||
persisted.get("last_active") or persisted.get("started_at") or time.time()
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
# Merge any persisted sessions not currently in memory.
|
||||
for sid, row in persisted_rows.items():
|
||||
if sid in seen_ids:
|
||||
continue
|
||||
message_count = int(row.get("message_count") or 0)
|
||||
if message_count <= 0:
|
||||
continue
|
||||
# Extract cwd from model_config JSON.
|
||||
session_cwd = "."
|
||||
mc = row.get("model_config")
|
||||
if mc:
|
||||
try:
|
||||
session_cwd = json.loads(mc).get("cwd", ".")
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
if normalized_cwd and _normalize_cwd_for_compare(session_cwd) != normalized_cwd:
|
||||
continue
|
||||
results.append({
|
||||
"session_id": sid,
|
||||
"cwd": session_cwd,
|
||||
"model": row.get("model") or "",
|
||||
"history_len": message_count,
|
||||
"title": _build_session_title(row.get("title"), row.get("preview"), session_cwd),
|
||||
"updated_at": _format_updated_at(row.get("last_active") or row.get("started_at")),
|
||||
})
|
||||
|
||||
results.sort(key=lambda item: _updated_at_sort_key(item.get("updated_at")), reverse=True)
|
||||
return results
|
||||
|
||||
def update_cwd(self, session_id: str, cwd: str) -> Optional[SessionState]:
|
||||
"""Update the working directory for a session and its tool overrides."""
|
||||
cwd = _translate_acp_cwd(cwd)
|
||||
state = self.get_session(session_id) # checks DB too
|
||||
if state is None:
|
||||
return None
|
||||
state.cwd = cwd
|
||||
_register_task_cwd(session_id, cwd)
|
||||
self._persist(state)
|
||||
return state
|
||||
|
||||
def cleanup(self) -> None:
|
||||
"""Remove all sessions (memory and database) and clear task-specific cwd overrides."""
|
||||
with self._lock:
|
||||
session_ids = list(self._sessions.keys())
|
||||
self._sessions.clear()
|
||||
for session_id in session_ids:
|
||||
_clear_task_cwd(session_id)
|
||||
self._delete_persisted(session_id)
|
||||
# Also remove any DB-only ACP sessions not currently in memory.
|
||||
db = self._get_db()
|
||||
if db is not None:
|
||||
try:
|
||||
rows = db.search_sessions(source="acp", limit=10000)
|
||||
for row in rows:
|
||||
sid = row["id"]
|
||||
_clear_task_cwd(sid)
|
||||
db.delete_session(sid)
|
||||
except Exception:
|
||||
logger.debug("Failed to cleanup ACP sessions from DB", exc_info=True)
|
||||
|
||||
def save_session(self, session_id: str) -> None:
|
||||
"""Persist the current state of a session to the database.
|
||||
|
||||
Called by the server after prompt completion, slash commands that
|
||||
mutate history, and model switches.
|
||||
"""
|
||||
with self._lock:
|
||||
state = self._sessions.get(session_id)
|
||||
if state is not None:
|
||||
self._persist(state)
|
||||
|
||||
# ---- persistence via SessionDB ------------------------------------------
|
||||
|
||||
def _get_db(self):
|
||||
"""Lazily initialise and return the SessionDB instance.
|
||||
|
||||
Returns ``None`` if the DB is unavailable (e.g. import error in a
|
||||
minimal test environment).
|
||||
|
||||
Note: we resolve ``HERMES_HOME`` dynamically rather than relying on
|
||||
the module-level ``DEFAULT_DB_PATH`` constant, because that constant
|
||||
is evaluated at import time and won't reflect env-var changes made
|
||||
later (e.g. by the test fixture ``_isolate_hermes_home``).
|
||||
"""
|
||||
if self._db_instance is not None:
|
||||
return self._db_instance
|
||||
try:
|
||||
from hermes_state import SessionDB
|
||||
hermes_home = get_hermes_home()
|
||||
self._db_instance = SessionDB(db_path=hermes_home / "state.db")
|
||||
return self._db_instance
|
||||
except Exception:
|
||||
logger.debug("SessionDB unavailable for ACP persistence", exc_info=True)
|
||||
return None
|
||||
|
||||
def _persist(self, state: SessionState) -> None:
|
||||
"""Write session state to the database.
|
||||
|
||||
Creates the session record if it doesn't exist, then replaces all
|
||||
stored messages with the current in-memory history.
|
||||
"""
|
||||
db = self._get_db()
|
||||
if db is None:
|
||||
return
|
||||
|
||||
# Ensure model is a plain string (not a MagicMock or other proxy).
|
||||
model_str = str(state.model) if state.model else None
|
||||
session_meta = {"cwd": state.cwd}
|
||||
provider = getattr(state.agent, "provider", None)
|
||||
base_url = getattr(state.agent, "base_url", None)
|
||||
api_mode = getattr(state.agent, "api_mode", None)
|
||||
if isinstance(provider, str) and provider.strip():
|
||||
session_meta["provider"] = provider.strip()
|
||||
if isinstance(base_url, str) and base_url.strip():
|
||||
session_meta["base_url"] = base_url.strip()
|
||||
if isinstance(api_mode, str) and api_mode.strip():
|
||||
session_meta["api_mode"] = api_mode.strip()
|
||||
cwd_json = json.dumps(session_meta)
|
||||
|
||||
try:
|
||||
# Ensure the session record exists.
|
||||
existing = db.get_session(state.session_id)
|
||||
if existing is None:
|
||||
db.create_session(
|
||||
session_id=state.session_id,
|
||||
source="acp",
|
||||
model=model_str,
|
||||
model_config={"cwd": state.cwd},
|
||||
)
|
||||
else:
|
||||
# Update model_config (contains cwd) if changed.
|
||||
try:
|
||||
db.update_session_meta(state.session_id, cwd_json, model_str)
|
||||
except Exception:
|
||||
logger.debug("Failed to update ACP session metadata", exc_info=True)
|
||||
|
||||
# Replace stored messages with current history atomically so a
|
||||
# mid-rewrite failure rolls back and the previously persisted
|
||||
# conversation is preserved (salvaged from #13675).
|
||||
db.replace_messages(state.session_id, state.history)
|
||||
except Exception:
|
||||
logger.warning("Failed to persist ACP session %s", state.session_id, exc_info=True)
|
||||
|
||||
def _restore(self, session_id: str) -> Optional[SessionState]:
|
||||
"""Load a session from the database into memory, recreating the AIAgent."""
|
||||
import threading
|
||||
|
||||
db = self._get_db()
|
||||
if db is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
row = db.get_session(session_id)
|
||||
except Exception:
|
||||
logger.debug("Failed to query DB for ACP session %s", session_id, exc_info=True)
|
||||
return None
|
||||
|
||||
if row is None:
|
||||
return None
|
||||
|
||||
# Only restore ACP sessions.
|
||||
if row.get("source") != "acp":
|
||||
return None
|
||||
|
||||
# Extract cwd from model_config.
|
||||
cwd = "."
|
||||
requested_provider = row.get("billing_provider")
|
||||
restored_base_url = row.get("billing_base_url")
|
||||
restored_api_mode = None
|
||||
mc = row.get("model_config")
|
||||
if mc:
|
||||
try:
|
||||
meta = json.loads(mc)
|
||||
if isinstance(meta, dict):
|
||||
cwd = meta.get("cwd", ".")
|
||||
requested_provider = meta.get("provider") or requested_provider
|
||||
restored_base_url = meta.get("base_url") or restored_base_url
|
||||
restored_api_mode = meta.get("api_mode") or restored_api_mode
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
|
||||
model = row.get("model") or None
|
||||
|
||||
# Load conversation history.
|
||||
try:
|
||||
history = db.get_messages_as_conversation(session_id)
|
||||
except Exception:
|
||||
logger.warning("Failed to load messages for ACP session %s", session_id, exc_info=True)
|
||||
history = []
|
||||
|
||||
try:
|
||||
agent = self._make_agent(
|
||||
session_id=session_id,
|
||||
cwd=cwd,
|
||||
model=model,
|
||||
requested_provider=requested_provider,
|
||||
base_url=restored_base_url,
|
||||
api_mode=restored_api_mode,
|
||||
)
|
||||
except Exception:
|
||||
logger.warning("Failed to recreate agent for ACP session %s", session_id, exc_info=True)
|
||||
return None
|
||||
|
||||
state = SessionState(
|
||||
session_id=session_id,
|
||||
agent=agent,
|
||||
cwd=cwd,
|
||||
model=model or getattr(agent, "model", "") or "",
|
||||
history=history,
|
||||
cancel_event=threading.Event(),
|
||||
)
|
||||
with self._lock:
|
||||
self._sessions[session_id] = state
|
||||
_register_task_cwd(session_id, cwd)
|
||||
logger.info("Restored ACP session %s from DB (%d messages)", session_id, len(history))
|
||||
return state
|
||||
|
||||
def _delete_persisted(self, session_id: str) -> bool:
|
||||
"""Delete a session from the database. Returns True if it existed."""
|
||||
db = self._get_db()
|
||||
if db is None:
|
||||
return False
|
||||
try:
|
||||
return db.delete_session(session_id)
|
||||
except Exception:
|
||||
logger.debug("Failed to delete ACP session %s from DB", session_id, exc_info=True)
|
||||
return False
|
||||
|
||||
# ---- internal -----------------------------------------------------------
|
||||
|
||||
def _make_agent(
|
||||
self,
|
||||
*,
|
||||
session_id: str,
|
||||
cwd: str,
|
||||
model: str | None = None,
|
||||
requested_provider: str | None = None,
|
||||
base_url: str | None = None,
|
||||
api_mode: str | None = None,
|
||||
):
|
||||
if self._agent_factory is not None:
|
||||
return self._agent_factory()
|
||||
|
||||
from run_agent import AIAgent
|
||||
from hermes_cli.config import load_config
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
|
||||
config = load_config()
|
||||
model_cfg = config.get("model")
|
||||
default_model = ""
|
||||
config_provider = None
|
||||
if isinstance(model_cfg, dict):
|
||||
default_model = str(model_cfg.get("default") or default_model)
|
||||
config_provider = model_cfg.get("provider")
|
||||
elif isinstance(model_cfg, str) and model_cfg.strip():
|
||||
default_model = model_cfg.strip()
|
||||
|
||||
configured_mcp_servers = [
|
||||
name
|
||||
for name, cfg in (config.get("mcp_servers") or {}).items()
|
||||
if not isinstance(cfg, dict) or cfg.get("enabled", True) is not False
|
||||
]
|
||||
|
||||
kwargs = {
|
||||
"platform": "acp",
|
||||
"enabled_toolsets": _expand_acp_enabled_toolsets(
|
||||
["hermes-acp"],
|
||||
mcp_server_names=configured_mcp_servers,
|
||||
),
|
||||
"quiet_mode": True,
|
||||
"session_id": session_id,
|
||||
"session_db": self._get_db(),
|
||||
"model": model or default_model,
|
||||
}
|
||||
|
||||
try:
|
||||
runtime = resolve_runtime_provider(requested=requested_provider or config_provider)
|
||||
kwargs.update(
|
||||
{
|
||||
"provider": runtime.get("provider"),
|
||||
"api_mode": api_mode or runtime.get("api_mode"),
|
||||
"base_url": base_url or runtime.get("base_url"),
|
||||
"api_key": runtime.get("api_key"),
|
||||
"command": runtime.get("command"),
|
||||
"args": list(runtime.get("args") or []),
|
||||
}
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("ACP session falling back to default provider resolution", exc_info=True)
|
||||
|
||||
_register_task_cwd(session_id, cwd)
|
||||
agent = AIAgent(**kwargs)
|
||||
# ACP stdio transport requires stdout to remain protocol-only JSON-RPC.
|
||||
# Route any incidental human-readable agent output to stderr instead.
|
||||
agent._print_fn = _acp_stderr_print
|
||||
return agent
|
||||
1291
acp_adapter/tools.py
1291
acp_adapter/tools.py
File diff suppressed because it is too large
Load Diff
@@ -1,16 +0,0 @@
|
||||
{
|
||||
"id": "hermes-agent",
|
||||
"name": "Hermes Agent",
|
||||
"version": "0.16.0",
|
||||
"description": "Self-improving open-source AI agent by Nous Research with ACP editor integration, persistent memory, skills, and rich tool support.",
|
||||
"repository": "https://github.com/NousResearch/hermes-agent",
|
||||
"website": "https://hermes-agent.nousresearch.com/docs/user-guide/features/acp",
|
||||
"authors": ["Nous Research"],
|
||||
"license": "MIT",
|
||||
"distribution": {
|
||||
"uvx": {
|
||||
"package": "hermes-agent[acp]==0.16.0",
|
||||
"args": ["hermes-acp"]
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" width="16" height="16" fill="none">
|
||||
<path d="M8 1.5v13" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
|
||||
<path d="M8 3.25c-2.35-1.4-4.7-.95-6.25.35 1.85-.2 3.8.2 5.55 1.55" stroke="currentColor" stroke-width="1.1" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M8 3.25c2.35-1.4 4.7-.95 6.25.35-1.85-.2-3.8.2-5.55 1.55" stroke="currentColor" stroke-width="1.1" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M8 13.25c-2.3-1-3.05-2.65-1.35-4.15-2 .8-2.35 2.95-.35 4" stroke="currentColor" stroke-width="1.1" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M8 13.25c2.3-1 3.05-2.65 1.35-4.15 2 .8 2.35 2.95.35 4" stroke="currentColor" stroke-width="1.1" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<circle cx="8" cy="1.8" r="1.1" fill="currentColor"/>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 882 B |
@@ -1,8 +0,0 @@
|
||||
"""Agent internals -- extracted modules from run_agent.py.
|
||||
|
||||
These modules contain pure utility functions and self-contained classes
|
||||
that were previously embedded in the 3,600-line run_agent.py. Extracting
|
||||
them makes run_agent.py focused on the AIAgent orchestrator class.
|
||||
"""
|
||||
|
||||
from . import jiter_preload as _jiter_preload # noqa: F401
|
||||
@@ -1,550 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from typing import TYPE_CHECKING, Any, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token
|
||||
from hermes_cli.auth import _read_codex_tokens, resolve_codex_runtime_credentials
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing import TypeGuard
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _utc_now() -> datetime:
|
||||
return datetime.now(timezone.utc)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class AccountUsageWindow:
|
||||
label: str
|
||||
used_percent: Optional[float] = None
|
||||
reset_at: Optional[datetime] = None
|
||||
detail: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class AccountUsageSnapshot:
|
||||
provider: str
|
||||
source: str
|
||||
fetched_at: datetime
|
||||
title: str = "Account limits"
|
||||
plan: Optional[str] = None
|
||||
windows: tuple[AccountUsageWindow, ...] = ()
|
||||
details: tuple[str, ...] = ()
|
||||
unavailable_reason: Optional[str] = None
|
||||
|
||||
@property
|
||||
def available(self) -> bool:
|
||||
return bool(self.windows or self.details) and not self.unavailable_reason
|
||||
|
||||
|
||||
def _title_case_slug(value: Optional[str]) -> Optional[str]:
|
||||
cleaned = str(value or "").strip()
|
||||
if not cleaned:
|
||||
return None
|
||||
return cleaned.replace("_", " ").replace("-", " ").title()
|
||||
|
||||
|
||||
def _parse_dt(value: Any) -> Optional[datetime]:
|
||||
if value in {None, ""}:
|
||||
return None
|
||||
if isinstance(value, (int, float)):
|
||||
return datetime.fromtimestamp(float(value), tz=timezone.utc)
|
||||
if isinstance(value, str):
|
||||
text = value.strip()
|
||||
if not text:
|
||||
return None
|
||||
if text.endswith("Z"):
|
||||
text = text[:-1] + "+00:00"
|
||||
try:
|
||||
dt = datetime.fromisoformat(text)
|
||||
return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
|
||||
except ValueError:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def _format_reset(dt: Optional[datetime]) -> str:
|
||||
if not dt:
|
||||
return "unknown"
|
||||
local_dt = dt.astimezone()
|
||||
delta = dt - _utc_now()
|
||||
total_seconds = int(delta.total_seconds())
|
||||
if total_seconds <= 0:
|
||||
return f"now ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
|
||||
hours, rem = divmod(total_seconds, 3600)
|
||||
minutes = rem // 60
|
||||
if hours >= 24:
|
||||
days, hours = divmod(hours, 24)
|
||||
rel = f"in {days}d {hours}h"
|
||||
elif hours > 0:
|
||||
rel = f"in {hours}h {minutes}m"
|
||||
else:
|
||||
rel = f"in {minutes}m"
|
||||
return f"{rel} ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
|
||||
|
||||
|
||||
def render_account_usage_lines(snapshot: Optional[AccountUsageSnapshot], *, markdown: bool = False) -> list[str]:
|
||||
if not snapshot:
|
||||
return []
|
||||
header = f"📈 {'**' if markdown else ''}{snapshot.title}{'**' if markdown else ''}"
|
||||
lines = [header]
|
||||
if snapshot.plan:
|
||||
lines.append(f"Provider: {snapshot.provider} ({snapshot.plan})")
|
||||
else:
|
||||
lines.append(f"Provider: {snapshot.provider}")
|
||||
for window in snapshot.windows:
|
||||
if window.used_percent is None:
|
||||
base = f"{window.label}: unavailable"
|
||||
else:
|
||||
remaining = max(0, round(100 - float(window.used_percent)))
|
||||
used = max(0, round(float(window.used_percent)))
|
||||
base = f"{window.label}: {remaining}% remaining ({used}% used)"
|
||||
if window.reset_at:
|
||||
base += f" • resets {_format_reset(window.reset_at)}"
|
||||
elif window.detail:
|
||||
base += f" • {window.detail}"
|
||||
lines.append(base)
|
||||
for detail in snapshot.details:
|
||||
lines.append(detail)
|
||||
if snapshot.unavailable_reason:
|
||||
lines.append(f"Unavailable: {snapshot.unavailable_reason}")
|
||||
return lines
|
||||
|
||||
|
||||
def _fmt_usd(d: float) -> str:
|
||||
return f"${d:,.2f}"
|
||||
|
||||
|
||||
def _is_finite_num(v: Any) -> TypeGuard[float]:
|
||||
"""True iff v is a real numeric value (int or float, not bool, not NaN/Inf).
|
||||
|
||||
Typed as a ``TypeGuard[float]`` so the type checker narrows ``v`` to a real
|
||||
number in the positive branch — callers can then do arithmetic / pass it to
|
||||
``_fmt_usd`` without a None-operand warning.
|
||||
"""
|
||||
return isinstance(v, (int, float)) and not isinstance(v, bool) and math.isfinite(v)
|
||||
|
||||
|
||||
def build_nous_credits_snapshot(account_info) -> Optional[AccountUsageSnapshot]:
|
||||
"""Map a NousPortalAccountInfo into an AccountUsageSnapshot for /usage.
|
||||
|
||||
Shows dollar magnitudes (subscription / top-up / total) + renewal date + a
|
||||
portal CTA. When the portal supplies a subscription denominator
|
||||
(``monthly_credits``), also emits a subscription-usage window so the renderer
|
||||
shows a real ``% used`` gauge; when it's absent (older portals) the view
|
||||
gracefully degrades to magnitudes-only. Returns None when there's no usable
|
||||
account info to show (fail-open: caller just shows nothing).
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.nous_account import nous_portal_billing_url
|
||||
|
||||
if account_info is None or not getattr(account_info, "logged_in", False):
|
||||
return None
|
||||
|
||||
access = getattr(account_info, "paid_service_access_info", None)
|
||||
sub = getattr(account_info, "subscription", None)
|
||||
|
||||
windows: list[AccountUsageWindow] = []
|
||||
details: list[str] = []
|
||||
|
||||
# Subscription usage gauge — only when the portal supplies a positive
|
||||
# monthly_credits denominator AND a finite remaining balance that does
|
||||
# not exceed the cap. Money math is on float dollars (allowed: numeric
|
||||
# account fields, NOT a server-provided *_usd string). used = cap -
|
||||
# remaining; clamp [0,100] so a debt balance (remaining < 0) reads 100%.
|
||||
# Excluded on purpose:
|
||||
# - non-finite values (NaN/Infinity slip past isinstance and json.loads
|
||||
# parses bare NaN/Infinity by default) → would render "$nan"/"$inf"
|
||||
# and a falsely-confident gauge;
|
||||
# - remaining > cap (rollover balance spanning the period) → monthly_credits
|
||||
# is no longer a meaningful denominator, and "$X of $Y left" with X>Y
|
||||
# reads as a contradiction. Both fall back to the magnitudes lines.
|
||||
if sub is not None:
|
||||
monthly_credits = getattr(sub, "monthly_credits", None)
|
||||
sub_remaining = getattr(sub, "credits_remaining", None)
|
||||
if (
|
||||
_is_finite_num(monthly_credits)
|
||||
and monthly_credits > 0
|
||||
and _is_finite_num(sub_remaining)
|
||||
and sub_remaining <= monthly_credits
|
||||
):
|
||||
used = monthly_credits - sub_remaining
|
||||
used_pct = max(0.0, min(100.0, used / monthly_credits * 100.0))
|
||||
windows.append(
|
||||
AccountUsageWindow(
|
||||
label="Subscription",
|
||||
used_percent=used_pct,
|
||||
detail=f"{_fmt_usd(sub_remaining)} of {_fmt_usd(monthly_credits)} left",
|
||||
)
|
||||
)
|
||||
|
||||
if access is not None:
|
||||
sub_credits = getattr(access, "subscription_credits_remaining", None)
|
||||
if _is_finite_num(sub_credits):
|
||||
details.append(f"Subscription credits: {_fmt_usd(sub_credits)}")
|
||||
purchased = getattr(access, "purchased_credits_remaining", None)
|
||||
if _is_finite_num(purchased):
|
||||
details.append(f"Top-up credits: {_fmt_usd(purchased)}")
|
||||
total_usable = getattr(access, "total_usable_credits", None)
|
||||
if _is_finite_num(total_usable):
|
||||
details.append(f"Total usable: {_fmt_usd(total_usable)}")
|
||||
|
||||
if sub is not None:
|
||||
rollover = getattr(sub, "rollover_credits", None)
|
||||
if _is_finite_num(rollover) and rollover > 0:
|
||||
details.append(f"Rollover: {_fmt_usd(rollover)}")
|
||||
period_end = getattr(sub, "current_period_end", None)
|
||||
if period_end:
|
||||
details.append(f"Renews: {period_end}")
|
||||
|
||||
paid = getattr(account_info, "paid_service_access", None)
|
||||
if paid is False:
|
||||
details.append("Status: access depleted — top up to restore")
|
||||
|
||||
if not windows and not details:
|
||||
return None
|
||||
|
||||
details.append(f"Manage / top up: {nous_portal_billing_url(account_info)}")
|
||||
|
||||
plan = getattr(sub, "plan", None) if sub is not None else None
|
||||
return AccountUsageSnapshot(
|
||||
provider="nous",
|
||||
source="portal-account",
|
||||
fetched_at=_utc_now(),
|
||||
title="Nous credits",
|
||||
plan=plan,
|
||||
windows=tuple(windows),
|
||||
details=tuple(details),
|
||||
)
|
||||
except (AttributeError, TypeError):
|
||||
return None
|
||||
|
||||
|
||||
def nous_credits_lines(*, markdown: bool = False, timeout: float = 10.0) -> list[str]:
|
||||
"""Return rendered Nous-credits /usage lines, or [] when there's nothing to show.
|
||||
|
||||
Account-independent of any live agent: gated on "a Nous account is logged in"
|
||||
(a cheap local auth-state check), then a wall-clock-bounded portal fetch. Shared
|
||||
by the CLI ``_show_usage`` and the TUI ``session.usage`` RPC so both surfaces show
|
||||
the same block regardless of session API-call count or resume state. Fail-open:
|
||||
any auth/portal hiccup or timeout returns [] (the caller shows nothing).
|
||||
|
||||
Dev override: when HERMES_DEV_CREDITS_FIXTURE selects a fixture state, /usage
|
||||
renders from that fixture instead of the real portal (so the block + gauge are
|
||||
testable without a live account). Throwaway scaffolding.
|
||||
"""
|
||||
# Dev fixture short-circuit — render /usage from the injected state, no portal.
|
||||
try:
|
||||
from agent.credits_tracker import dev_fixture_credits_state
|
||||
|
||||
fixture = dev_fixture_credits_state()
|
||||
except Exception:
|
||||
fixture = None
|
||||
if fixture is not None:
|
||||
snapshot = _snapshot_from_credits_state(fixture)
|
||||
return render_account_usage_lines(snapshot, markdown=markdown)
|
||||
|
||||
try:
|
||||
from hermes_cli.auth import get_provider_auth_state
|
||||
|
||||
tok = (get_provider_auth_state("nous") or {}).get("access_token")
|
||||
if not (isinstance(tok, str) and tok.strip()):
|
||||
return []
|
||||
except Exception:
|
||||
return []
|
||||
try:
|
||||
import concurrent.futures
|
||||
|
||||
from hermes_cli.nous_account import get_nous_portal_account_info
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
|
||||
account = pool.submit(
|
||||
get_nous_portal_account_info, force_fresh=True
|
||||
).result(timeout=timeout)
|
||||
snapshot = build_nous_credits_snapshot(account)
|
||||
return render_account_usage_lines(snapshot, markdown=markdown)
|
||||
except Exception:
|
||||
# Fail-open (caller shows nothing), but leave a breadcrumb so a dead
|
||||
# /usage credits block is diagnosable in agent.log without a dev flag.
|
||||
logger.debug("credits ▸ /usage portal fetch/render failed (fail-open)", exc_info=True)
|
||||
return []
|
||||
|
||||
|
||||
def _snapshot_from_credits_state(state) -> Optional[AccountUsageSnapshot]:
|
||||
"""Map a header-shaped CreditsState (e.g. a dev fixture) to the /usage snapshot.
|
||||
|
||||
Renders the same magnitudes + monthly-grant % window the portal path produces,
|
||||
so HERMES_DEV_CREDITS_FIXTURE can exercise /usage without a live account. The
|
||||
*_usd strings are mock display values here (not server balance to compute on);
|
||||
the % comes from CreditsState.used_fraction (micros math). Fail-open → None.
|
||||
"""
|
||||
try:
|
||||
if state is None:
|
||||
return None
|
||||
|
||||
windows: list[AccountUsageWindow] = []
|
||||
details: list[str] = []
|
||||
|
||||
uf = getattr(state, "used_fraction", None)
|
||||
if isinstance(uf, (int, float)) and math.isfinite(uf):
|
||||
cap_usd = getattr(state, "subscription_limit_usd", None)
|
||||
sub_usd = getattr(state, "subscription_usd", None)
|
||||
detail = None
|
||||
if sub_usd and cap_usd:
|
||||
detail = f"${sub_usd} of ${cap_usd} left"
|
||||
windows.append(
|
||||
AccountUsageWindow(
|
||||
label="Subscription",
|
||||
used_percent=max(0.0, min(100.0, uf * 100.0)),
|
||||
detail=detail,
|
||||
)
|
||||
)
|
||||
|
||||
sub_usd = getattr(state, "subscription_usd", None)
|
||||
if sub_usd:
|
||||
details.append(f"Subscription credits: ${sub_usd}")
|
||||
purchased_usd = getattr(state, "purchased_usd", None)
|
||||
if purchased_usd:
|
||||
details.append(f"Top-up credits: ${purchased_usd}")
|
||||
remaining_usd = getattr(state, "remaining_usd", None)
|
||||
if remaining_usd:
|
||||
details.append(f"Total usable: ${remaining_usd}")
|
||||
if getattr(state, "paid_access", True) is False:
|
||||
details.append("Status: access depleted — top up to restore")
|
||||
|
||||
if not windows and not details:
|
||||
return None
|
||||
|
||||
details.append("(dev fixture — HERMES_DEV_CREDITS_FIXTURE)")
|
||||
return AccountUsageSnapshot(
|
||||
provider="nous",
|
||||
source="dev-fixture",
|
||||
fetched_at=_utc_now(),
|
||||
title="Nous credits",
|
||||
windows=tuple(windows),
|
||||
details=tuple(details),
|
||||
)
|
||||
except (AttributeError, TypeError):
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_codex_usage_url(base_url: str) -> str:
|
||||
normalized = (base_url or "").strip().rstrip("/")
|
||||
if not normalized:
|
||||
normalized = "https://chatgpt.com/backend-api/codex"
|
||||
if normalized.endswith("/codex"):
|
||||
normalized = normalized[: -len("/codex")]
|
||||
if "/backend-api" in normalized:
|
||||
return normalized + "/wham/usage"
|
||||
return normalized + "/api/codex/usage"
|
||||
|
||||
|
||||
def _fetch_codex_account_usage() -> Optional[AccountUsageSnapshot]:
|
||||
creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
|
||||
token_data = _read_codex_tokens()
|
||||
tokens = token_data.get("tokens") or {}
|
||||
account_id = str(tokens.get("account_id", "") or "").strip() or None
|
||||
headers = {
|
||||
"Authorization": f"Bearer {creds['api_key']}",
|
||||
"Accept": "application/json",
|
||||
"User-Agent": "codex-cli",
|
||||
}
|
||||
if account_id:
|
||||
headers["ChatGPT-Account-Id"] = account_id
|
||||
with httpx.Client(timeout=15.0) as client:
|
||||
response = client.get(_resolve_codex_usage_url(creds.get("base_url", "")), headers=headers)
|
||||
response.raise_for_status()
|
||||
payload = response.json() or {}
|
||||
rate_limit = payload.get("rate_limit") or {}
|
||||
windows: list[AccountUsageWindow] = []
|
||||
for key, label in (("primary_window", "Session"), ("secondary_window", "Weekly")):
|
||||
window = rate_limit.get(key) or {}
|
||||
used = window.get("used_percent")
|
||||
if used is None:
|
||||
continue
|
||||
windows.append(
|
||||
AccountUsageWindow(
|
||||
label=label,
|
||||
used_percent=float(used),
|
||||
reset_at=_parse_dt(window.get("reset_at")),
|
||||
)
|
||||
)
|
||||
details: list[str] = []
|
||||
credits = payload.get("credits") or {}
|
||||
if credits.get("has_credits"):
|
||||
balance = credits.get("balance")
|
||||
if isinstance(balance, (int, float)):
|
||||
details.append(f"Credits balance: ${float(balance):.2f}")
|
||||
elif credits.get("unlimited"):
|
||||
details.append("Credits balance: unlimited")
|
||||
return AccountUsageSnapshot(
|
||||
provider="openai-codex",
|
||||
source="usage_api",
|
||||
fetched_at=_utc_now(),
|
||||
plan=_title_case_slug(payload.get("plan_type")),
|
||||
windows=tuple(windows),
|
||||
details=tuple(details),
|
||||
)
|
||||
|
||||
|
||||
def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]:
|
||||
token = (resolve_anthropic_token() or "").strip()
|
||||
if not token:
|
||||
return None
|
||||
if not _is_oauth_token(token):
|
||||
return AccountUsageSnapshot(
|
||||
provider="anthropic",
|
||||
source="oauth_usage_api",
|
||||
fetched_at=_utc_now(),
|
||||
unavailable_reason="Anthropic account limits are only available for OAuth-backed Claude accounts.",
|
||||
)
|
||||
headers = {
|
||||
"Authorization": f"Bearer {token}",
|
||||
"Accept": "application/json",
|
||||
"Content-Type": "application/json",
|
||||
"anthropic-beta": "oauth-2025-04-20",
|
||||
"User-Agent": "claude-code/2.1.0",
|
||||
}
|
||||
with httpx.Client(timeout=15.0) as client:
|
||||
response = client.get("https://api.anthropic.com/api/oauth/usage", headers=headers)
|
||||
response.raise_for_status()
|
||||
payload = response.json() or {}
|
||||
windows: list[AccountUsageWindow] = []
|
||||
mapping = (
|
||||
("five_hour", "Current session"),
|
||||
("seven_day", "Current week"),
|
||||
("seven_day_opus", "Opus week"),
|
||||
("seven_day_sonnet", "Sonnet week"),
|
||||
)
|
||||
for key, label in mapping:
|
||||
window = payload.get(key) or {}
|
||||
util = window.get("utilization")
|
||||
if util is None:
|
||||
continue
|
||||
used = float(util) * 100 if float(util) <= 1 else float(util)
|
||||
windows.append(
|
||||
AccountUsageWindow(
|
||||
label=label,
|
||||
used_percent=used,
|
||||
reset_at=_parse_dt(window.get("resets_at")),
|
||||
)
|
||||
)
|
||||
details: list[str] = []
|
||||
extra = payload.get("extra_usage") or {}
|
||||
if extra.get("is_enabled"):
|
||||
used_credits = extra.get("used_credits")
|
||||
monthly_limit = extra.get("monthly_limit")
|
||||
currency = extra.get("currency") or "USD"
|
||||
if isinstance(used_credits, (int, float)) and isinstance(monthly_limit, (int, float)):
|
||||
details.append(
|
||||
f"Extra usage: {used_credits:.2f} / {monthly_limit:.2f} {currency}"
|
||||
)
|
||||
return AccountUsageSnapshot(
|
||||
provider="anthropic",
|
||||
source="oauth_usage_api",
|
||||
fetched_at=_utc_now(),
|
||||
windows=tuple(windows),
|
||||
details=tuple(details),
|
||||
)
|
||||
|
||||
|
||||
def _fetch_openrouter_account_usage(base_url: Optional[str], api_key: Optional[str]) -> Optional[AccountUsageSnapshot]:
|
||||
runtime = resolve_runtime_provider(
|
||||
requested="openrouter",
|
||||
explicit_base_url=base_url,
|
||||
explicit_api_key=api_key,
|
||||
)
|
||||
token = str(runtime.get("api_key", "") or "").strip()
|
||||
if not token:
|
||||
return None
|
||||
normalized = str(runtime.get("base_url", "") or "").rstrip("/")
|
||||
credits_url = f"{normalized}/credits"
|
||||
key_url = f"{normalized}/key"
|
||||
headers = {
|
||||
"Authorization": f"Bearer {token}",
|
||||
"Accept": "application/json",
|
||||
}
|
||||
with httpx.Client(timeout=10.0) as client:
|
||||
credits_resp = client.get(credits_url, headers=headers)
|
||||
credits_resp.raise_for_status()
|
||||
credits = (credits_resp.json() or {}).get("data") or {}
|
||||
try:
|
||||
key_resp = client.get(key_url, headers=headers)
|
||||
key_resp.raise_for_status()
|
||||
key_data = (key_resp.json() or {}).get("data") or {}
|
||||
except Exception:
|
||||
key_data = {}
|
||||
total_credits = float(credits.get("total_credits") or 0.0)
|
||||
total_usage = float(credits.get("total_usage") or 0.0)
|
||||
details = [f"Credits balance: ${max(0.0, total_credits - total_usage):.2f}"]
|
||||
windows: list[AccountUsageWindow] = []
|
||||
limit = key_data.get("limit")
|
||||
limit_remaining = key_data.get("limit_remaining")
|
||||
limit_reset = str(key_data.get("limit_reset") or "").strip()
|
||||
usage = key_data.get("usage")
|
||||
if (
|
||||
isinstance(limit, (int, float))
|
||||
and float(limit) > 0
|
||||
and isinstance(limit_remaining, (int, float))
|
||||
and 0 <= float(limit_remaining) <= float(limit)
|
||||
):
|
||||
limit_value = float(limit)
|
||||
remaining_value = float(limit_remaining)
|
||||
used_percent = ((limit_value - remaining_value) / limit_value) * 100
|
||||
detail_parts = [f"${remaining_value:.2f} of ${limit_value:.2f} remaining"]
|
||||
if limit_reset:
|
||||
detail_parts.append(f"resets {limit_reset}")
|
||||
windows.append(
|
||||
AccountUsageWindow(
|
||||
label="API key quota",
|
||||
used_percent=used_percent,
|
||||
detail=" • ".join(detail_parts),
|
||||
)
|
||||
)
|
||||
if isinstance(usage, (int, float)):
|
||||
usage_parts = [f"API key usage: ${float(usage):.2f} total"]
|
||||
for value, label in (
|
||||
(key_data.get("usage_daily"), "today"),
|
||||
(key_data.get("usage_weekly"), "this week"),
|
||||
(key_data.get("usage_monthly"), "this month"),
|
||||
):
|
||||
if isinstance(value, (int, float)) and float(value) > 0:
|
||||
usage_parts.append(f"${float(value):.2f} {label}")
|
||||
details.append(" • ".join(usage_parts))
|
||||
return AccountUsageSnapshot(
|
||||
provider="openrouter",
|
||||
source="credits_api",
|
||||
fetched_at=_utc_now(),
|
||||
windows=tuple(windows),
|
||||
details=tuple(details),
|
||||
)
|
||||
|
||||
|
||||
def fetch_account_usage(
|
||||
provider: Optional[str],
|
||||
*,
|
||||
base_url: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
) -> Optional[AccountUsageSnapshot]:
|
||||
normalized = str(provider or "").strip().lower()
|
||||
if normalized in {"", "auto", "custom"}:
|
||||
return None
|
||||
try:
|
||||
if normalized == "openai-codex":
|
||||
return _fetch_codex_account_usage()
|
||||
if normalized == "anthropic":
|
||||
return _fetch_anthropic_account_usage()
|
||||
if normalized == "openrouter":
|
||||
return _fetch_openrouter_account_usage(base_url, api_key)
|
||||
except Exception:
|
||||
return None
|
||||
return None
|
||||
1670
agent/agent_init.py
1670
agent/agent_init.py
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,68 +0,0 @@
|
||||
"""Async/sync bridging helpers.
|
||||
|
||||
The codebase has ~30 sites that schedule a coroutine onto an event loop from a
|
||||
worker thread via :func:`asyncio.run_coroutine_threadsafe`. That function can
|
||||
raise :class:`RuntimeError` (e.g. the loop was closed during a shutdown race),
|
||||
and when it does the coroutine object is never awaited and never closed —
|
||||
which triggers a ``"coroutine '<name>' was never awaited"`` RuntimeWarning and
|
||||
leaks the coroutine's frame until GC.
|
||||
|
||||
:func:`safe_schedule_threadsafe` wraps the call, closes the coroutine on
|
||||
scheduling failure, and returns ``None`` (instead of a half-formed future) so
|
||||
callers can branch cleanly:
|
||||
|
||||
fut = safe_schedule_threadsafe(coro, loop)
|
||||
if fut is None:
|
||||
return # or fallback behavior
|
||||
fut.result(timeout=5)
|
||||
|
||||
The helper deliberately does NOT also handle ``future.result()`` failures —
|
||||
that is a separate concern. Once the loop has accepted the coroutine, its
|
||||
lifecycle belongs to the loop, not the scheduling thread.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from concurrent.futures import Future
|
||||
from typing import Any, Coroutine, Optional
|
||||
|
||||
|
||||
_DEFAULT_LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def safe_schedule_threadsafe(
|
||||
coro: Coroutine[Any, Any, Any],
|
||||
loop: Optional[asyncio.AbstractEventLoop],
|
||||
*,
|
||||
logger: Optional[logging.Logger] = None,
|
||||
log_message: str = "Failed to schedule coroutine on loop",
|
||||
log_level: int = logging.DEBUG,
|
||||
) -> Optional[Future]:
|
||||
"""Schedule ``coro`` on ``loop`` from a sync context, leak-safe.
|
||||
|
||||
Returns the :class:`concurrent.futures.Future` on success, or ``None`` if
|
||||
the loop is missing or :func:`asyncio.run_coroutine_threadsafe` raised
|
||||
(e.g. the loop was closed during a shutdown race). In all failure paths
|
||||
the coroutine is :meth:`close`-d so it does not trigger
|
||||
``"coroutine was never awaited"`` warnings or leak its frame.
|
||||
|
||||
Callers retain full control over what to do with the returned future
|
||||
(call ``.result(timeout=...)``, attach ``add_done_callback``, ignore it
|
||||
fire-and-forget, etc.).
|
||||
"""
|
||||
log = logger if logger is not None else _DEFAULT_LOGGER
|
||||
|
||||
if loop is None:
|
||||
if asyncio.iscoroutine(coro):
|
||||
coro.close()
|
||||
log.log(log_level, "%s: loop is None", log_message)
|
||||
return None
|
||||
|
||||
try:
|
||||
return asyncio.run_coroutine_threadsafe(coro, loop)
|
||||
except Exception as exc:
|
||||
if asyncio.iscoroutine(coro):
|
||||
coro.close()
|
||||
log.log(log_level, "%s: %s", log_message, exc)
|
||||
return None
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,555 +0,0 @@
|
||||
"""Microsoft Entra ID adapter for Microsoft Foundry.
|
||||
|
||||
Provides keyless authentication for Microsoft Foundry deployments using the
|
||||
`azure-identity` SDK's `DefaultAzureCredential` chain (env service principal
|
||||
→ workload identity → managed identity → VS Code → Azure CLI → azd →
|
||||
PowerShell → broker).
|
||||
|
||||
Architecture mirrors `agent/bedrock_adapter.py`:
|
||||
|
||||
* Lazy import. `azure-identity` is only loaded when ``model.auth_mode =
|
||||
entra_id`` is selected. Users who stick with `AZURE_FOUNDRY_API_KEY`
|
||||
never pay the import cost.
|
||||
* SDK-callable contract. The public entry point ``build_token_provider``
|
||||
returns a zero-arg callable produced by ``get_bearer_token_provider`` —
|
||||
this is exactly the value Microsoft's documented sample plugs into
|
||||
``OpenAI(api_key=token_provider, base_url=...)``. The OpenAI SDK calls
|
||||
it before every request, so token refresh is transparent.
|
||||
* Three explicit consumer-side helpers (display / cache / http-bearer)
|
||||
rather than one generic "materialize" function — splitting them by
|
||||
purpose prevents accidental token-minting in logging paths or token
|
||||
leakage into cache keys / dashboard JSON.
|
||||
* No persisted JWT. ``azure-identity`` caches in-process and (where
|
||||
available) in the OS keychain or ``~/.IdentityService``. Hermes does
|
||||
not duplicate that storage in ``auth.json``.
|
||||
|
||||
Reference: https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/configure-entra-id
|
||||
|
||||
Requires: ``azure-identity`` (optional dependency — only needed when
|
||||
``model.auth_mode = entra_id``).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import functools
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Callable, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Microsoft-documented scope for Foundry inference auth. Both the new
|
||||
# Foundry portal and the legacy Azure OpenAI managed-identity docs use
|
||||
# this scope for ALL Foundry endpoint shapes (*.openai.azure.com,
|
||||
# *.services.ai.azure.com, *.ai.azure.com). The older control-plane
|
||||
# scope ``https://cognitiveservices.azure.com/.default`` is for ARM
|
||||
# resource management and is rejected for inference by newer
|
||||
# resources — users with that requirement override via
|
||||
# ``model.entra.scope`` in config.yaml.
|
||||
SCOPE_AI_AZURE_DEFAULT = "https://ai.azure.com/.default"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Lazy SDK import — only loaded when the Entra path is actually used.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_AZURE_IDENTITY_FEATURE = "provider.azure_identity"
|
||||
|
||||
|
||||
def has_azure_identity_installed() -> bool:
|
||||
"""Return True if `azure-identity` can be imported right now.
|
||||
|
||||
Cheap check — does not walk the credential chain.
|
||||
"""
|
||||
try:
|
||||
import azure.identity # noqa: F401
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _require_azure_identity():
|
||||
"""Import ``azure.identity``, lazy-installing it if allowed.
|
||||
|
||||
Raises ``ImportError`` with a clear actionable message when the
|
||||
package is missing and lazy installs are disabled.
|
||||
"""
|
||||
try:
|
||||
import azure.identity as _ai
|
||||
return _ai
|
||||
except ImportError:
|
||||
try:
|
||||
from tools.lazy_deps import ensure, FeatureUnavailable
|
||||
except ImportError as exc:
|
||||
raise ImportError(
|
||||
"The 'azure-identity' package is required for Azure AI "
|
||||
"Foundry Entra ID authentication. Install it with: "
|
||||
"pip install azure-identity"
|
||||
) from exc
|
||||
|
||||
try:
|
||||
ensure(_AZURE_IDENTITY_FEATURE, prompt=False)
|
||||
except FeatureUnavailable as exc:
|
||||
raise ImportError(
|
||||
"The 'azure-identity' package is required for Azure AI "
|
||||
"Foundry Entra ID authentication. " + str(exc)
|
||||
) from exc
|
||||
|
||||
# Retry import after lazy install.
|
||||
import azure.identity as _ai # noqa: WPS440
|
||||
return _ai
|
||||
|
||||
|
||||
def reset_credential_cache() -> None:
|
||||
"""Clear the cached ``DefaultAzureCredential``. Used by tests and
|
||||
profile switches.
|
||||
|
||||
Defensive against tests that ``monkeypatch.setattr`` over
|
||||
``build_credential`` with a plain (non-lru-cached) function — those
|
||||
won't expose ``cache_clear()`` until pytest reverts the patch.
|
||||
"""
|
||||
cache_clear = getattr(build_credential, "cache_clear", None)
|
||||
if callable(cache_clear):
|
||||
cache_clear()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Token-provider construction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class EntraIdentityConfig:
|
||||
"""Serializable Entra ID config.
|
||||
|
||||
Captures the Hermes-managed Entra knobs we need outside Azure SDK
|
||||
environment configuration. Everything else
|
||||
(tenant ID, service principal secret, federated token file, sovereign
|
||||
cloud authority, etc.) flows through azure-identity's standard
|
||||
``AZURE_*`` env vars — see the Bedrock pattern in
|
||||
``hermes_cli/runtime_provider.py:1310-1377`` for the analogous
|
||||
"let the SDK read env" approach.
|
||||
|
||||
``scope`` is Microsoft's documented Foundry inference audience. Almost
|
||||
everyone uses the default; sovereign-cloud / non-standard tenants can
|
||||
override via ``model.entra.scope``. Identity selection (user-assigned
|
||||
managed identity, workload identity, service principal, tenant, authority)
|
||||
stays in the standard Azure SDK env vars such as ``AZURE_CLIENT_ID``.
|
||||
|
||||
``exclude_interactive_browser`` is kept as an internal constructor knob
|
||||
so probes stay non-interactive by default. It is not written by the setup
|
||||
wizard.
|
||||
|
||||
The dataclass is frozen so it's hashable for ``functools.lru_cache``
|
||||
keying, and serializable across multiprocessing boundaries (workers
|
||||
rebuild the credential inside their own process).
|
||||
"""
|
||||
|
||||
scope: str = SCOPE_AI_AZURE_DEFAULT
|
||||
exclude_interactive_browser: bool = True
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
scope = str(self.scope or "").strip() or SCOPE_AI_AZURE_DEFAULT
|
||||
object.__setattr__(self, "scope", scope)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"scope": self.scope,
|
||||
"exclude_interactive_browser": self.exclude_interactive_browser,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Optional[Dict[str, Any]],
|
||||
*, default_scope: Optional[str] = None) -> "EntraIdentityConfig":
|
||||
data = data or {}
|
||||
scope = str(data.get("scope") or "").strip() or default_scope or SCOPE_AI_AZURE_DEFAULT
|
||||
exclude_browser = bool(data.get("exclude_interactive_browser", True))
|
||||
return cls(
|
||||
scope=scope,
|
||||
exclude_interactive_browser=exclude_browser,
|
||||
)
|
||||
|
||||
|
||||
def _build_default_credential(config: EntraIdentityConfig) -> Any:
|
||||
"""Construct a ``DefaultAzureCredential`` for ``config``.
|
||||
|
||||
Only Hermes-selected knobs are passed as kwargs. Everything else
|
||||
(tenant, service principal secret, federated token file, sovereign
|
||||
cloud authority, etc.) is read by ``azure-identity`` from the
|
||||
standard ``AZURE_*`` environment variables — see Microsoft's
|
||||
documented credential resolution chain. Users configure those in
|
||||
``~/.hermes/.env`` or the deployment environment.
|
||||
"""
|
||||
ai = _require_azure_identity()
|
||||
kwargs: Dict[str, Any] = {}
|
||||
# SDK default is True (browser excluded); only pass when the user
|
||||
# explicitly opts in to interactive browser auth.
|
||||
if not config.exclude_interactive_browser:
|
||||
kwargs["exclude_interactive_browser_credential"] = False
|
||||
return ai.DefaultAzureCredential(**kwargs)
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=1)
|
||||
def build_credential(config: EntraIdentityConfig) -> Any:
|
||||
"""Return the cached ``DefaultAzureCredential`` for ``config``.
|
||||
|
||||
Hermes processes use exactly one Entra config at a time (the
|
||||
``model.entra.*`` block in config.yaml drives every aux task,
|
||||
subagent, and credential probe in the session). ``maxsize=1`` is
|
||||
intentional: it reflects the actual usage pattern and keeps the
|
||||
cache trivially small.
|
||||
|
||||
``EntraIdentityConfig`` is a frozen dataclass, so it's hashable and
|
||||
safe as an LRU-cache key. ``functools.lru_cache`` is thread-safe in
|
||||
CPython.
|
||||
|
||||
If two distinct configs are ever passed (tests do this; production
|
||||
rarely), the LRU eviction handles it correctly — each call still
|
||||
returns a credential matching its config; only one is cached at a
|
||||
time. Use :func:`reset_credential_cache` to clear (e.g. in tests).
|
||||
"""
|
||||
return _build_default_credential(config)
|
||||
|
||||
|
||||
def build_token_provider(scope: Optional[str] = None,
|
||||
*,
|
||||
config: Optional[EntraIdentityConfig] = None,
|
||||
base_url: Optional[str] = None,
|
||||
exclude_interactive_browser: bool = True,
|
||||
) -> Callable[[], str]:
|
||||
"""Return a zero-arg callable that mints a fresh Entra bearer JWT.
|
||||
|
||||
The returned callable is exactly what Microsoft's documented Foundry
|
||||
sample expects::
|
||||
|
||||
from openai import OpenAI
|
||||
client = OpenAI(
|
||||
base_url="https://my-resource.openai.azure.com/openai/v1/",
|
||||
api_key=build_token_provider(),
|
||||
)
|
||||
|
||||
Scope resolution order:
|
||||
1. ``config.scope`` when a config object is supplied
|
||||
2. explicit ``scope`` kwarg
|
||||
3. ``SCOPE_AI_AZURE_DEFAULT`` (Microsoft's documented Foundry scope)
|
||||
|
||||
``base_url`` is unused today and kept for back-compat. Tenant /
|
||||
service-principal / sovereign-cloud configuration flows through
|
||||
``azure-identity``'s standard ``AZURE_*`` environment variables —
|
||||
see :func:`_build_default_credential` for the rationale.
|
||||
|
||||
NOT serializable across process boundaries. For multiprocessing
|
||||
workers, serialize the ``EntraIdentityConfig`` and rebuild the
|
||||
provider inside the worker.
|
||||
"""
|
||||
ai = _require_azure_identity()
|
||||
if config is None:
|
||||
config = EntraIdentityConfig(
|
||||
scope=scope or SCOPE_AI_AZURE_DEFAULT,
|
||||
exclude_interactive_browser=exclude_interactive_browser,
|
||||
)
|
||||
credential = build_credential(config)
|
||||
return ai.get_bearer_token_provider(credential, config.scope)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Credential probing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def has_azure_identity_credentials(scope: Optional[str] = None,
|
||||
*,
|
||||
config: Optional[EntraIdentityConfig] = None,
|
||||
timeout_seconds: float = 10.0,
|
||||
allow_install: bool = True,
|
||||
**overrides: Any) -> bool:
|
||||
"""Best-effort probe: can `DefaultAzureCredential` mint a token now?
|
||||
|
||||
Runs ``credential.get_token(scope)`` under a thread-based timeout so
|
||||
a slow token service can't hang the caller. Returns False on any
|
||||
error — never raises. Use for ``hermes doctor`` /
|
||||
``hermes auth status`` / wizard preflight.
|
||||
|
||||
``allow_install``: when True (default) and ``azure-identity`` is not
|
||||
importable, the adapter triggers the standard lazy-install path
|
||||
(subject to ``security.allow_lazy_installs``) before probing. Set
|
||||
False to make this strictly an "is installed?" check — used on hot
|
||||
paths like CLI startup where we never want pip to run.
|
||||
|
||||
NOT used by ``is_provider_configured()`` — that path is structural
|
||||
only (no token mint), so CLI startup doesn't pay this latency.
|
||||
"""
|
||||
if not has_azure_identity_installed():
|
||||
if not allow_install:
|
||||
return False
|
||||
try:
|
||||
_require_azure_identity()
|
||||
except ImportError as exc:
|
||||
logger.debug("azure-identity lazy install unavailable: %s", exc)
|
||||
return False
|
||||
if config is None:
|
||||
effective_scope = (scope or "").strip() or SCOPE_AI_AZURE_DEFAULT
|
||||
config = EntraIdentityConfig(scope=effective_scope, **overrides)
|
||||
|
||||
result = {"ok": False}
|
||||
|
||||
def _probe() -> None:
|
||||
try:
|
||||
credential = build_credential(config)
|
||||
tok = credential.get_token(config.scope)
|
||||
result["ok"] = bool(getattr(tok, "token", None))
|
||||
except Exception as exc:
|
||||
logger.debug("Entra credential probe failed: %s", exc)
|
||||
result["ok"] = False
|
||||
|
||||
thread = threading.Thread(target=_probe, daemon=True)
|
||||
thread.start()
|
||||
thread.join(timeout=max(0.01, timeout_seconds))
|
||||
if thread.is_alive():
|
||||
logger.debug("Entra token service probe timed out after %ss", timeout_seconds)
|
||||
return False
|
||||
return bool(result.get("ok"))
|
||||
|
||||
|
||||
def describe_active_credential(config: Optional[EntraIdentityConfig] = None,
|
||||
*,
|
||||
scope: Optional[str] = None,
|
||||
timeout_seconds: float = 10.0,
|
||||
allow_install: bool = True,
|
||||
**overrides: Any) -> Dict[str, Any]:
|
||||
"""Return diagnostic info about the active credential chain.
|
||||
|
||||
Best-effort: runs ``get_token()`` and inspects what came back.
|
||||
Designed for ``hermes doctor`` and the wizard preflight — never
|
||||
raises, returns ``{"ok": False, "error": ...}`` on failure.
|
||||
|
||||
``allow_install``: when True (default) and ``azure-identity`` is not
|
||||
importable, the adapter triggers the standard lazy-install path
|
||||
(subject to ``security.allow_lazy_installs``) before probing. The
|
||||
install failure is surfaced as the diagnostic error when it fails.
|
||||
Set False for hot CLI paths that should never trigger pip.
|
||||
|
||||
``azure-identity`` doesn't expose the winning inner credential as
|
||||
a public field, so we report a coarse picture (env vars present,
|
||||
token expiry, claims-derived tenant) rather than the credential
|
||||
class name. Users wanting the precise class can run with
|
||||
``AZURE_LOG_LEVEL=DEBUG``.
|
||||
"""
|
||||
info: Dict[str, Any] = {"ok": False}
|
||||
if not has_azure_identity_installed():
|
||||
if not allow_install:
|
||||
info["error"] = "azure-identity not installed"
|
||||
info["hint"] = (
|
||||
"pip install azure-identity (or rely on lazy install at "
|
||||
"first use)"
|
||||
)
|
||||
return info
|
||||
try:
|
||||
_require_azure_identity()
|
||||
except ImportError as exc:
|
||||
info["error"] = str(exc) or "azure-identity not installed"
|
||||
info["hint"] = (
|
||||
"pip install azure-identity manually, or enable lazy "
|
||||
"installs (security.allow_lazy_installs: true in "
|
||||
"config.yaml)."
|
||||
)
|
||||
return info
|
||||
|
||||
if config is None:
|
||||
effective_scope = (scope or "").strip() or SCOPE_AI_AZURE_DEFAULT
|
||||
config = EntraIdentityConfig(scope=effective_scope, **overrides)
|
||||
|
||||
info["scope"] = config.scope
|
||||
# Tenant / authority / service-principal config flow through the
|
||||
# standard ``AZURE_*`` env vars; surface them below.
|
||||
if os.environ.get("AZURE_TENANT_ID", "").strip():
|
||||
info["tenant_id_env"] = os.environ["AZURE_TENANT_ID"].strip()
|
||||
|
||||
# Surface which env-var sources are present without minting yet.
|
||||
env_sources = []
|
||||
if os.environ.get("AZURE_FEDERATED_TOKEN_FILE", "").strip():
|
||||
env_sources.append("WorkloadIdentityCredential (AZURE_FEDERATED_TOKEN_FILE)")
|
||||
if (os.environ.get("AZURE_CLIENT_ID", "").strip()
|
||||
and os.environ.get("AZURE_CLIENT_SECRET", "").strip()
|
||||
and os.environ.get("AZURE_TENANT_ID", "").strip()):
|
||||
env_sources.append("EnvironmentCredential (client secret)")
|
||||
if os.environ.get("IDENTITY_ENDPOINT", "").strip() or os.environ.get("MSI_ENDPOINT", "").strip():
|
||||
env_sources.append("ManagedIdentityCredential (IDENTITY_ENDPOINT)")
|
||||
info["env_sources"] = env_sources
|
||||
|
||||
# Now try minting.
|
||||
result: Dict[str, Any] = {}
|
||||
|
||||
def _probe() -> None:
|
||||
try:
|
||||
credential = build_credential(config)
|
||||
tok = credential.get_token(config.scope)
|
||||
result["token"] = tok
|
||||
except Exception as exc:
|
||||
result["error"] = str(exc)
|
||||
|
||||
thread = threading.Thread(target=_probe, daemon=True)
|
||||
thread.start()
|
||||
thread.join(timeout=max(0.01, timeout_seconds))
|
||||
if thread.is_alive():
|
||||
info["error"] = f"Token probe timed out after {timeout_seconds:.0f}s"
|
||||
info["hint"] = (
|
||||
"DefaultAzureCredential can be slow when the token service is unreachable "
|
||||
"or when az login state is stale. Try `az login` or set "
|
||||
"AZURE_CLIENT_ID / AZURE_TENANT_ID / AZURE_CLIENT_SECRET."
|
||||
)
|
||||
return info
|
||||
|
||||
if "error" in result:
|
||||
info["error"] = result["error"]
|
||||
return info
|
||||
|
||||
token = result.get("token")
|
||||
if token is None:
|
||||
info["error"] = "credential chain exhausted"
|
||||
return info
|
||||
|
||||
info["ok"] = True
|
||||
info["expires_on"] = getattr(token, "expires_on", None)
|
||||
return info
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Consumer-side helpers — split by purpose to prevent accidental token
|
||||
# minting in logging / cache-key / dashboard paths.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def is_token_provider(value: Any) -> bool:
|
||||
"""Return True when ``value`` is a callable Entra token provider.
|
||||
|
||||
Used at the seams where a consumer must decide between
|
||||
string-API-key semantics and bearer-callable semantics.
|
||||
"""
|
||||
return callable(value) and not isinstance(value, str)
|
||||
|
||||
|
||||
def materialize_bearer_for_http(value: Any) -> str:
|
||||
"""Return a fresh Bearer JWT for a manual HTTP request.
|
||||
|
||||
Only call this at sites that must construct an ``Authorization``
|
||||
header outside the OpenAI SDK (e.g. ``hermes_cli/azure_detect.py``).
|
||||
Calls the callable exactly once and returns the resulting token.
|
||||
|
||||
**Anthropic SDK integration:** the Anthropic Python SDK does not
|
||||
accept a ``Callable[[], str]`` for ``auth_token``. Instead,
|
||||
:func:`build_bearer_http_client` returns an ``httpx.Client`` whose
|
||||
request event hook calls this function and rewrites the
|
||||
``Authorization`` header per request — and that client is passed to
|
||||
the Anthropic SDK via ``http_client=...``. See
|
||||
:func:`agent.anthropic_adapter.build_anthropic_client` for the
|
||||
consumer.
|
||||
|
||||
Raises ``ValueError`` if ``value`` is not a callable token provider
|
||||
or non-empty string.
|
||||
"""
|
||||
if is_token_provider(value):
|
||||
token = value()
|
||||
if not isinstance(token, str) or not token:
|
||||
raise ValueError("token provider returned empty value")
|
||||
return token
|
||||
if isinstance(value, str) and value:
|
||||
return value
|
||||
raise ValueError("no usable api_key / token provider")
|
||||
|
||||
|
||||
def build_bearer_http_client(token_provider: Callable[[], str], **httpx_kwargs: Any) -> Any:
|
||||
"""Return an ``httpx.Client`` that mints a fresh Entra bearer JWT
|
||||
per outbound request.
|
||||
|
||||
The Anthropic SDK (≤ 0.86.0 at the time of writing) stores
|
||||
``api_key`` / ``auth_token`` as static strings and computes the
|
||||
``Authorization`` header at construction time. To get per-request
|
||||
token refresh (the Microsoft-recommended Foundry pattern for
|
||||
callable bearer providers), we install an httpx ``request`` event
|
||||
hook on a custom client and pass that client to the SDK via
|
||||
``http_client=...``. The hook:
|
||||
|
||||
1. Calls :func:`materialize_bearer_for_http` to mint a fresh JWT
|
||||
(azure-identity caches internally — this is cheap when the
|
||||
cached token is still valid).
|
||||
2. Strips any pre-set ``Authorization`` / ``api-key`` /
|
||||
``x-api-key`` headers the SDK may have added (avoids
|
||||
conflicting auth values).
|
||||
3. Sets ``Authorization: Bearer <fresh-jwt>``.
|
||||
|
||||
``token_provider`` must be a zero-arg callable returning a string —
|
||||
typically the result of :func:`build_token_provider`.
|
||||
|
||||
``httpx_kwargs`` are forwarded verbatim to ``httpx.Client(...)`` so
|
||||
callers can attach a ``timeout``, ``transport``, ``proxy``, etc.
|
||||
|
||||
Raises ``ImportError`` if ``httpx`` is not installed (it is a
|
||||
transitive dependency of both ``openai`` and ``anthropic`` SDKs, so
|
||||
in practice always available when this helper is reached).
|
||||
"""
|
||||
if not is_token_provider(token_provider):
|
||||
raise ValueError(
|
||||
"build_bearer_http_client requires a zero-arg callable "
|
||||
"token provider"
|
||||
)
|
||||
|
||||
try:
|
||||
import httpx
|
||||
except ImportError as exc: # pragma: no cover — httpx ships with openai/anthropic
|
||||
raise ImportError(
|
||||
"httpx is required for Entra ID bearer auth on Microsoft Foundry "
|
||||
"Anthropic-style endpoints. It is normally a transitive "
|
||||
"dependency of the openai/anthropic SDKs."
|
||||
) from exc
|
||||
|
||||
def _inject_bearer(request: "httpx.Request") -> None:
|
||||
try:
|
||||
token = materialize_bearer_for_http(token_provider)
|
||||
except ValueError as exc:
|
||||
# Token provider failed (chain exhausted, token service unreachable,
|
||||
# az login expired, etc.). Strip any auth headers the SDK
|
||||
# may have set — including our own placeholder sentinel
|
||||
# ``entra-id-bearer-via-http-hook`` from
|
||||
# ``_build_anthropic_client_with_bearer_hook`` — so the
|
||||
# outbound request hits Azure with NO Authorization rather
|
||||
# than with the placeholder. Azure returns a clean 401
|
||||
# "missing auth" that is easier to diagnose than a 401
|
||||
# against the sentinel string, and the sentinel never
|
||||
# appears in upstream access logs.
|
||||
#
|
||||
# Log at WARNING (not DEBUG) so the misconfiguration is
|
||||
# visible at default log levels.
|
||||
logger.warning(
|
||||
"Bearer hook: Entra ID token provider returned empty (%s) "
|
||||
"— stripping Authorization headers. Azure will respond 401. "
|
||||
"Run `hermes doctor` or `az login` to recover.",
|
||||
exc,
|
||||
)
|
||||
for header_name in ("Authorization", "authorization", "Api-Key", "api-key", "X-Api-Key", "x-api-key"):
|
||||
request.headers.pop(header_name, None)
|
||||
return
|
||||
for header_name in ("Authorization", "authorization", "Api-Key", "api-key", "X-Api-Key", "x-api-key"):
|
||||
request.headers.pop(header_name, None)
|
||||
request.headers["Authorization"] = f"Bearer {token}"
|
||||
|
||||
return httpx.Client(
|
||||
event_hooks={"request": [_inject_bearer]},
|
||||
**httpx_kwargs,
|
||||
)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"EntraIdentityConfig",
|
||||
"SCOPE_AI_AZURE_DEFAULT",
|
||||
"build_bearer_http_client",
|
||||
"build_credential",
|
||||
"build_token_provider",
|
||||
"describe_active_credential",
|
||||
"has_azure_identity_credentials",
|
||||
"has_azure_identity_installed",
|
||||
"is_token_provider",
|
||||
"materialize_bearer_for_http",
|
||||
"reset_credential_cache",
|
||||
]
|
||||
@@ -1,597 +0,0 @@
|
||||
"""Background memory/skill review — fork the agent to evaluate the turn.
|
||||
|
||||
After every turn, ``AIAgent.run_conversation`` may call
|
||||
:func:`spawn_background_review` to fire off a daemon thread that replays
|
||||
the conversation snapshot in a forked :class:`AIAgent` and asks itself
|
||||
"should any skill/memory be saved or updated?". Writes go straight to
|
||||
the memory + skill stores. Main conversation and prompt cache are never
|
||||
touched.
|
||||
|
||||
The fork inherits the parent's live runtime (provider, model, base_url,
|
||||
credentials, cached system prompt) so it hits the same prefix cache and
|
||||
uses the same auth. It runs with a tool whitelist limited to memory and
|
||||
skill management tools; everything else is denied at runtime.
|
||||
|
||||
See the ``hermes-agent-dev`` skill (``references/self-improvement-loop.md``)
|
||||
for invariants and PR review criteria.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Review-prompt strings — used by ``spawn_background_review_thread`` to build
|
||||
# the user-message that the forked review agent receives. AIAgent exposes
|
||||
# them as class attributes (``_MEMORY_REVIEW_PROMPT`` etc.) for back-compat;
|
||||
# the actual text lives here so future edits are one-place.
|
||||
_MEMORY_REVIEW_PROMPT = (
|
||||
"Review the conversation above and consider saving to memory if appropriate.\n\n"
|
||||
"Focus on:\n"
|
||||
"1. Has the user revealed things about themselves — their persona, desires, "
|
||||
"preferences, or personal details worth remembering?\n"
|
||||
"2. Has the user expressed expectations about how you should behave, their work "
|
||||
"style, or ways they want you to operate?\n\n"
|
||||
"If something stands out, save it using the memory tool. "
|
||||
"If nothing is worth saving, just say 'Nothing to save.' and stop."
|
||||
)
|
||||
|
||||
_SKILL_REVIEW_PROMPT = (
|
||||
"Review the conversation above and update the skill library. Be "
|
||||
"ACTIVE — most sessions produce at least one skill update, even if "
|
||||
"small. A pass that does nothing is a missed learning opportunity, "
|
||||
"not a neutral outcome.\n\n"
|
||||
"Target shape of the library: CLASS-LEVEL skills, each with a rich "
|
||||
"SKILL.md and a `references/` directory for session-specific detail. "
|
||||
"Not a long flat list of narrow one-session-one-skill entries. This "
|
||||
"shapes HOW you update, not WHETHER you update.\n\n"
|
||||
"Signals to look for (any one of these warrants action):\n"
|
||||
" • User corrected your style, tone, format, legibility, or "
|
||||
"verbosity. Frustration signals like 'stop doing X', 'this is too "
|
||||
"verbose', 'don't format like this', 'why are you explaining', "
|
||||
"'just give me the answer', 'you always do Y and I hate it', or an "
|
||||
"explicit 'remember this' are FIRST-CLASS skill signals, not just "
|
||||
"memory signals. Update the relevant skill(s) to embed the "
|
||||
"preference so the next session starts already knowing.\n"
|
||||
" • User corrected your workflow, approach, or sequence of steps. "
|
||||
"Encode the correction as a pitfall or explicit step in the skill "
|
||||
"that governs that class of task.\n"
|
||||
" • Non-trivial technique, fix, workaround, debugging path, or "
|
||||
"tool-usage pattern emerged that a future session would benefit "
|
||||
"from. Capture it.\n"
|
||||
" • A skill that got loaded or consulted this session turned out "
|
||||
"to be wrong, missing a step, or outdated. Patch it NOW.\n\n"
|
||||
"Preference order — prefer the earliest action that fits, but do "
|
||||
"pick one when a signal above fired:\n"
|
||||
" 1. UPDATE A CURRENTLY-LOADED SKILL. Look back through the "
|
||||
"conversation for skills the user loaded via /skill-name or you "
|
||||
"read via skill_view. If any of them covers the territory of the "
|
||||
"new learning, PATCH that one first. It is the skill that was in "
|
||||
"play, so it's the right one to extend.\n"
|
||||
" 2. UPDATE AN EXISTING UMBRELLA (via skills_list + skill_view). "
|
||||
"If no loaded skill fits but an existing class-level skill does, "
|
||||
"patch it. Add a subsection, a pitfall, or broaden a trigger.\n"
|
||||
" 3. ADD A SUPPORT FILE under an existing umbrella. Skills can be "
|
||||
"packaged with three kinds of support files — use the right "
|
||||
"directory per kind:\n"
|
||||
" • `references/<topic>.md` — session-specific detail (error "
|
||||
"transcripts, reproduction recipes, provider quirks) AND "
|
||||
"condensed knowledge banks: quoted research, API docs, external "
|
||||
"authoritative excerpts, or domain notes you found while working "
|
||||
"on the problem. Write it concise and for the value of the task, "
|
||||
"not as a full mirror of upstream docs.\n"
|
||||
" • `templates/<name>.<ext>` — starter files meant to be "
|
||||
"copied and modified (boilerplate configs, scaffolding, a "
|
||||
"known-good example the agent can `reproduce with modifications`).\n"
|
||||
" • `scripts/<name>.<ext>` — statically re-runnable actions "
|
||||
"the skill can invoke directly (verification scripts, fixture "
|
||||
"generators, deterministic probes, anything the agent should run "
|
||||
"rather than hand-type each time).\n"
|
||||
" Add support files via skill_manage action=write_file with "
|
||||
"file_path starting 'references/', 'templates/', or 'scripts/'. "
|
||||
"The umbrella's SKILL.md should gain a one-line pointer to any "
|
||||
"new support file so future agents know it exists.\n"
|
||||
" 4. CREATE A NEW CLASS-LEVEL UMBRELLA SKILL when no existing "
|
||||
"skill covers the class. The name MUST be at the class level. "
|
||||
"The name MUST NOT be a specific PR number, error string, feature "
|
||||
"codename, library-alone name, or 'fix-X / debug-Y / audit-Z-today' "
|
||||
"session artifact. If the proposed name only makes sense for "
|
||||
"today's task, it's wrong — fall back to (1), (2), or (3).\n\n"
|
||||
"User-preference embedding (important): when the user expressed a "
|
||||
"style/format/workflow preference, the update belongs in the "
|
||||
"SKILL.md body, not just in memory. Memory captures 'who the user "
|
||||
"is and what the current situation and state of your operations "
|
||||
"are'; skills capture 'how to do this class of task for this "
|
||||
"user'. When they complain about how you handled a task, the "
|
||||
"skill that governs that task needs to carry the lesson.\n\n"
|
||||
"If you notice two existing skills that overlap, note it in your "
|
||||
"reply — the background curator handles consolidation at scale.\n\n"
|
||||
"Protected skills (DO NOT edit these):\n"
|
||||
" • Bundled skills (shipped with Hermes, e.g. 'hermes-agent').\n"
|
||||
" • Hub-installed skills (installed via 'hermes skills install').\n"
|
||||
"Pinned skills (marked via 'hermes curator pin') CAN be improved — "
|
||||
"pin only blocks deletion/archive/consolidation by the curator, not "
|
||||
"content updates. Patch them when a pitfall or missing step turns up, "
|
||||
"same as any other agent-created skill.\n"
|
||||
"If the only skills that need updating are protected, say\n"
|
||||
"'Nothing to save.' and stop.\n\n"
|
||||
"Do NOT capture (these become persistent self-imposed constraints "
|
||||
"that bite you later when the environment changes):\n"
|
||||
" • Environment-dependent failures: missing binaries, fresh-install "
|
||||
"errors, post-migration path mismatches, 'command not found', "
|
||||
"unconfigured credentials, uninstalled packages. The user can fix "
|
||||
"these — they are not durable rules.\n"
|
||||
" • Negative claims about tools or features ('browser tools do not "
|
||||
"work', 'X tool is broken', 'cannot use Y from execute_code'). These "
|
||||
"harden into refusals the agent cites against itself for months "
|
||||
"after the actual problem was fixed.\n"
|
||||
" • Session-specific transient errors that resolved before the "
|
||||
"conversation ended. If retrying worked, the lesson is the retry "
|
||||
"pattern, not the original failure.\n"
|
||||
" • One-off task narratives. A user asking 'summarize today's "
|
||||
"market' or 'analyze this PR' is not a class of work that warrants "
|
||||
"a skill.\n\n"
|
||||
"If a tool failed because of setup state, capture the FIX (install "
|
||||
"command, config step, env var to set) under an existing setup or "
|
||||
"troubleshooting skill — never 'this tool does not work' as a "
|
||||
"standalone constraint.\n\n"
|
||||
"'Nothing to save.' is a real option but should NOT be the "
|
||||
"default. If the session ran smoothly with no corrections and "
|
||||
"produced no new technique, just say 'Nothing to save.' and stop. "
|
||||
"Otherwise, act."
|
||||
)
|
||||
|
||||
_COMBINED_REVIEW_PROMPT = (
|
||||
"Review the conversation above and update two things:\n\n"
|
||||
"**Memory**: who the user is. Did the user reveal persona, "
|
||||
"desires, preferences, personal details, or expectations about "
|
||||
"how you should behave? Save facts about the user and durable "
|
||||
"preferences with the memory tool.\n\n"
|
||||
"**Skills**: how to do this class of task. Be ACTIVE — most "
|
||||
"sessions produce at least one skill update. A pass that does "
|
||||
"nothing is a missed learning opportunity, not a neutral outcome.\n\n"
|
||||
"Target shape of the skill library: CLASS-LEVEL skills with a rich "
|
||||
"SKILL.md and a `references/` directory for session-specific detail. "
|
||||
"Not a long flat list of narrow one-session-one-skill entries.\n\n"
|
||||
"Signals that warrant a skill update (any one is enough):\n"
|
||||
" • User corrected your style, tone, format, legibility, "
|
||||
"verbosity, or approach. Frustration is a FIRST-CLASS skill "
|
||||
"signal, not just a memory signal. 'stop doing X', 'don't format "
|
||||
"like this', 'I hate when you Y' — embed the lesson in the skill "
|
||||
"that governs that task so the next session starts fixed.\n"
|
||||
" • Non-trivial technique, fix, workaround, or debugging path "
|
||||
"emerged.\n"
|
||||
" • A skill that was loaded or consulted turned out wrong, "
|
||||
"missing, or outdated — patch it now.\n\n"
|
||||
"Preference order for skills — pick the earliest that fits:\n"
|
||||
" 1. UPDATE A CURRENTLY-LOADED SKILL. Check what skills were "
|
||||
"loaded via /skill-name or skill_view in the conversation. If one "
|
||||
"of them covers the learning, PATCH it first. It was in play; "
|
||||
"it's the right place.\n"
|
||||
" 2. UPDATE AN EXISTING UMBRELLA (skills_list + skill_view to "
|
||||
"find the right one). Patch it.\n"
|
||||
" 3. ADD A SUPPORT FILE under an existing umbrella via "
|
||||
"skill_manage action=write_file. Three kinds: "
|
||||
"`references/<topic>.md` for session-specific detail OR condensed "
|
||||
"knowledge banks (quoted research, API docs excerpts, domain "
|
||||
"notes) written concise and task-focused; `templates/<name>.<ext>` "
|
||||
"for starter files meant to be copied and modified; "
|
||||
"`scripts/<name>.<ext>` for statically re-runnable actions "
|
||||
"(verification, fixture generators, probes). Add a one-line "
|
||||
"pointer in SKILL.md so future agents find them.\n"
|
||||
" 4. CREATE A NEW CLASS-LEVEL UMBRELLA when nothing exists. "
|
||||
"Name at the class level — NOT a PR number, error string, "
|
||||
"codename, library-alone name, or 'fix-X / debug-Y' session "
|
||||
"artifact. If the name only fits today's task, fall back to (1), "
|
||||
"(2), or (3).\n\n"
|
||||
"User-preference embedding: when the user complains about how "
|
||||
"you handled a task, update the skill that governs that task — "
|
||||
"memory alone isn't enough. Memory says 'who the user is and "
|
||||
"what the current situation and state of your operations are'; "
|
||||
"skills say 'how to do this class of task for this user'. Both "
|
||||
"should carry user-preference lessons when relevant.\n\n"
|
||||
"If you notice overlapping existing skills, mention it — the "
|
||||
"background curator handles consolidation.\n\n"
|
||||
"Protected skills (DO NOT edit these):\n"
|
||||
" • Bundled skills (shipped with Hermes, e.g. 'hermes-agent').\n"
|
||||
" • Hub-installed skills (installed via 'hermes skills install').\n"
|
||||
"Pinned skills (marked via 'hermes curator pin') CAN be improved — "
|
||||
"pin only blocks deletion/archive/consolidation by the curator, not "
|
||||
"content updates. Patch them when a pitfall or missing step turns up, "
|
||||
"same as any other agent-created skill.\n"
|
||||
"If the only skills that need updating are protected, say\n"
|
||||
"'Nothing to save.' and stop.\n\n"
|
||||
"Do NOT capture as skills (these become persistent self-imposed "
|
||||
"constraints that bite you later when the environment changes):\n"
|
||||
" • Environment-dependent failures: missing binaries, fresh-install "
|
||||
"errors, post-migration path mismatches, 'command not found', "
|
||||
"unconfigured credentials, uninstalled packages. The user can fix "
|
||||
"these — they are not durable rules.\n"
|
||||
" • Negative claims about tools or features ('browser tools do not "
|
||||
"work', 'X tool is broken', 'cannot use Y from execute_code'). These "
|
||||
"harden into refusals the agent cites against itself for months "
|
||||
"after the actual problem was fixed.\n"
|
||||
" • Session-specific transient errors that resolved before the "
|
||||
"conversation ended. If retrying worked, the lesson is the retry "
|
||||
"pattern, not the original failure.\n"
|
||||
" • One-off task narratives. A user asking 'summarize today's "
|
||||
"market' or 'analyze this PR' is not a class of work that warrants "
|
||||
"a skill.\n\n"
|
||||
"If a tool failed because of setup state, capture the FIX (install "
|
||||
"command, config step, env var to set) under an existing setup or "
|
||||
"troubleshooting skill — never 'this tool does not work' as a "
|
||||
"standalone constraint.\n\n"
|
||||
"Act on whichever of the two dimensions has real signal. If "
|
||||
"genuinely nothing stands out on either, say 'Nothing to save.' "
|
||||
"and stop — but don't reach for that conclusion as a default."
|
||||
)
|
||||
|
||||
|
||||
|
||||
def summarize_background_review_actions(
|
||||
review_messages: List[Dict],
|
||||
prior_snapshot: List[Dict],
|
||||
) -> List[str]:
|
||||
"""Build the human-facing action summary for a background review pass.
|
||||
|
||||
Walks the review agent's session messages and collects "successful tool
|
||||
action" descriptions to surface to the user (e.g. "Memory updated").
|
||||
Tool messages already present in ``prior_snapshot`` are skipped so we
|
||||
don't re-surface stale results from the prior conversation that the
|
||||
review agent inherited via ``conversation_history`` (issue #14944).
|
||||
|
||||
Matching is by ``tool_call_id`` when available, with a content-equality
|
||||
fallback for tool messages that lack one.
|
||||
"""
|
||||
existing_tool_call_ids = set()
|
||||
existing_tool_contents = set()
|
||||
for prior in prior_snapshot or []:
|
||||
if not isinstance(prior, dict) or prior.get("role") != "tool":
|
||||
continue
|
||||
tcid = prior.get("tool_call_id")
|
||||
if tcid:
|
||||
existing_tool_call_ids.add(tcid)
|
||||
else:
|
||||
content = prior.get("content")
|
||||
if isinstance(content, str):
|
||||
existing_tool_contents.add(content)
|
||||
|
||||
actions: List[str] = []
|
||||
for msg in review_messages or []:
|
||||
if not isinstance(msg, dict) or msg.get("role") != "tool":
|
||||
continue
|
||||
tcid = msg.get("tool_call_id")
|
||||
if tcid and tcid in existing_tool_call_ids:
|
||||
continue
|
||||
if not tcid:
|
||||
content_str = msg.get("content")
|
||||
if isinstance(content_str, str) and content_str in existing_tool_contents:
|
||||
continue
|
||||
try:
|
||||
data = json.loads(msg.get("content", "{}"))
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
continue
|
||||
if not isinstance(data, dict) or not data.get("success"):
|
||||
continue
|
||||
message = data.get("message", "")
|
||||
target = data.get("target", "")
|
||||
if "created" in message.lower():
|
||||
actions.append(message)
|
||||
elif "updated" in message.lower():
|
||||
actions.append(message)
|
||||
elif "added" in message.lower() or (target and "add" in message.lower()):
|
||||
label = "Memory" if target == "memory" else "User profile" if target == "user" else target
|
||||
actions.append(f"{label} updated")
|
||||
elif "Entry added" in message:
|
||||
label = "Memory" if target == "memory" else "User profile" if target == "user" else target
|
||||
actions.append(f"{label} updated")
|
||||
elif "removed" in message.lower() or "replaced" in message.lower():
|
||||
label = "Memory" if target == "memory" else "User profile" if target == "user" else target
|
||||
actions.append(f"{label} updated")
|
||||
return actions
|
||||
|
||||
|
||||
def build_memory_write_metadata(
|
||||
agent: Any,
|
||||
*,
|
||||
write_origin: Optional[str] = None,
|
||||
execution_context: Optional[str] = None,
|
||||
task_id: Optional[str] = None,
|
||||
tool_call_id: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build provenance metadata for external memory-provider mirrors."""
|
||||
metadata: Dict[str, Any] = {
|
||||
"write_origin": write_origin or getattr(agent, "_memory_write_origin", "assistant_tool"),
|
||||
"execution_context": (
|
||||
execution_context
|
||||
or getattr(agent, "_memory_write_context", "foreground")
|
||||
),
|
||||
"session_id": agent.session_id or "",
|
||||
"parent_session_id": agent._parent_session_id or "",
|
||||
"platform": agent.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
|
||||
"tool_name": "memory",
|
||||
}
|
||||
if task_id:
|
||||
metadata["task_id"] = task_id
|
||||
if tool_call_id:
|
||||
metadata["tool_call_id"] = tool_call_id
|
||||
return {k: v for k, v in metadata.items() if v not in {None, ""}}
|
||||
|
||||
|
||||
def _run_review_in_thread(
|
||||
agent: Any,
|
||||
messages_snapshot: List[Dict],
|
||||
prompt: str,
|
||||
) -> None:
|
||||
"""Worker function executed in the background-review daemon thread.
|
||||
|
||||
Spawns a forked ``AIAgent`` inheriting the parent's runtime, runs the
|
||||
review prompt, and surfaces a compact action summary back to the user
|
||||
via ``agent._safe_print`` and ``agent.background_review_callback``.
|
||||
"""
|
||||
# Local import to avoid a hard circular dep at module load.
|
||||
from run_agent import AIAgent
|
||||
from tools.terminal_tool import set_approval_callback as _set_approval_callback
|
||||
|
||||
# Install a non-interactive approval callback on this worker
|
||||
# thread so any dangerous-command guard the review agent trips
|
||||
# resolves to "deny" instead of falling back to input() -- which
|
||||
# deadlocks against the parent's prompt_toolkit TUI (#15216).
|
||||
# Same pattern as _subagent_auto_deny in tools/delegate_tool.py.
|
||||
def _bg_review_auto_deny(command, description, **kwargs):
|
||||
logger.warning(
|
||||
"Background review auto-denied dangerous command: %s (%s)",
|
||||
command, description,
|
||||
)
|
||||
return "deny"
|
||||
try:
|
||||
_set_approval_callback(_bg_review_auto_deny)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
review_agent = None
|
||||
review_messages: List[Dict] = []
|
||||
try:
|
||||
with open(os.devnull, "w", encoding="utf-8") as _devnull, \
|
||||
contextlib.redirect_stdout(_devnull), \
|
||||
contextlib.redirect_stderr(_devnull):
|
||||
# Inherit the parent agent's live runtime (provider, model,
|
||||
# base_url, api_key, api_mode) so the fork uses the exact
|
||||
# same credentials the main turn is using. Without this,
|
||||
# AIAgent.__init__ re-runs auto-resolution from env vars,
|
||||
# which fails for OAuth-only providers, session-scoped
|
||||
# creds, or credential-pool setups where the resolver can't
|
||||
# reconstruct auth from scratch -- producing the spurious
|
||||
# "No LLM provider configured" warning at end of turn.
|
||||
_parent_runtime = agent._current_main_runtime()
|
||||
_parent_api_mode = _parent_runtime.get("api_mode") or None
|
||||
# The review fork needs to call agent-loop tools (memory,
|
||||
# skill_manage). Those tools require Hermes' own dispatch,
|
||||
# which the codex_app_server runtime bypasses entirely
|
||||
# (it runs the turn inside codex's subprocess). So when
|
||||
# the parent is on codex_app_server, downgrade the review
|
||||
# fork to codex_responses — same auth/credentials, but
|
||||
# talks to the OpenAI Responses API directly so Hermes
|
||||
# owns the loop and the agent-loop tools dispatch.
|
||||
if _parent_api_mode == "codex_app_server":
|
||||
_parent_api_mode = "codex_responses"
|
||||
# skip_memory=True keeps the review fork from
|
||||
# touching external memory plugins (honcho, mem0,
|
||||
# supermemory, etc.). Without it, the fork's
|
||||
# __init__ rebuilds its own _memory_manager from
|
||||
# config, scoped to the parent's session_id, and
|
||||
# run_conversation() then leaks the harness prompt
|
||||
# into the user's real memory namespace via three
|
||||
# ingestion sites: on_turn_start (cadence + turn
|
||||
# message), prefetch_all (recall query), and
|
||||
# sync_all (harness prompt + review output recorded
|
||||
# as a (user, assistant) turn pair). Built-in
|
||||
# MEMORY.md / USER.md state is re-bound from the
|
||||
# parent below so memory(action="add") writes from
|
||||
# the review still land on disk; the review just
|
||||
# has zero side effects on external providers.
|
||||
# Match parent's toolset config so ``tools[]`` is byte-identical
|
||||
# in the request body — Anthropic's cache key includes it.
|
||||
# (The runtime whitelist below still restricts dispatch.)
|
||||
review_agent = AIAgent(
|
||||
model=agent.model,
|
||||
max_iterations=16,
|
||||
quiet_mode=True,
|
||||
platform=agent.platform,
|
||||
provider=agent.provider,
|
||||
api_mode=_parent_api_mode,
|
||||
base_url=_parent_runtime.get("base_url") or None,
|
||||
api_key=_parent_runtime.get("api_key") or None,
|
||||
credential_pool=getattr(agent, "_credential_pool", None),
|
||||
parent_session_id=agent.session_id,
|
||||
enabled_toolsets=getattr(agent, "enabled_toolsets", None),
|
||||
disabled_toolsets=getattr(agent, "disabled_toolsets", None),
|
||||
skip_memory=True,
|
||||
)
|
||||
review_agent._memory_write_origin = "background_review"
|
||||
review_agent._memory_write_context = "background_review"
|
||||
review_agent._memory_store = agent._memory_store
|
||||
review_agent._memory_enabled = agent._memory_enabled
|
||||
review_agent._user_profile_enabled = agent._user_profile_enabled
|
||||
review_agent._memory_nudge_interval = 0
|
||||
review_agent._skill_nudge_interval = 0
|
||||
# Suppress all status/warning emits from the fork so the
|
||||
# user only sees the final successful-action summary.
|
||||
# Without this, mid-review "Iteration budget exhausted",
|
||||
# rate-limit retries, compression warnings, and other
|
||||
# lifecycle messages bubble up through _emit_status ->
|
||||
# _vprint and leak past the stdout redirect (they go via
|
||||
# _print_fn/status_callback, which bypass sys.stdout).
|
||||
review_agent.suppress_status_output = True
|
||||
# Inherit the parent's cached system prompt verbatim so
|
||||
# the review fork's outbound HTTP request hits the same
|
||||
# Anthropic/OpenRouter prefix cache the parent warmed.
|
||||
# Without this, the fork rebuilds the system prompt from
|
||||
# scratch (fresh _hermes_now() timestamp, fresh
|
||||
# session_id, narrower toolset → different skills_prompt)
|
||||
# and the byte-exact prefix-cache key misses. See
|
||||
# issue #25322 and PR #17276 for the full analysis +
|
||||
# measured impact (~26% end-to-end cost reduction on
|
||||
# Sonnet 4.5).
|
||||
review_agent._cached_system_prompt = agent._cached_system_prompt
|
||||
# Defensive: pin session_start + session_id to the
|
||||
# parent's so any code path that re-renders parts of
|
||||
# the system prompt (compression, plugin hooks) still
|
||||
# produces byte-identical output. The cached-prompt
|
||||
# assignment above already short-circuits the normal
|
||||
# rebuild path, but these pins guarantee parity even
|
||||
# if a future code path bypasses the cache.
|
||||
review_agent.session_start = agent.session_start
|
||||
review_agent.session_id = agent.session_id
|
||||
|
||||
from model_tools import get_tool_definitions
|
||||
from hermes_cli.plugins import (
|
||||
set_thread_tool_whitelist,
|
||||
clear_thread_tool_whitelist,
|
||||
)
|
||||
|
||||
review_whitelist = {
|
||||
t["function"]["name"]
|
||||
for t in get_tool_definitions(
|
||||
enabled_toolsets=["memory", "skills"],
|
||||
quiet_mode=True,
|
||||
)
|
||||
}
|
||||
set_thread_tool_whitelist(
|
||||
review_whitelist,
|
||||
deny_msg_fmt=(
|
||||
"Background review denied non-whitelisted tool: "
|
||||
"{tool_name}. Only memory/skill tools are allowed."
|
||||
),
|
||||
)
|
||||
try:
|
||||
review_agent.run_conversation(
|
||||
user_message=(
|
||||
prompt
|
||||
+ "\n\nYou can only call memory and skill "
|
||||
"management tools. Other tools will be denied "
|
||||
"at runtime — do not attempt them."
|
||||
),
|
||||
conversation_history=messages_snapshot,
|
||||
)
|
||||
finally:
|
||||
clear_thread_tool_whitelist()
|
||||
|
||||
# Snapshot review actions before teardown. close() is allowed to
|
||||
# clean per-session state, but the user-visible self-improvement
|
||||
# summary still needs the completed review agent's tool results.
|
||||
review_messages = list(getattr(review_agent, "_session_messages", []))
|
||||
|
||||
# Tear down memory providers while stdout is still
|
||||
# redirected so background thread teardown (Honcho flush,
|
||||
# Hindsight sync, etc.) stays silent. The finally block
|
||||
# below is a safety net for the exception path.
|
||||
try:
|
||||
review_agent.shutdown_memory_provider()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
review_agent.close()
|
||||
except Exception:
|
||||
pass
|
||||
review_agent = None
|
||||
|
||||
# Scan the review agent's messages for successful tool actions
|
||||
# and surface a compact summary to the user. Tool messages
|
||||
# already present in messages_snapshot must be skipped, since
|
||||
# the review agent inherits that history and would otherwise
|
||||
# re-surface stale "created"/"updated" messages from the prior
|
||||
# conversation as if they just happened (issue #14944).
|
||||
actions = summarize_background_review_actions(
|
||||
review_messages,
|
||||
messages_snapshot,
|
||||
)
|
||||
|
||||
if actions:
|
||||
summary = " · ".join(dict.fromkeys(actions))
|
||||
agent._safe_print(
|
||||
f" 💾 Self-improvement review: {summary}"
|
||||
)
|
||||
_bg_cb = agent.background_review_callback
|
||||
if _bg_cb:
|
||||
try:
|
||||
_bg_cb(
|
||||
f"💾 Self-improvement review: {summary}"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("Background memory/skill review failed: %s", e)
|
||||
agent._emit_auxiliary_failure("background review", e)
|
||||
finally:
|
||||
# Safety-net cleanup for the exception path. Normal
|
||||
# completion already shut down inside redirect_stdout above.
|
||||
# Re-open devnull here so any teardown output (Honcho flush,
|
||||
# Hindsight sync, background thread joins) stays silent even
|
||||
# on the exception path where redirect_stdout already exited.
|
||||
if review_agent is not None:
|
||||
try:
|
||||
with open(os.devnull, "w", encoding="utf-8") as _fn, \
|
||||
contextlib.redirect_stdout(_fn), \
|
||||
contextlib.redirect_stderr(_fn):
|
||||
try:
|
||||
review_agent.shutdown_memory_provider()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
review_agent.close()
|
||||
except Exception:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
# Clear the approval callback on this bg-review thread so a
|
||||
# recycled thread-id doesn't inherit a stale reference.
|
||||
try:
|
||||
_set_approval_callback(None)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def spawn_background_review_thread(
|
||||
agent: Any,
|
||||
messages_snapshot: List[Dict],
|
||||
review_memory: bool = False,
|
||||
review_skills: bool = False,
|
||||
):
|
||||
"""Build the review thread target and prompt for a background review.
|
||||
|
||||
Returns a ``(target, prompt)`` tuple. The caller (``AIAgent._spawn_background_review``)
|
||||
owns the actual ``threading.Thread`` construction so test-level patches
|
||||
of ``run_agent.threading.Thread`` keep working.
|
||||
"""
|
||||
# Pick the right prompt based on which triggers fired. Allow per-agent
|
||||
# override (the prompts moved to module-level constants but old code paths
|
||||
# that set agent._MEMORY_REVIEW_PROMPT etc. directly keep working).
|
||||
if review_memory and review_skills:
|
||||
prompt = getattr(agent, "_COMBINED_REVIEW_PROMPT", _COMBINED_REVIEW_PROMPT)
|
||||
elif review_memory:
|
||||
prompt = getattr(agent, "_MEMORY_REVIEW_PROMPT", _MEMORY_REVIEW_PROMPT)
|
||||
else:
|
||||
prompt = getattr(agent, "_SKILL_REVIEW_PROMPT", _SKILL_REVIEW_PROMPT)
|
||||
|
||||
def _target() -> None:
|
||||
_run_review_in_thread(agent, messages_snapshot, prompt)
|
||||
|
||||
return _target, prompt
|
||||
|
||||
|
||||
__all__ = [
|
||||
"_MEMORY_REVIEW_PROMPT",
|
||||
"_SKILL_REVIEW_PROMPT",
|
||||
"_COMBINED_REVIEW_PROMPT",
|
||||
"spawn_background_review_thread",
|
||||
"summarize_background_review_actions",
|
||||
"build_memory_write_metadata",
|
||||
]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,175 +0,0 @@
|
||||
"""
|
||||
Browser Provider ABC
|
||||
====================
|
||||
|
||||
Defines the pluggable-backend interface for cloud browser providers
|
||||
(Browserbase, Browser Use, Firecrawl, …). Providers register instances via
|
||||
:meth:`PluginContext.register_browser_provider`; the active one (selected via
|
||||
``browser.cloud_provider`` in ``config.yaml``) services every cloud-mode
|
||||
``browser_*`` tool call.
|
||||
|
||||
Providers live in ``<repo>/plugins/browser/<name>/`` (built-in, auto-loaded as
|
||||
``kind: backend``) or ``~/.hermes/plugins/browser/<name>/`` (user, opt-in via
|
||||
``plugins.enabled``).
|
||||
|
||||
This ABC mirrors :class:`agent.web_search_provider.WebSearchProvider` (PR
|
||||
#25182) — same shape, same registration flow, same picker integration. The
|
||||
legacy in-tree ``tools.browser_providers.base.CloudBrowserProvider`` ABC was
|
||||
deleted in PR #25214 (this work) along with the per-vendor inline modules in
|
||||
``tools/browser_providers/``; the lifecycle contract documented below is
|
||||
preserved bit-for-bit so the tool wrapper (:mod:`tools.browser_tool`) does
|
||||
not have to translate.
|
||||
|
||||
Session metadata contract (preserved from the legacy ``CloudBrowserProvider``)::
|
||||
|
||||
{
|
||||
"session_name": str, # unique name for agent-browser --session
|
||||
"bb_session_id": str, # provider session ID (for close/cleanup)
|
||||
"cdp_url": str, # CDP websocket URL
|
||||
"features": dict, # feature flags that were enabled
|
||||
"external_call_id": str, # optional, managed-gateway billing key
|
||||
}
|
||||
|
||||
``bb_session_id`` is a legacy key name kept verbatim for backward compat with
|
||||
:mod:`tools.browser_tool` — it holds the provider's session ID regardless of
|
||||
which provider is in use.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
from typing import Any, Dict
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ABC
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class BrowserProvider(abc.ABC):
|
||||
"""Abstract base class for a cloud browser backend.
|
||||
|
||||
Subclasses must implement :meth:`name`, :meth:`is_available`, and the
|
||||
three lifecycle methods: :meth:`create_session`, :meth:`close_session`,
|
||||
:meth:`emergency_cleanup`.
|
||||
|
||||
The lifecycle shape preserves the legacy ``CloudBrowserProvider`` contract
|
||||
bit-for-bit so the dispatcher in :mod:`tools.browser_tool` is a pure
|
||||
registry lookup — no per-provider conditionals, no shape translation.
|
||||
"""
|
||||
|
||||
@property
|
||||
@abc.abstractmethod
|
||||
def name(self) -> str:
|
||||
"""Stable short identifier used in the ``browser.cloud_provider``
|
||||
config key.
|
||||
|
||||
Lowercase, hyphens permitted to preserve existing user-visible names.
|
||||
Examples: ``browserbase``, ``browser-use``, ``firecrawl``.
|
||||
"""
|
||||
|
||||
@property
|
||||
def display_name(self) -> str:
|
||||
"""Human-readable label shown in ``hermes tools``. Defaults to ``name``."""
|
||||
return self.name
|
||||
|
||||
@abc.abstractmethod
|
||||
def is_available(self) -> bool:
|
||||
"""Return True when this provider can service calls.
|
||||
|
||||
Typically a cheap check (env var present, managed-gateway token
|
||||
readable, optional Python dep importable). Must NOT make network
|
||||
calls — this runs at tool-registration time and on every
|
||||
``hermes tools`` paint.
|
||||
|
||||
Mirrors the legacy ``CloudBrowserProvider.is_configured()`` method;
|
||||
renamed for parity with :class:`agent.web_search_provider.WebSearchProvider`.
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
def create_session(self, task_id: str) -> Dict[str, object]:
|
||||
"""Create a cloud browser session and return session metadata.
|
||||
|
||||
Must return a dict with at least::
|
||||
|
||||
{
|
||||
"session_name": str, # unique name for agent-browser --session
|
||||
"bb_session_id": str, # provider session ID (for close/cleanup)
|
||||
"cdp_url": str, # CDP websocket URL
|
||||
"features": dict, # feature flags that were enabled
|
||||
}
|
||||
|
||||
``bb_session_id`` is a legacy key name kept for backward compat with
|
||||
the rest of :mod:`tools.browser_tool` — it holds the provider's
|
||||
session ID regardless of which provider is in use.
|
||||
|
||||
May raise ``ValueError`` (missing credentials) or ``RuntimeError``
|
||||
(network / API failure); the dispatcher surfaces these to the user.
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
def close_session(self, session_id: str) -> bool:
|
||||
"""Release / terminate a cloud session by its provider session ID.
|
||||
|
||||
Returns True on success, False on failure. Should not raise — log and
|
||||
return False on any exception so the dispatcher's cleanup loop keeps
|
||||
moving across sessions.
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
def emergency_cleanup(self, session_id: str) -> None:
|
||||
"""Best-effort session teardown during process exit.
|
||||
|
||||
Called from atexit / signal handlers. Must tolerate missing
|
||||
credentials, network errors, etc. — log and move on. Must not raise.
|
||||
"""
|
||||
|
||||
def get_setup_schema(self) -> Dict[str, Any]:
|
||||
"""Return provider metadata for the ``hermes tools`` picker.
|
||||
|
||||
Used by :mod:`hermes_cli.tools_config` to inject this provider as a
|
||||
row in the Browser Automation picker. Shape mirrors the existing
|
||||
hardcoded entries in ``TOOL_CATEGORIES["browser"]``::
|
||||
|
||||
{
|
||||
"name": "Browserbase",
|
||||
"badge": "paid",
|
||||
"tag": "Cloud browser with stealth and proxies",
|
||||
"env_vars": [
|
||||
{"key": "BROWSERBASE_API_KEY",
|
||||
"prompt": "Browserbase API key",
|
||||
"url": "https://browserbase.com"},
|
||||
],
|
||||
"post_setup": "agent_browser",
|
||||
}
|
||||
|
||||
Default: minimal entry derived from :attr:`display_name`. Override to
|
||||
expose API key prompts, badges, managed-Nous gating, and the
|
||||
``post_setup`` install hook.
|
||||
"""
|
||||
return {
|
||||
"name": self.display_name,
|
||||
"badge": "",
|
||||
"tag": "",
|
||||
"env_vars": [],
|
||||
}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Backward-compat shims for the legacy CloudBrowserProvider API
|
||||
# ------------------------------------------------------------------
|
||||
#
|
||||
# The pre-PR-#25214 ABC exposed ``is_configured()`` and ``provider_name()``;
|
||||
# ``tools.browser_tool`` has ~6 callers that still use those names. Rather
|
||||
# than churn every callsite (and break out-of-tree downstream code that
|
||||
# subclassed CloudBrowserProvider), we expose the old names as thin
|
||||
# delegations to the new API. Subclasses MUST implement :meth:`is_available`
|
||||
# and :attr:`name`; they may override ``is_configured`` / ``provider_name``
|
||||
# for compatibility with the legacy ABC but it is not required.
|
||||
|
||||
def is_configured(self) -> bool:
|
||||
"""Backward-compat alias for :meth:`is_available`."""
|
||||
return self.is_available()
|
||||
|
||||
def provider_name(self) -> str:
|
||||
"""Backward-compat alias returning :attr:`display_name`."""
|
||||
return self.display_name
|
||||
@@ -1,192 +0,0 @@
|
||||
"""
|
||||
Browser Provider Registry
|
||||
=========================
|
||||
|
||||
Central map of registered cloud browser providers. Populated by plugins at
|
||||
import-time via :meth:`PluginContext.register_browser_provider`; consumed by
|
||||
:func:`tools.browser_tool._get_cloud_provider` to route each cloud-mode
|
||||
``browser_*`` tool call to the active backend.
|
||||
|
||||
Active selection
|
||||
----------------
|
||||
The active provider is chosen by configuration with this precedence:
|
||||
|
||||
1. ``browser.cloud_provider`` in ``config.yaml`` (explicit override).
|
||||
2. Legacy preference order — ``browser-use`` → ``browserbase`` — filtered by
|
||||
availability. Matches the historic auto-detect order in
|
||||
:func:`tools.browser_tool._get_cloud_provider` (Browser Use checked first
|
||||
because it covers both the managed Nous gateway and direct API key path;
|
||||
Browserbase as the older direct-credentials fallback). ``firecrawl`` is
|
||||
intentionally NOT in the legacy walk — users only get Firecrawl as a
|
||||
cloud browser when they explicitly set ``browser.cloud_provider:
|
||||
firecrawl``, matching pre-migration behaviour where Firecrawl was never
|
||||
auto-selected.
|
||||
3. Otherwise ``None`` — the dispatcher falls back to local browser mode.
|
||||
|
||||
The explicit-config branch (rule 1) intentionally ignores ``is_available()``
|
||||
so the dispatcher surfaces a typed "X_API_KEY is not set" error to the user
|
||||
instead of silently switching backends. Matches the legacy
|
||||
:func:`tools.browser_tool._get_cloud_provider` behaviour for configured names.
|
||||
|
||||
Note: there is no "capability" split here (unlike the web subsystem, which
|
||||
has search/extract/crawl). Every browser provider implements the full
|
||||
:class:`agent.browser_provider.BrowserProvider` lifecycle; the registry's
|
||||
job is purely selection, not capability routing.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from agent.browser_provider import BrowserProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_providers: Dict[str, BrowserProvider] = {}
|
||||
_lock = threading.Lock()
|
||||
|
||||
|
||||
def register_provider(provider: BrowserProvider) -> None:
|
||||
"""Register a cloud browser provider.
|
||||
|
||||
Re-registration (same ``name``) overwrites the previous entry and logs
|
||||
a debug message — makes hot-reload scenarios (tests, dev loops) behave
|
||||
predictably.
|
||||
"""
|
||||
if not isinstance(provider, BrowserProvider):
|
||||
raise TypeError(
|
||||
f"register_provider() expects a BrowserProvider instance, "
|
||||
f"got {type(provider).__name__}"
|
||||
)
|
||||
name = provider.name
|
||||
if not isinstance(name, str) or not name.strip():
|
||||
raise ValueError("Browser provider .name must be a non-empty string")
|
||||
with _lock:
|
||||
existing = _providers.get(name)
|
||||
_providers[name] = provider
|
||||
if existing is not None:
|
||||
logger.debug(
|
||||
"Browser provider '%s' re-registered (was %r)",
|
||||
name, type(existing).__name__,
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
"Registered browser provider '%s' (%s)",
|
||||
name, type(provider).__name__,
|
||||
)
|
||||
|
||||
|
||||
def list_providers() -> List[BrowserProvider]:
|
||||
"""Return all registered providers, sorted by name."""
|
||||
with _lock:
|
||||
items = list(_providers.values())
|
||||
return sorted(items, key=lambda p: p.name)
|
||||
|
||||
|
||||
def get_provider(name: str) -> Optional[BrowserProvider]:
|
||||
"""Return the provider registered under *name*, or None."""
|
||||
if not isinstance(name, str):
|
||||
return None
|
||||
with _lock:
|
||||
return _providers.get(name.strip())
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Active-provider resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
# Legacy auto-detect order — used when no ``browser.cloud_provider`` is set.
|
||||
# Matches the pre-migration walk in :func:`tools.browser_tool._get_cloud_provider`.
|
||||
# Firecrawl is intentionally absent so users with ``FIRECRAWL_API_KEY`` set
|
||||
# for web-extract don't get silently routed to a paid cloud browser. See
|
||||
# :func:`_resolve` for the full rationale.
|
||||
_LEGACY_PREFERENCE = (
|
||||
"browser-use",
|
||||
"browserbase",
|
||||
)
|
||||
|
||||
|
||||
def _resolve(configured: Optional[str]) -> Optional[BrowserProvider]:
|
||||
"""Resolve the active browser provider.
|
||||
|
||||
Resolution rules (in order):
|
||||
|
||||
1. **Explicit "local".** Returns None — the dispatcher disables cloud
|
||||
mode entirely. Mirrors legacy short-circuit in
|
||||
:func:`tools.browser_tool._get_cloud_provider`.
|
||||
2. **Explicit config wins, ignoring availability.** If ``configured``
|
||||
names a registered provider, return it even if its
|
||||
:meth:`is_available` returns False — the dispatcher will surface a
|
||||
precise "X_API_KEY is not set" error instead of silently routing
|
||||
somewhere else.
|
||||
3. **Legacy preference walk, filtered by availability.** Walk
|
||||
:data:`_LEGACY_PREFERENCE` (``browser-use`` → ``browserbase``) looking
|
||||
for a provider whose ``is_available()`` is True.
|
||||
|
||||
There is intentionally NO "single-eligible shortcut" rule here (unlike
|
||||
:func:`agent.web_search_registry._resolve`). Pre-migration, the
|
||||
auto-detect branch in ``tools.browser_tool._get_cloud_provider`` only
|
||||
considered Browser Use and Browserbase; Firecrawl was reachable only
|
||||
via an explicit ``browser.cloud_provider: firecrawl`` config key.
|
||||
Preserving that gate matters because Firecrawl shares its API key with
|
||||
the *web* extract plugin (``plugins/web/firecrawl/``), so users who set
|
||||
``FIRECRAWL_API_KEY`` for web extract must NOT get silently routed to a
|
||||
paid cloud browser on a fresh install. Third-party browser-provider
|
||||
plugins added under ``~/.hermes/plugins/browser/<vendor>/`` are subject
|
||||
to the same gate — they must be explicitly configured to take effect.
|
||||
|
||||
Returns None when no provider is configured AND no available provider
|
||||
matches the legacy preference; the dispatcher then falls back to local
|
||||
browser mode.
|
||||
"""
|
||||
with _lock:
|
||||
snapshot = dict(_providers)
|
||||
|
||||
def _is_available_safe(p: BrowserProvider) -> bool:
|
||||
"""Wrap ``is_available()`` so a buggy provider doesn't kill resolution."""
|
||||
try:
|
||||
return bool(p.is_available())
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.warning(
|
||||
"Browser provider %s.is_available() raised %s — treating as unavailable",
|
||||
p.name, exc, exc_info=True,
|
||||
)
|
||||
return False
|
||||
|
||||
# 1. Explicit "local" short-circuit.
|
||||
if configured == "local":
|
||||
return None
|
||||
|
||||
# 2. Explicit config wins — return regardless of is_available() so the
|
||||
# user gets a precise downstream error message rather than a silent
|
||||
# backend switch. Matches _get_cloud_provider() in browser_tool.py.
|
||||
if configured:
|
||||
provider = snapshot.get(configured)
|
||||
if provider is not None:
|
||||
return provider
|
||||
logger.debug(
|
||||
"browser cloud_provider '%s' configured but not registered; "
|
||||
"falling back to auto-detect",
|
||||
configured,
|
||||
)
|
||||
|
||||
# 3. Legacy preference walk — only providers in _LEGACY_PREFERENCE are
|
||||
# auto-eligible. Filtered by availability so we don't surface a
|
||||
# provider the user has no credentials for. See docstring for why
|
||||
# we do NOT fall back to "any single-eligible registered provider".
|
||||
for legacy in _LEGACY_PREFERENCE:
|
||||
provider = snapshot.get(legacy)
|
||||
if provider is not None and _is_available_safe(provider):
|
||||
return provider
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _reset_for_tests() -> None:
|
||||
"""Clear the registry. **Test-only.**"""
|
||||
with _lock:
|
||||
_providers.clear()
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,535 +0,0 @@
|
||||
"""Codex API runtime — App Server and Responses-API streaming paths.
|
||||
|
||||
Extracted from :class:`AIAgent` to keep the agent loop file focused.
|
||||
Each function takes the parent ``AIAgent`` as its first argument
|
||||
(``agent``). AIAgent keeps thin forwarder methods for backward
|
||||
compatibility.
|
||||
|
||||
* ``run_codex_app_server_turn`` — drives one turn through the
|
||||
``codex_app_server`` subprocess client (used when a Codex CLI install
|
||||
is the active provider).
|
||||
* ``run_codex_stream`` — streams a Codex Responses API call (the
|
||||
``codex_responses`` api_mode).
|
||||
* ``run_codex_create_stream_fallback`` — recovery path when the
|
||||
Responses ``stream=True`` initial create fails.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_codex_app_server_turn(
|
||||
agent,
|
||||
*,
|
||||
user_message: str,
|
||||
original_user_message: Any,
|
||||
messages: List[Dict[str, Any]],
|
||||
effective_task_id: str,
|
||||
should_review_memory: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""Codex app-server runtime path. Hands the entire turn to a `codex
|
||||
app-server` subprocess and projects its events back into Hermes'
|
||||
messages list so memory/skill review keep working.
|
||||
|
||||
Called from run_conversation() when agent.api_mode == "codex_app_server".
|
||||
Returns the same dict shape as the chat_completions path.
|
||||
"""
|
||||
from agent.transports.codex_app_server_session import CodexAppServerSession
|
||||
|
||||
# Lazy session: one CodexAppServerSession per AIAgent instance.
|
||||
# Spawned on first turn, reused across turns, closed at AIAgent
|
||||
# shutdown (see _cleanup hook).
|
||||
if not hasattr(agent, "_codex_session") or agent._codex_session is None:
|
||||
cwd = getattr(agent, "session_cwd", None) or os.getcwd()
|
||||
# Approval callback: defer to Hermes' standard prompt flow if a
|
||||
# CLI thread has installed one. Gateway / cron contexts get the
|
||||
# codex-side fail-closed default.
|
||||
try:
|
||||
from tools.terminal_tool import _get_approval_callback
|
||||
approval_callback = _get_approval_callback()
|
||||
except Exception:
|
||||
approval_callback = None
|
||||
agent._codex_session = CodexAppServerSession(
|
||||
cwd=cwd,
|
||||
approval_callback=approval_callback,
|
||||
)
|
||||
|
||||
# NOTE: the user message is ALREADY appended to messages by the
|
||||
# standard run_conversation() flow (line ~11823) before the early
|
||||
# return reaches us. Do NOT append again — that would duplicate.
|
||||
|
||||
try:
|
||||
turn = agent._codex_session.run_turn(user_input=user_message)
|
||||
except Exception as exc:
|
||||
logger.exception("codex app-server turn failed")
|
||||
# Crash → unconditionally drop the session so the next turn
|
||||
# respawns from scratch instead of reusing a dead client.
|
||||
try:
|
||||
agent._codex_session.close()
|
||||
except Exception:
|
||||
pass
|
||||
agent._codex_session = None
|
||||
return {
|
||||
"final_response": (
|
||||
f"Codex app-server turn failed: {exc}. "
|
||||
f"Fall back to default runtime with `/codex-runtime auto`."
|
||||
),
|
||||
"messages": messages,
|
||||
"api_calls": 0,
|
||||
"completed": False,
|
||||
"partial": True,
|
||||
"error": str(exc),
|
||||
}
|
||||
|
||||
# If the turn signalled the underlying client is wedged (deadline
|
||||
# blown, post-tool watchdog tripped, OAuth refresh died, subprocess
|
||||
# exited), retire the session so the next turn respawns codex
|
||||
# rather than riding the broken process. Mirrors openclaw beta.8's
|
||||
# "retire timed-out app-server clients" fix.
|
||||
if getattr(turn, "should_retire", False):
|
||||
logger.warning(
|
||||
"codex app-server session retired (turn error: %s)",
|
||||
turn.error,
|
||||
)
|
||||
try:
|
||||
agent._codex_session.close()
|
||||
except Exception:
|
||||
pass
|
||||
agent._codex_session = None
|
||||
|
||||
# Splice projected messages into the conversation. The projector emits
|
||||
# standard {role, content, tool_calls, tool_call_id} entries, which
|
||||
# is exactly what curator.py / sessions DB expect.
|
||||
if turn.projected_messages:
|
||||
messages.extend(turn.projected_messages)
|
||||
|
||||
# Counter ticks for the agent-improvement loop.
|
||||
# _turns_since_memory and _user_turn_count are ALREADY incremented
|
||||
# in the run_conversation() pre-loop block (lines ~11793-11817) so we
|
||||
# do NOT touch them here — that would double-count.
|
||||
# Only _iters_since_skill needs explicit increment, since the
|
||||
# chat_completions loop bumps it per tool iteration (line ~12110)
|
||||
# and that loop is bypassed on this path.
|
||||
agent._iters_since_skill = (
|
||||
getattr(agent, "_iters_since_skill", 0) + turn.tool_iterations
|
||||
)
|
||||
|
||||
# Now check the skill nudge AFTER iters were incremented — same
|
||||
# pattern the chat_completions path uses (line ~15432).
|
||||
should_review_skills = False
|
||||
if (
|
||||
agent._skill_nudge_interval > 0
|
||||
and agent._iters_since_skill >= agent._skill_nudge_interval
|
||||
and "skill_manage" in agent.valid_tool_names
|
||||
):
|
||||
should_review_skills = True
|
||||
agent._iters_since_skill = 0
|
||||
|
||||
# External memory provider sync (mirrors line ~15439). Skipped on
|
||||
# interrupt/error to avoid feeding partial transcripts to memory.
|
||||
if not turn.interrupted and turn.error is None:
|
||||
try:
|
||||
agent._sync_external_memory_for_turn(
|
||||
original_user_message=original_user_message,
|
||||
final_response=turn.final_text,
|
||||
interrupted=False,
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("external memory sync raised", exc_info=True)
|
||||
|
||||
# Background review fork — same cadence + signature as the default
|
||||
# path (line ~15449). Only fires when a trigger actually tripped AND
|
||||
# we have a real final response.
|
||||
if (
|
||||
turn.final_text
|
||||
and not turn.interrupted
|
||||
and (should_review_memory or should_review_skills)
|
||||
):
|
||||
try:
|
||||
agent._spawn_background_review(
|
||||
messages_snapshot=list(messages),
|
||||
review_memory=should_review_memory,
|
||||
review_skills=should_review_skills,
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("background review spawn raised", exc_info=True)
|
||||
|
||||
return {
|
||||
"final_response": turn.final_text,
|
||||
"messages": messages,
|
||||
"api_calls": 1, # one app-server "turn" maps to one logical API call
|
||||
"completed": not turn.interrupted and turn.error is None,
|
||||
"partial": turn.interrupted or turn.error is not None,
|
||||
"error": turn.error,
|
||||
"codex_thread_id": turn.thread_id,
|
||||
"codex_turn_id": turn.turn_id,
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Event-driven Responses streaming
|
||||
#
|
||||
# OpenAI ships its consumer Codex backend (chatgpt.com/backend-api/codex) on
|
||||
# a different schedule from the openai Python SDK. The high-level
|
||||
# ``client.responses.stream(...)`` helper reconstructs a typed Response from
|
||||
# the terminal ``response.completed`` event's ``response.output`` field, and
|
||||
# when that field drifts to ``null`` (gpt-5.5, May 2026) the SDK raises
|
||||
# ``TypeError: 'NoneType' object is not iterable`` mid-iteration.
|
||||
#
|
||||
# We sidestep the whole class of failure by going one level lower:
|
||||
# ``client.responses.create(stream=True)`` returns the raw AsyncIterable of
|
||||
# SSE events, and we assemble the final response object purely from
|
||||
# ``response.output_item.done`` events as they arrive. We never read
|
||||
# ``response.completed.response.output`` for content reconstruction, so the
|
||||
# backend can return ``null``, ``[]``, a string, or omit the field entirely
|
||||
# and we don't care.
|
||||
#
|
||||
# This mirrors what the OpenClaw TS implementation does for the same backend
|
||||
# and is structurally immune to the bug class rather than patched.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
_TERMINAL_EVENT_TYPES = frozenset({
|
||||
"response.completed",
|
||||
"response.incomplete",
|
||||
"response.failed",
|
||||
})
|
||||
|
||||
|
||||
def _event_field(event: Any, name: str, default: Any = None) -> Any:
|
||||
"""Field access that handles both attr-style (SDK objects) and dict (raw JSON) events."""
|
||||
value = getattr(event, name, None)
|
||||
if value is None and isinstance(event, dict):
|
||||
value = event.get(name, default)
|
||||
return value if value is not None else default
|
||||
|
||||
|
||||
def _raise_stream_error(event: Any) -> None:
|
||||
"""Raise a ``_StreamErrorEvent`` from a ``type=error`` SSE frame.
|
||||
|
||||
Imported lazily so this module stays importable from places that don't
|
||||
pull in ``run_agent`` (e.g. plugin code, doc tools).
|
||||
"""
|
||||
from run_agent import _StreamErrorEvent
|
||||
message = (_event_field(event, "message", "") or "stream emitted error event").strip()
|
||||
raise _StreamErrorEvent(
|
||||
message,
|
||||
code=_event_field(event, "code"),
|
||||
param=_event_field(event, "param"),
|
||||
)
|
||||
|
||||
|
||||
def _consume_codex_event_stream(
|
||||
event_iter: Any,
|
||||
*,
|
||||
model: str,
|
||||
on_text_delta=None,
|
||||
on_reasoning_delta=None,
|
||||
on_first_delta=None,
|
||||
on_event=None,
|
||||
interrupt_check=None,
|
||||
) -> SimpleNamespace:
|
||||
"""Consume a Codex Responses SSE event stream and return a final response.
|
||||
|
||||
The returned object is a ``SimpleNamespace`` shaped like the SDK's typed
|
||||
``Response`` for the fields downstream code actually reads:
|
||||
|
||||
* ``output``: list of output items, assembled from ``response.output_item.done``.
|
||||
For tool-call turns this contains the function_call items; for plain-text
|
||||
turns it contains a synthesized ``message`` item built from streamed deltas
|
||||
if no message item was emitted directly.
|
||||
* ``output_text``: assembled text from ``response.output_text.delta`` deltas.
|
||||
* ``usage``: copied from the terminal event's ``response.usage`` (when present).
|
||||
* ``status``: ``completed`` / ``incomplete`` / ``failed`` (or ``completed`` if
|
||||
the stream ended without a terminal frame but produced content).
|
||||
* ``id``: ``response.id`` when present.
|
||||
* ``incomplete_details``: passed through for ``response.incomplete`` frames.
|
||||
* ``error``: passed through for ``response.failed`` frames.
|
||||
* ``model``: from kwargs (the wire model name is not authoritative).
|
||||
|
||||
Critically, we never read ``response.output`` from the terminal event for
|
||||
content reconstruction — only ``usage``, ``status``, ``id``. That field
|
||||
being ``null`` / ``[]`` / missing is fine.
|
||||
|
||||
Callbacks:
|
||||
|
||||
* ``on_text_delta(str)`` — fires per ``response.output_text.delta``, suppressed
|
||||
once a function_call event is seen (so tool-call turns don't bleed text
|
||||
into the chat).
|
||||
* ``on_reasoning_delta(str)`` — fires per ``response.reasoning.*.delta``.
|
||||
* ``on_first_delta()`` — one-shot, fires on the first text delta only.
|
||||
* ``on_event(event)`` — fires for every event before any other processing.
|
||||
Used for watchdog activity, debug logging, anything wire-shape-agnostic.
|
||||
* ``interrupt_check()`` — returns True to break the loop early.
|
||||
"""
|
||||
collected_output_items: List[Any] = []
|
||||
collected_text_deltas: List[str] = []
|
||||
has_tool_calls = False
|
||||
first_delta_fired = False
|
||||
terminal_status: str = "completed"
|
||||
terminal_usage: Any = None
|
||||
terminal_response_id: str = None
|
||||
terminal_incomplete_details: Any = None
|
||||
terminal_error: Any = None
|
||||
saw_terminal = False
|
||||
|
||||
for event in event_iter:
|
||||
if on_event is not None:
|
||||
try:
|
||||
on_event(event)
|
||||
except (TimeoutError, InterruptedError):
|
||||
# Control-flow signals from watchdog/cancellation hooks must
|
||||
# propagate, not get swallowed as "debug noise".
|
||||
raise
|
||||
except Exception:
|
||||
# Genuine bugs in third-party debug/log hooks shouldn't break
|
||||
# stream consumption.
|
||||
logger.debug("Codex stream on_event hook raised", exc_info=True)
|
||||
if interrupt_check is not None and interrupt_check():
|
||||
break
|
||||
|
||||
event_type = _event_field(event, "type", "")
|
||||
if not isinstance(event_type, str):
|
||||
event_type = ""
|
||||
|
||||
# ``error`` SSE frames carry the provider's real failure reason
|
||||
# (subscription / quota / model-not-available / rejected-reasoning-replay)
|
||||
# but never appear in the terminal set. Surface them as a structured
|
||||
# exception so the credential pool + error classifier see the body.
|
||||
if event_type == "error":
|
||||
_raise_stream_error(event)
|
||||
|
||||
if "output_text.delta" in event_type or event_type == "response.output_text.delta":
|
||||
delta_text = _event_field(event, "delta", "")
|
||||
if delta_text:
|
||||
collected_text_deltas.append(delta_text)
|
||||
if not has_tool_calls:
|
||||
if not first_delta_fired:
|
||||
first_delta_fired = True
|
||||
if on_first_delta is not None:
|
||||
try:
|
||||
on_first_delta()
|
||||
except Exception:
|
||||
logger.debug("Codex stream on_first_delta raised", exc_info=True)
|
||||
if on_text_delta is not None:
|
||||
try:
|
||||
on_text_delta(delta_text)
|
||||
except Exception:
|
||||
logger.debug("Codex stream on_text_delta raised", exc_info=True)
|
||||
continue
|
||||
|
||||
if "function_call" in event_type:
|
||||
has_tool_calls = True
|
||||
# fall through — function_call items still get added on output_item.done
|
||||
|
||||
if "reasoning" in event_type and "delta" in event_type:
|
||||
reasoning_text = _event_field(event, "delta", "")
|
||||
if reasoning_text and on_reasoning_delta is not None:
|
||||
try:
|
||||
on_reasoning_delta(reasoning_text)
|
||||
except Exception:
|
||||
logger.debug("Codex stream on_reasoning_delta raised", exc_info=True)
|
||||
continue
|
||||
|
||||
if event_type == "response.output_item.done":
|
||||
done_item = _event_field(event, "item")
|
||||
if done_item is not None:
|
||||
collected_output_items.append(done_item)
|
||||
continue
|
||||
|
||||
if event_type in _TERMINAL_EVENT_TYPES:
|
||||
saw_terminal = True
|
||||
resp_obj = _event_field(event, "response")
|
||||
if resp_obj is not None:
|
||||
terminal_usage = getattr(resp_obj, "usage", None)
|
||||
if terminal_usage is None and isinstance(resp_obj, dict):
|
||||
terminal_usage = resp_obj.get("usage")
|
||||
rid = getattr(resp_obj, "id", None)
|
||||
if rid is None and isinstance(resp_obj, dict):
|
||||
rid = resp_obj.get("id")
|
||||
terminal_response_id = rid
|
||||
rstatus = getattr(resp_obj, "status", None)
|
||||
if rstatus is None and isinstance(resp_obj, dict):
|
||||
rstatus = resp_obj.get("status")
|
||||
if isinstance(rstatus, str):
|
||||
terminal_status = rstatus
|
||||
if event_type == "response.incomplete":
|
||||
terminal_incomplete_details = getattr(resp_obj, "incomplete_details", None)
|
||||
if terminal_incomplete_details is None and isinstance(resp_obj, dict):
|
||||
terminal_incomplete_details = resp_obj.get("incomplete_details")
|
||||
if event_type == "response.failed":
|
||||
terminal_error = getattr(resp_obj, "error", None)
|
||||
if terminal_error is None and isinstance(resp_obj, dict):
|
||||
terminal_error = resp_obj.get("error")
|
||||
if event_type == "response.completed":
|
||||
terminal_status = terminal_status or "completed"
|
||||
elif event_type == "response.incomplete":
|
||||
terminal_status = terminal_status or "incomplete"
|
||||
elif event_type == "response.failed":
|
||||
terminal_status = terminal_status or "failed"
|
||||
# Stop on terminal event.
|
||||
break
|
||||
|
||||
# Build the final output list. Prefer items observed via output_item.done;
|
||||
# if none arrived but we streamed plain text deltas (no tool calls), synthesize
|
||||
# a single message item so downstream normalization has something to work with.
|
||||
if collected_output_items:
|
||||
output = list(collected_output_items)
|
||||
elif collected_text_deltas and not has_tool_calls:
|
||||
assembled = "".join(collected_text_deltas)
|
||||
output = [SimpleNamespace(
|
||||
type="message",
|
||||
role="assistant",
|
||||
status="completed",
|
||||
content=[SimpleNamespace(type="output_text", text=assembled)],
|
||||
)]
|
||||
else:
|
||||
output = []
|
||||
|
||||
# If the stream ended without any terminal event AND produced no usable
|
||||
# content (no items, no text deltas), surface that as a RuntimeError so
|
||||
# callers can distinguish "stream truncated mid-flight / provider rejected
|
||||
# the call" from "stream completed with empty body". This preserves the
|
||||
# signal the SDK's high-level helper used to raise as
|
||||
# ``RuntimeError("Didn't receive a `response.completed` event.")``.
|
||||
if not saw_terminal and not output:
|
||||
raise RuntimeError(
|
||||
"Codex Responses stream did not emit a terminal response"
|
||||
)
|
||||
|
||||
assembled_text = "".join(collected_text_deltas)
|
||||
|
||||
final = SimpleNamespace(
|
||||
output=output,
|
||||
output_text=assembled_text,
|
||||
usage=terminal_usage,
|
||||
status=terminal_status,
|
||||
id=terminal_response_id,
|
||||
model=model,
|
||||
incomplete_details=terminal_incomplete_details,
|
||||
error=terminal_error,
|
||||
)
|
||||
return final
|
||||
|
||||
|
||||
def run_codex_stream(agent, api_kwargs: dict, client: Any = None, on_first_delta=None):
|
||||
"""Execute one streaming Responses API request and return the final response.
|
||||
|
||||
Uses ``responses.create(stream=True)`` (low-level raw event iteration)
|
||||
rather than the high-level ``responses.stream(...)`` helper. This makes
|
||||
us structurally immune to backend drift in the ``response.completed``
|
||||
payload shape — we never let the SDK reconstruct a typed object from
|
||||
the terminal event's ``output`` field.
|
||||
"""
|
||||
import httpx as _httpx
|
||||
|
||||
active_client = client or agent._ensure_primary_openai_client(reason="codex_stream_direct")
|
||||
max_stream_retries = 1
|
||||
# Accumulate streamed text so callers / compat shims can read it.
|
||||
agent._codex_streamed_text_parts: list = []
|
||||
|
||||
def _on_text_delta(text: str) -> None:
|
||||
agent._codex_streamed_text_parts.append(text)
|
||||
agent._fire_stream_delta(text)
|
||||
|
||||
def _on_reasoning_delta(text: str) -> None:
|
||||
agent._fire_reasoning_delta(text)
|
||||
|
||||
def _on_event(event: Any) -> None:
|
||||
# TTFB watchdog and activity touch — runs once per SSE event.
|
||||
agent._codex_stream_last_event_ts = time.time()
|
||||
agent._touch_activity("receiving stream response")
|
||||
|
||||
def _interrupt_check() -> bool:
|
||||
return bool(agent._interrupt_requested)
|
||||
|
||||
for attempt in range(max_stream_retries + 1):
|
||||
if agent._interrupt_requested:
|
||||
raise InterruptedError("Agent interrupted before Codex stream retry")
|
||||
|
||||
stream_kwargs = dict(api_kwargs)
|
||||
stream_kwargs["stream"] = True
|
||||
|
||||
try:
|
||||
event_stream = active_client.responses.create(**stream_kwargs)
|
||||
except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc:
|
||||
if attempt < max_stream_retries:
|
||||
logger.debug(
|
||||
"Codex Responses stream connect failed (attempt %s/%s); retrying. %s error=%s",
|
||||
attempt + 1, max_stream_retries + 1,
|
||||
agent._client_log_context(), exc,
|
||||
)
|
||||
continue
|
||||
raise
|
||||
|
||||
try:
|
||||
# Compatibility: some mocks/providers return a concrete response
|
||||
# instead of an iterable. Pass it straight through.
|
||||
if hasattr(event_stream, "output") and not hasattr(event_stream, "__iter__"):
|
||||
return event_stream
|
||||
|
||||
try:
|
||||
final = _consume_codex_event_stream(
|
||||
event_stream,
|
||||
model=api_kwargs.get("model"),
|
||||
on_text_delta=_on_text_delta,
|
||||
on_reasoning_delta=_on_reasoning_delta,
|
||||
on_first_delta=on_first_delta,
|
||||
on_event=_on_event,
|
||||
interrupt_check=_interrupt_check,
|
||||
)
|
||||
except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc:
|
||||
if attempt < max_stream_retries:
|
||||
logger.debug(
|
||||
"Codex Responses stream transport failed mid-iteration "
|
||||
"(attempt %s/%s); retrying. %s error=%s",
|
||||
attempt + 1, max_stream_retries + 1,
|
||||
agent._client_log_context(), exc,
|
||||
)
|
||||
continue
|
||||
raise
|
||||
|
||||
if final.status in {"incomplete", "failed"}:
|
||||
logger.warning(
|
||||
"Codex Responses stream terminal status=%s "
|
||||
"(incomplete_details=%s, error=%s, streamed_chars=%d). %s",
|
||||
final.status, final.incomplete_details, final.error,
|
||||
sum(len(p) for p in agent._codex_streamed_text_parts),
|
||||
agent._client_log_context(),
|
||||
)
|
||||
|
||||
return final
|
||||
finally:
|
||||
close_fn = getattr(event_stream, "close", None)
|
||||
if callable(close_fn):
|
||||
try:
|
||||
close_fn()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def run_codex_create_stream_fallback(agent, api_kwargs: dict, client: Any = None):
|
||||
"""Backward-compatible alias for the unified event-driven path.
|
||||
|
||||
Historically this was the fallback when the SDK's high-level
|
||||
``responses.stream(...)`` helper raised on shape drift. The primary
|
||||
path now does exactly what the fallback did, so this just forwards.
|
||||
Kept as a public symbol because tests and a small number of call sites
|
||||
still reference it by name.
|
||||
"""
|
||||
return run_codex_stream(agent, api_kwargs, client=client)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"run_codex_app_server_turn",
|
||||
"run_codex_stream",
|
||||
"run_codex_create_stream_fallback",
|
||||
"_consume_codex_event_stream",
|
||||
]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,226 +0,0 @@
|
||||
"""Abstract base class for pluggable context engines.
|
||||
|
||||
A context engine controls how conversation context is managed when
|
||||
approaching the model's token limit. The built-in ContextCompressor
|
||||
is the default implementation. Third-party engines (e.g. LCM) can
|
||||
replace it via the plugin system or by being placed in the
|
||||
``plugins/context_engine/<name>/`` directory.
|
||||
|
||||
Selection is config-driven: ``context.engine`` in config.yaml.
|
||||
Default is ``"compressor"`` (the built-in). Only one engine is active.
|
||||
|
||||
The engine is responsible for:
|
||||
- Deciding when compaction should fire
|
||||
- Performing compaction (summarization, DAG construction, etc.)
|
||||
- Optionally exposing tools the agent can call (e.g. lcm_grep)
|
||||
- Tracking token usage from API responses
|
||||
|
||||
Lifecycle:
|
||||
1. Engine is instantiated and registered (plugin register() or default)
|
||||
2. on_session_start() called when a conversation begins
|
||||
3. update_from_response() called after each API response with usage data
|
||||
4. should_compress() checked after each turn
|
||||
5. compress() called when should_compress() returns True
|
||||
6. on_session_end() called at real session boundaries (CLI exit, /reset,
|
||||
gateway session expiry) — NOT per-turn
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
class ContextEngine(ABC):
|
||||
"""Base class all context engines must implement."""
|
||||
|
||||
# -- Identity ----------------------------------------------------------
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def name(self) -> str:
|
||||
"""Short identifier (e.g. 'compressor', 'lcm')."""
|
||||
|
||||
# -- Token state (read by run_agent.py for display/logging) ------------
|
||||
#
|
||||
# Engines MUST maintain these. run_agent.py reads them directly.
|
||||
|
||||
last_prompt_tokens: int = 0
|
||||
last_completion_tokens: int = 0
|
||||
last_total_tokens: int = 0
|
||||
threshold_tokens: int = 0
|
||||
context_length: int = 0
|
||||
compression_count: int = 0
|
||||
|
||||
# -- Compaction parameters (read by run_agent.py for preflight) --------
|
||||
#
|
||||
# These control the preflight compression check. Subclasses may
|
||||
# override via __init__ or property; defaults are sensible for most
|
||||
# engines.
|
||||
#
|
||||
# protect_first_n semantics (since PR #13754): count of non-system head
|
||||
# messages always preserved verbatim, IN ADDITION to the system prompt
|
||||
# which is always implicitly protected. Default 3 keeps the
|
||||
# historical "system + first 3 non-system messages" head shape.
|
||||
|
||||
threshold_percent: float = 0.75
|
||||
protect_first_n: int = 3
|
||||
protect_last_n: int = 6
|
||||
|
||||
# -- Core interface ----------------------------------------------------
|
||||
|
||||
@abstractmethod
|
||||
def update_from_response(self, usage: Dict[str, Any]) -> None:
|
||||
"""Update tracked token usage from an API response.
|
||||
|
||||
Called after every LLM call with a normalized usage dict. The legacy
|
||||
keys ``prompt_tokens``, ``completion_tokens``, and ``total_tokens``
|
||||
are always present. Newer hosts also include canonical buckets:
|
||||
``input_tokens``, ``output_tokens``, ``cache_read_tokens``,
|
||||
``cache_write_tokens``, and ``reasoning_tokens``. Engines should
|
||||
treat those fields as optional for compatibility with older hosts.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def should_compress(self, prompt_tokens: int = None) -> bool:
|
||||
"""Return True if compaction should fire this turn."""
|
||||
|
||||
@abstractmethod
|
||||
def compress(
|
||||
self,
|
||||
messages: List[Dict[str, Any]],
|
||||
current_tokens: int = None,
|
||||
focus_topic: str = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Compact the message list and return the new message list.
|
||||
|
||||
This is the main entry point. The engine receives the full message
|
||||
list and returns a (possibly shorter) list that fits within the
|
||||
context budget. The implementation is free to summarize, build a
|
||||
DAG, or do anything else — as long as the returned list is a valid
|
||||
OpenAI-format message sequence.
|
||||
|
||||
Args:
|
||||
focus_topic: Optional topic string from manual ``/compress <focus>``.
|
||||
Engines that support guided compression should prioritise
|
||||
preserving information related to this topic. Engines that
|
||||
don't support it may simply ignore this argument.
|
||||
"""
|
||||
|
||||
# -- Optional: pre-flight check ----------------------------------------
|
||||
|
||||
def should_compress_preflight(self, messages: List[Dict[str, Any]]) -> bool:
|
||||
"""Quick rough check before the API call (no real token count yet).
|
||||
|
||||
Default returns False (skip pre-flight). Override if your engine
|
||||
can do a cheap estimate.
|
||||
"""
|
||||
return False
|
||||
|
||||
def should_defer_preflight_to_real_usage(self, rough_tokens: int) -> bool:
|
||||
"""Return True when preflight should trust recent real usage instead.
|
||||
|
||||
Built-in compression uses this to avoid re-compacting from known-noisy
|
||||
rough estimates after a compressed request has already fit. Third-party
|
||||
engines can ignore it safely.
|
||||
"""
|
||||
return False
|
||||
|
||||
# -- Optional: manual /compress preflight ------------------------------
|
||||
|
||||
def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
|
||||
"""Quick check: is there anything in ``messages`` that can be compacted?
|
||||
|
||||
Used by the gateway ``/compress`` command as a preflight guard —
|
||||
returning False lets the gateway report "nothing to compress yet"
|
||||
without making an LLM call.
|
||||
|
||||
Default returns True (always attempt). Engines with a cheap way
|
||||
to introspect their own head/tail boundaries should override this
|
||||
to return False when the transcript is still entirely protected.
|
||||
"""
|
||||
return True
|
||||
|
||||
# -- Optional: session lifecycle ---------------------------------------
|
||||
|
||||
def on_session_start(self, session_id: str, **kwargs) -> None:
|
||||
"""Called when a new conversation session begins.
|
||||
|
||||
Use this to load persisted state (DAG, store) for the session.
|
||||
kwargs may include hermes_home, platform, model, etc.
|
||||
"""
|
||||
|
||||
def on_session_end(self, session_id: str, messages: List[Dict[str, Any]]) -> None:
|
||||
"""Called at real session boundaries (CLI exit, /reset, gateway expiry).
|
||||
|
||||
Use this to flush state, close DB connections, etc.
|
||||
NOT called per-turn — only when the session truly ends.
|
||||
"""
|
||||
|
||||
def on_session_reset(self) -> None:
|
||||
"""Called on /new or /reset. Reset per-session state.
|
||||
|
||||
Default resets compression_count and token tracking.
|
||||
"""
|
||||
self.last_prompt_tokens = 0
|
||||
self.last_completion_tokens = 0
|
||||
self.last_total_tokens = 0
|
||||
self.compression_count = 0
|
||||
|
||||
# -- Optional: tools ---------------------------------------------------
|
||||
|
||||
def get_tool_schemas(self) -> List[Dict[str, Any]]:
|
||||
"""Return tool schemas this engine provides to the agent.
|
||||
|
||||
Default returns empty list (no tools). LCM would return schemas
|
||||
for lcm_grep, lcm_describe, lcm_expand here.
|
||||
"""
|
||||
return []
|
||||
|
||||
def handle_tool_call(self, name: str, args: Dict[str, Any], **kwargs) -> str:
|
||||
"""Handle a tool call from the agent.
|
||||
|
||||
Only called for tool names returned by get_tool_schemas().
|
||||
Must return a JSON string.
|
||||
|
||||
kwargs may include:
|
||||
messages: the current in-memory message list (for live ingestion)
|
||||
"""
|
||||
import json
|
||||
return json.dumps({"error": f"Unknown context engine tool: {name}"})
|
||||
|
||||
# -- Optional: status / display ----------------------------------------
|
||||
|
||||
def get_status(self) -> Dict[str, Any]:
|
||||
"""Return status dict for display/logging.
|
||||
|
||||
Default returns the standard fields run_agent.py expects.
|
||||
"""
|
||||
return {
|
||||
"last_prompt_tokens": self.last_prompt_tokens,
|
||||
"threshold_tokens": self.threshold_tokens,
|
||||
"context_length": self.context_length,
|
||||
"usage_percent": (
|
||||
min(100, self.last_prompt_tokens / self.context_length * 100)
|
||||
if self.context_length else 0
|
||||
),
|
||||
"compression_count": self.compression_count,
|
||||
}
|
||||
|
||||
# -- Optional: model switch support ------------------------------------
|
||||
|
||||
def update_model(
|
||||
self,
|
||||
model: str,
|
||||
context_length: int,
|
||||
base_url: str = "",
|
||||
api_key: str = "",
|
||||
provider: str = "",
|
||||
api_mode: str = "",
|
||||
) -> None:
|
||||
"""Called when the user switches models or on fallback activation.
|
||||
|
||||
Default updates context_length and recalculates threshold_tokens
|
||||
from threshold_percent. Override if your engine needs more
|
||||
(e.g. recalculate DAG budgets, switch summary models).
|
||||
"""
|
||||
self.context_length = context_length
|
||||
self.threshold_tokens = int(context_length * self.threshold_percent)
|
||||
@@ -1,518 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import inspect
|
||||
import json
|
||||
import mimetypes
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Awaitable, Callable
|
||||
|
||||
from agent.model_metadata import estimate_tokens_rough
|
||||
|
||||
_QUOTED_REFERENCE_VALUE = r'(?:`[^`\n]+`|"[^"\n]+"|\'[^\'\n]+\')'
|
||||
REFERENCE_PATTERN = re.compile(
|
||||
rf"(?<![\w/])@(?:(?P<simple>diff|staged)\b|(?P<kind>file|folder|git|url):(?P<value>{_QUOTED_REFERENCE_VALUE}(?::\d+(?:-\d+)?)?|\S+))"
|
||||
)
|
||||
TRAILING_PUNCTUATION = ",.;!?"
|
||||
_SENSITIVE_HOME_DIRS = (".ssh", ".aws", ".gnupg", ".kube", ".docker", ".azure", ".config/gh")
|
||||
_SENSITIVE_HERMES_DIRS = (Path("skills") / ".hub",)
|
||||
_SENSITIVE_HOME_FILES = (
|
||||
Path(".ssh") / "authorized_keys",
|
||||
Path(".ssh") / "id_rsa",
|
||||
Path(".ssh") / "id_ed25519",
|
||||
Path(".ssh") / "config",
|
||||
Path(".bashrc"),
|
||||
Path(".zshrc"),
|
||||
Path(".profile"),
|
||||
Path(".bash_profile"),
|
||||
Path(".zprofile"),
|
||||
Path(".netrc"),
|
||||
Path(".pgpass"),
|
||||
Path(".npmrc"),
|
||||
Path(".pypirc"),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ContextReference:
|
||||
raw: str
|
||||
kind: str
|
||||
target: str
|
||||
start: int
|
||||
end: int
|
||||
line_start: int | None = None
|
||||
line_end: int | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class ContextReferenceResult:
|
||||
message: str
|
||||
original_message: str
|
||||
references: list[ContextReference] = field(default_factory=list)
|
||||
warnings: list[str] = field(default_factory=list)
|
||||
injected_tokens: int = 0
|
||||
expanded: bool = False
|
||||
blocked: bool = False
|
||||
|
||||
|
||||
def parse_context_references(message: str) -> list[ContextReference]:
|
||||
refs: list[ContextReference] = []
|
||||
if not message:
|
||||
return refs
|
||||
|
||||
for match in REFERENCE_PATTERN.finditer(message):
|
||||
simple = match.group("simple")
|
||||
if simple:
|
||||
refs.append(
|
||||
ContextReference(
|
||||
raw=match.group(0),
|
||||
kind=simple,
|
||||
target="",
|
||||
start=match.start(),
|
||||
end=match.end(),
|
||||
)
|
||||
)
|
||||
continue
|
||||
|
||||
kind = match.group("kind")
|
||||
value = _strip_trailing_punctuation(match.group("value") or "")
|
||||
line_start = None
|
||||
line_end = None
|
||||
target = _strip_reference_wrappers(value)
|
||||
|
||||
if kind == "file":
|
||||
target, line_start, line_end = _parse_file_reference_value(value)
|
||||
|
||||
refs.append(
|
||||
ContextReference(
|
||||
raw=match.group(0),
|
||||
kind=kind,
|
||||
target=target,
|
||||
start=match.start(),
|
||||
end=match.end(),
|
||||
line_start=line_start,
|
||||
line_end=line_end,
|
||||
)
|
||||
)
|
||||
|
||||
return refs
|
||||
|
||||
|
||||
def preprocess_context_references(
|
||||
message: str,
|
||||
*,
|
||||
cwd: str | Path,
|
||||
context_length: int,
|
||||
url_fetcher: Callable[[str], str | Awaitable[str]] | None = None,
|
||||
allowed_root: str | Path | None = None,
|
||||
) -> ContextReferenceResult:
|
||||
coro = preprocess_context_references_async(
|
||||
message,
|
||||
cwd=cwd,
|
||||
context_length=context_length,
|
||||
url_fetcher=url_fetcher,
|
||||
allowed_root=allowed_root,
|
||||
)
|
||||
# Safe for both CLI (no loop) and gateway (loop already running).
|
||||
try:
|
||||
loop = asyncio.get_running_loop()
|
||||
except RuntimeError:
|
||||
loop = None
|
||||
if loop and loop.is_running():
|
||||
import concurrent.futures
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
|
||||
return pool.submit(asyncio.run, coro).result()
|
||||
return asyncio.run(coro)
|
||||
|
||||
|
||||
async def preprocess_context_references_async(
|
||||
message: str,
|
||||
*,
|
||||
cwd: str | Path,
|
||||
context_length: int,
|
||||
url_fetcher: Callable[[str], str | Awaitable[str]] | None = None,
|
||||
allowed_root: str | Path | None = None,
|
||||
) -> ContextReferenceResult:
|
||||
refs = parse_context_references(message)
|
||||
if not refs:
|
||||
return ContextReferenceResult(message=message, original_message=message)
|
||||
|
||||
cwd_path = Path(cwd).expanduser().resolve()
|
||||
# Default to the current working directory so @ references cannot escape
|
||||
# the active workspace unless a caller explicitly widens the root.
|
||||
allowed_root_path = (
|
||||
Path(allowed_root).expanduser().resolve() if allowed_root is not None else cwd_path
|
||||
)
|
||||
warnings: list[str] = []
|
||||
blocks: list[str] = []
|
||||
injected_tokens = 0
|
||||
|
||||
for ref in refs:
|
||||
warning, block = await _expand_reference(
|
||||
ref,
|
||||
cwd_path,
|
||||
url_fetcher=url_fetcher,
|
||||
allowed_root=allowed_root_path,
|
||||
)
|
||||
if warning:
|
||||
warnings.append(warning)
|
||||
if block:
|
||||
blocks.append(block)
|
||||
injected_tokens += estimate_tokens_rough(block)
|
||||
|
||||
hard_limit = max(1, int(context_length * 0.50))
|
||||
soft_limit = max(1, int(context_length * 0.25))
|
||||
if injected_tokens > hard_limit:
|
||||
warnings.append(
|
||||
f"@ context injection refused: {injected_tokens} tokens exceeds the 50% hard limit ({hard_limit})."
|
||||
)
|
||||
return ContextReferenceResult(
|
||||
message=message,
|
||||
original_message=message,
|
||||
references=refs,
|
||||
warnings=warnings,
|
||||
injected_tokens=injected_tokens,
|
||||
expanded=False,
|
||||
blocked=True,
|
||||
)
|
||||
|
||||
if injected_tokens > soft_limit:
|
||||
warnings.append(
|
||||
f"@ context injection warning: {injected_tokens} tokens exceeds the 25% soft limit ({soft_limit})."
|
||||
)
|
||||
|
||||
stripped = _remove_reference_tokens(message, refs)
|
||||
final = stripped
|
||||
if warnings:
|
||||
final = f"{final}\n\n--- Context Warnings ---\n" + "\n".join(f"- {warning}" for warning in warnings)
|
||||
if blocks:
|
||||
final = f"{final}\n\n--- Attached Context ---\n\n" + "\n\n".join(blocks)
|
||||
|
||||
return ContextReferenceResult(
|
||||
message=final.strip(),
|
||||
original_message=message,
|
||||
references=refs,
|
||||
warnings=warnings,
|
||||
injected_tokens=injected_tokens,
|
||||
expanded=bool(blocks or warnings),
|
||||
blocked=False,
|
||||
)
|
||||
|
||||
|
||||
async def _expand_reference(
|
||||
ref: ContextReference,
|
||||
cwd: Path,
|
||||
*,
|
||||
url_fetcher: Callable[[str], str | Awaitable[str]] | None = None,
|
||||
allowed_root: Path | None = None,
|
||||
) -> tuple[str | None, str | None]:
|
||||
try:
|
||||
if ref.kind == "file":
|
||||
return _expand_file_reference(ref, cwd, allowed_root=allowed_root)
|
||||
if ref.kind == "folder":
|
||||
return _expand_folder_reference(ref, cwd, allowed_root=allowed_root)
|
||||
if ref.kind == "diff":
|
||||
return _expand_git_reference(ref, cwd, ["diff"], "git diff")
|
||||
if ref.kind == "staged":
|
||||
return _expand_git_reference(ref, cwd, ["diff", "--staged"], "git diff --staged")
|
||||
if ref.kind == "git":
|
||||
count = max(1, min(int(ref.target or "1"), 10))
|
||||
return _expand_git_reference(ref, cwd, ["log", f"-{count}", "-p"], f"git log -{count} -p")
|
||||
if ref.kind == "url":
|
||||
content = await _fetch_url_content(ref.target, url_fetcher=url_fetcher)
|
||||
if not content:
|
||||
return f"{ref.raw}: no content extracted", None
|
||||
return None, f"🌐 {ref.raw} ({estimate_tokens_rough(content)} tokens)\n{content}"
|
||||
except Exception as exc:
|
||||
return f"{ref.raw}: {exc}", None
|
||||
|
||||
return f"{ref.raw}: unsupported reference type", None
|
||||
|
||||
|
||||
def _expand_file_reference(
|
||||
ref: ContextReference,
|
||||
cwd: Path,
|
||||
*,
|
||||
allowed_root: Path | None = None,
|
||||
) -> tuple[str | None, str | None]:
|
||||
path = _resolve_path(cwd, ref.target, allowed_root=allowed_root)
|
||||
_ensure_reference_path_allowed(path)
|
||||
if not path.exists():
|
||||
return f"{ref.raw}: file not found", None
|
||||
if not path.is_file():
|
||||
return f"{ref.raw}: path is not a file", None
|
||||
if _is_binary_file(path):
|
||||
return f"{ref.raw}: binary files are not supported", None
|
||||
|
||||
text = path.read_text(encoding="utf-8")
|
||||
if ref.line_start is not None:
|
||||
lines = text.splitlines()
|
||||
start_idx = max(ref.line_start - 1, 0)
|
||||
end_idx = min(ref.line_end or ref.line_start, len(lines))
|
||||
text = "\n".join(lines[start_idx:end_idx])
|
||||
|
||||
lang = _code_fence_language(path)
|
||||
label = ref.raw
|
||||
return None, f"📄 {label} ({estimate_tokens_rough(text)} tokens)\n```{lang}\n{text}\n```"
|
||||
|
||||
|
||||
def _expand_folder_reference(
|
||||
ref: ContextReference,
|
||||
cwd: Path,
|
||||
*,
|
||||
allowed_root: Path | None = None,
|
||||
) -> tuple[str | None, str | None]:
|
||||
path = _resolve_path(cwd, ref.target, allowed_root=allowed_root)
|
||||
_ensure_reference_path_allowed(path)
|
||||
if not path.exists():
|
||||
return f"{ref.raw}: folder not found", None
|
||||
if not path.is_dir():
|
||||
return f"{ref.raw}: path is not a folder", None
|
||||
|
||||
listing = _build_folder_listing(path, cwd)
|
||||
return None, f"📁 {ref.raw} ({estimate_tokens_rough(listing)} tokens)\n{listing}"
|
||||
|
||||
|
||||
def _expand_git_reference(
|
||||
ref: ContextReference,
|
||||
cwd: Path,
|
||||
args: list[str],
|
||||
label: str,
|
||||
) -> tuple[str | None, str | None]:
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", *args],
|
||||
cwd=cwd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
return f"{ref.raw}: git command timed out (30s)", None
|
||||
if result.returncode != 0:
|
||||
stderr = (result.stderr or "").strip() or "git command failed"
|
||||
return f"{ref.raw}: {stderr}", None
|
||||
content = result.stdout.strip()
|
||||
if not content:
|
||||
content = "(no output)"
|
||||
return None, f"🧾 {label} ({estimate_tokens_rough(content)} tokens)\n```diff\n{content}\n```"
|
||||
|
||||
|
||||
async def _fetch_url_content(
|
||||
url: str,
|
||||
*,
|
||||
url_fetcher: Callable[[str], str | Awaitable[str]] | None = None,
|
||||
) -> str:
|
||||
fetcher = url_fetcher or _default_url_fetcher
|
||||
content = fetcher(url)
|
||||
if inspect.isawaitable(content):
|
||||
content = await content
|
||||
return str(content or "").strip()
|
||||
|
||||
|
||||
async def _default_url_fetcher(url: str) -> str:
|
||||
from tools.web_tools import web_extract_tool
|
||||
|
||||
raw = await web_extract_tool([url], format="markdown", use_llm_processing=True)
|
||||
payload = json.loads(raw)
|
||||
docs = payload.get("data", {}).get("documents", [])
|
||||
if not docs:
|
||||
return ""
|
||||
doc = docs[0]
|
||||
return str(doc.get("content") or doc.get("raw_content") or "").strip()
|
||||
|
||||
|
||||
def _resolve_path(cwd: Path, target: str, *, allowed_root: Path | None = None) -> Path:
|
||||
path = Path(os.path.expanduser(target))
|
||||
if not path.is_absolute():
|
||||
path = cwd / path
|
||||
resolved = path.resolve()
|
||||
if allowed_root is not None:
|
||||
try:
|
||||
resolved.relative_to(allowed_root)
|
||||
except ValueError as exc:
|
||||
raise ValueError("path is outside the allowed workspace") from exc
|
||||
return resolved
|
||||
|
||||
|
||||
def _ensure_reference_path_allowed(path: Path) -> None:
|
||||
from hermes_constants import get_hermes_home
|
||||
home = Path(os.path.expanduser("~")).resolve()
|
||||
hermes_home = get_hermes_home().resolve()
|
||||
|
||||
blocked_exact = {home / rel for rel in _SENSITIVE_HOME_FILES}
|
||||
blocked_exact.add(hermes_home / ".env")
|
||||
blocked_dirs = [home / rel for rel in _SENSITIVE_HOME_DIRS]
|
||||
blocked_dirs.extend(hermes_home / rel for rel in _SENSITIVE_HERMES_DIRS)
|
||||
|
||||
if path in blocked_exact:
|
||||
raise ValueError("path is a sensitive credential file and cannot be attached")
|
||||
|
||||
for blocked_dir in blocked_dirs:
|
||||
try:
|
||||
path.relative_to(blocked_dir)
|
||||
except ValueError:
|
||||
continue
|
||||
raise ValueError("path is a sensitive credential or internal Hermes path and cannot be attached")
|
||||
|
||||
|
||||
def _strip_trailing_punctuation(value: str) -> str:
|
||||
stripped = value.rstrip(TRAILING_PUNCTUATION)
|
||||
while stripped.endswith((")", "]", "}")):
|
||||
closer = stripped[-1]
|
||||
opener = {")": "(", "]": "[", "}": "{"}[closer]
|
||||
if stripped.count(closer) > stripped.count(opener):
|
||||
stripped = stripped[:-1]
|
||||
continue
|
||||
break
|
||||
return stripped
|
||||
|
||||
|
||||
def _strip_reference_wrappers(value: str) -> str:
|
||||
if len(value) >= 2 and value[0] == value[-1] and value[0] in "`\"'":
|
||||
return value[1:-1]
|
||||
return value
|
||||
|
||||
|
||||
def _parse_file_reference_value(value: str) -> tuple[str, int | None, int | None]:
|
||||
quoted_match = re.match(
|
||||
r'^(?P<quote>`|"|\')(?P<path>.+?)(?P=quote)(?::(?P<start>\d+)(?:-(?P<end>\d+))?)?$',
|
||||
value,
|
||||
)
|
||||
if quoted_match:
|
||||
line_start = quoted_match.group("start")
|
||||
line_end = quoted_match.group("end")
|
||||
return (
|
||||
quoted_match.group("path"),
|
||||
int(line_start) if line_start is not None else None,
|
||||
int(line_end or line_start) if line_start is not None else None,
|
||||
)
|
||||
|
||||
range_match = re.match(r"^(?P<path>.+?):(?P<start>\d+)(?:-(?P<end>\d+))?$", value)
|
||||
if range_match:
|
||||
line_start = int(range_match.group("start"))
|
||||
return (
|
||||
range_match.group("path"),
|
||||
line_start,
|
||||
int(range_match.group("end") or range_match.group("start")),
|
||||
)
|
||||
|
||||
return _strip_reference_wrappers(value), None, None
|
||||
|
||||
|
||||
def _remove_reference_tokens(message: str, refs: list[ContextReference]) -> str:
|
||||
pieces: list[str] = []
|
||||
cursor = 0
|
||||
for ref in refs:
|
||||
pieces.append(message[cursor:ref.start])
|
||||
cursor = ref.end
|
||||
pieces.append(message[cursor:])
|
||||
text = "".join(pieces)
|
||||
text = re.sub(r"\s{2,}", " ", text)
|
||||
text = re.sub(r"\s+([,.;:!?])", r"\1", text)
|
||||
return text.strip()
|
||||
|
||||
|
||||
def _is_binary_file(path: Path) -> bool:
|
||||
mime, _ = mimetypes.guess_type(path.name)
|
||||
if mime and not mime.startswith("text/") and not any(
|
||||
path.name.endswith(ext) for ext in (".py", ".md", ".txt", ".json", ".yaml", ".yml", ".toml", ".js", ".ts")
|
||||
):
|
||||
return True
|
||||
chunk = path.read_bytes()[:4096]
|
||||
return b"\x00" in chunk
|
||||
|
||||
|
||||
def _build_folder_listing(path: Path, cwd: Path, limit: int = 200) -> str:
|
||||
lines = [f"{path.relative_to(cwd)}/"]
|
||||
entries = _iter_visible_entries(path, cwd, limit=limit)
|
||||
for entry in entries:
|
||||
rel = entry.relative_to(cwd)
|
||||
indent = " " * max(len(rel.parts) - len(path.relative_to(cwd).parts) - 1, 0)
|
||||
if entry.is_dir():
|
||||
lines.append(f"{indent}- {entry.name}/")
|
||||
else:
|
||||
meta = _file_metadata(entry)
|
||||
lines.append(f"{indent}- {entry.name} ({meta})")
|
||||
if len(entries) >= limit:
|
||||
lines.append("- ...")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _iter_visible_entries(path: Path, cwd: Path, limit: int) -> list[Path]:
|
||||
rg_entries = _rg_files(path, cwd, limit=limit)
|
||||
if rg_entries is not None:
|
||||
output: list[Path] = []
|
||||
seen_dirs: set[Path] = set()
|
||||
for rel in rg_entries:
|
||||
full = cwd / rel
|
||||
for parent in full.parents:
|
||||
if parent == cwd or parent in seen_dirs or path not in {parent, *parent.parents}:
|
||||
continue
|
||||
seen_dirs.add(parent)
|
||||
output.append(parent)
|
||||
output.append(full)
|
||||
return sorted({p for p in output if p.exists()}, key=lambda p: (not p.is_dir(), str(p)))
|
||||
|
||||
output = []
|
||||
for root, dirs, files in os.walk(path):
|
||||
dirs[:] = sorted(d for d in dirs if not d.startswith(".") and d != "__pycache__")
|
||||
files = sorted(f for f in files if not f.startswith("."))
|
||||
root_path = Path(root)
|
||||
for d in dirs:
|
||||
output.append(root_path / d)
|
||||
if len(output) >= limit:
|
||||
return output
|
||||
for f in files:
|
||||
output.append(root_path / f)
|
||||
if len(output) >= limit:
|
||||
return output
|
||||
return output
|
||||
|
||||
|
||||
def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["rg", "--files", str(path.relative_to(cwd))],
|
||||
cwd=cwd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10,
|
||||
)
|
||||
except (FileNotFoundError, OSError, subprocess.TimeoutExpired):
|
||||
return None
|
||||
if result.returncode != 0:
|
||||
return None
|
||||
files = [Path(line.strip()) for line in result.stdout.splitlines() if line.strip()]
|
||||
return files[:limit]
|
||||
|
||||
|
||||
def _file_metadata(path: Path) -> str:
|
||||
if _is_binary_file(path):
|
||||
return f"{path.stat().st_size} bytes"
|
||||
try:
|
||||
line_count = path.read_text(encoding="utf-8").count("\n") + 1
|
||||
except Exception:
|
||||
return f"{path.stat().st_size} bytes"
|
||||
return f"{line_count} lines"
|
||||
|
||||
|
||||
def _code_fence_language(path: Path) -> str:
|
||||
mapping = {
|
||||
".py": "python",
|
||||
".js": "javascript",
|
||||
".ts": "typescript",
|
||||
".tsx": "tsx",
|
||||
".jsx": "jsx",
|
||||
".json": "json",
|
||||
".md": "markdown",
|
||||
".sh": "bash",
|
||||
".yml": "yaml",
|
||||
".yaml": "yaml",
|
||||
".toml": "toml",
|
||||
}
|
||||
return mapping.get(path.suffix.lower(), "")
|
||||
@@ -1,785 +0,0 @@
|
||||
"""Context compression — extract the AIAgent methods that drive summarisation.
|
||||
|
||||
Three concerns live here:
|
||||
|
||||
* :func:`check_compression_model_feasibility` — startup probe of the
|
||||
configured auxiliary compression model. Warns when the aux context
|
||||
window can't fit the main model's compression threshold; auto-lowers
|
||||
the session threshold when possible; hard-rejects auxes below
|
||||
``MINIMUM_CONTEXT_LENGTH``.
|
||||
|
||||
* :func:`replay_compression_warning` — re-emit a stored warning through
|
||||
the gateway ``status_callback`` once it's wired up (the callback is
|
||||
set after :class:`AIAgent` construction).
|
||||
|
||||
* :func:`compress_context` — the actual compression call. Runs the
|
||||
configured compressor, splits the SQLite session, rotates the
|
||||
session_id, notifies plugin context engines / memory providers, and
|
||||
returns the compressed message list and freshly-built system prompt.
|
||||
|
||||
* :func:`try_shrink_image_parts_in_messages` — image-too-large recovery
|
||||
helper that re-encodes ``data:image/...;base64,...`` parts at a smaller
|
||||
size so retries can fit under provider ceilings (Anthropic's 5 MB).
|
||||
|
||||
``run_agent`` keeps thin wrappers for each so existing call sites
|
||||
(``self._compress_context(...)``) keep working. Tests that exercise
|
||||
these paths see no behavioural change.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional, Tuple
|
||||
|
||||
from agent.model_metadata import estimate_request_tokens_rough
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _compression_lock_holder(agent: Any) -> str:
|
||||
"""Build a unique holder id for the lock: pid:tid:agent-instance:uuid.
|
||||
|
||||
The pid+tid prefix lets ops tell crashed/abandoned holders apart from
|
||||
live ones (expiry-based recovery uses the timestamp, but ``holder``
|
||||
is what shows up in diagnostics + log lines). The agent instance id
|
||||
and a per-acquire uuid disambiguate two co-resident agents on the
|
||||
same thread (background_review forks run on a worker thread, but
|
||||
on machines where compression itself dispatches to a thread pool
|
||||
we want each acquire to be unique).
|
||||
"""
|
||||
import threading
|
||||
return (
|
||||
f"pid={os.getpid()}"
|
||||
f":tid={threading.get_ident()}"
|
||||
f":agent={id(agent):x}"
|
||||
f":nonce={uuid.uuid4().hex[:8]}"
|
||||
)
|
||||
|
||||
|
||||
def check_compression_model_feasibility(agent: Any) -> None:
|
||||
"""Warn at session start if the auxiliary compression model's context
|
||||
window is smaller than the main model's compression threshold.
|
||||
|
||||
When the auxiliary model cannot fit the content that needs summarising,
|
||||
compression will either fail outright (the LLM call errors) or produce
|
||||
a severely truncated summary.
|
||||
|
||||
Called during ``AIAgent.__init__`` so CLI users see the warning
|
||||
immediately (via ``_vprint``). The gateway sets ``status_callback``
|
||||
*after* construction, so :func:`replay_compression_warning` re-sends
|
||||
the stored warning through the callback on the first
|
||||
``run_conversation()`` call.
|
||||
"""
|
||||
if not agent.compression_enabled:
|
||||
return
|
||||
try:
|
||||
from agent.auxiliary_client import (
|
||||
_resolve_task_provider_model,
|
||||
get_text_auxiliary_client,
|
||||
)
|
||||
from agent.model_metadata import (
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
get_model_context_length,
|
||||
)
|
||||
|
||||
client, aux_model = get_text_auxiliary_client(
|
||||
"compression",
|
||||
main_runtime=agent._current_main_runtime(),
|
||||
)
|
||||
# Best-effort aux provider label for the warning message. The
|
||||
# configured provider may be "auto", in which case we fall back
|
||||
# to the client's base_url hostname so the user can still tell
|
||||
# where the compression model is actually being called.
|
||||
try:
|
||||
_aux_cfg_provider, _, _, _, _ = _resolve_task_provider_model("compression")
|
||||
except Exception:
|
||||
_aux_cfg_provider = ""
|
||||
if client is None or not aux_model:
|
||||
if _aux_cfg_provider and _aux_cfg_provider != "auto":
|
||||
msg = (
|
||||
"⚠ Configured auxiliary compression provider "
|
||||
f"'{_aux_cfg_provider}' is unavailable — context "
|
||||
"compression will drop middle turns without a summary. "
|
||||
"Check auxiliary.compression in config.yaml and "
|
||||
"reauthenticate that provider."
|
||||
)
|
||||
else:
|
||||
msg = (
|
||||
"⚠ No auxiliary LLM provider configured — context "
|
||||
"compression will drop middle turns without a summary. "
|
||||
"Run `hermes setup` or set OPENROUTER_API_KEY."
|
||||
)
|
||||
agent._compression_warning = msg
|
||||
agent._emit_status(msg)
|
||||
logger.warning(
|
||||
"No auxiliary LLM provider for compression — "
|
||||
"summaries will be unavailable."
|
||||
)
|
||||
return
|
||||
|
||||
aux_base_url = str(getattr(client, "base_url", ""))
|
||||
# ``client.api_key`` may be a callable (Azure Foundry Entra ID
|
||||
# bearer provider). The context-length resolver chain expects a
|
||||
# string, but it only needs a key for live catalogue probes
|
||||
# (provider model lists). For Entra clients the model-metadata
|
||||
# chain still resolves via models.dev + hardcoded family
|
||||
# fallbacks, which don't require auth — pass empty string rather
|
||||
# than minting a bearer JWT just to look up a context length.
|
||||
_raw_aux_key = getattr(client, "api_key", "")
|
||||
aux_api_key = "" if (callable(_raw_aux_key) and not isinstance(_raw_aux_key, str)) else str(_raw_aux_key or "")
|
||||
|
||||
aux_context = get_model_context_length(
|
||||
aux_model,
|
||||
base_url=aux_base_url,
|
||||
api_key=aux_api_key,
|
||||
config_context_length=getattr(agent, "_aux_compression_context_length_config", None),
|
||||
# Each model must be resolved with its own provider so that
|
||||
# provider-specific paths (e.g. Bedrock static table, OpenRouter API)
|
||||
# are invoked for the correct client, not inherited from the main model.
|
||||
provider=(_aux_cfg_provider if _aux_cfg_provider and _aux_cfg_provider != "auto" else getattr(agent, "provider", "")),
|
||||
custom_providers=agent._custom_providers,
|
||||
)
|
||||
|
||||
# Hard floor: the auxiliary compression model must have at least
|
||||
# MINIMUM_CONTEXT_LENGTH (64K) tokens of context. The main model
|
||||
# is already required to meet this floor (checked earlier in
|
||||
# __init__), so the compression model must too — otherwise it
|
||||
# cannot summarise a full threshold-sized window of main-model
|
||||
# content. Mirrors the main-model rejection pattern.
|
||||
if aux_context and aux_context < MINIMUM_CONTEXT_LENGTH:
|
||||
raise ValueError(
|
||||
f"Auxiliary compression model {aux_model} has a context "
|
||||
f"window of {aux_context:,} tokens, which is below the "
|
||||
f"minimum {MINIMUM_CONTEXT_LENGTH:,} required by Hermes "
|
||||
f"Agent. Choose a compression model with at least "
|
||||
f"{MINIMUM_CONTEXT_LENGTH // 1000}K context (set "
|
||||
f"auxiliary.compression.model in config.yaml), or set "
|
||||
f"auxiliary.compression.context_length to override the "
|
||||
f"detected value if it is wrong."
|
||||
)
|
||||
|
||||
threshold = agent.context_compressor.threshold_tokens
|
||||
if aux_context < threshold:
|
||||
# Auto-correct: lower the live session threshold so
|
||||
# compression actually works this session. The hard floor
|
||||
# above guarantees aux_context >= MINIMUM_CONTEXT_LENGTH,
|
||||
# so the new threshold is always >= 64K.
|
||||
#
|
||||
# The compression summariser sends a single user-role
|
||||
# prompt (no system prompt, no tools) to the aux model, so
|
||||
# new_threshold == aux_context is safe: the request is
|
||||
# the raw messages plus a small summarisation instruction.
|
||||
old_threshold = threshold
|
||||
new_threshold = aux_context
|
||||
agent.context_compressor.threshold_tokens = new_threshold
|
||||
# Keep threshold_percent in sync so future main-model
|
||||
# context_length changes (update_model) re-derive from a
|
||||
# sensible number rather than the original too-high value.
|
||||
main_ctx = agent.context_compressor.context_length
|
||||
if main_ctx:
|
||||
agent.context_compressor.threshold_percent = (
|
||||
new_threshold / main_ctx
|
||||
)
|
||||
safe_pct = int((aux_context / main_ctx) * 100) if main_ctx else 50
|
||||
# Build human-readable "model (provider)" labels for both
|
||||
# the main model and the compression model so users can
|
||||
# tell at a glance which provider each side is actually
|
||||
# using. When the configured provider is empty or "auto",
|
||||
# fall back to the client's base_url hostname.
|
||||
_main_model = getattr(agent, "model", "") or "?"
|
||||
_main_provider = getattr(agent, "provider", "") or ""
|
||||
_aux_provider_label = (
|
||||
_aux_cfg_provider
|
||||
if _aux_cfg_provider and _aux_cfg_provider != "auto"
|
||||
else ""
|
||||
)
|
||||
if not _aux_provider_label:
|
||||
try:
|
||||
from urllib.parse import urlparse
|
||||
_aux_provider_label = (
|
||||
urlparse(aux_base_url).hostname or aux_base_url
|
||||
)
|
||||
except Exception:
|
||||
_aux_provider_label = aux_base_url or "auto"
|
||||
_main_label = (
|
||||
f"{_main_model} ({_main_provider})"
|
||||
if _main_provider
|
||||
else _main_model
|
||||
)
|
||||
_aux_label = f"{aux_model} ({_aux_provider_label})"
|
||||
msg = (
|
||||
f"⚠ Compression model {_aux_label} context is "
|
||||
f"{aux_context:,} tokens, but the main model "
|
||||
f"{_main_label}'s compression threshold was "
|
||||
f"{old_threshold:,} tokens. "
|
||||
f"Auto-lowered this session's threshold to "
|
||||
f"{new_threshold:,} tokens so compression can run.\n"
|
||||
f" To make this permanent, edit config.yaml — either:\n"
|
||||
f" 1. Use a larger compression model:\n"
|
||||
f" auxiliary:\n"
|
||||
f" compression:\n"
|
||||
f" model: <model-with-{old_threshold:,}+-context>\n"
|
||||
f" 2. Lower the compression threshold:\n"
|
||||
f" compression:\n"
|
||||
f" threshold: 0.{safe_pct:02d}"
|
||||
)
|
||||
agent._compression_warning = msg
|
||||
agent._emit_status(msg)
|
||||
logger.warning(
|
||||
"Auxiliary compression model %s has %d token context, "
|
||||
"below the main model's compression threshold of %d "
|
||||
"tokens — auto-lowered session threshold to %d to "
|
||||
"keep compression working.",
|
||||
aux_model,
|
||||
aux_context,
|
||||
old_threshold,
|
||||
new_threshold,
|
||||
)
|
||||
except ValueError:
|
||||
# Hard rejections (aux below minimum context) must propagate
|
||||
# so the session refuses to start.
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
"Compression feasibility check failed (non-fatal): %s", exc
|
||||
)
|
||||
|
||||
|
||||
def replay_compression_warning(agent: Any) -> None:
|
||||
"""Re-send the compression warning through ``status_callback``.
|
||||
|
||||
During ``__init__`` the gateway's ``status_callback`` is not yet
|
||||
wired, so ``_emit_status`` only reaches ``_vprint`` (CLI). This
|
||||
method is called once at the start of the first
|
||||
``run_conversation()`` — by then the gateway has set the callback,
|
||||
so every platform (Telegram, Discord, Slack, etc.) receives the
|
||||
warning.
|
||||
"""
|
||||
msg = getattr(agent, "_compression_warning", None)
|
||||
if msg and agent.status_callback:
|
||||
try:
|
||||
agent.status_callback("lifecycle", msg)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def compress_context(
|
||||
agent: Any,
|
||||
messages: list,
|
||||
system_message: str,
|
||||
*,
|
||||
approx_tokens: Optional[int] = None,
|
||||
task_id: str = "default",
|
||||
focus_topic: Optional[str] = None,
|
||||
force: bool = False,
|
||||
) -> Tuple[list, str]:
|
||||
"""Compress conversation context and split the session in SQLite.
|
||||
|
||||
Args:
|
||||
agent: The owning :class:`AIAgent`.
|
||||
messages: Current message history (will be summarised).
|
||||
system_message: Current system prompt; rebuilt after compression.
|
||||
approx_tokens: Pre-compression token estimate, logged for ops.
|
||||
task_id: Tool task scope (used for clearing file-read dedup state).
|
||||
focus_topic: Optional focus string for guided compression — the
|
||||
summariser will prioritise preserving information related to
|
||||
this topic. Inspired by Claude Code's ``/compact <focus>``.
|
||||
force: If True, bypass any active summary-failure cooldown. Set
|
||||
by the manual ``/compress`` slash command so users can retry
|
||||
immediately after an auto-compress abort. Auto-compress
|
||||
callers use the default ``False``.
|
||||
|
||||
Returns:
|
||||
``(compressed_messages, new_system_prompt)`` tuple. When
|
||||
compression aborts (aux LLM failed to produce a usable summary),
|
||||
returns the original messages unchanged and the existing system
|
||||
prompt — the session is NOT rotated. Callers should detect the
|
||||
no-op via ``len(returned) == len(input)`` and stop the retry loop.
|
||||
"""
|
||||
# Lazy feasibility check — run the auxiliary-provider probe + context
|
||||
# length lookup just-in-time on the first compression attempt instead of
|
||||
# at AIAgent.__init__. Saves ~400ms cold off every short session that
|
||||
# never reaches the threshold (the vast majority of ``chat -q`` runs).
|
||||
# The check itself sets ``agent._compression_warning`` so the
|
||||
# status-callback replay machinery still emits the warning to the user
|
||||
# the first time it would matter.
|
||||
if not getattr(agent, "_compression_feasibility_checked", False):
|
||||
# Mark as checked only after the probe completes. If the check
|
||||
# raises (e.g. a fatal aux-context ValueError that aborts the
|
||||
# session), leaving the flag unset is harmless; a non-fatal
|
||||
# transient failure is swallowed inside the function so the flag
|
||||
# is set normally on the next successful pass.
|
||||
check_compression_model_feasibility(agent)
|
||||
agent._compression_feasibility_checked = True
|
||||
|
||||
_pre_msg_count = len(messages)
|
||||
logger.info(
|
||||
"context compression started: session=%s messages=%d tokens=~%s model=%s focus=%r",
|
||||
agent.session_id or "none", _pre_msg_count,
|
||||
f"{approx_tokens:,}" if approx_tokens else "unknown", agent.model,
|
||||
focus_topic,
|
||||
)
|
||||
agent._emit_status(
|
||||
"🗜️ Compacting context — summarizing earlier conversation so I can continue..."
|
||||
)
|
||||
|
||||
# ── Compression lock ────────────────────────────────────────────────
|
||||
# Atomic, state.db-backed lock per session_id. Without this, two
|
||||
# AIAgent instances that share the same session_id (most commonly the
|
||||
# parent-turn agent and its background-review fork — see
|
||||
# ``agent/background_review.py``: ``review_agent.session_id =
|
||||
# agent.session_id``) can each call compress() on overlapping
|
||||
# snapshots of the same conversation. Both succeed, both rotate
|
||||
# ``agent.session_id`` to a fresh id, both create child sessions in
|
||||
# state.db parented to the same old id. The gateway's SessionEntry
|
||||
# only catches one rotation, so the other child becomes an orphan
|
||||
# that silently accumulates writes — Damien's repro shape.
|
||||
#
|
||||
# Acquire keyed on the OLD session_id (the rotation target's parent),
|
||||
# because that's the id that competing paths see and read from
|
||||
# SessionEntry at the start of their own compression attempt.
|
||||
#
|
||||
# If we can't acquire the lock, another path is mid-compression on
|
||||
# this session. Aborting is correct: the messages are unchanged, the
|
||||
# other path's rotation will produce the canonical new session_id,
|
||||
# and our caller's auto-compress loop sees ``len(returned) == len(input)``
|
||||
# and stops retrying for this cycle. The session is NOT corrupted —
|
||||
# we just sit out this round and let the winner finish.
|
||||
_lock_db = getattr(agent, "_session_db", None)
|
||||
_lock_sid = agent.session_id or ""
|
||||
_lock_holder: Optional[str] = None
|
||||
# Probe whether the lock subsystem is actually available on this
|
||||
# SessionDB instance. A process running mismatched module versions
|
||||
# (e.g. ``conversation_compression.py`` reloaded after a pull but the
|
||||
# long-lived ``hermes_state.SessionDB`` class still bound to the
|
||||
# pre-#34351 version in memory) has the call site but not the method.
|
||||
# In that case ``try_acquire_compression_lock`` raises AttributeError —
|
||||
# NOT a ``sqlite3.Error`` — so the method's own fail-open guard never
|
||||
# runs and the exception propagates to the outer agent loop, which
|
||||
# prints the error and retries. Because compression never succeeds,
|
||||
# the token count never drops and the loop re-triggers compaction
|
||||
# forever (the "API call #47/#48/#49 ... has no attribute
|
||||
# try_acquire_compression_lock" spin). Fail OPEN here: if the lock
|
||||
# subsystem is missing or broken in any unexpected way, skip locking
|
||||
# and proceed with compression. Skipping the lock risks a rare
|
||||
# concurrent-compression session fork; an infinite no-progress loop
|
||||
# that never compresses at all is strictly worse.
|
||||
if _lock_db is not None and _lock_sid:
|
||||
_lock_holder = _compression_lock_holder(agent)
|
||||
try:
|
||||
_lock_acquired = _lock_db.try_acquire_compression_lock(
|
||||
_lock_sid, _lock_holder
|
||||
)
|
||||
except Exception as _lock_err:
|
||||
# Broken/absent lock subsystem (version skew, etc.). Log once
|
||||
# per session and proceed WITHOUT the lock rather than letting
|
||||
# the exception spin the outer loop.
|
||||
_lock_holder = None # we don't own anything to release
|
||||
if getattr(agent, "_last_compression_lock_error_sid", None) != _lock_sid:
|
||||
agent._last_compression_lock_error_sid = _lock_sid
|
||||
logger.warning(
|
||||
"compression lock subsystem unavailable for session=%s "
|
||||
"(%s: %s) — proceeding without lock. This usually means a "
|
||||
"stale in-memory module after an update; restart the "
|
||||
"process (or `hermes update`) to resync.",
|
||||
_lock_sid, type(_lock_err).__name__, _lock_err,
|
||||
)
|
||||
_lock_acquired = True # treat as acquired-but-unlocked; proceed
|
||||
if not _lock_acquired:
|
||||
try:
|
||||
existing = _lock_db.get_compression_lock_holder(_lock_sid)
|
||||
except Exception:
|
||||
existing = None
|
||||
logger.warning(
|
||||
"compression skipped: another path is compressing session=%s "
|
||||
"(holder=%s) — returning messages unchanged to avoid session fork",
|
||||
_lock_sid, existing,
|
||||
)
|
||||
_lock_holder = None # don't release a lock we don't own
|
||||
# Surface to the user once — quiet for downstream auto-compress loops
|
||||
if getattr(agent, "_last_compression_lock_warning_sid", None) != _lock_sid:
|
||||
agent._last_compression_lock_warning_sid = _lock_sid
|
||||
try:
|
||||
agent._emit_warning(
|
||||
"⚠ Skipping concurrent compression — another path "
|
||||
"is already compressing this session. Will retry "
|
||||
"after it finishes."
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
_existing_sp = getattr(agent, "_cached_system_prompt", None)
|
||||
if not _existing_sp:
|
||||
_existing_sp = agent._build_system_prompt(system_message)
|
||||
return messages, _existing_sp
|
||||
|
||||
def _release_lock() -> None:
|
||||
"""Release the lock keyed on the OLD session_id (before rotation)."""
|
||||
if _lock_db is not None and _lock_sid and _lock_holder:
|
||||
try:
|
||||
_lock_db.release_compression_lock(_lock_sid, _lock_holder)
|
||||
except Exception as _rel_err:
|
||||
logger.debug("compression lock release failed: %s", _rel_err)
|
||||
|
||||
# Notify external memory provider before compression discards context
|
||||
if agent._memory_manager:
|
||||
try:
|
||||
agent._memory_manager.on_pre_compress(messages)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic, force=force)
|
||||
except TypeError:
|
||||
# Plugin context engine with strict signature that doesn't accept
|
||||
# focus_topic / force — fall back to calling without them.
|
||||
compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens)
|
||||
except BaseException:
|
||||
# ANY exception during compress() must release the lock so the
|
||||
# session isn't permanently blocked from future compression.
|
||||
_release_lock()
|
||||
raise
|
||||
|
||||
# If compression aborted (aux LLM failed to produce a usable summary)
|
||||
# the compressor returns the input messages unchanged. Surface the
|
||||
# error to the user, skip the session-rotation work entirely (no
|
||||
# session has logically ended), and let auto-compress callers detect
|
||||
# the no-op via len(returned) == len(input).
|
||||
if getattr(agent.context_compressor, "_last_compress_aborted", False):
|
||||
_err = getattr(agent.context_compressor, "_last_summary_error", None) or "unknown error"
|
||||
if getattr(agent, "_last_compression_summary_warning", None) != _err:
|
||||
agent._last_compression_summary_warning = _err
|
||||
agent._emit_warning(
|
||||
f"⚠ Compression aborted: {_err}. "
|
||||
"No messages were dropped — conversation continues unchanged. "
|
||||
"Run /compress to retry, or /new to start a fresh session."
|
||||
)
|
||||
_existing_sp = getattr(agent, "_cached_system_prompt", None)
|
||||
if not _existing_sp:
|
||||
_existing_sp = agent._build_system_prompt(system_message)
|
||||
_release_lock() # compression aborted — no rotation will happen
|
||||
return messages, _existing_sp
|
||||
|
||||
summary_error = getattr(agent.context_compressor, "_last_summary_error", None)
|
||||
if summary_error:
|
||||
if getattr(agent, "_last_compression_summary_warning", None) != summary_error:
|
||||
agent._last_compression_summary_warning = summary_error
|
||||
agent._emit_warning(
|
||||
f"⚠ Compression summary failed: {summary_error}. "
|
||||
"Inserted a fallback context marker."
|
||||
)
|
||||
else:
|
||||
# No hard failure — but did the configured aux model error out
|
||||
# and get recovered by retrying on main? Surface that so users
|
||||
# know their auxiliary.compression.model setting is broken even
|
||||
# though compression succeeded.
|
||||
_aux_fail_model = getattr(agent.context_compressor, "_last_aux_model_failure_model", None)
|
||||
_aux_fail_err = getattr(agent.context_compressor, "_last_aux_model_failure_error", None)
|
||||
if _aux_fail_model:
|
||||
# Dedup on (model, error) so we don't spam on every compaction
|
||||
_aux_key = (_aux_fail_model, _aux_fail_err)
|
||||
if getattr(agent, "_last_aux_fallback_warning_key", None) != _aux_key:
|
||||
agent._last_aux_fallback_warning_key = _aux_key
|
||||
agent._emit_warning(
|
||||
f"ℹ Configured compression model '{_aux_fail_model}' failed "
|
||||
f"({_aux_fail_err or 'unknown error'}). Recovered using main model — "
|
||||
"check auxiliary.compression.model in config.yaml."
|
||||
)
|
||||
|
||||
todo_snapshot = agent._todo_store.format_for_injection()
|
||||
if todo_snapshot:
|
||||
compressed.append({"role": "user", "content": todo_snapshot})
|
||||
|
||||
agent._invalidate_system_prompt()
|
||||
new_system_prompt = agent._build_system_prompt(system_message)
|
||||
agent._cached_system_prompt = new_system_prompt
|
||||
|
||||
if agent._session_db:
|
||||
try:
|
||||
# Propagate title to the new session with auto-numbering
|
||||
old_title = agent._session_db.get_session_title(agent.session_id)
|
||||
# Trigger memory extraction on the old session before it rotates.
|
||||
agent.commit_memory_session(messages)
|
||||
agent._session_db.end_session(agent.session_id, "compression")
|
||||
old_session_id = agent.session_id
|
||||
agent.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
|
||||
try:
|
||||
from gateway.session_context import set_current_session_id
|
||||
|
||||
set_current_session_id(agent.session_id)
|
||||
except Exception:
|
||||
os.environ["HERMES_SESSION_ID"] = agent.session_id
|
||||
agent._session_db_created = False
|
||||
agent._session_db.create_session(
|
||||
session_id=agent.session_id,
|
||||
source=agent.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
|
||||
model=agent.model,
|
||||
model_config=agent._session_init_model_config,
|
||||
parent_session_id=old_session_id,
|
||||
)
|
||||
agent._session_db_created = True
|
||||
# Auto-number the title for the continuation session
|
||||
if old_title:
|
||||
try:
|
||||
new_title = agent._session_db.get_next_title_in_lineage(old_title)
|
||||
agent._session_db.set_session_title(agent.session_id, new_title)
|
||||
except (ValueError, Exception) as e:
|
||||
logger.debug("Could not propagate title on compression: %s", e)
|
||||
agent._session_db.update_system_prompt(agent.session_id, new_system_prompt)
|
||||
# Reset flush cursor — new session starts with no messages written
|
||||
agent._last_flushed_db_idx = 0
|
||||
except Exception as e:
|
||||
logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e)
|
||||
|
||||
# Notify the context engine that the session_id rotated because of
|
||||
# compression (not a fresh /new). Plugin engines (e.g. hermes-lcm) use
|
||||
# boundary_reason="compression" to preserve DAG lineage across the
|
||||
# rollover instead of re-initializing fresh per-session state.
|
||||
# See hermes-lcm#68. Built-in ContextCompressor ignores kwargs.
|
||||
try:
|
||||
_old_sid = locals().get("old_session_id")
|
||||
if _old_sid and hasattr(agent.context_compressor, "on_session_start"):
|
||||
agent.context_compressor.on_session_start(
|
||||
agent.session_id or "",
|
||||
boundary_reason="compression",
|
||||
old_session_id=_old_sid,
|
||||
conversation_id=getattr(agent, "_gateway_session_key", None),
|
||||
)
|
||||
except Exception as _ce_err:
|
||||
logger.debug("context engine on_session_start (compression): %s", _ce_err)
|
||||
|
||||
# Notify memory providers of the compression-driven session_id rotation
|
||||
# so provider-cached per-session state (Hindsight's _document_id,
|
||||
# accumulated turn buffers, counters) refreshes. reset=False because
|
||||
# the logical conversation continues; only the id and DB row rolled
|
||||
# over. See #6672.
|
||||
try:
|
||||
_old_sid = locals().get("old_session_id")
|
||||
if _old_sid and agent._memory_manager:
|
||||
agent._memory_manager.on_session_switch(
|
||||
agent.session_id or "",
|
||||
parent_session_id=_old_sid,
|
||||
reset=False,
|
||||
reason="compression",
|
||||
)
|
||||
except Exception as _me_err:
|
||||
logger.debug("memory manager on_session_switch (compression): %s", _me_err)
|
||||
|
||||
# Warn on repeated compressions (quality degrades with each pass)
|
||||
_cc = agent.context_compressor.compression_count
|
||||
if _cc >= 2:
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}⚠️ Session compressed {_cc} times — "
|
||||
f"accuracy may degrade. Consider /new to start fresh.",
|
||||
force=True,
|
||||
)
|
||||
|
||||
# Keep the post-compression rough estimate for diagnostics, but do not
|
||||
# treat it as provider-reported prompt usage. Schema-heavy rough estimates
|
||||
# can remain above threshold even after the next real API request fits.
|
||||
_compressed_est = estimate_request_tokens_rough(
|
||||
compressed,
|
||||
system_prompt=new_system_prompt or "",
|
||||
tools=agent.tools or None,
|
||||
)
|
||||
agent.context_compressor.last_compression_rough_tokens = _compressed_est
|
||||
agent.context_compressor.last_prompt_tokens = -1
|
||||
agent.context_compressor.last_completion_tokens = 0
|
||||
agent.context_compressor.awaiting_real_usage_after_compression = True
|
||||
|
||||
# Clear the file-read dedup cache. After compression the original
|
||||
# read content is summarised away — if the model re-reads the same
|
||||
# file it needs the full content, not a "file unchanged" stub.
|
||||
try:
|
||||
from tools.file_tools import reset_file_dedup
|
||||
reset_file_dedup(task_id)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
logger.info(
|
||||
"context compression done: session=%s messages=%d->%d rough_tokens=~%s awaiting_real_usage=true",
|
||||
agent.session_id or "none", _pre_msg_count, len(compressed),
|
||||
f"{_compressed_est:,}",
|
||||
)
|
||||
# Release the lock on the OLD session_id only AFTER rotation completed
|
||||
# and all post-rotation bookkeeping (memory manager, context engine,
|
||||
# file dedup) ran. A concurrent path that wakes up the moment we
|
||||
# release will see the NEW session_id in state.db / SessionEntry and
|
||||
# acquire on that — no race against our just-finished work.
|
||||
_release_lock()
|
||||
return compressed, new_system_prompt
|
||||
|
||||
|
||||
def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
|
||||
"""Re-encode all native image parts at a smaller size to recover from
|
||||
image-too-large errors (Anthropic 5 MB, unknown other providers).
|
||||
|
||||
Mutates ``api_messages`` in place. Returns True if any image part was
|
||||
actually replaced, False if there were no image parts to shrink or
|
||||
Pillow couldn't help (caller should surface the original error).
|
||||
|
||||
Strategy: look for ``image_url`` / ``input_image`` parts carrying a
|
||||
``data:image/...;base64,...`` payload. For each one whose encoded
|
||||
size exceeds 4 MB (a safe target that slides under Anthropic's 5 MB
|
||||
ceiling with header overhead), write the base64 to a tempfile, call
|
||||
``vision_tools._resize_image_for_vision`` to produce a smaller data
|
||||
URL, and substitute it in place.
|
||||
|
||||
Non-data-URL images (http/https URLs) are not touched — the provider
|
||||
fetches those itself and the size limit is different.
|
||||
"""
|
||||
if not api_messages:
|
||||
return False
|
||||
|
||||
try:
|
||||
from tools.vision_tools import _resize_image_for_vision
|
||||
except Exception as exc:
|
||||
logger.warning("image-shrink recovery: vision_tools unavailable — %s", exc)
|
||||
return False
|
||||
|
||||
# 4 MB target leaves comfortable headroom under Anthropic's 5 MB.
|
||||
# Non-Anthropic providers we haven't observed rejecting are fine with
|
||||
# much larger; shrinking to 4 MB here loses quality but only fires
|
||||
# after a confirmed provider rejection, so the alternative is failure.
|
||||
target_bytes = 4 * 1024 * 1024
|
||||
# Anthropic enforces an 8000px per-side dimension cap independently of
|
||||
# the 5 MB byte cap. A tall screenshot can be well under 5 MB yet far
|
||||
# over 8000px (e.g. 1200×12000 at 0.06 MB). We check pixel dimensions
|
||||
# even when the byte budget is fine.
|
||||
max_dimension = 8000
|
||||
changed_count = 0
|
||||
# Track parts that are over the target but could NOT be shrunk under it.
|
||||
# If any survive, retrying is pointless — the same oversized payload will
|
||||
# be re-sent and rejected again, wasting the single retry budget. We only
|
||||
# report success (caller retries) when every over-threshold image was
|
||||
# actually brought under the target.
|
||||
unshrinkable_oversized = 0
|
||||
|
||||
def _shrink_data_url(url: str) -> Optional[str]:
|
||||
"""Return a smaller data URL, or None if shrink can't help."""
|
||||
if not isinstance(url, str) or not url.startswith("data:"):
|
||||
return None
|
||||
|
||||
# Check both byte size AND pixel dimensions.
|
||||
needs_shrink = len(url) > target_bytes # over byte budget
|
||||
if not needs_shrink:
|
||||
# Even if bytes are fine, check pixel dimensions against
|
||||
# Anthropic's 8000px cap. A tall image can be tiny in bytes
|
||||
# yet huge in pixels.
|
||||
try:
|
||||
import base64 as _b64_dim
|
||||
header_d, _, data_d = url.partition(",")
|
||||
if not data_d:
|
||||
return None
|
||||
raw_d = _b64_dim.b64decode(data_d)
|
||||
from PIL import Image as _PILImage
|
||||
import io as _io_dim
|
||||
with _PILImage.open(_io_dim.BytesIO(raw_d)) as _img:
|
||||
if max(_img.size) <= max_dimension:
|
||||
return None # both bytes and pixels are fine
|
||||
needs_shrink = True # pixels exceed limit, force shrink
|
||||
except Exception:
|
||||
# If we can't check dimensions (Pillow unavailable, corrupt
|
||||
# image, etc.), fall back to byte-only check.
|
||||
return None
|
||||
|
||||
try:
|
||||
header, _, data = url.partition(",")
|
||||
mime = "image/jpeg"
|
||||
if header.startswith("data:"):
|
||||
mime_part = header[len("data:"):].split(";", 1)[0].strip()
|
||||
if mime_part.startswith("image/"):
|
||||
mime = mime_part
|
||||
import base64 as _b64
|
||||
raw = _b64.b64decode(data)
|
||||
suffix = {
|
||||
"image/png": ".png", "image/gif": ".gif", "image/webp": ".webp",
|
||||
"image/jpeg": ".jpg", "image/jpg": ".jpg", "image/bmp": ".bmp",
|
||||
}.get(mime, ".jpg")
|
||||
tmp = tempfile.NamedTemporaryFile(
|
||||
prefix="hermes_shrink_", suffix=suffix, delete=False,
|
||||
)
|
||||
try:
|
||||
tmp.write(raw)
|
||||
tmp.close()
|
||||
resized = _resize_image_for_vision(
|
||||
Path(tmp.name),
|
||||
mime_type=mime,
|
||||
max_base64_bytes=target_bytes,
|
||||
max_dimension=max_dimension,
|
||||
)
|
||||
finally:
|
||||
try:
|
||||
Path(tmp.name).unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
if not resized or len(resized) >= len(url):
|
||||
# Shrink didn't help (or made it bigger — corrupt input?).
|
||||
return None
|
||||
return resized
|
||||
except Exception as exc:
|
||||
logger.warning("image-shrink recovery: re-encode failed — %s", exc)
|
||||
return None
|
||||
|
||||
for msg in api_messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
content = msg.get("content")
|
||||
if not isinstance(content, list):
|
||||
continue
|
||||
for part in content:
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
ptype = part.get("type")
|
||||
if ptype not in {"image_url", "input_image"}:
|
||||
continue
|
||||
image_value = part.get("image_url")
|
||||
# OpenAI chat.completions: {"image_url": {"url": "data:..."}}
|
||||
# OpenAI Responses: {"image_url": "data:..."}
|
||||
if isinstance(image_value, dict):
|
||||
url = image_value.get("url", "")
|
||||
resized = _shrink_data_url(url)
|
||||
if resized:
|
||||
image_value["url"] = resized
|
||||
changed_count += 1
|
||||
elif isinstance(url, str) and url.startswith("data:") \
|
||||
and len(url) > target_bytes:
|
||||
unshrinkable_oversized += 1
|
||||
elif isinstance(image_value, str):
|
||||
resized = _shrink_data_url(image_value)
|
||||
if resized:
|
||||
part["image_url"] = resized
|
||||
changed_count += 1
|
||||
elif image_value.startswith("data:") \
|
||||
and len(image_value) > target_bytes:
|
||||
unshrinkable_oversized += 1
|
||||
|
||||
if changed_count:
|
||||
logger.info(
|
||||
"image-shrink recovery: re-encoded %d image part(s) to fit under %.0f MB",
|
||||
changed_count, target_bytes / (1024 * 1024),
|
||||
)
|
||||
if unshrinkable_oversized:
|
||||
# At least one oversized image could not be shrunk under the target.
|
||||
# Retrying would re-send it and fail identically, so signal "no
|
||||
# progress" even if other parts shrank — the caller will surface the
|
||||
# original error rather than burning its single retry on a no-op.
|
||||
logger.warning(
|
||||
"image-shrink recovery: %d oversized image part(s) could not be "
|
||||
"shrunk under %.0f MB — not retrying (would re-send rejected payload)",
|
||||
unshrinkable_oversized, target_bytes / (1024 * 1024),
|
||||
)
|
||||
return False
|
||||
return changed_count > 0
|
||||
|
||||
|
||||
__all__ = [
|
||||
"check_compression_model_feasibility",
|
||||
"replay_compression_warning",
|
||||
"compress_context",
|
||||
"try_shrink_image_parts_in_messages",
|
||||
]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,686 +0,0 @@
|
||||
"""OpenAI-compatible shim that forwards Hermes requests to `copilot --acp`.
|
||||
|
||||
This adapter lets Hermes treat the GitHub Copilot ACP server as a chat-style
|
||||
backend. Each request starts a short-lived ACP session, sends the formatted
|
||||
conversation as a single prompt, collects text chunks, and converts the result
|
||||
back into the minimal shape Hermes expects from an OpenAI client.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import queue
|
||||
import re
|
||||
import shlex
|
||||
import subprocess
|
||||
import threading
|
||||
import time
|
||||
from collections import deque
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
from agent.file_safety import get_read_block_error, is_write_denied
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
||||
ACP_MARKER_BASE_URL = "acp://copilot"
|
||||
_DEFAULT_TIMEOUT_SECONDS = 900.0
|
||||
|
||||
_TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
|
||||
_TOOL_CALL_JSON_RE = re.compile(r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}", re.DOTALL)
|
||||
|
||||
# Stderr fingerprint of the deprecated `gh copilot` CLI extension
|
||||
# (https://github.blog/changelog/2025-09-25-upcoming-deprecation-of-gh-copilot-cli-extension).
|
||||
# We require BOTH the literal product name ("gh-copilot") AND a deprecation
|
||||
# marker, so generic stderr from the NEW `@github/copilot` CLI — whose repo
|
||||
# is github.com/github/copilot-cli and which legitimately mentions "copilot-cli"
|
||||
# in its own banners and error messages — doesn't get misclassified as the
|
||||
# deprecated extension.
|
||||
_DEPRECATION_REQUIRED = ("gh-copilot",)
|
||||
_DEPRECATION_MARKERS = (
|
||||
"has been deprecated",
|
||||
"no commands will be executed",
|
||||
)
|
||||
|
||||
|
||||
def _is_gh_copilot_deprecation_message(stderr_text: str) -> bool:
|
||||
"""True iff stderr looks like the deprecated gh-copilot extension's banner."""
|
||||
|
||||
lower = stderr_text.lower()
|
||||
if not any(req in lower for req in _DEPRECATION_REQUIRED):
|
||||
return False
|
||||
return any(marker in lower for marker in _DEPRECATION_MARKERS)
|
||||
|
||||
|
||||
def _resolve_command() -> str:
|
||||
return (
|
||||
os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip()
|
||||
or os.getenv("COPILOT_CLI_PATH", "").strip()
|
||||
or "copilot"
|
||||
)
|
||||
|
||||
|
||||
def _resolve_args() -> list[str]:
|
||||
raw = os.getenv("HERMES_COPILOT_ACP_ARGS", "").strip()
|
||||
if not raw:
|
||||
return ["--acp", "--stdio"]
|
||||
return shlex.split(raw)
|
||||
|
||||
|
||||
def _resolve_home_dir() -> str:
|
||||
"""Return a stable HOME for child ACP processes."""
|
||||
|
||||
try:
|
||||
from hermes_constants import get_subprocess_home
|
||||
|
||||
profile_home = get_subprocess_home()
|
||||
if profile_home:
|
||||
return profile_home
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
home = os.environ.get("HOME", "").strip()
|
||||
if home:
|
||||
return home
|
||||
|
||||
expanded = os.path.expanduser("~")
|
||||
if expanded and expanded != "~":
|
||||
return expanded
|
||||
|
||||
try:
|
||||
import pwd
|
||||
|
||||
resolved = pwd.getpwuid(os.getuid()).pw_dir.strip() # windows-footgun: ok — POSIX fallback inside try/except (pwd import fails on Windows)
|
||||
if resolved:
|
||||
return resolved
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Last resort: /tmp (writable on any POSIX system). Avoids crashing the
|
||||
# subprocess with no HOME; callers can set HERMES_HOME explicitly if they
|
||||
# need a different writable dir.
|
||||
return "/tmp"
|
||||
|
||||
|
||||
def _build_subprocess_env() -> dict[str, str]:
|
||||
env = os.environ.copy()
|
||||
env["HOME"] = _resolve_home_dir()
|
||||
return env
|
||||
|
||||
|
||||
def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
|
||||
return {
|
||||
"jsonrpc": "2.0",
|
||||
"id": message_id,
|
||||
"error": {
|
||||
"code": code,
|
||||
"message": message,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _permission_denied(message_id: Any) -> dict[str, Any]:
|
||||
return {
|
||||
"jsonrpc": "2.0",
|
||||
"id": message_id,
|
||||
"result": {
|
||||
"outcome": {
|
||||
"outcome": "cancelled",
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _format_messages_as_prompt(
|
||||
messages: list[dict[str, Any]],
|
||||
model: str | None = None,
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
tool_choice: Any = None,
|
||||
) -> str:
|
||||
sections: list[str] = [
|
||||
"You are being used as the active ACP agent backend for Hermes.",
|
||||
"Use ACP capabilities to complete tasks.",
|
||||
"IMPORTANT: If you take an action with a tool, you MUST output tool calls using <tool_call>{...}</tool_call> blocks with JSON exactly in OpenAI function-call shape.",
|
||||
"If no tool is needed, answer normally.",
|
||||
]
|
||||
if model:
|
||||
sections.append(f"Hermes requested model hint: {model}")
|
||||
|
||||
if isinstance(tools, list) and tools:
|
||||
tool_specs: list[dict[str, Any]] = []
|
||||
for t in tools:
|
||||
if not isinstance(t, dict):
|
||||
continue
|
||||
fn = t.get("function") or {}
|
||||
if not isinstance(fn, dict):
|
||||
continue
|
||||
name = fn.get("name")
|
||||
if not isinstance(name, str) or not name.strip():
|
||||
continue
|
||||
tool_specs.append(
|
||||
{
|
||||
"name": name.strip(),
|
||||
"description": fn.get("description", ""),
|
||||
"parameters": fn.get("parameters", {}),
|
||||
}
|
||||
)
|
||||
if tool_specs:
|
||||
sections.append(
|
||||
"Available tools (OpenAI function schema). "
|
||||
"When using a tool, emit ONLY <tool_call>{...}</tool_call> with one JSON object "
|
||||
"containing id/type/function{name,arguments}. arguments must be a JSON string.\n"
|
||||
+ json.dumps(tool_specs, ensure_ascii=False)
|
||||
)
|
||||
|
||||
if tool_choice is not None:
|
||||
sections.append(f"Tool choice hint: {json.dumps(tool_choice, ensure_ascii=False)}")
|
||||
|
||||
transcript: list[str] = []
|
||||
for message in messages:
|
||||
if not isinstance(message, dict):
|
||||
continue
|
||||
role = str(message.get("role") or "unknown").strip().lower()
|
||||
if role == "tool":
|
||||
role = "tool"
|
||||
elif role not in {"system", "user", "assistant"}:
|
||||
role = "context"
|
||||
|
||||
content = message.get("content")
|
||||
rendered = _render_message_content(content)
|
||||
if not rendered:
|
||||
continue
|
||||
|
||||
label = {
|
||||
"system": "System",
|
||||
"user": "User",
|
||||
"assistant": "Assistant",
|
||||
"tool": "Tool",
|
||||
"context": "Context",
|
||||
}.get(role, role.title())
|
||||
transcript.append(f"{label}:\n{rendered}")
|
||||
|
||||
if transcript:
|
||||
sections.append("Conversation transcript:\n\n" + "\n\n".join(transcript))
|
||||
|
||||
sections.append("Continue the conversation from the latest user request.")
|
||||
return "\n\n".join(section.strip() for section in sections if section and section.strip())
|
||||
|
||||
|
||||
def _render_message_content(content: Any) -> str:
|
||||
if content is None:
|
||||
return ""
|
||||
if isinstance(content, str):
|
||||
return content.strip()
|
||||
if isinstance(content, dict):
|
||||
if "text" in content:
|
||||
return str(content.get("text") or "").strip()
|
||||
if "content" in content and isinstance(content.get("content"), str):
|
||||
return str(content.get("content") or "").strip()
|
||||
return json.dumps(content, ensure_ascii=True)
|
||||
if isinstance(content, list):
|
||||
parts: list[str] = []
|
||||
for item in content:
|
||||
if isinstance(item, str):
|
||||
parts.append(item)
|
||||
elif isinstance(item, dict):
|
||||
text = item.get("text")
|
||||
if isinstance(text, str) and text.strip():
|
||||
parts.append(text.strip())
|
||||
return "\n".join(parts).strip()
|
||||
return str(content).strip()
|
||||
|
||||
|
||||
def _extract_tool_calls_from_text(text: str) -> tuple[list[SimpleNamespace], str]:
|
||||
if not isinstance(text, str) or not text.strip():
|
||||
return [], ""
|
||||
|
||||
extracted: list[SimpleNamespace] = []
|
||||
consumed_spans: list[tuple[int, int]] = []
|
||||
|
||||
def _try_add_tool_call(raw_json: str) -> None:
|
||||
try:
|
||||
obj = json.loads(raw_json)
|
||||
except Exception:
|
||||
return
|
||||
if not isinstance(obj, dict):
|
||||
return
|
||||
fn = obj.get("function")
|
||||
if not isinstance(fn, dict):
|
||||
return
|
||||
fn_name = fn.get("name")
|
||||
if not isinstance(fn_name, str) or not fn_name.strip():
|
||||
return
|
||||
fn_args = fn.get("arguments", "{}")
|
||||
if not isinstance(fn_args, str):
|
||||
fn_args = json.dumps(fn_args, ensure_ascii=False)
|
||||
call_id = obj.get("id")
|
||||
if not isinstance(call_id, str) or not call_id.strip():
|
||||
call_id = f"acp_call_{len(extracted)+1}"
|
||||
|
||||
extracted.append(
|
||||
SimpleNamespace(
|
||||
id=call_id,
|
||||
call_id=call_id,
|
||||
response_item_id=None,
|
||||
type="function",
|
||||
function=SimpleNamespace(name=fn_name.strip(), arguments=fn_args),
|
||||
)
|
||||
)
|
||||
|
||||
for m in _TOOL_CALL_BLOCK_RE.finditer(text):
|
||||
raw = m.group(1)
|
||||
_try_add_tool_call(raw)
|
||||
consumed_spans.append((m.start(), m.end()))
|
||||
|
||||
# Only try bare-JSON fallback when no XML blocks were found.
|
||||
if not extracted:
|
||||
for m in _TOOL_CALL_JSON_RE.finditer(text):
|
||||
raw = m.group(0)
|
||||
_try_add_tool_call(raw)
|
||||
consumed_spans.append((m.start(), m.end()))
|
||||
|
||||
if not consumed_spans:
|
||||
return extracted, text.strip()
|
||||
|
||||
consumed_spans.sort()
|
||||
merged: list[tuple[int, int]] = []
|
||||
for start, end in consumed_spans:
|
||||
if not merged or start > merged[-1][1]:
|
||||
merged.append((start, end))
|
||||
else:
|
||||
merged[-1] = (merged[-1][0], max(merged[-1][1], end))
|
||||
|
||||
parts: list[str] = []
|
||||
cursor = 0
|
||||
for start, end in merged:
|
||||
if cursor < start:
|
||||
parts.append(text[cursor:start])
|
||||
cursor = max(cursor, end)
|
||||
if cursor < len(text):
|
||||
parts.append(text[cursor:])
|
||||
|
||||
cleaned = "\n".join(p.strip() for p in parts if p and p.strip()).strip()
|
||||
return extracted, cleaned
|
||||
|
||||
|
||||
|
||||
def _ensure_path_within_cwd(path_text: str, cwd: str) -> Path:
|
||||
candidate = Path(path_text)
|
||||
if not candidate.is_absolute():
|
||||
raise PermissionError("ACP file-system paths must be absolute.")
|
||||
resolved = candidate.resolve()
|
||||
root = Path(cwd).resolve()
|
||||
try:
|
||||
resolved.relative_to(root)
|
||||
except ValueError as exc:
|
||||
raise PermissionError(f"Path '{resolved}' is outside the session cwd '{root}'.") from exc
|
||||
return resolved
|
||||
|
||||
|
||||
class _ACPChatCompletions:
|
||||
def __init__(self, client: "CopilotACPClient"):
|
||||
self._client = client
|
||||
|
||||
def create(self, **kwargs: Any) -> Any:
|
||||
return self._client._create_chat_completion(**kwargs)
|
||||
|
||||
|
||||
class _ACPChatNamespace:
|
||||
def __init__(self, client: "CopilotACPClient"):
|
||||
self.completions = _ACPChatCompletions(client)
|
||||
|
||||
|
||||
class CopilotACPClient:
|
||||
"""Minimal OpenAI-client-compatible facade for Copilot ACP."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
api_key: str | None = None,
|
||||
base_url: str | None = None,
|
||||
default_headers: dict[str, str] | None = None,
|
||||
acp_command: str | None = None,
|
||||
acp_args: list[str] | None = None,
|
||||
acp_cwd: str | None = None,
|
||||
command: str | None = None,
|
||||
args: list[str] | None = None,
|
||||
**_: Any,
|
||||
):
|
||||
self.api_key = api_key or "copilot-acp"
|
||||
self.base_url = base_url or ACP_MARKER_BASE_URL
|
||||
self._default_headers = dict(default_headers or {})
|
||||
self._acp_command = acp_command or command or _resolve_command()
|
||||
self._acp_args = list(acp_args or args or _resolve_args())
|
||||
self._acp_cwd = str(Path(acp_cwd or os.getcwd()).resolve())
|
||||
self.chat = _ACPChatNamespace(self)
|
||||
self.is_closed = False
|
||||
self._active_process: subprocess.Popen[str] | None = None
|
||||
self._active_process_lock = threading.Lock()
|
||||
|
||||
def close(self) -> None:
|
||||
proc: subprocess.Popen[str] | None
|
||||
with self._active_process_lock:
|
||||
proc = self._active_process
|
||||
self._active_process = None
|
||||
self.is_closed = True
|
||||
if proc is None:
|
||||
return
|
||||
try:
|
||||
proc.terminate()
|
||||
proc.wait(timeout=2)
|
||||
except Exception:
|
||||
try:
|
||||
proc.kill()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _create_chat_completion(
|
||||
self,
|
||||
*,
|
||||
model: str | None = None,
|
||||
messages: list[dict[str, Any]] | None = None,
|
||||
timeout: float | None = None,
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
tool_choice: Any = None,
|
||||
**_: Any,
|
||||
) -> Any:
|
||||
prompt_text = _format_messages_as_prompt(
|
||||
messages or [],
|
||||
model=model,
|
||||
tools=tools,
|
||||
tool_choice=tool_choice,
|
||||
)
|
||||
# Normalise timeout: run_agent.py may pass an httpx.Timeout object
|
||||
# (used natively by the OpenAI SDK) rather than a plain float.
|
||||
if timeout is None:
|
||||
_effective_timeout = _DEFAULT_TIMEOUT_SECONDS
|
||||
elif isinstance(timeout, (int, float)):
|
||||
_effective_timeout = float(timeout)
|
||||
else:
|
||||
# httpx.Timeout or similar — pick the largest component so the
|
||||
# subprocess has enough wall-clock time for the full response.
|
||||
_candidates = [
|
||||
getattr(timeout, attr, None)
|
||||
for attr in ("read", "write", "connect", "pool", "timeout")
|
||||
]
|
||||
_numeric = [float(v) for v in _candidates if isinstance(v, (int, float))]
|
||||
_effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS
|
||||
|
||||
response_text, reasoning_text = self._run_prompt(
|
||||
prompt_text,
|
||||
timeout_seconds=_effective_timeout,
|
||||
)
|
||||
|
||||
tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text)
|
||||
|
||||
usage = SimpleNamespace(
|
||||
prompt_tokens=0,
|
||||
completion_tokens=0,
|
||||
total_tokens=0,
|
||||
prompt_tokens_details=SimpleNamespace(cached_tokens=0),
|
||||
)
|
||||
assistant_message = SimpleNamespace(
|
||||
content=cleaned_text,
|
||||
tool_calls=tool_calls,
|
||||
reasoning=reasoning_text or None,
|
||||
reasoning_content=reasoning_text or None,
|
||||
reasoning_details=None,
|
||||
)
|
||||
finish_reason = "tool_calls" if tool_calls else "stop"
|
||||
choice = SimpleNamespace(message=assistant_message, finish_reason=finish_reason)
|
||||
return SimpleNamespace(
|
||||
choices=[choice],
|
||||
usage=usage,
|
||||
model=model or "copilot-acp",
|
||||
)
|
||||
|
||||
def _run_prompt(self, prompt_text: str, *, timeout_seconds: float) -> tuple[str, str]:
|
||||
try:
|
||||
proc = subprocess.Popen(
|
||||
[self._acp_command] + self._acp_args,
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
bufsize=1,
|
||||
cwd=self._acp_cwd,
|
||||
env=_build_subprocess_env(),
|
||||
)
|
||||
except FileNotFoundError as exc:
|
||||
raise RuntimeError(
|
||||
f"Could not start Copilot ACP command '{self._acp_command}'. "
|
||||
"Install GitHub Copilot CLI or set HERMES_COPILOT_ACP_COMMAND/COPILOT_CLI_PATH."
|
||||
) from exc
|
||||
|
||||
if proc.stdin is None or proc.stdout is None:
|
||||
proc.kill()
|
||||
raise RuntimeError("Copilot ACP process did not expose stdin/stdout pipes.")
|
||||
|
||||
self.is_closed = False
|
||||
with self._active_process_lock:
|
||||
self._active_process = proc
|
||||
|
||||
inbox: queue.Queue[dict[str, Any]] = queue.Queue()
|
||||
stderr_tail: deque[str] = deque(maxlen=40)
|
||||
|
||||
def _stdout_reader() -> None:
|
||||
if proc.stdout is None:
|
||||
return
|
||||
for line in proc.stdout:
|
||||
try:
|
||||
inbox.put(json.loads(line))
|
||||
except Exception:
|
||||
inbox.put({"raw": line.rstrip("\n")})
|
||||
|
||||
def _stderr_reader() -> None:
|
||||
if proc.stderr is None:
|
||||
return
|
||||
for line in proc.stderr:
|
||||
stderr_tail.append(line.rstrip("\n"))
|
||||
|
||||
out_thread = threading.Thread(target=_stdout_reader, daemon=True)
|
||||
err_thread = threading.Thread(target=_stderr_reader, daemon=True)
|
||||
out_thread.start()
|
||||
err_thread.start()
|
||||
|
||||
next_id = 0
|
||||
|
||||
def _request(method: str, params: dict[str, Any], *, text_parts: list[str] | None = None, reasoning_parts: list[str] | None = None) -> Any:
|
||||
nonlocal next_id
|
||||
next_id += 1
|
||||
request_id = next_id
|
||||
payload = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": request_id,
|
||||
"method": method,
|
||||
"params": params,
|
||||
}
|
||||
proc.stdin.write(json.dumps(payload) + "\n")
|
||||
proc.stdin.flush()
|
||||
|
||||
deadline = time.monotonic() + timeout_seconds
|
||||
while time.monotonic() < deadline:
|
||||
if proc.poll() is not None:
|
||||
break
|
||||
try:
|
||||
msg = inbox.get(timeout=0.1)
|
||||
except queue.Empty:
|
||||
continue
|
||||
|
||||
if self._handle_server_message(
|
||||
msg,
|
||||
process=proc,
|
||||
cwd=self._acp_cwd,
|
||||
text_parts=text_parts,
|
||||
reasoning_parts=reasoning_parts,
|
||||
):
|
||||
continue
|
||||
|
||||
if msg.get("id") != request_id:
|
||||
continue
|
||||
if "error" in msg:
|
||||
err = msg.get("error") or {}
|
||||
raise RuntimeError(
|
||||
f"Copilot ACP {method} failed: {err.get('message') or err}"
|
||||
)
|
||||
return msg.get("result")
|
||||
|
||||
stderr_text = "\n".join(stderr_tail).strip()
|
||||
if proc.poll() is not None and stderr_text:
|
||||
if _is_gh_copilot_deprecation_message(stderr_text):
|
||||
raise RuntimeError(
|
||||
"Hermes ACP mode requires the NEW GitHub Copilot CLI "
|
||||
"(github.com/github/copilot-cli), but the binary it just "
|
||||
"spawned is the deprecated `gh copilot` extension.\n\n"
|
||||
"Install the new CLI:\n"
|
||||
" npm install -g @github/copilot\n"
|
||||
" # then verify with: copilot --help\n\n"
|
||||
"If `copilot` already resolves to the new CLI but you still see this,\n"
|
||||
"point Hermes at it explicitly:\n"
|
||||
" export HERMES_COPILOT_ACP_COMMAND=/path/to/new/copilot\n\n"
|
||||
"Alternative: use the `copilot` provider (no ACP, hits the Copilot API\n"
|
||||
"directly with a Copilot subscription token) via `hermes setup`.\n\n"
|
||||
f"Original error:\n{stderr_text}"
|
||||
)
|
||||
raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
|
||||
raise TimeoutError(f"Timed out waiting for Copilot ACP response to {method}.")
|
||||
|
||||
try:
|
||||
_request(
|
||||
"initialize",
|
||||
{
|
||||
"protocolVersion": 1,
|
||||
"clientCapabilities": {
|
||||
"fs": {
|
||||
"readTextFile": True,
|
||||
"writeTextFile": True,
|
||||
}
|
||||
},
|
||||
"clientInfo": {
|
||||
"name": "hermes-agent",
|
||||
"title": "Hermes Agent",
|
||||
"version": "0.0.0",
|
||||
},
|
||||
},
|
||||
)
|
||||
session = _request(
|
||||
"session/new",
|
||||
{
|
||||
"cwd": self._acp_cwd,
|
||||
"mcpServers": [],
|
||||
},
|
||||
) or {}
|
||||
session_id = str(session.get("sessionId") or "").strip()
|
||||
if not session_id:
|
||||
raise RuntimeError("Copilot ACP did not return a sessionId.")
|
||||
|
||||
text_parts: list[str] = []
|
||||
reasoning_parts: list[str] = []
|
||||
_request(
|
||||
"session/prompt",
|
||||
{
|
||||
"sessionId": session_id,
|
||||
"prompt": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": prompt_text,
|
||||
}
|
||||
],
|
||||
},
|
||||
text_parts=text_parts,
|
||||
reasoning_parts=reasoning_parts,
|
||||
)
|
||||
return "".join(text_parts), "".join(reasoning_parts)
|
||||
finally:
|
||||
self.close()
|
||||
|
||||
def _handle_server_message(
|
||||
self,
|
||||
msg: dict[str, Any],
|
||||
*,
|
||||
process: subprocess.Popen[str],
|
||||
cwd: str,
|
||||
text_parts: list[str] | None,
|
||||
reasoning_parts: list[str] | None,
|
||||
) -> bool:
|
||||
method = msg.get("method")
|
||||
if not isinstance(method, str):
|
||||
return False
|
||||
|
||||
if method == "session/update":
|
||||
params = msg.get("params") or {}
|
||||
update = params.get("update") or {}
|
||||
kind = str(update.get("sessionUpdate") or "").strip()
|
||||
content = update.get("content") or {}
|
||||
chunk_text = ""
|
||||
if isinstance(content, dict):
|
||||
chunk_text = str(content.get("text") or "")
|
||||
if kind == "agent_message_chunk" and chunk_text and text_parts is not None:
|
||||
text_parts.append(chunk_text)
|
||||
elif kind == "agent_thought_chunk" and chunk_text and reasoning_parts is not None:
|
||||
reasoning_parts.append(chunk_text)
|
||||
return True
|
||||
|
||||
if process.stdin is None:
|
||||
return True
|
||||
|
||||
message_id = msg.get("id")
|
||||
params = msg.get("params") or {}
|
||||
|
||||
if method == "session/request_permission":
|
||||
response = _permission_denied(message_id)
|
||||
elif method == "fs/read_text_file":
|
||||
try:
|
||||
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
|
||||
block_error = get_read_block_error(str(path))
|
||||
if block_error:
|
||||
raise PermissionError(block_error)
|
||||
try:
|
||||
content = path.read_text()
|
||||
except FileNotFoundError:
|
||||
content = ""
|
||||
line = params.get("line")
|
||||
limit = params.get("limit")
|
||||
if isinstance(line, int) and line > 1:
|
||||
lines = content.splitlines(keepends=True)
|
||||
start = line - 1
|
||||
end = start + limit if isinstance(limit, int) and limit > 0 else None
|
||||
content = "".join(lines[start:end])
|
||||
if content:
|
||||
content = redact_sensitive_text(content, force=True)
|
||||
response = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": message_id,
|
||||
"result": {
|
||||
"content": content,
|
||||
},
|
||||
}
|
||||
except Exception as exc:
|
||||
response = _jsonrpc_error(message_id, -32602, str(exc))
|
||||
elif method == "fs/write_text_file":
|
||||
try:
|
||||
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
|
||||
if is_write_denied(str(path)):
|
||||
raise PermissionError(
|
||||
f"Write denied: '{path}' is a protected system/credential file."
|
||||
)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(str(params.get("content") or ""))
|
||||
response = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": message_id,
|
||||
"result": None,
|
||||
}
|
||||
except Exception as exc:
|
||||
response = _jsonrpc_error(message_id, -32602, str(exc))
|
||||
else:
|
||||
response = _jsonrpc_error(
|
||||
message_id,
|
||||
-32601,
|
||||
f"ACP client method '{method}' is not supported by Hermes yet.",
|
||||
)
|
||||
|
||||
process.stdin.write(json.dumps(response) + "\n")
|
||||
process.stdin.flush()
|
||||
return True
|
||||
@@ -1,174 +0,0 @@
|
||||
"""Credential-pool disk-boundary sanitization helpers.
|
||||
|
||||
These helpers define which credential-pool entries are references to borrowed
|
||||
runtime secrets and strip raw values before those entries are written to
|
||||
``auth.json``. They intentionally have no dependency on ``hermes_cli.auth`` so
|
||||
both the pool model and the final auth-store write boundary can share the same
|
||||
policy without import cycles.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import re
|
||||
from typing import Any, Dict, Mapping
|
||||
|
||||
|
||||
# Sources Hermes owns and can intentionally persist in auth.json. Everything
|
||||
# else with a non-empty source is treated as borrowed/reference-only by default
|
||||
# so future external secret providers fail closed at the disk boundary.
|
||||
_PERSISTABLE_PROVIDER_SOURCES = frozenset({
|
||||
("anthropic", "hermes_pkce"),
|
||||
("minimax-oauth", "oauth"),
|
||||
("nous", "device_code"),
|
||||
("openai-codex", "device_code"),
|
||||
("xai-oauth", "loopback_pkce"),
|
||||
})
|
||||
|
||||
_SAFE_SECRETISH_METADATA_KEYS = frozenset({
|
||||
"secret_fingerprint",
|
||||
"secret_source",
|
||||
"token_type",
|
||||
"scope",
|
||||
"client_id",
|
||||
"agent_key_id",
|
||||
"agent_key_expires_at",
|
||||
"agent_key_expires_in",
|
||||
"agent_key_reused",
|
||||
"agent_key_obtained_at",
|
||||
"expires_at",
|
||||
"expires_at_ms",
|
||||
"expires_in",
|
||||
"last_refresh",
|
||||
"last_status",
|
||||
"last_status_at",
|
||||
"last_error_code",
|
||||
"last_error_reason",
|
||||
"last_error_message",
|
||||
"last_error_reset_at",
|
||||
})
|
||||
|
||||
_SECRET_VALUE_KEYS = frozenset({
|
||||
"access_token",
|
||||
"refresh_token",
|
||||
"agent_key",
|
||||
"api_key",
|
||||
"apikey",
|
||||
"api_token",
|
||||
"auth_token",
|
||||
"authorization",
|
||||
"bearer_token",
|
||||
"client_secret",
|
||||
"credential",
|
||||
"credentials",
|
||||
"id_token",
|
||||
"oauth_token",
|
||||
"private_key",
|
||||
"secret_key",
|
||||
"session_token",
|
||||
"password",
|
||||
"secret",
|
||||
"token",
|
||||
"tokens",
|
||||
})
|
||||
|
||||
_SECRET_VALUE_SUFFIXES = (
|
||||
"_api_key",
|
||||
"_api_token",
|
||||
"_access_token",
|
||||
"_auth_token",
|
||||
"_refresh_token",
|
||||
"_bearer_token",
|
||||
"_client_secret",
|
||||
"_id_token",
|
||||
"_oauth_token",
|
||||
"_private_key",
|
||||
"_session_token",
|
||||
"_secret_key",
|
||||
"_password",
|
||||
"_secret",
|
||||
"_token",
|
||||
"_key",
|
||||
)
|
||||
|
||||
_CAMEL_CASE_BOUNDARY = re.compile(r"(?<=[a-z0-9])(?=[A-Z])")
|
||||
|
||||
|
||||
def _normalize_key(key: Any) -> str:
|
||||
raw = str(key or "").strip()
|
||||
raw = _CAMEL_CASE_BOUNDARY.sub("_", raw)
|
||||
return raw.lower().replace("-", "_").replace(".", "_")
|
||||
|
||||
|
||||
def is_borrowed_credential_source(source: Any, provider_id: Any = None) -> bool:
|
||||
"""Return True when ``source`` points at a borrowed/reference-only secret."""
|
||||
normalized_source = str(source or "").strip().lower()
|
||||
if not normalized_source:
|
||||
return False
|
||||
if normalized_source == "manual" or normalized_source.startswith("manual:"):
|
||||
return False
|
||||
normalized_provider = str(provider_id or "").strip().lower()
|
||||
return (normalized_provider, normalized_source) not in _PERSISTABLE_PROVIDER_SOURCES
|
||||
|
||||
|
||||
def _is_secret_payload_key(key: Any) -> bool:
|
||||
normalized = _normalize_key(key)
|
||||
if not normalized or normalized in _SAFE_SECRETISH_METADATA_KEYS:
|
||||
return False
|
||||
if normalized in _SECRET_VALUE_KEYS:
|
||||
return True
|
||||
return normalized.endswith(_SECRET_VALUE_SUFFIXES)
|
||||
|
||||
|
||||
def _fingerprint_value(value: Any) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
text = str(value)
|
||||
if not text:
|
||||
return None
|
||||
digest = hashlib.sha256(text.encode("utf-8", errors="surrogatepass")).hexdigest()
|
||||
return f"sha256:{digest[:16]}"
|
||||
|
||||
|
||||
def _credential_secret_fingerprint(payload: Mapping[str, Any]) -> str | None:
|
||||
for key in ("agent_key", "access_token", "refresh_token", "api_key", "token", "secret"):
|
||||
fingerprint = _fingerprint_value(payload.get(key))
|
||||
if fingerprint:
|
||||
return fingerprint
|
||||
|
||||
for key, value in payload.items():
|
||||
if _is_secret_payload_key(key):
|
||||
fingerprint = _fingerprint_value(value)
|
||||
if fingerprint:
|
||||
return fingerprint
|
||||
|
||||
existing = payload.get("secret_fingerprint")
|
||||
if isinstance(existing, str) and existing.startswith("sha256:"):
|
||||
return existing
|
||||
return None
|
||||
|
||||
|
||||
def sanitize_borrowed_credential_payload(
|
||||
payload: Mapping[str, Any],
|
||||
provider_id: Any = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Return a disk-safe credential-pool payload.
|
||||
|
||||
Owned sources (manual entries and Hermes-owned OAuth/device-code state)
|
||||
pass through unchanged. Borrowed/reference-only sources keep labels,
|
||||
source refs, status/cooldown metadata, counters, and a non-reversible
|
||||
fingerprint, but raw secret value fields are removed.
|
||||
"""
|
||||
result = dict(payload)
|
||||
if not is_borrowed_credential_source(result.get("source"), provider_id):
|
||||
return result
|
||||
|
||||
fingerprint = _credential_secret_fingerprint(result)
|
||||
sanitized = {
|
||||
key: value
|
||||
for key, value in result.items()
|
||||
if not _is_secret_payload_key(key)
|
||||
}
|
||||
if fingerprint:
|
||||
sanitized["secret_fingerprint"] = fingerprint
|
||||
return sanitized
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,448 +0,0 @@
|
||||
"""Unified removal contract for every credential source Hermes reads from.
|
||||
|
||||
Hermes seeds its credential pool from many places:
|
||||
|
||||
env:<VAR> — os.environ / ~/.hermes/.env
|
||||
claude_code — ~/.claude/.credentials.json
|
||||
hermes_pkce — ~/.hermes/.anthropic_oauth.json
|
||||
device_code — auth.json providers.<provider> (nous, openai-codex, ...)
|
||||
qwen-cli — ~/.qwen/oauth_creds.json
|
||||
gh_cli — gh auth token
|
||||
config:<name> — custom_providers config entry
|
||||
model_config — model.api_key when model.provider == "custom"
|
||||
manual — user ran `hermes auth add`
|
||||
|
||||
Each source has its own reader inside ``agent.credential_pool._seed_from_*``
|
||||
(which keep their existing shape — we haven't restructured them). What we
|
||||
unify here is **removal**:
|
||||
|
||||
``hermes auth remove <provider> <N>`` must make the pool entry stay gone.
|
||||
|
||||
Before this module, every source had an ad-hoc removal branch in
|
||||
``auth_remove_command``, and several sources had no branch at all — so
|
||||
``auth remove`` silently reverted on the next ``load_pool()`` call for
|
||||
qwen-cli, nous device_code (partial), hermes_pkce, copilot gh_cli, and
|
||||
custom-config sources.
|
||||
|
||||
Now every source registers a ``RemovalStep`` that does exactly three things
|
||||
in the same shape:
|
||||
|
||||
1. Clean up whatever externally-readable state the source reads from
|
||||
(.env line, auth.json block, OAuth file, etc.)
|
||||
2. Suppress the ``(provider, source_id)`` in auth.json so the
|
||||
corresponding ``_seed_from_*`` branch skips the upsert on re-load
|
||||
3. Return ``RemovalResult`` describing what was cleaned and any
|
||||
diagnostic hints the user should see (shell-exported env vars,
|
||||
external credential files we deliberately don't delete, etc.)
|
||||
|
||||
Adding a new credential source is:
|
||||
- wire up a reader branch in ``_seed_from_*`` (existing pattern)
|
||||
- gate that reader behind ``is_source_suppressed(provider, source_id)``
|
||||
- register a ``RemovalStep`` here
|
||||
|
||||
No more per-source if/elif chain in ``auth_remove_command``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
class RemovalResult:
|
||||
"""Outcome of removing a credential source.
|
||||
|
||||
Attributes:
|
||||
cleaned: Short strings describing external state that was actually
|
||||
mutated (``"Cleared XAI_API_KEY from .env"``,
|
||||
``"Cleared openai-codex OAuth tokens from auth store"``).
|
||||
Printed as plain lines to the user.
|
||||
hints: Diagnostic lines ABOUT state the user may need to clean up
|
||||
themselves or is deliberately left intact (shell-exported env
|
||||
var, Claude Code credential file we don't delete, etc.).
|
||||
Printed as plain lines to the user. Always non-destructive.
|
||||
suppress: Whether to call ``suppress_credential_source`` after
|
||||
cleanup so future ``load_pool`` calls skip this source.
|
||||
Default True — almost every source needs this to stay sticky.
|
||||
The only legitimate False is ``manual`` entries, which aren't
|
||||
seeded from anywhere external.
|
||||
"""
|
||||
|
||||
cleaned: List[str] = field(default_factory=list)
|
||||
hints: List[str] = field(default_factory=list)
|
||||
suppress: bool = True
|
||||
|
||||
|
||||
@dataclass
|
||||
class RemovalStep:
|
||||
"""How to remove one specific credential source cleanly.
|
||||
|
||||
Attributes:
|
||||
provider: Provider pool key (``"xai"``, ``"anthropic"``, ``"nous"``, ...).
|
||||
Special value ``"*"`` means "matches any provider" — used for
|
||||
sources like ``manual`` that aren't provider-specific.
|
||||
source_id: Source identifier as it appears in
|
||||
``PooledCredential.source``. May be a literal (``"claude_code"``)
|
||||
or a prefix pattern matched via ``match_fn``.
|
||||
match_fn: Optional predicate overriding literal ``source_id``
|
||||
matching. Gets the removed entry's source string. Used for
|
||||
``env:*`` (any env-seeded key), ``config:*`` (any custom
|
||||
pool), and ``manual:*`` (any manual-source variant).
|
||||
remove_fn: ``(provider, removed_entry) -> RemovalResult``. Does the
|
||||
actual cleanup and returns what happened for the user.
|
||||
description: One-line human-readable description for docs / tests.
|
||||
"""
|
||||
|
||||
provider: str
|
||||
source_id: str
|
||||
remove_fn: Callable[..., RemovalResult]
|
||||
match_fn: Optional[Callable[[str], bool]] = None
|
||||
description: str = ""
|
||||
|
||||
def matches(self, provider: str, source: str) -> bool:
|
||||
if self.provider != "*" and self.provider != provider:
|
||||
return False
|
||||
if self.match_fn is not None:
|
||||
return self.match_fn(source)
|
||||
return source == self.source_id
|
||||
|
||||
|
||||
_REGISTRY: List[RemovalStep] = []
|
||||
|
||||
|
||||
def register(step: RemovalStep) -> RemovalStep:
|
||||
_REGISTRY.append(step)
|
||||
return step
|
||||
|
||||
|
||||
def find_removal_step(provider: str, source: str) -> Optional[RemovalStep]:
|
||||
"""Return the first matching RemovalStep, or None if unregistered.
|
||||
|
||||
Unregistered sources fall through to the default remove path in
|
||||
``auth_remove_command``: the pool entry is already gone (that happens
|
||||
before dispatch), no external cleanup, no suppression. This is the
|
||||
correct behaviour for ``manual`` entries — they were only ever stored
|
||||
in the pool, nothing external to clean up.
|
||||
"""
|
||||
for step in _REGISTRY:
|
||||
if step.matches(provider, source):
|
||||
return step
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Individual RemovalStep implementations — one per source.
|
||||
# ---------------------------------------------------------------------------
|
||||
# Each remove_fn is intentionally small and single-purpose. Adding a new
|
||||
# credential source means adding ONE entry here — no other changes to
|
||||
# auth_remove_command.
|
||||
|
||||
|
||||
def _remove_env_source(provider: str, removed) -> RemovalResult:
|
||||
"""env:<VAR> — the most common case.
|
||||
|
||||
Handles three user situations:
|
||||
1. Var lives only in ~/.hermes/.env → clear it
|
||||
2. Var lives only in the user's shell (shell profile, systemd
|
||||
EnvironmentFile, launchd plist) → hint them where to unset it
|
||||
3. Var lives in both → clear from .env, hint about shell
|
||||
"""
|
||||
from hermes_cli.config import get_env_path, remove_env_value
|
||||
|
||||
result = RemovalResult()
|
||||
env_var = removed.source[len("env:"):]
|
||||
if not env_var:
|
||||
return result
|
||||
|
||||
# Detect shell vs .env BEFORE remove_env_value pops os.environ.
|
||||
env_in_process = bool(os.getenv(env_var))
|
||||
env_in_dotenv = False
|
||||
try:
|
||||
env_path = get_env_path()
|
||||
if env_path.exists():
|
||||
env_in_dotenv = any(
|
||||
line.strip().startswith(f"{env_var}=")
|
||||
for line in env_path.read_text(errors="replace").splitlines()
|
||||
)
|
||||
except OSError:
|
||||
pass
|
||||
shell_exported = env_in_process and not env_in_dotenv
|
||||
|
||||
cleared = remove_env_value(env_var)
|
||||
if cleared:
|
||||
result.cleaned.append(f"Cleared {env_var} from .env")
|
||||
|
||||
if shell_exported:
|
||||
result.hints.extend([
|
||||
f"Note: {env_var} is still set in your shell environment "
|
||||
f"(not in ~/.hermes/.env).",
|
||||
" Unset it there (shell profile, systemd EnvironmentFile, "
|
||||
"launchd plist, etc.) or it will keep being visible to Hermes.",
|
||||
f" The pool entry is now suppressed — Hermes will ignore "
|
||||
f"{env_var} until you run `hermes auth add {provider}`.",
|
||||
])
|
||||
else:
|
||||
result.hints.append(
|
||||
f"Suppressed env:{env_var} — it will not be re-seeded even "
|
||||
f"if the variable is re-exported later."
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
def _remove_claude_code(provider: str, removed) -> RemovalResult:
|
||||
"""~/.claude/.credentials.json is owned by Claude Code itself.
|
||||
|
||||
We don't delete it — the user's Claude Code install still needs to
|
||||
work. We just suppress it so Hermes stops reading it.
|
||||
"""
|
||||
return RemovalResult(hints=[
|
||||
"Suppressed claude_code credential — it will not be re-seeded.",
|
||||
"Note: Claude Code credentials still live in ~/.claude/.credentials.json",
|
||||
"Run `hermes auth add anthropic` to re-enable if needed.",
|
||||
])
|
||||
|
||||
|
||||
def _remove_hermes_pkce(provider: str, removed) -> RemovalResult:
|
||||
"""~/.hermes/.anthropic_oauth.json is ours — delete it outright."""
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
result = RemovalResult()
|
||||
oauth_file = get_hermes_home() / ".anthropic_oauth.json"
|
||||
if oauth_file.exists():
|
||||
try:
|
||||
oauth_file.unlink()
|
||||
result.cleaned.append("Cleared Hermes Anthropic OAuth credentials")
|
||||
except OSError as exc:
|
||||
result.hints.append(f"Could not delete {oauth_file}: {exc}")
|
||||
return result
|
||||
|
||||
|
||||
def _clear_auth_store_provider(provider: str) -> bool:
|
||||
"""Delete auth_store.providers[provider]. Returns True if deleted."""
|
||||
from hermes_cli.auth import (
|
||||
_auth_store_lock,
|
||||
_load_auth_store,
|
||||
_save_auth_store,
|
||||
)
|
||||
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
providers_dict = auth_store.get("providers")
|
||||
if isinstance(providers_dict, dict) and provider in providers_dict:
|
||||
del providers_dict[provider]
|
||||
_save_auth_store(auth_store)
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
|
||||
"""Nous OAuth lives in auth.json providers.nous — clear it and suppress.
|
||||
|
||||
We suppress in addition to clearing because nothing else stops a future
|
||||
`hermes auth add nous` (or any other path that writes providers.nous)
|
||||
from re-seeding before the user has decided to. Suppression forces
|
||||
them to go through `hermes auth add nous` to re-engage, which is the
|
||||
documented re-add path and clears the suppression atomically.
|
||||
"""
|
||||
result = RemovalResult()
|
||||
if _clear_auth_store_provider(provider):
|
||||
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
|
||||
return result
|
||||
|
||||
|
||||
def _remove_minimax_oauth(provider: str, removed) -> RemovalResult:
|
||||
"""MiniMax OAuth lives in auth.json providers.minimax-oauth — clear it.
|
||||
|
||||
Same pattern as Nous: single-source OAuth state with refresh tokens.
|
||||
Suppression of the `oauth` source ensures the pool reseed path
|
||||
(_seed_from_singletons) doesn't instantly undo the removal.
|
||||
"""
|
||||
result = RemovalResult()
|
||||
if _clear_auth_store_provider(provider):
|
||||
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
|
||||
return result
|
||||
|
||||
|
||||
def _remove_xai_oauth_loopback_pkce(provider: str, removed) -> RemovalResult:
|
||||
"""xAI OAuth tokens live in auth.json providers.xai-oauth — clear them.
|
||||
|
||||
Without this step, ``hermes auth remove xai-oauth <N>`` silently undoes
|
||||
itself: the central dispatcher only removes the in-memory pool entry,
|
||||
leaves ``providers.xai-oauth`` in auth.json intact, and on the next
|
||||
``load_pool("xai-oauth")`` call ``_seed_from_singletons`` re-seeds the
|
||||
entry from the still-present singleton — credentials reappear with no
|
||||
user feedback. Clearing the singleton in step with the suppression set
|
||||
by the central dispatcher makes the removal stick.
|
||||
|
||||
Belt-and-braces against the manual entry path: ``hermes auth add
|
||||
xai-oauth`` produces a ``manual:xai_pkce`` entry whose removal step
|
||||
falls through to "unregistered → nothing to clean up" (correct —
|
||||
manual entries are pool-only).
|
||||
"""
|
||||
result = RemovalResult()
|
||||
if _clear_auth_store_provider(provider):
|
||||
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
|
||||
result.hints.append(
|
||||
"Run `hermes model` → xAI Grok OAuth (SuperGrok / Premium+) to re-authenticate if needed."
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
|
||||
"""Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
|
||||
|
||||
refresh_codex_oauth_pure() writes both every time, so clearing only
|
||||
the Hermes auth store is not enough — _seed_from_singletons() would
|
||||
re-import from ~/.codex/auth.json on the next load_pool() call and
|
||||
the removal would be instantly undone. We suppress instead of
|
||||
deleting Codex CLI's file, so the Codex CLI itself keeps working.
|
||||
|
||||
The canonical source name in ``_seed_from_singletons`` is
|
||||
``"device_code"`` (no prefix). Entries may show up in the pool as
|
||||
either ``"device_code"`` (seeded) or ``"manual:device_code"`` (added
|
||||
via ``hermes auth add openai-codex``), but in both cases the re-seed
|
||||
gate lives at the ``"device_code"`` suppression key. We suppress
|
||||
that canonical key here; the central dispatcher also suppresses
|
||||
``removed.source`` which is fine — belt-and-suspenders, idempotent.
|
||||
"""
|
||||
from hermes_cli.auth import suppress_credential_source
|
||||
|
||||
result = RemovalResult()
|
||||
if _clear_auth_store_provider(provider):
|
||||
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
|
||||
# Suppress the canonical re-seed source, not just whatever source the
|
||||
# removed entry had. Otherwise `manual:device_code` removals wouldn't
|
||||
# block the `device_code` re-seed path.
|
||||
suppress_credential_source(provider, "device_code")
|
||||
result.hints.extend([
|
||||
"Suppressed openai-codex device_code source — it will not be re-seeded.",
|
||||
"Note: Codex CLI credentials still live in ~/.codex/auth.json",
|
||||
"Run `hermes auth add openai-codex` to re-enable if needed.",
|
||||
])
|
||||
return result
|
||||
|
||||
|
||||
def _remove_qwen_cli(provider: str, removed) -> RemovalResult:
|
||||
"""~/.qwen/oauth_creds.json is owned by the Qwen CLI.
|
||||
|
||||
Same pattern as claude_code — suppress, don't delete. The user's
|
||||
Qwen CLI install still reads from that file.
|
||||
"""
|
||||
return RemovalResult(hints=[
|
||||
"Suppressed qwen-cli credential — it will not be re-seeded.",
|
||||
"Note: Qwen CLI credentials still live in ~/.qwen/oauth_creds.json",
|
||||
"Run `hermes auth add qwen-oauth` to re-enable if needed.",
|
||||
])
|
||||
|
||||
|
||||
def _remove_copilot_gh(provider: str, removed) -> RemovalResult:
|
||||
"""Copilot token comes from `gh auth token` or COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN.
|
||||
|
||||
Copilot is special: the same token can be seeded as multiple source
|
||||
entries (gh_cli from ``_seed_from_singletons`` plus env:<VAR> from
|
||||
``_seed_from_env``), so removing one entry without suppressing the
|
||||
others lets the duplicates resurrect. We suppress ALL known copilot
|
||||
sources here so removal is stable regardless of which entry the
|
||||
user clicked.
|
||||
|
||||
We don't touch the user's gh CLI or shell state — just suppress so
|
||||
Hermes stops picking the token up.
|
||||
"""
|
||||
# Suppress ALL copilot source variants up-front so no path resurrects
|
||||
# the pool entry. The central dispatcher in auth_remove_command will
|
||||
# ALSO suppress removed.source, but it's idempotent so double-calling
|
||||
# is harmless.
|
||||
from hermes_cli.auth import suppress_credential_source
|
||||
suppress_credential_source(provider, "gh_cli")
|
||||
for env_var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"):
|
||||
suppress_credential_source(provider, f"env:{env_var}")
|
||||
|
||||
return RemovalResult(hints=[
|
||||
"Suppressed all copilot token sources (gh_cli + env vars) — they will not be re-seeded.",
|
||||
"Note: Your gh CLI / shell environment is unchanged.",
|
||||
"Run `hermes auth add copilot` to re-enable if needed.",
|
||||
])
|
||||
|
||||
|
||||
def _remove_custom_config(provider: str, removed) -> RemovalResult:
|
||||
"""Custom provider pools are seeded from custom_providers config or
|
||||
model.api_key. Both are in config.yaml — modifying that from here
|
||||
is more invasive than suppression. We suppress; the user can edit
|
||||
config.yaml if they want to remove the key from disk entirely.
|
||||
"""
|
||||
source_label = removed.source
|
||||
return RemovalResult(hints=[
|
||||
f"Suppressed {source_label} — it will not be re-seeded.",
|
||||
"Note: The underlying value in config.yaml is unchanged. Edit it "
|
||||
"directly if you want to remove the credential from disk.",
|
||||
])
|
||||
|
||||
|
||||
def _register_all_sources() -> None:
|
||||
"""Called once on module import.
|
||||
|
||||
ORDER MATTERS — ``find_removal_step`` returns the first match. Put
|
||||
provider-specific steps before the generic ``env:*`` step so that e.g.
|
||||
copilot's ``env:GH_TOKEN`` goes through the copilot removal (which
|
||||
doesn't touch the user's shell), not the generic env-var removal
|
||||
(which would try to clear .env).
|
||||
"""
|
||||
register(RemovalStep(
|
||||
provider="copilot", source_id="gh_cli",
|
||||
match_fn=lambda src: src == "gh_cli" or src.startswith("env:"),
|
||||
remove_fn=_remove_copilot_gh,
|
||||
description="gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="*", source_id="env:",
|
||||
match_fn=lambda src: src.startswith("env:"),
|
||||
remove_fn=_remove_env_source,
|
||||
description="Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="anthropic", source_id="claude_code",
|
||||
remove_fn=_remove_claude_code,
|
||||
description="~/.claude/.credentials.json",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="anthropic", source_id="hermes_pkce",
|
||||
remove_fn=_remove_hermes_pkce,
|
||||
description="~/.hermes/.anthropic_oauth.json",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="nous", source_id="device_code",
|
||||
remove_fn=_remove_nous_device_code,
|
||||
description="auth.json providers.nous",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="openai-codex", source_id="device_code",
|
||||
match_fn=lambda src: src == "device_code" or src.endswith(":device_code"),
|
||||
remove_fn=_remove_codex_device_code,
|
||||
description="auth.json providers.openai-codex + ~/.codex/auth.json",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="xai-oauth", source_id="loopback_pkce",
|
||||
remove_fn=_remove_xai_oauth_loopback_pkce,
|
||||
description="auth.json providers.xai-oauth",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="qwen-oauth", source_id="qwen-cli",
|
||||
remove_fn=_remove_qwen_cli,
|
||||
description="~/.qwen/oauth_creds.json",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="minimax-oauth", source_id="oauth",
|
||||
remove_fn=_remove_minimax_oauth,
|
||||
description="auth.json providers.minimax-oauth",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="*", source_id="config:",
|
||||
match_fn=lambda src: src.startswith("config:") or src == "model_config",
|
||||
remove_fn=_remove_custom_config,
|
||||
description="Custom provider config.yaml api_key field",
|
||||
))
|
||||
|
||||
|
||||
_register_all_sources()
|
||||
@@ -1,723 +0,0 @@
|
||||
"""Credits tracking for Nous inference API responses.
|
||||
|
||||
Parses x-nous-credits-* (and optional x-nous-tool-pool-*) headers from
|
||||
inference responses into a validated CreditsState dataclass. Provides
|
||||
depletion detection (paid_access), subscription-cap used_fraction, and
|
||||
warn-once schema-version gating. This is the hardened parser used by all
|
||||
live consumers (run_agent, tui_gateway) — not a dev-only shim.
|
||||
|
||||
Header schema (x-nous-credits-* family):
|
||||
x-nous-credits-version contract/schema version
|
||||
x-nous-credits-remaining-micros total remaining balance (micros)
|
||||
x-nous-credits-remaining-usd same, formatted USD string
|
||||
x-nous-credits-subscription-micros subscription balance (SIGNED; may be negative/debt)
|
||||
x-nous-credits-subscription-usd same, formatted USD string
|
||||
x-nous-credits-subscription-limit-micros subscription cap (PAIRED/optional)
|
||||
x-nous-credits-subscription-limit-usd same, formatted USD string (PAIRED/optional)
|
||||
x-nous-credits-rollover-micros rolled-over balance (micros)
|
||||
x-nous-credits-purchased-micros purchased balance (micros)
|
||||
x-nous-credits-purchased-usd same, formatted USD string
|
||||
x-nous-credits-denominator-kind "subscription_cap" | "none"
|
||||
x-nous-credits-paid-access "true" | "false" (STRING!)
|
||||
x-nous-credits-disabled-reason reason string (header omitted when null)
|
||||
x-nous-credits-as-of-ms server-side timestamp (ms epoch)
|
||||
|
||||
Tool-pool headers use a SEPARATE prefix:
|
||||
x-nous-tool-pool-micros tool-pool balance (micros)
|
||||
x-nous-tool-pool-gated-off "true" | "false" (STRING!)
|
||||
|
||||
Money is handled as micros ints only; *_usd values are preserved verbatim as
|
||||
the raw strings the server sent (never re-parsed to float).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Mapping, Optional
|
||||
|
||||
from utils import is_truthy_value
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Warn-once latch: emit the version-unsupported warning at most once per process.
|
||||
_version_warning_emitted: bool = False
|
||||
|
||||
# Valid denominator kinds (exhaustive set from the API contract).
|
||||
_VALID_DENOMINATOR_KINDS = frozenset({"subscription_cap", "none"})
|
||||
|
||||
# USD format: optional leading minus, one-or-more digits, dot, exactly 2 digits.
|
||||
_USD_RE = re.compile(r"^-?\d+\.\d{2}$")
|
||||
|
||||
|
||||
# ── Internal helpers ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
_SENTINEL = object() # singleton sentinel for "parse failed"
|
||||
|
||||
|
||||
def _safe_int(value: Any) -> Any:
|
||||
"""Parse a header value to an exact int (money-safe).
|
||||
|
||||
The contract guarantees every ``*_micros`` field is an integer string —
|
||||
we parse with ``int()`` directly, NOT ``int(float(...))``, to avoid float-
|
||||
precision loss above 2**53 that would silently corrupt large money values.
|
||||
|
||||
Returns the parsed int, or ``_SENTINEL`` if the value is not a valid integer
|
||||
string (including float-shaped strings like "1.5"). The sentinel lets callers
|
||||
detect the failure and return None from the overall parse (fail-hard-on-bad-
|
||||
input, not silently coerce).
|
||||
"""
|
||||
if value is None:
|
||||
return _SENTINEL
|
||||
try:
|
||||
return int(str(value))
|
||||
except (TypeError, ValueError):
|
||||
return _SENTINEL
|
||||
|
||||
|
||||
|
||||
def _validate_usd(value: Optional[str]) -> bool:
|
||||
"""Return True iff value is a non-None string matching ^-?\\d+\\.\\d{2}$."""
|
||||
if value is None:
|
||||
return False
|
||||
return bool(_USD_RE.match(value))
|
||||
|
||||
|
||||
# ── CreditsState dataclass ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
@dataclass
|
||||
class CreditsState:
|
||||
"""Full credits state parsed from x-nous-credits-* response headers."""
|
||||
|
||||
version: int = 0
|
||||
remaining_micros: int = 0
|
||||
remaining_usd: str = ""
|
||||
subscription_micros: int = 0 # SIGNED — may be negative (debt). ONLY field allowed negative.
|
||||
subscription_usd: str = ""
|
||||
subscription_limit_micros: Optional[int] = None # PAIRED + OPTIONAL (only when subscription_cap)
|
||||
subscription_limit_usd: Optional[str] = None
|
||||
rollover_micros: int = 0
|
||||
purchased_micros: int = 0
|
||||
purchased_usd: str = ""
|
||||
tool_pool_micros: int = 0
|
||||
tool_pool_gated_off: bool = False
|
||||
denominator_kind: str = "none" # "subscription_cap" | "none"
|
||||
paid_access: bool = True # depletion keys off THIS == False, NEVER remaining==0
|
||||
disabled_reason: Optional[str] = None # header omitted entirely when null
|
||||
as_of_ms: int = 0
|
||||
captured_at: float = 0.0 # time.time() when this was captured
|
||||
from_header: bool = False # True only when populated by parse_credits_headers()
|
||||
|
||||
@property
|
||||
def has_data(self) -> bool:
|
||||
return self.captured_at > 0
|
||||
|
||||
@property
|
||||
def age_seconds(self) -> float:
|
||||
if not self.has_data:
|
||||
return float("inf")
|
||||
return time.time() - self.captured_at
|
||||
|
||||
@property
|
||||
def depleted(self) -> bool:
|
||||
"""True when the account has lost paid access.
|
||||
|
||||
Keyed off ``paid_access == False`` ONLY — never ``remaining_micros == 0``,
|
||||
which would give a false positive whenever the balance is zero but access
|
||||
is still live (e.g. subscription renewal pending).
|
||||
"""
|
||||
return not self.paid_access
|
||||
|
||||
@property
|
||||
def used_fraction(self) -> Optional[float]:
|
||||
"""Fraction of the subscription cap consumed, in [0.0, 1.0].
|
||||
|
||||
Computable only when ``subscription_limit_micros`` is a truthy (non-zero,
|
||||
non-None) int. Guarded on the LIMIT FIELD, not ``denominator_kind`` —
|
||||
the limit field is the real denominator; ``denominator_kind`` is metadata.
|
||||
Returns None when there is no computable denominator (no limit, or limit==0).
|
||||
"""
|
||||
if not isinstance(self.subscription_limit_micros, int):
|
||||
return None
|
||||
if self.subscription_limit_micros <= 0:
|
||||
return None
|
||||
used = self.subscription_limit_micros - self.subscription_micros
|
||||
return max(0.0, min(1.0, used / self.subscription_limit_micros))
|
||||
|
||||
|
||||
# ── Credits policy constants ─────────────────────────────────────────────────
|
||||
# Switching credits notices from sticky→TTL later would also require wiring a
|
||||
# paired *_TTL_MS companion for each notice kind — the field exists on AgentNotice
|
||||
# but is not yet plumbed through the policy loop.
|
||||
|
||||
CREDITS_NOTICE_KIND = "sticky" # v1: credits notices are sticky
|
||||
CREDITS_RESTORED_TTL_MS = 8000 # the only TTL notice in v1 (depletion-recovery confirmation)
|
||||
|
||||
# Usage-gauge bands (ascending). Each is (threshold_fraction, level, label_pct).
|
||||
# The notice shows the HIGHEST band the current used_fraction has reached — a single
|
||||
# escalating status-bar line (50 → 75 → 90), not three stacked notices. Crossing the
|
||||
# next band up replaces the line; recovering below a band steps it back down. Edit
|
||||
# this list to retune the bands; the policy derives everything from it.
|
||||
CREDITS_USAGE_BANDS: tuple[tuple[float, str, int], ...] = (
|
||||
(0.50, "info", 50),
|
||||
(0.75, "warn", 75),
|
||||
(0.90, "warn", 90),
|
||||
)
|
||||
CREDITS_USAGE_KEY = "credits.usage" # single key for the escalating usage notice
|
||||
|
||||
|
||||
# ── AgentNotice (out-of-band notice payload; driver-agnostic) ────────────────
|
||||
|
||||
|
||||
@dataclass
|
||||
class AgentNotice:
|
||||
"""A structured, driver-agnostic out-of-band notice.
|
||||
|
||||
The agent fires these via ``AIAgent.notice_callback`` (and clears them via
|
||||
``notice_clear_callback``); each driver renders it its own way — the TUI as a
|
||||
status-bar override, the CLI as a console line, etc. v1 credits notices are all
|
||||
``kind="sticky"``; ``kind``/``ttl_ms`` are kept fully expressive so a future
|
||||
config/slash-command can switch them to TTL without touching the policy (a
|
||||
single default seam — see L4).
|
||||
"""
|
||||
|
||||
text: str
|
||||
level: str = "info" # info | warn | error | success
|
||||
kind: str = "sticky" # sticky | ttl
|
||||
ttl_ms: Optional[int] = None # honored only when kind == "ttl"
|
||||
key: Optional[str] = None # dedupe / fired-once-latch / clear key
|
||||
id: Optional[str] = None
|
||||
|
||||
|
||||
# ── evaluate_credits_notices (pure reconciliation function) ──────────────────
|
||||
|
||||
|
||||
def evaluate_credits_notices(
|
||||
state: CreditsState,
|
||||
latch: dict,
|
||||
) -> tuple[list[AgentNotice], list[str]]:
|
||||
"""Reconcile credits notices against the latch. Mutates ``latch`` IN PLACE.
|
||||
|
||||
latch = {"active": set[str], "seen_below_90": bool, "usage_band": Optional[int]}.
|
||||
|
||||
Returns ``(to_show: list[AgentNotice], to_clear: list[str])``.
|
||||
Caller emits to_clear FIRST, then to_show.
|
||||
|
||||
Pure function — no I/O, no agent/run_agent imports.
|
||||
"""
|
||||
to_show: list[AgentNotice] = []
|
||||
to_clear: list[str] = []
|
||||
|
||||
uf = state.used_fraction
|
||||
|
||||
# Crossing latch: once we've observed uf below the LOWEST band, escalating
|
||||
# usage notices may fire. This prevents a brand-new session that opens
|
||||
# mid-range from firing spuriously on the first observation (the cold-start
|
||||
# seed primes this explicitly when it WANTS an open-high warning).
|
||||
_lowest_band = CREDITS_USAGE_BANDS[0][0]
|
||||
if uf is not None and uf < _lowest_band:
|
||||
latch["seen_below_90"] = True # gate opened: usage-band notices may now fire
|
||||
|
||||
active = latch["active"]
|
||||
|
||||
# ── Conditions ───────────────────────────────────────────────────────────
|
||||
# Highest band whose threshold the current usage has reached (None below all).
|
||||
current_band: Optional[tuple[float, str, int]] = None
|
||||
if uf is not None:
|
||||
for band in CREDITS_USAGE_BANDS: # ascending → last match wins = highest
|
||||
if uf >= band[0]:
|
||||
current_band = band
|
||||
grant_cond = (
|
||||
state.denominator_kind == "subscription_cap"
|
||||
and uf is not None
|
||||
and uf >= 1.0
|
||||
and state.purchased_micros > 0
|
||||
)
|
||||
depleted_cond = not state.paid_access
|
||||
|
||||
# ── usage gauge (escalating single notice: 50 → 75 → 90) ──────────────────
|
||||
# Show only the highest crossed band; replace the line when the band changes
|
||||
# (climb or step-down on recovery); clear entirely when usage drops below the
|
||||
# lowest band or the denominator disappears (uf is None).
|
||||
shown_band = latch.get("usage_band") # the pct label currently displayed, or None
|
||||
target_band = current_band[2] if (current_band and latch["seen_below_90"]) else None
|
||||
if target_band != shown_band:
|
||||
if CREDITS_USAGE_KEY in active:
|
||||
to_clear.append(CREDITS_USAGE_KEY)
|
||||
active.discard(CREDITS_USAGE_KEY)
|
||||
if target_band is not None:
|
||||
# Belt-and-suspenders: a producer could set subscription_limit_micros
|
||||
# without subscription_limit_usd. Render "$? cap" rather than "$None cap".
|
||||
_cap_usd = state.subscription_limit_usd or "?"
|
||||
_level = current_band[1] # type: ignore[index] (current_band set when target_band set)
|
||||
to_show.append(
|
||||
AgentNotice(
|
||||
text=f"{'⚠' if _level == 'warn' else '•'} Credits {target_band}% used · ${_cap_usd} cap",
|
||||
level=_level,
|
||||
kind=CREDITS_NOTICE_KIND,
|
||||
key=CREDITS_USAGE_KEY,
|
||||
id=CREDITS_USAGE_KEY,
|
||||
)
|
||||
)
|
||||
active.add(CREDITS_USAGE_KEY)
|
||||
latch["usage_band"] = target_band
|
||||
|
||||
# ── grant_spent ──────────────────────────────────────────────────────────
|
||||
if grant_cond and "credits.grant_spent" not in active:
|
||||
to_show.append(
|
||||
AgentNotice(
|
||||
text=f"• Grant spent · ${state.purchased_usd} top-up left",
|
||||
level="info",
|
||||
kind=CREDITS_NOTICE_KIND,
|
||||
key="credits.grant_spent",
|
||||
id="credits.grant_spent",
|
||||
)
|
||||
)
|
||||
active.add("credits.grant_spent")
|
||||
elif "credits.grant_spent" in active and not grant_cond:
|
||||
to_clear.append("credits.grant_spent")
|
||||
active.discard("credits.grant_spent")
|
||||
|
||||
# ── depleted ─────────────────────────────────────────────────────────────
|
||||
if depleted_cond and "credits.depleted" not in active:
|
||||
to_show.append(
|
||||
AgentNotice(
|
||||
text="✕ Credit access paused · run /usage for balance",
|
||||
level="error",
|
||||
kind=CREDITS_NOTICE_KIND,
|
||||
key="credits.depleted",
|
||||
id="credits.depleted",
|
||||
)
|
||||
)
|
||||
active.add("credits.depleted")
|
||||
elif "credits.depleted" in active and not depleted_cond:
|
||||
to_clear.append("credits.depleted")
|
||||
active.discard("credits.depleted")
|
||||
# Recovery: also emit the success notice
|
||||
to_show.append(
|
||||
AgentNotice(
|
||||
text="✓ Credit access restored",
|
||||
level="success",
|
||||
kind="ttl",
|
||||
ttl_ms=CREDITS_RESTORED_TTL_MS,
|
||||
key="credits.restored",
|
||||
id="credits.restored",
|
||||
)
|
||||
)
|
||||
|
||||
return (to_show, to_clear)
|
||||
|
||||
|
||||
# ── parse_credits_headers ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def parse_credits_headers(
|
||||
headers: Mapping[str, str],
|
||||
provider: str = "",
|
||||
) -> Optional[CreditsState]:
|
||||
"""Parse x-nous-credits-* (and x-nous-tool-pool-*) headers into a CreditsState.
|
||||
|
||||
Returns None (miss) on ANY of:
|
||||
- No ``x-nous-credits-version`` header present.
|
||||
- Version != 1 (> 1 also emits a one-time logger.warning).
|
||||
- Any ``*_micros`` field is non-integer, or negative for a non-subscription field.
|
||||
- Any ``*_usd`` field doesn't match ``^-?\\d+\\.\\d{2}$``.
|
||||
- ``denominator_kind`` is not in {"subscription_cap", "none"}.
|
||||
- ``paid_access`` / ``tool_pool_gated_off`` is not exactly "true"/"false".
|
||||
- ``as_of_ms`` is not a valid integer.
|
||||
- Any unexpected exception.
|
||||
|
||||
Fail-open on the subscription_limit pair: a half-pair (only -micros or only
|
||||
-usd present) is treated as both-absent; the overall parse STILL SUCCEEDS
|
||||
but with subscription_limit_micros/usd both None.
|
||||
"""
|
||||
global _version_warning_emitted
|
||||
|
||||
try:
|
||||
# Cheap probe before the full lowercase copy: bail when the version
|
||||
# sentinel header is absent (the common case for non-Nous providers, on
|
||||
# every API call) — skips allocating a dict over the whole response's
|
||||
# headers on the hot path, while preserving case-insensitivity. Behaviour
|
||||
# is identical: a missing version header was already a None return below.
|
||||
if not any(k.lower() == "x-nous-credits-version" for k in headers):
|
||||
return None
|
||||
# Normalize to lowercase so lookups work regardless of how the server
|
||||
# capitalises headers (HTTP header names are case-insensitive per RFC 7230).
|
||||
lowered = {k.lower(): v for k, v in headers.items()}
|
||||
|
||||
# ── Version check ────────────────────────────────────────────────────
|
||||
# Must be present and exactly 1; > 1 warns once then returns None.
|
||||
version_raw = lowered.get("x-nous-credits-version")
|
||||
if version_raw is None:
|
||||
return None
|
||||
version_val = _safe_int(version_raw)
|
||||
if version_val is _SENTINEL:
|
||||
return None
|
||||
if version_val != 1:
|
||||
if version_val > 1 and not _version_warning_emitted:
|
||||
_version_warning_emitted = True
|
||||
logger.warning(
|
||||
"credits header version %d unsupported, ignoring — update Hermes",
|
||||
version_val,
|
||||
)
|
||||
return None
|
||||
|
||||
# ── Helper: parse a required non-negative int field (fail → None) ───
|
||||
def _req_nonneg(key: str) -> Any:
|
||||
raw = lowered.get(key)
|
||||
val = _safe_int(raw)
|
||||
if val is _SENTINEL:
|
||||
return _SENTINEL
|
||||
if val < 0:
|
||||
return _SENTINEL
|
||||
return val
|
||||
|
||||
# ── Helper: parse a required int field that may be negative (subscription only) ─
|
||||
def _req_int(key: str) -> Any:
|
||||
raw = lowered.get(key)
|
||||
val = _safe_int(raw)
|
||||
if val is _SENTINEL:
|
||||
return _SENTINEL
|
||||
return val
|
||||
|
||||
# ── Parse micros fields ──────────────────────────────────────────────
|
||||
remaining_micros = _req_nonneg("x-nous-credits-remaining-micros")
|
||||
if remaining_micros is _SENTINEL:
|
||||
return None
|
||||
|
||||
subscription_micros = _req_int("x-nous-credits-subscription-micros")
|
||||
if subscription_micros is _SENTINEL:
|
||||
return None
|
||||
|
||||
rollover_micros = _req_nonneg("x-nous-credits-rollover-micros")
|
||||
if rollover_micros is _SENTINEL:
|
||||
return None
|
||||
|
||||
purchased_micros = _req_nonneg("x-nous-credits-purchased-micros")
|
||||
if purchased_micros is _SENTINEL:
|
||||
return None
|
||||
|
||||
# tool_pool_micros is OPTIONAL: absent → 0 (default); present-but-invalid → None (miss).
|
||||
_tp_raw = lowered.get("x-nous-tool-pool-micros")
|
||||
if _tp_raw is None:
|
||||
tool_pool_micros = 0
|
||||
else:
|
||||
_tp_val = _safe_int(_tp_raw)
|
||||
if _tp_val is _SENTINEL or _tp_val < 0:
|
||||
return None
|
||||
tool_pool_micros = _tp_val
|
||||
|
||||
as_of_ms = _req_nonneg("x-nous-credits-as-of-ms")
|
||||
if as_of_ms is _SENTINEL:
|
||||
return None
|
||||
|
||||
# ── Validate USD strings ─────────────────────────────────────────────
|
||||
remaining_usd = lowered.get("x-nous-credits-remaining-usd", "")
|
||||
if not _validate_usd(remaining_usd):
|
||||
return None
|
||||
|
||||
subscription_usd = lowered.get("x-nous-credits-subscription-usd", "")
|
||||
if not _validate_usd(subscription_usd):
|
||||
return None
|
||||
|
||||
purchased_usd = lowered.get("x-nous-credits-purchased-usd", "")
|
||||
if not _validate_usd(purchased_usd):
|
||||
return None
|
||||
|
||||
# ── subscription_limit_* PAIRED + OPTIONAL ───────────────────────────
|
||||
# Both present → validate both; half-pair → treat BOTH as absent (parse
|
||||
# still succeeds, just with no limit pair).
|
||||
sub_limit_micros_raw = lowered.get("x-nous-credits-subscription-limit-micros")
|
||||
sub_limit_usd_raw = lowered.get("x-nous-credits-subscription-limit-usd")
|
||||
|
||||
subscription_limit_micros: Optional[int] = None
|
||||
subscription_limit_usd: Optional[str] = None
|
||||
|
||||
if sub_limit_micros_raw is not None and sub_limit_usd_raw is not None:
|
||||
# Both present — validate both; any invalid → return None (bad data)
|
||||
lm = _safe_int(sub_limit_micros_raw)
|
||||
if lm is _SENTINEL:
|
||||
return None
|
||||
if lm < 0:
|
||||
return None
|
||||
if not _validate_usd(sub_limit_usd_raw):
|
||||
return None
|
||||
subscription_limit_micros = lm
|
||||
subscription_limit_usd = sub_limit_usd_raw
|
||||
# else: half-pair or both absent → leave both None, parse continues
|
||||
|
||||
# ── denominator_kind ─────────────────────────────────────────────────
|
||||
denominator_kind = lowered.get("x-nous-credits-denominator-kind", "none")
|
||||
if denominator_kind not in _VALID_DENOMINATOR_KINDS:
|
||||
return None
|
||||
|
||||
# ── paid_access / tool_pool_gated_off ────────────────────────────────
|
||||
# Both must be exactly "true" or "false" (case-insensitive). An absent
|
||||
# paid_access header → fail-open (assume access); absent tool_pool_gated_off
|
||||
# → default False. Present but invalid → return None.
|
||||
if "x-nous-credits-paid-access" in lowered:
|
||||
pa_raw = lowered["x-nous-credits-paid-access"].strip().lower()
|
||||
if pa_raw not in ("true", "false"):
|
||||
return None
|
||||
paid_access = pa_raw == "true"
|
||||
else:
|
||||
paid_access = True # fail-open
|
||||
|
||||
if "x-nous-tool-pool-gated-off" in lowered:
|
||||
tpgo_raw = lowered["x-nous-tool-pool-gated-off"].strip().lower()
|
||||
if tpgo_raw not in ("true", "false"):
|
||||
return None
|
||||
tool_pool_gated_off = tpgo_raw == "true"
|
||||
else:
|
||||
tool_pool_gated_off = False
|
||||
|
||||
# ── disabled_reason: header omitted when null ────────────────────────
|
||||
disabled_reason = lowered.get("x-nous-credits-disabled-reason") # None if absent
|
||||
|
||||
return CreditsState(
|
||||
version=version_val,
|
||||
remaining_micros=remaining_micros,
|
||||
remaining_usd=remaining_usd,
|
||||
subscription_micros=subscription_micros,
|
||||
subscription_usd=subscription_usd,
|
||||
subscription_limit_micros=subscription_limit_micros,
|
||||
subscription_limit_usd=subscription_limit_usd,
|
||||
rollover_micros=rollover_micros,
|
||||
purchased_micros=purchased_micros,
|
||||
purchased_usd=purchased_usd,
|
||||
tool_pool_micros=tool_pool_micros,
|
||||
tool_pool_gated_off=tool_pool_gated_off,
|
||||
denominator_kind=denominator_kind,
|
||||
paid_access=paid_access,
|
||||
disabled_reason=disabled_reason,
|
||||
as_of_ms=as_of_ms,
|
||||
captured_at=time.time(),
|
||||
from_header=True,
|
||||
)
|
||||
|
||||
except Exception:
|
||||
# Fail-open → miss, but leave a breadcrumb so a parser/import regression
|
||||
# (feature silently dead) is distinguishable from a legitimate no-headers
|
||||
# response in agent.log, without needing a dev flag.
|
||||
logger.debug("credits ▸ parse_credits_headers raised (fail-open miss)", exc_info=True)
|
||||
return None
|
||||
|
||||
|
||||
# ── Dev test fixtures (HERMES_DEV_CREDITS_FIXTURE) ───────────────────────────
|
||||
# Throwaway dev scaffolding: trigger any notice state on demand for testing,
|
||||
# without real spend or Redis seeding. Set HERMES_DEV_CREDITS_FIXTURE to either a
|
||||
# state NAME (fixed for the session) or a FILE PATH whose contents are a state
|
||||
# name (re-read every turn → flip states live: `echo depleted > /tmp/cf`, take a
|
||||
# turn; `echo healthy > /tmp/cf`, take a turn → recovery).
|
||||
#
|
||||
# A fixture drives THREE surfaces uniformly, so the whole credits UX is testable
|
||||
# offline: (1) the per-turn capture/notice path (_capture_credits), (2) the
|
||||
# cold-start seed at session open (conversation_loop → depletion/warn90 hydrate
|
||||
# immediately), and (3) the /usage view (nous_credits_lines renders the fixture).
|
||||
# `clear` / `none` / unset → real behaviour. Delete with the rest of the
|
||||
# HERMES_DEV_CREDITS scaffolding.
|
||||
_DEV_FIXTURES: dict[str, dict] = {
|
||||
"healthy": dict( # used_fraction ~0.1, paid → no notice (recovery target)
|
||||
remaining_micros=30_340_000, remaining_usd="30.34",
|
||||
subscription_micros=18_000_000, subscription_usd="18.00",
|
||||
subscription_limit_micros=20_000_000, subscription_limit_usd="20.00",
|
||||
purchased_micros=12_340_000, purchased_usd="12.34",
|
||||
denominator_kind="subscription_cap", paid_access=True,
|
||||
),
|
||||
"sub_50pct": dict( # used_fraction == 0.5 → credits.usage band 50 (info)
|
||||
remaining_micros=10_000_000, remaining_usd="10.00",
|
||||
subscription_micros=10_000_000, subscription_usd="10.00",
|
||||
subscription_limit_micros=20_000_000, subscription_limit_usd="20.00",
|
||||
denominator_kind="subscription_cap", paid_access=True,
|
||||
),
|
||||
"sub_75pct": dict( # used_fraction == 0.75 → credits.usage band 75 (warn)
|
||||
remaining_micros=5_000_000, remaining_usd="5.00",
|
||||
subscription_micros=5_000_000, subscription_usd="5.00",
|
||||
subscription_limit_micros=20_000_000, subscription_limit_usd="20.00",
|
||||
denominator_kind="subscription_cap", paid_access=True,
|
||||
),
|
||||
"sub_90pct": dict( # used_fraction == 0.9 → credits.usage band 90 (warn)
|
||||
remaining_micros=2_000_000, remaining_usd="2.00",
|
||||
subscription_micros=2_000_000, subscription_usd="2.00",
|
||||
subscription_limit_micros=20_000_000, subscription_limit_usd="20.00",
|
||||
denominator_kind="subscription_cap", paid_access=True,
|
||||
),
|
||||
"grant_exhausted": dict( # used_fraction == 1.0 + purchased>0 → credits.grant_spent
|
||||
remaining_micros=12_340_000, remaining_usd="12.34",
|
||||
subscription_micros=0, subscription_usd="0.00",
|
||||
subscription_limit_micros=20_000_000, subscription_limit_usd="20.00",
|
||||
purchased_micros=12_340_000, purchased_usd="12.34",
|
||||
denominator_kind="subscription_cap", paid_access=True,
|
||||
),
|
||||
"depleted": dict( # paid_access False → credits.depleted (sticky)
|
||||
remaining_micros=0, remaining_usd="0.00",
|
||||
subscription_micros=0, subscription_usd="0.00",
|
||||
purchased_micros=0, purchased_usd="0.00",
|
||||
paid_access=False, disabled_reason="out_of_credits",
|
||||
),
|
||||
"debt": dict( # subscription in debt (negative, the only signed field) → depleted
|
||||
remaining_micros=0, remaining_usd="0.00",
|
||||
subscription_micros=-5_000_000, subscription_usd="-5.00",
|
||||
subscription_limit_micros=20_000_000, subscription_limit_usd="20.00",
|
||||
purchased_micros=0, purchased_usd="0.00",
|
||||
denominator_kind="subscription_cap", paid_access=False,
|
||||
disabled_reason="out_of_credits",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def dev_fixture_credits_state() -> Optional[CreditsState]:
|
||||
"""Return a fixture CreditsState for HERMES_DEV_CREDITS_FIXTURE, or None.
|
||||
|
||||
The env value is a state name, OR a path to a file whose contents are a state
|
||||
name (re-read each call → flip states live without a restart). Unknown name /
|
||||
"clear" / "none" / unset → None (normal behaviour). Throwaway test scaffolding.
|
||||
|
||||
Hard prod-leak guard: a fixture applies ONLY when the dev flag HERMES_DEV_CREDITS
|
||||
is also on, so a stray HERMES_DEV_CREDITS_FIXTURE (leaked into a shell profile, a
|
||||
container env, a launch plist, …) can never surface fabricated balances/notices
|
||||
on a real account.
|
||||
"""
|
||||
if not is_truthy_value(os.environ.get("HERMES_DEV_CREDITS")):
|
||||
return None
|
||||
raw = os.environ.get("HERMES_DEV_CREDITS_FIXTURE", "").strip()
|
||||
if not raw:
|
||||
return None
|
||||
name = raw
|
||||
if os.path.sep in raw or "/" in raw: # looks like a path → read the name from the file
|
||||
try:
|
||||
with open(raw, "r", encoding="utf-8") as fh:
|
||||
name = fh.read().strip()
|
||||
except OSError:
|
||||
return None
|
||||
spec = _DEV_FIXTURES.get(name.lower())
|
||||
if not spec:
|
||||
return None
|
||||
# Stamp the fields the REAL parser always guarantees, so a fixture state is
|
||||
# field-identical to a parse_credits_headers() result from equivalent headers
|
||||
# (verified by the differential test): version is always 1, and purchased_usd
|
||||
# is always a valid usd string (the parser rejects a missing/empty one, so a
|
||||
# real zero-top-up account still carries "0.00"). Specs may override these.
|
||||
merged = {"version": 1, "purchased_usd": "0.00", **spec}
|
||||
return CreditsState(**merged, from_header=True, captured_at=time.time())
|
||||
|
||||
|
||||
def _credits_state_from_account(info) -> Optional[CreditsState]:
|
||||
"""Map a NousPortalAccountInfo into a header-shaped CreditsState for the seed.
|
||||
|
||||
Float account dollars → micros (plus a DISPLAY *_usd string — allowed, since
|
||||
we're formatting account floats, NOT parsing a server-provided *_usd). Returns
|
||||
None if the account can't yield a usable state (fail-open)."""
|
||||
try:
|
||||
_acc = getattr(info, "paid_service_access_info", None)
|
||||
_sub = getattr(info, "subscription", None)
|
||||
|
||||
def _to_micros(dollars):
|
||||
return int(round(dollars * 1_000_000)) if isinstance(dollars, (int, float)) else 0
|
||||
|
||||
def _to_usd(dollars):
|
||||
# DISPLAY formatting of an account float (not a server *_usd string);
|
||||
# "" when absent so render/notice copy falls back gracefully.
|
||||
return f"{dollars:.2f}" if isinstance(dollars, (int, float)) else ""
|
||||
|
||||
_monthly = getattr(_sub, "monthly_credits", None)
|
||||
_has_cap = isinstance(_monthly, (int, float)) and _monthly > 0
|
||||
_paid = getattr(info, "paid_service_access", None)
|
||||
return CreditsState(
|
||||
remaining_micros=_to_micros(getattr(_acc, "total_usable_credits", None)),
|
||||
remaining_usd=_to_usd(getattr(_acc, "total_usable_credits", None)),
|
||||
subscription_micros=_to_micros(getattr(_acc, "subscription_credits_remaining", None)),
|
||||
subscription_usd=_to_usd(getattr(_acc, "subscription_credits_remaining", None)),
|
||||
subscription_limit_micros=_to_micros(_monthly) if _has_cap else None,
|
||||
subscription_limit_usd=_to_usd(_monthly) if _has_cap else None,
|
||||
purchased_micros=_to_micros(getattr(_acc, "purchased_credits_remaining", None)),
|
||||
purchased_usd=_to_usd(getattr(_acc, "purchased_credits_remaining", None)),
|
||||
rollover_micros=_to_micros(getattr(_sub, "rollover_credits", None)),
|
||||
denominator_kind="subscription_cap" if _has_cap else "none",
|
||||
paid_access=_paid if isinstance(_paid, bool) else True,
|
||||
from_header=False,
|
||||
captured_at=time.time(),
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("credits ▸ seed account→state mapping failed", exc_info=True)
|
||||
return None
|
||||
|
||||
|
||||
def _hydrate_seed_state(agent, state) -> None:
|
||||
"""Install a seed CreditsState on the agent and fire the notice policy once.
|
||||
|
||||
Sets _credits_state, latches session-start remaining, and primes the crossing
|
||||
gate (the cold-start snapshot IS the first observation, so a session that opens
|
||||
already in a band warns immediately — the live header path keeps true crossing
|
||||
semantics), then emits. Safe to call from a worker thread: emit already runs
|
||||
off-thread in the TUI build path."""
|
||||
agent._credits_state = state
|
||||
if getattr(agent, "_credits_session_start_micros", None) is None:
|
||||
agent._credits_session_start_micros = state.remaining_micros
|
||||
_latch = getattr(agent, "_credits_latch", None)
|
||||
if isinstance(_latch, dict) and state.used_fraction is not None:
|
||||
_latch["seen_below_90"] = True
|
||||
emit = getattr(agent, "_emit_credits_notices", None)
|
||||
if callable(emit):
|
||||
emit()
|
||||
|
||||
|
||||
def seed_credits_at_session_start(agent) -> bool:
|
||||
"""Hydrate agent._credits_state from /api/oauth/account (or a dev fixture) and
|
||||
fire the notice policy, so depletion / usage-band warnings show at session OPEN.
|
||||
|
||||
Shared by (a) the TUI/desktop agent build (fires at "ready", before any message)
|
||||
and (b) the first-turn conversation setup (fallback for plain CLI / when the
|
||||
build path didn't seed). Idempotent: a second call is a no-op once a seed or a
|
||||
real header has already populated _credits_state.
|
||||
|
||||
Returns True if it seeded this call, False otherwise (not nous / already seeded /
|
||||
fail-open error). Never raises — credits must never block session startup.
|
||||
"""
|
||||
try:
|
||||
if getattr(agent, "provider", "") != "nous":
|
||||
return False
|
||||
# Idempotent: don't re-seed if state already exists (seed or live header).
|
||||
if getattr(agent, "_credits_state", None) is not None:
|
||||
return False
|
||||
fixture = None
|
||||
try:
|
||||
fixture = dev_fixture_credits_state()
|
||||
except Exception:
|
||||
fixture = None
|
||||
if fixture is not None:
|
||||
# Synchronous: a fixture is instant (no network), and tests rely on the
|
||||
# state + notice landing before this returns.
|
||||
_hydrate_seed_state(agent, fixture)
|
||||
return True
|
||||
|
||||
# Real portal fetch is FIRE-AND-FORGET: a slow/unreachable portal must never
|
||||
# delay session "ready". A daemon thread hydrates + emits when it resolves,
|
||||
# re-checking idempotency first (a live inference header may land before it).
|
||||
import threading
|
||||
|
||||
def _bg_seed() -> None:
|
||||
try:
|
||||
from hermes_cli.nous_account import get_nous_portal_account_info
|
||||
info = get_nous_portal_account_info(force_fresh=True)
|
||||
if getattr(agent, "_credits_state", None) is not None:
|
||||
return # a live inference header beat us — don't clobber it
|
||||
state = _credits_state_from_account(info)
|
||||
if state is not None:
|
||||
_hydrate_seed_state(agent, state)
|
||||
except Exception:
|
||||
logger.debug("credits ▸ session-start seed (background) failed", exc_info=True)
|
||||
|
||||
threading.Thread(target=_bg_seed, name="credits-seed", daemon=True).start()
|
||||
return True
|
||||
except Exception:
|
||||
# Fail-open: any auth/portal hiccup leaves _credits_state as-is, never blocks.
|
||||
# Innermost log across all four call sites (TUI build / CLI build / first
|
||||
# turn / desktop), so a dead session-open seed is diagnosable in agent.log.
|
||||
logger.debug("credits ▸ session-start seed failed (fail-open)", exc_info=True)
|
||||
return False
|
||||
1843
agent/curator.py
1843
agent/curator.py
File diff suppressed because it is too large
Load Diff
@@ -1,695 +0,0 @@
|
||||
"""Curator snapshot + rollback.
|
||||
|
||||
A pre-run snapshot of ``~/.hermes/skills/`` (excluding ``.curator_backups/``
|
||||
itself) is taken before any mutating curator pass. Snapshots are tar.gz
|
||||
files under ``~/.hermes/skills/.curator_backups/<utc-iso>/`` with a
|
||||
companion ``manifest.json`` describing the snapshot (reason, time, size,
|
||||
counted skill files). Rollback picks a snapshot, moves the current
|
||||
``skills/`` tree aside into another snapshot so even the rollback itself
|
||||
is undoable, then extracts the chosen snapshot into place.
|
||||
|
||||
The snapshot does NOT include:
|
||||
- ``.curator_backups/`` (would recurse)
|
||||
- ``.hub/`` (hub-installed skills — managed by the hub, not us)
|
||||
|
||||
It DOES include:
|
||||
- all SKILL.md files + their directories (``scripts/``, ``references/``,
|
||||
``templates/``, ``assets/``)
|
||||
- ``.usage.json`` (usage telemetry — needed to rehydrate state cleanly)
|
||||
- ``.archive/`` (so rollback restores previously-archived skills too)
|
||||
- ``.curator_state`` (so rolling back also restores the last-run-at
|
||||
pointer — otherwise the curator would immediately re-fire on the next
|
||||
tick)
|
||||
- ``.bundled_manifest`` (so protection markers stay consistent)
|
||||
- ``.curator_suppressed`` (so rollback restores the set of pruned built-ins
|
||||
the re-seeder must leave archived)
|
||||
|
||||
Alongside the skills tarball, each snapshot also captures a copy of
|
||||
``~/.hermes/cron/jobs.json`` as ``cron-jobs.json`` when it exists. Cron
|
||||
jobs reference skills by name in their ``skills``/``skill`` fields; the
|
||||
curator's consolidation pass rewrites those in place via
|
||||
``cron.jobs.rewrite_skill_refs()``. Without capturing the pre-run state,
|
||||
rolling back the skills tree would leave cron jobs pointing at the
|
||||
umbrella skills even though the narrow skills they were originally
|
||||
configured with have been restored. We store the whole jobs.json for
|
||||
fidelity but rollback only touches the ``skills``/``skill`` fields — the
|
||||
rest (schedule, next_run_at, enabled, prompt, etc.) is live state and
|
||||
we leave it alone.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import shutil
|
||||
import tarfile
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from agent.skill_utils import is_excluded_skill_path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
DEFAULT_KEEP = 5
|
||||
|
||||
# Entries under skills/ that should NEVER be rolled up into a snapshot.
|
||||
# .hub/ is managed by the skills hub; rolling it back would break lockfile
|
||||
# invariants. .curator_backups is the backup dir itself — recursion bomb.
|
||||
_EXCLUDE_TOP_LEVEL = {".curator_backups", ".hub"}
|
||||
|
||||
# Snapshot id regex: UTC ISO with colons replaced by dashes so the filename
|
||||
# is portable (Windows-safe). An optional ``-NN`` suffix handles two
|
||||
# snapshots landing in the same wallclock second.
|
||||
_ID_RE = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}Z(-\d{2})?$")
|
||||
|
||||
|
||||
def _backups_dir() -> Path:
|
||||
return get_hermes_home() / "skills" / ".curator_backups"
|
||||
|
||||
|
||||
def _skills_dir() -> Path:
|
||||
return get_hermes_home() / "skills"
|
||||
|
||||
|
||||
def _cron_jobs_file() -> Path:
|
||||
"""Source path for the live cron jobs store (``~/.hermes/cron/jobs.json``)."""
|
||||
return get_hermes_home() / "cron" / "jobs.json"
|
||||
|
||||
|
||||
CRON_JOBS_FILENAME = "cron-jobs.json"
|
||||
|
||||
|
||||
def _backup_cron_jobs_into(dest: Path) -> Dict[str, Any]:
|
||||
"""Copy the live cron jobs.json into ``dest`` as ``cron-jobs.json``.
|
||||
|
||||
Returns a small dict describing what was captured so the caller can
|
||||
fold it into the manifest. Never raises — if the cron file is missing
|
||||
or unreadable, the return dict has ``backed_up=False`` and the reason,
|
||||
and the snapshot proceeds without cron data (the snapshot is still
|
||||
useful for rolling back skills).
|
||||
"""
|
||||
src = _cron_jobs_file()
|
||||
info: Dict[str, Any] = {"backed_up": False, "jobs_count": 0}
|
||||
if not src.exists():
|
||||
info["reason"] = "no cron/jobs.json present"
|
||||
return info
|
||||
try:
|
||||
raw = src.read_text(encoding="utf-8")
|
||||
except OSError as e:
|
||||
logger.debug("Failed to read cron/jobs.json for backup: %s", e)
|
||||
info["reason"] = f"read error: {e}"
|
||||
return info
|
||||
# Count jobs as a nice diagnostic — but don't fail the snapshot if the
|
||||
# file is unparseable; just store the raw text and let rollback deal
|
||||
# with it (or not, if it's corrupted). jobs.json wraps the list as
|
||||
# `{"jobs": [...], "updated_at": ...}` — we count via that shape, and
|
||||
# fall back to bare-list shape just in case the format ever changes.
|
||||
try:
|
||||
parsed = json.loads(raw)
|
||||
if isinstance(parsed, dict):
|
||||
inner = parsed.get("jobs")
|
||||
if isinstance(inner, list):
|
||||
info["jobs_count"] = len(inner)
|
||||
elif isinstance(parsed, list):
|
||||
info["jobs_count"] = len(parsed)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
info["jobs_count"] = 0
|
||||
info["parse_warning"] = "jobs.json was not valid JSON at snapshot time"
|
||||
try:
|
||||
(dest / CRON_JOBS_FILENAME).write_text(raw, encoding="utf-8")
|
||||
except OSError as e:
|
||||
logger.debug("Failed to write cron backup file: %s", e)
|
||||
info["reason"] = f"write error: {e}"
|
||||
return info
|
||||
info["backed_up"] = True
|
||||
return info
|
||||
|
||||
|
||||
def _utc_id(now: Optional[datetime] = None) -> str:
|
||||
"""UTC ISO-ish filesystem-safe timestamp: ``2026-05-01T13-05-42Z``."""
|
||||
if now is None:
|
||||
now = datetime.now(timezone.utc)
|
||||
# isoformat → "2026-05-01T13:05:42.123456+00:00"; strip subseconds and tz.
|
||||
s = now.replace(microsecond=0).isoformat()
|
||||
if s.endswith("+00:00"):
|
||||
s = s[:-6]
|
||||
return s.replace(":", "-") + "Z"
|
||||
|
||||
|
||||
def _load_config() -> Dict[str, Any]:
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
except Exception as e:
|
||||
logger.debug("Failed to load config for curator backup: %s", e)
|
||||
return {}
|
||||
if not isinstance(cfg, dict):
|
||||
return {}
|
||||
cur = cfg.get("curator") or {}
|
||||
if not isinstance(cur, dict):
|
||||
return {}
|
||||
bk = cur.get("backup") or {}
|
||||
return bk if isinstance(bk, dict) else {}
|
||||
|
||||
|
||||
def is_enabled() -> bool:
|
||||
"""Default ON — the whole point of the backup is safety by default."""
|
||||
return bool(_load_config().get("enabled", True))
|
||||
|
||||
|
||||
def get_keep() -> int:
|
||||
cfg = _load_config()
|
||||
try:
|
||||
n = int(cfg.get("keep", DEFAULT_KEEP))
|
||||
except (TypeError, ValueError):
|
||||
n = DEFAULT_KEEP
|
||||
return max(1, n)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Snapshot
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _count_skill_files(base: Path) -> int:
|
||||
try:
|
||||
return sum(
|
||||
1 for p in base.rglob("SKILL.md") if not is_excluded_skill_path(p)
|
||||
)
|
||||
except OSError:
|
||||
return 0
|
||||
|
||||
|
||||
def _write_manifest(dest: Path, reason: str, archive_path: Path,
|
||||
skills_counted: int,
|
||||
cron_info: Optional[Dict[str, Any]] = None) -> None:
|
||||
manifest = {
|
||||
"id": dest.name,
|
||||
"reason": reason,
|
||||
"created_at": datetime.now(timezone.utc).isoformat(),
|
||||
"archive": archive_path.name,
|
||||
"archive_bytes": archive_path.stat().st_size,
|
||||
"skill_files": skills_counted,
|
||||
}
|
||||
if cron_info is not None:
|
||||
manifest["cron_jobs"] = {
|
||||
"backed_up": bool(cron_info.get("backed_up", False)),
|
||||
"jobs_count": int(cron_info.get("jobs_count", 0)),
|
||||
}
|
||||
if not cron_info.get("backed_up"):
|
||||
manifest["cron_jobs"]["reason"] = cron_info.get("reason", "not captured")
|
||||
if cron_info.get("parse_warning"):
|
||||
manifest["cron_jobs"]["parse_warning"] = cron_info["parse_warning"]
|
||||
(dest / "manifest.json").write_text(
|
||||
json.dumps(manifest, indent=2, sort_keys=True), encoding="utf-8"
|
||||
)
|
||||
|
||||
|
||||
def snapshot_skills(reason: str = "manual") -> Optional[Path]:
|
||||
"""Create a tar.gz snapshot of ``~/.hermes/skills/`` and prune old ones.
|
||||
|
||||
Returns the snapshot directory path, or ``None`` if the snapshot was
|
||||
skipped (backup disabled, skills dir missing, or an IO error occurred —
|
||||
in which case we log at debug and return None so the curator never
|
||||
aborts a pass because of a backup failure).
|
||||
"""
|
||||
if not is_enabled():
|
||||
logger.debug("Curator backup disabled by config; skipping snapshot")
|
||||
return None
|
||||
|
||||
skills = _skills_dir()
|
||||
if not skills.exists():
|
||||
logger.debug("No ~/.hermes/skills/ directory — nothing to back up")
|
||||
return None
|
||||
|
||||
backups = _backups_dir()
|
||||
try:
|
||||
backups.mkdir(parents=True, exist_ok=True)
|
||||
except OSError as e:
|
||||
logger.debug("Failed to create backups dir %s: %s", backups, e)
|
||||
return None
|
||||
|
||||
# Uniquify: if a snapshot with the same second already exists (can
|
||||
# happen if two curator runs fire in the same second), append a short
|
||||
# counter. Avoids clobbering and avoids timestamp collisions.
|
||||
base_id = _utc_id()
|
||||
snap_id = base_id
|
||||
counter = 1
|
||||
while (backups / snap_id).exists():
|
||||
snap_id = f"{base_id}-{counter:02d}"
|
||||
counter += 1
|
||||
|
||||
dest = backups / snap_id
|
||||
try:
|
||||
dest.mkdir(parents=True, exist_ok=False)
|
||||
except OSError as e:
|
||||
logger.debug("Failed to create snapshot dir %s: %s", dest, e)
|
||||
return None
|
||||
|
||||
archive = dest / "skills.tar.gz"
|
||||
try:
|
||||
# Stream into the tarball — no tempdir copy needed.
|
||||
with tarfile.open(archive, "w:gz", compresslevel=6) as tf:
|
||||
for entry in sorted(skills.iterdir()):
|
||||
if entry.name in _EXCLUDE_TOP_LEVEL:
|
||||
continue
|
||||
# arcname: store paths relative to skills/ so extraction
|
||||
# drops cleanly back into the skills dir.
|
||||
tf.add(str(entry), arcname=entry.name, recursive=True)
|
||||
# Capture cron/jobs.json alongside the tarball. Never fails the
|
||||
# snapshot — the skills side is the core guarantee; cron is
|
||||
# additive. We still record in the manifest whether it was
|
||||
# captured so rollback can surface "no cron data in this snapshot".
|
||||
cron_info = _backup_cron_jobs_into(dest)
|
||||
_write_manifest(dest, reason, archive,
|
||||
_count_skill_files(skills),
|
||||
cron_info=cron_info)
|
||||
except (OSError, tarfile.TarError) as e:
|
||||
logger.debug("Curator snapshot failed: %s", e, exc_info=True)
|
||||
# Clean up partial snapshot
|
||||
try:
|
||||
shutil.rmtree(dest, ignore_errors=True)
|
||||
except OSError:
|
||||
pass
|
||||
return None
|
||||
|
||||
_prune_old(keep=get_keep())
|
||||
logger.info("Curator snapshot created: %s (%s)", snap_id, reason)
|
||||
return dest
|
||||
|
||||
|
||||
def _prune_old(keep: int) -> List[str]:
|
||||
"""Delete regular snapshots beyond the newest *keep*. Returns deleted
|
||||
ids. Staging dirs (``.rollback-staging-*``) are implementation detail
|
||||
and pruned independently on every call."""
|
||||
backups = _backups_dir()
|
||||
if not backups.exists():
|
||||
return []
|
||||
entries: List[Tuple[str, Path]] = []
|
||||
stale_staging: List[Path] = []
|
||||
for child in backups.iterdir():
|
||||
if not child.is_dir():
|
||||
continue
|
||||
if child.name.startswith(".rollback-staging-"):
|
||||
# Staging dirs are only supposed to exist briefly during a
|
||||
# rollback. If we find one here (e.g. from a crashed rollback),
|
||||
# clean it up opportunistically.
|
||||
stale_staging.append(child)
|
||||
continue
|
||||
if _ID_RE.match(child.name):
|
||||
entries.append((child.name, child))
|
||||
# Newest first (lexicographic works because the id is UTC ISO).
|
||||
entries.sort(key=lambda t: t[0], reverse=True)
|
||||
deleted: List[str] = []
|
||||
for _, path in entries[keep:]:
|
||||
try:
|
||||
shutil.rmtree(path)
|
||||
deleted.append(path.name)
|
||||
except OSError as e:
|
||||
logger.debug("Failed to prune %s: %s", path, e)
|
||||
for path in stale_staging:
|
||||
try:
|
||||
shutil.rmtree(path)
|
||||
except OSError as e:
|
||||
logger.debug("Failed to clean stale staging dir %s: %s", path, e)
|
||||
return deleted
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# List + rollback
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _read_manifest(snap_dir: Path) -> Dict[str, Any]:
|
||||
mf = snap_dir / "manifest.json"
|
||||
if not mf.exists():
|
||||
return {}
|
||||
try:
|
||||
return json.loads(mf.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
return {}
|
||||
|
||||
|
||||
def list_backups() -> List[Dict[str, Any]]:
|
||||
"""Return all restorable snapshots, newest first. Only entries with a
|
||||
real ``skills.tar.gz`` tarball are listed — transient
|
||||
``.rollback-staging-*`` directories created mid-rollback are
|
||||
implementation detail and not shown."""
|
||||
backups = _backups_dir()
|
||||
if not backups.exists():
|
||||
return []
|
||||
out: List[Dict[str, Any]] = []
|
||||
for child in sorted(backups.iterdir(), reverse=True):
|
||||
if not child.is_dir():
|
||||
continue
|
||||
if not _ID_RE.match(child.name):
|
||||
continue
|
||||
if not (child / "skills.tar.gz").exists():
|
||||
continue
|
||||
mf = _read_manifest(child)
|
||||
mf.setdefault("id", child.name)
|
||||
mf.setdefault("path", str(child))
|
||||
if "archive_bytes" not in mf:
|
||||
arc = child / "skills.tar.gz"
|
||||
try:
|
||||
mf["archive_bytes"] = arc.stat().st_size
|
||||
except OSError:
|
||||
mf["archive_bytes"] = 0
|
||||
out.append(mf)
|
||||
return out
|
||||
|
||||
|
||||
def _resolve_backup(backup_id: Optional[str]) -> Optional[Path]:
|
||||
"""Return the path of the requested backup, or the newest one if
|
||||
*backup_id* is None. Returns None if no match."""
|
||||
backups = _backups_dir()
|
||||
if not backups.exists():
|
||||
return None
|
||||
if backup_id:
|
||||
target = backups / backup_id
|
||||
if (
|
||||
target.is_dir()
|
||||
and _ID_RE.match(backup_id)
|
||||
and (target / "skills.tar.gz").exists()
|
||||
):
|
||||
return target
|
||||
return None
|
||||
candidates = [
|
||||
c for c in sorted(backups.iterdir(), reverse=True)
|
||||
if c.is_dir() and _ID_RE.match(c.name) and (c / "skills.tar.gz").exists()
|
||||
]
|
||||
return candidates[0] if candidates else None
|
||||
|
||||
|
||||
def _restore_cron_skill_links(snapshot_dir: Path) -> Dict[str, Any]:
|
||||
"""Reconcile backed-up cron skill links into the live ``cron/jobs.json``.
|
||||
|
||||
We do NOT overwrite the whole cron file. Only the ``skills`` and
|
||||
``skill`` fields are restored, and only on jobs that still exist in the
|
||||
current file (matched by ``id``). Everything else about the job —
|
||||
schedule, next_run_at, last_run_at, enabled, prompt, workdir, hooks —
|
||||
is live state that the user/scheduler has modified since the snapshot;
|
||||
overwriting it would regress unrelated cron activity.
|
||||
|
||||
Rules:
|
||||
- Jobs present in backup AND live, with differing skills → skills restored.
|
||||
- Jobs present in backup AND live, with matching skills → no-op.
|
||||
- Jobs present in backup but gone from live (user deleted the job
|
||||
after the snapshot) → skipped, noted in the return report.
|
||||
- Jobs present in live but not in backup (user created a new cron
|
||||
job after the snapshot) → left untouched.
|
||||
|
||||
Never raises; failures are captured in the return dict. Writes through
|
||||
``cron.jobs`` to pick up the same lock + atomic-write path that tick()
|
||||
uses, so we don't race the scheduler.
|
||||
"""
|
||||
report: Dict[str, Any] = {
|
||||
"attempted": False,
|
||||
"restored": [],
|
||||
"skipped_missing": [],
|
||||
"unchanged": 0,
|
||||
"error": None,
|
||||
}
|
||||
backup_file = snapshot_dir / CRON_JOBS_FILENAME
|
||||
if not backup_file.exists():
|
||||
report["error"] = f"snapshot has no {CRON_JOBS_FILENAME}"
|
||||
return report
|
||||
|
||||
try:
|
||||
backup_text = backup_file.read_text(encoding="utf-8")
|
||||
backup_parsed = json.loads(backup_text)
|
||||
except (OSError, json.JSONDecodeError) as e:
|
||||
report["error"] = f"failed to load backed-up jobs: {e}"
|
||||
return report
|
||||
# jobs.json on disk is `{"jobs": [...], "updated_at": ...}`; accept both
|
||||
# that shape and a bare list for forward compat.
|
||||
if isinstance(backup_parsed, dict):
|
||||
backup_jobs = backup_parsed.get("jobs")
|
||||
elif isinstance(backup_parsed, list):
|
||||
backup_jobs = backup_parsed
|
||||
else:
|
||||
backup_jobs = None
|
||||
if not isinstance(backup_jobs, list):
|
||||
report["error"] = "backed-up cron-jobs.json has no jobs list"
|
||||
return report
|
||||
|
||||
# Build a lookup of the backed-up skill state keyed by job id.
|
||||
# We only need the two skill-ish fields (legacy single and modern list).
|
||||
backup_by_id: Dict[str, Dict[str, Any]] = {}
|
||||
for job in backup_jobs:
|
||||
if not isinstance(job, dict):
|
||||
continue
|
||||
jid = job.get("id")
|
||||
if not isinstance(jid, str) or not jid:
|
||||
continue
|
||||
backup_by_id[jid] = {
|
||||
"skills": job.get("skills"),
|
||||
"skill": job.get("skill"),
|
||||
"name": job.get("name") or jid,
|
||||
}
|
||||
|
||||
if not backup_by_id:
|
||||
report["attempted"] = True # we tried but there was nothing to do
|
||||
return report
|
||||
|
||||
# Load and rewrite the live jobs under the scheduler's lock.
|
||||
try:
|
||||
from cron.jobs import load_jobs, save_jobs, _jobs_file_lock
|
||||
except ImportError as e:
|
||||
report["error"] = f"cron module unavailable: {e}"
|
||||
return report
|
||||
|
||||
report["attempted"] = True
|
||||
try:
|
||||
with _jobs_file_lock:
|
||||
live_jobs = load_jobs()
|
||||
changed = False
|
||||
|
||||
live_ids = set()
|
||||
for live in live_jobs:
|
||||
if not isinstance(live, dict):
|
||||
continue
|
||||
jid = live.get("id")
|
||||
if not isinstance(jid, str) or not jid:
|
||||
continue
|
||||
live_ids.add(jid)
|
||||
|
||||
backup = backup_by_id.get(jid)
|
||||
if backup is None:
|
||||
continue # live job didn't exist at snapshot time
|
||||
|
||||
cur_skills = live.get("skills")
|
||||
cur_skill = live.get("skill")
|
||||
bkp_skills = backup.get("skills")
|
||||
bkp_skill = backup.get("skill")
|
||||
|
||||
if cur_skills == bkp_skills and cur_skill == bkp_skill:
|
||||
report["unchanged"] += 1
|
||||
continue
|
||||
|
||||
# Restore. Preserve absence (don't force the key to appear
|
||||
# if the backup didn't have it either).
|
||||
if bkp_skills is None:
|
||||
live.pop("skills", None)
|
||||
else:
|
||||
live["skills"] = bkp_skills
|
||||
if bkp_skill is None:
|
||||
live.pop("skill", None)
|
||||
else:
|
||||
live["skill"] = bkp_skill
|
||||
|
||||
report["restored"].append({
|
||||
"job_id": jid,
|
||||
"job_name": backup.get("name") or jid,
|
||||
"from": {"skills": cur_skills, "skill": cur_skill},
|
||||
"to": {"skills": bkp_skills, "skill": bkp_skill},
|
||||
})
|
||||
changed = True
|
||||
|
||||
# Jobs in backup but not in live = user deleted them after snapshot
|
||||
for jid, backup in backup_by_id.items():
|
||||
if jid not in live_ids:
|
||||
report["skipped_missing"].append({
|
||||
"job_id": jid,
|
||||
"job_name": backup.get("name") or jid,
|
||||
})
|
||||
|
||||
if changed:
|
||||
save_jobs(live_jobs)
|
||||
except Exception as e: # noqa: BLE001 — rollback must not die mid-restore
|
||||
logger.debug("Cron skill-link restore failed: %s", e, exc_info=True)
|
||||
report["error"] = f"restore failed mid-flight: {e}"
|
||||
|
||||
return report
|
||||
|
||||
|
||||
|
||||
def rollback(backup_id: Optional[str] = None) -> Tuple[bool, str, Optional[Path]]:
|
||||
"""Restore ``~/.hermes/skills/`` from a snapshot.
|
||||
|
||||
Strategy:
|
||||
1. Resolve the target snapshot (explicit id or newest regular).
|
||||
2. Take a safety snapshot of the CURRENT skills tree under
|
||||
``.curator_backups/pre-rollback-<ts>/`` so the rollback itself is
|
||||
undoable.
|
||||
3. Move all current top-level entries (except ``.curator_backups``
|
||||
and ``.hub``) into a tempdir.
|
||||
4. Extract the chosen snapshot into ``~/.hermes/skills/``.
|
||||
5. On failure during 4, move the tempdir contents back (best-effort)
|
||||
and return failure.
|
||||
|
||||
Returns ``(ok, message, snapshot_path)``.
|
||||
"""
|
||||
target = _resolve_backup(backup_id)
|
||||
if target is None:
|
||||
return (
|
||||
False,
|
||||
f"no matching backup found"
|
||||
+ (f" for id '{backup_id}'" if backup_id else "")
|
||||
+ " (use `hermes curator rollback --list` to see available snapshots)",
|
||||
None,
|
||||
)
|
||||
archive = target / "skills.tar.gz"
|
||||
if not archive.exists():
|
||||
return (False, f"snapshot {target.name} has no skills.tar.gz — corrupted?", None)
|
||||
|
||||
skills = _skills_dir()
|
||||
skills.mkdir(parents=True, exist_ok=True)
|
||||
backups = _backups_dir()
|
||||
backups.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Step 2: safety snapshot of current state FIRST. If this fails we bail
|
||||
# out before touching anything — otherwise a failed extract could leave
|
||||
# the user with no skills.
|
||||
try:
|
||||
snapshot_skills(reason=f"pre-rollback to {target.name}")
|
||||
except Exception as e:
|
||||
return (False, f"pre-rollback safety snapshot failed: {e}", None)
|
||||
|
||||
# Additionally move current entries into an internal staging dir so
|
||||
# the extract happens into an empty skills tree (predictable result).
|
||||
# This dir is implementation detail — not listed as a restorable
|
||||
# backup. The safety snapshot above is the user-facing undo handle.
|
||||
staged = backups / f".rollback-staging-{_utc_id()}"
|
||||
try:
|
||||
staged.mkdir(parents=True, exist_ok=False)
|
||||
except OSError as e:
|
||||
return (False, f"failed to create staging dir: {e}", None)
|
||||
|
||||
moved: List[Tuple[Path, Path]] = []
|
||||
try:
|
||||
for entry in list(skills.iterdir()):
|
||||
if entry.name in _EXCLUDE_TOP_LEVEL:
|
||||
continue
|
||||
dest = staged / entry.name
|
||||
shutil.move(str(entry), str(dest))
|
||||
moved.append((entry, dest))
|
||||
except OSError as e:
|
||||
# Best-effort rollback of the move
|
||||
for orig, dest in moved:
|
||||
try:
|
||||
shutil.move(str(dest), str(orig))
|
||||
except OSError:
|
||||
pass
|
||||
try:
|
||||
shutil.rmtree(staged, ignore_errors=True)
|
||||
except OSError:
|
||||
pass
|
||||
return (False, f"failed to stage current skills: {e}", None)
|
||||
|
||||
# Step 4: extract the snapshot into skills/
|
||||
try:
|
||||
with tarfile.open(archive, "r:gz") as tf:
|
||||
# Python 3.12+ supports filter='data' for safer extraction.
|
||||
# Fall back to the unfiltered call for older interpreters but
|
||||
# still reject absolute paths and .. components defensively.
|
||||
for member in tf.getmembers():
|
||||
name = member.name
|
||||
if name.startswith("/") or ".." in Path(name).parts:
|
||||
raise tarfile.TarError(
|
||||
f"refusing to extract unsafe path: {name!r}"
|
||||
)
|
||||
try:
|
||||
tf.extractall(str(skills), filter="data") # type: ignore[call-arg]
|
||||
except TypeError:
|
||||
# Python < 3.12 — no filter kwarg
|
||||
tf.extractall(str(skills))
|
||||
except (OSError, tarfile.TarError) as e:
|
||||
# Best-effort recover: move staged contents back
|
||||
for orig, dest in moved:
|
||||
try:
|
||||
shutil.move(str(dest), str(orig))
|
||||
except OSError:
|
||||
pass
|
||||
try:
|
||||
shutil.rmtree(staged, ignore_errors=True)
|
||||
except OSError:
|
||||
pass
|
||||
return (False, f"snapshot extract failed (state restored): {e}", None)
|
||||
|
||||
# Extract succeeded — the staging dir has served its purpose. The
|
||||
# user's undo handle is the safety snapshot tarball we took earlier.
|
||||
try:
|
||||
shutil.rmtree(staged, ignore_errors=True)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
# Reconcile cron skill-links. Surgical: only the skills/skill fields
|
||||
# on jobs matched by id. Everything else in jobs.json is live state
|
||||
# (schedule, next_run_at, enabled, prompt, etc.) and we leave it
|
||||
# alone. Failures here don't fail the overall rollback — the skills
|
||||
# tree is already restored, which is the main guarantee.
|
||||
cron_report = _restore_cron_skill_links(target)
|
||||
|
||||
summary_bits = [f"restored from snapshot {target.name}"]
|
||||
if cron_report.get("attempted"):
|
||||
restored_n = len(cron_report.get("restored") or [])
|
||||
skipped_n = len(cron_report.get("skipped_missing") or [])
|
||||
if cron_report.get("error"):
|
||||
summary_bits.append(f"cron links: error — {cron_report['error']}")
|
||||
elif restored_n == 0 and skipped_n == 0 and cron_report.get("unchanged", 0) == 0:
|
||||
# Attempted but nothing matched — empty snapshot or no overlapping ids.
|
||||
pass
|
||||
else:
|
||||
parts = []
|
||||
if restored_n:
|
||||
parts.append(f"{restored_n} job(s) had skill links restored")
|
||||
if skipped_n:
|
||||
parts.append(f"{skipped_n} backed-up job(s) no longer exist (skipped)")
|
||||
if cron_report.get("unchanged"):
|
||||
parts.append(f"{cron_report['unchanged']} already matched")
|
||||
summary_bits.append("cron links: " + ", ".join(parts))
|
||||
|
||||
logger.info("Curator rollback: restored from %s (cron_report=%s)",
|
||||
target.name, cron_report)
|
||||
return (True, "; ".join(summary_bits), target)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Human-readable summary for CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def format_size(n: int) -> str:
|
||||
for unit in ("B", "KB", "MB", "GB"):
|
||||
if n < 1024 or unit == "GB":
|
||||
return f"{n:.1f} {unit}" if unit != "B" else f"{n} B"
|
||||
n /= 1024
|
||||
return f"{n:.1f} GB"
|
||||
|
||||
|
||||
def summarize_backups() -> str:
|
||||
rows = list_backups()
|
||||
if not rows:
|
||||
return "No curator snapshots yet."
|
||||
lines = [f"{'id':<24} {'reason':<40} {'skills':>6} {'size':>8}"]
|
||||
lines.append("─" * len(lines[0]))
|
||||
for r in rows:
|
||||
lines.append(
|
||||
f"{r.get('id','?'):<24} "
|
||||
f"{(r.get('reason','?') or '?')[:40]:<40} "
|
||||
f"{r.get('skill_files', 0):>6} "
|
||||
f"{format_size(int(r.get('archive_bytes', 0))):>8}"
|
||||
)
|
||||
return "\n".join(lines)
|
||||
1033
agent/display.py
1033
agent/display.py
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,640 +0,0 @@
|
||||
"""Shared file safety rules used by both tools and ACP shims."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def _hermes_home_path() -> Path:
|
||||
"""Resolve the active HERMES_HOME (profile-aware) without circular imports."""
|
||||
try:
|
||||
from hermes_constants import get_hermes_home # local import to avoid cycles
|
||||
return get_hermes_home()
|
||||
except Exception:
|
||||
return Path(os.path.expanduser("~/.hermes"))
|
||||
|
||||
|
||||
def _hermes_root_path() -> Path:
|
||||
"""Resolve the Hermes root dir (always the parent of any profile, never per-profile)."""
|
||||
try:
|
||||
from hermes_constants import get_default_hermes_root # local import to avoid cycles
|
||||
return get_default_hermes_root()
|
||||
except Exception:
|
||||
return Path(os.path.expanduser("~/.hermes"))
|
||||
|
||||
|
||||
def build_write_denied_paths(home: str) -> set[str]:
|
||||
"""Return exact sensitive paths that must never be written."""
|
||||
hermes_home = _hermes_home_path()
|
||||
hermes_root = _hermes_root_path()
|
||||
return {
|
||||
os.path.realpath(p)
|
||||
for p in [
|
||||
os.path.join(home, ".ssh", "authorized_keys"),
|
||||
os.path.join(home, ".ssh", "id_rsa"),
|
||||
os.path.join(home, ".ssh", "id_ed25519"),
|
||||
os.path.join(home, ".ssh", "config"),
|
||||
# Active profile .env (or top-level .env when not in profile mode).
|
||||
str(hermes_home / ".env"),
|
||||
# Top-level .env, even when running under a profile — overwriting it
|
||||
# leaks credentials across every profile that inherits from root (#15981).
|
||||
str(hermes_root / ".env"),
|
||||
# Active profile Anthropic PKCE credential store.
|
||||
str(hermes_home / ".anthropic_oauth.json"),
|
||||
# Top-level Anthropic PKCE credential store remains sensitive even
|
||||
# when a profile is active; default/non-profile sessions still read it.
|
||||
str(hermes_root / ".anthropic_oauth.json"),
|
||||
os.path.join(home, ".bashrc"),
|
||||
os.path.join(home, ".zshrc"),
|
||||
os.path.join(home, ".profile"),
|
||||
os.path.join(home, ".bash_profile"),
|
||||
os.path.join(home, ".zprofile"),
|
||||
os.path.join(home, ".netrc"),
|
||||
os.path.join(home, ".pgpass"),
|
||||
os.path.join(home, ".npmrc"),
|
||||
os.path.join(home, ".pypirc"),
|
||||
os.path.join(home, ".git-credentials"),
|
||||
"/etc/sudoers",
|
||||
"/etc/passwd",
|
||||
"/etc/shadow",
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
def build_write_denied_prefixes(home: str) -> list[str]:
|
||||
"""Return sensitive directory prefixes that must never be written."""
|
||||
return [
|
||||
os.path.realpath(p) + os.sep
|
||||
for p in [
|
||||
os.path.join(home, ".ssh"),
|
||||
os.path.join(home, ".aws"),
|
||||
os.path.join(home, ".gnupg"),
|
||||
os.path.join(home, ".kube"),
|
||||
"/etc/sudoers.d",
|
||||
"/etc/systemd",
|
||||
os.path.join(home, ".docker"),
|
||||
os.path.join(home, ".azure"),
|
||||
os.path.join(home, ".config", "gh"),
|
||||
os.path.join(home, ".config", "gcloud"),
|
||||
]
|
||||
]
|
||||
|
||||
|
||||
def get_safe_write_root() -> Optional[str]:
|
||||
"""Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset."""
|
||||
root = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
|
||||
if not root:
|
||||
return None
|
||||
try:
|
||||
return os.path.realpath(os.path.expanduser(root))
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def is_write_denied(path: str) -> bool:
|
||||
"""Return True if path is blocked by the write denylist or safe root."""
|
||||
home = os.path.realpath(os.path.expanduser("~"))
|
||||
resolved = os.path.realpath(os.path.expanduser(str(path)))
|
||||
|
||||
if resolved in build_write_denied_paths(home):
|
||||
return True
|
||||
for prefix in build_write_denied_prefixes(home):
|
||||
if resolved.startswith(prefix):
|
||||
return True
|
||||
|
||||
# Hermes control-plane files: block both the ACTIVE profile's view
|
||||
# (hermes_home) AND the global root view. Without the root pass, a
|
||||
# profile-mode session leaves <root>/auth.json + <root>/config.yaml
|
||||
# writable — letting a prompt-injected write_file overwrite the global
|
||||
# files that every profile inherits from (same shape as #15981).
|
||||
control_file_names = ("auth.json", "config.yaml", "webhook_subscriptions.json")
|
||||
mcp_tokens_dir_name = "mcp-tokens"
|
||||
|
||||
hermes_dirs = []
|
||||
for base in (_hermes_home_path(), _hermes_root_path()):
|
||||
try:
|
||||
real = os.path.realpath(base)
|
||||
if real not in hermes_dirs:
|
||||
hermes_dirs.append(real)
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
for base_real in hermes_dirs:
|
||||
for name in control_file_names:
|
||||
try:
|
||||
if resolved == os.path.realpath(os.path.join(base_real, name)):
|
||||
return True
|
||||
except Exception:
|
||||
continue
|
||||
try:
|
||||
mcp_real = os.path.realpath(os.path.join(base_real, mcp_tokens_dir_name))
|
||||
if resolved == mcp_real or resolved.startswith(mcp_real + os.sep):
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
pairing_real = os.path.realpath(os.path.join(base_real, "pairing"))
|
||||
if resolved == pairing_real or resolved.startswith(pairing_real + os.sep):
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
safe_root = get_safe_write_root()
|
||||
if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
# Common secret-bearing project-local environment file basenames.
|
||||
# These are blocked because .env files routinely contain API keys,
|
||||
# database passwords, and other credentials.
|
||||
_BLOCKED_PROJECT_ENV_BASENAMES: set[str] = {
|
||||
".env",
|
||||
".env.local",
|
||||
".env.development",
|
||||
".env.production",
|
||||
".env.test",
|
||||
".env.staging",
|
||||
".envrc",
|
||||
}
|
||||
|
||||
|
||||
def get_read_block_error(path: str) -> Optional[str]:
|
||||
"""Return an error message when a read targets a denied Hermes path.
|
||||
|
||||
Three categories are blocked:
|
||||
|
||||
* Internal Hermes cache files under ``HERMES_HOME/skills/.hub`` —
|
||||
readable metadata that an attacker could use as a prompt-injection
|
||||
carrier.
|
||||
* Credential / secret stores under HERMES_HOME and the global Hermes
|
||||
root: ``auth.json``, ``auth.lock``, ``.anthropic_oauth.json``,
|
||||
``.env``, ``webhook_subscriptions.json``, ``auth/google_oauth.json``,
|
||||
and anything under ``mcp-tokens/``. These hold plaintext provider keys,
|
||||
OAuth tokens, and HMAC secrets that the agent never needs to read
|
||||
directly — provider tools / gateway adapters consume them through
|
||||
internal channels.
|
||||
* Project-local environment files anywhere on disk: ``.env``,
|
||||
``.env.local``, ``.env.development``, ``.env.production``,
|
||||
``.env.test``, ``.env.staging``, ``.envrc``. These routinely hold
|
||||
API keys, database passwords, and other credentials for the user's
|
||||
own projects. The agent helping debug a project shouldn't normally
|
||||
need to read these — ``.env.example`` is the documented-shape
|
||||
substitute.
|
||||
|
||||
**This is NOT a security boundary.** The terminal tool runs as the
|
||||
same OS user with shell access; the agent can still ``cat auth.json``
|
||||
or ``cat ~/.hermes/.env`` and exfiltrate the file. The read-deny exists
|
||||
as defense-in-depth that:
|
||||
|
||||
* Returns a clear error to models that respect tool denials, which
|
||||
empirically prompts most modern models to stop rather than reach
|
||||
for the shell.
|
||||
* Surfaces a visible audit trail when something tries to read
|
||||
credentials — easier to spot in logs than a generic ``cat``.
|
||||
|
||||
Treat any user-visible framing around this as "may help" rather than
|
||||
"stops attackers." A determined model or malicious instruction can
|
||||
always shell out.
|
||||
|
||||
Callers that resolve relative paths against a non-process cwd
|
||||
(e.g. ``TERMINAL_CWD`` in ``tools/file_tools.py``) MUST pre-resolve
|
||||
and pass the absolute path string. This function's own ``resolve()``
|
||||
is anchored at the Python process cwd, so a relative input like
|
||||
``"auth.json"`` would otherwise miss the denylist when the task's
|
||||
terminal cwd differs from the process cwd.
|
||||
"""
|
||||
resolved = Path(path).expanduser().resolve()
|
||||
|
||||
# Resolve BOTH the active HERMES_HOME (profile-aware) AND the global
|
||||
# Hermes root so credential stores at <root>/auth.json etc. are also
|
||||
# blocked when running under a profile (HERMES_HOME points at
|
||||
# <root>/profiles/<name> in profile mode). Same shape as the write
|
||||
# deny widening (#15981, #14157).
|
||||
hermes_dirs: list[Path] = []
|
||||
for base in (_hermes_home_path(), _hermes_root_path()):
|
||||
try:
|
||||
real = base.resolve()
|
||||
if real not in hermes_dirs:
|
||||
hermes_dirs.append(real)
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
# Skills .hub: prompt-injection carriers.
|
||||
for hd in hermes_dirs:
|
||||
blocked_dirs = [
|
||||
hd / "skills" / ".hub" / "index-cache",
|
||||
hd / "skills" / ".hub",
|
||||
]
|
||||
for blocked in blocked_dirs:
|
||||
try:
|
||||
resolved.relative_to(blocked)
|
||||
except ValueError:
|
||||
continue
|
||||
return (
|
||||
f"Access denied: {path} is an internal Hermes cache file "
|
||||
"and cannot be read directly to prevent prompt injection. "
|
||||
"Use the skills_list or skill_view tools instead."
|
||||
)
|
||||
|
||||
# Credential / secret stores. Exact-file matches under either
|
||||
# HERMES_HOME or <root>.
|
||||
credential_file_names = (
|
||||
"auth.json",
|
||||
"auth.lock",
|
||||
".anthropic_oauth.json",
|
||||
".env",
|
||||
"webhook_subscriptions.json",
|
||||
os.path.join("auth", "google_oauth.json"),
|
||||
# Bitwarden Secrets Manager disk cache: stores plaintext secret values
|
||||
# to avoid re-fetching across back-to-back CLI invocations. The file
|
||||
# was introduced by #31968 but not added to this guard.
|
||||
os.path.join("cache", "bws_cache.json"),
|
||||
)
|
||||
for hd in hermes_dirs:
|
||||
for name in credential_file_names:
|
||||
try:
|
||||
blocked = (hd / name).resolve()
|
||||
except Exception:
|
||||
continue
|
||||
if resolved == blocked:
|
||||
return (
|
||||
f"Access denied: {path} is a Hermes credential store "
|
||||
"and cannot be read directly. Provider tools consume "
|
||||
"these credentials through internal channels. "
|
||||
"(Defense-in-depth — not a security boundary; the "
|
||||
"terminal tool can still bypass.)"
|
||||
)
|
||||
|
||||
# mcp-tokens/: directory prefix match — anything inside is OAuth
|
||||
# token material.
|
||||
for hd in hermes_dirs:
|
||||
try:
|
||||
mcp_tokens = (hd / "mcp-tokens").resolve()
|
||||
except Exception:
|
||||
continue
|
||||
if resolved == mcp_tokens:
|
||||
return (
|
||||
f"Access denied: {path} is the Hermes MCP token directory "
|
||||
"and cannot be read directly. (Defense-in-depth — not a "
|
||||
"security boundary; the terminal tool can still bypass.)"
|
||||
)
|
||||
try:
|
||||
resolved.relative_to(mcp_tokens)
|
||||
except ValueError:
|
||||
continue
|
||||
return (
|
||||
f"Access denied: {path} is a Hermes MCP token file "
|
||||
"and cannot be read directly. (Defense-in-depth — not a "
|
||||
"security boundary; the terminal tool can still bypass.)"
|
||||
)
|
||||
|
||||
# Block common secret-bearing project-local .env files anywhere on disk.
|
||||
# The agent helping a user with their project rarely needs to read raw
|
||||
# .env contents — .env.example is the documented-shape substitute. The
|
||||
# terminal tool can still ``cat .env``; this is defense-in-depth, not a
|
||||
# boundary (see module docstring).
|
||||
if resolved.name in _BLOCKED_PROJECT_ENV_BASENAMES:
|
||||
return (
|
||||
f"Access denied: {path} is a secret-bearing environment file "
|
||||
"and cannot be read to prevent credential leakage. "
|
||||
"If you need to check the file structure, read .env.example instead. "
|
||||
"(Defense-in-depth — not a security boundary; the terminal tool can still bypass.)"
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cross-profile write guard (#TBD)
|
||||
#
|
||||
# Hermes profiles are separate HERMES_HOME dirs under
|
||||
# ``<root>/profiles/<name>/``. Each profile has its own skills/, plugins/,
|
||||
# cron/, memories/. When an agent runs under one profile, writing into
|
||||
# ANOTHER profile's directories is almost always wrong — those skills /
|
||||
# plugins / cron jobs / memories affect a different session the user runs
|
||||
# from a different shell.
|
||||
#
|
||||
# Soft guard, NOT a security boundary: the agent runs as the same OS user
|
||||
# and has unrestricted terminal access, so this returns a warning the model
|
||||
# can choose to honor or override with ``cross_profile=True``. Same shape
|
||||
# as the dangerous-command approval flow — the agent is told the boundary
|
||||
# exists, and explicit user direction is required to cross it.
|
||||
#
|
||||
# Reference: May 2026 incident where a hermes-security profile session
|
||||
# edited skills under both ``~/.hermes/profiles/hermes-security/skills/``
|
||||
# AND ``~/.hermes/skills/`` (the default profile's skills) without realizing
|
||||
# the second path belonged to a different profile.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Profile-scoped directories under HERMES_HOME / <root> / <root>/profiles/<X>/
|
||||
# that should be guarded. Adding a new area here extends the guard with no
|
||||
# other code change.
|
||||
PROFILE_SCOPED_AREAS = ("skills", "plugins", "cron", "memories")
|
||||
|
||||
|
||||
def _resolve_active_profile_name() -> str:
|
||||
"""Return the active profile name derived from HERMES_HOME.
|
||||
|
||||
``~/.hermes`` -> ``"default"``
|
||||
``~/.hermes/profiles/X`` -> ``"X"``
|
||||
|
||||
Falls back to ``"default"`` on any resolution failure so the guard
|
||||
never raises into the tool path.
|
||||
"""
|
||||
try:
|
||||
home_real = _hermes_home_path().resolve()
|
||||
root_real = _hermes_root_path().resolve()
|
||||
except (OSError, RuntimeError):
|
||||
return "default"
|
||||
profiles_dir = root_real / "profiles"
|
||||
try:
|
||||
rel = home_real.relative_to(profiles_dir)
|
||||
parts = rel.parts
|
||||
if len(parts) >= 1:
|
||||
return parts[0]
|
||||
except ValueError:
|
||||
pass
|
||||
return "default"
|
||||
|
||||
|
||||
def classify_cross_profile_target(path: str) -> Optional[dict]:
|
||||
"""Classify a write target as cross-profile if it lands in another
|
||||
profile's scoped area (skills/plugins/cron/memories).
|
||||
|
||||
Returns ``None`` when the target is outside Hermes scope, or is inside
|
||||
the ACTIVE profile, or doesn't hit a profile-scoped area. Otherwise
|
||||
returns a dict with:
|
||||
|
||||
* ``active_profile``: name of the profile the agent is running as
|
||||
* ``target_profile``: name of the profile the path belongs to
|
||||
* ``area``: which scoped area (``"skills"``, ``"plugins"``, etc.)
|
||||
* ``target_path``: the resolved path string
|
||||
|
||||
The caller decides what to do with the result — surface a warning to
|
||||
the model, prompt the user, or (with explicit consent /
|
||||
``cross_profile=True``) proceed anyway.
|
||||
"""
|
||||
try:
|
||||
target = Path(os.path.expanduser(str(path))).resolve()
|
||||
root_real = _hermes_root_path().resolve()
|
||||
except (OSError, RuntimeError):
|
||||
return None
|
||||
|
||||
target_profile: Optional[str] = None
|
||||
area: Optional[str] = None
|
||||
|
||||
try:
|
||||
rel = target.relative_to(root_real)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
parts = rel.parts
|
||||
if not parts:
|
||||
return None
|
||||
|
||||
if parts[0] in PROFILE_SCOPED_AREAS:
|
||||
# ``<root>/<area>/...`` → default profile.
|
||||
target_profile = "default"
|
||||
area = parts[0]
|
||||
elif (
|
||||
parts[0] == "profiles"
|
||||
and len(parts) >= 3
|
||||
and parts[2] in PROFILE_SCOPED_AREAS
|
||||
):
|
||||
# ``<root>/profiles/<name>/<area>/...`` → named profile.
|
||||
target_profile = parts[1]
|
||||
area = parts[2]
|
||||
else:
|
||||
return None
|
||||
|
||||
active_profile = _resolve_active_profile_name()
|
||||
if target_profile == active_profile:
|
||||
# In-profile write — not a cross-profile event.
|
||||
return None
|
||||
|
||||
return {
|
||||
"active_profile": active_profile,
|
||||
"target_profile": target_profile,
|
||||
"area": area,
|
||||
"target_path": str(target),
|
||||
}
|
||||
|
||||
|
||||
def get_cross_profile_warning(path: str) -> Optional[str]:
|
||||
"""Return a model-facing warning string when ``path`` is cross-profile.
|
||||
|
||||
Returns ``None`` when the write is in-scope (same profile) or outside
|
||||
Hermes entirely. Caller is expected to surface the warning to the
|
||||
agent as a tool-result error, NOT to silently allow the write — the
|
||||
agent must either get explicit user direction to proceed, or pass
|
||||
``cross_profile=True`` to its write tool.
|
||||
|
||||
This is defense-in-depth: the terminal tool runs as the same OS user
|
||||
and can write any of these paths without going through this guard.
|
||||
Treat the guard as a confusion-reducer, not a security boundary.
|
||||
"""
|
||||
info = classify_cross_profile_target(path)
|
||||
if info is None:
|
||||
return None
|
||||
return (
|
||||
f"Cross-profile write blocked by soft guard: {info['target_path']} "
|
||||
f"belongs to Hermes profile {info['target_profile']!r}, but the "
|
||||
f"agent is running under profile {info['active_profile']!r}. "
|
||||
f"Editing another profile's {info['area']}/ will affect that "
|
||||
f"profile's future sessions, not the one you are currently in. "
|
||||
f"Confirm with the user before proceeding. To bypass this guard "
|
||||
f"after explicit user direction, retry the call with "
|
||||
f"``cross_profile=True``. (Defense-in-depth — not a security "
|
||||
f"boundary; the terminal tool can still bypass.)"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sandbox-mirror write guard (#32049)
|
||||
#
|
||||
# Non-local terminal backends (Docker, Daytona, etc.) bind a sandbox-local
|
||||
# directory to the container's ``$HOME``. The on-disk layout looks like
|
||||
#
|
||||
# <HERMES_HOME>/profiles/<name>/sandboxes/<backend>/<task>/home/.hermes/...
|
||||
#
|
||||
# When the agent (running host-side) speculates that authoritative profile
|
||||
# state lives at one of those sandbox-mirror paths, the write lands on the
|
||||
# mirror — never read by the host process — while the host file is left
|
||||
# untouched. The agent reports success, the user sees no change, and on
|
||||
# disk two divergent copies accumulate. See #32049 for evidence.
|
||||
#
|
||||
# This guard is path-shape-only: it detects the
|
||||
# ``…/sandboxes/<backend>/<task>/home/.hermes/…`` segment and warns
|
||||
# regardless of which Hermes profile is active. It does NOT cover the
|
||||
# inner-container case where the bind mount strips the ``sandboxes/`` prefix
|
||||
# (the agent's view inside the container is plain ``/root/.hermes/...``);
|
||||
# that case needs a separate dispatch-layer or host-side ``profile_state``
|
||||
# tool.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _find_sandbox_mirror_segments(parts: tuple) -> Optional[int]:
|
||||
"""Return the index of the inner ``.hermes`` part in a sandbox-mirror path.
|
||||
|
||||
Matches ``…/sandboxes/<backend>/<task>/home/.hermes/…`` and returns the
|
||||
index where the inner Hermes-state portion starts. Returns ``None`` for
|
||||
paths that do not contain the sandbox-mirror shape.
|
||||
"""
|
||||
for i, part in enumerate(parts):
|
||||
if part != "sandboxes":
|
||||
continue
|
||||
# Need at least: sandboxes / <backend> / <task> / home / .hermes / <thing>
|
||||
if i + 5 >= len(parts):
|
||||
continue
|
||||
if parts[i + 3] == "home" and parts[i + 4] == ".hermes":
|
||||
return i + 4
|
||||
return None
|
||||
|
||||
|
||||
def classify_sandbox_mirror_target(path: str) -> Optional[dict]:
|
||||
"""Classify a write target as a sandbox-mirror of authoritative Hermes state.
|
||||
|
||||
Returns ``None`` when the path does not match the sandbox-mirror shape.
|
||||
Otherwise returns a dict with:
|
||||
|
||||
* ``target_path``: the resolved path string
|
||||
* ``mirror_root``: the ``…/sandboxes/<backend>/<task>/home/.hermes``
|
||||
prefix (so callers can show users which sandbox owns the mirror)
|
||||
* ``inner_path``: the portion under the mirror's ``.hermes`` (what the
|
||||
agent likely meant to address on the host)
|
||||
|
||||
Detection is path-shape-only — does not require any Hermes resolver to
|
||||
succeed, so it works correctly even when called from contexts where
|
||||
HERMES_HOME resolution would be ambiguous.
|
||||
"""
|
||||
try:
|
||||
target = Path(os.path.expanduser(str(path))).resolve()
|
||||
except (OSError, RuntimeError):
|
||||
return None
|
||||
|
||||
parts = target.parts
|
||||
inner_idx = _find_sandbox_mirror_segments(parts)
|
||||
if inner_idx is None:
|
||||
return None
|
||||
|
||||
mirror_root = str(Path(*parts[: inner_idx + 1]))
|
||||
inner_path = str(Path(*parts[inner_idx + 1 :])) if inner_idx + 1 < len(parts) else ""
|
||||
|
||||
return {
|
||||
"target_path": str(target),
|
||||
"mirror_root": mirror_root,
|
||||
"inner_path": inner_path,
|
||||
}
|
||||
|
||||
|
||||
def get_sandbox_mirror_warning(path: str) -> Optional[str]:
|
||||
"""Return a model-facing warning when ``path`` lands in a sandbox mirror.
|
||||
|
||||
Returns ``None`` when the path is not a sandbox-mirror target. Caller
|
||||
is expected to surface the warning to the agent as a tool-result
|
||||
error. The bypass kwarg (``cross_profile=True``) is shared with the
|
||||
cross-profile guard: both are soft "I know what I'm doing" overrides
|
||||
a user can authorise.
|
||||
|
||||
Defense-in-depth, NOT a security boundary: the terminal tool runs as
|
||||
the same OS user and can write the mirror path directly. The guard
|
||||
exists to surface the misclassification before the silent-success +
|
||||
divergent-copy footgun in #32049 fires.
|
||||
"""
|
||||
info = classify_sandbox_mirror_target(path)
|
||||
if info is None:
|
||||
return None
|
||||
return (
|
||||
f"Sandbox-mirror write blocked by soft guard: {info['target_path']} "
|
||||
f"sits under {info['mirror_root']!r}, which is a per-task mirror "
|
||||
f"created by a non-local terminal backend (docker/daytona/etc.). "
|
||||
f"Writes here land on a copy that the host Hermes process never "
|
||||
f"reads — the authoritative file is likely {info['inner_path']!r} "
|
||||
f"under the real HERMES_HOME. Use the host-side tool for "
|
||||
f"authoritative state (e.g. ``memory`` for memories), or address "
|
||||
f"the host path directly. To bypass this guard after explicit "
|
||||
f"user direction, retry the call with ``cross_profile=True``. "
|
||||
f"(Defense-in-depth — not a security boundary; the terminal tool "
|
||||
f"can still bypass.)"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Container-context mirror guard (inner-container case — #32049 follow-up)
|
||||
#
|
||||
# Brian's shape-based detector (#32213) catches paths that still carry the
|
||||
# full ``…/sandboxes/<backend>/<task>/home/.hermes/…`` prefix on the host.
|
||||
# But when file tools execute *inside* the container the bind-mount strips
|
||||
# that prefix: the agent sees plain ``/root/.hermes/…``. The root:root
|
||||
# ownership on the divergent SOUL.md in #32049 confirms this is the primary
|
||||
# failure mode.
|
||||
#
|
||||
# Fix: file_tools passes the active Docker mirror prefix when the terminal
|
||||
# backend is docker + persistent. This catches the very first file-tool call,
|
||||
# before a DockerEnvironment object necessarily exists.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def classify_container_mirror_target(
|
||||
path: str,
|
||||
mirror_prefix: str | None = None,
|
||||
) -> Optional[dict]:
|
||||
"""Classify a write target as a container-side sandbox mirror.
|
||||
|
||||
``mirror_prefix`` must be supplied by the caller after it has established
|
||||
that file tools are executing in a container whose home is a sandbox
|
||||
mirror. Returns ``None`` when no such context is active or the path is not
|
||||
under the mirror prefix. Otherwise returns:
|
||||
|
||||
* ``target_path``: resolved path string
|
||||
* ``mirror_root``: the declared container mirror prefix
|
||||
* ``inner_path``: portion under the mirror root (what the agent
|
||||
likely meant to address in the host HERMES_HOME)
|
||||
"""
|
||||
if not mirror_prefix:
|
||||
return None
|
||||
try:
|
||||
target = Path(os.path.expanduser(str(path))).resolve()
|
||||
mirror = Path(os.path.expanduser(mirror_prefix)).resolve()
|
||||
inner = target.relative_to(mirror)
|
||||
except (OSError, RuntimeError, ValueError):
|
||||
return None
|
||||
return {
|
||||
"target_path": str(target),
|
||||
"mirror_root": str(mirror),
|
||||
"inner_path": inner.as_posix(),
|
||||
}
|
||||
|
||||
|
||||
def get_container_mirror_warning(
|
||||
path: str,
|
||||
mirror_prefix: str | None = None,
|
||||
) -> Optional[str]:
|
||||
"""Return a model-facing warning when *path* lands in the container's
|
||||
sandbox mirror of authoritative Hermes state.
|
||||
|
||||
The caller supplies ``mirror_prefix`` only when the current file-tool
|
||||
backend is known to execute inside a Docker sandbox. Same contract as
|
||||
``get_cross_profile_warning``: soft guard, returns ``None`` for
|
||||
non-mirror paths, caller surfaces as a tool-result error. Bypass via
|
||||
``cross_profile=True`` after explicit user direction.
|
||||
"""
|
||||
info = classify_container_mirror_target(path, mirror_prefix)
|
||||
if info is None:
|
||||
return None
|
||||
return (
|
||||
f"Sandbox-mirror write blocked by soft guard: {info['target_path']} "
|
||||
f"sits under {info['mirror_root']!r}, which is the container's "
|
||||
f"bind-mounted home — a per-task mirror that the host Hermes "
|
||||
f"process never reads. The authoritative file is "
|
||||
f"{info['inner_path']!r} under the real HERMES_HOME. Use the "
|
||||
f"host-side tool for authoritative state (e.g. ``memory`` for "
|
||||
f"memories), or address the host path directly. To bypass after "
|
||||
f"explicit user direction, retry with ``cross_profile=True``. "
|
||||
f"(Defense-in-depth — not a security boundary; the terminal tool "
|
||||
f"can still bypass.)"
|
||||
)
|
||||
@@ -1,909 +0,0 @@
|
||||
"""OpenAI-compatible facade that talks to Google's Cloud Code Assist backend.
|
||||
|
||||
This adapter lets Hermes use the ``google-gemini-cli`` provider as if it were
|
||||
a standard OpenAI-shaped chat completion endpoint, while the underlying HTTP
|
||||
traffic goes to ``cloudcode-pa.googleapis.com/v1internal:{generateContent,
|
||||
streamGenerateContent}`` with a Bearer access token obtained via OAuth PKCE.
|
||||
|
||||
Architecture
|
||||
------------
|
||||
- ``GeminiCloudCodeClient`` exposes ``.chat.completions.create(**kwargs)``
|
||||
mirroring the subset of the OpenAI SDK that ``run_agent.py`` uses.
|
||||
- Incoming OpenAI ``messages[]`` / ``tools[]`` / ``tool_choice`` are translated
|
||||
to Gemini's native ``contents[]`` / ``tools[].functionDeclarations`` /
|
||||
``toolConfig`` / ``systemInstruction`` shape.
|
||||
- The request body is wrapped ``{project, model, user_prompt_id, request}``
|
||||
per Code Assist API expectations.
|
||||
- Responses (``candidates[].content.parts[]``) are converted back to
|
||||
OpenAI ``choices[0].message`` shape with ``content`` + ``tool_calls``.
|
||||
- Streaming uses SSE (``?alt=sse``) and yields OpenAI-shaped delta chunks.
|
||||
|
||||
Attribution
|
||||
-----------
|
||||
Translation semantics follow jenslys/opencode-gemini-auth (MIT) and the public
|
||||
Gemini API docs. Request envelope shape
|
||||
(``{project, model, user_prompt_id, request}``) is documented nowhere; it is
|
||||
reverse-engineered from the opencode-gemini-auth and clawdbot implementations.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from agent import google_oauth
|
||||
from agent.gemini_schema import sanitize_gemini_tool_parameters
|
||||
from agent.google_code_assist import (
|
||||
CODE_ASSIST_ENDPOINT,
|
||||
CodeAssistError,
|
||||
ProjectContext,
|
||||
resolve_project_context,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Request translation: OpenAI → Gemini
|
||||
# =============================================================================
|
||||
|
||||
_ROLE_MAP_OPENAI_TO_GEMINI = {
|
||||
"user": "user",
|
||||
"assistant": "model",
|
||||
"system": "user", # handled separately via systemInstruction
|
||||
"tool": "user", # functionResponse is wrapped in a user-role turn
|
||||
"function": "user",
|
||||
}
|
||||
|
||||
|
||||
def _coerce_content_to_text(content: Any) -> str:
|
||||
"""OpenAI content may be str or a list of parts; reduce to plain text."""
|
||||
if content is None:
|
||||
return ""
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
if isinstance(content, list):
|
||||
pieces: List[str] = []
|
||||
for p in content:
|
||||
if isinstance(p, str):
|
||||
pieces.append(p)
|
||||
elif isinstance(p, dict):
|
||||
if p.get("type") == "text" and isinstance(p.get("text"), str):
|
||||
pieces.append(p["text"])
|
||||
# Multimodal (image_url, etc.) — stub for now; log and skip
|
||||
elif p.get("type") in {"image_url", "input_audio"}:
|
||||
logger.debug("Dropping multimodal part (not yet supported): %s", p.get("type"))
|
||||
return "\n".join(pieces)
|
||||
return str(content)
|
||||
|
||||
|
||||
def _translate_tool_call_to_gemini(tool_call: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""OpenAI tool_call -> Gemini functionCall part."""
|
||||
fn = tool_call.get("function") or {}
|
||||
args_raw = fn.get("arguments", "")
|
||||
try:
|
||||
args = json.loads(args_raw) if isinstance(args_raw, str) and args_raw else {}
|
||||
except json.JSONDecodeError:
|
||||
args = {"_raw": args_raw}
|
||||
if not isinstance(args, dict):
|
||||
args = {"_value": args}
|
||||
return {
|
||||
"functionCall": {
|
||||
"name": fn.get("name") or "",
|
||||
"args": args,
|
||||
},
|
||||
# Sentinel signature — matches opencode-gemini-auth's approach.
|
||||
# Without this, Code Assist rejects function calls that originated
|
||||
# outside its own chain.
|
||||
"thoughtSignature": "skip_thought_signature_validator",
|
||||
}
|
||||
|
||||
|
||||
def _translate_tool_result_to_gemini(message: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""OpenAI tool-role message -> Gemini functionResponse part.
|
||||
|
||||
The function name isn't in the OpenAI tool message directly; it must be
|
||||
passed via the assistant message that issued the call. For simplicity we
|
||||
look up ``name`` on the message (OpenAI SDK copies it there) or on the
|
||||
``tool_call_id`` cross-reference.
|
||||
"""
|
||||
name = str(message.get("name") or message.get("tool_call_id") or "tool")
|
||||
content = _coerce_content_to_text(message.get("content"))
|
||||
# Gemini expects the response as a dict under `response`. We wrap plain
|
||||
# text in {"output": "..."}.
|
||||
try:
|
||||
parsed = json.loads(content) if content.strip().startswith(("{", "[")) else None
|
||||
except json.JSONDecodeError:
|
||||
parsed = None
|
||||
response = parsed if isinstance(parsed, dict) else {"output": content}
|
||||
return {
|
||||
"functionResponse": {
|
||||
"name": name,
|
||||
"response": response,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _build_gemini_contents(
|
||||
messages: List[Dict[str, Any]],
|
||||
) -> tuple[List[Dict[str, Any]], Optional[Dict[str, Any]]]:
|
||||
"""Convert OpenAI messages[] to Gemini contents[] + systemInstruction."""
|
||||
system_text_parts: List[str] = []
|
||||
contents: List[Dict[str, Any]] = []
|
||||
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
role = str(msg.get("role") or "user")
|
||||
|
||||
if role == "system":
|
||||
system_text_parts.append(_coerce_content_to_text(msg.get("content")))
|
||||
continue
|
||||
|
||||
# Tool result message — emit a user-role turn with functionResponse
|
||||
if role == "tool" or role == "function":
|
||||
contents.append({
|
||||
"role": "user",
|
||||
"parts": [_translate_tool_result_to_gemini(msg)],
|
||||
})
|
||||
continue
|
||||
|
||||
gemini_role = _ROLE_MAP_OPENAI_TO_GEMINI.get(role, "user")
|
||||
parts: List[Dict[str, Any]] = []
|
||||
|
||||
text = _coerce_content_to_text(msg.get("content"))
|
||||
if text:
|
||||
parts.append({"text": text})
|
||||
|
||||
# Assistant messages can carry tool_calls
|
||||
tool_calls = msg.get("tool_calls") or []
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
if isinstance(tc, dict):
|
||||
parts.append(_translate_tool_call_to_gemini(tc))
|
||||
|
||||
if not parts:
|
||||
# Gemini rejects empty parts; skip the turn entirely
|
||||
continue
|
||||
|
||||
contents.append({"role": gemini_role, "parts": parts})
|
||||
|
||||
system_instruction: Optional[Dict[str, Any]] = None
|
||||
joined_system = "\n".join(p for p in system_text_parts if p).strip()
|
||||
if joined_system:
|
||||
system_instruction = {
|
||||
"role": "system",
|
||||
"parts": [{"text": joined_system}],
|
||||
}
|
||||
|
||||
return contents, system_instruction
|
||||
|
||||
|
||||
def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]:
|
||||
"""OpenAI tools[] -> Gemini tools[].functionDeclarations[]."""
|
||||
if not isinstance(tools, list) or not tools:
|
||||
return []
|
||||
declarations: List[Dict[str, Any]] = []
|
||||
for t in tools:
|
||||
if not isinstance(t, dict):
|
||||
continue
|
||||
fn = t.get("function") or {}
|
||||
if not isinstance(fn, dict):
|
||||
continue
|
||||
name = fn.get("name")
|
||||
if not name:
|
||||
continue
|
||||
decl = {"name": str(name)}
|
||||
if fn.get("description"):
|
||||
decl["description"] = str(fn["description"])
|
||||
params = fn.get("parameters")
|
||||
if isinstance(params, dict):
|
||||
decl["parameters"] = sanitize_gemini_tool_parameters(params)
|
||||
declarations.append(decl)
|
||||
if not declarations:
|
||||
return []
|
||||
return [{"functionDeclarations": declarations}]
|
||||
|
||||
|
||||
def _translate_tool_choice_to_gemini(tool_choice: Any) -> Optional[Dict[str, Any]]:
|
||||
"""OpenAI tool_choice -> Gemini toolConfig.functionCallingConfig."""
|
||||
if tool_choice is None:
|
||||
return None
|
||||
if isinstance(tool_choice, str):
|
||||
if tool_choice == "auto":
|
||||
return {"functionCallingConfig": {"mode": "AUTO"}}
|
||||
if tool_choice == "required":
|
||||
return {"functionCallingConfig": {"mode": "ANY"}}
|
||||
if tool_choice == "none":
|
||||
return {"functionCallingConfig": {"mode": "NONE"}}
|
||||
if isinstance(tool_choice, dict):
|
||||
fn = tool_choice.get("function") or {}
|
||||
name = fn.get("name")
|
||||
if name:
|
||||
return {
|
||||
"functionCallingConfig": {
|
||||
"mode": "ANY",
|
||||
"allowedFunctionNames": [str(name)],
|
||||
},
|
||||
}
|
||||
return None
|
||||
|
||||
|
||||
def _normalize_thinking_config(config: Any) -> Optional[Dict[str, Any]]:
|
||||
"""Accept thinkingBudget / thinkingLevel / includeThoughts (+ snake_case)."""
|
||||
if not isinstance(config, dict) or not config:
|
||||
return None
|
||||
budget = config.get("thinkingBudget", config.get("thinking_budget"))
|
||||
level = config.get("thinkingLevel", config.get("thinking_level"))
|
||||
include = config.get("includeThoughts", config.get("include_thoughts"))
|
||||
normalized: Dict[str, Any] = {}
|
||||
if isinstance(budget, (int, float)):
|
||||
normalized["thinkingBudget"] = int(budget)
|
||||
if isinstance(level, str) and level.strip():
|
||||
normalized["thinkingLevel"] = level.strip().lower()
|
||||
if isinstance(include, bool):
|
||||
normalized["includeThoughts"] = include
|
||||
return normalized or None
|
||||
|
||||
|
||||
def build_gemini_request(
|
||||
*,
|
||||
messages: List[Dict[str, Any]],
|
||||
tools: Any = None,
|
||||
tool_choice: Any = None,
|
||||
temperature: Optional[float] = None,
|
||||
max_tokens: Optional[int] = None,
|
||||
top_p: Optional[float] = None,
|
||||
stop: Any = None,
|
||||
thinking_config: Any = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build the inner Gemini request body (goes inside ``request`` wrapper)."""
|
||||
contents, system_instruction = _build_gemini_contents(messages)
|
||||
|
||||
body: Dict[str, Any] = {"contents": contents}
|
||||
if system_instruction is not None:
|
||||
body["systemInstruction"] = system_instruction
|
||||
|
||||
gemini_tools = _translate_tools_to_gemini(tools)
|
||||
if gemini_tools:
|
||||
body["tools"] = gemini_tools
|
||||
tool_cfg = _translate_tool_choice_to_gemini(tool_choice)
|
||||
if tool_cfg is not None:
|
||||
body["toolConfig"] = tool_cfg
|
||||
|
||||
generation_config: Dict[str, Any] = {}
|
||||
if isinstance(temperature, (int, float)):
|
||||
generation_config["temperature"] = float(temperature)
|
||||
if isinstance(max_tokens, int) and max_tokens > 0:
|
||||
generation_config["maxOutputTokens"] = max_tokens
|
||||
if isinstance(top_p, (int, float)):
|
||||
generation_config["topP"] = float(top_p)
|
||||
if isinstance(stop, str) and stop:
|
||||
generation_config["stopSequences"] = [stop]
|
||||
elif isinstance(stop, list) and stop:
|
||||
generation_config["stopSequences"] = [str(s) for s in stop if s]
|
||||
normalized_thinking = _normalize_thinking_config(thinking_config)
|
||||
if normalized_thinking:
|
||||
generation_config["thinkingConfig"] = normalized_thinking
|
||||
if generation_config:
|
||||
body["generationConfig"] = generation_config
|
||||
|
||||
return body
|
||||
|
||||
|
||||
def wrap_code_assist_request(
|
||||
*,
|
||||
project_id: str,
|
||||
model: str,
|
||||
inner_request: Dict[str, Any],
|
||||
user_prompt_id: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Wrap the inner Gemini request in the Code Assist envelope."""
|
||||
return {
|
||||
"project": project_id,
|
||||
"model": model,
|
||||
"user_prompt_id": user_prompt_id or str(uuid.uuid4()),
|
||||
"request": inner_request,
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Response translation: Gemini → OpenAI
|
||||
# =============================================================================
|
||||
|
||||
def _translate_gemini_response(
|
||||
resp: Dict[str, Any],
|
||||
model: str,
|
||||
) -> SimpleNamespace:
|
||||
"""Non-streaming Gemini response -> OpenAI-shaped SimpleNamespace.
|
||||
|
||||
Code Assist wraps the actual Gemini response inside ``response``, so we
|
||||
unwrap it first if present.
|
||||
"""
|
||||
inner = resp.get("response") if isinstance(resp.get("response"), dict) else resp
|
||||
|
||||
candidates = inner.get("candidates") or []
|
||||
if not isinstance(candidates, list) or not candidates:
|
||||
return _empty_response(model)
|
||||
|
||||
cand = candidates[0]
|
||||
content_obj = cand.get("content") if isinstance(cand, dict) else {}
|
||||
parts = content_obj.get("parts") if isinstance(content_obj, dict) else []
|
||||
|
||||
text_pieces: List[str] = []
|
||||
reasoning_pieces: List[str] = []
|
||||
tool_calls: List[SimpleNamespace] = []
|
||||
|
||||
for i, part in enumerate(parts or []):
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
# Thought parts are model's internal reasoning — surface as reasoning,
|
||||
# don't mix into content.
|
||||
if part.get("thought") is True:
|
||||
if isinstance(part.get("text"), str):
|
||||
reasoning_pieces.append(part["text"])
|
||||
continue
|
||||
if isinstance(part.get("text"), str):
|
||||
text_pieces.append(part["text"])
|
||||
continue
|
||||
fc = part.get("functionCall")
|
||||
if isinstance(fc, dict) and fc.get("name"):
|
||||
try:
|
||||
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
|
||||
except (TypeError, ValueError):
|
||||
args_str = "{}"
|
||||
tool_calls.append(SimpleNamespace(
|
||||
id=f"call_{uuid.uuid4().hex[:12]}",
|
||||
type="function",
|
||||
index=i,
|
||||
function=SimpleNamespace(name=str(fc["name"]), arguments=args_str),
|
||||
))
|
||||
|
||||
finish_reason = "tool_calls" if tool_calls else _map_gemini_finish_reason(
|
||||
str(cand.get("finishReason") or "")
|
||||
)
|
||||
|
||||
usage_meta = inner.get("usageMetadata") or {}
|
||||
usage = SimpleNamespace(
|
||||
prompt_tokens=int(usage_meta.get("promptTokenCount") or 0),
|
||||
completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0),
|
||||
total_tokens=int(usage_meta.get("totalTokenCount") or 0),
|
||||
prompt_tokens_details=SimpleNamespace(
|
||||
cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0),
|
||||
),
|
||||
)
|
||||
|
||||
message = SimpleNamespace(
|
||||
role="assistant",
|
||||
content="".join(text_pieces) if text_pieces else None,
|
||||
tool_calls=tool_calls or None,
|
||||
reasoning="".join(reasoning_pieces) or None,
|
||||
reasoning_content="".join(reasoning_pieces) or None,
|
||||
reasoning_details=None,
|
||||
)
|
||||
choice = SimpleNamespace(
|
||||
index=0,
|
||||
message=message,
|
||||
finish_reason=finish_reason,
|
||||
)
|
||||
return SimpleNamespace(
|
||||
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
|
||||
object="chat.completion",
|
||||
created=int(time.time()),
|
||||
model=model,
|
||||
choices=[choice],
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
|
||||
def _empty_response(model: str) -> SimpleNamespace:
|
||||
message = SimpleNamespace(
|
||||
role="assistant", content="", tool_calls=None,
|
||||
reasoning=None, reasoning_content=None, reasoning_details=None,
|
||||
)
|
||||
choice = SimpleNamespace(index=0, message=message, finish_reason="stop")
|
||||
usage = SimpleNamespace(
|
||||
prompt_tokens=0, completion_tokens=0, total_tokens=0,
|
||||
prompt_tokens_details=SimpleNamespace(cached_tokens=0),
|
||||
)
|
||||
return SimpleNamespace(
|
||||
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
|
||||
object="chat.completion",
|
||||
created=int(time.time()),
|
||||
model=model,
|
||||
choices=[choice],
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
|
||||
def _map_gemini_finish_reason(reason: str) -> str:
|
||||
mapping = {
|
||||
"STOP": "stop",
|
||||
"MAX_TOKENS": "length",
|
||||
"SAFETY": "content_filter",
|
||||
"RECITATION": "content_filter",
|
||||
"OTHER": "stop",
|
||||
}
|
||||
return mapping.get(reason.upper(), "stop")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Streaming SSE iterator
|
||||
# =============================================================================
|
||||
|
||||
class _GeminiStreamChunk(SimpleNamespace):
|
||||
"""Mimics an OpenAI ChatCompletionChunk with .choices[0].delta."""
|
||||
pass
|
||||
|
||||
|
||||
def _make_stream_chunk(
|
||||
*,
|
||||
model: str,
|
||||
content: str = "",
|
||||
tool_call_delta: Optional[Dict[str, Any]] = None,
|
||||
finish_reason: Optional[str] = None,
|
||||
reasoning: str = "",
|
||||
) -> _GeminiStreamChunk:
|
||||
delta_kwargs: Dict[str, Any] = {
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": None,
|
||||
"reasoning": None,
|
||||
"reasoning_content": None,
|
||||
}
|
||||
if content:
|
||||
delta_kwargs["content"] = content
|
||||
if tool_call_delta is not None:
|
||||
delta_kwargs["tool_calls"] = [SimpleNamespace(
|
||||
index=tool_call_delta.get("index", 0),
|
||||
id=tool_call_delta.get("id") or f"call_{uuid.uuid4().hex[:12]}",
|
||||
type="function",
|
||||
function=SimpleNamespace(
|
||||
name=tool_call_delta.get("name") or "",
|
||||
arguments=tool_call_delta.get("arguments") or "",
|
||||
),
|
||||
)]
|
||||
if reasoning:
|
||||
delta_kwargs["reasoning"] = reasoning
|
||||
delta_kwargs["reasoning_content"] = reasoning
|
||||
delta = SimpleNamespace(**delta_kwargs)
|
||||
choice = SimpleNamespace(index=0, delta=delta, finish_reason=finish_reason)
|
||||
return _GeminiStreamChunk(
|
||||
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
|
||||
object="chat.completion.chunk",
|
||||
created=int(time.time()),
|
||||
model=model,
|
||||
choices=[choice],
|
||||
usage=None,
|
||||
)
|
||||
|
||||
|
||||
def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]:
|
||||
"""Parse Server-Sent Events from an httpx streaming response."""
|
||||
buffer = ""
|
||||
for chunk in response.iter_text():
|
||||
if not chunk:
|
||||
continue
|
||||
buffer += chunk
|
||||
while "\n" in buffer:
|
||||
line, buffer = buffer.split("\n", 1)
|
||||
line = line.rstrip("\r")
|
||||
if not line:
|
||||
continue
|
||||
if line.startswith("data: "):
|
||||
data = line[6:]
|
||||
if data == "[DONE]":
|
||||
return
|
||||
try:
|
||||
yield json.loads(data)
|
||||
except json.JSONDecodeError:
|
||||
logger.debug("Non-JSON SSE line: %s", data[:200])
|
||||
|
||||
|
||||
def _translate_stream_event(
|
||||
event: Dict[str, Any],
|
||||
model: str,
|
||||
tool_call_counter: List[int],
|
||||
) -> List[_GeminiStreamChunk]:
|
||||
"""Unwrap Code Assist envelope and emit OpenAI-shaped chunk(s).
|
||||
|
||||
``tool_call_counter`` is a single-element list used as a mutable counter
|
||||
across events in the same stream. Each ``functionCall`` part gets a
|
||||
fresh, unique OpenAI ``index`` — keying by function name would collide
|
||||
whenever the model issues parallel calls to the same tool (e.g. reading
|
||||
three files in one turn).
|
||||
"""
|
||||
inner = event.get("response") if isinstance(event.get("response"), dict) else event
|
||||
candidates = inner.get("candidates") or []
|
||||
if not candidates:
|
||||
return []
|
||||
cand = candidates[0]
|
||||
if not isinstance(cand, dict):
|
||||
return []
|
||||
|
||||
chunks: List[_GeminiStreamChunk] = []
|
||||
|
||||
content = cand.get("content") or {}
|
||||
parts = content.get("parts") if isinstance(content, dict) else []
|
||||
for part in parts or []:
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
if part.get("thought") is True and isinstance(part.get("text"), str):
|
||||
chunks.append(_make_stream_chunk(
|
||||
model=model, reasoning=part["text"],
|
||||
))
|
||||
continue
|
||||
if isinstance(part.get("text"), str) and part["text"]:
|
||||
chunks.append(_make_stream_chunk(model=model, content=part["text"]))
|
||||
fc = part.get("functionCall")
|
||||
if isinstance(fc, dict) and fc.get("name"):
|
||||
name = str(fc["name"])
|
||||
idx = tool_call_counter[0]
|
||||
tool_call_counter[0] += 1
|
||||
try:
|
||||
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
|
||||
except (TypeError, ValueError):
|
||||
args_str = "{}"
|
||||
chunks.append(_make_stream_chunk(
|
||||
model=model,
|
||||
tool_call_delta={
|
||||
"index": idx,
|
||||
"name": name,
|
||||
"arguments": args_str,
|
||||
},
|
||||
))
|
||||
|
||||
finish_reason_raw = str(cand.get("finishReason") or "")
|
||||
if finish_reason_raw:
|
||||
mapped = _map_gemini_finish_reason(finish_reason_raw)
|
||||
if tool_call_counter[0] > 0:
|
||||
mapped = "tool_calls"
|
||||
chunks.append(_make_stream_chunk(model=model, finish_reason=mapped))
|
||||
return chunks
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# GeminiCloudCodeClient — OpenAI-compatible facade
|
||||
# =============================================================================
|
||||
|
||||
MARKER_BASE_URL = "cloudcode-pa://google"
|
||||
|
||||
|
||||
class _GeminiChatCompletions:
|
||||
def __init__(self, client: "GeminiCloudCodeClient"):
|
||||
self._client = client
|
||||
|
||||
def create(self, **kwargs: Any) -> Any:
|
||||
return self._client._create_chat_completion(**kwargs)
|
||||
|
||||
|
||||
class _GeminiChatNamespace:
|
||||
def __init__(self, client: "GeminiCloudCodeClient"):
|
||||
self.completions = _GeminiChatCompletions(client)
|
||||
|
||||
|
||||
class GeminiCloudCodeClient:
|
||||
"""Minimal OpenAI-SDK-compatible facade over Code Assist v1internal."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
api_key: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
default_headers: Optional[Dict[str, str]] = None,
|
||||
project_id: str = "",
|
||||
**_: Any,
|
||||
):
|
||||
# `api_key` here is a dummy — real auth is the OAuth access token
|
||||
# fetched on every call via agent.google_oauth.get_valid_access_token().
|
||||
# We accept the kwarg for openai.OpenAI interface parity.
|
||||
self.api_key = api_key or "google-oauth"
|
||||
self.base_url = base_url or MARKER_BASE_URL
|
||||
self._default_headers = dict(default_headers or {})
|
||||
self._configured_project_id = project_id
|
||||
self._project_context: Optional[ProjectContext] = None
|
||||
self._project_context_lock = False # simple single-thread guard
|
||||
self.chat = _GeminiChatNamespace(self)
|
||||
self.is_closed = False
|
||||
self._http = httpx.Client(timeout=httpx.Timeout(connect=15.0, read=600.0, write=30.0, pool=30.0))
|
||||
|
||||
def close(self) -> None:
|
||||
self.is_closed = True
|
||||
try:
|
||||
self._http.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Implement the OpenAI SDK's context-manager-ish closure check
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
self.close()
|
||||
|
||||
def _ensure_project_context(self, access_token: str, model: str) -> ProjectContext:
|
||||
"""Lazily resolve and cache the project context for this client."""
|
||||
if self._project_context is not None:
|
||||
return self._project_context
|
||||
|
||||
env_project = google_oauth.resolve_project_id_from_env()
|
||||
creds = google_oauth.load_credentials()
|
||||
stored_project = creds.project_id if creds else ""
|
||||
|
||||
# Prefer what's already baked into the creds
|
||||
if stored_project:
|
||||
self._project_context = ProjectContext(
|
||||
project_id=stored_project,
|
||||
managed_project_id=creds.managed_project_id if creds else "",
|
||||
tier_id="",
|
||||
source="stored",
|
||||
)
|
||||
return self._project_context
|
||||
|
||||
ctx = resolve_project_context(
|
||||
access_token,
|
||||
configured_project_id=self._configured_project_id,
|
||||
env_project_id=env_project,
|
||||
user_agent_model=model,
|
||||
)
|
||||
# Persist discovered project back to the creds file so the next
|
||||
# session doesn't re-run the discovery.
|
||||
if ctx.project_id or ctx.managed_project_id:
|
||||
google_oauth.update_project_ids(
|
||||
project_id=ctx.project_id,
|
||||
managed_project_id=ctx.managed_project_id,
|
||||
)
|
||||
self._project_context = ctx
|
||||
return ctx
|
||||
|
||||
def _create_chat_completion(
|
||||
self,
|
||||
*,
|
||||
model: str = "gemini-2.5-flash",
|
||||
messages: Optional[List[Dict[str, Any]]] = None,
|
||||
stream: bool = False,
|
||||
tools: Any = None,
|
||||
tool_choice: Any = None,
|
||||
temperature: Optional[float] = None,
|
||||
max_tokens: Optional[int] = None,
|
||||
top_p: Optional[float] = None,
|
||||
stop: Any = None,
|
||||
extra_body: Optional[Dict[str, Any]] = None,
|
||||
timeout: Any = None,
|
||||
**_: Any,
|
||||
) -> Any:
|
||||
access_token = google_oauth.get_valid_access_token()
|
||||
ctx = self._ensure_project_context(access_token, model)
|
||||
|
||||
thinking_config = None
|
||||
if isinstance(extra_body, dict):
|
||||
thinking_config = extra_body.get("thinking_config") or extra_body.get("thinkingConfig")
|
||||
|
||||
inner = build_gemini_request(
|
||||
messages=messages or [],
|
||||
tools=tools,
|
||||
tool_choice=tool_choice,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
top_p=top_p,
|
||||
stop=stop,
|
||||
thinking_config=thinking_config,
|
||||
)
|
||||
wrapped = wrap_code_assist_request(
|
||||
project_id=ctx.project_id,
|
||||
model=model,
|
||||
inner_request=inner,
|
||||
)
|
||||
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"Accept": "application/json",
|
||||
"Authorization": f"Bearer {access_token}",
|
||||
"User-Agent": "hermes-agent (gemini-cli-compat)",
|
||||
"X-Goog-Api-Client": "gl-python/hermes",
|
||||
"x-activity-request-id": str(uuid.uuid4()),
|
||||
}
|
||||
headers.update(self._default_headers)
|
||||
|
||||
if stream:
|
||||
return self._stream_completion(model=model, wrapped=wrapped, headers=headers)
|
||||
|
||||
url = f"{CODE_ASSIST_ENDPOINT}/v1internal:generateContent"
|
||||
response = self._http.post(url, json=wrapped, headers=headers)
|
||||
if response.status_code != 200:
|
||||
raise _gemini_http_error(response)
|
||||
try:
|
||||
payload = response.json()
|
||||
except ValueError as exc:
|
||||
raise CodeAssistError(
|
||||
f"Invalid JSON from Code Assist: {exc}",
|
||||
code="code_assist_invalid_json",
|
||||
) from exc
|
||||
return _translate_gemini_response(payload, model=model)
|
||||
|
||||
def _stream_completion(
|
||||
self,
|
||||
*,
|
||||
model: str,
|
||||
wrapped: Dict[str, Any],
|
||||
headers: Dict[str, str],
|
||||
) -> Iterator[_GeminiStreamChunk]:
|
||||
"""Generator that yields OpenAI-shaped streaming chunks."""
|
||||
url = f"{CODE_ASSIST_ENDPOINT}/v1internal:streamGenerateContent?alt=sse"
|
||||
stream_headers = dict(headers)
|
||||
stream_headers["Accept"] = "text/event-stream"
|
||||
|
||||
def _generator() -> Iterator[_GeminiStreamChunk]:
|
||||
try:
|
||||
with self._http.stream("POST", url, json=wrapped, headers=stream_headers) as response:
|
||||
if response.status_code != 200:
|
||||
# Materialize error body for better diagnostics
|
||||
response.read()
|
||||
raise _gemini_http_error(response)
|
||||
tool_call_counter: List[int] = [0]
|
||||
for event in _iter_sse_events(response):
|
||||
for chunk in _translate_stream_event(event, model, tool_call_counter):
|
||||
yield chunk
|
||||
except httpx.HTTPError as exc:
|
||||
raise CodeAssistError(
|
||||
f"Streaming request failed: {exc}",
|
||||
code="code_assist_stream_error",
|
||||
) from exc
|
||||
|
||||
return _generator()
|
||||
|
||||
|
||||
def _gemini_http_error(response: httpx.Response) -> CodeAssistError:
|
||||
"""Translate an httpx response into a CodeAssistError with rich metadata.
|
||||
|
||||
Parses Google's error envelope (``{"error": {"code", "message", "status",
|
||||
"details": [...]}}``) so the agent's error classifier can reason about
|
||||
the failure — ``status_code`` enables the rate_limit / auth classification
|
||||
paths, and ``response`` lets the main loop honor ``Retry-After`` just
|
||||
like it does for OpenAI SDK exceptions.
|
||||
|
||||
Also lifts a few recognizable Google conditions into human-readable
|
||||
messages so the user sees something better than a 500-char JSON dump:
|
||||
|
||||
MODEL_CAPACITY_EXHAUSTED → "Gemini model capacity exhausted for
|
||||
<model>. This is a Google-side throttle..."
|
||||
RESOURCE_EXHAUSTED w/o reason → quota-style message
|
||||
404 → "Model <name> not found at cloudcode-pa..."
|
||||
"""
|
||||
status = response.status_code
|
||||
|
||||
# Parse the body once, surviving any weird encodings.
|
||||
body_text = ""
|
||||
body_json: Dict[str, Any] = {}
|
||||
try:
|
||||
body_text = response.text
|
||||
except Exception:
|
||||
body_text = ""
|
||||
if body_text:
|
||||
try:
|
||||
parsed = json.loads(body_text)
|
||||
if isinstance(parsed, dict):
|
||||
body_json = parsed
|
||||
except (ValueError, TypeError):
|
||||
body_json = {}
|
||||
|
||||
# Dig into Google's error envelope. Shape is:
|
||||
# {"error": {"code": 429, "message": "...", "status": "RESOURCE_EXHAUSTED",
|
||||
# "details": [{"@type": ".../ErrorInfo", "reason": "MODEL_CAPACITY_EXHAUSTED",
|
||||
# "metadata": {...}},
|
||||
# {"@type": ".../RetryInfo", "retryDelay": "30s"}]}}
|
||||
err_obj = body_json.get("error") if isinstance(body_json, dict) else None
|
||||
if not isinstance(err_obj, dict):
|
||||
err_obj = {}
|
||||
err_status = str(err_obj.get("status") or "").strip()
|
||||
err_message = str(err_obj.get("message") or "").strip()
|
||||
_raw_details = err_obj.get("details")
|
||||
err_details_list = _raw_details if isinstance(_raw_details, list) else []
|
||||
|
||||
# Extract google.rpc.ErrorInfo reason + metadata. There may be more
|
||||
# than one ErrorInfo (rare), so we pick the first one with a reason.
|
||||
error_reason = ""
|
||||
error_metadata: Dict[str, Any] = {}
|
||||
retry_delay_seconds: Optional[float] = None
|
||||
for detail in err_details_list:
|
||||
if not isinstance(detail, dict):
|
||||
continue
|
||||
type_url = str(detail.get("@type") or "")
|
||||
if not error_reason and type_url.endswith("/google.rpc.ErrorInfo"):
|
||||
reason = detail.get("reason")
|
||||
if isinstance(reason, str) and reason:
|
||||
error_reason = reason
|
||||
md = detail.get("metadata")
|
||||
if isinstance(md, dict):
|
||||
error_metadata = md
|
||||
elif retry_delay_seconds is None and type_url.endswith("/google.rpc.RetryInfo"):
|
||||
# retryDelay is a google.protobuf.Duration string like "30s" or "1.5s".
|
||||
delay_raw = detail.get("retryDelay")
|
||||
if isinstance(delay_raw, str) and delay_raw.endswith("s"):
|
||||
try:
|
||||
retry_delay_seconds = float(delay_raw[:-1])
|
||||
except ValueError:
|
||||
pass
|
||||
elif isinstance(delay_raw, (int, float)):
|
||||
retry_delay_seconds = float(delay_raw)
|
||||
|
||||
# Fall back to the Retry-After header if the body didn't include RetryInfo.
|
||||
if retry_delay_seconds is None:
|
||||
try:
|
||||
header_val = response.headers.get("Retry-After") or response.headers.get("retry-after")
|
||||
except Exception:
|
||||
header_val = None
|
||||
if header_val:
|
||||
try:
|
||||
retry_delay_seconds = float(header_val)
|
||||
except (TypeError, ValueError):
|
||||
retry_delay_seconds = None
|
||||
|
||||
# Classify the error code. ``code_assist_rate_limited`` stays the default
|
||||
# for 429s; a more specific reason tag helps downstream callers (e.g. tests,
|
||||
# logs) without changing the rate_limit classification path.
|
||||
code = f"code_assist_http_{status}"
|
||||
if status == 401:
|
||||
code = "code_assist_unauthorized"
|
||||
elif status == 429:
|
||||
code = "code_assist_rate_limited"
|
||||
if error_reason == "MODEL_CAPACITY_EXHAUSTED":
|
||||
code = "code_assist_capacity_exhausted"
|
||||
|
||||
# Build a human-readable message. Keep the status + a raw-body tail for
|
||||
# debugging, but lead with a friendlier summary when we recognize the
|
||||
# Google signal.
|
||||
model_hint = ""
|
||||
if isinstance(error_metadata, dict):
|
||||
model_hint = str(error_metadata.get("model") or error_metadata.get("modelId") or "").strip()
|
||||
|
||||
if status == 429 and error_reason == "MODEL_CAPACITY_EXHAUSTED":
|
||||
target = model_hint or "this Gemini model"
|
||||
message = (
|
||||
f"Gemini capacity exhausted for {target} (Google-side throttle, "
|
||||
f"not a Hermes issue). Try a different Gemini model or set a "
|
||||
f"fallback_providers entry to a non-Gemini provider."
|
||||
)
|
||||
if retry_delay_seconds is not None:
|
||||
message += f" Google suggests retrying in {retry_delay_seconds:g}s."
|
||||
elif status == 429 and err_status == "RESOURCE_EXHAUSTED":
|
||||
message = (
|
||||
f"Gemini quota exhausted ({err_message or 'RESOURCE_EXHAUSTED'}). "
|
||||
f"Check /gquota for remaining daily requests."
|
||||
)
|
||||
if retry_delay_seconds is not None:
|
||||
message += f" Retry suggested in {retry_delay_seconds:g}s."
|
||||
elif status == 404:
|
||||
# Google returns 404 when a model has been retired or renamed.
|
||||
target = model_hint or (err_message or "model")
|
||||
message = (
|
||||
f"Code Assist 404: {target} is not available at "
|
||||
f"cloudcode-pa.googleapis.com. It may have been renamed or "
|
||||
f"retired. Check hermes_cli/models.py for the current list."
|
||||
)
|
||||
elif err_message:
|
||||
# Generic fallback with the parsed message.
|
||||
message = f"Code Assist HTTP {status} ({err_status or 'error'}): {err_message}"
|
||||
else:
|
||||
# Last-ditch fallback — raw body snippet.
|
||||
message = f"Code Assist returned HTTP {status}: {body_text[:500]}"
|
||||
|
||||
return CodeAssistError(
|
||||
message,
|
||||
code=code,
|
||||
status_code=status,
|
||||
response=response,
|
||||
retry_after=retry_delay_seconds,
|
||||
details={
|
||||
"status": err_status,
|
||||
"reason": error_reason,
|
||||
"metadata": error_metadata,
|
||||
"message": err_message,
|
||||
},
|
||||
)
|
||||
@@ -1,990 +0,0 @@
|
||||
"""OpenAI-compatible facade over Google AI Studio's native Gemini API.
|
||||
|
||||
Hermes keeps ``api_mode='chat_completions'`` for the ``gemini`` provider so the
|
||||
main agent loop can keep using its existing OpenAI-shaped message flow.
|
||||
This adapter is the transport shim that converts those OpenAI-style
|
||||
``messages[]`` / ``tools[]`` requests into Gemini's native
|
||||
``models/{model}:generateContent`` schema and converts the responses back.
|
||||
|
||||
Why this exists
|
||||
---------------
|
||||
Google's OpenAI-compatible endpoint has been brittle for Hermes's multi-turn
|
||||
agent/tool loop (auth churn, tool-call replay quirks, thought-signature
|
||||
requirements). The native Gemini API is the canonical path and avoids the
|
||||
OpenAI-compat layer entirely.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from agent.gemini_schema import sanitize_gemini_tool_parameters
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
|
||||
|
||||
# Published max output-token ceiling shared by every current Gemini text model
|
||||
# (2.5 + 3.x: flash, flash-lite, pro). Used as the default when the caller
|
||||
# passes max_tokens=None, because Gemini's native API otherwise applies a low
|
||||
# internal default and truncates output (unlike OpenAI-compat endpoints where
|
||||
# an omitted limit means full budget).
|
||||
GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65535
|
||||
|
||||
|
||||
def is_native_gemini_base_url(base_url: str) -> bool:
|
||||
"""Return True when the endpoint speaks Gemini's native REST API."""
|
||||
normalized = str(base_url or "").strip().rstrip("/").lower()
|
||||
if not normalized:
|
||||
return False
|
||||
if "generativelanguage.googleapis.com" not in normalized:
|
||||
return False
|
||||
return not normalized.endswith("/openai")
|
||||
|
||||
|
||||
def probe_gemini_tier(
|
||||
api_key: str,
|
||||
base_url: str = DEFAULT_GEMINI_BASE_URL,
|
||||
*,
|
||||
model: str = "gemini-2.5-flash",
|
||||
timeout: float = 10.0,
|
||||
) -> str:
|
||||
"""Probe a Google AI Studio API key and return its tier.
|
||||
|
||||
Returns one of:
|
||||
|
||||
- ``"free"`` -- key is on the free tier (unusable with Hermes)
|
||||
- ``"paid"`` -- key is on a paid tier
|
||||
- ``"unknown"`` -- probe failed; callers should proceed without blocking.
|
||||
"""
|
||||
key = (api_key or "").strip()
|
||||
if not key:
|
||||
return "unknown"
|
||||
|
||||
normalized_base = str(base_url or DEFAULT_GEMINI_BASE_URL).strip().rstrip("/")
|
||||
if not normalized_base:
|
||||
normalized_base = DEFAULT_GEMINI_BASE_URL
|
||||
if normalized_base.lower().endswith("/openai"):
|
||||
normalized_base = normalized_base[: -len("/openai")]
|
||||
|
||||
url = f"{normalized_base}/models/{model}:generateContent"
|
||||
payload = {
|
||||
"contents": [{"role": "user", "parts": [{"text": "hi"}]}],
|
||||
"generationConfig": {"maxOutputTokens": 1},
|
||||
}
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=timeout) as client:
|
||||
resp = client.post(
|
||||
url,
|
||||
params={"key": key},
|
||||
json=payload,
|
||||
headers={"Content-Type": "application/json"},
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("probe_gemini_tier: network error: %s", exc)
|
||||
return "unknown"
|
||||
|
||||
headers_lower = {k.lower(): v for k, v in resp.headers.items()}
|
||||
rpd_header = headers_lower.get("x-ratelimit-limit-requests-per-day")
|
||||
if rpd_header:
|
||||
try:
|
||||
rpd_val = int(rpd_header)
|
||||
except (TypeError, ValueError):
|
||||
rpd_val = None
|
||||
# Published free-tier daily caps (Dec 2025):
|
||||
# gemini-2.5-pro: 100, gemini-2.5-flash: 250, flash-lite: 1000
|
||||
# Tier 1 starts at ~1500+ for Flash. We treat <= 1000 as free.
|
||||
if rpd_val is not None and rpd_val <= 1000:
|
||||
return "free"
|
||||
if rpd_val is not None and rpd_val > 1000:
|
||||
return "paid"
|
||||
|
||||
if resp.status_code == 429:
|
||||
body_text = ""
|
||||
try:
|
||||
body_text = resp.text or ""
|
||||
except Exception:
|
||||
body_text = ""
|
||||
if "free_tier" in body_text.lower():
|
||||
return "free"
|
||||
return "paid"
|
||||
|
||||
if 200 <= resp.status_code < 300:
|
||||
return "paid"
|
||||
|
||||
return "unknown"
|
||||
|
||||
|
||||
def is_free_tier_quota_error(error_message: str) -> bool:
|
||||
"""Return True when a Gemini 429 message indicates free-tier exhaustion."""
|
||||
if not error_message:
|
||||
return False
|
||||
return "free_tier" in error_message.lower()
|
||||
|
||||
|
||||
_FREE_TIER_GUIDANCE = (
|
||||
"\n\nYour Google API key is on the free tier (<= 250 requests/day for "
|
||||
"gemini-2.5-flash). Hermes typically makes 3-10 API calls per user turn, "
|
||||
"so the free tier is exhausted in a handful of messages and cannot sustain "
|
||||
"an agent session. Enable billing on your Google Cloud project and "
|
||||
"regenerate the key in a billing-enabled project: "
|
||||
"https://aistudio.google.com/apikey"
|
||||
)
|
||||
|
||||
|
||||
class GeminiAPIError(Exception):
|
||||
"""Error shape compatible with Hermes retry/error classification."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
message: str,
|
||||
*,
|
||||
code: str = "gemini_api_error",
|
||||
status_code: Optional[int] = None,
|
||||
response: Optional[httpx.Response] = None,
|
||||
retry_after: Optional[float] = None,
|
||||
details: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
super().__init__(message)
|
||||
self.code = code
|
||||
self.status_code = status_code
|
||||
self.response = response
|
||||
self.retry_after = retry_after
|
||||
self.details = details or {}
|
||||
|
||||
|
||||
def _coerce_content_to_text(content: Any) -> str:
|
||||
if content is None:
|
||||
return ""
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
if isinstance(content, list):
|
||||
pieces: List[str] = []
|
||||
for part in content:
|
||||
if isinstance(part, str):
|
||||
pieces.append(part)
|
||||
elif isinstance(part, dict) and part.get("type") == "text":
|
||||
text = part.get("text")
|
||||
if isinstance(text, str):
|
||||
pieces.append(text)
|
||||
return "\n".join(pieces)
|
||||
return str(content)
|
||||
|
||||
|
||||
def _extract_multimodal_parts(content: Any) -> List[Dict[str, Any]]:
|
||||
if not isinstance(content, list):
|
||||
text = _coerce_content_to_text(content)
|
||||
return [{"text": text}] if text else []
|
||||
|
||||
parts: List[Dict[str, Any]] = []
|
||||
for item in content:
|
||||
if isinstance(item, str):
|
||||
parts.append({"text": item})
|
||||
continue
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
ptype = item.get("type")
|
||||
if ptype == "text":
|
||||
text = item.get("text")
|
||||
if isinstance(text, str) and text:
|
||||
parts.append({"text": text})
|
||||
elif ptype == "image_url":
|
||||
url = ((item.get("image_url") or {}).get("url") or "")
|
||||
if not isinstance(url, str) or not url.startswith("data:"):
|
||||
continue
|
||||
try:
|
||||
header, encoded = url.split(",", 1)
|
||||
mime = header.split(":", 1)[1].split(";", 1)[0]
|
||||
raw = base64.b64decode(encoded)
|
||||
except Exception:
|
||||
continue
|
||||
parts.append(
|
||||
{
|
||||
"inlineData": {
|
||||
"mimeType": mime,
|
||||
"data": base64.b64encode(raw).decode("ascii"),
|
||||
}
|
||||
}
|
||||
)
|
||||
return parts
|
||||
|
||||
|
||||
def _tool_call_extra_signature(tool_call: Dict[str, Any]) -> Optional[str]:
|
||||
extra = tool_call.get("extra_content") or {}
|
||||
if not isinstance(extra, dict):
|
||||
return None
|
||||
google = extra.get("google") or extra.get("thought_signature")
|
||||
if isinstance(google, dict):
|
||||
sig = google.get("thought_signature") or google.get("thoughtSignature")
|
||||
return str(sig) if isinstance(sig, str) and sig else None
|
||||
if isinstance(google, str) and google:
|
||||
return google
|
||||
return None
|
||||
|
||||
|
||||
def _translate_tool_call_to_gemini(tool_call: Dict[str, Any]) -> Dict[str, Any]:
|
||||
fn = tool_call.get("function") or {}
|
||||
args_raw = fn.get("arguments", "")
|
||||
try:
|
||||
args = json.loads(args_raw) if isinstance(args_raw, str) and args_raw else {}
|
||||
except json.JSONDecodeError:
|
||||
args = {"_raw": args_raw}
|
||||
if not isinstance(args, dict):
|
||||
args = {"_value": args}
|
||||
|
||||
part: Dict[str, Any] = {
|
||||
"functionCall": {
|
||||
"name": str(fn.get("name") or ""),
|
||||
"args": args,
|
||||
}
|
||||
}
|
||||
thought_signature = _tool_call_extra_signature(tool_call)
|
||||
if thought_signature:
|
||||
part["thoughtSignature"] = thought_signature
|
||||
return part
|
||||
|
||||
|
||||
def _translate_tool_result_to_gemini(
|
||||
message: Dict[str, Any],
|
||||
tool_name_by_call_id: Optional[Dict[str, str]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
tool_name_by_call_id = tool_name_by_call_id or {}
|
||||
tool_call_id = str(message.get("tool_call_id") or "")
|
||||
name = str(
|
||||
message.get("name")
|
||||
or tool_name_by_call_id.get(tool_call_id)
|
||||
or tool_call_id
|
||||
or "tool"
|
||||
)
|
||||
content = _coerce_content_to_text(message.get("content"))
|
||||
try:
|
||||
parsed = json.loads(content) if content.strip().startswith(("{", "[")) else None
|
||||
except json.JSONDecodeError:
|
||||
parsed = None
|
||||
response = parsed if isinstance(parsed, dict) else {"output": content}
|
||||
return {
|
||||
"functionResponse": {
|
||||
"name": name,
|
||||
"response": response,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _build_gemini_contents(messages: List[Dict[str, Any]]) -> tuple[List[Dict[str, Any]], Optional[Dict[str, Any]]]:
|
||||
system_text_parts: List[str] = []
|
||||
contents: List[Dict[str, Any]] = []
|
||||
tool_name_by_call_id: Dict[str, str] = {}
|
||||
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
role = str(msg.get("role") or "user")
|
||||
|
||||
if role == "system":
|
||||
system_text_parts.append(_coerce_content_to_text(msg.get("content")))
|
||||
continue
|
||||
|
||||
if role in {"tool", "function"}:
|
||||
contents.append(
|
||||
{
|
||||
"role": "user",
|
||||
"parts": [
|
||||
_translate_tool_result_to_gemini(
|
||||
msg,
|
||||
tool_name_by_call_id=tool_name_by_call_id,
|
||||
)
|
||||
],
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
gemini_role = "model" if role == "assistant" else "user"
|
||||
parts: List[Dict[str, Any]] = []
|
||||
|
||||
content_parts = _extract_multimodal_parts(msg.get("content"))
|
||||
parts.extend(content_parts)
|
||||
|
||||
tool_calls = msg.get("tool_calls") or []
|
||||
if isinstance(tool_calls, list):
|
||||
for tool_call in tool_calls:
|
||||
if isinstance(tool_call, dict):
|
||||
tool_call_id = str(tool_call.get("id") or tool_call.get("call_id") or "")
|
||||
tool_name = str(((tool_call.get("function") or {}).get("name") or ""))
|
||||
if tool_call_id and tool_name:
|
||||
tool_name_by_call_id[tool_call_id] = tool_name
|
||||
parts.append(_translate_tool_call_to_gemini(tool_call))
|
||||
|
||||
if parts:
|
||||
contents.append({"role": gemini_role, "parts": parts})
|
||||
|
||||
system_instruction = None
|
||||
joined_system = "\n".join(part for part in system_text_parts if part).strip()
|
||||
if joined_system:
|
||||
system_instruction = {"parts": [{"text": joined_system}]}
|
||||
return contents, system_instruction
|
||||
|
||||
|
||||
def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]:
|
||||
if not isinstance(tools, list):
|
||||
return []
|
||||
declarations: List[Dict[str, Any]] = []
|
||||
for tool in tools:
|
||||
if not isinstance(tool, dict):
|
||||
continue
|
||||
fn = tool.get("function") or {}
|
||||
if not isinstance(fn, dict):
|
||||
continue
|
||||
name = fn.get("name")
|
||||
if not isinstance(name, str) or not name:
|
||||
continue
|
||||
decl: Dict[str, Any] = {"name": name}
|
||||
description = fn.get("description")
|
||||
if isinstance(description, str) and description:
|
||||
decl["description"] = description
|
||||
parameters = fn.get("parameters")
|
||||
if isinstance(parameters, dict):
|
||||
decl["parameters"] = sanitize_gemini_tool_parameters(parameters)
|
||||
declarations.append(decl)
|
||||
return [{"functionDeclarations": declarations}] if declarations else []
|
||||
|
||||
|
||||
def _translate_tool_choice_to_gemini(tool_choice: Any) -> Optional[Dict[str, Any]]:
|
||||
if tool_choice is None:
|
||||
return None
|
||||
if isinstance(tool_choice, str):
|
||||
if tool_choice == "auto":
|
||||
return {"functionCallingConfig": {"mode": "AUTO"}}
|
||||
if tool_choice == "required":
|
||||
return {"functionCallingConfig": {"mode": "ANY"}}
|
||||
if tool_choice == "none":
|
||||
return {"functionCallingConfig": {"mode": "NONE"}}
|
||||
if isinstance(tool_choice, dict):
|
||||
fn = tool_choice.get("function") or {}
|
||||
name = fn.get("name")
|
||||
if isinstance(name, str) and name:
|
||||
return {"functionCallingConfig": {"mode": "ANY", "allowedFunctionNames": [name]}}
|
||||
return None
|
||||
|
||||
|
||||
def _normalize_thinking_config(config: Any) -> Optional[Dict[str, Any]]:
|
||||
if not isinstance(config, dict) or not config:
|
||||
return None
|
||||
budget = config.get("thinkingBudget", config.get("thinking_budget"))
|
||||
include = config.get("includeThoughts", config.get("include_thoughts"))
|
||||
level = config.get("thinkingLevel", config.get("thinking_level"))
|
||||
normalized: Dict[str, Any] = {}
|
||||
if isinstance(budget, (int, float)):
|
||||
normalized["thinkingBudget"] = int(budget)
|
||||
if isinstance(include, bool):
|
||||
normalized["includeThoughts"] = include
|
||||
if isinstance(level, str) and level.strip():
|
||||
normalized["thinkingLevel"] = level.strip().lower()
|
||||
return normalized or None
|
||||
|
||||
|
||||
def build_gemini_request(
|
||||
*,
|
||||
messages: List[Dict[str, Any]],
|
||||
tools: Any = None,
|
||||
tool_choice: Any = None,
|
||||
temperature: Optional[float] = None,
|
||||
max_tokens: Optional[int] = None,
|
||||
top_p: Optional[float] = None,
|
||||
stop: Any = None,
|
||||
thinking_config: Any = None,
|
||||
) -> Dict[str, Any]:
|
||||
contents, system_instruction = _build_gemini_contents(messages)
|
||||
request: Dict[str, Any] = {"contents": contents}
|
||||
if system_instruction:
|
||||
request["systemInstruction"] = system_instruction
|
||||
|
||||
gemini_tools = _translate_tools_to_gemini(tools)
|
||||
if gemini_tools:
|
||||
request["tools"] = gemini_tools
|
||||
|
||||
tool_config = _translate_tool_choice_to_gemini(tool_choice)
|
||||
if tool_config:
|
||||
request["toolConfig"] = tool_config
|
||||
|
||||
generation_config: Dict[str, Any] = {}
|
||||
if temperature is not None:
|
||||
generation_config["temperature"] = temperature
|
||||
if max_tokens is not None:
|
||||
generation_config["maxOutputTokens"] = max_tokens
|
||||
else:
|
||||
# Gemini's native generateContent does NOT treat an omitted
|
||||
# maxOutputTokens as "use the model's full output budget" — it applies
|
||||
# a low internal default and the model stops early with
|
||||
# finishReason=MAX_TOKENS, truncating tool calls mid-stream (Hermes
|
||||
# then retries 3× and refuses the incomplete call). Every current
|
||||
# Gemini text model (2.5 + 3.x, flash / flash-lite / pro) caps at
|
||||
# 65,535 output tokens, so default to that ceiling when the caller
|
||||
# passes None ("unlimited"). See the OpenAI-compat path where omitting
|
||||
# the field genuinely means full budget — that assumption does not
|
||||
# hold on the native API.
|
||||
generation_config["maxOutputTokens"] = GEMINI_DEFAULT_MAX_OUTPUT_TOKENS
|
||||
if top_p is not None:
|
||||
generation_config["topP"] = top_p
|
||||
if stop:
|
||||
generation_config["stopSequences"] = stop if isinstance(stop, list) else [str(stop)]
|
||||
normalized_thinking = _normalize_thinking_config(thinking_config)
|
||||
if normalized_thinking:
|
||||
generation_config["thinkingConfig"] = normalized_thinking
|
||||
if generation_config:
|
||||
request["generationConfig"] = generation_config
|
||||
|
||||
return request
|
||||
|
||||
|
||||
def _map_gemini_finish_reason(reason: str) -> str:
|
||||
mapping = {
|
||||
"STOP": "stop",
|
||||
"MAX_TOKENS": "length",
|
||||
"SAFETY": "content_filter",
|
||||
"RECITATION": "content_filter",
|
||||
"OTHER": "stop",
|
||||
}
|
||||
return mapping.get(str(reason or "").upper(), "stop")
|
||||
|
||||
|
||||
def _tool_call_extra_from_part(part: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
sig = part.get("thoughtSignature")
|
||||
if isinstance(sig, str) and sig:
|
||||
return {"google": {"thought_signature": sig}}
|
||||
return None
|
||||
|
||||
|
||||
def _empty_response(model: str) -> SimpleNamespace:
|
||||
message = SimpleNamespace(
|
||||
role="assistant",
|
||||
content="",
|
||||
tool_calls=None,
|
||||
reasoning=None,
|
||||
reasoning_content=None,
|
||||
reasoning_details=None,
|
||||
)
|
||||
choice = SimpleNamespace(index=0, message=message, finish_reason="stop")
|
||||
usage = SimpleNamespace(
|
||||
prompt_tokens=0,
|
||||
completion_tokens=0,
|
||||
total_tokens=0,
|
||||
prompt_tokens_details=SimpleNamespace(cached_tokens=0),
|
||||
)
|
||||
return SimpleNamespace(
|
||||
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
|
||||
object="chat.completion",
|
||||
created=int(time.time()),
|
||||
model=model,
|
||||
choices=[choice],
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
|
||||
def translate_gemini_response(resp: Dict[str, Any], model: str) -> SimpleNamespace:
|
||||
candidates = resp.get("candidates") or []
|
||||
if not isinstance(candidates, list) or not candidates:
|
||||
return _empty_response(model)
|
||||
|
||||
cand = candidates[0] if isinstance(candidates[0], dict) else {}
|
||||
content_obj = cand.get("content") if isinstance(cand, dict) else {}
|
||||
parts = content_obj.get("parts") if isinstance(content_obj, dict) else []
|
||||
|
||||
text_pieces: List[str] = []
|
||||
reasoning_pieces: List[str] = []
|
||||
tool_calls: List[SimpleNamespace] = []
|
||||
|
||||
for index, part in enumerate(parts or []):
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
if part.get("thought") is True and isinstance(part.get("text"), str):
|
||||
reasoning_pieces.append(part["text"])
|
||||
continue
|
||||
if isinstance(part.get("text"), str):
|
||||
text_pieces.append(part["text"])
|
||||
continue
|
||||
fc = part.get("functionCall")
|
||||
if isinstance(fc, dict) and fc.get("name"):
|
||||
try:
|
||||
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
|
||||
except (TypeError, ValueError):
|
||||
args_str = "{}"
|
||||
tool_call = SimpleNamespace(
|
||||
id=f"call_{uuid.uuid4().hex[:12]}",
|
||||
type="function",
|
||||
index=index,
|
||||
function=SimpleNamespace(name=str(fc["name"]), arguments=args_str),
|
||||
)
|
||||
extra_content = _tool_call_extra_from_part(part)
|
||||
if extra_content:
|
||||
tool_call.extra_content = extra_content
|
||||
tool_calls.append(tool_call)
|
||||
|
||||
finish_reason = "tool_calls" if tool_calls else _map_gemini_finish_reason(str(cand.get("finishReason") or ""))
|
||||
usage_meta = resp.get("usageMetadata") or {}
|
||||
usage = SimpleNamespace(
|
||||
prompt_tokens=int(usage_meta.get("promptTokenCount") or 0),
|
||||
completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0),
|
||||
total_tokens=int(usage_meta.get("totalTokenCount") or 0),
|
||||
prompt_tokens_details=SimpleNamespace(
|
||||
cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0),
|
||||
),
|
||||
)
|
||||
reasoning = "".join(reasoning_pieces) or None
|
||||
message = SimpleNamespace(
|
||||
role="assistant",
|
||||
content="".join(text_pieces) if text_pieces else None,
|
||||
tool_calls=tool_calls or None,
|
||||
reasoning=reasoning,
|
||||
reasoning_content=reasoning,
|
||||
reasoning_details=None,
|
||||
)
|
||||
choice = SimpleNamespace(index=0, message=message, finish_reason=finish_reason)
|
||||
return SimpleNamespace(
|
||||
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
|
||||
object="chat.completion",
|
||||
created=int(time.time()),
|
||||
model=model,
|
||||
choices=[choice],
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
|
||||
class _GeminiStreamChunk(SimpleNamespace):
|
||||
pass
|
||||
|
||||
|
||||
def _make_stream_chunk(
|
||||
*,
|
||||
model: str,
|
||||
content: str = "",
|
||||
tool_call_delta: Optional[Dict[str, Any]] = None,
|
||||
finish_reason: Optional[str] = None,
|
||||
reasoning: str = "",
|
||||
) -> _GeminiStreamChunk:
|
||||
delta_kwargs: Dict[str, Any] = {
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": None,
|
||||
"reasoning": None,
|
||||
"reasoning_content": None,
|
||||
}
|
||||
if content:
|
||||
delta_kwargs["content"] = content
|
||||
if tool_call_delta is not None:
|
||||
tool_delta = SimpleNamespace(
|
||||
index=tool_call_delta.get("index", 0),
|
||||
id=tool_call_delta.get("id") or f"call_{uuid.uuid4().hex[:12]}",
|
||||
type="function",
|
||||
function=SimpleNamespace(
|
||||
name=tool_call_delta.get("name") or "",
|
||||
arguments=tool_call_delta.get("arguments") or "",
|
||||
),
|
||||
)
|
||||
extra_content = tool_call_delta.get("extra_content")
|
||||
if isinstance(extra_content, dict):
|
||||
tool_delta.extra_content = extra_content
|
||||
delta_kwargs["tool_calls"] = [tool_delta]
|
||||
if reasoning:
|
||||
delta_kwargs["reasoning"] = reasoning
|
||||
delta_kwargs["reasoning_content"] = reasoning
|
||||
delta = SimpleNamespace(**delta_kwargs)
|
||||
choice = SimpleNamespace(index=0, delta=delta, finish_reason=finish_reason)
|
||||
return _GeminiStreamChunk(
|
||||
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
|
||||
object="chat.completion.chunk",
|
||||
created=int(time.time()),
|
||||
model=model,
|
||||
choices=[choice],
|
||||
usage=None,
|
||||
)
|
||||
|
||||
|
||||
def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]:
|
||||
buffer = ""
|
||||
for chunk in response.iter_text():
|
||||
if not chunk:
|
||||
continue
|
||||
buffer += chunk
|
||||
while "\n" in buffer:
|
||||
line, buffer = buffer.split("\n", 1)
|
||||
line = line.rstrip("\r")
|
||||
if not line:
|
||||
continue
|
||||
if not line.startswith("data: "):
|
||||
continue
|
||||
data = line[6:]
|
||||
if data == "[DONE]":
|
||||
return
|
||||
try:
|
||||
payload = json.loads(data)
|
||||
except json.JSONDecodeError:
|
||||
logger.debug("Non-JSON Gemini SSE line: %s", data[:200])
|
||||
continue
|
||||
if isinstance(payload, dict):
|
||||
yield payload
|
||||
|
||||
|
||||
def translate_stream_event(event: Dict[str, Any], model: str, tool_call_indices: Dict[str, Dict[str, Any]]) -> List[_GeminiStreamChunk]:
|
||||
candidates = event.get("candidates") or []
|
||||
if not candidates:
|
||||
return []
|
||||
cand = candidates[0] if isinstance(candidates[0], dict) else {}
|
||||
parts = ((cand.get("content") or {}).get("parts") or []) if isinstance(cand, dict) else []
|
||||
chunks: List[_GeminiStreamChunk] = []
|
||||
|
||||
for part_index, part in enumerate(parts):
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
if part.get("thought") is True and isinstance(part.get("text"), str):
|
||||
chunks.append(_make_stream_chunk(model=model, reasoning=part["text"]))
|
||||
continue
|
||||
if isinstance(part.get("text"), str) and part["text"]:
|
||||
chunks.append(_make_stream_chunk(model=model, content=part["text"]))
|
||||
fc = part.get("functionCall")
|
||||
if isinstance(fc, dict) and fc.get("name"):
|
||||
name = str(fc["name"])
|
||||
try:
|
||||
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False, sort_keys=True)
|
||||
except (TypeError, ValueError):
|
||||
args_str = "{}"
|
||||
thought_signature = part.get("thoughtSignature") if isinstance(part.get("thoughtSignature"), str) else ""
|
||||
call_key = json.dumps(
|
||||
{
|
||||
"part_index": part_index,
|
||||
"name": name,
|
||||
"thought_signature": thought_signature,
|
||||
},
|
||||
sort_keys=True,
|
||||
)
|
||||
slot = tool_call_indices.get(call_key)
|
||||
if slot is None:
|
||||
slot = {
|
||||
"index": len(tool_call_indices),
|
||||
"id": f"call_{uuid.uuid4().hex[:12]}",
|
||||
"last_arguments": "",
|
||||
}
|
||||
tool_call_indices[call_key] = slot
|
||||
emitted_arguments = args_str
|
||||
last_arguments = str(slot.get("last_arguments") or "")
|
||||
if last_arguments:
|
||||
if args_str == last_arguments:
|
||||
emitted_arguments = ""
|
||||
elif args_str.startswith(last_arguments):
|
||||
emitted_arguments = args_str[len(last_arguments):]
|
||||
slot["last_arguments"] = args_str
|
||||
chunks.append(
|
||||
_make_stream_chunk(
|
||||
model=model,
|
||||
tool_call_delta={
|
||||
"index": slot["index"],
|
||||
"id": slot["id"],
|
||||
"name": name,
|
||||
"arguments": emitted_arguments,
|
||||
"extra_content": _tool_call_extra_from_part(part),
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
finish_reason_raw = str(cand.get("finishReason") or "")
|
||||
if finish_reason_raw:
|
||||
mapped = "tool_calls" if tool_call_indices else _map_gemini_finish_reason(finish_reason_raw)
|
||||
finish_chunk = _make_stream_chunk(model=model, finish_reason=mapped)
|
||||
# Attach usage from this event's usageMetadata so the streaming
|
||||
# loop in run_agent.py can record token counts (mirrors the
|
||||
# non-streaming path in translate_gemini_response).
|
||||
usage_meta = event.get("usageMetadata") or {}
|
||||
if usage_meta:
|
||||
finish_chunk.usage = SimpleNamespace(
|
||||
prompt_tokens=int(usage_meta.get("promptTokenCount") or 0),
|
||||
completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0),
|
||||
total_tokens=int(usage_meta.get("totalTokenCount") or 0),
|
||||
prompt_tokens_details=SimpleNamespace(
|
||||
cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0),
|
||||
),
|
||||
)
|
||||
chunks.append(finish_chunk)
|
||||
return chunks
|
||||
|
||||
|
||||
def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
|
||||
status = response.status_code
|
||||
body_text = ""
|
||||
body_json: Dict[str, Any] = {}
|
||||
try:
|
||||
body_text = response.text
|
||||
except Exception:
|
||||
body_text = ""
|
||||
if body_text:
|
||||
try:
|
||||
parsed = json.loads(body_text)
|
||||
if isinstance(parsed, dict):
|
||||
body_json = parsed
|
||||
except (ValueError, TypeError):
|
||||
body_json = {}
|
||||
|
||||
err_obj = body_json.get("error") if isinstance(body_json, dict) else None
|
||||
if not isinstance(err_obj, dict):
|
||||
err_obj = {}
|
||||
err_status = str(err_obj.get("status") or "").strip()
|
||||
err_message = str(err_obj.get("message") or "").strip()
|
||||
_raw_details = err_obj.get("details")
|
||||
details_list = _raw_details if isinstance(_raw_details, list) else []
|
||||
|
||||
reason = ""
|
||||
retry_after: Optional[float] = None
|
||||
metadata: Dict[str, Any] = {}
|
||||
for detail in details_list:
|
||||
if not isinstance(detail, dict):
|
||||
continue
|
||||
type_url = str(detail.get("@type") or "")
|
||||
if not reason and type_url.endswith("/google.rpc.ErrorInfo"):
|
||||
reason_value = detail.get("reason")
|
||||
if isinstance(reason_value, str):
|
||||
reason = reason_value
|
||||
md = detail.get("metadata")
|
||||
if isinstance(md, dict):
|
||||
metadata = md
|
||||
header_retry = response.headers.get("Retry-After") or response.headers.get("retry-after")
|
||||
if header_retry:
|
||||
try:
|
||||
retry_after = float(header_retry)
|
||||
except (TypeError, ValueError):
|
||||
retry_after = None
|
||||
|
||||
code = f"gemini_http_{status}"
|
||||
if status == 401:
|
||||
code = "gemini_unauthorized"
|
||||
elif status == 429:
|
||||
code = "gemini_rate_limited"
|
||||
elif status == 404:
|
||||
code = "gemini_model_not_found"
|
||||
|
||||
if err_message:
|
||||
message = f"Gemini HTTP {status} ({err_status or 'error'}): {err_message}"
|
||||
else:
|
||||
message = f"Gemini returned HTTP {status}: {body_text[:500]}"
|
||||
|
||||
# Free-tier quota exhaustion -> append actionable guidance so users who
|
||||
# bypassed the setup wizard (direct GOOGLE_API_KEY in .env) still learn
|
||||
# that the free tier cannot sustain an agent session.
|
||||
if status == 429 and is_free_tier_quota_error(err_message or body_text):
|
||||
message = message + _FREE_TIER_GUIDANCE
|
||||
|
||||
return GeminiAPIError(
|
||||
message,
|
||||
code=code,
|
||||
status_code=status,
|
||||
response=response,
|
||||
retry_after=retry_after,
|
||||
details={
|
||||
"status": err_status,
|
||||
"reason": reason,
|
||||
"metadata": metadata,
|
||||
"message": err_message,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class _GeminiChatCompletions:
|
||||
def __init__(self, client: "GeminiNativeClient"):
|
||||
self._client = client
|
||||
|
||||
def create(self, **kwargs: Any) -> Any:
|
||||
return self._client._create_chat_completion(**kwargs)
|
||||
|
||||
|
||||
class _AsyncGeminiChatCompletions:
|
||||
def __init__(self, client: "AsyncGeminiNativeClient"):
|
||||
self._client = client
|
||||
|
||||
async def create(self, **kwargs: Any) -> Any:
|
||||
return await self._client._create_chat_completion(**kwargs)
|
||||
|
||||
|
||||
class _GeminiChatNamespace:
|
||||
def __init__(self, client: "GeminiNativeClient"):
|
||||
self.completions = _GeminiChatCompletions(client)
|
||||
|
||||
|
||||
class _AsyncGeminiChatNamespace:
|
||||
def __init__(self, client: "AsyncGeminiNativeClient"):
|
||||
self.completions = _AsyncGeminiChatCompletions(client)
|
||||
|
||||
|
||||
class GeminiNativeClient:
|
||||
"""Minimal OpenAI-SDK-compatible facade over Gemini's native REST API."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
api_key: str,
|
||||
base_url: Optional[str] = None,
|
||||
default_headers: Optional[Dict[str, str]] = None,
|
||||
timeout: Any = None,
|
||||
http_client: Optional[httpx.Client] = None,
|
||||
**_: Any,
|
||||
) -> None:
|
||||
if not (api_key or "").strip():
|
||||
raise RuntimeError(
|
||||
"Gemini native client requires an API key, but none was provided. "
|
||||
"Set GOOGLE_API_KEY or GEMINI_API_KEY in your environment / ~/.hermes/.env "
|
||||
"(get one at https://aistudio.google.com/app/apikey), or run `hermes setup` "
|
||||
"to configure the Google provider."
|
||||
)
|
||||
self.api_key = api_key
|
||||
normalized_base = (base_url or DEFAULT_GEMINI_BASE_URL).rstrip("/")
|
||||
if normalized_base.endswith("/openai"):
|
||||
normalized_base = normalized_base[: -len("/openai")]
|
||||
self.base_url = normalized_base
|
||||
self._default_headers = dict(default_headers or {})
|
||||
self.chat = _GeminiChatNamespace(self)
|
||||
self.is_closed = False
|
||||
self._http = http_client or httpx.Client(
|
||||
timeout=timeout or httpx.Timeout(connect=15.0, read=600.0, write=30.0, pool=30.0)
|
||||
)
|
||||
|
||||
def close(self) -> None:
|
||||
self.is_closed = True
|
||||
try:
|
||||
self._http.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
self.close()
|
||||
|
||||
def _headers(self) -> Dict[str, str]:
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"Accept": "application/json",
|
||||
"x-goog-api-key": self.api_key,
|
||||
"User-Agent": "hermes-agent (gemini-native)",
|
||||
}
|
||||
headers.update(self._default_headers)
|
||||
return headers
|
||||
|
||||
@staticmethod
|
||||
def _advance_stream_iterator(iterator: Iterator[_GeminiStreamChunk]) -> tuple[bool, Optional[_GeminiStreamChunk]]:
|
||||
try:
|
||||
return False, next(iterator)
|
||||
except StopIteration:
|
||||
return True, None
|
||||
|
||||
def _create_chat_completion(
|
||||
self,
|
||||
*,
|
||||
model: str = "gemini-2.5-flash",
|
||||
messages: Optional[List[Dict[str, Any]]] = None,
|
||||
stream: bool = False,
|
||||
tools: Any = None,
|
||||
tool_choice: Any = None,
|
||||
temperature: Optional[float] = None,
|
||||
max_tokens: Optional[int] = None,
|
||||
top_p: Optional[float] = None,
|
||||
stop: Any = None,
|
||||
extra_body: Optional[Dict[str, Any]] = None,
|
||||
timeout: Any = None,
|
||||
**_: Any,
|
||||
) -> Any:
|
||||
thinking_config = None
|
||||
if isinstance(extra_body, dict):
|
||||
thinking_config = extra_body.get("thinking_config") or extra_body.get("thinkingConfig")
|
||||
|
||||
request = build_gemini_request(
|
||||
messages=messages or [],
|
||||
tools=tools,
|
||||
tool_choice=tool_choice,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
top_p=top_p,
|
||||
stop=stop,
|
||||
thinking_config=thinking_config,
|
||||
)
|
||||
|
||||
if stream:
|
||||
return self._stream_completion(model=model, request=request, timeout=timeout)
|
||||
|
||||
url = f"{self.base_url}/models/{model}:generateContent"
|
||||
response = self._http.post(url, json=request, headers=self._headers(), timeout=timeout)
|
||||
if response.status_code != 200:
|
||||
raise gemini_http_error(response)
|
||||
try:
|
||||
payload = response.json()
|
||||
except ValueError as exc:
|
||||
raise GeminiAPIError(
|
||||
f"Invalid JSON from Gemini native API: {exc}",
|
||||
code="gemini_invalid_json",
|
||||
status_code=response.status_code,
|
||||
response=response,
|
||||
) from exc
|
||||
return translate_gemini_response(payload, model=model)
|
||||
|
||||
def _stream_completion(self, *, model: str, request: Dict[str, Any], timeout: Any = None) -> Iterator[_GeminiStreamChunk]:
|
||||
url = f"{self.base_url}/models/{model}:streamGenerateContent?alt=sse"
|
||||
stream_headers = dict(self._headers())
|
||||
stream_headers["Accept"] = "text/event-stream"
|
||||
|
||||
def _generator() -> Iterator[_GeminiStreamChunk]:
|
||||
try:
|
||||
with self._http.stream("POST", url, json=request, headers=stream_headers, timeout=timeout) as response:
|
||||
if response.status_code != 200:
|
||||
response.read()
|
||||
raise gemini_http_error(response)
|
||||
tool_call_indices: Dict[str, Dict[str, Any]] = {}
|
||||
for event in _iter_sse_events(response):
|
||||
for chunk in translate_stream_event(event, model, tool_call_indices):
|
||||
yield chunk
|
||||
except httpx.HTTPError as exc:
|
||||
raise GeminiAPIError(
|
||||
f"Gemini streaming request failed: {exc}",
|
||||
code="gemini_stream_error",
|
||||
) from exc
|
||||
|
||||
return _generator()
|
||||
|
||||
|
||||
class AsyncGeminiNativeClient:
|
||||
"""Async wrapper used by auxiliary_client for native Gemini calls."""
|
||||
|
||||
def __init__(self, sync_client: GeminiNativeClient):
|
||||
self._sync = sync_client
|
||||
self.api_key = sync_client.api_key
|
||||
self.base_url = sync_client.base_url
|
||||
self.chat = _AsyncGeminiChatNamespace(self)
|
||||
# Expose the underlying sync client as _real_client so the auxiliary
|
||||
# cache's eviction-by-leaf-client helper (#23482) can find and drop
|
||||
# this async entry when the sync GeminiNativeClient is poisoned.
|
||||
# GeminiNativeClient is itself the leaf (no OpenAI client beneath
|
||||
# it), so we point at the sync_client directly.
|
||||
self._real_client = sync_client
|
||||
|
||||
async def _create_chat_completion(self, **kwargs: Any) -> Any:
|
||||
stream = bool(kwargs.get("stream"))
|
||||
result = await asyncio.to_thread(self._sync.chat.completions.create, **kwargs)
|
||||
if not stream:
|
||||
return result
|
||||
|
||||
async def _async_stream() -> Any:
|
||||
while True:
|
||||
done, chunk = await asyncio.to_thread(self._sync._advance_stream_iterator, result)
|
||||
if done:
|
||||
break
|
||||
yield chunk
|
||||
|
||||
return _async_stream()
|
||||
|
||||
async def close(self) -> None:
|
||||
await asyncio.to_thread(self._sync.close)
|
||||
@@ -1,99 +0,0 @@
|
||||
"""Helpers for translating OpenAI-style tool schemas to Gemini's schema subset."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict
|
||||
|
||||
# Gemini's ``FunctionDeclaration.parameters`` field accepts the ``Schema``
|
||||
# object, which is only a subset of OpenAPI 3.0 / JSON Schema. Strip fields
|
||||
# outside that subset before sending Hermes tool schemas to Google.
|
||||
_GEMINI_SCHEMA_ALLOWED_KEYS = {
|
||||
"type",
|
||||
"format",
|
||||
"title",
|
||||
"description",
|
||||
"nullable",
|
||||
"enum",
|
||||
"maxItems",
|
||||
"minItems",
|
||||
"properties",
|
||||
"required",
|
||||
"minProperties",
|
||||
"maxProperties",
|
||||
"minLength",
|
||||
"maxLength",
|
||||
"pattern",
|
||||
"example",
|
||||
"anyOf",
|
||||
"propertyOrdering",
|
||||
"default",
|
||||
"items",
|
||||
"minimum",
|
||||
"maximum",
|
||||
}
|
||||
|
||||
|
||||
def sanitize_gemini_schema(schema: Any) -> Dict[str, Any]:
|
||||
"""Return a Gemini-compatible copy of a tool parameter schema.
|
||||
|
||||
Hermes tool schemas are OpenAI-flavored JSON Schema and may contain keys
|
||||
such as ``$schema`` or ``additionalProperties`` that Google's Gemini
|
||||
``Schema`` object rejects. This helper preserves the documented Gemini
|
||||
subset and recursively sanitizes nested ``properties`` / ``items`` /
|
||||
``anyOf`` definitions.
|
||||
"""
|
||||
|
||||
if not isinstance(schema, dict):
|
||||
return {}
|
||||
|
||||
cleaned: Dict[str, Any] = {}
|
||||
for key, value in schema.items():
|
||||
if key not in _GEMINI_SCHEMA_ALLOWED_KEYS:
|
||||
continue
|
||||
if key == "properties":
|
||||
if not isinstance(value, dict):
|
||||
continue
|
||||
props: Dict[str, Any] = {}
|
||||
for prop_name, prop_schema in value.items():
|
||||
if not isinstance(prop_name, str):
|
||||
continue
|
||||
props[prop_name] = sanitize_gemini_schema(prop_schema)
|
||||
cleaned[key] = props
|
||||
continue
|
||||
if key == "items":
|
||||
cleaned[key] = sanitize_gemini_schema(value)
|
||||
continue
|
||||
if key == "anyOf":
|
||||
if not isinstance(value, list):
|
||||
continue
|
||||
cleaned[key] = [
|
||||
sanitize_gemini_schema(item)
|
||||
for item in value
|
||||
if isinstance(item, dict)
|
||||
]
|
||||
continue
|
||||
cleaned[key] = value
|
||||
|
||||
# Gemini's Schema validator requires every ``enum`` entry to be a string,
|
||||
# even when the parent ``type`` is ``integer`` / ``number`` / ``boolean``.
|
||||
# OpenAI / OpenRouter / Anthropic accept typed enums (e.g. Discord's
|
||||
# ``auto_archive_duration: {type: integer, enum: [60, 1440, 4320, 10080]}``),
|
||||
# so we only drop the ``enum`` when it would collide with Gemini's rule.
|
||||
# Keeping ``type: integer`` plus the human-readable description gives the
|
||||
# model enough guidance; the tool handler still validates the value.
|
||||
enum_val = cleaned.get("enum")
|
||||
type_val = cleaned.get("type")
|
||||
if isinstance(enum_val, list) and type_val in {"integer", "number", "boolean"}:
|
||||
if any(not isinstance(item, str) for item in enum_val):
|
||||
cleaned.pop("enum", None)
|
||||
|
||||
return cleaned
|
||||
|
||||
|
||||
def sanitize_gemini_tool_parameters(parameters: Any) -> Dict[str, Any]:
|
||||
"""Normalize tool parameters to a valid Gemini object schema."""
|
||||
|
||||
cleaned = sanitize_gemini_schema(parameters)
|
||||
if not cleaned:
|
||||
return {"type": "object", "properties": {}}
|
||||
return cleaned
|
||||
@@ -1,451 +0,0 @@
|
||||
"""Google Code Assist API client — project discovery, onboarding, quota.
|
||||
|
||||
The Code Assist API powers Google's official gemini-cli. It sits at
|
||||
``cloudcode-pa.googleapis.com`` and provides:
|
||||
|
||||
- Free tier access (generous daily quota) for personal Google accounts
|
||||
- Paid tier access via GCP projects with billing / Workspace / Standard / Enterprise
|
||||
|
||||
This module handles the control-plane dance needed before inference:
|
||||
|
||||
1. ``load_code_assist()`` — probe the user's account to learn what tier they're on
|
||||
and whether a ``cloudaicompanionProject`` is already assigned.
|
||||
2. ``onboard_user()`` — if the user hasn't been onboarded yet (new account, fresh
|
||||
free tier, etc.), call this with the chosen tier + project id. Supports LRO
|
||||
polling for slow provisioning.
|
||||
3. ``retrieve_user_quota()`` — fetch the ``buckets[]`` array showing remaining
|
||||
quota per model, used by the ``/gquota`` slash command.
|
||||
|
||||
VPC-SC handling: enterprise accounts under a VPC Service Controls perimeter
|
||||
will get ``SECURITY_POLICY_VIOLATED`` on ``load_code_assist``. We catch this
|
||||
and force the account to ``standard-tier`` so the call chain still succeeds.
|
||||
|
||||
Derived from opencode-gemini-auth (MIT) and clawdbot/extensions/google. The
|
||||
request/response shapes are specific to Google's internal Code Assist API,
|
||||
documented nowhere public — we copy them from the reference implementations.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Constants
|
||||
# =============================================================================
|
||||
|
||||
CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com"
|
||||
|
||||
# Fallback endpoints tried when prod returns an error during project discovery
|
||||
FALLBACK_ENDPOINTS = [
|
||||
"https://daily-cloudcode-pa.sandbox.googleapis.com",
|
||||
"https://autopush-cloudcode-pa.sandbox.googleapis.com",
|
||||
]
|
||||
|
||||
# Tier identifiers that Google's API uses
|
||||
FREE_TIER_ID = "free-tier"
|
||||
LEGACY_TIER_ID = "legacy-tier"
|
||||
STANDARD_TIER_ID = "standard-tier"
|
||||
|
||||
# Default HTTP headers matching gemini-cli's fingerprint.
|
||||
# Google may reject unrecognized User-Agents on these internal endpoints.
|
||||
_GEMINI_CLI_USER_AGENT = "google-api-nodejs-client/9.15.1 (gzip)"
|
||||
_X_GOOG_API_CLIENT = "gl-node/24.0.0"
|
||||
_DEFAULT_REQUEST_TIMEOUT = 30.0
|
||||
_ONBOARDING_POLL_ATTEMPTS = 12
|
||||
_ONBOARDING_POLL_INTERVAL_SECONDS = 5.0
|
||||
|
||||
|
||||
class CodeAssistError(RuntimeError):
|
||||
"""Exception raised by the Code Assist (``cloudcode-pa``) integration.
|
||||
|
||||
Carries HTTP status / response / retry-after metadata so the agent's
|
||||
``error_classifier._extract_status_code`` and the main loop's Retry-After
|
||||
handling (which walks ``error.response.headers``) pick up the right
|
||||
signals. Without these, 429s from the OAuth path look like opaque
|
||||
``RuntimeError`` and skip the rate-limit path.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
message: str,
|
||||
*,
|
||||
code: str = "code_assist_error",
|
||||
status_code: Optional[int] = None,
|
||||
response: Any = None,
|
||||
retry_after: Optional[float] = None,
|
||||
details: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
super().__init__(message)
|
||||
self.code = code
|
||||
# ``status_code`` is picked up by ``agent.error_classifier._extract_status_code``
|
||||
# so a 429 from Code Assist classifies as FailoverReason.rate_limit and
|
||||
# triggers the main loop's fallback_providers chain the same way SDK
|
||||
# errors do.
|
||||
self.status_code = status_code
|
||||
# ``response`` is the underlying ``httpx.Response`` (or a shim with a
|
||||
# ``.headers`` mapping and ``.json()`` method). The main loop reads
|
||||
# ``error.response.headers["Retry-After"]`` to honor Google's retry
|
||||
# hints when the backend throttles us.
|
||||
self.response = response
|
||||
# Parsed ``Retry-After`` seconds (kept separately for convenience —
|
||||
# Google returns retry hints in both the header and the error body's
|
||||
# ``google.rpc.RetryInfo`` details, and we pick whichever we found).
|
||||
self.retry_after = retry_after
|
||||
# Parsed structured error details from the Google error envelope
|
||||
# (e.g. ``{"reason": "MODEL_CAPACITY_EXHAUSTED", "status": "RESOURCE_EXHAUSTED"}``).
|
||||
# Useful for logging and for tests that want to assert on specifics.
|
||||
self.details = details or {}
|
||||
|
||||
|
||||
class ProjectIdRequiredError(CodeAssistError):
|
||||
def __init__(self, message: str = "GCP project id required for this tier") -> None:
|
||||
super().__init__(message, code="code_assist_project_id_required")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# HTTP primitive (auth via Bearer token passed per-call)
|
||||
# =============================================================================
|
||||
|
||||
def _build_headers(access_token: str, *, user_agent_model: str = "") -> Dict[str, str]:
|
||||
ua = _GEMINI_CLI_USER_AGENT
|
||||
if user_agent_model:
|
||||
ua = f"{ua} model/{user_agent_model}"
|
||||
return {
|
||||
"Content-Type": "application/json",
|
||||
"Accept": "application/json",
|
||||
"Authorization": f"Bearer {access_token}",
|
||||
"User-Agent": ua,
|
||||
"X-Goog-Api-Client": _X_GOOG_API_CLIENT,
|
||||
"x-activity-request-id": str(uuid.uuid4()),
|
||||
}
|
||||
|
||||
|
||||
def _client_metadata() -> Dict[str, str]:
|
||||
"""Match Google's gemini-cli exactly — unrecognized metadata may be rejected."""
|
||||
return {
|
||||
"ideType": "IDE_UNSPECIFIED",
|
||||
"platform": "PLATFORM_UNSPECIFIED",
|
||||
"pluginType": "GEMINI",
|
||||
}
|
||||
|
||||
|
||||
def _post_json(
|
||||
url: str,
|
||||
body: Dict[str, Any],
|
||||
access_token: str,
|
||||
*,
|
||||
timeout: float = _DEFAULT_REQUEST_TIMEOUT,
|
||||
user_agent_model: str = "",
|
||||
) -> Dict[str, Any]:
|
||||
data = json.dumps(body).encode("utf-8")
|
||||
request = urllib.request.Request(
|
||||
url, data=data, method="POST",
|
||||
headers=_build_headers(access_token, user_agent_model=user_agent_model),
|
||||
)
|
||||
try:
|
||||
with urllib.request.urlopen(request, timeout=timeout) as response:
|
||||
raw = response.read().decode("utf-8", errors="replace")
|
||||
return json.loads(raw) if raw else {}
|
||||
except urllib.error.HTTPError as exc:
|
||||
detail = ""
|
||||
try:
|
||||
detail = exc.read().decode("utf-8", errors="replace")
|
||||
except Exception:
|
||||
pass
|
||||
# Special case: VPC-SC violation should be distinguishable
|
||||
if _is_vpc_sc_violation(detail):
|
||||
raise CodeAssistError(
|
||||
f"VPC-SC policy violation: {detail}",
|
||||
code="code_assist_vpc_sc",
|
||||
) from exc
|
||||
raise CodeAssistError(
|
||||
f"Code Assist HTTP {exc.code}: {detail or exc.reason}",
|
||||
code=f"code_assist_http_{exc.code}",
|
||||
) from exc
|
||||
except urllib.error.URLError as exc:
|
||||
raise CodeAssistError(
|
||||
f"Code Assist request failed: {exc}",
|
||||
code="code_assist_network_error",
|
||||
) from exc
|
||||
|
||||
|
||||
def _is_vpc_sc_violation(body: str) -> bool:
|
||||
"""Detect a VPC Service Controls violation from a response body."""
|
||||
if not body:
|
||||
return False
|
||||
try:
|
||||
parsed = json.loads(body)
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
return "SECURITY_POLICY_VIOLATED" in body
|
||||
# Walk the nested error structure Google uses
|
||||
error = parsed.get("error") if isinstance(parsed, dict) else None
|
||||
if not isinstance(error, dict):
|
||||
return False
|
||||
details = error.get("details") or []
|
||||
if isinstance(details, list):
|
||||
for item in details:
|
||||
if isinstance(item, dict):
|
||||
reason = item.get("reason") or ""
|
||||
if reason == "SECURITY_POLICY_VIOLATED":
|
||||
return True
|
||||
msg = str(error.get("message", ""))
|
||||
return "SECURITY_POLICY_VIOLATED" in msg
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# load_code_assist — discovers current tier + assigned project
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
|
||||
class CodeAssistProjectInfo:
|
||||
"""Result from ``load_code_assist``."""
|
||||
current_tier_id: str = ""
|
||||
cloudaicompanion_project: str = "" # Google-managed project (free tier)
|
||||
allowed_tiers: List[str] = field(default_factory=list)
|
||||
raw: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
def load_code_assist(
|
||||
access_token: str,
|
||||
*,
|
||||
project_id: str = "",
|
||||
user_agent_model: str = "",
|
||||
) -> CodeAssistProjectInfo:
|
||||
"""Call ``POST /v1internal:loadCodeAssist`` with prod → sandbox fallback.
|
||||
|
||||
Returns whatever tier + project info Google reports. On VPC-SC violations,
|
||||
returns a synthetic ``standard-tier`` result so the chain can continue.
|
||||
"""
|
||||
body: Dict[str, Any] = {
|
||||
"metadata": {
|
||||
"duetProject": project_id,
|
||||
**_client_metadata(),
|
||||
},
|
||||
}
|
||||
if project_id:
|
||||
body["cloudaicompanionProject"] = project_id
|
||||
|
||||
endpoints = [CODE_ASSIST_ENDPOINT] + FALLBACK_ENDPOINTS
|
||||
last_err: Optional[Exception] = None
|
||||
for endpoint in endpoints:
|
||||
url = f"{endpoint}/v1internal:loadCodeAssist"
|
||||
try:
|
||||
resp = _post_json(url, body, access_token, user_agent_model=user_agent_model)
|
||||
return _parse_load_response(resp)
|
||||
except CodeAssistError as exc:
|
||||
if exc.code == "code_assist_vpc_sc":
|
||||
logger.info("VPC-SC violation on %s — defaulting to standard-tier", endpoint)
|
||||
return CodeAssistProjectInfo(
|
||||
current_tier_id=STANDARD_TIER_ID,
|
||||
cloudaicompanion_project=project_id,
|
||||
)
|
||||
last_err = exc
|
||||
logger.warning("loadCodeAssist failed on %s: %s", endpoint, exc)
|
||||
continue
|
||||
if last_err:
|
||||
raise last_err
|
||||
return CodeAssistProjectInfo()
|
||||
|
||||
|
||||
def _parse_load_response(resp: Dict[str, Any]) -> CodeAssistProjectInfo:
|
||||
current_tier = resp.get("currentTier") or {}
|
||||
tier_id = str(current_tier.get("id") or "") if isinstance(current_tier, dict) else ""
|
||||
project = str(resp.get("cloudaicompanionProject") or "")
|
||||
allowed = resp.get("allowedTiers") or []
|
||||
allowed_ids: List[str] = []
|
||||
if isinstance(allowed, list):
|
||||
for t in allowed:
|
||||
if isinstance(t, dict):
|
||||
tid = str(t.get("id") or "")
|
||||
if tid:
|
||||
allowed_ids.append(tid)
|
||||
return CodeAssistProjectInfo(
|
||||
current_tier_id=tier_id,
|
||||
cloudaicompanion_project=project,
|
||||
allowed_tiers=allowed_ids,
|
||||
raw=resp,
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# onboard_user — provisions a new user on a tier (with LRO polling)
|
||||
# =============================================================================
|
||||
|
||||
def onboard_user(
|
||||
access_token: str,
|
||||
*,
|
||||
tier_id: str,
|
||||
project_id: str = "",
|
||||
user_agent_model: str = "",
|
||||
) -> Dict[str, Any]:
|
||||
"""Call ``POST /v1internal:onboardUser`` to provision the user.
|
||||
|
||||
For paid tiers, ``project_id`` is REQUIRED (raises ProjectIdRequiredError).
|
||||
For free tiers, ``project_id`` is optional — Google will assign one.
|
||||
|
||||
Returns the final operation response. Polls ``/v1internal/<name>`` for up
|
||||
to ``_ONBOARDING_POLL_ATTEMPTS`` × ``_ONBOARDING_POLL_INTERVAL_SECONDS``
|
||||
(default: 12 × 5s = 1 min).
|
||||
"""
|
||||
if tier_id != FREE_TIER_ID and tier_id != LEGACY_TIER_ID and not project_id:
|
||||
raise ProjectIdRequiredError(
|
||||
f"Tier {tier_id!r} requires a GCP project id. "
|
||||
"Set HERMES_GEMINI_PROJECT_ID or GOOGLE_CLOUD_PROJECT."
|
||||
)
|
||||
|
||||
body: Dict[str, Any] = {
|
||||
"tierId": tier_id,
|
||||
"metadata": _client_metadata(),
|
||||
}
|
||||
if project_id:
|
||||
body["cloudaicompanionProject"] = project_id
|
||||
|
||||
endpoint = CODE_ASSIST_ENDPOINT
|
||||
url = f"{endpoint}/v1internal:onboardUser"
|
||||
resp = _post_json(url, body, access_token, user_agent_model=user_agent_model)
|
||||
|
||||
# Poll if LRO (long-running operation)
|
||||
if not resp.get("done"):
|
||||
op_name = resp.get("name", "")
|
||||
if not op_name:
|
||||
return resp
|
||||
for attempt in range(_ONBOARDING_POLL_ATTEMPTS):
|
||||
time.sleep(_ONBOARDING_POLL_INTERVAL_SECONDS)
|
||||
poll_url = f"{endpoint}/v1internal/{op_name}"
|
||||
try:
|
||||
poll_resp = _post_json(poll_url, {}, access_token, user_agent_model=user_agent_model)
|
||||
except CodeAssistError as exc:
|
||||
logger.warning("Onboarding poll attempt %d failed: %s", attempt + 1, exc)
|
||||
continue
|
||||
if poll_resp.get("done"):
|
||||
return poll_resp
|
||||
logger.warning("Onboarding did not complete within %d attempts", _ONBOARDING_POLL_ATTEMPTS)
|
||||
return resp
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# retrieve_user_quota — for /gquota
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
|
||||
class QuotaBucket:
|
||||
model_id: str
|
||||
token_type: str = ""
|
||||
remaining_fraction: float = 0.0
|
||||
reset_time_iso: str = ""
|
||||
raw: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
def retrieve_user_quota(
|
||||
access_token: str,
|
||||
*,
|
||||
project_id: str = "",
|
||||
user_agent_model: str = "",
|
||||
) -> List[QuotaBucket]:
|
||||
"""Call ``POST /v1internal:retrieveUserQuota`` and parse ``buckets[]``."""
|
||||
body: Dict[str, Any] = {}
|
||||
if project_id:
|
||||
body["project"] = project_id
|
||||
url = f"{CODE_ASSIST_ENDPOINT}/v1internal:retrieveUserQuota"
|
||||
resp = _post_json(url, body, access_token, user_agent_model=user_agent_model)
|
||||
raw_buckets = resp.get("buckets") or []
|
||||
buckets: List[QuotaBucket] = []
|
||||
if not isinstance(raw_buckets, list):
|
||||
return buckets
|
||||
for b in raw_buckets:
|
||||
if not isinstance(b, dict):
|
||||
continue
|
||||
buckets.append(QuotaBucket(
|
||||
model_id=str(b.get("modelId") or ""),
|
||||
token_type=str(b.get("tokenType") or ""),
|
||||
remaining_fraction=float(b.get("remainingFraction") or 0.0),
|
||||
reset_time_iso=str(b.get("resetTime") or ""),
|
||||
raw=b,
|
||||
))
|
||||
return buckets
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Project context resolution
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
|
||||
class ProjectContext:
|
||||
"""Resolved state for a given OAuth session."""
|
||||
project_id: str = "" # effective project id sent on requests
|
||||
managed_project_id: str = "" # Google-assigned project (free tier)
|
||||
tier_id: str = ""
|
||||
source: str = "" # "env", "config", "discovered", "onboarded"
|
||||
|
||||
|
||||
def resolve_project_context(
|
||||
access_token: str,
|
||||
*,
|
||||
configured_project_id: str = "",
|
||||
env_project_id: str = "",
|
||||
user_agent_model: str = "",
|
||||
) -> ProjectContext:
|
||||
"""Figure out what project id + tier to use for requests.
|
||||
|
||||
Priority:
|
||||
1. If configured_project_id or env_project_id is set, use that directly
|
||||
and short-circuit (no discovery needed).
|
||||
2. Otherwise call loadCodeAssist to see what Google says.
|
||||
3. If no tier assigned yet, onboard the user (free tier default).
|
||||
"""
|
||||
# Short-circuit: caller provided a project id
|
||||
if configured_project_id:
|
||||
return ProjectContext(
|
||||
project_id=configured_project_id,
|
||||
tier_id=STANDARD_TIER_ID, # assume paid since they specified one
|
||||
source="config",
|
||||
)
|
||||
if env_project_id:
|
||||
return ProjectContext(
|
||||
project_id=env_project_id,
|
||||
tier_id=STANDARD_TIER_ID,
|
||||
source="env",
|
||||
)
|
||||
|
||||
# Discover via loadCodeAssist
|
||||
info = load_code_assist(access_token, user_agent_model=user_agent_model)
|
||||
|
||||
effective_project = info.cloudaicompanion_project
|
||||
tier = info.current_tier_id
|
||||
|
||||
if not tier:
|
||||
# User hasn't been onboarded — provision them on free tier
|
||||
onboard_resp = onboard_user(
|
||||
access_token,
|
||||
tier_id=FREE_TIER_ID,
|
||||
project_id="",
|
||||
user_agent_model=user_agent_model,
|
||||
)
|
||||
# Re-parse from the onboard response
|
||||
response_body = onboard_resp.get("response") or {}
|
||||
if isinstance(response_body, dict):
|
||||
effective_project = (
|
||||
effective_project
|
||||
or str(response_body.get("cloudaicompanionProject") or "")
|
||||
)
|
||||
tier = FREE_TIER_ID
|
||||
source = "onboarded"
|
||||
else:
|
||||
source = "discovered"
|
||||
|
||||
return ProjectContext(
|
||||
project_id=effective_project,
|
||||
managed_project_id=effective_project if tier == FREE_TIER_ID else "",
|
||||
tier_id=tier,
|
||||
source=source,
|
||||
)
|
||||
File diff suppressed because it is too large
Load Diff
302
agent/i18n.py
302
agent/i18n.py
@@ -1,302 +0,0 @@
|
||||
"""Lightweight internationalization (i18n) for Hermes static user-facing messages.
|
||||
|
||||
Scope (thin slice, by design): only the highest-impact static strings shown
|
||||
to the user by Hermes itself -- approval prompts, a handful of gateway slash
|
||||
command replies, restart-drain notices. Agent-generated output, log lines,
|
||||
error tracebacks, tool outputs, and slash-command descriptions all stay in
|
||||
English.
|
||||
|
||||
Catalog files live under ``locales/<lang>.yaml`` at the repo root. Each
|
||||
catalog is a flat dict keyed by dotted paths (e.g. ``approval.choose`` or
|
||||
``gateway.approval_expired``). Missing keys fall back to English; if English
|
||||
is missing too, the key path itself is returned so a broken catalog never
|
||||
crashes the agent.
|
||||
|
||||
Usage::
|
||||
|
||||
from agent.i18n import t
|
||||
print(t("approval.choose_long")) # current lang
|
||||
print(t("gateway.draining", count=3)) # {count} formatted
|
||||
print(t("approval.choose_long", lang="zh")) # explicit override
|
||||
|
||||
Language resolution order:
|
||||
1. Explicit ``lang=`` argument passed to :func:`t`
|
||||
2. ``HERMES_LANGUAGE`` environment variable (for tests / quick override)
|
||||
3. ``display.language`` from config.yaml
|
||||
4. ``"en"`` (baseline)
|
||||
|
||||
Supported languages: en, zh, ja, de, es, fr, tr, uk. Unknown values fall back to en.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sysconfig
|
||||
import threading
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SUPPORTED_LANGUAGES: tuple[str, ...] = (
|
||||
"en", "zh", "zh-hant", "ja", "de", "es", "fr", "tr", "uk",
|
||||
"af", "ko", "it", "ga", "pt", "ru", "hu",
|
||||
)
|
||||
DEFAULT_LANGUAGE = "en"
|
||||
|
||||
# Accept a few natural aliases so users who type "chinese" / "zh-CN" / "jp"
|
||||
# get the right catalog instead of silently falling back to English.
|
||||
_LANGUAGE_ALIASES: dict[str, str] = {
|
||||
"english": "en", "en-us": "en", "en-gb": "en",
|
||||
# Simplified Chinese — explicit codes route here; bare "chinese" / "mandarin"
|
||||
# also default to Simplified since that's the larger user base.
|
||||
"chinese": "zh", "mandarin": "zh", "zh-cn": "zh", "zh-hans": "zh", "zh-sg": "zh",
|
||||
# Traditional Chinese — distinct catalog. Cover Taiwan / Hong Kong / Macau
|
||||
# locale tags plus the common "traditional" alias.
|
||||
"traditional-chinese": "zh-hant", "traditional_chinese": "zh-hant",
|
||||
"zh-tw": "zh-hant", "zh-hk": "zh-hant", "zh-mo": "zh-hant",
|
||||
"japanese": "ja", "jp": "ja", "ja-jp": "ja",
|
||||
"german": "de", "deutsch": "de", "de-de": "de", "de-at": "de", "de-ch": "de",
|
||||
"spanish": "es", "español": "es", "espanol": "es", "es-es": "es", "es-mx": "es", "es-ar": "es",
|
||||
"french": "fr", "français": "fr", "france": "fr", "fr-fr": "fr", "fr-be": "fr", "fr-ca": "fr", "fr-ch": "fr",
|
||||
"ukrainian": "uk", "ukrainisch": "uk", "українська": "uk", "uk-ua": "uk", "ua": "uk",
|
||||
"turkish": "tr", "türkçe": "tr", "tr-tr": "tr",
|
||||
# Afrikaans — South African Dutch-derived language; "af-ZA" is the common BCP-47 tag.
|
||||
"afrikaans": "af", "af-za": "af",
|
||||
# Korean
|
||||
"korean": "ko", "한국어": "ko", "ko-kr": "ko",
|
||||
# Italian
|
||||
"italian": "it", "italiano": "it", "it-it": "it", "it-ch": "it",
|
||||
# Irish (Gaeilge) — ga is the BCP-47 code
|
||||
"irish": "ga", "gaeilge": "ga", "ga-ie": "ga",
|
||||
# Portuguese — bare "portuguese" routes to European Portuguese; pt-br
|
||||
# is in the same family but rendered identically here (no separate br catalog).
|
||||
"portuguese": "pt", "português": "pt", "portugues": "pt",
|
||||
"pt-pt": "pt", "pt-br": "pt", "brazilian": "pt", "brasileiro": "pt",
|
||||
# Russian
|
||||
"russian": "ru", "русский": "ru", "ru-ru": "ru",
|
||||
# Hungarian
|
||||
"hungarian": "hu", "magyar": "hu", "hu-hu": "hu",
|
||||
}
|
||||
|
||||
_catalog_cache: dict[str, dict[str, str]] = {}
|
||||
_catalog_lock = threading.Lock()
|
||||
|
||||
|
||||
def _locales_dir() -> Path:
|
||||
"""Return the directory containing locale YAML files.
|
||||
|
||||
Resolution order, first existing wins:
|
||||
|
||||
1. ``HERMES_BUNDLED_LOCALES`` env var -- set by the Nix wrapper (or any
|
||||
sealed-packaging system) to point at the installed catalog directory.
|
||||
2. ``<repo-root>/locales`` -- source checkouts and ``pip install -e .``,
|
||||
where the working tree sits next to ``agent/``.
|
||||
3. ``<sysconfig data|purelib|platlib>/locales`` -- pip wheel installs.
|
||||
setuptools ``data-files`` extracts ``locales/*.yaml`` under the
|
||||
interpreter's ``data`` scheme; the other schemes are checked as a
|
||||
safety net for nonstandard layouts.
|
||||
|
||||
Falling through to the source-style path (even when missing) keeps
|
||||
``_load_catalog`` error messages informative -- it logs the path it
|
||||
looked at -- rather than raising.
|
||||
"""
|
||||
override = os.getenv("HERMES_BUNDLED_LOCALES", "").strip()
|
||||
if override:
|
||||
candidate = Path(override)
|
||||
if candidate.is_dir():
|
||||
return candidate
|
||||
logger.warning(
|
||||
"HERMES_BUNDLED_LOCALES points to a non-directory path (%s); "
|
||||
"falling back to bundled/source locale resolution",
|
||||
override,
|
||||
)
|
||||
|
||||
# agent/i18n.py -> agent/ -> repo root (source checkout, editable install)
|
||||
source_dir = Path(__file__).resolve().parent.parent / "locales"
|
||||
if source_dir.is_dir():
|
||||
return source_dir
|
||||
|
||||
# pip wheel install: data-files lands under the interpreter data scheme.
|
||||
# ``data`` (== sys.prefix in a venv) is where setuptools data-files extract
|
||||
# and is checked first. ``purelib``/``platlib`` (site-packages) are a safety
|
||||
# net for nonstandard layouts. NOTE: this does NOT cover ``pip install
|
||||
# --user`` (user scheme, ~/.local/locales) or ``pip install --target`` --
|
||||
# both are out of scope; see the plan header.
|
||||
for scheme in ("data", "purelib", "platlib"):
|
||||
raw = sysconfig.get_path(scheme)
|
||||
if not raw:
|
||||
continue
|
||||
candidate = Path(raw) / "locales"
|
||||
if candidate.is_dir():
|
||||
return candidate
|
||||
|
||||
# Last resort: return the source-style path so _load_catalog's catalog-missing
|
||||
# log (logger.debug "i18n catalog missing for %s at %s") stays informative.
|
||||
return source_dir
|
||||
|
||||
|
||||
def _normalize_lang(value: Any) -> str:
|
||||
"""Normalize a user-supplied language value to a supported code.
|
||||
|
||||
Accepts supported codes directly, common aliases (``chinese`` -> ``zh``),
|
||||
and case-insensitive regional tags (``zh-CN`` -> ``zh``). Returns the
|
||||
default language for unknown values.
|
||||
"""
|
||||
if not isinstance(value, str):
|
||||
return DEFAULT_LANGUAGE
|
||||
key = value.strip().lower()
|
||||
if not key:
|
||||
return DEFAULT_LANGUAGE
|
||||
if key in SUPPORTED_LANGUAGES:
|
||||
return key
|
||||
if key in _LANGUAGE_ALIASES:
|
||||
return _LANGUAGE_ALIASES[key]
|
||||
# Try stripping a region suffix (e.g. "pt-br" -> "pt" won't be supported,
|
||||
# but "zh-CN" -> "zh" will).
|
||||
base = key.split("-", 1)[0]
|
||||
if base in SUPPORTED_LANGUAGES:
|
||||
return base
|
||||
return DEFAULT_LANGUAGE
|
||||
|
||||
|
||||
def _load_catalog(lang: str) -> dict[str, str]:
|
||||
"""Load and flatten one locale YAML file into a dotted-key dict.
|
||||
|
||||
YAML files can be nested for human readability; this produces the flat
|
||||
key space :func:`t` expects. Cached per-language for the process.
|
||||
"""
|
||||
with _catalog_lock:
|
||||
cached = _catalog_cache.get(lang)
|
||||
if cached is not None:
|
||||
return cached
|
||||
|
||||
path = _locales_dir() / f"{lang}.yaml"
|
||||
if not path.is_file():
|
||||
logger.debug("i18n catalog missing for %s at %s", lang, path)
|
||||
with _catalog_lock:
|
||||
_catalog_cache[lang] = {}
|
||||
return {}
|
||||
|
||||
try:
|
||||
import yaml # PyYAML is already a hermes dependency
|
||||
with path.open("r", encoding="utf-8") as f:
|
||||
raw = yaml.safe_load(f) or {}
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to load i18n catalog %s: %s", path, exc)
|
||||
with _catalog_lock:
|
||||
_catalog_cache[lang] = {}
|
||||
return {}
|
||||
|
||||
flat: dict[str, str] = {}
|
||||
_flatten_into(raw, "", flat)
|
||||
with _catalog_lock:
|
||||
_catalog_cache[lang] = flat
|
||||
return flat
|
||||
|
||||
|
||||
def _flatten_into(node: Any, prefix: str, out: dict[str, str]) -> None:
|
||||
if isinstance(node, dict):
|
||||
for key, value in node.items():
|
||||
child_key = f"{prefix}.{key}" if prefix else str(key)
|
||||
_flatten_into(value, child_key, out)
|
||||
elif isinstance(node, str):
|
||||
out[prefix] = node
|
||||
# Non-string, non-dict leaves are ignored -- catalogs are text-only.
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def _config_language_cached() -> str | None:
|
||||
"""Read ``display.language`` from config.yaml once per process.
|
||||
|
||||
Cached because ``t()`` is called in hot paths (every approval prompt,
|
||||
every gateway reply) and re-reading YAML each call would be wasteful.
|
||||
``reset_language_cache()`` clears this when config changes at runtime
|
||||
(e.g. after the setup wizard).
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
lang = (cfg.get("display") or {}).get("language")
|
||||
if lang:
|
||||
return _normalize_lang(lang)
|
||||
except Exception as exc:
|
||||
logger.debug("Could not read display.language from config: %s", exc)
|
||||
return None
|
||||
|
||||
|
||||
def reset_language_cache() -> None:
|
||||
"""Invalidate cached language resolution and catalogs.
|
||||
|
||||
Call after :func:`hermes_cli.config.save_config` if a running process
|
||||
needs to pick up a changed ``display.language`` without restart.
|
||||
"""
|
||||
_config_language_cached.cache_clear()
|
||||
with _catalog_lock:
|
||||
_catalog_cache.clear()
|
||||
|
||||
|
||||
def get_language() -> str:
|
||||
"""Resolve the active language using env > config > default order."""
|
||||
env_lang = os.environ.get("HERMES_LANGUAGE")
|
||||
if env_lang:
|
||||
return _normalize_lang(env_lang)
|
||||
cfg_lang = _config_language_cached()
|
||||
if cfg_lang:
|
||||
return cfg_lang
|
||||
return DEFAULT_LANGUAGE
|
||||
|
||||
|
||||
def t(key: str, lang: str | None = None, **format_kwargs: Any) -> str:
|
||||
"""Translate a dotted key to the active language.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
key
|
||||
Dotted path into the catalog, e.g. ``"approval.choose_long"``.
|
||||
lang
|
||||
Explicit language override. Takes precedence over env + config.
|
||||
**format_kwargs
|
||||
``str.format`` substitution arguments (``t("gateway.drain", count=3)``
|
||||
expects a catalog entry with a ``{count}`` placeholder).
|
||||
|
||||
Returns
|
||||
-------
|
||||
The translated string, or the English fallback if the key is missing in
|
||||
the target language, or the bare key if English is also missing.
|
||||
"""
|
||||
target = _normalize_lang(lang) if lang else get_language()
|
||||
catalog = _load_catalog(target)
|
||||
value = catalog.get(key)
|
||||
|
||||
if value is None and target != DEFAULT_LANGUAGE:
|
||||
# Fall through to English rather than showing a key path to the user.
|
||||
value = _load_catalog(DEFAULT_LANGUAGE).get(key)
|
||||
|
||||
if value is None:
|
||||
# Last-ditch: return the key itself. A broken catalog should not
|
||||
# crash anything; it just looks ugly until someone fixes it.
|
||||
logger.debug("i18n miss: key=%r lang=%r", key, target)
|
||||
value = key
|
||||
|
||||
if format_kwargs:
|
||||
try:
|
||||
return value.format(**format_kwargs)
|
||||
except (KeyError, IndexError, ValueError) as exc:
|
||||
logger.warning(
|
||||
"i18n format failed for key=%r lang=%r kwargs=%r: %s",
|
||||
key, target, format_kwargs, exc,
|
||||
)
|
||||
return value
|
||||
return value
|
||||
|
||||
|
||||
__all__ = [
|
||||
"SUPPORTED_LANGUAGES",
|
||||
"DEFAULT_LANGUAGE",
|
||||
"t",
|
||||
"get_language",
|
||||
"reset_language_cache",
|
||||
]
|
||||
@@ -1,324 +0,0 @@
|
||||
"""
|
||||
Image Generation Provider ABC
|
||||
=============================
|
||||
|
||||
Defines the pluggable-backend interface for image generation. Providers register
|
||||
instances via ``PluginContext.register_image_gen_provider()``; the active one
|
||||
(selected via ``image_gen.provider`` in ``config.yaml``) services every
|
||||
``image_generate`` tool call.
|
||||
|
||||
Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
|
||||
as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
|
||||
via ``plugins.enabled``).
|
||||
|
||||
Response shape
|
||||
--------------
|
||||
All providers return a dict that :func:`success_response` / :func:`error_response`
|
||||
produce. The tool wrapper JSON-serializes it. Keys:
|
||||
|
||||
success bool
|
||||
image str | None URL or absolute file path
|
||||
model str provider-specific model identifier
|
||||
prompt str echoed prompt
|
||||
aspect_ratio str "landscape" | "square" | "portrait"
|
||||
provider str provider name (for diagnostics)
|
||||
error str only when success=False
|
||||
error_type str only when success=False
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
import base64
|
||||
import datetime
|
||||
import logging
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
VALID_ASPECT_RATIOS: Tuple[str, ...] = ("landscape", "square", "portrait")
|
||||
DEFAULT_ASPECT_RATIO = "landscape"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ABC
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class ImageGenProvider(abc.ABC):
|
||||
"""Abstract base class for an image generation backend.
|
||||
|
||||
Subclasses must implement :meth:`generate`. Everything else has sane
|
||||
defaults — override only what your provider needs.
|
||||
"""
|
||||
|
||||
@property
|
||||
@abc.abstractmethod
|
||||
def name(self) -> str:
|
||||
"""Stable short identifier used in ``image_gen.provider`` config.
|
||||
|
||||
Lowercase, no spaces. Examples: ``fal``, ``openai``, ``replicate``.
|
||||
"""
|
||||
|
||||
@property
|
||||
def display_name(self) -> str:
|
||||
"""Human-readable label shown in ``hermes tools``. Defaults to ``name.title()``."""
|
||||
return self.name.title()
|
||||
|
||||
def is_available(self) -> bool:
|
||||
"""Return True when this provider can service calls.
|
||||
|
||||
Typically checks for a required API key. Default: True
|
||||
(providers with no external dependencies are always available).
|
||||
"""
|
||||
return True
|
||||
|
||||
def list_models(self) -> List[Dict[str, Any]]:
|
||||
"""Return catalog entries for ``hermes tools`` model picker.
|
||||
|
||||
Each entry::
|
||||
|
||||
{
|
||||
"id": "gpt-image-1.5", # required
|
||||
"display": "GPT Image 1.5", # optional; defaults to id
|
||||
"speed": "~10s", # optional
|
||||
"strengths": "...", # optional
|
||||
"price": "$...", # optional
|
||||
}
|
||||
|
||||
Default: empty list (provider has no user-selectable models).
|
||||
"""
|
||||
return []
|
||||
|
||||
def get_setup_schema(self) -> Dict[str, Any]:
|
||||
"""Return provider metadata for the ``hermes tools`` picker.
|
||||
|
||||
Used by ``tools_config.py`` to inject this provider as a row in
|
||||
the Image Generation provider list. Shape::
|
||||
|
||||
{
|
||||
"name": "OpenAI", # picker label
|
||||
"badge": "paid", # optional short tag
|
||||
"tag": "One-line description...", # optional subtitle
|
||||
"env_vars": [ # keys to prompt for
|
||||
{"key": "OPENAI_API_KEY",
|
||||
"prompt": "OpenAI API key",
|
||||
"url": "https://platform.openai.com/api-keys"},
|
||||
],
|
||||
}
|
||||
|
||||
Default: minimal entry derived from ``display_name``. Override to
|
||||
expose API key prompts and custom badges.
|
||||
"""
|
||||
return {
|
||||
"name": self.display_name,
|
||||
"badge": "",
|
||||
"tag": "",
|
||||
"env_vars": [],
|
||||
}
|
||||
|
||||
def default_model(self) -> Optional[str]:
|
||||
"""Return the default model id, or None if not applicable."""
|
||||
models = self.list_models()
|
||||
if models:
|
||||
return models[0].get("id")
|
||||
return None
|
||||
|
||||
@abc.abstractmethod
|
||||
def generate(
|
||||
self,
|
||||
prompt: str,
|
||||
aspect_ratio: str = DEFAULT_ASPECT_RATIO,
|
||||
**kwargs: Any,
|
||||
) -> Dict[str, Any]:
|
||||
"""Generate an image.
|
||||
|
||||
Implementations should return the dict from :func:`success_response`
|
||||
or :func:`error_response`. ``kwargs`` may contain forward-compat
|
||||
parameters future versions of the schema will expose — implementations
|
||||
should ignore unknown keys.
|
||||
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def resolve_aspect_ratio(value: Optional[str]) -> str:
|
||||
"""Clamp an aspect_ratio value to the valid set, defaulting to landscape.
|
||||
|
||||
Invalid values are coerced rather than rejected so the tool surface is
|
||||
forgiving of agent mistakes.
|
||||
"""
|
||||
if not isinstance(value, str):
|
||||
return DEFAULT_ASPECT_RATIO
|
||||
v = value.strip().lower()
|
||||
if v in VALID_ASPECT_RATIOS:
|
||||
return v
|
||||
return DEFAULT_ASPECT_RATIO
|
||||
|
||||
|
||||
def _images_cache_dir() -> Path:
|
||||
"""Return ``$HERMES_HOME/cache/images/``, creating parents as needed."""
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
path = get_hermes_home() / "cache" / "images"
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
return path
|
||||
|
||||
|
||||
def save_b64_image(
|
||||
b64_data: str,
|
||||
*,
|
||||
prefix: str = "image",
|
||||
extension: str = "png",
|
||||
) -> Path:
|
||||
"""Decode base64 image data and write it under ``$HERMES_HOME/cache/images/``.
|
||||
|
||||
Returns the absolute :class:`Path` to the saved file.
|
||||
|
||||
Filename format: ``<prefix>_<YYYYMMDD_HHMMSS>_<short-uuid>.<ext>``.
|
||||
"""
|
||||
raw = base64.b64decode(b64_data)
|
||||
ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
short = uuid.uuid4().hex[:8]
|
||||
path = _images_cache_dir() / f"{prefix}_{ts}_{short}.{extension}"
|
||||
path.write_bytes(raw)
|
||||
return path
|
||||
|
||||
|
||||
# Extension inference for save_url_image — keep small and explicit. We don't
|
||||
# want to import mimetypes for a handful of formats every image_gen provider
|
||||
# actually returns, and we never want to inherit a content-type that points
|
||||
# at HTML or JSON when the API gives us a degenerate response.
|
||||
_URL_IMAGE_CONTENT_TYPES = {
|
||||
"image/png": "png",
|
||||
"image/jpeg": "jpg",
|
||||
"image/jpg": "jpg",
|
||||
"image/webp": "webp",
|
||||
"image/gif": "gif",
|
||||
}
|
||||
|
||||
|
||||
def save_url_image(
|
||||
url: str,
|
||||
*,
|
||||
prefix: str = "image",
|
||||
timeout: float = 60.0,
|
||||
max_bytes: int = 25 * 1024 * 1024,
|
||||
) -> Path:
|
||||
"""Download an image URL and write it under ``$HERMES_HOME/cache/images/``.
|
||||
|
||||
Used by providers (xAI, fallback OpenAI) whose API returns an *ephemeral*
|
||||
URL instead of inline base64 — those URLs frequently expire before a
|
||||
downstream consumer (Telegram ``send_photo``, browser fetch) can resolve
|
||||
them, so we materialise the bytes locally at tool-completion time.
|
||||
Mirrors :func:`save_b64_image`'s shape so providers can swap in one line.
|
||||
|
||||
Returns the absolute :class:`Path` to the saved file. Raises on any
|
||||
network / HTTP / oversize / non-image-content-type error so callers can
|
||||
fall back to returning the bare URL with a clear error message.
|
||||
"""
|
||||
import requests
|
||||
|
||||
response = requests.get(url, timeout=timeout, stream=True)
|
||||
response.raise_for_status()
|
||||
|
||||
# Infer extension from the response content-type, falling back to the
|
||||
# URL suffix when xAI / OpenAI omit a precise type (some CDNs return
|
||||
# ``application/octet-stream``). Defaults to ``png``.
|
||||
content_type = (response.headers.get("Content-Type") or "").split(";", 1)[0].strip().lower()
|
||||
extension = _URL_IMAGE_CONTENT_TYPES.get(content_type)
|
||||
if extension is None:
|
||||
url_path = url.split("?", 1)[0].lower()
|
||||
for ext in ("png", "jpg", "jpeg", "webp", "gif"):
|
||||
if url_path.endswith(f".{ext}"):
|
||||
extension = "jpg" if ext == "jpeg" else ext
|
||||
break
|
||||
if extension is None:
|
||||
extension = "png"
|
||||
|
||||
ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
short = uuid.uuid4().hex[:8]
|
||||
path = _images_cache_dir() / f"{prefix}_{ts}_{short}.{extension}"
|
||||
|
||||
bytes_written = 0
|
||||
with path.open("wb") as fh:
|
||||
for chunk in response.iter_content(chunk_size=64 * 1024):
|
||||
if not chunk:
|
||||
continue
|
||||
bytes_written += len(chunk)
|
||||
if bytes_written > max_bytes:
|
||||
fh.close()
|
||||
try:
|
||||
path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
raise ValueError(
|
||||
f"Image at {url} exceeds {max_bytes // (1024 * 1024)}MB cap; refusing to cache."
|
||||
)
|
||||
fh.write(chunk)
|
||||
|
||||
if bytes_written == 0:
|
||||
try:
|
||||
path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
raise ValueError(f"Image at {url} returned 0 bytes; refusing to cache.")
|
||||
|
||||
return path
|
||||
|
||||
|
||||
def success_response(
|
||||
*,
|
||||
image: str,
|
||||
model: str,
|
||||
prompt: str,
|
||||
aspect_ratio: str,
|
||||
provider: str,
|
||||
extra: Optional[Dict[str, Any]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build a uniform success response dict.
|
||||
|
||||
``image`` may be an HTTP URL or an absolute filesystem path (for b64
|
||||
providers like OpenAI). Callers that need to pass through additional
|
||||
backend-specific fields can supply ``extra``.
|
||||
"""
|
||||
payload: Dict[str, Any] = {
|
||||
"success": True,
|
||||
"image": image,
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"aspect_ratio": aspect_ratio,
|
||||
"provider": provider,
|
||||
}
|
||||
if extra:
|
||||
for k, v in extra.items():
|
||||
payload.setdefault(k, v)
|
||||
return payload
|
||||
|
||||
|
||||
def error_response(
|
||||
*,
|
||||
error: str,
|
||||
error_type: str = "provider_error",
|
||||
provider: str = "",
|
||||
model: str = "",
|
||||
prompt: str = "",
|
||||
aspect_ratio: str = DEFAULT_ASPECT_RATIO,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build a uniform error response dict."""
|
||||
return {
|
||||
"success": False,
|
||||
"image": None,
|
||||
"error": error,
|
||||
"error_type": error_type,
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"aspect_ratio": aspect_ratio,
|
||||
"provider": provider,
|
||||
}
|
||||
@@ -1,145 +0,0 @@
|
||||
"""
|
||||
Image Generation Provider Registry
|
||||
==================================
|
||||
|
||||
Central map of registered providers. Populated by plugins at import-time via
|
||||
``PluginContext.register_image_gen_provider()``; consumed by the
|
||||
``image_generate`` tool to dispatch each call to the active backend.
|
||||
|
||||
Active selection
|
||||
----------------
|
||||
The active provider is chosen by ``image_gen.provider`` in ``config.yaml``.
|
||||
If unset, :func:`get_active_provider` applies fallback logic:
|
||||
|
||||
1. If exactly one provider is registered, use it.
|
||||
2. Otherwise if a provider named ``fal`` is registered, use it (legacy
|
||||
default — matches pre-plugin behavior).
|
||||
3. Otherwise return ``None`` (the tool surfaces a helpful error pointing
|
||||
the user at ``hermes tools``).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from agent.image_gen_provider import ImageGenProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_providers: Dict[str, ImageGenProvider] = {}
|
||||
_lock = threading.Lock()
|
||||
|
||||
|
||||
def register_provider(provider: ImageGenProvider) -> None:
|
||||
"""Register an image generation provider.
|
||||
|
||||
Re-registration (same ``name``) overwrites the previous entry and logs
|
||||
a debug message — this makes hot-reload scenarios (tests, dev loops)
|
||||
behave predictably.
|
||||
"""
|
||||
if not isinstance(provider, ImageGenProvider):
|
||||
raise TypeError(
|
||||
f"register_provider() expects an ImageGenProvider instance, "
|
||||
f"got {type(provider).__name__}"
|
||||
)
|
||||
name = provider.name
|
||||
if not isinstance(name, str) or not name.strip():
|
||||
raise ValueError("Image gen provider .name must be a non-empty string")
|
||||
with _lock:
|
||||
existing = _providers.get(name)
|
||||
_providers[name] = provider
|
||||
if existing is not None:
|
||||
logger.debug("Image gen provider '%s' re-registered (was %r)", name, type(existing).__name__)
|
||||
else:
|
||||
logger.debug("Registered image gen provider '%s' (%s)", name, type(provider).__name__)
|
||||
|
||||
|
||||
def list_providers() -> List[ImageGenProvider]:
|
||||
"""Return all registered providers, sorted by name."""
|
||||
with _lock:
|
||||
items = list(_providers.values())
|
||||
return sorted(items, key=lambda p: p.name)
|
||||
|
||||
|
||||
def get_provider(name: str) -> Optional[ImageGenProvider]:
|
||||
"""Return the provider registered under *name*, or None."""
|
||||
if not isinstance(name, str):
|
||||
return None
|
||||
with _lock:
|
||||
return _providers.get(name.strip())
|
||||
|
||||
|
||||
def get_active_provider() -> Optional[ImageGenProvider]:
|
||||
"""Resolve the currently-active provider.
|
||||
|
||||
Reads ``image_gen.provider`` from config.yaml; falls back per the
|
||||
module docstring.
|
||||
|
||||
**Availability semantics** (mirrors :mod:`agent.web_search_registry`):
|
||||
|
||||
- When ``image_gen.provider`` is explicitly set, the configured
|
||||
provider is returned even if :meth:`ImageGenProvider.is_available`
|
||||
reports False — the dispatcher surfaces a precise "X_API_KEY is not
|
||||
set" error rather than silently switching backends.
|
||||
- When ``image_gen.provider`` is unset, the fallback path (single-
|
||||
provider shortcut and the FAL legacy preference) is filtered by
|
||||
``is_available()`` so we don't pick a provider the user has no
|
||||
credentials for.
|
||||
"""
|
||||
configured: Optional[str] = None
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
cfg = load_config()
|
||||
section = cfg.get("image_gen") if isinstance(cfg, dict) else None
|
||||
if isinstance(section, dict):
|
||||
raw = section.get("provider")
|
||||
if isinstance(raw, str) and raw.strip():
|
||||
configured = raw.strip()
|
||||
except Exception as exc:
|
||||
logger.debug("Could not read image_gen.provider from config: %s", exc)
|
||||
|
||||
with _lock:
|
||||
snapshot = dict(_providers)
|
||||
|
||||
def _is_available_safe(p: ImageGenProvider) -> bool:
|
||||
"""Wrap ``is_available()`` so a buggy provider doesn't kill resolution."""
|
||||
try:
|
||||
return bool(p.is_available())
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.debug("image_gen provider %s.is_available() raised %s", p.name, exc)
|
||||
return False
|
||||
|
||||
# 1. Explicit config wins — return regardless of is_available() so the
|
||||
# user gets a precise downstream error message rather than a silent
|
||||
# backend switch.
|
||||
if configured:
|
||||
provider = snapshot.get(configured)
|
||||
if provider is not None:
|
||||
return provider
|
||||
logger.debug(
|
||||
"image_gen.provider='%s' configured but not registered; falling back",
|
||||
configured,
|
||||
)
|
||||
|
||||
# 2. Fallback: single registered provider — but only if it's actually
|
||||
# available (no credentials = don't surface it as "active").
|
||||
available = [p for p in snapshot.values() if _is_available_safe(p)]
|
||||
if len(available) == 1:
|
||||
return available[0]
|
||||
|
||||
# 3. Fallback: prefer legacy FAL for backward compat, when available.
|
||||
fal = snapshot.get("fal")
|
||||
if fal is not None and _is_available_safe(fal):
|
||||
return fal
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _reset_for_tests() -> None:
|
||||
"""Clear the registry. **Test-only.**"""
|
||||
with _lock:
|
||||
_providers.clear()
|
||||
@@ -1,511 +0,0 @@
|
||||
"""Routing helpers for inbound user-attached images.
|
||||
|
||||
Two modes:
|
||||
|
||||
native — attach images as OpenAI-style ``image_url`` content parts on the
|
||||
user turn. Provider adapters (Anthropic, Gemini, Bedrock, Codex,
|
||||
OpenAI chat.completions) already translate these into their
|
||||
vendor-specific multimodal formats.
|
||||
|
||||
text — run ``vision_analyze`` on each image up-front and prepend the
|
||||
description to the user's text. The model never sees the pixels;
|
||||
it only sees a lossy text summary. This is the pre-existing
|
||||
behaviour and still the right choice for non-vision models.
|
||||
|
||||
The decision is made once per message turn by :func:`decide_image_input_mode`.
|
||||
It reads ``agent.image_input_mode`` from config.yaml (``auto`` | ``native``
|
||||
| ``text``, default ``auto``) and the active model's capability metadata.
|
||||
|
||||
In ``auto`` mode:
|
||||
- If the user has explicitly configured ``auxiliary.vision.provider``
|
||||
(i.e. not ``auto`` and not empty), we assume they want the text pipeline
|
||||
regardless of the main model — they've opted in to a specific vision
|
||||
backend for a reason (cost, quality, local-only, etc.).
|
||||
- Otherwise, if the active model reports ``supports_vision=True`` in its
|
||||
models.dev metadata, we attach natively.
|
||||
- Otherwise (non-vision model, no explicit override), we fall back to text.
|
||||
|
||||
This keeps ``vision_analyze`` surfaced as a tool in every session — skills
|
||||
and agent flows that chain it (browser screenshots, deeper inspection of
|
||||
URL-referenced images, style-gating loops) keep working. The routing only
|
||||
affects *how user-attached images on the current turn* are presented to the
|
||||
main model.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import logging
|
||||
import mimetypes
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_VALID_MODES = frozenset({"auto", "native", "text"})
|
||||
|
||||
|
||||
# Image extensions used by extract_image_refs(). Kept tight on purpose — we
|
||||
# only auto-attach things the model can actually see. Documents/archives are
|
||||
# excluded because the gateway's broader extract_local_files() also routes
|
||||
# them differently (send_document), and we don't want to attach a PDF as a
|
||||
# vision part.
|
||||
_IMAGE_EXTS = (
|
||||
".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".tif", ".heic",
|
||||
)
|
||||
_IMAGE_EXT_PATTERN = "|".join(e.lstrip(".") for e in _IMAGE_EXTS)
|
||||
|
||||
# Absolute / home-relative local image path. Matches the same shape gateway's
|
||||
# extract_local_files() uses: anchors to ``~/`` or ``/``, ignores matches inside
|
||||
# URLs (the ``(?<![/:\w.])`` lookbehind), and case-insensitive on the extension.
|
||||
_LOCAL_IMAGE_PATH_RE = re.compile(
|
||||
r"(?<![/:\w.])(?:~/|/)(?:[\w.\-]+/)*[\w.\-]+\.(?:" + _IMAGE_EXT_PATTERN + r")\b",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# http(s) URL ending in an image extension (optionally followed by a
|
||||
# query string). Case-insensitive on the extension. Strict ``http(s)://``
|
||||
# scheme so we don't accidentally grab ``file://`` URLs or other shapes.
|
||||
_IMAGE_URL_RE = re.compile(
|
||||
r"https?://[^\s<>\"']+?\.(?:" + _IMAGE_EXT_PATTERN + r")(?:\?[^\s<>\"']*)?",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def extract_image_refs(text: str) -> Tuple[List[str], List[str]]:
|
||||
"""Scan free-form text for image references the model should see.
|
||||
|
||||
Returns ``(local_paths, urls)``:
|
||||
|
||||
* ``local_paths`` — absolute (``/``) or home-relative (``~/``) paths
|
||||
whose suffix is an image extension AND whose expanded form exists
|
||||
on disk as a file. Order-preserving, deduplicated.
|
||||
* ``urls`` — ``http(s)://…`` URLs whose path ends in an image
|
||||
extension (a ``?query`` is allowed after the extension).
|
||||
Order-preserving, deduplicated.
|
||||
|
||||
Matches inside fenced code blocks (``` ``` ```) and inline backticks
|
||||
(`` `…` ``) are skipped so that snippets pasted into a task body for
|
||||
reference aren't mistaken for live attachments. This mirrors the
|
||||
behaviour of ``gateway.platforms.base.BaseAdapter.extract_local_files``.
|
||||
|
||||
Local paths are validated against the filesystem; URLs are not
|
||||
(the provider fetches them at request time).
|
||||
"""
|
||||
if not isinstance(text, str) or not text:
|
||||
return [], []
|
||||
|
||||
# Build spans covered by fenced code blocks and inline code so we can
|
||||
# ignore references the author embedded purely as example text.
|
||||
code_spans: list[tuple[int, int]] = []
|
||||
for m in re.finditer(r"```[^\n]*\n.*?```", text, re.DOTALL):
|
||||
code_spans.append((m.start(), m.end()))
|
||||
for m in re.finditer(r"`[^`\n]+`", text):
|
||||
code_spans.append((m.start(), m.end()))
|
||||
|
||||
def _in_code(pos: int) -> bool:
|
||||
return any(s <= pos < e for s, e in code_spans)
|
||||
|
||||
local_paths: list[str] = []
|
||||
seen_paths: set[str] = set()
|
||||
for match in _LOCAL_IMAGE_PATH_RE.finditer(text):
|
||||
if _in_code(match.start()):
|
||||
continue
|
||||
raw = match.group(0)
|
||||
expanded = os.path.expanduser(raw)
|
||||
try:
|
||||
if not os.path.isfile(expanded):
|
||||
continue
|
||||
except OSError:
|
||||
# ENAMETOOLONG / EINVAL on pathological inputs — skip rather than crash.
|
||||
continue
|
||||
if expanded in seen_paths:
|
||||
continue
|
||||
seen_paths.add(expanded)
|
||||
local_paths.append(expanded)
|
||||
|
||||
urls: list[str] = []
|
||||
seen_urls: set[str] = set()
|
||||
for match in _IMAGE_URL_RE.finditer(text):
|
||||
if _in_code(match.start()):
|
||||
continue
|
||||
url = match.group(0)
|
||||
# Strip trailing punctuation that's almost certainly prose, not part
|
||||
# of the URL (e.g. "see https://x.com/a.png." or "/a.png)").
|
||||
url = url.rstrip(".,;:!?)]>")
|
||||
if url in seen_urls:
|
||||
continue
|
||||
seen_urls.add(url)
|
||||
urls.append(url)
|
||||
|
||||
return local_paths, urls
|
||||
|
||||
|
||||
# Strict YAML/JSON boolean coercion for capability overrides.
|
||||
#
|
||||
# ``bool("false")`` is True in Python because non-empty strings are truthy, so
|
||||
# a user writing ``supports_vision: "false"`` (quoted — a common YAML mistake)
|
||||
# would silently enable native vision routing on a model that can't actually
|
||||
# handle it. Accept only the values YAML 1.1 / 1.2 treat as booleans, plus
|
||||
# real ``bool`` and integer 0/1. Anything else returns None so the caller
|
||||
# falls through to models.dev rather than honouring garbage.
|
||||
_TRUE_TOKENS = frozenset({"true", "yes", "on", "1"})
|
||||
_FALSE_TOKENS = frozenset({"false", "no", "off", "0"})
|
||||
|
||||
|
||||
def _coerce_capability_bool(raw: Any) -> Optional[bool]:
|
||||
"""Return True/False for recognised boolean values, None otherwise."""
|
||||
if isinstance(raw, bool):
|
||||
return raw
|
||||
if isinstance(raw, int):
|
||||
if raw in (0, 1):
|
||||
return bool(raw)
|
||||
return None
|
||||
if isinstance(raw, str):
|
||||
s = raw.strip().lower()
|
||||
if s in _TRUE_TOKENS:
|
||||
return True
|
||||
if s in _FALSE_TOKENS:
|
||||
return False
|
||||
return None
|
||||
|
||||
|
||||
def _supports_vision_override(
|
||||
cfg: Optional[Dict[str, Any]],
|
||||
provider: str,
|
||||
model: str,
|
||||
) -> Optional[bool]:
|
||||
"""Resolve user-declared vision capability from config.yaml.
|
||||
|
||||
Resolution order, first hit wins:
|
||||
1. ``model.supports_vision`` (top-level shortcut for the active model)
|
||||
2. ``providers.<provider>.models.<model>.supports_vision``
|
||||
(named custom providers — ``provider`` may be the runtime-resolved
|
||||
value ``"custom"`` and/or the user-declared name under
|
||||
``model.provider``; both are tried)
|
||||
|
||||
Returns None when no override is set, so the caller falls through to
|
||||
models.dev. Returns False explicitly only when the user wrote a
|
||||
recognised boolean false token.
|
||||
"""
|
||||
if not isinstance(cfg, dict):
|
||||
return None
|
||||
|
||||
# 1. Top-level shortcut
|
||||
model_cfg_raw = cfg.get("model")
|
||||
model_cfg: Dict[str, Any] = model_cfg_raw if isinstance(model_cfg_raw, dict) else {}
|
||||
top = _coerce_capability_bool(model_cfg.get("supports_vision"))
|
||||
if top is not None:
|
||||
return top
|
||||
|
||||
# 2. Per-provider, per-model. Named custom providers (e.g. "my-vllm")
|
||||
# get rewritten to provider="custom" at runtime
|
||||
# (hermes_cli/runtime_provider.py:_resolve_named_custom_runtime), so the
|
||||
# config still holds the user-declared name under model.provider. Try
|
||||
# both as candidate provider keys.
|
||||
config_provider = str(model_cfg.get("provider") or "").strip()
|
||||
providers_raw = cfg.get("providers")
|
||||
providers_cfg: Dict[str, Any] = providers_raw if isinstance(providers_raw, dict) else {}
|
||||
for p in dict.fromkeys(filter(None, (provider, config_provider))):
|
||||
entry_raw = providers_cfg.get(p)
|
||||
entry: Dict[str, Any] = entry_raw if isinstance(entry_raw, dict) else {}
|
||||
models_raw = entry.get("models")
|
||||
models_cfg: Dict[str, Any] = models_raw if isinstance(models_raw, dict) else {}
|
||||
per_model_raw = models_cfg.get(model)
|
||||
per_model: Dict[str, Any] = per_model_raw if isinstance(per_model_raw, dict) else {}
|
||||
coerced = _coerce_capability_bool(per_model.get("supports_vision"))
|
||||
if coerced is not None:
|
||||
return coerced
|
||||
return None
|
||||
|
||||
|
||||
def _coerce_mode(raw: Any) -> str:
|
||||
"""Normalize a config value into one of the valid modes."""
|
||||
if not isinstance(raw, str):
|
||||
return "auto"
|
||||
val = raw.strip().lower()
|
||||
if val in _VALID_MODES:
|
||||
return val
|
||||
return "auto"
|
||||
|
||||
|
||||
def _explicit_aux_vision_override(cfg: Optional[Dict[str, Any]]) -> bool:
|
||||
"""True when the user configured a specific auxiliary vision backend.
|
||||
|
||||
An explicit override means the user *wants* the text pipeline (they're
|
||||
paying for a dedicated vision model), so we don't silently bypass it.
|
||||
"""
|
||||
if not isinstance(cfg, dict):
|
||||
return False
|
||||
aux = cfg.get("auxiliary") or {}
|
||||
if not isinstance(aux, dict):
|
||||
return False
|
||||
vision = aux.get("vision") or {}
|
||||
if not isinstance(vision, dict):
|
||||
return False
|
||||
|
||||
provider = str(vision.get("provider") or "").strip().lower()
|
||||
model = str(vision.get("model") or "").strip()
|
||||
base_url = str(vision.get("base_url") or "").strip()
|
||||
|
||||
# "auto" / "" / blank = not explicit
|
||||
if provider in {"", "auto"} and not model and not base_url:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _lookup_supports_vision(
|
||||
provider: str,
|
||||
model: str,
|
||||
cfg: Optional[Dict[str, Any]] = None,
|
||||
) -> Optional[bool]:
|
||||
"""Return True/False if we can resolve caps, None if unknown.
|
||||
|
||||
Consults the user's ``supports_vision`` override in config.yaml first
|
||||
(so custom/local models declared as vision-capable don't fall through to
|
||||
text routing in ``auto`` mode), then falls back to models.dev.
|
||||
"""
|
||||
override = _supports_vision_override(cfg, provider, model)
|
||||
if override is not None:
|
||||
return override
|
||||
if not provider or not model:
|
||||
return None
|
||||
try:
|
||||
from agent.models_dev import get_model_capabilities
|
||||
caps = get_model_capabilities(provider, model)
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
logger.debug("image_routing: caps lookup failed for %s:%s — %s", provider, model, exc)
|
||||
return None
|
||||
if caps is None:
|
||||
return None
|
||||
return bool(caps.supports_vision)
|
||||
|
||||
|
||||
def decide_image_input_mode(
|
||||
provider: str,
|
||||
model: str,
|
||||
cfg: Optional[Dict[str, Any]],
|
||||
) -> str:
|
||||
"""Return ``"native"`` or ``"text"`` for the given turn.
|
||||
|
||||
Args:
|
||||
provider: active inference provider ID (e.g. ``"anthropic"``, ``"openrouter"``).
|
||||
model: active model slug as it would be sent to the provider.
|
||||
cfg: loaded config.yaml dict, or None. When None, behaves as auto.
|
||||
"""
|
||||
mode_cfg = "auto"
|
||||
if isinstance(cfg, dict):
|
||||
agent_cfg = cfg.get("agent") or {}
|
||||
if isinstance(agent_cfg, dict):
|
||||
mode_cfg = _coerce_mode(agent_cfg.get("image_input_mode"))
|
||||
|
||||
if mode_cfg == "native":
|
||||
return "native"
|
||||
if mode_cfg == "text":
|
||||
return "text"
|
||||
|
||||
# auto
|
||||
if _explicit_aux_vision_override(cfg):
|
||||
return "text"
|
||||
|
||||
supports = _lookup_supports_vision(provider, model, cfg)
|
||||
if supports is True:
|
||||
return "native"
|
||||
return "text"
|
||||
|
||||
|
||||
# Image size handling is REACTIVE rather than proactive: we attempt native
|
||||
# attachment at full size regardless of provider, and rely on
|
||||
# ``run_agent._try_shrink_image_parts_in_messages`` to shrink + retry if
|
||||
# the provider rejects the request (e.g. Anthropic's hard 5 MB per-image
|
||||
# ceiling returned as HTTP 400 "image exceeds 5 MB maximum").
|
||||
#
|
||||
# Why reactive: our knowledge of provider ceilings is partial and evolving
|
||||
# (OpenAI accepts 49 MB+, Anthropic 5 MB, Gemini 100 MB, others unknown).
|
||||
# A proactive per-provider table would be stale the moment a provider raises
|
||||
# or lowers its limit, and silently degrading quality for users on providers
|
||||
# that would have accepted the full image is the worse failure mode.
|
||||
# The shrink-on-reject path loses 1 API call + maybe 1s of Pillow work when
|
||||
# it fires, which is cheaper than permanent quality loss.
|
||||
|
||||
|
||||
def _sniff_mime_from_bytes(raw: bytes) -> Optional[str]:
|
||||
"""Detect image MIME from magic bytes. Returns None if unrecognised.
|
||||
|
||||
Filename-based detection (``mimetypes.guess_type``) is unreliable when
|
||||
upstream platforms lie about content-type. Discord, for example, can
|
||||
serve a PNG with ``content_type=image/webp`` for proxied/animated
|
||||
stickers, custom emoji previews, or images uploaded via certain bots.
|
||||
Anthropic strictly validates that declared media_type matches the
|
||||
actual bytes and returns HTTP 400 on mismatch, so we sniff to be safe.
|
||||
"""
|
||||
if not raw:
|
||||
return None
|
||||
# PNG: 89 50 4E 47 0D 0A 1A 0A
|
||||
if raw.startswith(b"\x89PNG\r\n\x1a\n"):
|
||||
return "image/png"
|
||||
# JPEG: FF D8 FF
|
||||
if raw.startswith(b"\xff\xd8\xff"):
|
||||
return "image/jpeg"
|
||||
# GIF87a / GIF89a
|
||||
if raw[:6] in {b"GIF87a", b"GIF89a"}:
|
||||
return "image/gif"
|
||||
# WEBP: "RIFF" .... "WEBP"
|
||||
if len(raw) >= 12 and raw[:4] == b"RIFF" and raw[8:12] == b"WEBP":
|
||||
return "image/webp"
|
||||
# BMP: "BM"
|
||||
if raw.startswith(b"BM"):
|
||||
return "image/bmp"
|
||||
# HEIC/HEIF: ftypheic / ftypheix / ftypmif1 / ftypmsf1 etc.
|
||||
if len(raw) >= 12 and raw[4:8] == b"ftyp" and raw[8:12] in {
|
||||
b"heic", b"heix", b"hevc", b"hevx", b"mif1", b"msf1", b"heim", b"heis",
|
||||
}:
|
||||
return "image/heic"
|
||||
return None
|
||||
|
||||
|
||||
def _guess_mime(path: Path, raw: Optional[bytes] = None) -> str:
|
||||
"""Return image MIME type for *path*.
|
||||
|
||||
If *raw* bytes are provided, magic-byte sniffing wins (authoritative).
|
||||
Otherwise we fall back to ``mimetypes`` then suffix-based defaults.
|
||||
"""
|
||||
if raw is not None:
|
||||
sniffed = _sniff_mime_from_bytes(raw)
|
||||
if sniffed:
|
||||
return sniffed
|
||||
mime, _ = mimetypes.guess_type(str(path))
|
||||
if mime and mime.startswith("image/"):
|
||||
return mime
|
||||
# mimetypes on some Linux distros mis-maps .jpg; default to jpeg when
|
||||
# the suffix looks imagey.
|
||||
suffix = path.suffix.lower()
|
||||
return {
|
||||
".jpg": "image/jpeg",
|
||||
".jpeg": "image/jpeg",
|
||||
".png": "image/png",
|
||||
".gif": "image/gif",
|
||||
".webp": "image/webp",
|
||||
".bmp": "image/bmp",
|
||||
}.get(suffix, "image/jpeg")
|
||||
|
||||
|
||||
def _file_to_data_url(path: Path) -> Optional[str]:
|
||||
"""Encode a local image as a base64 data URL at its native size.
|
||||
|
||||
Size limits are NOT enforced here — the agent retry loop
|
||||
(``run_agent._try_shrink_image_parts_in_messages``) shrinks on the
|
||||
provider's first rejection. Keeping this simple means providers that
|
||||
accept large images (OpenAI 49 MB+, Gemini 100 MB) don't pay a silent
|
||||
quality tax just because one other provider is stricter.
|
||||
|
||||
Returns None only if the file can't be read (missing, permission
|
||||
denied, etc.); the caller reports those paths in ``skipped``.
|
||||
"""
|
||||
try:
|
||||
raw = path.read_bytes()
|
||||
except Exception as exc:
|
||||
logger.warning("image_routing: failed to read %s — %s", path, exc)
|
||||
return None
|
||||
mime = _guess_mime(path, raw=raw)
|
||||
b64 = base64.b64encode(raw).decode("ascii")
|
||||
return f"data:{mime};base64,{b64}"
|
||||
|
||||
|
||||
def build_native_content_parts(
|
||||
user_text: str,
|
||||
image_paths: List[str],
|
||||
image_urls: Optional[List[str]] = None,
|
||||
) -> Tuple[List[Dict[str, Any]], List[str]]:
|
||||
"""Build an OpenAI-style ``content`` list for a user turn.
|
||||
|
||||
Shape:
|
||||
[{"type": "text", "text": "...\\n\\n[Image attached at: /local/path]"},
|
||||
{"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
|
||||
{"type": "image_url", "image_url": {"url": "https://example.com/a.png"}},
|
||||
...]
|
||||
|
||||
Local paths are read from disk and embedded as base64 ``data:`` URLs.
|
||||
Remote URLs (``http(s)://``) are passed through verbatim — the provider
|
||||
fetches them server-side. The model still sees the pixels either way.
|
||||
|
||||
For each successfully attached image, a hint is appended to the text
|
||||
part:
|
||||
|
||||
* local path → ``[Image attached at: <path>]``
|
||||
* URL → ``[Image attached: <url>]``
|
||||
|
||||
The hint gives the model a string handle so MCP/skill tools that take
|
||||
an image path or URL argument can be invoked on the same image without
|
||||
an extra round-trip. This parallels the text-mode hint produced by
|
||||
``Runner._enrich_message_with_vision`` (``vision_analyze using image_url:
|
||||
<path>``) so behaviour is consistent across both image input modes.
|
||||
|
||||
Images are attached at their native size. If a provider rejects the
|
||||
request because an image is too large (e.g. Anthropic's 5 MB per-image
|
||||
ceiling), the agent's retry loop transparently shrinks and retries
|
||||
once — see ``run_agent._try_shrink_image_parts_in_messages``.
|
||||
|
||||
Returns (content_parts, skipped). Skipped entries are local paths
|
||||
that couldn't be read from disk; URLs are never skipped (they're
|
||||
not validated here).
|
||||
"""
|
||||
skipped: List[str] = []
|
||||
image_parts: List[Dict[str, Any]] = []
|
||||
attached_paths: List[str] = []
|
||||
attached_urls: List[str] = []
|
||||
|
||||
for raw_path in image_paths:
|
||||
p = Path(raw_path)
|
||||
if not p.exists() or not p.is_file():
|
||||
skipped.append(str(raw_path))
|
||||
continue
|
||||
data_url = _file_to_data_url(p)
|
||||
if not data_url:
|
||||
skipped.append(str(raw_path))
|
||||
continue
|
||||
image_parts.append({
|
||||
"type": "image_url",
|
||||
"image_url": {"url": data_url},
|
||||
})
|
||||
attached_paths.append(str(raw_path))
|
||||
|
||||
for url in image_urls or []:
|
||||
url = (url or "").strip()
|
||||
if not url:
|
||||
continue
|
||||
image_parts.append({
|
||||
"type": "image_url",
|
||||
"image_url": {"url": url},
|
||||
})
|
||||
attached_urls.append(url)
|
||||
|
||||
text = (user_text or "").strip()
|
||||
|
||||
# If at least one image attached, build a single text part that combines
|
||||
# the user's caption (or a neutral default) with one hint per image.
|
||||
if attached_paths or attached_urls:
|
||||
base_text = text or "What do you see in this image?"
|
||||
hint_lines: List[str] = []
|
||||
hint_lines.extend(f"[Image attached at: {p}]" for p in attached_paths)
|
||||
hint_lines.extend(f"[Image attached: {u}]" for u in attached_urls)
|
||||
combined_text = f"{base_text}\n\n" + "\n".join(hint_lines)
|
||||
parts: List[Dict[str, Any]] = [{"type": "text", "text": combined_text}]
|
||||
parts.extend(image_parts)
|
||||
return parts, skipped
|
||||
|
||||
# No images successfully attached — fall back to plain text-only behaviour.
|
||||
parts = []
|
||||
if text:
|
||||
parts.append({"type": "text", "text": text})
|
||||
return parts, skipped
|
||||
|
||||
|
||||
__all__ = [
|
||||
"decide_image_input_mode",
|
||||
"build_native_content_parts",
|
||||
"extract_image_refs",
|
||||
]
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user