mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-07 03:07:21 +08:00
Compare commits
1 Commits
feat/strea
...
atropos-in
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ae6435f787 |
124
.env.example
124
.env.example
@@ -10,41 +10,9 @@
|
||||
OPENROUTER_API_KEY=
|
||||
|
||||
# Default model to use (OpenRouter format: provider/model)
|
||||
# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-3-flash-preview, zhipuai/glm-4-plus
|
||||
# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-2.0-flash, zhipuai/glm-4-plus
|
||||
LLM_MODEL=anthropic/claude-opus-4.6
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (z.ai / GLM)
|
||||
# =============================================================================
|
||||
# z.ai provides access to ZhipuAI GLM models (GLM-4-Plus, etc.)
|
||||
# Get your key at: https://z.ai or https://open.bigmodel.cn
|
||||
GLM_API_KEY=
|
||||
# GLM_BASE_URL=https://api.z.ai/api/paas/v4 # Override default base URL
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (Kimi / Moonshot)
|
||||
# =============================================================================
|
||||
# Kimi Code provides access to Moonshot AI coding models (kimi-k2.5, etc.)
|
||||
# Get your key at: https://platform.kimi.ai (Kimi Code console)
|
||||
# Keys prefixed sk-kimi- use the Kimi Code API (api.kimi.com) by default.
|
||||
# Legacy keys from platform.moonshot.ai need KIMI_BASE_URL override below.
|
||||
KIMI_API_KEY=
|
||||
# KIMI_BASE_URL=https://api.kimi.com/coding/v1 # Default for sk-kimi- keys
|
||||
# KIMI_BASE_URL=https://api.moonshot.ai/v1 # For legacy Moonshot keys
|
||||
# KIMI_BASE_URL=https://api.moonshot.cn/v1 # For Moonshot China keys
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (MiniMax)
|
||||
# =============================================================================
|
||||
# MiniMax provides access to MiniMax models (global endpoint)
|
||||
# Get your key at: https://www.minimax.io
|
||||
MINIMAX_API_KEY=
|
||||
# MINIMAX_BASE_URL=https://api.minimax.io/v1 # Override default base URL
|
||||
|
||||
# MiniMax China endpoint (for users in mainland China)
|
||||
MINIMAX_CN_API_KEY=
|
||||
# MINIMAX_CN_BASE_URL=https://api.minimaxi.com/v1 # Override default base URL
|
||||
|
||||
# =============================================================================
|
||||
# TOOL API KEYS
|
||||
# =============================================================================
|
||||
@@ -53,38 +21,35 @@ MINIMAX_CN_API_KEY=
|
||||
# Get at: https://firecrawl.dev/
|
||||
FIRECRAWL_API_KEY=
|
||||
|
||||
# Nous Research API Key - Vision analysis and multi-model reasoning
|
||||
# Get at: https://inference-api.nousresearch.com/
|
||||
NOUS_API_KEY=
|
||||
|
||||
# FAL.ai API Key - Image generation
|
||||
# Get at: https://fal.ai/
|
||||
FAL_KEY=
|
||||
|
||||
# Honcho - Cross-session AI-native user modeling (optional)
|
||||
# Builds a persistent understanding of the user across sessions and tools.
|
||||
# Get at: https://app.honcho.dev
|
||||
# Also requires ~/.honcho/config.json with enabled=true (see README).
|
||||
HONCHO_API_KEY=
|
||||
|
||||
# =============================================================================
|
||||
# TERMINAL TOOL CONFIGURATION (mini-swe-agent backend)
|
||||
# =============================================================================
|
||||
# Backend type: "local", "singularity", "docker", "modal", or "ssh"
|
||||
# Terminal backend is configured in ~/.hermes/config.yaml (terminal.backend).
|
||||
# Use 'hermes setup' or 'hermes config set terminal.backend docker' to change.
|
||||
# Supported: local, docker, singularity, modal, ssh
|
||||
#
|
||||
# Only override here if you need to force a backend without touching config.yaml:
|
||||
# TERMINAL_ENV=local
|
||||
# - local: Runs directly on your machine (fastest, no isolation)
|
||||
# - ssh: Runs on remote server via SSH (great for sandboxing - agent can't touch its own code)
|
||||
# - singularity: Runs in Apptainer/Singularity containers (HPC clusters, no root needed)
|
||||
# - docker: Runs in Docker containers (isolated, requires Docker + docker group)
|
||||
# - modal: Runs in Modal cloud sandboxes (scalable, requires Modal account)
|
||||
TERMINAL_ENV=local
|
||||
|
||||
|
||||
# Container images (for singularity/docker/modal backends)
|
||||
# TERMINAL_DOCKER_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
|
||||
# TERMINAL_SINGULARITY_IMAGE=docker://nikolaik/python-nodejs:python3.11-nodejs20
|
||||
TERMINAL_DOCKER_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
|
||||
TERMINAL_SINGULARITY_IMAGE=docker://nikolaik/python-nodejs:python3.11-nodejs20
|
||||
TERMINAL_MODAL_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
|
||||
|
||||
|
||||
# Working directory for terminal commands
|
||||
# For local backend: "." means current directory (resolved automatically)
|
||||
# For remote backends (ssh/docker/modal/singularity): use an absolute path
|
||||
# INSIDE the target environment, or leave unset for the backend's default
|
||||
# (/root for modal, / for docker, ~ for ssh). Do NOT use a host-local path.
|
||||
# For CLI: "." means current directory (resolved automatically from config.yaml)
|
||||
# For containers (docker/singularity/modal): absolute path inside the container
|
||||
# Usually managed by config.yaml (terminal.cwd) — uncomment to override
|
||||
# TERMINAL_CWD=.
|
||||
|
||||
@@ -176,43 +141,16 @@ BROWSER_INACTIVITY_TIMEOUT=120
|
||||
# Contains full conversation history in trajectory format for debugging/replay
|
||||
|
||||
# =============================================================================
|
||||
# VOICE TRANSCRIPTION & OPENAI TTS
|
||||
# LEGACY/OPTIONAL API KEYS
|
||||
# =============================================================================
|
||||
# Required for voice message transcription (Whisper) and OpenAI TTS voices.
|
||||
# Uses OpenAI's API directly (not via OpenRouter).
|
||||
# Named VOICE_TOOLS_OPENAI_KEY to avoid interference with OpenRouter.
|
||||
# Get at: https://platform.openai.com/api-keys
|
||||
VOICE_TOOLS_OPENAI_KEY=
|
||||
|
||||
# =============================================================================
|
||||
# SLACK INTEGRATION
|
||||
# =============================================================================
|
||||
# Slack Bot Token - From Slack App settings (OAuth & Permissions)
|
||||
# Get at: https://api.slack.com/apps
|
||||
# SLACK_BOT_TOKEN=xoxb-...
|
||||
# Morph API Key - For legacy Hecate terminal backend (terminal-hecate tool)
|
||||
# Get at: https://morph.so/
|
||||
MORPH_API_KEY=
|
||||
|
||||
# Slack App Token - For Socket Mode (App-Level Tokens in Slack App settings)
|
||||
# SLACK_APP_TOKEN=xapp-...
|
||||
|
||||
# Slack allowed users (comma-separated Slack user IDs)
|
||||
# SLACK_ALLOWED_USERS=
|
||||
|
||||
# WhatsApp (built-in Baileys bridge — run `hermes whatsapp` to pair)
|
||||
# WHATSAPP_ENABLED=false
|
||||
# WHATSAPP_ALLOWED_USERS=15551234567
|
||||
|
||||
# Gateway-wide: allow ALL users without an allowlist (default: false = deny)
|
||||
# Only set to true if you intentionally want open access.
|
||||
# GATEWAY_ALLOW_ALL_USERS=false
|
||||
|
||||
# =============================================================================
|
||||
# RESPONSE PACING
|
||||
# =============================================================================
|
||||
# Human-like delays between message chunks on messaging platforms.
|
||||
# Makes the bot feel less robotic.
|
||||
# HERMES_HUMAN_DELAY_MODE=off # off | natural | custom
|
||||
# HERMES_HUMAN_DELAY_MIN_MS=800 # Min delay in ms (custom mode)
|
||||
# HERMES_HUMAN_DELAY_MAX_MS=2500 # Max delay in ms (custom mode)
|
||||
# Hecate VM Settings (only if using terminal-hecate tool)
|
||||
HECATE_VM_LIFETIME_SECONDS=300
|
||||
HECATE_DEFAULT_SNAPSHOT_ID=snapshot_p5294qxt
|
||||
|
||||
# =============================================================================
|
||||
# DEBUG OPTIONS
|
||||
@@ -228,10 +166,9 @@ IMAGE_TOOLS_DEBUG=false
|
||||
# When conversation approaches model's context limit, middle turns are
|
||||
# automatically summarized to free up space.
|
||||
#
|
||||
# Context compression is configured in ~/.hermes/config.yaml under compression:
|
||||
# CONTEXT_COMPRESSION_ENABLED=true # Enable auto-compression (default: true)
|
||||
# CONTEXT_COMPRESSION_THRESHOLD=0.85 # Compress at 85% of context limit
|
||||
# Model is set via compression.summary_model in config.yaml (default: google/gemini-3-flash-preview)
|
||||
# CONTEXT_COMPRESSION_MODEL=google/gemini-2.0-flash-001 # Fast model for summaries
|
||||
|
||||
# =============================================================================
|
||||
# RL TRAINING (Tinker + Atropos)
|
||||
@@ -250,16 +187,3 @@ WANDB_API_KEY=
|
||||
# RL API Server URL (default: http://localhost:8080)
|
||||
# Change if running the rl-server on a different host/port
|
||||
# RL_API_URL=http://localhost:8080
|
||||
|
||||
# =============================================================================
|
||||
# SKILLS HUB (GitHub integration for skill search/install/publish)
|
||||
# =============================================================================
|
||||
|
||||
# GitHub Personal Access Token — for higher API rate limits on skill search/install
|
||||
# Get at: https://github.com/settings/tokens (Fine-grained recommended)
|
||||
# GITHUB_TOKEN=ghp_xxxxxxxxxxxxxxxxxxxx
|
||||
|
||||
# GitHub App credentials (optional — for bot identity on PRs)
|
||||
# GITHUB_APP_ID=
|
||||
# GITHUB_APP_PRIVATE_KEY_PATH=
|
||||
# GITHUB_APP_INSTALLATION_ID=
|
||||
|
||||
144
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
144
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
@@ -1,144 +0,0 @@
|
||||
name: "🐛 Bug Report"
|
||||
description: Report a bug — something that's broken, crashes, or behaves incorrectly.
|
||||
title: "[Bug]: "
|
||||
labels: ["bug"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for reporting a bug! Please fill out the sections below so we can reproduce and fix it quickly.
|
||||
|
||||
**Before submitting**, please:
|
||||
- [ ] Search [existing issues](https://github.com/NousResearch/hermes-agent/issues) to avoid duplicates
|
||||
- [ ] Update to the latest version (`hermes update`) and confirm the bug still exists
|
||||
|
||||
- type: textarea
|
||||
id: description
|
||||
attributes:
|
||||
label: Bug Description
|
||||
description: A clear description of what's broken. Include error messages, tracebacks, or screenshots if relevant.
|
||||
placeholder: |
|
||||
What happened? What did you expect to happen instead?
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: reproduction
|
||||
attributes:
|
||||
label: Steps to Reproduce
|
||||
description: Minimal steps to trigger the bug. The more specific, the faster we can fix it.
|
||||
placeholder: |
|
||||
1. Run `hermes chat`
|
||||
2. Send the message "..."
|
||||
3. Agent calls tool X
|
||||
4. Error appears: ...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: expected
|
||||
attributes:
|
||||
label: Expected Behavior
|
||||
description: What should have happened instead?
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: actual
|
||||
attributes:
|
||||
label: Actual Behavior
|
||||
description: What actually happened? Include full error output if available.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: component
|
||||
attributes:
|
||||
label: Affected Component
|
||||
description: Which part of Hermes is affected?
|
||||
multiple: true
|
||||
options:
|
||||
- CLI (interactive chat)
|
||||
- Gateway (Telegram/Discord/Slack/WhatsApp)
|
||||
- Setup / Installation
|
||||
- Tools (terminal, file ops, web, code execution, etc.)
|
||||
- Skills (skill loading, skill hub, skill guard)
|
||||
- Agent Core (conversation loop, context compression, memory)
|
||||
- Configuration (config.yaml, .env, hermes setup)
|
||||
- Other
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: platform
|
||||
attributes:
|
||||
label: Messaging Platform (if gateway-related)
|
||||
description: Which platform adapter is affected?
|
||||
multiple: true
|
||||
options:
|
||||
- N/A (CLI only)
|
||||
- Telegram
|
||||
- Discord
|
||||
- Slack
|
||||
- WhatsApp
|
||||
|
||||
- type: input
|
||||
id: os
|
||||
attributes:
|
||||
label: Operating System
|
||||
description: e.g. Ubuntu 24.04, macOS 15.2, Windows 11
|
||||
placeholder: Ubuntu 24.04
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: python-version
|
||||
attributes:
|
||||
label: Python Version
|
||||
description: Output of `python --version`
|
||||
placeholder: "3.11.9"
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: hermes-version
|
||||
attributes:
|
||||
label: Hermes Version
|
||||
description: Output of `hermes version`
|
||||
placeholder: "2.1.0"
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Relevant Logs / Traceback
|
||||
description: Paste any error output, traceback, or log messages. This will be auto-formatted as code.
|
||||
render: shell
|
||||
|
||||
- type: textarea
|
||||
id: root-cause
|
||||
attributes:
|
||||
label: Root Cause Analysis (optional)
|
||||
description: |
|
||||
If you've dug into the code and identified the root cause, share it here.
|
||||
Include file paths, line numbers, and code snippets if possible. This massively speeds up fixes.
|
||||
placeholder: |
|
||||
The bug is in `gateway/run.py` line 949. `len(history)` counts session_meta entries
|
||||
but `agent_messages` was built from filtered history...
|
||||
|
||||
- type: textarea
|
||||
id: proposed-fix
|
||||
attributes:
|
||||
label: Proposed Fix (optional)
|
||||
description: If you have a fix in mind (or a PR ready), describe it here.
|
||||
placeholder: |
|
||||
Replace `.get()` with `.pop()` on line 289 of `gateway/platforms/base.py`
|
||||
to actually clear the pending message after retrieval.
|
||||
|
||||
- type: checkboxes
|
||||
id: pr-ready
|
||||
attributes:
|
||||
label: Are you willing to submit a PR for this?
|
||||
options:
|
||||
- label: I'd like to fix this myself and submit a PR
|
||||
11
.github/ISSUE_TEMPLATE/config.yml
vendored
11
.github/ISSUE_TEMPLATE/config.yml
vendored
@@ -1,11 +0,0 @@
|
||||
blank_issues_enabled: true
|
||||
contact_links:
|
||||
- name: 💬 Nous Research Discord
|
||||
url: https://discord.gg/NousResearch
|
||||
about: For quick questions, showcasing projects, sharing skills, and community chat.
|
||||
- name: 📖 Documentation
|
||||
url: https://github.com/NousResearch/hermes-agent/blob/main/README.md
|
||||
about: Check the README and docs before opening an issue.
|
||||
- name: 🤝 Contributing Guide
|
||||
url: https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md
|
||||
about: Read this before submitting a PR.
|
||||
73
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
73
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
@@ -1,73 +0,0 @@
|
||||
name: "✨ Feature Request"
|
||||
description: Suggest a new feature or improvement.
|
||||
title: "[Feature]: "
|
||||
labels: ["enhancement"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for the suggestion! Before submitting, please consider:
|
||||
|
||||
- **Is this a new skill?** Most capabilities should be [skills, not tools](https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md#should-it-be-a-skill-or-a-tool). If it's a specialized integration (crypto, NFT, niche SaaS), it belongs on the Skills Hub, not bundled.
|
||||
- **Search [existing issues](https://github.com/NousResearch/hermes-agent/issues)** — someone may have already proposed this.
|
||||
|
||||
- type: textarea
|
||||
id: problem
|
||||
attributes:
|
||||
label: Problem or Use Case
|
||||
description: What problem does this solve? What are you trying to do that you can't today?
|
||||
placeholder: |
|
||||
I'm trying to use Hermes with [provider/platform/workflow] but currently
|
||||
there's no way to...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: solution
|
||||
attributes:
|
||||
label: Proposed Solution
|
||||
description: How do you think this should work? Be as specific as you can — CLI flags, config options, UI behavior.
|
||||
placeholder: |
|
||||
Add a `--foo` flag to `hermes chat` that enables...
|
||||
Or: Add a config key `bar.baz` that controls...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: alternatives
|
||||
attributes:
|
||||
label: Alternatives Considered
|
||||
description: What other approaches did you consider? Why is the proposed solution better?
|
||||
|
||||
- type: dropdown
|
||||
id: type
|
||||
attributes:
|
||||
label: Feature Type
|
||||
options:
|
||||
- New tool
|
||||
- New bundled skill
|
||||
- CLI improvement
|
||||
- Gateway / messaging improvement
|
||||
- Configuration option
|
||||
- Performance / reliability
|
||||
- Developer experience (tests, docs, CI)
|
||||
- Other
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: scope
|
||||
attributes:
|
||||
label: Scope
|
||||
description: How big is this change?
|
||||
options:
|
||||
- Small (single file, < 50 lines)
|
||||
- Medium (few files, < 300 lines)
|
||||
- Large (new module or significant refactor)
|
||||
|
||||
- type: checkboxes
|
||||
id: pr-ready
|
||||
attributes:
|
||||
label: Contribution
|
||||
options:
|
||||
- label: I'd like to implement this myself and submit a PR
|
||||
100
.github/ISSUE_TEMPLATE/setup_help.yml
vendored
100
.github/ISSUE_TEMPLATE/setup_help.yml
vendored
@@ -1,100 +0,0 @@
|
||||
name: "🔧 Setup / Installation Help"
|
||||
description: Having trouble installing or configuring Hermes? Ask here.
|
||||
title: "[Setup]: "
|
||||
labels: ["setup"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Sorry you're having trouble! Please fill out the details below so we can help.
|
||||
|
||||
**Quick checks first:**
|
||||
- Run `hermes doctor` and include the output below
|
||||
- Try `hermes update` to get the latest version
|
||||
- Check the [README troubleshooting section](https://github.com/NousResearch/hermes-agent#troubleshooting)
|
||||
- For general questions, consider the [Nous Research Discord](https://discord.gg/NousResearch) for faster help
|
||||
|
||||
- type: textarea
|
||||
id: description
|
||||
attributes:
|
||||
label: What's Going Wrong?
|
||||
description: Describe what you're trying to do and where it fails.
|
||||
placeholder: |
|
||||
I ran `hermes setup` and selected Nous Portal, but when I try to
|
||||
start the gateway I get...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: steps
|
||||
attributes:
|
||||
label: Steps Taken
|
||||
description: What did you do? Include the exact commands you ran.
|
||||
placeholder: |
|
||||
1. Ran the install script: `curl -fsSL ... | bash`
|
||||
2. Ran `hermes setup` and chose "Quick setup"
|
||||
3. Selected OpenRouter, entered API key
|
||||
4. Ran `hermes chat` and got error...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: install-method
|
||||
attributes:
|
||||
label: Installation Method
|
||||
options:
|
||||
- Install script (curl | bash)
|
||||
- Manual clone + pip/uv install
|
||||
- PowerShell installer (Windows)
|
||||
- Docker
|
||||
- Other
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: os
|
||||
attributes:
|
||||
label: Operating System
|
||||
placeholder: Ubuntu 24.04 / macOS 15.2 / Windows 11
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: python-version
|
||||
attributes:
|
||||
label: Python Version
|
||||
description: Output of `python --version` (or `python3 --version`)
|
||||
placeholder: "3.11.9"
|
||||
|
||||
- type: input
|
||||
id: hermes-version
|
||||
attributes:
|
||||
label: Hermes Version
|
||||
description: Output of `hermes version` (if install got that far)
|
||||
placeholder: "2.1.0"
|
||||
|
||||
- type: textarea
|
||||
id: doctor-output
|
||||
attributes:
|
||||
label: Output of `hermes doctor`
|
||||
description: Run `hermes doctor` and paste the full output. This will be auto-formatted.
|
||||
render: shell
|
||||
|
||||
- type: textarea
|
||||
id: error-output
|
||||
attributes:
|
||||
label: Full Error Output
|
||||
description: Paste the complete error message or traceback. This will be auto-formatted.
|
||||
render: shell
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: tried
|
||||
attributes:
|
||||
label: What I've Already Tried
|
||||
description: List any fixes or workarounds you've already attempted.
|
||||
placeholder: |
|
||||
- Ran `hermes update`
|
||||
- Tried reinstalling with `pip install -e ".[all]"`
|
||||
- Checked that OPENROUTER_API_KEY is set in ~/.hermes/.env
|
||||
75
.github/PULL_REQUEST_TEMPLATE.md
vendored
75
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -1,75 +0,0 @@
|
||||
## What does this PR do?
|
||||
|
||||
<!-- Describe the change clearly. What problem does it solve? Why is this approach the right one? -->
|
||||
|
||||
|
||||
|
||||
## Related Issue
|
||||
|
||||
<!-- Link the issue this PR addresses. If no issue exists, consider creating one first. -->
|
||||
|
||||
Fixes #
|
||||
|
||||
## Type of Change
|
||||
|
||||
<!-- Check the one that applies. -->
|
||||
|
||||
- [ ] 🐛 Bug fix (non-breaking change that fixes an issue)
|
||||
- [ ] ✨ New feature (non-breaking change that adds functionality)
|
||||
- [ ] 🔒 Security fix
|
||||
- [ ] 📝 Documentation update
|
||||
- [ ] ✅ Tests (adding or improving test coverage)
|
||||
- [ ] ♻️ Refactor (no behavior change)
|
||||
- [ ] 🎯 New skill (bundled or hub)
|
||||
|
||||
## Changes Made
|
||||
|
||||
<!-- List the specific changes. Include file paths for code changes. -->
|
||||
|
||||
-
|
||||
|
||||
## How to Test
|
||||
|
||||
<!-- Steps to verify this change works. For bugs: reproduction steps + proof that the fix works. -->
|
||||
|
||||
1.
|
||||
2.
|
||||
3.
|
||||
|
||||
## Checklist
|
||||
|
||||
<!-- Complete these before requesting review. -->
|
||||
|
||||
### Code
|
||||
|
||||
- [ ] I've read the [Contributing Guide](https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md)
|
||||
- [ ] My commit messages follow [Conventional Commits](https://www.conventionalcommits.org/) (`fix(scope):`, `feat(scope):`, etc.)
|
||||
- [ ] I searched for [existing PRs](https://github.com/NousResearch/hermes-agent/pulls) to make sure this isn't a duplicate
|
||||
- [ ] My PR contains **only** changes related to this fix/feature (no unrelated commits)
|
||||
- [ ] I've run `pytest tests/ -q` and all tests pass
|
||||
- [ ] I've added tests for my changes (required for bug fixes, strongly encouraged for features)
|
||||
- [ ] I've tested on my platform: <!-- e.g. Ubuntu 24.04, macOS 15.2, Windows 11 -->
|
||||
|
||||
### Documentation & Housekeeping
|
||||
|
||||
<!-- Check all that apply. It's OK to check "N/A" if a category doesn't apply to your change. -->
|
||||
|
||||
- [ ] I've updated relevant documentation (README, `docs/`, docstrings) — or N/A
|
||||
- [ ] I've updated `cli-config.yaml.example` if I added/changed config keys — or N/A
|
||||
- [ ] I've updated `CONTRIBUTING.md` or `AGENTS.md` if I changed architecture or workflows — or N/A
|
||||
- [ ] I've considered cross-platform impact (Windows, macOS) per the [compatibility guide](https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md#cross-platform-compatibility) — or N/A
|
||||
- [ ] I've updated tool descriptions/schemas if I changed tool behavior — or N/A
|
||||
|
||||
## For New Skills
|
||||
|
||||
<!-- Only fill this out if you're adding a skill. Delete this section otherwise. -->
|
||||
|
||||
- [ ] This skill is **broadly useful** to most users (if bundled) — see [Contributing Guide](https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md#should-the-skill-be-bundled)
|
||||
- [ ] SKILL.md follows the [standard format](https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md#skillmd-format) (frontmatter, trigger conditions, steps, pitfalls)
|
||||
- [ ] No external dependencies that aren't already available (prefer stdlib, curl, existing Hermes tools)
|
||||
- [ ] I've tested the skill end-to-end: `hermes --toolsets skills -q "Use the X skill to do Y"`
|
||||
|
||||
## Screenshots / Logs
|
||||
|
||||
<!-- If applicable, add screenshots or log output showing the fix/feature in action. -->
|
||||
|
||||
60
.github/workflows/deploy-site.yml
vendored
60
.github/workflows/deploy-site.yml
vendored
@@ -1,60 +0,0 @@
|
||||
name: Deploy Site
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'website/**'
|
||||
- 'landingpage/**'
|
||||
- '.github/workflows/deploy-site.yml'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
pages: write
|
||||
id-token: write
|
||||
|
||||
concurrency:
|
||||
group: pages
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
build-and-deploy:
|
||||
runs-on: ubuntu-latest
|
||||
environment:
|
||||
name: github-pages
|
||||
url: ${{ steps.deploy.outputs.page_url }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 20
|
||||
cache: npm
|
||||
cache-dependency-path: website/package-lock.json
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
working-directory: website
|
||||
|
||||
- name: Build Docusaurus
|
||||
run: npm run build
|
||||
working-directory: website
|
||||
|
||||
- name: Stage deployment
|
||||
run: |
|
||||
mkdir -p _site/docs
|
||||
# Landing page at root
|
||||
cp -r landingpage/* _site/
|
||||
# Docusaurus at /docs/
|
||||
cp -r website/build/* _site/docs/
|
||||
# CNAME so GitHub Pages keeps the custom domain between deploys
|
||||
echo "hermes-agent.nousresearch.com" > _site/CNAME
|
||||
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-pages-artifact@v3
|
||||
with:
|
||||
path: _site
|
||||
|
||||
- name: Deploy to GitHub Pages
|
||||
id: deploy
|
||||
uses: actions/deploy-pages@v4
|
||||
42
.github/workflows/tests.yml
vendored
42
.github/workflows/tests.yml
vendored
@@ -1,42 +0,0 @@
|
||||
name: Tests
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
# Cancel in-progress runs for the same PR/branch
|
||||
concurrency:
|
||||
group: tests-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v5
|
||||
|
||||
- name: Set up Python 3.11
|
||||
run: uv python install 3.11
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
uv venv .venv --python 3.11
|
||||
source .venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pytest tests/ -q --ignore=tests/integration --tb=short
|
||||
env:
|
||||
# Ensure tests don't accidentally call real APIs
|
||||
OPENROUTER_API_KEY: ""
|
||||
OPENAI_API_KEY: ""
|
||||
NOUS_API_KEY: ""
|
||||
7
.gitignore
vendored
7
.gitignore
vendored
@@ -1,5 +1,7 @@
|
||||
/venv/
|
||||
/_pycache/
|
||||
hecate/
|
||||
hecate-lib/
|
||||
*.pyc*
|
||||
__pycache__/
|
||||
.venv/
|
||||
@@ -44,8 +46,3 @@ testlogs
|
||||
|
||||
# CLI config (may contain sensitive SSH paths)
|
||||
cli-config.yaml
|
||||
|
||||
# Skills Hub state (lives in ~/.hermes/skills/.hub/ at runtime, but just in case)
|
||||
skills/.hub/
|
||||
ignored/
|
||||
.worktrees/
|
||||
|
||||
607
AGENTS.md
607
AGENTS.md
@@ -1,242 +1,533 @@
|
||||
# Hermes Agent - Development Guide
|
||||
|
||||
Instructions for AI coding assistants and developers working on the hermes-agent codebase.
|
||||
Instructions for AI coding assistants (GitHub Copilot, Cursor, etc.) and human developers.
|
||||
|
||||
Hermes-Agent is an AI agent harness with tool-calling capabilities, interactive CLI, messaging integrations, and scheduled tasks.
|
||||
|
||||
## Development Environment
|
||||
|
||||
**IMPORTANT**: Always use the virtual environment if it exists:
|
||||
```bash
|
||||
source .venv/bin/activate # ALWAYS activate before running Python
|
||||
source venv/bin/activate # Before running any Python commands
|
||||
```
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
hermes-agent/
|
||||
├── run_agent.py # AIAgent class — core conversation loop
|
||||
├── model_tools.py # Tool orchestration, _discover_tools(), handle_function_call()
|
||||
├── toolsets.py # Toolset definitions, _HERMES_CORE_TOOLS list
|
||||
├── cli.py # HermesCLI class — interactive CLI orchestrator
|
||||
├── hermes_state.py # SessionDB — SQLite session store (FTS5 search)
|
||||
├── agent/ # Agent internals
|
||||
│ ├── prompt_builder.py # System prompt assembly
|
||||
│ ├── context_compressor.py # Auto context compression
|
||||
│ ├── prompt_caching.py # Anthropic prompt caching
|
||||
│ ├── auxiliary_client.py # Auxiliary LLM client (vision, summarization)
|
||||
│ ├── model_metadata.py # Model context lengths, token estimation
|
||||
│ ├── display.py # KawaiiSpinner, tool preview formatting
|
||||
│ ├── skill_commands.py # Skill slash commands (shared CLI/gateway)
|
||||
│ └── trajectory.py # Trajectory saving helpers
|
||||
├── hermes_cli/ # CLI subcommands and setup
|
||||
│ ├── main.py # Entry point — all `hermes` subcommands
|
||||
│ ├── config.py # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration
|
||||
│ ├── commands.py # Slash command definitions + SlashCommandCompleter
|
||||
│ ├── callbacks.py # Terminal callbacks (clarify, sudo, approval)
|
||||
│ └── setup.py # Interactive setup wizard
|
||||
├── tools/ # Tool implementations (one file per tool)
|
||||
│ ├── registry.py # Central tool registry (schemas, handlers, dispatch)
|
||||
│ ├── approval.py # Dangerous command detection
|
||||
│ ├── terminal_tool.py # Terminal orchestration
|
||||
│ ├── process_registry.py # Background process management
|
||||
│ ├── file_tools.py # File read/write/search/patch
|
||||
│ ├── web_tools.py # Firecrawl search/extract
|
||||
│ ├── browser_tool.py # Browserbase browser automation
|
||||
│ ├── code_execution_tool.py # execute_code sandbox
|
||||
│ ├── delegate_tool.py # Subagent delegation
|
||||
│ ├── mcp_tool.py # MCP client (~1050 lines)
|
||||
│ └── environments/ # Terminal backends (local, docker, ssh, modal, daytona, singularity)
|
||||
├── gateway/ # Messaging platform gateway
|
||||
│ ├── run.py # Main loop, slash commands, message dispatch
|
||||
│ ├── session.py # SessionStore — conversation persistence
|
||||
│ └── platforms/ # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal
|
||||
├── cron/ # Scheduler (jobs.py, scheduler.py)
|
||||
├── environments/ # RL training environments (Atropos)
|
||||
├── tests/ # Pytest suite (~2500+ tests)
|
||||
├── hermes_cli/ # Unified CLI commands
|
||||
│ ├── main.py # Entry point, command dispatcher
|
||||
│ ├── setup.py # Interactive setup wizard
|
||||
│ ├── config.py # Config management & migration
|
||||
│ ├── status.py # Status display
|
||||
│ ├── doctor.py # Diagnostics
|
||||
│ ├── gateway.py # Gateway management
|
||||
│ ├── uninstall.py # Uninstaller
|
||||
│ └── cron.py # Cron job management
|
||||
├── tools/ # Tool implementations
|
||||
├── gateway/ # Messaging platform adapters
|
||||
├── cron/ # Scheduler implementation
|
||||
├── skills/ # Knowledge documents
|
||||
├── cli.py # Interactive CLI (Rich UI)
|
||||
├── run_agent.py # Agent runner with AIAgent class
|
||||
├── model_tools.py # Tool schemas and handlers
|
||||
├── toolsets.py # Tool groupings
|
||||
├── toolset_distributions.py # Probability-based tool selection
|
||||
└── batch_runner.py # Parallel batch processing
|
||||
```
|
||||
|
||||
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys)
|
||||
**User Configuration** (stored in `~/.hermes/`):
|
||||
- `~/.hermes/config.yaml` - Settings (model, terminal, toolsets, etc.)
|
||||
- `~/.hermes/.env` - API keys and secrets
|
||||
|
||||
## File Dependency Chain
|
||||
|
||||
```
|
||||
tools/registry.py (no deps — imported by all tool files)
|
||||
↑
|
||||
tools/*.py (each calls registry.register() at import time)
|
||||
↑
|
||||
model_tools.py (imports tools/registry + triggers tool discovery)
|
||||
↑
|
||||
run_agent.py, cli.py, batch_runner.py, environments/
|
||||
tools/*.py → tools/__init__.py → model_tools.py → toolsets.py → toolset_distributions.py
|
||||
↑
|
||||
run_agent.py ──────────────────────────┘
|
||||
cli.py → run_agent.py (uses AIAgent with quiet_mode=True)
|
||||
batch_runner.py → run_agent.py + toolset_distributions.py
|
||||
```
|
||||
|
||||
Always ensure consistency between tools, model_tools.py, and toolsets.py when changing any of them.
|
||||
|
||||
---
|
||||
|
||||
## AIAgent Class (run_agent.py)
|
||||
## AIAgent Class
|
||||
|
||||
The main agent is implemented in `run_agent.py`:
|
||||
|
||||
```python
|
||||
class AIAgent:
|
||||
def __init__(self,
|
||||
model: str = "anthropic/claude-opus-4.6",
|
||||
max_iterations: int = 90,
|
||||
def __init__(
|
||||
self,
|
||||
model: str = "anthropic/claude-sonnet-4",
|
||||
api_key: str = None,
|
||||
base_url: str = "https://openrouter.ai/api/v1",
|
||||
max_iterations: int = 60, # Max tool-calling loops
|
||||
enabled_toolsets: list = None,
|
||||
disabled_toolsets: list = None,
|
||||
quiet_mode: bool = False,
|
||||
save_trajectories: bool = False,
|
||||
platform: str = None, # "cli", "telegram", etc.
|
||||
session_id: str = None,
|
||||
skip_context_files: bool = False,
|
||||
skip_memory: bool = False,
|
||||
# ... plus provider, api_mode, callbacks, routing params
|
||||
): ...
|
||||
|
||||
def chat(self, message: str) -> str:
|
||||
"""Simple interface — returns final response string."""
|
||||
|
||||
def run_conversation(self, user_message: str, system_message: str = None,
|
||||
conversation_history: list = None, task_id: str = None) -> dict:
|
||||
"""Full interface — returns dict with final_response + messages."""
|
||||
verbose_logging: bool = False,
|
||||
quiet_mode: bool = False, # Suppress progress output
|
||||
tool_progress_callback: callable = None, # Called on each tool use
|
||||
):
|
||||
# Initialize OpenAI client, load tools based on toolsets
|
||||
...
|
||||
|
||||
def chat(self, user_message: str, task_id: str = None) -> str:
|
||||
# Main entry point - runs the agent loop
|
||||
...
|
||||
```
|
||||
|
||||
### Agent Loop
|
||||
|
||||
The core loop is inside `run_conversation()` — entirely synchronous:
|
||||
The core loop in `_run_agent_loop()`:
|
||||
|
||||
```
|
||||
1. Add user message to conversation
|
||||
2. Call LLM with tools
|
||||
3. If LLM returns tool calls:
|
||||
- Execute each tool
|
||||
- Add tool results to conversation
|
||||
- Go to step 2
|
||||
4. If LLM returns text response:
|
||||
- Return response to user
|
||||
```
|
||||
|
||||
```python
|
||||
while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
|
||||
response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas)
|
||||
while turns < max_turns:
|
||||
response = client.chat.completions.create(
|
||||
model=model,
|
||||
messages=messages,
|
||||
tools=tool_schemas,
|
||||
)
|
||||
|
||||
if response.tool_calls:
|
||||
for tool_call in response.tool_calls:
|
||||
result = handle_function_call(tool_call.name, tool_call.args, task_id)
|
||||
result = await execute_tool(tool_call)
|
||||
messages.append(tool_result_message(result))
|
||||
api_call_count += 1
|
||||
turns += 1
|
||||
else:
|
||||
return response.content
|
||||
```
|
||||
|
||||
Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`.
|
||||
### Conversation Management
|
||||
|
||||
Messages are stored as a list of dicts following OpenAI format:
|
||||
|
||||
```python
|
||||
messages = [
|
||||
{"role": "system", "content": "You are a helpful assistant..."},
|
||||
{"role": "user", "content": "Search for Python tutorials"},
|
||||
{"role": "assistant", "content": None, "tool_calls": [...]},
|
||||
{"role": "tool", "tool_call_id": "...", "content": "..."},
|
||||
{"role": "assistant", "content": "Here's what I found..."},
|
||||
]
|
||||
```
|
||||
|
||||
### Reasoning Model Support
|
||||
|
||||
For models that support chain-of-thought reasoning:
|
||||
- Extract `reasoning_content` from API responses
|
||||
- Store in `assistant_msg["reasoning"]` for trajectory export
|
||||
- Pass back via `reasoning_content` field on subsequent turns
|
||||
|
||||
---
|
||||
|
||||
## CLI Architecture (cli.py)
|
||||
|
||||
- **Rich** for banner/panels, **prompt_toolkit** for input with autocomplete
|
||||
- **KawaiiSpinner** (`agent/display.py`) — animated faces during API calls, `┊` activity feed for tool results
|
||||
- `load_cli_config()` in cli.py merges hardcoded defaults + user config YAML
|
||||
- `process_command()` is a method on `HermesCLI` (not in commands.py)
|
||||
- Skill slash commands: `agent/skill_commands.py` scans `~/.hermes/skills/`, injects as **user message** (not system prompt) to preserve prompt caching
|
||||
The interactive CLI uses:
|
||||
- **Rich** - For the welcome banner and styled panels
|
||||
- **prompt_toolkit** - For fixed input area with history and `patch_stdout`
|
||||
- **KawaiiSpinner** (in run_agent.py) - Animated feedback during API calls and tool execution
|
||||
|
||||
Key components:
|
||||
- `HermesCLI` class - Main CLI controller with commands and conversation loop
|
||||
- `load_cli_config()` - Loads config, sets environment variables for terminal
|
||||
- `build_welcome_banner()` - Displays ASCII art logo, tools, and skills summary
|
||||
- `/commands` - Process user commands like `/help`, `/clear`, `/personality`, etc.
|
||||
|
||||
CLI uses `quiet_mode=True` when creating AIAgent to suppress verbose logging.
|
||||
|
||||
### Adding CLI Commands
|
||||
|
||||
1. Add to `COMMANDS` dict in `hermes_cli/commands.py`
|
||||
2. Add handler in `HermesCLI.process_command()` in `cli.py`
|
||||
3. For persistent settings, use `save_config_value()` in `cli.py`
|
||||
1. Add to `COMMANDS` dict with description
|
||||
2. Add handler in `process_command()` method
|
||||
3. For persistent settings, use `save_config_value()` to update config
|
||||
|
||||
---
|
||||
|
||||
## Hermes CLI Commands
|
||||
|
||||
The unified `hermes` command provides all functionality:
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `hermes` | Interactive chat (default) |
|
||||
| `hermes chat -q "..."` | Single query mode |
|
||||
| `hermes setup` | Configure API keys and settings |
|
||||
| `hermes config` | View current configuration |
|
||||
| `hermes config edit` | Open config in editor |
|
||||
| `hermes config set KEY VAL` | Set a specific value |
|
||||
| `hermes config check` | Check for missing config |
|
||||
| `hermes config migrate` | Prompt for missing config interactively |
|
||||
| `hermes status` | Show configuration status |
|
||||
| `hermes doctor` | Diagnose issues |
|
||||
| `hermes update` | Update to latest (checks for new config) |
|
||||
| `hermes uninstall` | Uninstall (can keep configs for reinstall) |
|
||||
| `hermes gateway` | Start messaging gateway |
|
||||
| `hermes cron list` | View scheduled jobs |
|
||||
| `hermes version` | Show version info |
|
||||
|
||||
---
|
||||
|
||||
## Messaging Gateway
|
||||
|
||||
The gateway connects Hermes to Telegram, Discord, and WhatsApp.
|
||||
|
||||
### Configuration (in `~/.hermes/.env`):
|
||||
|
||||
```bash
|
||||
# Telegram
|
||||
TELEGRAM_BOT_TOKEN=123456:ABC-DEF... # From @BotFather
|
||||
TELEGRAM_ALLOWED_USERS=123456789,987654 # Comma-separated user IDs (from @userinfobot)
|
||||
|
||||
# Discord
|
||||
DISCORD_BOT_TOKEN=MTIz... # From Developer Portal
|
||||
DISCORD_ALLOWED_USERS=123456789012345678 # Comma-separated user IDs
|
||||
|
||||
# Agent Behavior
|
||||
HERMES_MAX_ITERATIONS=60 # Max tool-calling iterations
|
||||
MESSAGING_CWD=/home/myuser # Terminal working directory for messaging
|
||||
|
||||
# Tool Progress (optional)
|
||||
HERMES_TOOL_PROGRESS=true # Send progress messages
|
||||
HERMES_TOOL_PROGRESS_MODE=new # "new" or "all"
|
||||
```
|
||||
|
||||
### Working Directory Behavior
|
||||
|
||||
- **CLI (`hermes` command)**: Uses current directory (`.` → `os.getcwd()`)
|
||||
- **Messaging (Telegram/Discord)**: Uses `MESSAGING_CWD` (default: home directory)
|
||||
|
||||
This is intentional: CLI users are in a terminal and expect the agent to work in their current directory, while messaging users need a consistent starting location.
|
||||
|
||||
### Security (User Allowlists):
|
||||
|
||||
**IMPORTANT**: Without an allowlist, anyone who finds your bot can use it!
|
||||
|
||||
The gateway checks `{PLATFORM}_ALLOWED_USERS` environment variables:
|
||||
- If set: Only listed user IDs can interact with the bot
|
||||
- If unset: All users are allowed (dangerous with terminal access!)
|
||||
|
||||
Users can find their IDs:
|
||||
- **Telegram**: Message [@userinfobot](https://t.me/userinfobot)
|
||||
- **Discord**: Enable Developer Mode, right-click name → Copy ID
|
||||
|
||||
### Tool Progress Notifications
|
||||
|
||||
When `HERMES_TOOL_PROGRESS=true`, the bot sends status messages as it works:
|
||||
- `💻 \`ls -la\`...` (terminal commands show the actual command)
|
||||
- `🔍 web_search...`
|
||||
- `📄 web_extract...`
|
||||
|
||||
Modes:
|
||||
- `new`: Only when switching to a different tool (less spam)
|
||||
- `all`: Every single tool call
|
||||
|
||||
### Typing Indicator
|
||||
|
||||
The gateway keeps the "typing..." indicator active throughout processing, refreshing every 4 seconds. This lets users know the bot is working even during long tool-calling sequences.
|
||||
|
||||
### Platform Toolsets:
|
||||
|
||||
Each platform has a dedicated toolset in `toolsets.py`:
|
||||
- `hermes-telegram`: Full tools including terminal (with safety checks)
|
||||
- `hermes-discord`: Full tools including terminal
|
||||
- `hermes-whatsapp`: Full tools including terminal
|
||||
|
||||
---
|
||||
|
||||
## Configuration System
|
||||
|
||||
Configuration files are stored in `~/.hermes/` for easy user access:
|
||||
- `~/.hermes/config.yaml` - All settings (model, terminal, compression, etc.)
|
||||
- `~/.hermes/.env` - API keys and secrets
|
||||
|
||||
### Adding New Configuration Options
|
||||
|
||||
When adding new configuration variables, you MUST follow this process:
|
||||
|
||||
#### For config.yaml options:
|
||||
|
||||
1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
|
||||
2. **CRITICAL**: Bump `_config_version` in `DEFAULT_CONFIG` when adding required fields
|
||||
3. This triggers migration prompts for existing users on next `hermes update` or `hermes setup`
|
||||
|
||||
Example:
|
||||
```python
|
||||
DEFAULT_CONFIG = {
|
||||
# ... existing config ...
|
||||
|
||||
"new_feature": {
|
||||
"enabled": True,
|
||||
"option": "default_value",
|
||||
},
|
||||
|
||||
# BUMP THIS when adding required fields
|
||||
"_config_version": 2, # Was 1, now 2
|
||||
}
|
||||
```
|
||||
|
||||
#### For .env variables (API keys/secrets):
|
||||
|
||||
1. Add to `REQUIRED_ENV_VARS` or `OPTIONAL_ENV_VARS` in `hermes_cli/config.py`
|
||||
2. Include metadata for the migration system:
|
||||
|
||||
```python
|
||||
OPTIONAL_ENV_VARS = {
|
||||
# ... existing vars ...
|
||||
"NEW_API_KEY": {
|
||||
"description": "What this key is for",
|
||||
"prompt": "Display name in prompts",
|
||||
"url": "https://where-to-get-it.com/",
|
||||
"tools": ["tools_it_enables"], # What tools need this
|
||||
"password": True, # Mask input
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
#### Update related files:
|
||||
|
||||
- `hermes_cli/setup.py` - Add prompts in the setup wizard
|
||||
- `cli-config.yaml.example` - Add example with comments
|
||||
- Update README.md if user-facing
|
||||
|
||||
### Config Version Migration
|
||||
|
||||
The system uses `_config_version` to detect outdated configs:
|
||||
|
||||
1. `check_for_missing_config()` compares user config to `DEFAULT_CONFIG`
|
||||
2. `migrate_config()` interactively prompts for missing values
|
||||
3. Called automatically by `hermes update` and optionally by `hermes setup`
|
||||
|
||||
---
|
||||
|
||||
## Environment Variables
|
||||
|
||||
API keys are loaded from `~/.hermes/.env`:
|
||||
- `OPENROUTER_API_KEY` - Main LLM API access (primary provider)
|
||||
- `FIRECRAWL_API_KEY` - Web search/extract tools
|
||||
- `BROWSERBASE_API_KEY` / `BROWSERBASE_PROJECT_ID` - Browser automation
|
||||
- `FAL_KEY` - Image generation (FLUX model)
|
||||
- `NOUS_API_KEY` - Vision and Mixture-of-Agents tools
|
||||
|
||||
Terminal tool configuration (in `~/.hermes/config.yaml`):
|
||||
- `terminal.backend` - Backend: local, docker, singularity, modal, or ssh
|
||||
- `terminal.cwd` - Working directory for CLI ("." = current directory)
|
||||
- `terminal.docker_image` - Image for Docker backend
|
||||
- `terminal.singularity_image` - Image for Singularity backend
|
||||
- `terminal.modal_image` - Image for Modal backend
|
||||
- SSH: `TERMINAL_SSH_HOST`, `TERMINAL_SSH_USER`, `TERMINAL_SSH_KEY` in .env
|
||||
|
||||
Agent behavior (in `~/.hermes/.env`):
|
||||
- `HERMES_MAX_ITERATIONS` - Max tool-calling iterations (default: 60)
|
||||
- `MESSAGING_CWD` - Working directory for messaging platforms (default: ~)
|
||||
- `HERMES_TOOL_PROGRESS` - Enable tool progress messages (`true`/`false`)
|
||||
- `HERMES_TOOL_PROGRESS_MODE` - Progress mode: `new` (tool changes) or `all`
|
||||
|
||||
### Dangerous Command Approval
|
||||
|
||||
The terminal tool includes safety checks for potentially destructive commands (e.g., `rm -rf`, `DROP TABLE`, `chmod 777`, etc.):
|
||||
|
||||
**Behavior by Backend:**
|
||||
- **Docker/Singularity/Modal**: Commands run unrestricted (isolated containers)
|
||||
- **Local/SSH**: Dangerous commands trigger approval flow
|
||||
|
||||
**Approval Flow (CLI):**
|
||||
```
|
||||
⚠️ Potentially dangerous command detected: recursive delete
|
||||
rm -rf /tmp/test
|
||||
|
||||
[o]nce | [s]ession | [a]lways | [d]eny
|
||||
Choice [o/s/a/D]:
|
||||
```
|
||||
|
||||
**Approval Flow (Messaging):**
|
||||
- Command is blocked with explanation
|
||||
- Agent explains the command was blocked for safety
|
||||
- User must add the pattern to their allowlist via `hermes config edit` or run the command directly on their machine
|
||||
|
||||
**Configuration:**
|
||||
- `command_allowlist` in `~/.hermes/config.yaml` stores permanently allowed patterns
|
||||
- Add patterns via "always" approval or edit directly
|
||||
|
||||
**Sudo Handling (Messaging):**
|
||||
- If sudo fails over messaging, output includes tip to add `SUDO_PASSWORD` to `~/.hermes/.env`
|
||||
|
||||
---
|
||||
|
||||
## Adding New Tools
|
||||
|
||||
Requires changes in **3 files**:
|
||||
Follow this strict order to maintain consistency:
|
||||
|
||||
1. Create `tools/your_tool.py` with:
|
||||
- Handler function (sync or async) returning a JSON string via `json.dumps()`
|
||||
- `check_*_requirements()` function to verify dependencies (e.g., API keys)
|
||||
- Schema definition following OpenAI function-calling format
|
||||
|
||||
2. Export in `tools/__init__.py`:
|
||||
- Import the handler and check function
|
||||
- Add to `__all__` list
|
||||
|
||||
3. Register in `model_tools.py`:
|
||||
- Add to `TOOLSET_REQUIREMENTS` if it needs API keys
|
||||
- Create `get_*_tool_definitions()` function or add to existing
|
||||
- Add routing in `handle_function_call()` dispatcher
|
||||
- Update `get_all_tool_names()` with the tool name
|
||||
- Update `get_toolset_for_tool()` mapping
|
||||
- Update `get_available_toolsets()` and `check_toolset_requirements()`
|
||||
|
||||
4. Add to toolset in `toolsets.py`:
|
||||
- Add to existing toolset or create new one in TOOLSETS dict
|
||||
|
||||
5. If the tool requires an API key:
|
||||
- Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py`
|
||||
- The tool will be auto-disabled if the key is missing
|
||||
|
||||
6. Optionally add to `toolset_distributions.py` for batch processing
|
||||
|
||||
### Tool Implementation Pattern
|
||||
|
||||
**1. Create `tools/your_tool.py`:**
|
||||
```python
|
||||
import json, os
|
||||
from tools.registry import registry
|
||||
# tools/example_tool.py
|
||||
import json
|
||||
import os
|
||||
|
||||
def check_requirements() -> bool:
|
||||
def check_example_requirements() -> bool:
|
||||
"""Check if required API keys/dependencies are available."""
|
||||
return bool(os.getenv("EXAMPLE_API_KEY"))
|
||||
|
||||
def example_tool(param: str, task_id: str = None) -> str:
|
||||
return json.dumps({"success": True, "data": "..."})
|
||||
|
||||
registry.register(
|
||||
name="example_tool",
|
||||
toolset="example",
|
||||
schema={"name": "example_tool", "description": "...", "parameters": {...}},
|
||||
handler=lambda args, **kw: example_tool(param=args.get("param", ""), task_id=kw.get("task_id")),
|
||||
check_fn=check_requirements,
|
||||
requires_env=["EXAMPLE_API_KEY"],
|
||||
)
|
||||
"""Execute the tool and return JSON string result."""
|
||||
try:
|
||||
result = {"success": True, "data": "..."}
|
||||
return json.dumps(result, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
return json.dumps({"error": str(e)}, ensure_ascii=False)
|
||||
```
|
||||
|
||||
**2. Add import** in `model_tools.py` `_discover_tools()` list.
|
||||
All tool handlers MUST return a JSON string. Never return raw dicts.
|
||||
|
||||
**3. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.
|
||||
### Dynamic Tool Availability
|
||||
|
||||
The registry handles schema collection, dispatch, availability checking, and error wrapping. All handlers MUST return a JSON string.
|
||||
Tools are automatically disabled when their API keys are missing:
|
||||
|
||||
**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern.
|
||||
|
||||
---
|
||||
|
||||
## Adding Configuration
|
||||
|
||||
### config.yaml options:
|
||||
1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
|
||||
2. Bump `_config_version` (currently 5) to trigger migration for existing users
|
||||
|
||||
### .env variables:
|
||||
1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
|
||||
```python
|
||||
"NEW_API_KEY": {
|
||||
"description": "What it's for",
|
||||
"prompt": "Display name",
|
||||
"url": "https://...",
|
||||
"password": True,
|
||||
"category": "tool", # provider, tool, messaging, setting
|
||||
},
|
||||
# In model_tools.py
|
||||
TOOLSET_REQUIREMENTS = {
|
||||
"web": {"env_vars": ["FIRECRAWL_API_KEY"]},
|
||||
"browser": {"env_vars": ["BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID"]},
|
||||
"creative": {"env_vars": ["FAL_KEY"]},
|
||||
}
|
||||
```
|
||||
|
||||
### Config loaders (two separate systems):
|
||||
The `check_tool_availability()` function determines which tools to include.
|
||||
|
||||
| Loader | Used by | Location |
|
||||
|--------|---------|----------|
|
||||
| `load_cli_config()` | CLI mode | `cli.py` |
|
||||
| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` |
|
||||
| Direct YAML load | Gateway | `gateway/run.py` |
|
||||
### Stateful Tools
|
||||
|
||||
Tools that maintain state (terminal, browser) require:
|
||||
- `task_id` parameter for session isolation between concurrent tasks
|
||||
- `cleanup_*()` function to release resources
|
||||
- Cleanup is called automatically in run_agent.py after conversation completes
|
||||
|
||||
---
|
||||
|
||||
## Important Policies
|
||||
## Trajectory Format
|
||||
|
||||
### Prompt Caching Must Not Break
|
||||
Conversations are saved in ShareGPT format for training:
|
||||
```json
|
||||
{"from": "system", "value": "System prompt with <tools>...</tools>"}
|
||||
{"from": "human", "value": "User message"}
|
||||
{"from": "gpt", "value": "<think>reasoning</think>\n<tool_call>{...}</tool_call>"}
|
||||
{"from": "tool", "value": "<tool_response>{...}</tool_response>"}
|
||||
{"from": "gpt", "value": "Final response"}
|
||||
```
|
||||
|
||||
Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:**
|
||||
- Alter past context mid-conversation
|
||||
- Change toolsets mid-conversation
|
||||
- Reload memories or rebuild system prompts mid-conversation
|
||||
Tool calls use `<tool_call>` XML tags, responses use `<tool_response>` tags, reasoning uses `<think>` tags.
|
||||
|
||||
Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression.
|
||||
### Trajectory Export
|
||||
|
||||
### Working Directory Behavior
|
||||
- **CLI**: Uses current directory (`.` → `os.getcwd()`)
|
||||
- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory)
|
||||
```python
|
||||
agent = AIAgent(save_trajectories=True)
|
||||
agent.chat("Do something")
|
||||
# Saves to trajectories/*.jsonl in ShareGPT format
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Known Pitfalls
|
||||
## Batch Processing (batch_runner.py)
|
||||
|
||||
### DO NOT use `simple_term_menu` for interactive menus
|
||||
Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern.
|
||||
|
||||
### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code
|
||||
Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`.
|
||||
|
||||
### `_last_resolved_tool_names` is a process-global in `model_tools.py`
|
||||
When subagents overwrite this global, `execute_code` calls after delegation may fail with missing tool imports. Known bug.
|
||||
|
||||
### Tests must not write to `~/.hermes/`
|
||||
The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests.
|
||||
|
||||
---
|
||||
|
||||
## Testing
|
||||
For processing multiple prompts:
|
||||
- Parallel execution with multiprocessing
|
||||
- Content-based resume for fault tolerance (matches on prompt text, not indices)
|
||||
- Toolset distributions control probabilistic tool availability per prompt
|
||||
- Output: `data/<run_name>/trajectories.jsonl` (combined) + individual batch files
|
||||
|
||||
```bash
|
||||
source .venv/bin/activate
|
||||
python -m pytest tests/ -q # Full suite (~2500 tests, ~2 min)
|
||||
python -m pytest tests/test_model_tools.py -q # Toolset resolution
|
||||
python -m pytest tests/test_cli_init.py -q # CLI config loading
|
||||
python -m pytest tests/gateway/ -q # Gateway tests
|
||||
python -m pytest tests/tools/ -q # Tool-level tests
|
||||
python batch_runner.py \
|
||||
--dataset_file=prompts.jsonl \
|
||||
--batch_size=20 \
|
||||
--num_workers=4 \
|
||||
--run_name=my_run
|
||||
```
|
||||
|
||||
Always run the full suite before pushing changes.
|
||||
---
|
||||
|
||||
## Skills System
|
||||
|
||||
Skills are on-demand knowledge documents the agent can load. Located in `skills/` directory:
|
||||
|
||||
```
|
||||
skills/
|
||||
├── mlops/ # Category folder
|
||||
│ ├── axolotl/ # Skill folder
|
||||
│ │ ├── SKILL.md # Main instructions (required)
|
||||
│ │ ├── references/ # Additional docs, API specs
|
||||
│ │ └── templates/ # Output formats, configs
|
||||
│ └── vllm/
|
||||
│ └── SKILL.md
|
||||
└── example-skill/
|
||||
└── SKILL.md
|
||||
```
|
||||
|
||||
**Progressive disclosure** (token-efficient):
|
||||
1. `skills_categories()` - List category names (~50 tokens)
|
||||
2. `skills_list(category)` - Name + description per skill (~3k tokens)
|
||||
3. `skill_view(name)` - Full content + tags + linked files
|
||||
|
||||
SKILL.md files use YAML frontmatter:
|
||||
```yaml
|
||||
---
|
||||
name: skill-name
|
||||
description: Brief description for listing
|
||||
tags: [tag1, tag2]
|
||||
related_skills: [other-skill]
|
||||
version: 1.0.0
|
||||
---
|
||||
# Skill Content...
|
||||
```
|
||||
|
||||
Tool files: `tools/skills_tool.py` → `model_tools.py` → `toolsets.py`
|
||||
|
||||
---
|
||||
|
||||
## Testing Changes
|
||||
|
||||
After making changes:
|
||||
|
||||
1. Run `hermes doctor` to check setup
|
||||
2. Run `hermes config check` to verify config
|
||||
3. Test with `hermes chat -q "test message"`
|
||||
4. For new config options, test fresh install: `rm -rf ~/.hermes && hermes setup`
|
||||
|
||||
522
CONTRIBUTING.md
522
CONTRIBUTING.md
@@ -1,522 +0,0 @@
|
||||
# Contributing to Hermes Agent
|
||||
|
||||
Thank you for contributing to Hermes Agent! This guide covers everything you need: setting up your dev environment, understanding the architecture, deciding what to build, and getting your PR merged.
|
||||
|
||||
---
|
||||
|
||||
## Contribution Priorities
|
||||
|
||||
We value contributions in this order:
|
||||
|
||||
1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority.
|
||||
2. **Cross-platform compatibility** — Windows, macOS, different Linux distros, different terminal emulators. We want Hermes to work everywhere.
|
||||
3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations).
|
||||
4. **Performance and robustness** — retry logic, error handling, graceful degradation.
|
||||
5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool)
|
||||
6. **New tools** — rarely needed. Most capabilities should be skills. See below.
|
||||
7. **Documentation** — fixes, clarifications, new examples.
|
||||
|
||||
---
|
||||
|
||||
## Should it be a Skill or a Tool?
|
||||
|
||||
This is the most common question for new contributors. The answer is almost always **skill**.
|
||||
|
||||
### Make it a Skill when:
|
||||
|
||||
- The capability can be expressed as instructions + shell commands + existing tools
|
||||
- It wraps an external CLI or API that the agent can call via `terminal` or `web_extract`
|
||||
- It doesn't need custom Python integration or API key management baked into the agent
|
||||
- Examples: arXiv search, git workflows, Docker management, PDF processing, email via CLI tools
|
||||
|
||||
### Make it a Tool when:
|
||||
|
||||
- It requires end-to-end integration with API keys, auth flows, or multi-component configuration managed by the agent harness
|
||||
- It needs custom processing logic that must execute precisely every time (not "best effort" from LLM interpretation)
|
||||
- It handles binary data, streaming, or real-time events that can't go through the terminal
|
||||
- Examples: browser automation (Browserbase session management), TTS (audio encoding + platform delivery), vision analysis (base64 image handling)
|
||||
|
||||
### Should the Skill be bundled?
|
||||
|
||||
Bundled skills (in `skills/`) ship with every Hermes install. They should be **broadly useful to most users**:
|
||||
|
||||
- Document handling, web research, common dev workflows, system administration
|
||||
- Used regularly by a wide range of people
|
||||
|
||||
If your skill is official and useful but not universally needed (e.g., a paid service integration, a heavyweight dependency), put it in **`optional-skills/`** — it ships with the repo but isn't activated by default. Users can discover it via `hermes skills browse` (labeled "official") and install it with `hermes skills install` (no third-party warning, builtin trust).
|
||||
|
||||
If your skill is specialized, community-contributed, or niche, it's better suited for a **Skills Hub** — upload it to a skills registry and share it in the [Nous Research Discord](https://discord.gg/NousResearch). Users can install it with `hermes skills install`.
|
||||
|
||||
---
|
||||
|
||||
## Development Setup
|
||||
|
||||
### Prerequisites
|
||||
|
||||
| Requirement | Notes |
|
||||
|-------------|-------|
|
||||
| **Git** | With `--recurse-submodules` support |
|
||||
| **Python 3.11+** | uv will install it if missing |
|
||||
| **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
|
||||
| **Node.js 18+** | Optional — needed for browser tools and WhatsApp bridge |
|
||||
|
||||
### Clone and install
|
||||
|
||||
```bash
|
||||
git clone --recurse-submodules https://github.com/NousResearch/hermes-agent.git
|
||||
cd hermes-agent
|
||||
|
||||
# Create venv with Python 3.11
|
||||
uv venv venv --python 3.11
|
||||
export VIRTUAL_ENV="$(pwd)/venv"
|
||||
|
||||
# Install with all extras (messaging, cron, CLI menus, dev tools)
|
||||
uv pip install -e ".[all,dev]"
|
||||
uv pip install -e "./mini-swe-agent"
|
||||
uv pip install -e "./tinker-atropos"
|
||||
|
||||
# Optional: browser tools
|
||||
npm install
|
||||
```
|
||||
|
||||
### Configure for development
|
||||
|
||||
```bash
|
||||
mkdir -p ~/.hermes/{cron,sessions,logs,memories,skills}
|
||||
cp cli-config.yaml.example ~/.hermes/config.yaml
|
||||
touch ~/.hermes/.env
|
||||
|
||||
# Add at minimum an LLM provider key:
|
||||
echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env
|
||||
```
|
||||
|
||||
### Run
|
||||
|
||||
```bash
|
||||
# Symlink for global access
|
||||
mkdir -p ~/.local/bin
|
||||
ln -sf "$(pwd)/venv/bin/hermes" ~/.local/bin/hermes
|
||||
|
||||
# Verify
|
||||
hermes doctor
|
||||
hermes chat -q "Hello"
|
||||
```
|
||||
|
||||
### Run tests
|
||||
|
||||
```bash
|
||||
pytest tests/ -v
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
hermes-agent/
|
||||
├── run_agent.py # AIAgent class — core conversation loop, tool dispatch, session persistence
|
||||
├── cli.py # HermesCLI class — interactive TUI, prompt_toolkit integration
|
||||
├── model_tools.py # Tool orchestration (thin layer over tools/registry.py)
|
||||
├── toolsets.py # Tool groupings and presets (hermes-cli, hermes-telegram, etc.)
|
||||
├── hermes_state.py # SQLite session database with FTS5 full-text search, session titles
|
||||
├── batch_runner.py # Parallel batch processing for trajectory generation
|
||||
│
|
||||
├── agent/ # Agent internals (extracted modules)
|
||||
│ ├── prompt_builder.py # System prompt assembly (identity, skills, context files, memory)
|
||||
│ ├── context_compressor.py # Auto-summarization when approaching context limits
|
||||
│ ├── auxiliary_client.py # Resolves auxiliary OpenAI clients (summarization, vision)
|
||||
│ ├── display.py # KawaiiSpinner, tool progress formatting
|
||||
│ ├── model_metadata.py # Model context lengths, token estimation
|
||||
│ └── trajectory.py # Trajectory saving helpers
|
||||
│
|
||||
├── hermes_cli/ # CLI command implementations
|
||||
│ ├── main.py # Entry point, argument parsing, command dispatch
|
||||
│ ├── config.py # Config management, migration, env var definitions
|
||||
│ ├── setup.py # Interactive setup wizard
|
||||
│ ├── auth.py # Provider resolution, OAuth, Nous Portal
|
||||
│ ├── models.py # OpenRouter model selection lists
|
||||
│ ├── banner.py # Welcome banner, ASCII art
|
||||
│ ├── commands.py # Slash command definitions + autocomplete
|
||||
│ ├── callbacks.py # Interactive callbacks (clarify, sudo, approval)
|
||||
│ ├── doctor.py # Diagnostics
|
||||
│ └── skills_hub.py # Skills Hub CLI + /skills slash command
|
||||
│
|
||||
├── tools/ # Tool implementations (self-registering)
|
||||
│ ├── registry.py # Central tool registry (schemas, handlers, dispatch)
|
||||
│ ├── approval.py # Dangerous command detection + per-session approval
|
||||
│ ├── terminal_tool.py # Terminal orchestration (sudo, env lifecycle, backends)
|
||||
│ ├── file_operations.py # read_file, write_file, search, patch, etc.
|
||||
│ ├── web_tools.py # web_search, web_extract (Firecrawl + Gemini summarization)
|
||||
│ ├── vision_tools.py # Image analysis via multimodal models
|
||||
│ ├── delegate_tool.py # Subagent spawning and parallel task execution
|
||||
│ ├── code_execution_tool.py # Sandboxed Python with RPC tool access
|
||||
│ ├── session_search_tool.py # Search past conversations with FTS5 + summarization
|
||||
│ ├── cronjob_tools.py # Scheduled task management
|
||||
│ ├── skill_tools.py # Skill search, load, manage
|
||||
│ └── environments/ # Terminal execution backends
|
||||
│ ├── base.py # BaseEnvironment ABC
|
||||
│ ├── local.py, docker.py, ssh.py, singularity.py, modal.py, daytona.py
|
||||
│
|
||||
├── gateway/ # Messaging gateway
|
||||
│ ├── run.py # GatewayRunner — platform lifecycle, message routing, cron
|
||||
│ ├── config.py # Platform configuration resolution
|
||||
│ ├── session.py # Session store, context prompts, reset policies
|
||||
│ └── platforms/ # Platform adapters
|
||||
│ ├── telegram.py, discord_adapter.py, slack.py, whatsapp.py
|
||||
│
|
||||
├── scripts/ # Installer and bridge scripts
|
||||
│ ├── install.sh # Linux/macOS installer
|
||||
│ ├── install.ps1 # Windows PowerShell installer
|
||||
│ └── whatsapp-bridge/ # Node.js WhatsApp bridge (Baileys)
|
||||
│
|
||||
├── skills/ # Bundled skills (copied to ~/.hermes/skills/ on install)
|
||||
├── optional-skills/ # Official optional skills (discoverable via hub, not activated by default)
|
||||
├── environments/ # RL training environments (Atropos integration)
|
||||
├── tests/ # Test suite
|
||||
├── website/ # Documentation site (hermes-agent.nousresearch.com)
|
||||
│
|
||||
├── cli-config.yaml.example # Example configuration (copied to ~/.hermes/config.yaml)
|
||||
└── AGENTS.md # Development guide for AI coding assistants
|
||||
```
|
||||
|
||||
### User configuration (stored in `~/.hermes/`)
|
||||
|
||||
| Path | Purpose |
|
||||
|------|---------|
|
||||
| `~/.hermes/config.yaml` | Settings (model, terminal, toolsets, compression, etc.) |
|
||||
| `~/.hermes/.env` | API keys and secrets |
|
||||
| `~/.hermes/auth.json` | OAuth credentials (Nous Portal) |
|
||||
| `~/.hermes/skills/` | All active skills (bundled + hub-installed + agent-created) |
|
||||
| `~/.hermes/memories/` | Persistent memory (MEMORY.md, USER.md) |
|
||||
| `~/.hermes/state.db` | SQLite session database |
|
||||
| `~/.hermes/sessions/` | JSON session logs |
|
||||
| `~/.hermes/cron/` | Scheduled job data |
|
||||
| `~/.hermes/whatsapp/session/` | WhatsApp bridge credentials |
|
||||
|
||||
---
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
### Core Loop
|
||||
|
||||
```
|
||||
User message → AIAgent._run_agent_loop()
|
||||
├── Build system prompt (prompt_builder.py)
|
||||
├── Build API kwargs (model, messages, tools, reasoning config)
|
||||
├── Call LLM (OpenAI-compatible API)
|
||||
├── If tool_calls in response:
|
||||
│ ├── Execute each tool via registry dispatch
|
||||
│ ├── Add tool results to conversation
|
||||
│ └── Loop back to LLM call
|
||||
├── If text response:
|
||||
│ ├── Persist session to DB
|
||||
│ └── Return final_response
|
||||
└── Context compression if approaching token limit
|
||||
```
|
||||
|
||||
### Key Design Patterns
|
||||
|
||||
- **Self-registering tools**: Each tool file calls `registry.register()` at import time. `model_tools.py` triggers discovery by importing all tool modules.
|
||||
- **Toolset grouping**: Tools are grouped into toolsets (`web`, `terminal`, `file`, `browser`, etc.) that can be enabled/disabled per platform.
|
||||
- **Session persistence**: All conversations are stored in SQLite (`hermes_state.py`) with full-text search and unique session titles. JSON logs go to `~/.hermes/sessions/`.
|
||||
- **Ephemeral injection**: System prompts and prefill messages are injected at API call time, never persisted to the database or logs.
|
||||
- **Provider abstraction**: The agent works with any OpenAI-compatible API. Provider resolution happens at init time (Nous Portal OAuth, OpenRouter API key, or custom endpoint).
|
||||
- **Provider routing**: When using OpenRouter, `provider_routing` in config.yaml controls provider selection (sort by throughput/latency/price, allow/ignore specific providers, data retention policies). These are injected as `extra_body.provider` in API requests.
|
||||
|
||||
---
|
||||
|
||||
## Code Style
|
||||
|
||||
- **PEP 8** with practical exceptions (we don't enforce strict line length)
|
||||
- **Comments**: Only when explaining non-obvious intent, trade-offs, or API quirks. Don't narrate what the code does — `# increment counter` adds nothing
|
||||
- **Error handling**: Catch specific exceptions. Log with `logger.warning()`/`logger.error()` — use `exc_info=True` for unexpected errors so stack traces appear in logs
|
||||
- **Cross-platform**: Never assume Unix. See [Cross-Platform Compatibility](#cross-platform-compatibility)
|
||||
|
||||
---
|
||||
|
||||
## Adding a New Tool
|
||||
|
||||
Before writing a tool, ask: [should this be a skill instead?](#should-it-be-a-skill-or-a-tool)
|
||||
|
||||
Tools self-register with the central registry. Each tool file co-locates its schema, handler, and registration:
|
||||
|
||||
```python
|
||||
"""my_tool — Brief description of what this tool does."""
|
||||
|
||||
import json
|
||||
from tools.registry import registry
|
||||
|
||||
|
||||
def my_tool(param1: str, param2: int = 10, **kwargs) -> str:
|
||||
"""Handler. Returns a string result (often JSON)."""
|
||||
result = do_work(param1, param2)
|
||||
return json.dumps(result)
|
||||
|
||||
|
||||
MY_TOOL_SCHEMA = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "my_tool",
|
||||
"description": "What this tool does and when the agent should use it.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"param1": {"type": "string", "description": "What param1 is"},
|
||||
"param2": {"type": "integer", "description": "What param2 is", "default": 10},
|
||||
},
|
||||
"required": ["param1"],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _check_requirements() -> bool:
|
||||
"""Return True if this tool's dependencies are available."""
|
||||
return True
|
||||
|
||||
|
||||
registry.register(
|
||||
name="my_tool",
|
||||
toolset="my_toolset",
|
||||
schema=MY_TOOL_SCHEMA,
|
||||
handler=lambda args, **kw: my_tool(**args, **kw),
|
||||
check_fn=_check_requirements,
|
||||
)
|
||||
```
|
||||
|
||||
Then add the import to `model_tools.py` in the `_modules` list:
|
||||
|
||||
```python
|
||||
_modules = [
|
||||
# ... existing modules ...
|
||||
"tools.my_tool",
|
||||
]
|
||||
```
|
||||
|
||||
If it's a new toolset, add it to `toolsets.py` and to the relevant platform presets.
|
||||
|
||||
---
|
||||
|
||||
## Adding a Skill
|
||||
|
||||
Bundled skills live in `skills/` organized by category. Official optional skills use the same structure in `optional-skills/`:
|
||||
|
||||
```
|
||||
skills/
|
||||
├── research/
|
||||
│ └── arxiv/
|
||||
│ ├── SKILL.md # Required: main instructions
|
||||
│ └── scripts/ # Optional: helper scripts
|
||||
│ └── search_arxiv.py
|
||||
├── productivity/
|
||||
│ └── ocr-and-documents/
|
||||
│ ├── SKILL.md
|
||||
│ ├── scripts/
|
||||
│ └── references/
|
||||
└── ...
|
||||
```
|
||||
|
||||
### SKILL.md format
|
||||
|
||||
```markdown
|
||||
---
|
||||
name: my-skill
|
||||
description: Brief description (shown in skill search results)
|
||||
version: 1.0.0
|
||||
author: Your Name
|
||||
license: MIT
|
||||
platforms: [macos, linux] # Optional — restrict to specific OS platforms
|
||||
# Valid: macos, linux, windows
|
||||
# Omit to load on all platforms (default)
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [Category, Subcategory, Keywords]
|
||||
related_skills: [other-skill-name]
|
||||
---
|
||||
|
||||
# Skill Title
|
||||
|
||||
Brief intro.
|
||||
|
||||
## When to Use
|
||||
Trigger conditions — when should the agent load this skill?
|
||||
|
||||
## Quick Reference
|
||||
Table of common commands or API calls.
|
||||
|
||||
## Procedure
|
||||
Step-by-step instructions the agent follows.
|
||||
|
||||
## Pitfalls
|
||||
Known failure modes and how to handle them.
|
||||
|
||||
## Verification
|
||||
How the agent confirms it worked.
|
||||
```
|
||||
|
||||
### Platform-specific skills
|
||||
|
||||
Skills can declare which OS platforms they support via the `platforms` frontmatter field. Skills with this field are automatically hidden from the system prompt, `skills_list()`, and slash commands on incompatible platforms.
|
||||
|
||||
```yaml
|
||||
platforms: [macos] # macOS only (e.g., iMessage, Apple Reminders)
|
||||
platforms: [macos, linux] # macOS and Linux
|
||||
platforms: [windows] # Windows only
|
||||
```
|
||||
|
||||
If the field is omitted or empty, the skill loads on all platforms (backward compatible). See `skills/apple/` for examples of macOS-only skills.
|
||||
|
||||
### Skill guidelines
|
||||
|
||||
- **No external dependencies unless absolutely necessary.** Prefer stdlib Python, curl, and existing Hermes tools (`web_extract`, `terminal`, `read_file`).
|
||||
- **Progressive disclosure.** Put the most common workflow first. Edge cases and advanced usage go at the bottom.
|
||||
- **Include helper scripts** for XML/JSON parsing or complex logic — don't expect the LLM to write parsers inline every time.
|
||||
- **Test it.** Run `hermes --toolsets skills -q "Use the X skill to do Y"` and verify the agent follows the instructions correctly.
|
||||
|
||||
---
|
||||
|
||||
## Cross-Platform Compatibility
|
||||
|
||||
Hermes runs on Linux, macOS, and Windows. When writing code that touches the OS:
|
||||
|
||||
### Critical rules
|
||||
|
||||
1. **`termios` and `fcntl` are Unix-only.** Always catch both `ImportError` and `NotImplementedError`:
|
||||
```python
|
||||
try:
|
||||
from simple_term_menu import TerminalMenu
|
||||
menu = TerminalMenu(options)
|
||||
idx = menu.show()
|
||||
except (ImportError, NotImplementedError):
|
||||
# Fallback: numbered menu for Windows
|
||||
for i, opt in enumerate(options):
|
||||
print(f" {i+1}. {opt}")
|
||||
idx = int(input("Choice: ")) - 1
|
||||
```
|
||||
|
||||
2. **File encoding.** Windows may save `.env` files in `cp1252`. Always handle encoding errors:
|
||||
```python
|
||||
try:
|
||||
load_dotenv(env_path)
|
||||
except UnicodeDecodeError:
|
||||
load_dotenv(env_path, encoding="latin-1")
|
||||
```
|
||||
|
||||
3. **Process management.** `os.setsid()`, `os.killpg()`, and signal handling differ on Windows. Use platform checks:
|
||||
```python
|
||||
import platform
|
||||
if platform.system() != "Windows":
|
||||
kwargs["preexec_fn"] = os.setsid
|
||||
```
|
||||
|
||||
4. **Path separators.** Use `pathlib.Path` instead of string concatenation with `/`.
|
||||
|
||||
5. **Shell commands in installers.** If you change `scripts/install.sh`, check if the equivalent change is needed in `scripts/install.ps1`.
|
||||
|
||||
---
|
||||
|
||||
## Security Considerations
|
||||
|
||||
Hermes has terminal access. Security matters.
|
||||
|
||||
### Existing protections
|
||||
|
||||
| Layer | Implementation |
|
||||
|-------|---------------|
|
||||
| **Sudo password piping** | Uses `shlex.quote()` to prevent shell injection |
|
||||
| **Dangerous command detection** | Regex patterns in `tools/approval.py` with user approval flow |
|
||||
| **Cron prompt injection** | Scanner in `tools/cronjob_tools.py` blocks instruction-override patterns |
|
||||
| **Write deny list** | Protected paths (`~/.ssh/authorized_keys`, `/etc/shadow`) resolved via `os.path.realpath()` to prevent symlink bypass |
|
||||
| **Skills guard** | Security scanner for hub-installed skills (`tools/skills_guard.py`) |
|
||||
| **Code execution sandbox** | `execute_code` child process runs with API keys stripped from environment |
|
||||
| **Container hardening** | Docker: all capabilities dropped, no privilege escalation, PID limits, size-limited tmpfs |
|
||||
|
||||
### When contributing security-sensitive code
|
||||
|
||||
- **Always use `shlex.quote()`** when interpolating user input into shell commands
|
||||
- **Resolve symlinks** with `os.path.realpath()` before path-based access control checks
|
||||
- **Don't log secrets.** API keys, tokens, and passwords should never appear in log output
|
||||
- **Catch broad exceptions** around tool execution so a single failure doesn't crash the agent loop
|
||||
- **Test on all platforms** if your change touches file paths, process management, or shell commands
|
||||
|
||||
If your PR affects security, note it explicitly in the description.
|
||||
|
||||
---
|
||||
|
||||
## Pull Request Process
|
||||
|
||||
### Branch naming
|
||||
|
||||
```
|
||||
fix/description # Bug fixes
|
||||
feat/description # New features
|
||||
docs/description # Documentation
|
||||
test/description # Tests
|
||||
refactor/description # Code restructuring
|
||||
```
|
||||
|
||||
### Before submitting
|
||||
|
||||
1. **Run tests**: `pytest tests/ -v`
|
||||
2. **Test manually**: Run `hermes` and exercise the code path you changed
|
||||
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider Windows and macOS
|
||||
4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature.
|
||||
|
||||
### PR description
|
||||
|
||||
Include:
|
||||
- **What** changed and **why**
|
||||
- **How to test** it (reproduction steps for bugs, usage examples for features)
|
||||
- **What platforms** you tested on
|
||||
- Reference any related issues
|
||||
|
||||
### Commit messages
|
||||
|
||||
We use [Conventional Commits](https://www.conventionalcommits.org/):
|
||||
|
||||
```
|
||||
<type>(<scope>): <description>
|
||||
```
|
||||
|
||||
| Type | Use for |
|
||||
|------|---------|
|
||||
| `fix` | Bug fixes |
|
||||
| `feat` | New features |
|
||||
| `docs` | Documentation |
|
||||
| `test` | Tests |
|
||||
| `refactor` | Code restructuring (no behavior change) |
|
||||
| `chore` | Build, CI, dependency updates |
|
||||
|
||||
Scopes: `cli`, `gateway`, `tools`, `skills`, `agent`, `install`, `whatsapp`, `security`, etc.
|
||||
|
||||
Examples:
|
||||
```
|
||||
fix(cli): prevent crash in save_config_value when model is a string
|
||||
feat(gateway): add WhatsApp multi-user session isolation
|
||||
fix(security): prevent shell injection in sudo password piping
|
||||
test(tools): add unit tests for file_operations
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Reporting Issues
|
||||
|
||||
- Use [GitHub Issues](https://github.com/NousResearch/hermes-agent/issues)
|
||||
- Include: OS, Python version, Hermes version (`hermes version`), full error traceback
|
||||
- Include steps to reproduce
|
||||
- Check existing issues before creating duplicates
|
||||
- For security vulnerabilities, please report privately
|
||||
|
||||
---
|
||||
|
||||
## Community
|
||||
|
||||
- **Discord**: [discord.gg/NousResearch](https://discord.gg/NousResearch) — for questions, showcasing projects, and sharing skills
|
||||
- **GitHub Discussions**: For design proposals and architecture discussions
|
||||
- **Skills Hub**: Upload specialized skills to a registry and share them with the community
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
By contributing, you agree that your contributions will be licensed under the [MIT License](LICENSE).
|
||||
21
LICENSE
21
LICENSE
@@ -1,21 +0,0 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2025 Nous Research
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
142
Project_notes.md
Normal file
142
Project_notes.md
Normal file
@@ -0,0 +1,142 @@
|
||||
# Project Notes
|
||||
|
||||
*Maintained by Hermes — last updated June 2025*
|
||||
|
||||
---
|
||||
|
||||
## 1. Kandinsky (Multimodal Transformer)
|
||||
- **Repo:** https://github.com/samherring99/kandinsky
|
||||
- **Local path:** `~/Desktop/Projects/kandinsky`
|
||||
- **Description:** An anything-to-anything transformer combining text, image, and audio modalities. Trains on Pokemon BLIP captions paired with Gen 1 Pokemon audio cries. Uses audio tokenization adapted from nanoGPT.
|
||||
- **Status:** Early POC. Training code exists (`model.py`) and dataset creation (`create_dataset.py`) works. Audio heads are producing the same sound — unclear if it's a training issue or data issue.
|
||||
- **TODO:**
|
||||
- Debug why audio heads produce identical output
|
||||
- Investigate if model needs more training time
|
||||
- Design a data pipeline for better/more training data
|
||||
- General repo cleanup (requirements.txt, proper CLI, etc.)
|
||||
|
||||
---
|
||||
|
||||
## 2. NightwingGameSim (LLM → GameBoy ROM Generator)
|
||||
- **Repo:** https://github.com/samherring99/NightwingGameSim
|
||||
- **Local path:** `~/Desktop/Projects/NightwingGameSim`
|
||||
- **Description:** AI-powered pipeline that turns natural language prompts into playable GameBoy ROM files. Generates C code, compiles with GBDK, outputs `.gb` files. Supports Claude API, local Llama, and RAG backends.
|
||||
- **Status:** Functional — generation pipeline works end-to-end with Claude 4 system prompt. Has tests, docs, examples, and retry logic.
|
||||
- **TODO:**
|
||||
- Harden the repo, clean up structure
|
||||
- Build a better testing pipeline
|
||||
- Come up with better prompt ideas / examples
|
||||
|
||||
---
|
||||
|
||||
## 3. ContentBasedMIR (Music Information Retrieval)
|
||||
- **Repo:** https://github.com/samherring99/ContentBasedMIR
|
||||
- **Local path:** `~/Desktop/Projects/ContentBasedMIR`
|
||||
- **Description:** Music similarity analysis using Spotify API track data. Extracts 54 audio features per song and visualizes similarity matrices for music recommendation.
|
||||
- **Status:** Early stage. Can download Spotify track analysis data and plot similarity matrices. Needs significant expansion.
|
||||
- **TODO:**
|
||||
- Expand analysis pipeline with more features
|
||||
- Integrate with text message data for personalized recommendations
|
||||
- Build out visualization and exploration tools
|
||||
- General modernization (dependencies, structure)
|
||||
|
||||
---
|
||||
|
||||
## 4. MessageRetrieval (iMessage RAG/SQL)
|
||||
- **Repo:** https://github.com/samherring99/MessageRetrieval
|
||||
- **Local path:** `~/Desktop/Projects/MessageRetrieval`
|
||||
- **Description:** Natural language querying over iMessage data using SQL generation (text2SQL) instead of vector embeddings. Uses LLM-as-Judge pattern for scoring and ranking retrieved messages.
|
||||
- **Status:** Has initial text2SQL pipeline and summarization tool. Recently worked on with Claude Code. Needs testing.
|
||||
- **TODO:**
|
||||
- Test out the recent Claude Code work
|
||||
- Build "iMessage Jarvis" — answer questions about texts
|
||||
- Improve SQL generation prompts and accuracy
|
||||
- Better error handling and UX
|
||||
|
||||
---
|
||||
|
||||
## 5. Grailed Embedding Search
|
||||
- **Repo:** https://github.com/samherring99/grailed-embedding-search
|
||||
- **Local path:** `~/Desktop/Projects/grailed-embedding-search`
|
||||
- **Description:** Semantic similarity search over Grailed fashion listings using CLIP embeddings and FAISS. Search by image URL or text description to find visually similar products.
|
||||
- **Status:** Functional core pipeline. CLIP ViT-B/32 embeds product cover photos into 512-dim vectors, indexed with FAISS cosine similarity. Has CLI, batch embedding, persistent index save/load, and logging.
|
||||
- **Recent work (June 2025):**
|
||||
- PR #1 — Initial cleanup: docstrings, type hints, `.gitignore`, `requirements.txt`, README rewrite
|
||||
- PR #2 — Feature improvements: persistent FAISS save/load, batch embedding, CLI (`cli.py`), proper logging throughout, lazy Grailed client, `fetch_details` toggle
|
||||
- **TODO:**
|
||||
- Embedding cache (avoid re-embedding known product URLs)
|
||||
- Async/threaded image downloads for faster batch indexing
|
||||
- Search result visualization (matplotlib grid of cover photos)
|
||||
- Filter by category, designer, price range before search
|
||||
- Web UI (Gradio or Streamlit)
|
||||
|
||||
---
|
||||
|
||||
## 6. NightwingNBA (Sports Analytics)
|
||||
- **Repo:** https://github.com/samherring99/NightwingNBA
|
||||
- **Local path:** `~/Desktop/Projects/NightwingNBA`
|
||||
- **Description:** NBA game prediction system. Builds a database of game data, trains a PyTorch model, and makes daily predictions. Has full pipeline: build DB → write data → train → predict.
|
||||
- **Status:** Functional pipeline exists. Has database building, training, prediction, and daily update scripts.
|
||||
- **TODO:**
|
||||
- Explore and potentially revive
|
||||
- Update data sources if stale
|
||||
- Improve model accuracy
|
||||
- Add visualization/reporting
|
||||
|
||||
---
|
||||
|
||||
## 7. Stable Audio Sample Explorer
|
||||
- **Repo:** https://github.com/samherring99/stable-audio-sample-explorer
|
||||
- **Local path:** `~/Desktop/Projects/stable-audio-sample-explorer`
|
||||
- **Description:** Tool for exploring audio samples generated by Stable Audio.
|
||||
- **Status:** 🪦 **Dead** — no active work needed per Sam.
|
||||
|
||||
---
|
||||
|
||||
## 8. NightwingArt (Art Tools)
|
||||
- **Repo:** https://github.com/samherring99/NightwingArt
|
||||
- **Local path:** `~/Desktop/Projects/NightwingArt`
|
||||
- **Description:** Collection of art tooling scripts — video editing, clip splicing with beat matching, damage effects, and general image manipulation.
|
||||
- **Status:** Maintenance mode. Tools exist for various effects. Work happens as-needed.
|
||||
- **TODO:**
|
||||
- Add tools as needed for new art projects
|
||||
|
||||
---
|
||||
|
||||
## 9. Claude-based VST Building ⚠️ *Needs new repo*
|
||||
- **Description:** Generate VST audio plugins for DAWs from English language prompts. LLM-powered audio plugin creation.
|
||||
- **Status:** Concept only — no repo exists yet.
|
||||
- **TODO:**
|
||||
- Create repo
|
||||
- Research VST SDK / JUCE framework
|
||||
- Design prompt → code → compile pipeline
|
||||
|
||||
---
|
||||
|
||||
## 10. Government Auction Site Scraper ⚠️ *Needs new repo*
|
||||
- **Description:** Tool that monitors and scrapes government auction sites in San Francisco for deals.
|
||||
- **Status:** Concept only — no repo exists yet.
|
||||
- **TODO:**
|
||||
- Create repo
|
||||
- Research SF government auction sites and their structure
|
||||
- Build scraper + notification system
|
||||
|
||||
---
|
||||
|
||||
## Priority Assessment
|
||||
|
||||
| Project | Activity Level | Suggested Priority |
|
||||
|---------|---------------|-------------------|
|
||||
| NightwingGameSim | Active | 🔴 High |
|
||||
| MessageRetrieval | Active | 🔴 High |
|
||||
| Kandinsky | Active | 🟡 Medium |
|
||||
| ContentBasedMIR | Exploratory | 🟡 Medium |
|
||||
| Grailed Embedding Search | Early | 🟡 Medium |
|
||||
| NightwingNBA | Dormant | 🟢 Low |
|
||||
| NightwingArt | As-needed | 🟢 Low |
|
||||
| VST Builder | Concept | 🔵 Future |
|
||||
| Gov Auction Scraper | Concept | 🔵 Future |
|
||||
| Stable Audio Explorer | Dead | ⚫ None |
|
||||
|
||||
|
||||
|
||||
589
TODO.md
Normal file
589
TODO.md
Normal file
@@ -0,0 +1,589 @@
|
||||
# Hermes Agent - Future Improvements
|
||||
|
||||
> Ideas for enhancing the agent's capabilities, generated from self-analysis of the codebase.
|
||||
|
||||
---
|
||||
|
||||
## 1. Subagent Architecture (Context Isolation) 🎯
|
||||
|
||||
**Problem:** Long-running tools (terminal commands, browser automation, complex file operations) consume massive context. A single `ls -la` can add hundreds of lines. Browser snapshots, debugging sessions, and iterative terminal work quickly bloat the main conversation, leaving less room for actual reasoning.
|
||||
|
||||
**Solution:** The main agent becomes an **orchestrator** that delegates context-heavy tasks to **subagents**.
|
||||
|
||||
**Architecture:**
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ ORCHESTRATOR (main agent) │
|
||||
│ - Receives user request │
|
||||
│ - Plans approach │
|
||||
│ - Delegates heavy tasks to subagents │
|
||||
│ - Receives summarized results │
|
||||
│ - Maintains clean, focused context │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
|
||||
│ TERMINAL AGENT │ │ BROWSER AGENT │ │ CODE AGENT │
|
||||
│ - terminal tool │ │ - browser tools │ │ - file tools │
|
||||
│ - file tools │ │ - web_search │ │ - terminal │
|
||||
│ │ │ - web_extract │ │ │
|
||||
│ Isolated context│ │ Isolated context│ │ Isolated context│
|
||||
│ Returns summary │ │ Returns summary │ │ Returns summary │
|
||||
└─────────────────┘ └─────────────────┘ └─────────────────┘
|
||||
```
|
||||
|
||||
**How it works:**
|
||||
1. User asks: "Set up a new Python project with FastAPI and tests"
|
||||
2. Orchestrator plans: "I need to create files, install deps, write code"
|
||||
3. Orchestrator calls: `terminal_task(goal="Create venv, install fastapi pytest", context="New project in ~/myapp")`
|
||||
4. **Subagent spawns** with fresh context, only terminal/file tools
|
||||
5. Subagent iterates (may take 10+ tool calls, lots of output)
|
||||
6. Subagent completes → returns summary: "Created venv, installed fastapi==0.109.0, pytest==8.0.0"
|
||||
7. Orchestrator receives **only the summary**, context stays clean
|
||||
8. Orchestrator continues with next subtask
|
||||
|
||||
**Key tools to implement:**
|
||||
- [ ] `terminal_task(goal, context, cwd?)` - Delegate terminal/shell work
|
||||
- [ ] `browser_task(goal, context, start_url?)` - Delegate web research/automation
|
||||
- [ ] `code_task(goal, context, files?)` - Delegate code writing/modification
|
||||
- [ ] Generic `delegate_task(goal, context, toolsets=[])` - Flexible delegation
|
||||
|
||||
**Implementation details:**
|
||||
- [ ] Subagent uses same `run_agent.py` but with:
|
||||
- Fresh/empty conversation history
|
||||
- Limited toolset (only what's needed)
|
||||
- Smaller max_iterations (focused task)
|
||||
- Task-specific system prompt
|
||||
- [ ] Subagent returns structured result:
|
||||
```python
|
||||
{
|
||||
"success": True,
|
||||
"summary": "Installed 3 packages, created 2 files",
|
||||
"details": "Optional longer explanation if needed",
|
||||
"artifacts": ["~/myapp/requirements.txt", "~/myapp/main.py"], # Files created
|
||||
"errors": [] # Any issues encountered
|
||||
}
|
||||
```
|
||||
- [ ] Orchestrator sees only the summary in its context
|
||||
- [ ] Full subagent transcript saved separately for debugging
|
||||
|
||||
**Benefits:**
|
||||
- 🧹 **Clean context** - Orchestrator stays focused, doesn't drown in tool output
|
||||
- 📊 **Better token efficiency** - 50 terminal outputs → 1 summary paragraph
|
||||
- 🎯 **Focused subagents** - Each agent has just the tools it needs
|
||||
- 🔄 **Parallel potential** - Independent subtasks could run concurrently
|
||||
- 🐛 **Easier debugging** - Each subtask has its own isolated transcript
|
||||
|
||||
**When to use subagents vs direct tools:**
|
||||
- **Subagent**: Multi-step tasks, iteration likely, lots of output expected
|
||||
- **Direct**: Quick one-off commands, simple file reads, user needs to see output
|
||||
|
||||
**Files to modify:** `run_agent.py` (add orchestration mode), new `tools/delegate_tools.py`, new `subagent_runner.py`
|
||||
|
||||
---
|
||||
|
||||
## 2. Planning & Task Management 📋
|
||||
|
||||
**Problem:** Agent handles tasks reactively without explicit planning. Complex multi-step tasks lack structure, progress tracking, and the ability to decompose work into manageable chunks.
|
||||
|
||||
**Ideas:**
|
||||
- [ ] **Task decomposition tool** - Break complex requests into subtasks:
|
||||
```
|
||||
User: "Set up a new Python project with FastAPI, tests, and Docker"
|
||||
|
||||
Agent creates plan:
|
||||
├── 1. Create project structure and requirements.txt
|
||||
├── 2. Implement FastAPI app skeleton
|
||||
├── 3. Add pytest configuration and initial tests
|
||||
├── 4. Create Dockerfile and docker-compose.yml
|
||||
└── 5. Verify everything works together
|
||||
```
|
||||
- Each subtask becomes a trackable unit
|
||||
- Agent can report progress: "Completed 3/5 tasks"
|
||||
|
||||
- [ ] **Progress checkpoints** - Periodic self-assessment:
|
||||
- After N tool calls or time elapsed, pause to evaluate
|
||||
- "What have I accomplished? What remains? Am I on track?"
|
||||
- Detect if stuck in loops or making no progress
|
||||
- Could trigger replanning if approach isn't working
|
||||
|
||||
- [ ] **Explicit plan storage** - Persist plan in conversation:
|
||||
- Store as structured data (not just in context)
|
||||
- Update status as tasks complete
|
||||
- User can ask "What's the plan?" or "What's left?"
|
||||
- Survives context compression (plans are protected)
|
||||
|
||||
- [ ] **Failure recovery with replanning** - When things go wrong:
|
||||
- Record what failed and why
|
||||
- Revise plan to work around the issue
|
||||
- "Step 3 failed because X, adjusting approach to Y"
|
||||
- Prevents repeating failed strategies
|
||||
|
||||
**Files to modify:** `run_agent.py` (add planning hooks), new `tools/planning_tool.py`
|
||||
|
||||
---
|
||||
|
||||
## 3. Dynamic Skills Expansion 📚
|
||||
|
||||
**Problem:** Skills system is elegant but static. Skills must be manually created and added.
|
||||
|
||||
**Ideas:**
|
||||
- [ ] **Skill acquisition from successful tasks** - After completing a complex task:
|
||||
- "This approach worked well. Save as a skill?"
|
||||
- Extract: goal, steps taken, tools used, key decisions
|
||||
- Generate SKILL.md automatically
|
||||
- Store in user's skills directory
|
||||
|
||||
- [ ] **Skill templates** - Common patterns that can be parameterized:
|
||||
```markdown
|
||||
# Debug {language} Error
|
||||
1. Reproduce the error
|
||||
2. Search for error message: `web_search("{error_message} {language}")`
|
||||
3. Check common causes: {common_causes}
|
||||
4. Apply fix and verify
|
||||
```
|
||||
|
||||
- [ ] **Skill chaining** - Combine skills for complex workflows:
|
||||
- Skills can reference other skills as dependencies
|
||||
- "To do X, first apply skill Y, then skill Z"
|
||||
- Directed graph of skill dependencies
|
||||
|
||||
**Files to modify:** `tools/skills_tool.py`, `skills/` directory structure, new `skill_generator.py`
|
||||
|
||||
---
|
||||
|
||||
## 4. Interactive Clarifying Questions Tool ❓
|
||||
|
||||
**Problem:** Agent sometimes makes assumptions or guesses when it should ask the user. Currently can only ask via text, which gets lost in long outputs.
|
||||
|
||||
**Ideas:**
|
||||
- [ ] **Multiple-choice prompt tool** - Let agent present structured choices to user:
|
||||
```
|
||||
ask_user_choice(
|
||||
question="Should the language switcher enable only German or all languages?",
|
||||
choices=[
|
||||
"Only enable German - works immediately",
|
||||
"Enable all, mark untranslated - show fallback notice",
|
||||
"Let me specify something else"
|
||||
]
|
||||
)
|
||||
```
|
||||
- Renders as interactive terminal UI with arrow key / Tab navigation
|
||||
- User selects option, result returned to agent
|
||||
- Up to 4 choices + optional free-text option
|
||||
|
||||
- [ ] **Implementation:**
|
||||
- Use `inquirer` or `questionary` Python library for rich terminal prompts
|
||||
- Tool returns selected option text (or user's custom input)
|
||||
- **CLI-only** - only works when running via `cli.py` (not API/programmatic use)
|
||||
- Graceful fallback: if not in interactive mode, return error asking agent to rephrase as text
|
||||
|
||||
- [ ] **Use cases:**
|
||||
- Clarify ambiguous requirements before starting work
|
||||
- Confirm destructive operations with clear options
|
||||
- Let user choose between implementation approaches
|
||||
- Checkpoint complex multi-step workflows
|
||||
|
||||
**Files to modify:** New `tools/ask_user_tool.py`, `cli.py` (detect interactive mode), `model_tools.py`
|
||||
|
||||
---
|
||||
|
||||
## 5. Collaborative Problem Solving 🤝
|
||||
|
||||
**Problem:** Interaction is command/response. Complex problems benefit from dialogue.
|
||||
|
||||
**Ideas:**
|
||||
- [ ] **Assumption surfacing** - Make implicit assumptions explicit:
|
||||
- "I'm assuming you want Python 3.11+. Correct?"
|
||||
- "This solution assumes you have sudo access..."
|
||||
- Let user correct before going down wrong path
|
||||
|
||||
- [ ] **Checkpoint & confirm** - For high-stakes operations:
|
||||
- "About to delete 47 files. Here's the list - proceed?"
|
||||
- "This will modify your database. Want a backup first?"
|
||||
- Configurable threshold for when to ask
|
||||
|
||||
**Files to modify:** `run_agent.py`, system prompt configuration
|
||||
|
||||
---
|
||||
|
||||
## 6. Project-Local Context 💾
|
||||
|
||||
**Problem:** Valuable context lost between sessions.
|
||||
|
||||
**Ideas:**
|
||||
- [ ] **Project awareness** - Remember project-specific context:
|
||||
- Store `.hermes/context.md` in project directory
|
||||
- "This is a Django project using PostgreSQL"
|
||||
- Coding style preferences, deployment setup, etc.
|
||||
- Load automatically when working in that directory
|
||||
|
||||
- [ ] **Handoff notes** - Leave notes for future sessions:
|
||||
- Write to `.hermes/notes.md` in project
|
||||
- "TODO for next session: finish implementing X"
|
||||
- "Known issues: Y doesn't work on Windows"
|
||||
|
||||
**Files to modify:** New `project_context.py`, auto-load in `run_agent.py`
|
||||
|
||||
## 6. Tools & Skills Wishlist 🧰
|
||||
|
||||
*Things that would need new tool implementations (can't do well with current tools):*
|
||||
|
||||
### High-Impact
|
||||
|
||||
- [ ] **Audio/Video Transcription** 🎬 *(See also: Section 16 for detailed spec)*
|
||||
- Transcribe audio files, podcasts, YouTube videos
|
||||
- Extract key moments from video
|
||||
- Voice memo transcription for messaging integrations
|
||||
- *Provider options: Whisper API, Deepgram, local Whisper*
|
||||
|
||||
- [ ] **Diagram Rendering** 📊
|
||||
- Render Mermaid/PlantUML to actual images
|
||||
- Can generate the code, but rendering requires external service or tool
|
||||
- "Show me how these components connect" → actual visual diagram
|
||||
|
||||
### Medium-Impact
|
||||
|
||||
- [ ] **Canvas / Visual Workspace** 🖼️
|
||||
- Agent-controlled visual panel for rendering interactive UI
|
||||
- Inspired by OpenClaw's Canvas feature
|
||||
- **Capabilities:**
|
||||
- `present` / `hide` - Show/hide the canvas panel
|
||||
- `navigate` - Load HTML files or URLs into the canvas
|
||||
- `eval` - Execute JavaScript in the canvas context
|
||||
- `snapshot` - Capture the rendered UI as an image
|
||||
- **Use cases:**
|
||||
- Display generated HTML/CSS/JS previews
|
||||
- Show interactive data visualizations (charts, graphs)
|
||||
- Render diagrams (Mermaid → rendered output)
|
||||
- Present structured information in rich format
|
||||
- A2UI-style component system for structured agent UI
|
||||
- **Implementation options:**
|
||||
- Electron-based panel for CLI
|
||||
- WebSocket-connected web app
|
||||
- VS Code webview extension
|
||||
- *Would let agent "show" things rather than just describe them*
|
||||
|
||||
- [ ] **Document Generation** 📄
|
||||
- Create styled PDFs, Word docs, presentations
|
||||
- *Can do basic PDF via terminal tools, but limited*
|
||||
|
||||
- [ ] **Diff/Patch Tool** 📝
|
||||
- Surgical code modifications with preview
|
||||
- "Change line 45-50 to X" without rewriting whole file
|
||||
- Show diffs before applying
|
||||
- *Can use `diff`/`patch` but a native tool would be safer*
|
||||
|
||||
### Skills to Create
|
||||
|
||||
- [ ] **Domain-specific skill packs:**
|
||||
- DevOps/Infrastructure (Terraform, K8s, AWS)
|
||||
- Data Science workflows (EDA, model training)
|
||||
- Security/pentesting procedures
|
||||
|
||||
- [ ] **Framework-specific skills:**
|
||||
- React/Vue/Angular patterns
|
||||
- Django/Rails/Express conventions
|
||||
- Database optimization playbooks
|
||||
|
||||
- [ ] **Troubleshooting flowcharts:**
|
||||
- "Docker container won't start" → decision tree
|
||||
- "Production is slow" → systematic diagnosis
|
||||
|
||||
---
|
||||
|
||||
## 7. Messaging Platform Integrations 💬 ✅ COMPLETE
|
||||
|
||||
**Problem:** Agent currently only works via `cli.py` which requires direct terminal access. Users may want to interact via messaging apps from their phone or other devices.
|
||||
|
||||
**Architecture:**
|
||||
- `run_agent.py` already accepts `conversation_history` parameter and returns updated messages ✅
|
||||
- Need: persistent session storage, platform monitors, session key resolution
|
||||
|
||||
**Implementation approach:**
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ Platform Monitor (e.g., telegram_monitor.py) │
|
||||
│ ├─ Long-running daemon connecting to messaging platform │
|
||||
│ ├─ On message: resolve session key → load history from disk│
|
||||
│ ├─ Call run_agent.py with loaded history │
|
||||
│ ├─ Save updated history back to disk (JSONL) │
|
||||
│ └─ Send response back to platform │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**Platform support (each user sets up their own credentials):**
|
||||
- [x] **Telegram** - via `python-telegram-bot`
|
||||
- Bot token from @BotFather
|
||||
- Easiest to set up, good for personal use
|
||||
- [x] **Discord** - via `discord.py`
|
||||
- Bot token from Discord Developer Portal
|
||||
- Can work in servers (group sessions) or DMs
|
||||
- [x] **WhatsApp** - via Node.js bridge (whatsapp-web.js/baileys)
|
||||
- Requires Node.js bridge setup
|
||||
- More complex, but reaches most people
|
||||
|
||||
**Session management:**
|
||||
- [x] **Session store** - JSONL persistence per session key
|
||||
- `~/.hermes/sessions/{session_id}.jsonl`
|
||||
- Session keys: `agent:main:telegram:dm`, `agent:main:discord:group:123`, etc.
|
||||
- [x] **Session expiry** - Configurable reset policies
|
||||
- Daily reset (default 4am) OR idle timeout (default 2 hours)
|
||||
- Manual reset via `/reset` or `/new` command in chat
|
||||
- Per-platform and per-type overrides
|
||||
- [x] **Session continuity** - Conversations persist across messages until reset
|
||||
|
||||
**Files created:** `gateway/`, `gateway/platforms/`, `gateway/config.py`, `gateway/session.py`, `gateway/delivery.py`, `gateway/run.py`
|
||||
|
||||
**Configuration:**
|
||||
- Environment variables: `TELEGRAM_BOT_TOKEN`, `DISCORD_BOT_TOKEN`, etc.
|
||||
- Config file: `~/.hermes/gateway.json`
|
||||
- CLI commands: `/platforms` to check status, `--gateway` to start
|
||||
|
||||
**Dynamic context injection:**
|
||||
- Agent knows its source platform and chat
|
||||
- Agent knows connected platforms and home channels
|
||||
- Agent can deliver cron outputs to specific platforms
|
||||
|
||||
---
|
||||
|
||||
## 8. Text-to-Speech (TTS) 🔊
|
||||
|
||||
**Problem:** Agent can only respond with text. Some users prefer audio responses (accessibility, hands-free use, podcasts).
|
||||
|
||||
**Ideas:**
|
||||
- [ ] **TTS tool** - Generate audio files from text
|
||||
```python
|
||||
tts_generate(text="Here's your summary...", voice="nova", output="summary.mp3")
|
||||
```
|
||||
- Returns path to generated audio file
|
||||
- For messaging integrations: can send as voice message
|
||||
|
||||
- [ ] **Provider options:**
|
||||
- Edge TTS (free, good quality, many voices)
|
||||
- OpenAI TTS (paid, excellent quality)
|
||||
- ElevenLabs (paid, best quality, voice cloning)
|
||||
- Local options (Coqui TTS, Bark)
|
||||
|
||||
- [ ] **Modes:**
|
||||
- On-demand: User explicitly asks "read this to me"
|
||||
- Auto-TTS: Configurable to always generate audio for responses
|
||||
- Long-text handling: Summarize or chunk very long responses
|
||||
|
||||
- [ ] **Integration with messaging:**
|
||||
- When enabled, can send voice notes instead of/alongside text
|
||||
- User preference per channel
|
||||
|
||||
**Files to create:** `tools/tts_tool.py`, config in `cli-config.yaml`
|
||||
|
||||
---
|
||||
|
||||
## 13. Speech-to-Text / Audio Transcription 🎤
|
||||
|
||||
**Problem:** Users may want to send voice memos instead of typing. Agent is blind to audio content.
|
||||
|
||||
**Ideas:**
|
||||
- [ ] **Voice memo transcription** - For messaging integrations
|
||||
- User sends voice message → transcribe → process as text
|
||||
- Seamless: user speaks, agent responds
|
||||
|
||||
- [ ] **Audio/video file transcription** - Existing idea, expanded:
|
||||
- Transcribe local audio files (mp3, wav, m4a)
|
||||
- Transcribe YouTube videos (download audio → transcribe)
|
||||
- Extract key moments with timestamps
|
||||
|
||||
- [ ] **Provider options:**
|
||||
- OpenAI Whisper API (good quality, cheap)
|
||||
- Deepgram (fast, good for real-time)
|
||||
- Local Whisper (free, runs on GPU)
|
||||
- Groq Whisper (fast, free tier available)
|
||||
|
||||
- [ ] **Tool interface:**
|
||||
```python
|
||||
transcribe(source="audio.mp3") # Local file
|
||||
transcribe(source="https://youtube.com/...") # YouTube
|
||||
transcribe(source="voice_message", data=bytes) # Voice memo
|
||||
```
|
||||
|
||||
**Files to create:** `tools/transcribe_tool.py`, integrate with messaging monitors
|
||||
|
||||
### Plugin/Extension System 🔌
|
||||
|
||||
**Concept:** Allow users to add custom tools/skills without modifying core code.
|
||||
|
||||
**Why interesting:**
|
||||
- Community contributions
|
||||
- Organization-specific tools
|
||||
- Clean separation of core vs. extensions
|
||||
|
||||
**Open questions:**
|
||||
- Security implications of loading arbitrary code
|
||||
- Versioning and compatibility
|
||||
- Discovery and installation UX
|
||||
|
||||
---
|
||||
|
||||
## Recently Completed ✅
|
||||
|
||||
### Dangerous Command Approval System
|
||||
**Implemented:** Dangerous command detection and approval for terminal tool.
|
||||
|
||||
**Features:**
|
||||
- Pattern-based detection of dangerous commands (rm -rf, DROP TABLE, chmod 777, etc.)
|
||||
- CLI prompt with options: `[o]nce | [s]ession | [a]lways | [d]eny`
|
||||
- Session caching (approved patterns don't re-prompt)
|
||||
- Permanent allowlist in `~/.hermes/config.yaml`
|
||||
- Force flag for agent to bypass after user confirmation
|
||||
- Skip check for isolated backends (Docker, Singularity, Modal)
|
||||
- Helpful sudo failure messages for messaging platforms
|
||||
|
||||
**Files:** `tools/terminal_tool.py`, `model_tools.py`, `hermes_cli/config.py`
|
||||
|
||||
---
|
||||
|
||||
## 14. Learning Machine / Dynamic Memory System 🧠
|
||||
|
||||
*Inspired by [Dash](~/agent-codebases/dash) - a self-learning data agent.*
|
||||
|
||||
**Problem:** Agent starts fresh every session. Valuable learnings from debugging, error patterns, successful approaches, and user preferences are lost.
|
||||
|
||||
**Dash's Key Insight:** Separate **Knowledge** (static, curated) from **Learnings** (dynamic, discovered):
|
||||
|
||||
| System | What It Stores | How It Evolves |
|
||||
|--------|---------------|----------------|
|
||||
| **Knowledge** (Skills) | Validated approaches, templates, best practices | Curated by user |
|
||||
| **Learnings** | Error patterns, gotchas, discovered fixes | Managed automatically |
|
||||
|
||||
**Tools to implement:**
|
||||
- [ ] `save_learning(topic, learning, context?)` - Record a discovered pattern
|
||||
```python
|
||||
save_learning(
|
||||
topic="python-ssl",
|
||||
learning="On Ubuntu 22.04, SSL certificate errors often fixed by: apt install ca-certificates",
|
||||
context="Debugging requests SSL failure"
|
||||
)
|
||||
```
|
||||
- [ ] `search_learnings(query)` - Find relevant past learnings
|
||||
```python
|
||||
search_learnings("SSL certificate error Python")
|
||||
# Returns: "On Ubuntu 22.04, SSL certificate errors often fixed by..."
|
||||
```
|
||||
|
||||
**User Profile & Memory:**
|
||||
- [ ] `user_profile` - Structured facts about user preferences
|
||||
```yaml
|
||||
# ~/.hermes/user_profile.yaml
|
||||
coding_style:
|
||||
python_formatter: black
|
||||
type_hints: always
|
||||
test_framework: pytest
|
||||
preferences:
|
||||
verbosity: detailed
|
||||
confirm_destructive: true
|
||||
environment:
|
||||
os: linux
|
||||
shell: bash
|
||||
default_python: 3.11
|
||||
```
|
||||
- [ ] `user_memory` - Unstructured observations the agent learns
|
||||
```yaml
|
||||
# ~/.hermes/user_memory.yaml
|
||||
- "User prefers tabs over spaces despite black's defaults"
|
||||
- "User's main project is ~/work/myapp - a Django app"
|
||||
- "User often works late - don't ask about timezone"
|
||||
```
|
||||
|
||||
**When to learn:**
|
||||
- After fixing an error that took multiple attempts
|
||||
- When user corrects the agent's approach
|
||||
- When a workaround is discovered for a tool limitation
|
||||
- When user expresses a preference
|
||||
|
||||
**Storage:** Vector database (ChromaDB) or simple YAML with embedding search.
|
||||
|
||||
**Files to create:** `tools/learning_tools.py`, `learning/store.py`, `~/.hermes/learnings/`
|
||||
|
||||
---
|
||||
|
||||
## 15. Layered Context Architecture 📊
|
||||
|
||||
*Inspired by Dash's "Six Layers of Context" - grounding responses in multiple sources.*
|
||||
|
||||
**Problem:** Context sources are ad-hoc. No clear hierarchy or strategy for what context to include when.
|
||||
|
||||
**Proposed Layers for Hermes:**
|
||||
|
||||
| Layer | Source | When Loaded | Example |
|
||||
|-------|--------|-------------|---------|
|
||||
| 1. **Project Context** | `.hermes/context.md` | Auto on cwd | "This is a FastAPI project using PostgreSQL" |
|
||||
| 2. **Skills** | `skills/*.md` | On request | "How to set up React project" |
|
||||
| 3. **User Profile** | `~/.hermes/user_profile.yaml` | Always | "User prefers pytest, uses black" |
|
||||
| 4. **Learnings** | `~/.hermes/learnings/` | Semantic search | "SSL fix for Ubuntu" |
|
||||
| 5. **External Knowledge** | Web search, docs | On demand | Current API docs, Stack Overflow |
|
||||
| 6. **Runtime Introspection** | Tool calls | Real-time | File contents, terminal output |
|
||||
|
||||
**Benefits:**
|
||||
- Clear mental model for what context is available
|
||||
- Prioritization: local > learned > external
|
||||
- Debugging: "Why did agent do X?" → check which layers contributed
|
||||
|
||||
**Files to modify:** `run_agent.py` (context loading), new `context/layers.py`
|
||||
|
||||
---
|
||||
|
||||
## 16. Evaluation System with LLM Grading 📏
|
||||
|
||||
*Inspired by Dash's evaluation framework.*
|
||||
|
||||
**Problem:** `batch_runner.py` runs test cases but lacks quality assessment.
|
||||
|
||||
**Dash's Approach:**
|
||||
- **String matching** (default) - Check if expected strings appear
|
||||
- **LLM grader** (-g flag) - GPT evaluates response quality
|
||||
- **Result comparison** (-r flag) - Compare against golden output
|
||||
|
||||
**Implementation for Hermes:**
|
||||
|
||||
- [ ] **Test case format:**
|
||||
```python
|
||||
TestCase(
|
||||
name="create_python_project",
|
||||
prompt="Create a new Python project with FastAPI and tests",
|
||||
expected_strings=["requirements.txt", "main.py", "test_"], # Basic check
|
||||
golden_actions=["write:main.py", "write:requirements.txt", "terminal:pip install"],
|
||||
grader_criteria="Should create complete project structure with working code"
|
||||
)
|
||||
```
|
||||
|
||||
- [ ] **LLM grader mode:**
|
||||
```python
|
||||
def grade_response(response: str, criteria: str) -> Grade:
|
||||
"""Use GPT to evaluate response quality."""
|
||||
prompt = f"""
|
||||
Evaluate this agent response against the criteria.
|
||||
Criteria: {criteria}
|
||||
Response: {response}
|
||||
|
||||
Score (1-5) and explain why.
|
||||
"""
|
||||
# Returns: Grade(score=4, explanation="Created all files but tests are minimal")
|
||||
```
|
||||
|
||||
- [ ] **Action comparison mode:**
|
||||
- Record tool calls made during test
|
||||
- Compare against expected actions
|
||||
- "Expected terminal call to pip install, got npm install"
|
||||
|
||||
- [ ] **CLI flags:**
|
||||
```bash
|
||||
python batch_runner.py eval test_cases.yaml # String matching
|
||||
python batch_runner.py eval test_cases.yaml -g # + LLM grading
|
||||
python batch_runner.py eval test_cases.yaml -r # + Result comparison
|
||||
python batch_runner.py eval test_cases.yaml -v # Verbose (show responses)
|
||||
```
|
||||
|
||||
**Files to modify:** `batch_runner.py`, new `evals/test_cases.py`, new `evals/grader.py`
|
||||
|
||||
---
|
||||
|
||||
*Last updated: $(date +%Y-%m-%d)* 🤖
|
||||
@@ -1,6 +0,0 @@
|
||||
"""Agent internals -- extracted modules from run_agent.py.
|
||||
|
||||
These modules contain pure utility functions and self-contained classes
|
||||
that were previously embedded in the 3,600-line run_agent.py. Extracting
|
||||
them makes run_agent.py focused on the AIAgent orchestrator class.
|
||||
"""
|
||||
@@ -1,600 +0,0 @@
|
||||
"""Shared auxiliary OpenAI client for cheap/fast side tasks.
|
||||
|
||||
Provides a single resolution chain so every consumer (context compression,
|
||||
session search, web extraction, vision analysis, browser vision) picks up
|
||||
the best available backend without duplicating fallback logic.
|
||||
|
||||
Resolution order for text tasks (auto mode):
|
||||
1. OpenRouter (OPENROUTER_API_KEY)
|
||||
2. Nous Portal (~/.hermes/auth.json active provider)
|
||||
3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
|
||||
4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
|
||||
wrapped to look like a chat.completions client)
|
||||
5. Direct API-key providers (z.ai/GLM, Kimi/Moonshot, MiniMax, MiniMax-CN)
|
||||
— checked via PROVIDER_REGISTRY entries with auth_type='api_key'
|
||||
6. None
|
||||
|
||||
Resolution order for vision/multimodal tasks (auto mode):
|
||||
1. OpenRouter
|
||||
2. Nous Portal
|
||||
3. None (steps 3-5 are skipped — they may not support multimodal)
|
||||
|
||||
Per-task provider overrides (e.g. AUXILIARY_VISION_PROVIDER,
|
||||
CONTEXT_COMPRESSION_PROVIDER) can force a specific provider for each task:
|
||||
"openrouter", "nous", "codex", or "main" (= steps 3-5).
|
||||
Default "auto" follows the chains above.
|
||||
|
||||
Per-task model overrides (e.g. AUXILIARY_VISION_MODEL,
|
||||
AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug
|
||||
than the provider's default.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
|
||||
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||
"zai": "glm-4.5-flash",
|
||||
"kimi-coding": "kimi-k2-turbo-preview",
|
||||
"minimax": "MiniMax-M2.5-highspeed",
|
||||
"minimax-cn": "MiniMax-M2.5-highspeed",
|
||||
}
|
||||
|
||||
# OpenRouter app attribution headers
|
||||
_OR_HEADERS = {
|
||||
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
||||
"X-OpenRouter-Title": "Hermes Agent",
|
||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||
}
|
||||
|
||||
# Nous Portal extra_body for product attribution.
|
||||
# Callers should pass this as extra_body in chat.completions.create()
|
||||
# when the auxiliary client is backed by Nous Portal.
|
||||
NOUS_EXTRA_BODY = {"tags": ["product=hermes-agent"]}
|
||||
|
||||
# Set at resolve time — True if the auxiliary client points to Nous Portal
|
||||
auxiliary_is_nous: bool = False
|
||||
|
||||
# Default auxiliary models per provider
|
||||
_OPENROUTER_MODEL = "google/gemini-3-flash-preview"
|
||||
_NOUS_MODEL = "gemini-3-flash"
|
||||
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
|
||||
_AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json"
|
||||
|
||||
# Codex fallback: uses the Responses API (the only endpoint the Codex
|
||||
# OAuth token can access) with a fast model for auxiliary tasks.
|
||||
_CODEX_AUX_MODEL = "gpt-5.3-codex"
|
||||
_CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"
|
||||
|
||||
|
||||
# ── Codex Responses → chat.completions adapter ─────────────────────────────
|
||||
# All auxiliary consumers call client.chat.completions.create(**kwargs) and
|
||||
# read response.choices[0].message.content. This adapter translates those
|
||||
# calls to the Codex Responses API so callers don't need any changes.
|
||||
|
||||
|
||||
def _convert_content_for_responses(content: Any) -> Any:
|
||||
"""Convert chat.completions content to Responses API format.
|
||||
|
||||
chat.completions uses:
|
||||
{"type": "text", "text": "..."}
|
||||
{"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}
|
||||
|
||||
Responses API uses:
|
||||
{"type": "input_text", "text": "..."}
|
||||
{"type": "input_image", "image_url": "data:image/png;base64,..."}
|
||||
|
||||
If content is a plain string, it's returned as-is (the Responses API
|
||||
accepts strings directly for text-only messages).
|
||||
"""
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
if not isinstance(content, list):
|
||||
return str(content) if content else ""
|
||||
|
||||
converted: List[Dict[str, Any]] = []
|
||||
for part in content:
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
ptype = part.get("type", "")
|
||||
if ptype == "text":
|
||||
converted.append({"type": "input_text", "text": part.get("text", "")})
|
||||
elif ptype == "image_url":
|
||||
# chat.completions nests the URL: {"image_url": {"url": "..."}}
|
||||
image_data = part.get("image_url", {})
|
||||
url = image_data.get("url", "") if isinstance(image_data, dict) else str(image_data)
|
||||
entry: Dict[str, Any] = {"type": "input_image", "image_url": url}
|
||||
# Preserve detail if specified
|
||||
detail = image_data.get("detail") if isinstance(image_data, dict) else None
|
||||
if detail:
|
||||
entry["detail"] = detail
|
||||
converted.append(entry)
|
||||
elif ptype in ("input_text", "input_image"):
|
||||
# Already in Responses format — pass through
|
||||
converted.append(part)
|
||||
else:
|
||||
# Unknown content type — try to preserve as text
|
||||
text = part.get("text", "")
|
||||
if text:
|
||||
converted.append({"type": "input_text", "text": text})
|
||||
|
||||
return converted or ""
|
||||
|
||||
|
||||
class _CodexCompletionsAdapter:
|
||||
"""Drop-in shim that accepts chat.completions.create() kwargs and
|
||||
routes them through the Codex Responses streaming API."""
|
||||
|
||||
def __init__(self, real_client: OpenAI, model: str):
|
||||
self._client = real_client
|
||||
self._model = model
|
||||
|
||||
def create(self, **kwargs) -> Any:
|
||||
messages = kwargs.get("messages", [])
|
||||
model = kwargs.get("model", self._model)
|
||||
temperature = kwargs.get("temperature")
|
||||
|
||||
# Separate system/instructions from conversation messages.
|
||||
# Convert chat.completions multimodal content blocks to Responses
|
||||
# API format (input_text / input_image instead of text / image_url).
|
||||
instructions = "You are a helpful assistant."
|
||||
input_msgs: List[Dict[str, Any]] = []
|
||||
for msg in messages:
|
||||
role = msg.get("role", "user")
|
||||
content = msg.get("content") or ""
|
||||
if role == "system":
|
||||
instructions = content if isinstance(content, str) else str(content)
|
||||
else:
|
||||
input_msgs.append({
|
||||
"role": role,
|
||||
"content": _convert_content_for_responses(content),
|
||||
})
|
||||
|
||||
resp_kwargs: Dict[str, Any] = {
|
||||
"model": model,
|
||||
"instructions": instructions,
|
||||
"input": input_msgs or [{"role": "user", "content": ""}],
|
||||
"store": False,
|
||||
}
|
||||
|
||||
# Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
|
||||
# support max_output_tokens or temperature — omit to avoid 400 errors.
|
||||
|
||||
# Tools support for flush_memories and similar callers
|
||||
tools = kwargs.get("tools")
|
||||
if tools:
|
||||
converted = []
|
||||
for t in tools:
|
||||
fn = t.get("function", {}) if isinstance(t, dict) else {}
|
||||
name = fn.get("name")
|
||||
if not name:
|
||||
continue
|
||||
converted.append({
|
||||
"type": "function",
|
||||
"name": name,
|
||||
"description": fn.get("description", ""),
|
||||
"parameters": fn.get("parameters", {}),
|
||||
})
|
||||
if converted:
|
||||
resp_kwargs["tools"] = converted
|
||||
|
||||
# Stream and collect the response
|
||||
text_parts: List[str] = []
|
||||
tool_calls_raw: List[Any] = []
|
||||
usage = None
|
||||
|
||||
try:
|
||||
with self._client.responses.stream(**resp_kwargs) as stream:
|
||||
for _event in stream:
|
||||
pass
|
||||
final = stream.get_final_response()
|
||||
|
||||
# Extract text and tool calls from the Responses output
|
||||
for item in getattr(final, "output", []):
|
||||
item_type = getattr(item, "type", None)
|
||||
if item_type == "message":
|
||||
for part in getattr(item, "content", []):
|
||||
ptype = getattr(part, "type", None)
|
||||
if ptype in ("output_text", "text"):
|
||||
text_parts.append(getattr(part, "text", ""))
|
||||
elif item_type == "function_call":
|
||||
tool_calls_raw.append(SimpleNamespace(
|
||||
id=getattr(item, "call_id", ""),
|
||||
type="function",
|
||||
function=SimpleNamespace(
|
||||
name=getattr(item, "name", ""),
|
||||
arguments=getattr(item, "arguments", "{}"),
|
||||
),
|
||||
))
|
||||
|
||||
resp_usage = getattr(final, "usage", None)
|
||||
if resp_usage:
|
||||
usage = SimpleNamespace(
|
||||
prompt_tokens=getattr(resp_usage, "input_tokens", 0),
|
||||
completion_tokens=getattr(resp_usage, "output_tokens", 0),
|
||||
total_tokens=getattr(resp_usage, "total_tokens", 0),
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("Codex auxiliary Responses API call failed: %s", exc)
|
||||
raise
|
||||
|
||||
content = "".join(text_parts).strip() or None
|
||||
|
||||
# Build a response that looks like chat.completions
|
||||
message = SimpleNamespace(
|
||||
role="assistant",
|
||||
content=content,
|
||||
tool_calls=tool_calls_raw or None,
|
||||
)
|
||||
choice = SimpleNamespace(
|
||||
index=0,
|
||||
message=message,
|
||||
finish_reason="stop" if not tool_calls_raw else "tool_calls",
|
||||
)
|
||||
return SimpleNamespace(
|
||||
choices=[choice],
|
||||
model=model,
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
|
||||
class _CodexChatShim:
|
||||
"""Wraps the adapter to provide client.chat.completions.create()."""
|
||||
|
||||
def __init__(self, adapter: _CodexCompletionsAdapter):
|
||||
self.completions = adapter
|
||||
|
||||
|
||||
class CodexAuxiliaryClient:
|
||||
"""OpenAI-client-compatible wrapper that routes through Codex Responses API.
|
||||
|
||||
Consumers can call client.chat.completions.create(**kwargs) as normal.
|
||||
Also exposes .api_key and .base_url for introspection by async wrappers.
|
||||
"""
|
||||
|
||||
def __init__(self, real_client: OpenAI, model: str):
|
||||
self._real_client = real_client
|
||||
adapter = _CodexCompletionsAdapter(real_client, model)
|
||||
self.chat = _CodexChatShim(adapter)
|
||||
self.api_key = real_client.api_key
|
||||
self.base_url = real_client.base_url
|
||||
|
||||
def close(self):
|
||||
self._real_client.close()
|
||||
|
||||
|
||||
class _AsyncCodexCompletionsAdapter:
|
||||
"""Async version of the Codex Responses adapter.
|
||||
|
||||
Wraps the sync adapter via asyncio.to_thread() so async consumers
|
||||
(web_tools, session_search) can await it as normal.
|
||||
"""
|
||||
|
||||
def __init__(self, sync_adapter: _CodexCompletionsAdapter):
|
||||
self._sync = sync_adapter
|
||||
|
||||
async def create(self, **kwargs) -> Any:
|
||||
import asyncio
|
||||
return await asyncio.to_thread(self._sync.create, **kwargs)
|
||||
|
||||
|
||||
class _AsyncCodexChatShim:
|
||||
def __init__(self, adapter: _AsyncCodexCompletionsAdapter):
|
||||
self.completions = adapter
|
||||
|
||||
|
||||
class AsyncCodexAuxiliaryClient:
|
||||
"""Async-compatible wrapper matching AsyncOpenAI.chat.completions.create()."""
|
||||
|
||||
def __init__(self, sync_wrapper: "CodexAuxiliaryClient"):
|
||||
sync_adapter = sync_wrapper.chat.completions
|
||||
async_adapter = _AsyncCodexCompletionsAdapter(sync_adapter)
|
||||
self.chat = _AsyncCodexChatShim(async_adapter)
|
||||
self.api_key = sync_wrapper.api_key
|
||||
self.base_url = sync_wrapper.base_url
|
||||
|
||||
|
||||
def _read_nous_auth() -> Optional[dict]:
|
||||
"""Read and validate ~/.hermes/auth.json for an active Nous provider.
|
||||
|
||||
Returns the provider state dict if Nous is active with tokens,
|
||||
otherwise None.
|
||||
"""
|
||||
try:
|
||||
if not _AUTH_JSON_PATH.is_file():
|
||||
return None
|
||||
data = json.loads(_AUTH_JSON_PATH.read_text())
|
||||
if data.get("active_provider") != "nous":
|
||||
return None
|
||||
provider = data.get("providers", {}).get("nous", {})
|
||||
# Must have at least an access_token or agent_key
|
||||
if not provider.get("agent_key") and not provider.get("access_token"):
|
||||
return None
|
||||
return provider
|
||||
except Exception as exc:
|
||||
logger.debug("Could not read Nous auth: %s", exc)
|
||||
return None
|
||||
|
||||
|
||||
def _nous_api_key(provider: dict) -> str:
|
||||
"""Extract the best API key from a Nous provider state dict."""
|
||||
return provider.get("agent_key") or provider.get("access_token", "")
|
||||
|
||||
|
||||
def _nous_base_url() -> str:
|
||||
"""Resolve the Nous inference base URL from env or default."""
|
||||
return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)
|
||||
|
||||
|
||||
def _read_codex_access_token() -> Optional[str]:
|
||||
"""Read a valid Codex OAuth access token from Hermes auth store (~/.hermes/auth.json)."""
|
||||
try:
|
||||
from hermes_cli.auth import _read_codex_tokens
|
||||
data = _read_codex_tokens()
|
||||
tokens = data.get("tokens", {})
|
||||
access_token = tokens.get("access_token")
|
||||
if isinstance(access_token, str) and access_token.strip():
|
||||
return access_token.strip()
|
||||
return None
|
||||
except Exception as exc:
|
||||
logger.debug("Could not read Codex auth for auxiliary client: %s", exc)
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
"""Try each API-key provider in PROVIDER_REGISTRY order.
|
||||
|
||||
Returns (client, model) for the first provider whose env var is set,
|
||||
or (None, None) if none are configured.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
except ImportError:
|
||||
logger.debug("Could not import PROVIDER_REGISTRY for API-key fallback")
|
||||
return None, None
|
||||
|
||||
for provider_id, pconfig in PROVIDER_REGISTRY.items():
|
||||
if pconfig.auth_type != "api_key":
|
||||
continue
|
||||
# Check if any of the provider's env vars are set
|
||||
api_key = ""
|
||||
for env_var in pconfig.api_key_env_vars:
|
||||
val = os.getenv(env_var, "").strip()
|
||||
if val:
|
||||
api_key = val
|
||||
break
|
||||
if not api_key:
|
||||
continue
|
||||
# Resolve base URL (with optional env-var override)
|
||||
# Kimi Code keys (sk-kimi-) need api.kimi.com/coding/v1
|
||||
env_url = ""
|
||||
if pconfig.base_url_env_var:
|
||||
env_url = os.getenv(pconfig.base_url_env_var, "").strip()
|
||||
if env_url:
|
||||
base_url = env_url.rstrip("/")
|
||||
elif provider_id == "kimi-coding" and api_key.startswith("sk-kimi-"):
|
||||
base_url = "https://api.kimi.com/coding/v1"
|
||||
else:
|
||||
base_url = pconfig.inference_base_url
|
||||
model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id, "default")
|
||||
logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
|
||||
extra = {}
|
||||
if "api.kimi.com" in base_url.lower():
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
|
||||
return OpenAI(api_key=api_key, base_url=base_url, **extra), model
|
||||
|
||||
return None, None
|
||||
|
||||
|
||||
# ── Provider resolution helpers ─────────────────────────────────────────────
|
||||
|
||||
def _get_auxiliary_provider(task: str = "") -> str:
|
||||
"""Read the provider override for a specific auxiliary task.
|
||||
|
||||
Checks AUXILIARY_{TASK}_PROVIDER first (e.g. AUXILIARY_VISION_PROVIDER),
|
||||
then CONTEXT_{TASK}_PROVIDER (for the compression section's summary_provider),
|
||||
then falls back to "auto". Returns one of: "auto", "openrouter", "nous", "main".
|
||||
"""
|
||||
if task:
|
||||
for prefix in ("AUXILIARY_", "CONTEXT_"):
|
||||
val = os.getenv(f"{prefix}{task.upper()}_PROVIDER", "").strip().lower()
|
||||
if val and val != "auto":
|
||||
return val
|
||||
return "auto"
|
||||
|
||||
|
||||
def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
or_key = os.getenv("OPENROUTER_API_KEY")
|
||||
if not or_key:
|
||||
return None, None
|
||||
logger.debug("Auxiliary client: OpenRouter")
|
||||
return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
|
||||
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
|
||||
|
||||
|
||||
def _try_nous() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
nous = _read_nous_auth()
|
||||
if not nous:
|
||||
return None, None
|
||||
global auxiliary_is_nous
|
||||
auxiliary_is_nous = True
|
||||
logger.debug("Auxiliary client: Nous Portal")
|
||||
return (
|
||||
OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()),
|
||||
_NOUS_MODEL,
|
||||
)
|
||||
|
||||
|
||||
def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
custom_base = os.getenv("OPENAI_BASE_URL")
|
||||
custom_key = os.getenv("OPENAI_API_KEY")
|
||||
if not custom_base or not custom_key:
|
||||
return None, None
|
||||
model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini"
|
||||
logger.debug("Auxiliary client: custom endpoint (%s)", model)
|
||||
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
||||
|
||||
|
||||
def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
|
||||
codex_token = _read_codex_access_token()
|
||||
if not codex_token:
|
||||
return None, None
|
||||
logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
|
||||
real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL)
|
||||
return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL
|
||||
|
||||
|
||||
def _resolve_forced_provider(forced: str) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
"""Resolve a specific forced provider. Returns (None, None) if creds missing."""
|
||||
if forced == "openrouter":
|
||||
client, model = _try_openrouter()
|
||||
if client is None:
|
||||
logger.warning("auxiliary.provider=openrouter but OPENROUTER_API_KEY not set")
|
||||
return client, model
|
||||
|
||||
if forced == "nous":
|
||||
client, model = _try_nous()
|
||||
if client is None:
|
||||
logger.warning("auxiliary.provider=nous but Nous Portal not configured (run: hermes login)")
|
||||
return client, model
|
||||
|
||||
if forced == "codex":
|
||||
client, model = _try_codex()
|
||||
if client is None:
|
||||
logger.warning("auxiliary.provider=codex but no Codex OAuth token found (run: hermes model)")
|
||||
return client, model
|
||||
|
||||
if forced == "main":
|
||||
# "main" = skip OpenRouter/Nous, use the main chat model's credentials.
|
||||
for try_fn in (_try_custom_endpoint, _try_codex, _resolve_api_key_provider):
|
||||
client, model = try_fn()
|
||||
if client is not None:
|
||||
return client, model
|
||||
logger.warning("auxiliary.provider=main but no main endpoint credentials found")
|
||||
return None, None
|
||||
|
||||
# Unknown provider name — fall through to auto
|
||||
logger.warning("Unknown auxiliary.provider=%r, falling back to auto", forced)
|
||||
return None, None
|
||||
|
||||
|
||||
def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
"""Full auto-detection chain: OpenRouter → Nous → custom → Codex → API-key → None."""
|
||||
for try_fn in (_try_openrouter, _try_nous, _try_custom_endpoint,
|
||||
_try_codex, _resolve_api_key_provider):
|
||||
client, model = try_fn()
|
||||
if client is not None:
|
||||
return client, model
|
||||
logger.debug("Auxiliary client: none available")
|
||||
return None, None
|
||||
|
||||
|
||||
# ── Public API ──────────────────────────────────────────────────────────────
|
||||
|
||||
def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
"""Return (client, default_model_slug) for text-only auxiliary tasks.
|
||||
|
||||
Args:
|
||||
task: Optional task name ("compression", "web_extract") to check
|
||||
for a task-specific provider override.
|
||||
|
||||
Callers may override the returned model with a per-task env var
|
||||
(e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL).
|
||||
"""
|
||||
forced = _get_auxiliary_provider(task)
|
||||
if forced != "auto":
|
||||
return _resolve_forced_provider(forced)
|
||||
return _resolve_auto()
|
||||
|
||||
|
||||
def get_async_text_auxiliary_client(task: str = ""):
|
||||
"""Return (async_client, model_slug) for async consumers.
|
||||
|
||||
For standard providers returns (AsyncOpenAI, model). For Codex returns
|
||||
(AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
|
||||
Returns (None, None) when no provider is available.
|
||||
"""
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
sync_client, model = get_text_auxiliary_client(task)
|
||||
if sync_client is None:
|
||||
return None, None
|
||||
|
||||
if isinstance(sync_client, CodexAuxiliaryClient):
|
||||
return AsyncCodexAuxiliaryClient(sync_client), model
|
||||
|
||||
async_kwargs = {
|
||||
"api_key": sync_client.api_key,
|
||||
"base_url": str(sync_client.base_url),
|
||||
}
|
||||
if "openrouter" in str(sync_client.base_url).lower():
|
||||
async_kwargs["default_headers"] = dict(_OR_HEADERS)
|
||||
elif "api.kimi.com" in str(sync_client.base_url).lower():
|
||||
async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
|
||||
return AsyncOpenAI(**async_kwargs), model
|
||||
|
||||
|
||||
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
"""Return (client, default_model_slug) for vision/multimodal auxiliary tasks.
|
||||
|
||||
Checks AUXILIARY_VISION_PROVIDER for a forced provider, otherwise
|
||||
auto-detects. Callers may override the returned model with
|
||||
AUXILIARY_VISION_MODEL.
|
||||
|
||||
In auto mode, only providers known to support multimodal are tried:
|
||||
OpenRouter, Nous Portal, and Codex OAuth (gpt-5.3-codex supports
|
||||
vision via the Responses API). Custom endpoints and API-key
|
||||
providers are skipped — they may not handle vision input. To use
|
||||
them, set AUXILIARY_VISION_PROVIDER explicitly.
|
||||
"""
|
||||
forced = _get_auxiliary_provider("vision")
|
||||
if forced != "auto":
|
||||
return _resolve_forced_provider(forced)
|
||||
# Auto: try providers known to support multimodal first, then fall
|
||||
# back to the user's custom endpoint. Many local models (Qwen-VL,
|
||||
# LLaVA, Pixtral, etc.) support vision — skipping them entirely
|
||||
# caused silent failures for local-only users.
|
||||
for try_fn in (_try_openrouter, _try_nous, _try_codex,
|
||||
_try_custom_endpoint):
|
||||
client, model = try_fn()
|
||||
if client is not None:
|
||||
return client, model
|
||||
logger.debug("Auxiliary vision client: none available")
|
||||
return None, None
|
||||
|
||||
|
||||
def get_auxiliary_extra_body() -> dict:
|
||||
"""Return extra_body kwargs for auxiliary API calls.
|
||||
|
||||
Includes Nous Portal product tags when the auxiliary client is backed
|
||||
by Nous Portal. Returns empty dict otherwise.
|
||||
"""
|
||||
return dict(NOUS_EXTRA_BODY) if auxiliary_is_nous else {}
|
||||
|
||||
|
||||
def auxiliary_max_tokens_param(value: int) -> dict:
|
||||
"""Return the correct max tokens kwarg for the auxiliary client's provider.
|
||||
|
||||
OpenRouter and local models use 'max_tokens'. Direct OpenAI with newer
|
||||
models (gpt-4o, o-series, gpt-5+) requires 'max_completion_tokens'.
|
||||
The Codex adapter translates max_tokens internally, so we use max_tokens
|
||||
for it as well.
|
||||
"""
|
||||
custom_base = os.getenv("OPENAI_BASE_URL", "")
|
||||
or_key = os.getenv("OPENROUTER_API_KEY")
|
||||
# Only use max_completion_tokens for direct OpenAI custom endpoints
|
||||
if (not or_key
|
||||
and _read_nous_auth() is None
|
||||
and "api.openai.com" in custom_base.lower()):
|
||||
return {"max_completion_tokens": value}
|
||||
return {"max_tokens": value}
|
||||
@@ -1,365 +0,0 @@
|
||||
"""Automatic context window compression for long conversations.
|
||||
|
||||
Self-contained class with its own OpenAI client for summarization.
|
||||
Uses Gemini Flash (cheap/fast) to summarize middle turns while
|
||||
protecting head and tail context.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.auxiliary_client import get_text_auxiliary_client
|
||||
from agent.model_metadata import (
|
||||
get_model_context_length,
|
||||
estimate_messages_tokens_rough,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ContextCompressor:
|
||||
"""Compresses conversation context when approaching the model's context limit.
|
||||
|
||||
Algorithm: protect first N + last N turns, summarize everything in between.
|
||||
Token tracking uses actual counts from API responses for accuracy.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model: str,
|
||||
threshold_percent: float = 0.85,
|
||||
protect_first_n: int = 3,
|
||||
protect_last_n: int = 4,
|
||||
summary_target_tokens: int = 2500,
|
||||
quiet_mode: bool = False,
|
||||
summary_model_override: str = None,
|
||||
base_url: str = "",
|
||||
):
|
||||
self.model = model
|
||||
self.base_url = base_url
|
||||
self.threshold_percent = threshold_percent
|
||||
self.protect_first_n = protect_first_n
|
||||
self.protect_last_n = protect_last_n
|
||||
self.summary_target_tokens = summary_target_tokens
|
||||
self.quiet_mode = quiet_mode
|
||||
|
||||
self.context_length = get_model_context_length(model, base_url=base_url)
|
||||
self.threshold_tokens = int(self.context_length * threshold_percent)
|
||||
self.compression_count = 0
|
||||
self._context_probed = False # True after a step-down from context error
|
||||
|
||||
self.last_prompt_tokens = 0
|
||||
self.last_completion_tokens = 0
|
||||
self.last_total_tokens = 0
|
||||
|
||||
self.client, default_model = get_text_auxiliary_client("compression")
|
||||
self.summary_model = summary_model_override or default_model
|
||||
|
||||
def update_from_response(self, usage: Dict[str, Any]):
|
||||
"""Update tracked token usage from API response."""
|
||||
self.last_prompt_tokens = usage.get("prompt_tokens", 0)
|
||||
self.last_completion_tokens = usage.get("completion_tokens", 0)
|
||||
self.last_total_tokens = usage.get("total_tokens", 0)
|
||||
|
||||
def should_compress(self, prompt_tokens: int = None) -> bool:
|
||||
"""Check if context exceeds the compression threshold."""
|
||||
tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
|
||||
return tokens >= self.threshold_tokens
|
||||
|
||||
def should_compress_preflight(self, messages: List[Dict[str, Any]]) -> bool:
|
||||
"""Quick pre-flight check using rough estimate (before API call)."""
|
||||
rough_estimate = estimate_messages_tokens_rough(messages)
|
||||
return rough_estimate >= self.threshold_tokens
|
||||
|
||||
def get_status(self) -> Dict[str, Any]:
|
||||
"""Get current compression status for display/logging."""
|
||||
return {
|
||||
"last_prompt_tokens": self.last_prompt_tokens,
|
||||
"threshold_tokens": self.threshold_tokens,
|
||||
"context_length": self.context_length,
|
||||
"usage_percent": (self.last_prompt_tokens / self.context_length * 100) if self.context_length else 0,
|
||||
"compression_count": self.compression_count,
|
||||
}
|
||||
|
||||
def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> Optional[str]:
|
||||
"""Generate a concise summary of conversation turns.
|
||||
|
||||
Tries the auxiliary model first, then falls back to the user's main
|
||||
model. Returns None if all attempts fail — the caller should drop
|
||||
the middle turns without a summary rather than inject a useless
|
||||
placeholder.
|
||||
"""
|
||||
parts = []
|
||||
for msg in turns_to_summarize:
|
||||
role = msg.get("role", "unknown")
|
||||
content = msg.get("content") or ""
|
||||
if len(content) > 2000:
|
||||
content = content[:1000] + "\n...[truncated]...\n" + content[-500:]
|
||||
tool_calls = msg.get("tool_calls", [])
|
||||
if tool_calls:
|
||||
tool_names = [tc.get("function", {}).get("name", "?") for tc in tool_calls if isinstance(tc, dict)]
|
||||
content += f"\n[Tool calls: {', '.join(tool_names)}]"
|
||||
parts.append(f"[{role.upper()}]: {content}")
|
||||
|
||||
content_to_summarize = "\n\n".join(parts)
|
||||
prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history.
|
||||
|
||||
Write from a neutral perspective describing:
|
||||
1. What actions were taken (tool calls, searches, file operations)
|
||||
2. Key information or results obtained
|
||||
3. Important decisions or findings
|
||||
4. Relevant data, file names, or outputs
|
||||
|
||||
Keep factual and informative. Target ~{self.summary_target_tokens} tokens.
|
||||
|
||||
---
|
||||
TURNS TO SUMMARIZE:
|
||||
{content_to_summarize}
|
||||
---
|
||||
|
||||
Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
||||
|
||||
# 1. Try the auxiliary model (cheap/fast)
|
||||
if self.client:
|
||||
try:
|
||||
return self._call_summary_model(self.client, self.summary_model, prompt)
|
||||
except Exception as e:
|
||||
logging.warning(f"Failed to generate context summary with auxiliary model: {e}")
|
||||
|
||||
# 2. Fallback: try the user's main model endpoint
|
||||
fallback_client, fallback_model = self._get_fallback_client()
|
||||
if fallback_client is not None:
|
||||
try:
|
||||
logger.info("Retrying context summary with main model (%s)", fallback_model)
|
||||
summary = self._call_summary_model(fallback_client, fallback_model, prompt)
|
||||
self.client = fallback_client
|
||||
self.summary_model = fallback_model
|
||||
return summary
|
||||
except Exception as fallback_err:
|
||||
logging.warning(f"Main model summary also failed: {fallback_err}")
|
||||
|
||||
# 3. All models failed — return None so the caller drops turns without a summary
|
||||
logging.warning("Context compression: no model available for summary. Middle turns will be dropped without summary.")
|
||||
return None
|
||||
|
||||
def _call_summary_model(self, client, model: str, prompt: str) -> str:
|
||||
"""Make the actual LLM call to generate a summary. Raises on failure."""
|
||||
kwargs = {
|
||||
"model": model,
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
"temperature": 0.3,
|
||||
"timeout": 30.0,
|
||||
}
|
||||
# Most providers (OpenRouter, local models) use max_tokens.
|
||||
# Direct OpenAI with newer models (gpt-4o, o-series, gpt-5+)
|
||||
# requires max_completion_tokens instead.
|
||||
try:
|
||||
kwargs["max_tokens"] = self.summary_target_tokens * 2
|
||||
response = client.chat.completions.create(**kwargs)
|
||||
except Exception as first_err:
|
||||
if "max_tokens" in str(first_err) or "unsupported_parameter" in str(first_err):
|
||||
kwargs.pop("max_tokens", None)
|
||||
kwargs["max_completion_tokens"] = self.summary_target_tokens * 2
|
||||
response = client.chat.completions.create(**kwargs)
|
||||
else:
|
||||
raise
|
||||
|
||||
summary = response.choices[0].message.content.strip()
|
||||
if not summary.startswith("[CONTEXT SUMMARY]:"):
|
||||
summary = "[CONTEXT SUMMARY]: " + summary
|
||||
return summary
|
||||
|
||||
def _get_fallback_client(self):
|
||||
"""Try to build a fallback client from the main model's endpoint config.
|
||||
|
||||
When the primary auxiliary client fails (e.g. stale OpenRouter key), this
|
||||
creates a client using the user's active custom endpoint (OPENAI_BASE_URL)
|
||||
so compression can still produce a real summary instead of a static string.
|
||||
|
||||
Returns (client, model) or (None, None).
|
||||
"""
|
||||
custom_base = os.getenv("OPENAI_BASE_URL")
|
||||
custom_key = os.getenv("OPENAI_API_KEY")
|
||||
if not custom_base or not custom_key:
|
||||
return None, None
|
||||
|
||||
# Don't fallback to the same provider that just failed
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
if custom_base.rstrip("/") == OPENROUTER_BASE_URL.rstrip("/"):
|
||||
return None, None
|
||||
|
||||
model = os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or self.model
|
||||
try:
|
||||
from openai import OpenAI as _OpenAI
|
||||
client = _OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
logger.debug("Built fallback auxiliary client: %s via %s", model, custom_base)
|
||||
return client, model
|
||||
except Exception as exc:
|
||||
logger.debug("Could not build fallback auxiliary client: %s", exc)
|
||||
return None, None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Tool-call / tool-result pair integrity helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _get_tool_call_id(tc) -> str:
|
||||
"""Extract the call ID from a tool_call entry (dict or SimpleNamespace)."""
|
||||
if isinstance(tc, dict):
|
||||
return tc.get("id", "")
|
||||
return getattr(tc, "id", "") or ""
|
||||
|
||||
def _sanitize_tool_pairs(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Fix orphaned tool_call / tool_result pairs after compression.
|
||||
|
||||
Two failure modes:
|
||||
1. A tool *result* references a call_id whose assistant tool_call was
|
||||
removed (summarized/truncated). The API rejects this with
|
||||
"No tool call found for function call output with call_id ...".
|
||||
2. An assistant message has tool_calls whose results were dropped.
|
||||
The API rejects this because every tool_call must be followed by
|
||||
a tool result with the matching call_id.
|
||||
|
||||
This method removes orphaned results and inserts stub results for
|
||||
orphaned calls so the message list is always well-formed.
|
||||
"""
|
||||
surviving_call_ids: set = set()
|
||||
for msg in messages:
|
||||
if msg.get("role") == "assistant":
|
||||
for tc in msg.get("tool_calls") or []:
|
||||
cid = self._get_tool_call_id(tc)
|
||||
if cid:
|
||||
surviving_call_ids.add(cid)
|
||||
|
||||
result_call_ids: set = set()
|
||||
for msg in messages:
|
||||
if msg.get("role") == "tool":
|
||||
cid = msg.get("tool_call_id")
|
||||
if cid:
|
||||
result_call_ids.add(cid)
|
||||
|
||||
# 1. Remove tool results whose call_id has no matching assistant tool_call
|
||||
orphaned_results = result_call_ids - surviving_call_ids
|
||||
if orphaned_results:
|
||||
messages = [
|
||||
m for m in messages
|
||||
if not (m.get("role") == "tool" and m.get("tool_call_id") in orphaned_results)
|
||||
]
|
||||
if not self.quiet_mode:
|
||||
logger.info("Compression sanitizer: removed %d orphaned tool result(s)", len(orphaned_results))
|
||||
|
||||
# 2. Add stub results for assistant tool_calls whose results were dropped
|
||||
missing_results = surviving_call_ids - result_call_ids
|
||||
if missing_results:
|
||||
patched: List[Dict[str, Any]] = []
|
||||
for msg in messages:
|
||||
patched.append(msg)
|
||||
if msg.get("role") == "assistant":
|
||||
for tc in msg.get("tool_calls") or []:
|
||||
cid = self._get_tool_call_id(tc)
|
||||
if cid in missing_results:
|
||||
patched.append({
|
||||
"role": "tool",
|
||||
"content": "[Result from earlier conversation — see context summary above]",
|
||||
"tool_call_id": cid,
|
||||
})
|
||||
messages = patched
|
||||
if not self.quiet_mode:
|
||||
logger.info("Compression sanitizer: added %d stub tool result(s)", len(missing_results))
|
||||
|
||||
return messages
|
||||
|
||||
def _align_boundary_forward(self, messages: List[Dict[str, Any]], idx: int) -> int:
|
||||
"""Push a compress-start boundary forward past any orphan tool results.
|
||||
|
||||
If ``messages[idx]`` is a tool result, slide forward until we hit a
|
||||
non-tool message so we don't start the summarised region mid-group.
|
||||
"""
|
||||
while idx < len(messages) and messages[idx].get("role") == "tool":
|
||||
idx += 1
|
||||
return idx
|
||||
|
||||
def _align_boundary_backward(self, messages: List[Dict[str, Any]], idx: int) -> int:
|
||||
"""Pull a compress-end boundary backward to avoid splitting a
|
||||
tool_call / result group.
|
||||
|
||||
If the message just before ``idx`` is an assistant message with
|
||||
tool_calls, those tool results will start at ``idx`` and would be
|
||||
separated from their parent. Move backwards to include the whole
|
||||
group in the summarised region.
|
||||
"""
|
||||
if idx <= 0 or idx >= len(messages):
|
||||
return idx
|
||||
prev = messages[idx - 1]
|
||||
if prev.get("role") == "assistant" and prev.get("tool_calls"):
|
||||
# The results for this assistant turn sit at idx..idx+k.
|
||||
# Include the assistant message in the summarised region too.
|
||||
idx -= 1
|
||||
return idx
|
||||
|
||||
def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None) -> List[Dict[str, Any]]:
|
||||
"""Compress conversation messages by summarizing middle turns.
|
||||
|
||||
Keeps first N + last N turns, summarizes everything in between.
|
||||
After compression, orphaned tool_call / tool_result pairs are cleaned
|
||||
up so the API never receives mismatched IDs.
|
||||
"""
|
||||
n_messages = len(messages)
|
||||
if n_messages <= self.protect_first_n + self.protect_last_n + 1:
|
||||
if not self.quiet_mode:
|
||||
print(f"⚠️ Cannot compress: only {n_messages} messages (need > {self.protect_first_n + self.protect_last_n + 1})")
|
||||
return messages
|
||||
|
||||
compress_start = self.protect_first_n
|
||||
compress_end = n_messages - self.protect_last_n
|
||||
if compress_start >= compress_end:
|
||||
return messages
|
||||
|
||||
# Adjust boundaries to avoid splitting tool_call/result groups.
|
||||
compress_start = self._align_boundary_forward(messages, compress_start)
|
||||
compress_end = self._align_boundary_backward(messages, compress_end)
|
||||
if compress_start >= compress_end:
|
||||
return messages
|
||||
|
||||
turns_to_summarize = messages[compress_start:compress_end]
|
||||
display_tokens = current_tokens if current_tokens else self.last_prompt_tokens or estimate_messages_tokens_rough(messages)
|
||||
|
||||
if not self.quiet_mode:
|
||||
print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)")
|
||||
print(f" 📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})")
|
||||
|
||||
if not self.quiet_mode:
|
||||
print(f" 🗜️ Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)")
|
||||
|
||||
summary = self._generate_summary(turns_to_summarize)
|
||||
|
||||
compressed = []
|
||||
for i in range(compress_start):
|
||||
msg = messages[i].copy()
|
||||
if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
|
||||
msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns may be summarized to preserve context space.]"
|
||||
compressed.append(msg)
|
||||
|
||||
if summary:
|
||||
last_head_role = messages[compress_start - 1].get("role", "user") if compress_start > 0 else "user"
|
||||
summary_role = "user" if last_head_role in ("assistant", "tool") else "assistant"
|
||||
compressed.append({"role": summary_role, "content": summary})
|
||||
else:
|
||||
if not self.quiet_mode:
|
||||
print(" ⚠️ No summary model available — middle turns dropped without summary")
|
||||
|
||||
for i in range(compress_end, n_messages):
|
||||
compressed.append(messages[i].copy())
|
||||
|
||||
self.compression_count += 1
|
||||
|
||||
compressed = self._sanitize_tool_pairs(compressed)
|
||||
|
||||
if not self.quiet_mode:
|
||||
new_estimate = estimate_messages_tokens_rough(compressed)
|
||||
saved_estimate = display_tokens - new_estimate
|
||||
print(f" ✅ Compressed: {n_messages} → {len(compressed)} messages (~{saved_estimate:,} tokens saved)")
|
||||
print(f" 💡 Compression #{self.compression_count} complete")
|
||||
|
||||
return compressed
|
||||
469
agent/display.py
469
agent/display.py
@@ -1,469 +0,0 @@
|
||||
"""CLI presentation -- spinner, kawaii faces, tool preview formatting.
|
||||
|
||||
Pure display functions and classes with no AIAgent dependency.
|
||||
Used by AIAgent._execute_tool_calls for CLI feedback.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
|
||||
# ANSI escape codes for coloring tool failure indicators
|
||||
_RED = "\033[31m"
|
||||
_RESET = "\033[0m"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Tool preview (one-line summary of a tool call's primary argument)
|
||||
# =========================================================================
|
||||
|
||||
def build_tool_preview(tool_name: str, args: dict, max_len: int = 40) -> str:
|
||||
"""Build a short preview of a tool call's primary argument for display."""
|
||||
primary_args = {
|
||||
"terminal": "command", "web_search": "query", "web_extract": "urls",
|
||||
"read_file": "path", "write_file": "path", "patch": "path",
|
||||
"search_files": "pattern", "browser_navigate": "url",
|
||||
"browser_click": "ref", "browser_type": "text",
|
||||
"image_generate": "prompt", "text_to_speech": "text",
|
||||
"vision_analyze": "question", "mixture_of_agents": "user_prompt",
|
||||
"skill_view": "name", "skills_list": "category",
|
||||
"schedule_cronjob": "name",
|
||||
"execute_code": "code", "delegate_task": "goal",
|
||||
"clarify": "question", "skill_manage": "name",
|
||||
}
|
||||
|
||||
if tool_name == "process":
|
||||
action = args.get("action", "")
|
||||
sid = args.get("session_id", "")
|
||||
data = args.get("data", "")
|
||||
timeout_val = args.get("timeout")
|
||||
parts = [action]
|
||||
if sid:
|
||||
parts.append(sid[:16])
|
||||
if data:
|
||||
parts.append(f'"{data[:20]}"')
|
||||
if timeout_val and action == "wait":
|
||||
parts.append(f"{timeout_val}s")
|
||||
return " ".join(parts) if parts else None
|
||||
|
||||
if tool_name == "todo":
|
||||
todos_arg = args.get("todos")
|
||||
merge = args.get("merge", False)
|
||||
if todos_arg is None:
|
||||
return "reading task list"
|
||||
elif merge:
|
||||
return f"updating {len(todos_arg)} task(s)"
|
||||
else:
|
||||
return f"planning {len(todos_arg)} task(s)"
|
||||
|
||||
if tool_name == "session_search":
|
||||
query = args.get("query", "")
|
||||
return f"recall: \"{query[:25]}{'...' if len(query) > 25 else ''}\""
|
||||
|
||||
if tool_name == "memory":
|
||||
action = args.get("action", "")
|
||||
target = args.get("target", "")
|
||||
if action == "add":
|
||||
content = args.get("content", "")
|
||||
return f"+{target}: \"{content[:25]}{'...' if len(content) > 25 else ''}\""
|
||||
elif action == "replace":
|
||||
return f"~{target}: \"{args.get('old_text', '')[:20]}\""
|
||||
elif action == "remove":
|
||||
return f"-{target}: \"{args.get('old_text', '')[:20]}\""
|
||||
return action
|
||||
|
||||
if tool_name == "send_message":
|
||||
target = args.get("target", "?")
|
||||
msg = args.get("message", "")
|
||||
if len(msg) > 20:
|
||||
msg = msg[:17] + "..."
|
||||
return f"to {target}: \"{msg}\""
|
||||
|
||||
if tool_name.startswith("rl_"):
|
||||
rl_previews = {
|
||||
"rl_list_environments": "listing envs",
|
||||
"rl_select_environment": args.get("name", ""),
|
||||
"rl_get_current_config": "reading config",
|
||||
"rl_edit_config": f"{args.get('field', '')}={args.get('value', '')}",
|
||||
"rl_start_training": "starting",
|
||||
"rl_check_status": args.get("run_id", "")[:16],
|
||||
"rl_stop_training": f"stopping {args.get('run_id', '')[:16]}",
|
||||
"rl_get_results": args.get("run_id", "")[:16],
|
||||
"rl_list_runs": "listing runs",
|
||||
"rl_test_inference": f"{args.get('num_steps', 3)} steps",
|
||||
}
|
||||
return rl_previews.get(tool_name)
|
||||
|
||||
key = primary_args.get(tool_name)
|
||||
if not key:
|
||||
for fallback_key in ("query", "text", "command", "path", "name", "prompt", "code", "goal"):
|
||||
if fallback_key in args:
|
||||
key = fallback_key
|
||||
break
|
||||
|
||||
if not key or key not in args:
|
||||
return None
|
||||
|
||||
value = args[key]
|
||||
if isinstance(value, list):
|
||||
value = value[0] if value else ""
|
||||
|
||||
preview = str(value).strip()
|
||||
if not preview:
|
||||
return None
|
||||
if len(preview) > max_len:
|
||||
preview = preview[:max_len - 3] + "..."
|
||||
return preview
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# KawaiiSpinner
|
||||
# =========================================================================
|
||||
|
||||
class KawaiiSpinner:
|
||||
"""Animated spinner with kawaii faces for CLI feedback during tool execution."""
|
||||
|
||||
SPINNERS = {
|
||||
'dots': ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'],
|
||||
'bounce': ['⠁', '⠂', '⠄', '⡀', '⢀', '⠠', '⠐', '⠈'],
|
||||
'grow': ['▁', '▂', '▃', '▄', '▅', '▆', '▇', '█', '▇', '▆', '▅', '▄', '▃', '▂'],
|
||||
'arrows': ['←', '↖', '↑', '↗', '→', '↘', '↓', '↙'],
|
||||
'star': ['✶', '✷', '✸', '✹', '✺', '✹', '✸', '✷'],
|
||||
'moon': ['🌑', '🌒', '🌓', '🌔', '🌕', '🌖', '🌗', '🌘'],
|
||||
'pulse': ['◜', '◠', '◝', '◞', '◡', '◟'],
|
||||
'brain': ['🧠', '💭', '💡', '✨', '💫', '🌟', '💡', '💭'],
|
||||
'sparkle': ['⁺', '˚', '*', '✧', '✦', '✧', '*', '˚'],
|
||||
}
|
||||
|
||||
KAWAII_WAITING = [
|
||||
"(。◕‿◕。)", "(◕‿◕✿)", "٩(◕‿◕。)۶", "(✿◠‿◠)", "( ˘▽˘)っ",
|
||||
"♪(´ε` )", "(◕ᴗ◕✿)", "ヾ(^∇^)", "(≧◡≦)", "(★ω★)",
|
||||
]
|
||||
|
||||
KAWAII_THINKING = [
|
||||
"(。•́︿•̀。)", "(◔_◔)", "(¬‿¬)", "( •_•)>⌐■-■", "(⌐■_■)",
|
||||
"(´・_・`)", "◉_◉", "(°ロ°)", "( ˘⌣˘)♡", "ヽ(>∀<☆)☆",
|
||||
"٩(๑❛ᴗ❛๑)۶", "(⊙_⊙)", "(¬_¬)", "( ͡° ͜ʖ ͡°)", "ಠ_ಠ",
|
||||
]
|
||||
|
||||
THINKING_VERBS = [
|
||||
"pondering", "contemplating", "musing", "cogitating", "ruminating",
|
||||
"deliberating", "mulling", "reflecting", "processing", "reasoning",
|
||||
"analyzing", "computing", "synthesizing", "formulating", "brainstorming",
|
||||
]
|
||||
|
||||
def __init__(self, message: str = "", spinner_type: str = 'dots'):
|
||||
self.message = message
|
||||
self.spinner_frames = self.SPINNERS.get(spinner_type, self.SPINNERS['dots'])
|
||||
self.running = False
|
||||
self.thread = None
|
||||
self.frame_idx = 0
|
||||
self.start_time = None
|
||||
self.last_line_len = 0
|
||||
# Capture stdout NOW, before any redirect_stdout(devnull) from
|
||||
# child agents can replace sys.stdout with a black hole.
|
||||
self._out = sys.stdout
|
||||
|
||||
def _write(self, text: str, end: str = '\n', flush: bool = False):
|
||||
"""Write to the stdout captured at spinner creation time."""
|
||||
try:
|
||||
self._out.write(text + end)
|
||||
if flush:
|
||||
self._out.flush()
|
||||
except (ValueError, OSError):
|
||||
pass
|
||||
|
||||
def _animate(self):
|
||||
while self.running:
|
||||
if os.getenv("HERMES_SPINNER_PAUSE"):
|
||||
time.sleep(0.1)
|
||||
continue
|
||||
frame = self.spinner_frames[self.frame_idx % len(self.spinner_frames)]
|
||||
elapsed = time.time() - self.start_time
|
||||
line = f" {frame} {self.message} ({elapsed:.1f}s)"
|
||||
pad = max(self.last_line_len - len(line), 0)
|
||||
self._write(f"\r{line}{' ' * pad}", end='', flush=True)
|
||||
self.last_line_len = len(line)
|
||||
self.frame_idx += 1
|
||||
time.sleep(0.12)
|
||||
|
||||
def start(self):
|
||||
if self.running:
|
||||
return
|
||||
self.running = True
|
||||
self.start_time = time.time()
|
||||
self.thread = threading.Thread(target=self._animate, daemon=True)
|
||||
self.thread.start()
|
||||
|
||||
def update_text(self, new_message: str):
|
||||
self.message = new_message
|
||||
|
||||
def print_above(self, text: str):
|
||||
"""Print a line above the spinner without disrupting animation.
|
||||
|
||||
Clears the current spinner line, prints the text, and lets the
|
||||
next animation tick redraw the spinner on the line below.
|
||||
Thread-safe: uses the captured stdout reference (self._out).
|
||||
Works inside redirect_stdout(devnull) because _write bypasses
|
||||
sys.stdout and writes to the stdout captured at spinner creation.
|
||||
"""
|
||||
if not self.running:
|
||||
self._write(f" {text}", flush=True)
|
||||
return
|
||||
# Clear spinner line with spaces (not \033[K) to avoid garbled escape
|
||||
# codes when prompt_toolkit's patch_stdout is active — same approach
|
||||
# as stop(). Then print text; spinner redraws on next tick.
|
||||
blanks = ' ' * max(self.last_line_len + 5, 40)
|
||||
self._write(f"\r{blanks}\r {text}", flush=True)
|
||||
|
||||
def stop(self, final_message: str = None):
|
||||
self.running = False
|
||||
if self.thread:
|
||||
self.thread.join(timeout=0.5)
|
||||
# Clear the spinner line with spaces instead of \033[K to avoid
|
||||
# garbled escape codes when prompt_toolkit's patch_stdout is active.
|
||||
blanks = ' ' * max(self.last_line_len + 5, 40)
|
||||
self._write(f"\r{blanks}\r", end='', flush=True)
|
||||
if final_message:
|
||||
self._write(f" {final_message}", flush=True)
|
||||
|
||||
def __enter__(self):
|
||||
self.start()
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
self.stop()
|
||||
return False
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Kawaii face arrays (used by AIAgent._execute_tool_calls for spinner text)
|
||||
# =========================================================================
|
||||
|
||||
KAWAII_SEARCH = [
|
||||
"♪(´ε` )", "(。◕‿◕。)", "ヾ(^∇^)", "(◕ᴗ◕✿)", "( ˘▽˘)っ",
|
||||
"٩(◕‿◕。)۶", "(✿◠‿◠)", "♪~(´ε` )", "(ノ´ヮ`)ノ*:・゚✧", "\(◎o◎)/",
|
||||
]
|
||||
KAWAII_READ = [
|
||||
"φ(゜▽゜*)♪", "( ˘▽˘)っ", "(⌐■_■)", "٩(。•́‿•̀。)۶", "(◕‿◕✿)",
|
||||
"ヾ(@⌒ー⌒@)ノ", "(✧ω✧)", "♪(๑ᴖ◡ᴖ๑)♪", "(≧◡≦)", "( ´ ▽ ` )ノ",
|
||||
]
|
||||
KAWAII_TERMINAL = [
|
||||
"ヽ(>∀<☆)ノ", "(ノ°∀°)ノ", "٩(^ᴗ^)۶", "ヾ(⌐■_■)ノ♪", "(•̀ᴗ•́)و",
|
||||
"┗(^0^)┓", "(`・ω・´)", "\( ̄▽ ̄)/", "(ง •̀_•́)ง", "ヽ(´▽`)/",
|
||||
]
|
||||
KAWAII_BROWSER = [
|
||||
"(ノ°∀°)ノ", "(☞゚ヮ゚)☞", "( ͡° ͜ʖ ͡°)", "┌( ಠ_ಠ)┘", "(⊙_⊙)?",
|
||||
"ヾ(•ω•`)o", "( ̄ω ̄)", "( ˇωˇ )", "(ᵔᴥᵔ)", "\(◎o◎)/",
|
||||
]
|
||||
KAWAII_CREATE = [
|
||||
"✧*。٩(ˊᗜˋ*)و✧", "(ノ◕ヮ◕)ノ*:・゚✧", "ヽ(>∀<☆)ノ", "٩(♡ε♡)۶", "(◕‿◕)♡",
|
||||
"✿◕ ‿ ◕✿", "(*≧▽≦)", "ヾ(^-^)ノ", "(☆▽☆)", "°˖✧◝(⁰▿⁰)◜✧˖°",
|
||||
]
|
||||
KAWAII_SKILL = [
|
||||
"ヾ(@⌒ー⌒@)ノ", "(๑˃ᴗ˂)ﻭ", "٩(◕‿◕。)۶", "(✿╹◡╹)", "ヽ(・∀・)ノ",
|
||||
"(ノ´ヮ`)ノ*:・゚✧", "♪(๑ᴖ◡ᴖ๑)♪", "(◠‿◠)", "٩(ˊᗜˋ*)و", "(^▽^)",
|
||||
"ヾ(^∇^)", "(★ω★)/", "٩(。•́‿•̀。)۶", "(◕ᴗ◕✿)", "\(◎o◎)/",
|
||||
"(✧ω✧)", "ヽ(>∀<☆)ノ", "( ˘▽˘)っ", "(≧◡≦) ♡", "ヾ( ̄▽ ̄)",
|
||||
]
|
||||
KAWAII_THINK = [
|
||||
"(っ°Д°;)っ", "(;′⌒`)", "(・_・ヾ", "( ´_ゝ`)", "( ̄ヘ ̄)",
|
||||
"(。-`ω´-)", "( ˘︹˘ )", "(¬_¬)", "ヽ(ー_ー )ノ", "(;一_一)",
|
||||
]
|
||||
KAWAII_GENERIC = [
|
||||
"♪(´ε` )", "(◕‿◕✿)", "ヾ(^∇^)", "٩(◕‿◕。)۶", "(✿◠‿◠)",
|
||||
"(ノ´ヮ`)ノ*:・゚✧", "ヽ(>∀<☆)ノ", "(☆▽☆)", "( ˘▽˘)っ", "(≧◡≦)",
|
||||
]
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Cute tool message (completion line that replaces the spinner)
|
||||
# =========================================================================
|
||||
|
||||
def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]:
|
||||
"""Inspect a tool result string for signs of failure.
|
||||
|
||||
Returns ``(is_failure, suffix)`` where *suffix* is an informational tag
|
||||
like ``" [exit 1]"`` for terminal failures, or ``" [error]"`` for generic
|
||||
failures. On success, returns ``(False, "")``.
|
||||
"""
|
||||
if result is None:
|
||||
return False, ""
|
||||
|
||||
if tool_name == "terminal":
|
||||
try:
|
||||
data = json.loads(result)
|
||||
exit_code = data.get("exit_code")
|
||||
if exit_code is not None and exit_code != 0:
|
||||
return True, f" [exit {exit_code}]"
|
||||
except (json.JSONDecodeError, TypeError, AttributeError):
|
||||
pass
|
||||
return False, ""
|
||||
|
||||
# Memory-specific: distinguish "full" from real errors
|
||||
if tool_name == "memory":
|
||||
try:
|
||||
data = json.loads(result)
|
||||
if data.get("success") is False and "exceed the limit" in data.get("error", ""):
|
||||
return True, " [full]"
|
||||
except (json.JSONDecodeError, TypeError, AttributeError):
|
||||
pass
|
||||
|
||||
# Generic heuristic for non-terminal tools
|
||||
lower = result[:500].lower()
|
||||
if '"error"' in lower or '"failed"' in lower or result.startswith("Error"):
|
||||
return True, " [error]"
|
||||
|
||||
return False, ""
|
||||
|
||||
|
||||
def get_cute_tool_message(
|
||||
tool_name: str, args: dict, duration: float, result: str | None = None,
|
||||
) -> str:
|
||||
"""Generate a formatted tool completion line for CLI quiet mode.
|
||||
|
||||
Format: ``| {emoji} {verb:9} {detail} {duration}``
|
||||
|
||||
When *result* is provided the line is checked for failure indicators.
|
||||
Failed tool calls get a red prefix and an informational suffix.
|
||||
"""
|
||||
dur = f"{duration:.1f}s"
|
||||
is_failure, failure_suffix = _detect_tool_failure(tool_name, result)
|
||||
|
||||
def _trunc(s, n=40):
|
||||
s = str(s)
|
||||
return (s[:n-3] + "...") if len(s) > n else s
|
||||
|
||||
def _path(p, n=35):
|
||||
p = str(p)
|
||||
return ("..." + p[-(n-3):]) if len(p) > n else p
|
||||
|
||||
def _wrap(line: str) -> str:
|
||||
"""Append failure suffix when the tool failed."""
|
||||
if not is_failure:
|
||||
return line
|
||||
return f"{line}{failure_suffix}"
|
||||
|
||||
if tool_name == "web_search":
|
||||
return _wrap(f"┊ 🔍 search {_trunc(args.get('query', ''), 42)} {dur}")
|
||||
if tool_name == "web_extract":
|
||||
urls = args.get("urls", [])
|
||||
if urls:
|
||||
url = urls[0] if isinstance(urls, list) else str(urls)
|
||||
domain = url.replace("https://", "").replace("http://", "").split("/")[0]
|
||||
extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
|
||||
return _wrap(f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}")
|
||||
return _wrap(f"┊ 📄 fetch pages {dur}")
|
||||
if tool_name == "web_crawl":
|
||||
url = args.get("url", "")
|
||||
domain = url.replace("https://", "").replace("http://", "").split("/")[0]
|
||||
return _wrap(f"┊ 🕸️ crawl {_trunc(domain, 35)} {dur}")
|
||||
if tool_name == "terminal":
|
||||
return _wrap(f"┊ 💻 $ {_trunc(args.get('command', ''), 42)} {dur}")
|
||||
if tool_name == "process":
|
||||
action = args.get("action", "?")
|
||||
sid = args.get("session_id", "")[:12]
|
||||
labels = {"list": "ls processes", "poll": f"poll {sid}", "log": f"log {sid}",
|
||||
"wait": f"wait {sid}", "kill": f"kill {sid}", "write": f"write {sid}", "submit": f"submit {sid}"}
|
||||
return _wrap(f"┊ ⚙️ proc {labels.get(action, f'{action} {sid}')} {dur}")
|
||||
if tool_name == "read_file":
|
||||
return _wrap(f"┊ 📖 read {_path(args.get('path', ''))} {dur}")
|
||||
if tool_name == "write_file":
|
||||
return _wrap(f"┊ ✍️ write {_path(args.get('path', ''))} {dur}")
|
||||
if tool_name == "patch":
|
||||
return _wrap(f"┊ 🔧 patch {_path(args.get('path', ''))} {dur}")
|
||||
if tool_name == "search_files":
|
||||
pattern = _trunc(args.get("pattern", ""), 35)
|
||||
target = args.get("target", "content")
|
||||
verb = "find" if target == "files" else "grep"
|
||||
return _wrap(f"┊ 🔎 {verb:9} {pattern} {dur}")
|
||||
if tool_name == "browser_navigate":
|
||||
url = args.get("url", "")
|
||||
domain = url.replace("https://", "").replace("http://", "").split("/")[0]
|
||||
return _wrap(f"┊ 🌐 navigate {_trunc(domain, 35)} {dur}")
|
||||
if tool_name == "browser_snapshot":
|
||||
mode = "full" if args.get("full") else "compact"
|
||||
return _wrap(f"┊ 📸 snapshot {mode} {dur}")
|
||||
if tool_name == "browser_click":
|
||||
return _wrap(f"┊ 👆 click {args.get('ref', '?')} {dur}")
|
||||
if tool_name == "browser_type":
|
||||
return _wrap(f"┊ ⌨️ type \"{_trunc(args.get('text', ''), 30)}\" {dur}")
|
||||
if tool_name == "browser_scroll":
|
||||
d = args.get("direction", "down")
|
||||
arrow = {"down": "↓", "up": "↑", "right": "→", "left": "←"}.get(d, "↓")
|
||||
return _wrap(f"┊ {arrow} scroll {d} {dur}")
|
||||
if tool_name == "browser_back":
|
||||
return _wrap(f"┊ ◀️ back {dur}")
|
||||
if tool_name == "browser_press":
|
||||
return _wrap(f"┊ ⌨️ press {args.get('key', '?')} {dur}")
|
||||
if tool_name == "browser_close":
|
||||
return _wrap(f"┊ 🚪 close browser {dur}")
|
||||
if tool_name == "browser_get_images":
|
||||
return _wrap(f"┊ 🖼️ images extracting {dur}")
|
||||
if tool_name == "browser_vision":
|
||||
return _wrap(f"┊ 👁️ vision analyzing page {dur}")
|
||||
if tool_name == "todo":
|
||||
todos_arg = args.get("todos")
|
||||
merge = args.get("merge", False)
|
||||
if todos_arg is None:
|
||||
return _wrap(f"┊ 📋 plan reading tasks {dur}")
|
||||
elif merge:
|
||||
return _wrap(f"┊ 📋 plan update {len(todos_arg)} task(s) {dur}")
|
||||
else:
|
||||
return _wrap(f"┊ 📋 plan {len(todos_arg)} task(s) {dur}")
|
||||
if tool_name == "session_search":
|
||||
return _wrap(f"┊ 🔍 recall \"{_trunc(args.get('query', ''), 35)}\" {dur}")
|
||||
if tool_name == "memory":
|
||||
action = args.get("action", "?")
|
||||
target = args.get("target", "")
|
||||
if action == "add":
|
||||
return _wrap(f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}")
|
||||
elif action == "replace":
|
||||
return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
|
||||
elif action == "remove":
|
||||
return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
|
||||
return _wrap(f"┊ 🧠 memory {action} {dur}")
|
||||
if tool_name == "skills_list":
|
||||
return _wrap(f"┊ 📚 skills list {args.get('category', 'all')} {dur}")
|
||||
if tool_name == "skill_view":
|
||||
return _wrap(f"┊ 📚 skill {_trunc(args.get('name', ''), 30)} {dur}")
|
||||
if tool_name == "image_generate":
|
||||
return _wrap(f"┊ 🎨 create {_trunc(args.get('prompt', ''), 35)} {dur}")
|
||||
if tool_name == "text_to_speech":
|
||||
return _wrap(f"┊ 🔊 speak {_trunc(args.get('text', ''), 30)} {dur}")
|
||||
if tool_name == "vision_analyze":
|
||||
return _wrap(f"┊ 👁️ vision {_trunc(args.get('question', ''), 30)} {dur}")
|
||||
if tool_name == "mixture_of_agents":
|
||||
return _wrap(f"┊ 🧠 reason {_trunc(args.get('user_prompt', ''), 30)} {dur}")
|
||||
if tool_name == "send_message":
|
||||
return _wrap(f"┊ 📨 send {args.get('target', '?')}: \"{_trunc(args.get('message', ''), 25)}\" {dur}")
|
||||
if tool_name == "schedule_cronjob":
|
||||
return _wrap(f"┊ ⏰ schedule {_trunc(args.get('name', args.get('prompt', 'task')), 30)} {dur}")
|
||||
if tool_name == "list_cronjobs":
|
||||
return _wrap(f"┊ ⏰ jobs listing {dur}")
|
||||
if tool_name == "remove_cronjob":
|
||||
return _wrap(f"┊ ⏰ remove job {args.get('job_id', '?')} {dur}")
|
||||
if tool_name.startswith("rl_"):
|
||||
rl = {
|
||||
"rl_list_environments": "list envs", "rl_select_environment": f"select {args.get('name', '')}",
|
||||
"rl_get_current_config": "get config", "rl_edit_config": f"set {args.get('field', '?')}",
|
||||
"rl_start_training": "start training", "rl_check_status": f"status {args.get('run_id', '?')[:12]}",
|
||||
"rl_stop_training": f"stop {args.get('run_id', '?')[:12]}", "rl_get_results": f"results {args.get('run_id', '?')[:12]}",
|
||||
"rl_list_runs": "list runs", "rl_test_inference": "test inference",
|
||||
}
|
||||
return _wrap(f"┊ 🧪 rl {rl.get(tool_name, tool_name.replace('rl_', ''))} {dur}")
|
||||
if tool_name == "execute_code":
|
||||
code = args.get("code", "")
|
||||
first_line = code.strip().split("\n")[0] if code.strip() else ""
|
||||
return _wrap(f"┊ 🐍 exec {_trunc(first_line, 35)} {dur}")
|
||||
if tool_name == "delegate_task":
|
||||
tasks = args.get("tasks")
|
||||
if tasks and isinstance(tasks, list):
|
||||
return _wrap(f"┊ 🔀 delegate {len(tasks)} parallel tasks {dur}")
|
||||
return _wrap(f"┊ 🔀 delegate {_trunc(args.get('goal', ''), 35)} {dur}")
|
||||
|
||||
preview = build_tool_preview(tool_name, args) or ""
|
||||
return _wrap(f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)} {dur}")
|
||||
@@ -1,818 +0,0 @@
|
||||
"""
|
||||
Session Insights Engine for Hermes Agent.
|
||||
|
||||
Analyzes historical session data from the SQLite state database to produce
|
||||
comprehensive usage insights — token consumption, cost estimates, tool usage
|
||||
patterns, activity trends, model/platform breakdowns, and session metrics.
|
||||
|
||||
Inspired by Claude Code's /insights command, adapted for Hermes Agent's
|
||||
multi-platform architecture with additional cost estimation and platform
|
||||
breakdown capabilities.
|
||||
|
||||
Usage:
|
||||
from agent.insights import InsightsEngine
|
||||
engine = InsightsEngine(db)
|
||||
report = engine.generate(days=30)
|
||||
print(engine.format_terminal(report))
|
||||
"""
|
||||
|
||||
import json
|
||||
import time
|
||||
from collections import Counter, defaultdict
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
# =========================================================================
|
||||
# Model pricing (USD per million tokens) — approximate as of early 2026
|
||||
# =========================================================================
|
||||
MODEL_PRICING = {
|
||||
# OpenAI
|
||||
"gpt-4o": {"input": 2.50, "output": 10.00},
|
||||
"gpt-4o-mini": {"input": 0.15, "output": 0.60},
|
||||
"gpt-4.1": {"input": 2.00, "output": 8.00},
|
||||
"gpt-4.1-mini": {"input": 0.40, "output": 1.60},
|
||||
"gpt-4.1-nano": {"input": 0.10, "output": 0.40},
|
||||
"gpt-4.5-preview": {"input": 75.00, "output": 150.00},
|
||||
"gpt-5": {"input": 10.00, "output": 30.00},
|
||||
"gpt-5.4": {"input": 10.00, "output": 30.00},
|
||||
"o3": {"input": 10.00, "output": 40.00},
|
||||
"o3-mini": {"input": 1.10, "output": 4.40},
|
||||
"o4-mini": {"input": 1.10, "output": 4.40},
|
||||
# Anthropic
|
||||
"claude-opus-4-20250514": {"input": 15.00, "output": 75.00},
|
||||
"claude-sonnet-4-20250514": {"input": 3.00, "output": 15.00},
|
||||
"claude-3-5-sonnet-20241022": {"input": 3.00, "output": 15.00},
|
||||
"claude-3-5-haiku-20241022": {"input": 0.80, "output": 4.00},
|
||||
"claude-3-opus-20240229": {"input": 15.00, "output": 75.00},
|
||||
"claude-3-haiku-20240307": {"input": 0.25, "output": 1.25},
|
||||
# DeepSeek
|
||||
"deepseek-chat": {"input": 0.14, "output": 0.28},
|
||||
"deepseek-reasoner": {"input": 0.55, "output": 2.19},
|
||||
# Google
|
||||
"gemini-2.5-pro": {"input": 1.25, "output": 10.00},
|
||||
"gemini-2.5-flash": {"input": 0.15, "output": 0.60},
|
||||
"gemini-2.0-flash": {"input": 0.10, "output": 0.40},
|
||||
# Meta (via providers)
|
||||
"llama-4-maverick": {"input": 0.50, "output": 0.70},
|
||||
"llama-4-scout": {"input": 0.20, "output": 0.30},
|
||||
# Z.AI / GLM (direct provider — pricing not published externally, treat as local)
|
||||
"glm-5": {"input": 0.0, "output": 0.0},
|
||||
"glm-4.7": {"input": 0.0, "output": 0.0},
|
||||
"glm-4.5": {"input": 0.0, "output": 0.0},
|
||||
"glm-4.5-flash": {"input": 0.0, "output": 0.0},
|
||||
# Kimi / Moonshot (direct provider — pricing not published externally, treat as local)
|
||||
"kimi-k2.5": {"input": 0.0, "output": 0.0},
|
||||
"kimi-k2-thinking": {"input": 0.0, "output": 0.0},
|
||||
"kimi-k2-turbo-preview": {"input": 0.0, "output": 0.0},
|
||||
"kimi-k2-0905-preview": {"input": 0.0, "output": 0.0},
|
||||
# MiniMax (direct provider — pricing not published externally, treat as local)
|
||||
"MiniMax-M2.5": {"input": 0.0, "output": 0.0},
|
||||
"MiniMax-M2.5-highspeed": {"input": 0.0, "output": 0.0},
|
||||
"MiniMax-M2.1": {"input": 0.0, "output": 0.0},
|
||||
}
|
||||
|
||||
# Fallback: unknown/custom models get zero cost (we can't assume pricing
|
||||
# for self-hosted models, custom OAI endpoints, local inference, etc.)
|
||||
_DEFAULT_PRICING = {"input": 0.0, "output": 0.0}
|
||||
|
||||
|
||||
def _has_known_pricing(model_name: str) -> bool:
|
||||
"""Check if a model has known pricing (vs unknown/custom endpoint)."""
|
||||
return _get_pricing(model_name) is not _DEFAULT_PRICING
|
||||
|
||||
|
||||
def _get_pricing(model_name: str) -> Dict[str, float]:
|
||||
"""Look up pricing for a model. Uses fuzzy matching on model name.
|
||||
|
||||
Returns _DEFAULT_PRICING (zero cost) for unknown/custom models —
|
||||
we can't assume costs for self-hosted endpoints, local inference, etc.
|
||||
"""
|
||||
if not model_name:
|
||||
return _DEFAULT_PRICING
|
||||
|
||||
# Strip provider prefix (e.g., "anthropic/claude-..." -> "claude-...")
|
||||
bare = model_name.split("/")[-1].lower()
|
||||
|
||||
# Exact match first
|
||||
if bare in MODEL_PRICING:
|
||||
return MODEL_PRICING[bare]
|
||||
|
||||
# Fuzzy prefix match — prefer the LONGEST matching key to avoid
|
||||
# e.g. "gpt-4o" matching before "gpt-4o-mini" for "gpt-4o-mini-2024-07-18"
|
||||
best_match = None
|
||||
best_len = 0
|
||||
for key, price in MODEL_PRICING.items():
|
||||
if bare.startswith(key) and len(key) > best_len:
|
||||
best_match = price
|
||||
best_len = len(key)
|
||||
if best_match:
|
||||
return best_match
|
||||
|
||||
# Keyword heuristics (checked in most-specific-first order)
|
||||
if "opus" in bare:
|
||||
return {"input": 15.00, "output": 75.00}
|
||||
if "sonnet" in bare:
|
||||
return {"input": 3.00, "output": 15.00}
|
||||
if "haiku" in bare:
|
||||
return {"input": 0.80, "output": 4.00}
|
||||
if "gpt-4o-mini" in bare:
|
||||
return {"input": 0.15, "output": 0.60}
|
||||
if "gpt-4o" in bare:
|
||||
return {"input": 2.50, "output": 10.00}
|
||||
if "gpt-5" in bare:
|
||||
return {"input": 10.00, "output": 30.00}
|
||||
if "deepseek" in bare:
|
||||
return {"input": 0.14, "output": 0.28}
|
||||
if "gemini" in bare:
|
||||
return {"input": 0.15, "output": 0.60}
|
||||
|
||||
return _DEFAULT_PRICING
|
||||
|
||||
|
||||
def _estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float:
|
||||
"""Estimate the USD cost for a given model and token counts."""
|
||||
pricing = _get_pricing(model)
|
||||
return (input_tokens * pricing["input"] + output_tokens * pricing["output"]) / 1_000_000
|
||||
|
||||
|
||||
def _format_duration(seconds: float) -> str:
|
||||
"""Format seconds into a human-readable duration string."""
|
||||
if seconds < 60:
|
||||
return f"{seconds:.0f}s"
|
||||
minutes = seconds / 60
|
||||
if minutes < 60:
|
||||
return f"{minutes:.0f}m"
|
||||
hours = minutes / 60
|
||||
if hours < 24:
|
||||
remaining_min = int(minutes % 60)
|
||||
return f"{int(hours)}h {remaining_min}m" if remaining_min else f"{int(hours)}h"
|
||||
days = hours / 24
|
||||
return f"{days:.1f}d"
|
||||
|
||||
|
||||
def _bar_chart(values: List[int], max_width: int = 20) -> List[str]:
|
||||
"""Create simple horizontal bar chart strings from values."""
|
||||
peak = max(values) if values else 1
|
||||
if peak == 0:
|
||||
return ["" for _ in values]
|
||||
return ["█" * max(1, int(v / peak * max_width)) if v > 0 else "" for v in values]
|
||||
|
||||
|
||||
class InsightsEngine:
|
||||
"""
|
||||
Analyzes session history and produces usage insights.
|
||||
|
||||
Works directly with a SessionDB instance (or raw sqlite3 connection)
|
||||
to query session and message data.
|
||||
"""
|
||||
|
||||
def __init__(self, db):
|
||||
"""
|
||||
Initialize with a SessionDB instance.
|
||||
|
||||
Args:
|
||||
db: A SessionDB instance (from hermes_state.py)
|
||||
"""
|
||||
self.db = db
|
||||
self._conn = db._conn
|
||||
|
||||
def generate(self, days: int = 30, source: str = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Generate a complete insights report.
|
||||
|
||||
Args:
|
||||
days: Number of days to look back (default: 30)
|
||||
source: Optional filter by source platform
|
||||
|
||||
Returns:
|
||||
Dict with all computed insights
|
||||
"""
|
||||
cutoff = time.time() - (days * 86400)
|
||||
|
||||
# Gather raw data
|
||||
sessions = self._get_sessions(cutoff, source)
|
||||
tool_usage = self._get_tool_usage(cutoff, source)
|
||||
message_stats = self._get_message_stats(cutoff, source)
|
||||
|
||||
if not sessions:
|
||||
return {
|
||||
"days": days,
|
||||
"source_filter": source,
|
||||
"empty": True,
|
||||
"overview": {},
|
||||
"models": [],
|
||||
"platforms": [],
|
||||
"tools": [],
|
||||
"activity": {},
|
||||
"top_sessions": [],
|
||||
}
|
||||
|
||||
# Compute insights
|
||||
overview = self._compute_overview(sessions, message_stats)
|
||||
models = self._compute_model_breakdown(sessions)
|
||||
platforms = self._compute_platform_breakdown(sessions)
|
||||
tools = self._compute_tool_breakdown(tool_usage)
|
||||
activity = self._compute_activity_patterns(sessions)
|
||||
top_sessions = self._compute_top_sessions(sessions)
|
||||
|
||||
return {
|
||||
"days": days,
|
||||
"source_filter": source,
|
||||
"empty": False,
|
||||
"generated_at": time.time(),
|
||||
"overview": overview,
|
||||
"models": models,
|
||||
"platforms": platforms,
|
||||
"tools": tools,
|
||||
"activity": activity,
|
||||
"top_sessions": top_sessions,
|
||||
}
|
||||
|
||||
# =========================================================================
|
||||
# Data gathering (SQL queries)
|
||||
# =========================================================================
|
||||
|
||||
# Columns we actually need (skip system_prompt, model_config blobs)
|
||||
_SESSION_COLS = ("id, source, model, started_at, ended_at, "
|
||||
"message_count, tool_call_count, input_tokens, output_tokens")
|
||||
|
||||
def _get_sessions(self, cutoff: float, source: str = None) -> List[Dict]:
|
||||
"""Fetch sessions within the time window."""
|
||||
if source:
|
||||
cursor = self._conn.execute(
|
||||
f"""SELECT {self._SESSION_COLS} FROM sessions
|
||||
WHERE started_at >= ? AND source = ?
|
||||
ORDER BY started_at DESC""",
|
||||
(cutoff, source),
|
||||
)
|
||||
else:
|
||||
cursor = self._conn.execute(
|
||||
f"""SELECT {self._SESSION_COLS} FROM sessions
|
||||
WHERE started_at >= ?
|
||||
ORDER BY started_at DESC""",
|
||||
(cutoff,),
|
||||
)
|
||||
return [dict(row) for row in cursor.fetchall()]
|
||||
|
||||
def _get_tool_usage(self, cutoff: float, source: str = None) -> List[Dict]:
|
||||
"""Get tool call counts from messages.
|
||||
|
||||
Uses two sources:
|
||||
1. tool_name column on 'tool' role messages (set by gateway)
|
||||
2. tool_calls JSON on 'assistant' role messages (covers CLI where
|
||||
tool_name is not populated on tool responses)
|
||||
"""
|
||||
tool_counts = Counter()
|
||||
|
||||
# Source 1: explicit tool_name on tool response messages
|
||||
if source:
|
||||
cursor = self._conn.execute(
|
||||
"""SELECT m.tool_name, COUNT(*) as count
|
||||
FROM messages m
|
||||
JOIN sessions s ON s.id = m.session_id
|
||||
WHERE s.started_at >= ? AND s.source = ?
|
||||
AND m.role = 'tool' AND m.tool_name IS NOT NULL
|
||||
GROUP BY m.tool_name
|
||||
ORDER BY count DESC""",
|
||||
(cutoff, source),
|
||||
)
|
||||
else:
|
||||
cursor = self._conn.execute(
|
||||
"""SELECT m.tool_name, COUNT(*) as count
|
||||
FROM messages m
|
||||
JOIN sessions s ON s.id = m.session_id
|
||||
WHERE s.started_at >= ?
|
||||
AND m.role = 'tool' AND m.tool_name IS NOT NULL
|
||||
GROUP BY m.tool_name
|
||||
ORDER BY count DESC""",
|
||||
(cutoff,),
|
||||
)
|
||||
for row in cursor.fetchall():
|
||||
tool_counts[row["tool_name"]] += row["count"]
|
||||
|
||||
# Source 2: extract from tool_calls JSON on assistant messages
|
||||
# (covers CLI sessions where tool_name is NULL on tool responses)
|
||||
if source:
|
||||
cursor2 = self._conn.execute(
|
||||
"""SELECT m.tool_calls
|
||||
FROM messages m
|
||||
JOIN sessions s ON s.id = m.session_id
|
||||
WHERE s.started_at >= ? AND s.source = ?
|
||||
AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
|
||||
(cutoff, source),
|
||||
)
|
||||
else:
|
||||
cursor2 = self._conn.execute(
|
||||
"""SELECT m.tool_calls
|
||||
FROM messages m
|
||||
JOIN sessions s ON s.id = m.session_id
|
||||
WHERE s.started_at >= ?
|
||||
AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
|
||||
(cutoff,),
|
||||
)
|
||||
|
||||
tool_calls_counts = Counter()
|
||||
for row in cursor2.fetchall():
|
||||
try:
|
||||
calls = row["tool_calls"]
|
||||
if isinstance(calls, str):
|
||||
calls = json.loads(calls)
|
||||
if isinstance(calls, list):
|
||||
for call in calls:
|
||||
func = call.get("function", {}) if isinstance(call, dict) else {}
|
||||
name = func.get("name")
|
||||
if name:
|
||||
tool_calls_counts[name] += 1
|
||||
except (json.JSONDecodeError, TypeError, AttributeError):
|
||||
continue
|
||||
|
||||
# Merge: prefer tool_name source, supplement with tool_calls source
|
||||
# for tools not already counted
|
||||
if not tool_counts and tool_calls_counts:
|
||||
# No tool_name data at all — use tool_calls exclusively
|
||||
tool_counts = tool_calls_counts
|
||||
elif tool_counts and tool_calls_counts:
|
||||
# Both sources have data — use whichever has the higher count per tool
|
||||
# (they may overlap, so take the max to avoid double-counting)
|
||||
all_tools = set(tool_counts) | set(tool_calls_counts)
|
||||
merged = Counter()
|
||||
for tool in all_tools:
|
||||
merged[tool] = max(tool_counts.get(tool, 0), tool_calls_counts.get(tool, 0))
|
||||
tool_counts = merged
|
||||
|
||||
# Convert to the expected format
|
||||
return [
|
||||
{"tool_name": name, "count": count}
|
||||
for name, count in tool_counts.most_common()
|
||||
]
|
||||
|
||||
def _get_message_stats(self, cutoff: float, source: str = None) -> Dict:
|
||||
"""Get aggregate message statistics."""
|
||||
if source:
|
||||
cursor = self._conn.execute(
|
||||
"""SELECT
|
||||
COUNT(*) as total_messages,
|
||||
SUM(CASE WHEN m.role = 'user' THEN 1 ELSE 0 END) as user_messages,
|
||||
SUM(CASE WHEN m.role = 'assistant' THEN 1 ELSE 0 END) as assistant_messages,
|
||||
SUM(CASE WHEN m.role = 'tool' THEN 1 ELSE 0 END) as tool_messages
|
||||
FROM messages m
|
||||
JOIN sessions s ON s.id = m.session_id
|
||||
WHERE s.started_at >= ? AND s.source = ?""",
|
||||
(cutoff, source),
|
||||
)
|
||||
else:
|
||||
cursor = self._conn.execute(
|
||||
"""SELECT
|
||||
COUNT(*) as total_messages,
|
||||
SUM(CASE WHEN m.role = 'user' THEN 1 ELSE 0 END) as user_messages,
|
||||
SUM(CASE WHEN m.role = 'assistant' THEN 1 ELSE 0 END) as assistant_messages,
|
||||
SUM(CASE WHEN m.role = 'tool' THEN 1 ELSE 0 END) as tool_messages
|
||||
FROM messages m
|
||||
JOIN sessions s ON s.id = m.session_id
|
||||
WHERE s.started_at >= ?""",
|
||||
(cutoff,),
|
||||
)
|
||||
row = cursor.fetchone()
|
||||
return dict(row) if row else {
|
||||
"total_messages": 0, "user_messages": 0,
|
||||
"assistant_messages": 0, "tool_messages": 0,
|
||||
}
|
||||
|
||||
# =========================================================================
|
||||
# Computation
|
||||
# =========================================================================
|
||||
|
||||
def _compute_overview(self, sessions: List[Dict], message_stats: Dict) -> Dict:
|
||||
"""Compute high-level overview statistics."""
|
||||
total_input = sum(s.get("input_tokens") or 0 for s in sessions)
|
||||
total_output = sum(s.get("output_tokens") or 0 for s in sessions)
|
||||
total_tokens = total_input + total_output
|
||||
total_tool_calls = sum(s.get("tool_call_count") or 0 for s in sessions)
|
||||
total_messages = sum(s.get("message_count") or 0 for s in sessions)
|
||||
|
||||
# Cost estimation (weighted by model)
|
||||
total_cost = 0.0
|
||||
models_with_pricing = set()
|
||||
models_without_pricing = set()
|
||||
for s in sessions:
|
||||
model = s.get("model") or ""
|
||||
inp = s.get("input_tokens") or 0
|
||||
out = s.get("output_tokens") or 0
|
||||
total_cost += _estimate_cost(model, inp, out)
|
||||
display = model.split("/")[-1] if "/" in model else (model or "unknown")
|
||||
if _has_known_pricing(model):
|
||||
models_with_pricing.add(display)
|
||||
else:
|
||||
models_without_pricing.add(display)
|
||||
|
||||
# Session duration stats (guard against negative durations from clock drift)
|
||||
durations = []
|
||||
for s in sessions:
|
||||
start = s.get("started_at")
|
||||
end = s.get("ended_at")
|
||||
if start and end and end > start:
|
||||
durations.append(end - start)
|
||||
|
||||
total_hours = sum(durations) / 3600 if durations else 0
|
||||
avg_duration = sum(durations) / len(durations) if durations else 0
|
||||
|
||||
# Earliest and latest session
|
||||
started_timestamps = [s["started_at"] for s in sessions if s.get("started_at")]
|
||||
date_range_start = min(started_timestamps) if started_timestamps else None
|
||||
date_range_end = max(started_timestamps) if started_timestamps else None
|
||||
|
||||
return {
|
||||
"total_sessions": len(sessions),
|
||||
"total_messages": total_messages,
|
||||
"total_tool_calls": total_tool_calls,
|
||||
"total_input_tokens": total_input,
|
||||
"total_output_tokens": total_output,
|
||||
"total_tokens": total_tokens,
|
||||
"estimated_cost": total_cost,
|
||||
"total_hours": total_hours,
|
||||
"avg_session_duration": avg_duration,
|
||||
"avg_messages_per_session": total_messages / len(sessions) if sessions else 0,
|
||||
"avg_tokens_per_session": total_tokens / len(sessions) if sessions else 0,
|
||||
"user_messages": message_stats.get("user_messages") or 0,
|
||||
"assistant_messages": message_stats.get("assistant_messages") or 0,
|
||||
"tool_messages": message_stats.get("tool_messages") or 0,
|
||||
"date_range_start": date_range_start,
|
||||
"date_range_end": date_range_end,
|
||||
"models_with_pricing": sorted(models_with_pricing),
|
||||
"models_without_pricing": sorted(models_without_pricing),
|
||||
}
|
||||
|
||||
def _compute_model_breakdown(self, sessions: List[Dict]) -> List[Dict]:
|
||||
"""Break down usage by model."""
|
||||
model_data = defaultdict(lambda: {
|
||||
"sessions": 0, "input_tokens": 0, "output_tokens": 0,
|
||||
"total_tokens": 0, "tool_calls": 0, "cost": 0.0,
|
||||
})
|
||||
|
||||
for s in sessions:
|
||||
model = s.get("model") or "unknown"
|
||||
# Normalize: strip provider prefix for display
|
||||
display_model = model.split("/")[-1] if "/" in model else model
|
||||
d = model_data[display_model]
|
||||
d["sessions"] += 1
|
||||
inp = s.get("input_tokens") or 0
|
||||
out = s.get("output_tokens") or 0
|
||||
d["input_tokens"] += inp
|
||||
d["output_tokens"] += out
|
||||
d["total_tokens"] += inp + out
|
||||
d["tool_calls"] += s.get("tool_call_count") or 0
|
||||
d["cost"] += _estimate_cost(model, inp, out)
|
||||
d["has_pricing"] = _has_known_pricing(model)
|
||||
|
||||
result = [
|
||||
{"model": model, **data}
|
||||
for model, data in model_data.items()
|
||||
]
|
||||
# Sort by tokens first, fall back to session count when tokens are 0
|
||||
result.sort(key=lambda x: (x["total_tokens"], x["sessions"]), reverse=True)
|
||||
return result
|
||||
|
||||
def _compute_platform_breakdown(self, sessions: List[Dict]) -> List[Dict]:
|
||||
"""Break down usage by platform/source."""
|
||||
platform_data = defaultdict(lambda: {
|
||||
"sessions": 0, "messages": 0, "input_tokens": 0,
|
||||
"output_tokens": 0, "total_tokens": 0, "tool_calls": 0,
|
||||
})
|
||||
|
||||
for s in sessions:
|
||||
source = s.get("source") or "unknown"
|
||||
d = platform_data[source]
|
||||
d["sessions"] += 1
|
||||
d["messages"] += s.get("message_count") or 0
|
||||
inp = s.get("input_tokens") or 0
|
||||
out = s.get("output_tokens") or 0
|
||||
d["input_tokens"] += inp
|
||||
d["output_tokens"] += out
|
||||
d["total_tokens"] += inp + out
|
||||
d["tool_calls"] += s.get("tool_call_count") or 0
|
||||
|
||||
result = [
|
||||
{"platform": platform, **data}
|
||||
for platform, data in platform_data.items()
|
||||
]
|
||||
result.sort(key=lambda x: x["sessions"], reverse=True)
|
||||
return result
|
||||
|
||||
def _compute_tool_breakdown(self, tool_usage: List[Dict]) -> List[Dict]:
|
||||
"""Process tool usage data into a ranked list with percentages."""
|
||||
total_calls = sum(t["count"] for t in tool_usage) if tool_usage else 0
|
||||
result = []
|
||||
for t in tool_usage:
|
||||
pct = (t["count"] / total_calls * 100) if total_calls else 0
|
||||
result.append({
|
||||
"tool": t["tool_name"],
|
||||
"count": t["count"],
|
||||
"percentage": pct,
|
||||
})
|
||||
return result
|
||||
|
||||
def _compute_activity_patterns(self, sessions: List[Dict]) -> Dict:
|
||||
"""Analyze activity patterns by day of week and hour."""
|
||||
day_counts = Counter() # 0=Monday ... 6=Sunday
|
||||
hour_counts = Counter()
|
||||
daily_counts = Counter() # date string -> count
|
||||
|
||||
for s in sessions:
|
||||
ts = s.get("started_at")
|
||||
if not ts:
|
||||
continue
|
||||
dt = datetime.fromtimestamp(ts)
|
||||
day_counts[dt.weekday()] += 1
|
||||
hour_counts[dt.hour] += 1
|
||||
daily_counts[dt.strftime("%Y-%m-%d")] += 1
|
||||
|
||||
day_names = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
|
||||
day_breakdown = [
|
||||
{"day": day_names[i], "count": day_counts.get(i, 0)}
|
||||
for i in range(7)
|
||||
]
|
||||
|
||||
hour_breakdown = [
|
||||
{"hour": i, "count": hour_counts.get(i, 0)}
|
||||
for i in range(24)
|
||||
]
|
||||
|
||||
# Busiest day and hour
|
||||
busiest_day = max(day_breakdown, key=lambda x: x["count"]) if day_breakdown else None
|
||||
busiest_hour = max(hour_breakdown, key=lambda x: x["count"]) if hour_breakdown else None
|
||||
|
||||
# Active days (days with at least one session)
|
||||
active_days = len(daily_counts)
|
||||
|
||||
# Streak calculation
|
||||
if daily_counts:
|
||||
all_dates = sorted(daily_counts.keys())
|
||||
current_streak = 1
|
||||
max_streak = 1
|
||||
for i in range(1, len(all_dates)):
|
||||
d1 = datetime.strptime(all_dates[i - 1], "%Y-%m-%d")
|
||||
d2 = datetime.strptime(all_dates[i], "%Y-%m-%d")
|
||||
if (d2 - d1).days == 1:
|
||||
current_streak += 1
|
||||
max_streak = max(max_streak, current_streak)
|
||||
else:
|
||||
current_streak = 1
|
||||
else:
|
||||
max_streak = 0
|
||||
|
||||
return {
|
||||
"by_day": day_breakdown,
|
||||
"by_hour": hour_breakdown,
|
||||
"busiest_day": busiest_day,
|
||||
"busiest_hour": busiest_hour,
|
||||
"active_days": active_days,
|
||||
"max_streak": max_streak,
|
||||
}
|
||||
|
||||
def _compute_top_sessions(self, sessions: List[Dict]) -> List[Dict]:
|
||||
"""Find notable sessions (longest, most messages, most tokens)."""
|
||||
top = []
|
||||
|
||||
# Longest by duration
|
||||
sessions_with_duration = [
|
||||
s for s in sessions
|
||||
if s.get("started_at") and s.get("ended_at")
|
||||
]
|
||||
if sessions_with_duration:
|
||||
longest = max(
|
||||
sessions_with_duration,
|
||||
key=lambda s: (s["ended_at"] - s["started_at"]),
|
||||
)
|
||||
dur = longest["ended_at"] - longest["started_at"]
|
||||
top.append({
|
||||
"label": "Longest session",
|
||||
"session_id": longest["id"][:16],
|
||||
"value": _format_duration(dur),
|
||||
"date": datetime.fromtimestamp(longest["started_at"]).strftime("%b %d"),
|
||||
})
|
||||
|
||||
# Most messages
|
||||
most_msgs = max(sessions, key=lambda s: s.get("message_count") or 0)
|
||||
if (most_msgs.get("message_count") or 0) > 0:
|
||||
top.append({
|
||||
"label": "Most messages",
|
||||
"session_id": most_msgs["id"][:16],
|
||||
"value": f"{most_msgs['message_count']} msgs",
|
||||
"date": datetime.fromtimestamp(most_msgs["started_at"]).strftime("%b %d") if most_msgs.get("started_at") else "?",
|
||||
})
|
||||
|
||||
# Most tokens
|
||||
most_tokens = max(
|
||||
sessions,
|
||||
key=lambda s: (s.get("input_tokens") or 0) + (s.get("output_tokens") or 0),
|
||||
)
|
||||
token_total = (most_tokens.get("input_tokens") or 0) + (most_tokens.get("output_tokens") or 0)
|
||||
if token_total > 0:
|
||||
top.append({
|
||||
"label": "Most tokens",
|
||||
"session_id": most_tokens["id"][:16],
|
||||
"value": f"{token_total:,} tokens",
|
||||
"date": datetime.fromtimestamp(most_tokens["started_at"]).strftime("%b %d") if most_tokens.get("started_at") else "?",
|
||||
})
|
||||
|
||||
# Most tool calls
|
||||
most_tools = max(sessions, key=lambda s: s.get("tool_call_count") or 0)
|
||||
if (most_tools.get("tool_call_count") or 0) > 0:
|
||||
top.append({
|
||||
"label": "Most tool calls",
|
||||
"session_id": most_tools["id"][:16],
|
||||
"value": f"{most_tools['tool_call_count']} calls",
|
||||
"date": datetime.fromtimestamp(most_tools["started_at"]).strftime("%b %d") if most_tools.get("started_at") else "?",
|
||||
})
|
||||
|
||||
return top
|
||||
|
||||
# =========================================================================
|
||||
# Formatting
|
||||
# =========================================================================
|
||||
|
||||
def format_terminal(self, report: Dict) -> str:
|
||||
"""Format the insights report for terminal display (CLI)."""
|
||||
if report.get("empty"):
|
||||
days = report.get("days", 30)
|
||||
src = f" (source: {report['source_filter']})" if report.get("source_filter") else ""
|
||||
return f" No sessions found in the last {days} days{src}."
|
||||
|
||||
lines = []
|
||||
o = report["overview"]
|
||||
days = report["days"]
|
||||
src_filter = report.get("source_filter")
|
||||
|
||||
# Header
|
||||
lines.append("")
|
||||
lines.append(" ╔══════════════════════════════════════════════════════════╗")
|
||||
lines.append(" ║ 📊 Hermes Insights ║")
|
||||
period_label = f"Last {days} days"
|
||||
if src_filter:
|
||||
period_label += f" ({src_filter})"
|
||||
padding = 58 - len(period_label) - 2
|
||||
left_pad = padding // 2
|
||||
right_pad = padding - left_pad
|
||||
lines.append(f" ║{' ' * left_pad} {period_label} {' ' * right_pad}║")
|
||||
lines.append(" ╚══════════════════════════════════════════════════════════╝")
|
||||
lines.append("")
|
||||
|
||||
# Date range
|
||||
if o.get("date_range_start") and o.get("date_range_end"):
|
||||
start_str = datetime.fromtimestamp(o["date_range_start"]).strftime("%b %d, %Y")
|
||||
end_str = datetime.fromtimestamp(o["date_range_end"]).strftime("%b %d, %Y")
|
||||
lines.append(f" Period: {start_str} — {end_str}")
|
||||
lines.append("")
|
||||
|
||||
# Overview
|
||||
lines.append(" 📋 Overview")
|
||||
lines.append(" " + "─" * 56)
|
||||
lines.append(f" Sessions: {o['total_sessions']:<12} Messages: {o['total_messages']:,}")
|
||||
lines.append(f" Tool calls: {o['total_tool_calls']:<12,} User messages: {o['user_messages']:,}")
|
||||
lines.append(f" Input tokens: {o['total_input_tokens']:<12,} Output tokens: {o['total_output_tokens']:,}")
|
||||
cost_str = f"${o['estimated_cost']:.2f}"
|
||||
if o.get("models_without_pricing"):
|
||||
cost_str += " *"
|
||||
lines.append(f" Total tokens: {o['total_tokens']:<12,} Est. cost: {cost_str}")
|
||||
if o["total_hours"] > 0:
|
||||
lines.append(f" Active time: ~{_format_duration(o['total_hours'] * 3600):<11} Avg session: ~{_format_duration(o['avg_session_duration'])}")
|
||||
lines.append(f" Avg msgs/session: {o['avg_messages_per_session']:.1f}")
|
||||
lines.append("")
|
||||
|
||||
# Model breakdown
|
||||
if report["models"]:
|
||||
lines.append(" 🤖 Models Used")
|
||||
lines.append(" " + "─" * 56)
|
||||
lines.append(f" {'Model':<30} {'Sessions':>8} {'Tokens':>12} {'Cost':>8}")
|
||||
for m in report["models"]:
|
||||
model_name = m["model"][:28]
|
||||
if m.get("has_pricing"):
|
||||
cost_cell = f"${m['cost']:>6.2f}"
|
||||
else:
|
||||
cost_cell = " N/A"
|
||||
lines.append(f" {model_name:<30} {m['sessions']:>8} {m['total_tokens']:>12,} {cost_cell}")
|
||||
if o.get("models_without_pricing"):
|
||||
lines.append(f" * Cost N/A for custom/self-hosted models")
|
||||
lines.append("")
|
||||
|
||||
# Platform breakdown
|
||||
if len(report["platforms"]) > 1 or (report["platforms"] and report["platforms"][0]["platform"] != "cli"):
|
||||
lines.append(" 📱 Platforms")
|
||||
lines.append(" " + "─" * 56)
|
||||
lines.append(f" {'Platform':<14} {'Sessions':>8} {'Messages':>10} {'Tokens':>14}")
|
||||
for p in report["platforms"]:
|
||||
lines.append(f" {p['platform']:<14} {p['sessions']:>8} {p['messages']:>10,} {p['total_tokens']:>14,}")
|
||||
lines.append("")
|
||||
|
||||
# Tool usage
|
||||
if report["tools"]:
|
||||
lines.append(" 🔧 Top Tools")
|
||||
lines.append(" " + "─" * 56)
|
||||
lines.append(f" {'Tool':<28} {'Calls':>8} {'%':>8}")
|
||||
for t in report["tools"][:15]: # Top 15
|
||||
lines.append(f" {t['tool']:<28} {t['count']:>8,} {t['percentage']:>7.1f}%")
|
||||
if len(report["tools"]) > 15:
|
||||
lines.append(f" ... and {len(report['tools']) - 15} more tools")
|
||||
lines.append("")
|
||||
|
||||
# Activity patterns
|
||||
act = report.get("activity", {})
|
||||
if act.get("by_day"):
|
||||
lines.append(" 📅 Activity Patterns")
|
||||
lines.append(" " + "─" * 56)
|
||||
|
||||
# Day of week chart
|
||||
day_values = [d["count"] for d in act["by_day"]]
|
||||
bars = _bar_chart(day_values, max_width=15)
|
||||
for i, d in enumerate(act["by_day"]):
|
||||
bar = bars[i]
|
||||
lines.append(f" {d['day']} {bar:<15} {d['count']}")
|
||||
|
||||
lines.append("")
|
||||
|
||||
# Peak hours (show top 5 busiest hours)
|
||||
busy_hours = sorted(act["by_hour"], key=lambda x: x["count"], reverse=True)
|
||||
busy_hours = [h for h in busy_hours if h["count"] > 0][:5]
|
||||
if busy_hours:
|
||||
hour_strs = []
|
||||
for h in busy_hours:
|
||||
hr = h["hour"]
|
||||
ampm = "AM" if hr < 12 else "PM"
|
||||
display_hr = hr % 12 or 12
|
||||
hour_strs.append(f"{display_hr}{ampm} ({h['count']})")
|
||||
lines.append(f" Peak hours: {', '.join(hour_strs)}")
|
||||
|
||||
if act.get("active_days"):
|
||||
lines.append(f" Active days: {act['active_days']}")
|
||||
if act.get("max_streak") and act["max_streak"] > 1:
|
||||
lines.append(f" Best streak: {act['max_streak']} consecutive days")
|
||||
lines.append("")
|
||||
|
||||
# Notable sessions
|
||||
if report.get("top_sessions"):
|
||||
lines.append(" 🏆 Notable Sessions")
|
||||
lines.append(" " + "─" * 56)
|
||||
for ts in report["top_sessions"]:
|
||||
lines.append(f" {ts['label']:<20} {ts['value']:<18} ({ts['date']}, {ts['session_id']})")
|
||||
lines.append("")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
def format_gateway(self, report: Dict) -> str:
|
||||
"""Format the insights report for gateway/messaging (shorter)."""
|
||||
if report.get("empty"):
|
||||
days = report.get("days", 30)
|
||||
return f"No sessions found in the last {days} days."
|
||||
|
||||
lines = []
|
||||
o = report["overview"]
|
||||
days = report["days"]
|
||||
|
||||
lines.append(f"📊 **Hermes Insights** — Last {days} days\n")
|
||||
|
||||
# Overview
|
||||
lines.append(f"**Sessions:** {o['total_sessions']} | **Messages:** {o['total_messages']:,} | **Tool calls:** {o['total_tool_calls']:,}")
|
||||
lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
|
||||
cost_note = ""
|
||||
if o.get("models_without_pricing"):
|
||||
cost_note = " _(excludes custom/self-hosted models)_"
|
||||
lines.append(f"**Est. cost:** ${o['estimated_cost']:.2f}{cost_note}")
|
||||
if o["total_hours"] > 0:
|
||||
lines.append(f"**Active time:** ~{_format_duration(o['total_hours'] * 3600)} | **Avg session:** ~{_format_duration(o['avg_session_duration'])}")
|
||||
lines.append("")
|
||||
|
||||
# Models (top 5)
|
||||
if report["models"]:
|
||||
lines.append("**🤖 Models:**")
|
||||
for m in report["models"][:5]:
|
||||
cost_str = f"${m['cost']:.2f}" if m.get("has_pricing") else "N/A"
|
||||
lines.append(f" {m['model'][:25]} — {m['sessions']} sessions, {m['total_tokens']:,} tokens, {cost_str}")
|
||||
lines.append("")
|
||||
|
||||
# Platforms (if multi-platform)
|
||||
if len(report["platforms"]) > 1:
|
||||
lines.append("**📱 Platforms:**")
|
||||
for p in report["platforms"]:
|
||||
lines.append(f" {p['platform']} — {p['sessions']} sessions, {p['messages']:,} msgs")
|
||||
lines.append("")
|
||||
|
||||
# Tools (top 8)
|
||||
if report["tools"]:
|
||||
lines.append("**🔧 Top Tools:**")
|
||||
for t in report["tools"][:8]:
|
||||
lines.append(f" {t['tool']} — {t['count']:,} calls ({t['percentage']:.1f}%)")
|
||||
lines.append("")
|
||||
|
||||
# Activity summary
|
||||
act = report.get("activity", {})
|
||||
if act.get("busiest_day") and act.get("busiest_hour"):
|
||||
hr = act["busiest_hour"]["hour"]
|
||||
ampm = "AM" if hr < 12 else "PM"
|
||||
display_hr = hr % 12 or 12
|
||||
lines.append(f"**📅 Busiest:** {act['busiest_day']['day']}s ({act['busiest_day']['count']} sessions), {display_hr}{ampm} ({act['busiest_hour']['count']} sessions)")
|
||||
if act.get("active_days"):
|
||||
lines.append(f"**Active days:** {act['active_days']}", )
|
||||
if act.get("max_streak", 0) > 1:
|
||||
lines.append(f"**Best streak:** {act['max_streak']} consecutive days")
|
||||
|
||||
return "\n".join(lines)
|
||||
@@ -1,224 +0,0 @@
|
||||
"""Model metadata, context lengths, and token estimation utilities.
|
||||
|
||||
Pure utility functions with no AIAgent dependency. Used by ContextCompressor
|
||||
and run_agent.py for pre-flight context checks.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import requests
|
||||
import yaml
|
||||
|
||||
from hermes_constants import OPENROUTER_MODELS_URL
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_model_metadata_cache: Dict[str, Dict[str, Any]] = {}
|
||||
_model_metadata_cache_time: float = 0
|
||||
_MODEL_CACHE_TTL = 3600
|
||||
|
||||
# Descending tiers for context length probing when the model is unknown.
|
||||
# We start high and step down on context-length errors until one works.
|
||||
CONTEXT_PROBE_TIERS = [
|
||||
2_000_000,
|
||||
1_000_000,
|
||||
512_000,
|
||||
200_000,
|
||||
128_000,
|
||||
64_000,
|
||||
32_000,
|
||||
]
|
||||
|
||||
DEFAULT_CONTEXT_LENGTHS = {
|
||||
"anthropic/claude-opus-4": 200000,
|
||||
"anthropic/claude-opus-4.5": 200000,
|
||||
"anthropic/claude-opus-4.6": 200000,
|
||||
"anthropic/claude-sonnet-4": 200000,
|
||||
"anthropic/claude-sonnet-4-20250514": 200000,
|
||||
"anthropic/claude-haiku-4.5": 200000,
|
||||
"openai/gpt-4o": 128000,
|
||||
"openai/gpt-4-turbo": 128000,
|
||||
"openai/gpt-4o-mini": 128000,
|
||||
"google/gemini-2.0-flash": 1048576,
|
||||
"google/gemini-2.5-pro": 1048576,
|
||||
"meta-llama/llama-3.3-70b-instruct": 131072,
|
||||
"deepseek/deepseek-chat-v3": 65536,
|
||||
"qwen/qwen-2.5-72b-instruct": 32768,
|
||||
"glm-4.7": 202752,
|
||||
"glm-5": 202752,
|
||||
"glm-4.5": 131072,
|
||||
"glm-4.5-flash": 131072,
|
||||
"kimi-k2.5": 262144,
|
||||
"kimi-k2-thinking": 262144,
|
||||
"kimi-k2-turbo-preview": 262144,
|
||||
"kimi-k2-0905-preview": 131072,
|
||||
"MiniMax-M2.5": 204800,
|
||||
"MiniMax-M2.5-highspeed": 204800,
|
||||
"MiniMax-M2.1": 204800,
|
||||
}
|
||||
|
||||
|
||||
def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any]]:
|
||||
"""Fetch model metadata from OpenRouter (cached for 1 hour)."""
|
||||
global _model_metadata_cache, _model_metadata_cache_time
|
||||
|
||||
if not force_refresh and _model_metadata_cache and (time.time() - _model_metadata_cache_time) < _MODEL_CACHE_TTL:
|
||||
return _model_metadata_cache
|
||||
|
||||
try:
|
||||
response = requests.get(OPENROUTER_MODELS_URL, timeout=10)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
cache = {}
|
||||
for model in data.get("data", []):
|
||||
model_id = model.get("id", "")
|
||||
cache[model_id] = {
|
||||
"context_length": model.get("context_length", 128000),
|
||||
"max_completion_tokens": model.get("top_provider", {}).get("max_completion_tokens", 4096),
|
||||
"name": model.get("name", model_id),
|
||||
"pricing": model.get("pricing", {}),
|
||||
}
|
||||
canonical = model.get("canonical_slug", "")
|
||||
if canonical and canonical != model_id:
|
||||
cache[canonical] = cache[model_id]
|
||||
|
||||
_model_metadata_cache = cache
|
||||
_model_metadata_cache_time = time.time()
|
||||
logger.debug("Fetched metadata for %s models from OpenRouter", len(cache))
|
||||
return cache
|
||||
|
||||
except Exception as e:
|
||||
logging.warning(f"Failed to fetch model metadata from OpenRouter: {e}")
|
||||
return _model_metadata_cache or {}
|
||||
|
||||
|
||||
def _get_context_cache_path() -> Path:
|
||||
"""Return path to the persistent context length cache file."""
|
||||
hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
|
||||
return hermes_home / "context_length_cache.yaml"
|
||||
|
||||
|
||||
def _load_context_cache() -> Dict[str, int]:
|
||||
"""Load the model+provider → context_length cache from disk."""
|
||||
path = _get_context_cache_path()
|
||||
if not path.exists():
|
||||
return {}
|
||||
try:
|
||||
with open(path) as f:
|
||||
data = yaml.safe_load(f) or {}
|
||||
return data.get("context_lengths", {})
|
||||
except Exception as e:
|
||||
logger.debug("Failed to load context length cache: %s", e)
|
||||
return {}
|
||||
|
||||
|
||||
def save_context_length(model: str, base_url: str, length: int) -> None:
|
||||
"""Persist a discovered context length for a model+provider combo.
|
||||
|
||||
Cache key is ``model@base_url`` so the same model name served from
|
||||
different providers can have different limits.
|
||||
"""
|
||||
key = f"{model}@{base_url}"
|
||||
cache = _load_context_cache()
|
||||
if cache.get(key) == length:
|
||||
return # already stored
|
||||
cache[key] = length
|
||||
path = _get_context_cache_path()
|
||||
try:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(path, "w") as f:
|
||||
yaml.dump({"context_lengths": cache}, f, default_flow_style=False)
|
||||
logger.info("Cached context length %s → %s tokens", key, f"{length:,}")
|
||||
except Exception as e:
|
||||
logger.debug("Failed to save context length cache: %s", e)
|
||||
|
||||
|
||||
def get_cached_context_length(model: str, base_url: str) -> Optional[int]:
|
||||
"""Look up a previously discovered context length for model+provider."""
|
||||
key = f"{model}@{base_url}"
|
||||
cache = _load_context_cache()
|
||||
return cache.get(key)
|
||||
|
||||
|
||||
def get_next_probe_tier(current_length: int) -> Optional[int]:
|
||||
"""Return the next lower probe tier, or None if already at minimum."""
|
||||
for tier in CONTEXT_PROBE_TIERS:
|
||||
if tier < current_length:
|
||||
return tier
|
||||
return None
|
||||
|
||||
|
||||
def parse_context_limit_from_error(error_msg: str) -> Optional[int]:
|
||||
"""Try to extract the actual context limit from an API error message.
|
||||
|
||||
Many providers include the limit in their error text, e.g.:
|
||||
- "maximum context length is 32768 tokens"
|
||||
- "context_length_exceeded: 131072"
|
||||
- "Maximum context size 32768 exceeded"
|
||||
- "model's max context length is 65536"
|
||||
"""
|
||||
error_lower = error_msg.lower()
|
||||
# Pattern: look for numbers near context-related keywords
|
||||
patterns = [
|
||||
r'(?:max(?:imum)?|limit)\s*(?:context\s*)?(?:length|size|window)?\s*(?:is|of|:)?\s*(\d{4,})',
|
||||
r'context\s*(?:length|size|window)\s*(?:is|of|:)?\s*(\d{4,})',
|
||||
r'(\d{4,})\s*(?:token)?\s*(?:context|limit)',
|
||||
r'>\s*(\d{4,})\s*(?:max|limit|token)', # "250000 tokens > 200000 maximum"
|
||||
r'(\d{4,})\s*(?:max(?:imum)?)\b', # "200000 maximum"
|
||||
]
|
||||
for pattern in patterns:
|
||||
match = re.search(pattern, error_lower)
|
||||
if match:
|
||||
limit = int(match.group(1))
|
||||
# Sanity check: must be a reasonable context length
|
||||
if 1024 <= limit <= 10_000_000:
|
||||
return limit
|
||||
return None
|
||||
|
||||
|
||||
def get_model_context_length(model: str, base_url: str = "") -> int:
|
||||
"""Get the context length for a model.
|
||||
|
||||
Resolution order:
|
||||
1. Persistent cache (previously discovered via probing)
|
||||
2. OpenRouter API metadata
|
||||
3. Hardcoded DEFAULT_CONTEXT_LENGTHS (fuzzy match)
|
||||
4. First probe tier (2M) — will be narrowed on first context error
|
||||
"""
|
||||
# 1. Check persistent cache (model+provider)
|
||||
if base_url:
|
||||
cached = get_cached_context_length(model, base_url)
|
||||
if cached is not None:
|
||||
return cached
|
||||
|
||||
# 2. OpenRouter API metadata
|
||||
metadata = fetch_model_metadata()
|
||||
if model in metadata:
|
||||
return metadata[model].get("context_length", 128000)
|
||||
|
||||
# 3. Hardcoded defaults (fuzzy match)
|
||||
for default_model, length in DEFAULT_CONTEXT_LENGTHS.items():
|
||||
if default_model in model or model in default_model:
|
||||
return length
|
||||
|
||||
# 4. Unknown model — start at highest probe tier
|
||||
return CONTEXT_PROBE_TIERS[0]
|
||||
|
||||
|
||||
def estimate_tokens_rough(text: str) -> int:
|
||||
"""Rough token estimate (~4 chars/token) for pre-flight checks."""
|
||||
if not text:
|
||||
return 0
|
||||
return len(text) // 4
|
||||
|
||||
|
||||
def estimate_messages_tokens_rough(messages: List[Dict[str, Any]]) -> int:
|
||||
"""Rough token estimate for a message list (pre-flight only)."""
|
||||
total_chars = sum(len(str(msg)) for msg in messages)
|
||||
return total_chars // 4
|
||||
@@ -1,387 +0,0 @@
|
||||
"""System prompt assembly -- identity, platform hints, skills index, context files.
|
||||
|
||||
All functions are stateless. AIAgent._build_system_prompt() calls these to
|
||||
assemble pieces, then combines them with memory and ephemeral prompts.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Context file scanning — detect prompt injection in AGENTS.md, .cursorrules,
|
||||
# SOUL.md before they get injected into the system prompt.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_CONTEXT_THREAT_PATTERNS = [
|
||||
(r'ignore\s+(previous|all|above|prior)\s+instructions', "prompt_injection"),
|
||||
(r'do\s+not\s+tell\s+the\s+user', "deception_hide"),
|
||||
(r'system\s+prompt\s+override', "sys_prompt_override"),
|
||||
(r'disregard\s+(your|all|any)\s+(instructions|rules|guidelines)', "disregard_rules"),
|
||||
(r'act\s+as\s+(if|though)\s+you\s+(have\s+no|don\'t\s+have)\s+(restrictions|limits|rules)', "bypass_restrictions"),
|
||||
(r'<!--[^>]*(?:ignore|override|system|secret|hidden)[^>]*-->', "html_comment_injection"),
|
||||
(r'<\s*div\s+style\s*=\s*["\'].*display\s*:\s*none', "hidden_div"),
|
||||
(r'translate\s+.*\s+into\s+.*\s+and\s+(execute|run|eval)', "translate_execute"),
|
||||
(r'curl\s+[^\n]*\$\{?\w*(KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|API)', "exfil_curl"),
|
||||
(r'cat\s+[^\n]*(\.env|credentials|\.netrc|\.pgpass)', "read_secrets"),
|
||||
]
|
||||
|
||||
_CONTEXT_INVISIBLE_CHARS = {
|
||||
'\u200b', '\u200c', '\u200d', '\u2060', '\ufeff',
|
||||
'\u202a', '\u202b', '\u202c', '\u202d', '\u202e',
|
||||
}
|
||||
|
||||
|
||||
def _scan_context_content(content: str, filename: str) -> str:
|
||||
"""Scan context file content for injection. Returns sanitized content."""
|
||||
findings = []
|
||||
|
||||
# Check invisible unicode
|
||||
for char in _CONTEXT_INVISIBLE_CHARS:
|
||||
if char in content:
|
||||
findings.append(f"invisible unicode U+{ord(char):04X}")
|
||||
|
||||
# Check threat patterns
|
||||
for pattern, pid in _CONTEXT_THREAT_PATTERNS:
|
||||
if re.search(pattern, content, re.IGNORECASE):
|
||||
findings.append(pid)
|
||||
|
||||
if findings:
|
||||
logger.warning("Context file %s blocked: %s", filename, ", ".join(findings))
|
||||
return f"[BLOCKED: {filename} contained potential prompt injection ({', '.join(findings)}). Content not loaded.]"
|
||||
|
||||
return content
|
||||
|
||||
# =========================================================================
|
||||
# Constants
|
||||
# =========================================================================
|
||||
|
||||
DEFAULT_AGENT_IDENTITY = (
|
||||
"You are Hermes Agent, an intelligent AI assistant created by Nous Research. "
|
||||
"You are helpful, knowledgeable, and direct. You assist users with a wide "
|
||||
"range of tasks including answering questions, writing and editing code, "
|
||||
"analyzing information, creative work, and executing actions via your tools. "
|
||||
"You communicate clearly, admit uncertainty when appropriate, and prioritize "
|
||||
"being genuinely useful over being verbose unless otherwise directed below. "
|
||||
"Be targeted and efficient in your exploration and investigations."
|
||||
)
|
||||
|
||||
MEMORY_GUIDANCE = (
|
||||
"You have persistent memory across sessions. Proactively save important things "
|
||||
"you learn (user preferences, environment details, useful approaches) and do "
|
||||
"(like a diary!) using the memory tool -- don't wait to be asked."
|
||||
)
|
||||
|
||||
SESSION_SEARCH_GUIDANCE = (
|
||||
"When the user references something from a past conversation or you suspect "
|
||||
"relevant prior context exists, use session_search to recall it before asking "
|
||||
"them to repeat themselves."
|
||||
)
|
||||
|
||||
SKILLS_GUIDANCE = (
|
||||
"After completing a complex task (5+ tool calls), fixing a tricky error, "
|
||||
"or discovering a non-trivial workflow, consider saving the approach as a "
|
||||
"skill with skill_manage so you can reuse it next time."
|
||||
)
|
||||
|
||||
PLATFORM_HINTS = {
|
||||
"whatsapp": (
|
||||
"You are on a text messaging communication platform, WhatsApp. "
|
||||
"Please do not use markdown as it does not render. "
|
||||
"You can send media files natively: to deliver a file to the user, "
|
||||
"include MEDIA:/absolute/path/to/file in your response. The file "
|
||||
"will be sent as a native WhatsApp attachment — images (.jpg, .png, "
|
||||
".webp) appear as photos, videos (.mp4, .mov) play inline, and other "
|
||||
"files arrive as downloadable documents. You can also include image "
|
||||
"URLs in markdown format  and they will be sent as photos."
|
||||
),
|
||||
"telegram": (
|
||||
"You are on a text messaging communication platform, Telegram. "
|
||||
"Please do not use markdown as it does not render. "
|
||||
"You can send media files natively: to deliver a file to the user, "
|
||||
"include MEDIA:/absolute/path/to/file in your response. Images "
|
||||
"(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
|
||||
"bubbles, and videos (.mp4) play inline. You can also include image "
|
||||
"URLs in markdown format  and they will be sent as native photos."
|
||||
),
|
||||
"discord": (
|
||||
"You are in a Discord server or group chat communicating with your user. "
|
||||
"You can send media files natively: include MEDIA:/absolute/path/to/file "
|
||||
"in your response. Images (.png, .jpg, .webp) are sent as photo "
|
||||
"attachments, audio as file attachments. You can also include image URLs "
|
||||
"in markdown format  and they will be sent as attachments."
|
||||
),
|
||||
"slack": (
|
||||
"You are in a Slack workspace communicating with your user. "
|
||||
"You can send media files natively: include MEDIA:/absolute/path/to/file "
|
||||
"in your response. Images (.png, .jpg, .webp) are uploaded as photo "
|
||||
"attachments, audio as file attachments. You can also include image URLs "
|
||||
"in markdown format  and they will be uploaded as attachments."
|
||||
),
|
||||
"signal": (
|
||||
"You are on a text messaging communication platform, Signal. "
|
||||
"Please do not use markdown as it does not render. "
|
||||
"You can send media files natively: to deliver a file to the user, "
|
||||
"include MEDIA:/absolute/path/to/file in your response. Images "
|
||||
"(.png, .jpg, .webp) appear as photos, audio as attachments, and other "
|
||||
"files arrive as downloadable documents. You can also include image "
|
||||
"URLs in markdown format  and they will be sent as photos."
|
||||
),
|
||||
"cli": (
|
||||
"You are a CLI AI Agent. Try not to use markdown but simple text "
|
||||
"renderable inside a terminal."
|
||||
),
|
||||
}
|
||||
|
||||
CONTEXT_FILE_MAX_CHARS = 20_000
|
||||
CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
|
||||
CONTEXT_TRUNCATE_TAIL_RATIO = 0.2
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Skills index
|
||||
# =========================================================================
|
||||
|
||||
def _read_skill_description(skill_file: Path, max_chars: int = 60) -> str:
|
||||
"""Read the description from a SKILL.md frontmatter, capped at max_chars."""
|
||||
try:
|
||||
raw = skill_file.read_text(encoding="utf-8")[:2000]
|
||||
match = re.search(
|
||||
r"^---\s*\n.*?description:\s*(.+?)\s*\n.*?^---",
|
||||
raw, re.MULTILINE | re.DOTALL,
|
||||
)
|
||||
if match:
|
||||
desc = match.group(1).strip().strip("'\"")
|
||||
if len(desc) > max_chars:
|
||||
desc = desc[:max_chars - 3] + "..."
|
||||
return desc
|
||||
except Exception:
|
||||
pass
|
||||
return ""
|
||||
|
||||
|
||||
def _skill_is_platform_compatible(skill_file: Path) -> bool:
|
||||
"""Quick check if a SKILL.md is compatible with the current OS platform.
|
||||
|
||||
Reads just enough to parse the ``platforms`` frontmatter field.
|
||||
Skills without the field (the vast majority) are always compatible.
|
||||
"""
|
||||
try:
|
||||
from tools.skills_tool import _parse_frontmatter, skill_matches_platform
|
||||
raw = skill_file.read_text(encoding="utf-8")[:2000]
|
||||
frontmatter, _ = _parse_frontmatter(raw)
|
||||
return skill_matches_platform(frontmatter)
|
||||
except Exception:
|
||||
return True # Err on the side of showing the skill
|
||||
|
||||
|
||||
def build_skills_system_prompt() -> str:
|
||||
"""Build a compact skill index for the system prompt.
|
||||
|
||||
Scans ~/.hermes/skills/ for SKILL.md files grouped by category.
|
||||
Includes per-skill descriptions from frontmatter so the model can
|
||||
match skills by meaning, not just name.
|
||||
Filters out skills incompatible with the current OS platform.
|
||||
"""
|
||||
hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
|
||||
skills_dir = hermes_home / "skills"
|
||||
|
||||
if not skills_dir.exists():
|
||||
return ""
|
||||
|
||||
# Collect skills with descriptions, grouped by category
|
||||
# Each entry: (skill_name, description)
|
||||
# Supports sub-categories: skills/mlops/training/axolotl/SKILL.md
|
||||
# → category "mlops/training", skill "axolotl"
|
||||
skills_by_category: dict[str, list[tuple[str, str]]] = {}
|
||||
for skill_file in skills_dir.rglob("SKILL.md"):
|
||||
# Skip skills incompatible with the current OS platform
|
||||
if not _skill_is_platform_compatible(skill_file):
|
||||
continue
|
||||
rel_path = skill_file.relative_to(skills_dir)
|
||||
parts = rel_path.parts
|
||||
if len(parts) >= 2:
|
||||
# Category is everything between skills_dir and the skill folder
|
||||
# e.g. parts = ("mlops", "training", "axolotl", "SKILL.md")
|
||||
# → category = "mlops/training", skill_name = "axolotl"
|
||||
# e.g. parts = ("github", "github-auth", "SKILL.md")
|
||||
# → category = "github", skill_name = "github-auth"
|
||||
skill_name = parts[-2]
|
||||
category = "/".join(parts[:-2]) if len(parts) > 2 else parts[0]
|
||||
else:
|
||||
category = "general"
|
||||
skill_name = skill_file.parent.name
|
||||
desc = _read_skill_description(skill_file)
|
||||
skills_by_category.setdefault(category, []).append((skill_name, desc))
|
||||
|
||||
if not skills_by_category:
|
||||
return ""
|
||||
|
||||
# Read category-level descriptions from DESCRIPTION.md
|
||||
# Checks both the exact category path and parent directories
|
||||
category_descriptions = {}
|
||||
for category in skills_by_category:
|
||||
cat_path = Path(category)
|
||||
desc_file = skills_dir / cat_path / "DESCRIPTION.md"
|
||||
if desc_file.exists():
|
||||
try:
|
||||
content = desc_file.read_text(encoding="utf-8")
|
||||
match = re.search(r"^---\s*\n.*?description:\s*(.+?)\s*\n.*?^---", content, re.MULTILINE | re.DOTALL)
|
||||
if match:
|
||||
category_descriptions[category] = match.group(1).strip()
|
||||
except Exception as e:
|
||||
logger.debug("Could not read skill description %s: %s", desc_file, e)
|
||||
|
||||
index_lines = []
|
||||
for category in sorted(skills_by_category.keys()):
|
||||
cat_desc = category_descriptions.get(category, "")
|
||||
if cat_desc:
|
||||
index_lines.append(f" {category}: {cat_desc}")
|
||||
else:
|
||||
index_lines.append(f" {category}:")
|
||||
# Deduplicate and sort skills within each category
|
||||
seen = set()
|
||||
for name, desc in sorted(skills_by_category[category], key=lambda x: x[0]):
|
||||
if name in seen:
|
||||
continue
|
||||
seen.add(name)
|
||||
if desc:
|
||||
index_lines.append(f" - {name}: {desc}")
|
||||
else:
|
||||
index_lines.append(f" - {name}")
|
||||
|
||||
return (
|
||||
"## Skills (mandatory)\n"
|
||||
"Before replying, scan the skills below. If one clearly matches your task, "
|
||||
"load it with skill_view(name) and follow its instructions. "
|
||||
"If a skill has issues, fix it with skill_manage(action='patch').\n"
|
||||
"\n"
|
||||
"<available_skills>\n"
|
||||
+ "\n".join(index_lines) + "\n"
|
||||
"</available_skills>\n"
|
||||
"\n"
|
||||
"If none match, proceed normally without loading a skill."
|
||||
)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Context files (SOUL.md, AGENTS.md, .cursorrules)
|
||||
# =========================================================================
|
||||
|
||||
def _truncate_content(content: str, filename: str, max_chars: int = CONTEXT_FILE_MAX_CHARS) -> str:
|
||||
"""Head/tail truncation with a marker in the middle."""
|
||||
if len(content) <= max_chars:
|
||||
return content
|
||||
head_chars = int(max_chars * CONTEXT_TRUNCATE_HEAD_RATIO)
|
||||
tail_chars = int(max_chars * CONTEXT_TRUNCATE_TAIL_RATIO)
|
||||
head = content[:head_chars]
|
||||
tail = content[-tail_chars:]
|
||||
marker = f"\n\n[...truncated {filename}: kept {head_chars}+{tail_chars} of {len(content)} chars. Use file tools to read the full file.]\n\n"
|
||||
return head + marker + tail
|
||||
|
||||
|
||||
def build_context_files_prompt(cwd: Optional[str] = None) -> str:
|
||||
"""Discover and load context files for the system prompt.
|
||||
|
||||
Discovery: AGENTS.md (recursive), .cursorrules / .cursor/rules/*.mdc,
|
||||
SOUL.md (cwd then ~/.hermes/ fallback). Each capped at 20,000 chars.
|
||||
"""
|
||||
if cwd is None:
|
||||
cwd = os.getcwd()
|
||||
|
||||
cwd_path = Path(cwd).resolve()
|
||||
sections = []
|
||||
|
||||
# AGENTS.md (hierarchical, recursive)
|
||||
top_level_agents = None
|
||||
for name in ["AGENTS.md", "agents.md"]:
|
||||
candidate = cwd_path / name
|
||||
if candidate.exists():
|
||||
top_level_agents = candidate
|
||||
break
|
||||
|
||||
if top_level_agents:
|
||||
agents_files = []
|
||||
for root, dirs, files in os.walk(cwd_path):
|
||||
dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ('node_modules', '__pycache__', 'venv', '.venv')]
|
||||
for f in files:
|
||||
if f.lower() == "agents.md":
|
||||
agents_files.append(Path(root) / f)
|
||||
agents_files.sort(key=lambda p: len(p.parts))
|
||||
|
||||
total_agents_content = ""
|
||||
for agents_path in agents_files:
|
||||
try:
|
||||
content = agents_path.read_text(encoding="utf-8").strip()
|
||||
if content:
|
||||
rel_path = agents_path.relative_to(cwd_path)
|
||||
content = _scan_context_content(content, str(rel_path))
|
||||
total_agents_content += f"## {rel_path}\n\n{content}\n\n"
|
||||
except Exception as e:
|
||||
logger.debug("Could not read %s: %s", agents_path, e)
|
||||
|
||||
if total_agents_content:
|
||||
total_agents_content = _truncate_content(total_agents_content, "AGENTS.md")
|
||||
sections.append(total_agents_content)
|
||||
|
||||
# .cursorrules
|
||||
cursorrules_content = ""
|
||||
cursorrules_file = cwd_path / ".cursorrules"
|
||||
if cursorrules_file.exists():
|
||||
try:
|
||||
content = cursorrules_file.read_text(encoding="utf-8").strip()
|
||||
if content:
|
||||
content = _scan_context_content(content, ".cursorrules")
|
||||
cursorrules_content += f"## .cursorrules\n\n{content}\n\n"
|
||||
except Exception as e:
|
||||
logger.debug("Could not read .cursorrules: %s", e)
|
||||
|
||||
cursor_rules_dir = cwd_path / ".cursor" / "rules"
|
||||
if cursor_rules_dir.exists() and cursor_rules_dir.is_dir():
|
||||
mdc_files = sorted(cursor_rules_dir.glob("*.mdc"))
|
||||
for mdc_file in mdc_files:
|
||||
try:
|
||||
content = mdc_file.read_text(encoding="utf-8").strip()
|
||||
if content:
|
||||
content = _scan_context_content(content, f".cursor/rules/{mdc_file.name}")
|
||||
cursorrules_content += f"## .cursor/rules/{mdc_file.name}\n\n{content}\n\n"
|
||||
except Exception as e:
|
||||
logger.debug("Could not read %s: %s", mdc_file, e)
|
||||
|
||||
if cursorrules_content:
|
||||
cursorrules_content = _truncate_content(cursorrules_content, ".cursorrules")
|
||||
sections.append(cursorrules_content)
|
||||
|
||||
# SOUL.md (cwd first, then ~/.hermes/ fallback)
|
||||
soul_path = None
|
||||
for name in ["SOUL.md", "soul.md"]:
|
||||
candidate = cwd_path / name
|
||||
if candidate.exists():
|
||||
soul_path = candidate
|
||||
break
|
||||
if not soul_path:
|
||||
global_soul = Path.home() / ".hermes" / "SOUL.md"
|
||||
if global_soul.exists():
|
||||
soul_path = global_soul
|
||||
|
||||
if soul_path:
|
||||
try:
|
||||
content = soul_path.read_text(encoding="utf-8").strip()
|
||||
if content:
|
||||
content = _scan_context_content(content, "SOUL.md")
|
||||
content = _truncate_content(content, "SOUL.md")
|
||||
sections.append(
|
||||
f"## SOUL.md\n\nIf SOUL.md is present, embody its persona and tone. "
|
||||
f"Avoid stiff, generic replies; follow its guidance unless higher-priority "
|
||||
f"instructions override it.\n\n{content}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Could not read SOUL.md from %s: %s", soul_path, e)
|
||||
|
||||
if not sections:
|
||||
return ""
|
||||
return "# Project Context\n\nThe following project context files have been loaded and should be followed:\n\n" + "\n".join(sections)
|
||||
@@ -1,68 +0,0 @@
|
||||
"""Anthropic prompt caching (system_and_3 strategy).
|
||||
|
||||
Reduces input token costs by ~75% on multi-turn conversations by caching
|
||||
the conversation prefix. Uses 4 cache_control breakpoints (Anthropic max):
|
||||
1. System prompt (stable across all turns)
|
||||
2-4. Last 3 non-system messages (rolling window)
|
||||
|
||||
Pure functions -- no class state, no AIAgent dependency.
|
||||
"""
|
||||
|
||||
import copy
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
def _apply_cache_marker(msg: dict, cache_marker: dict) -> None:
|
||||
"""Add cache_control to a single message, handling all format variations."""
|
||||
role = msg.get("role", "")
|
||||
content = msg.get("content")
|
||||
|
||||
if role == "tool":
|
||||
msg["cache_control"] = cache_marker
|
||||
return
|
||||
|
||||
if content is None:
|
||||
msg["cache_control"] = cache_marker
|
||||
return
|
||||
|
||||
if isinstance(content, str):
|
||||
msg["content"] = [{"type": "text", "text": content, "cache_control": cache_marker}]
|
||||
return
|
||||
|
||||
if isinstance(content, list) and content:
|
||||
last = content[-1]
|
||||
if isinstance(last, dict):
|
||||
last["cache_control"] = cache_marker
|
||||
|
||||
|
||||
def apply_anthropic_cache_control(
|
||||
api_messages: List[Dict[str, Any]],
|
||||
cache_ttl: str = "5m",
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Apply system_and_3 caching strategy to messages for Anthropic models.
|
||||
|
||||
Places up to 4 cache_control breakpoints: system prompt + last 3 non-system messages.
|
||||
|
||||
Returns:
|
||||
Deep copy of messages with cache_control breakpoints injected.
|
||||
"""
|
||||
messages = copy.deepcopy(api_messages)
|
||||
if not messages:
|
||||
return messages
|
||||
|
||||
marker = {"type": "ephemeral"}
|
||||
if cache_ttl == "1h":
|
||||
marker["ttl"] = "1h"
|
||||
|
||||
breakpoints_used = 0
|
||||
|
||||
if messages[0].get("role") == "system":
|
||||
_apply_cache_marker(messages[0], marker)
|
||||
breakpoints_used += 1
|
||||
|
||||
remaining = 4 - breakpoints_used
|
||||
non_sys = [i for i in range(len(messages)) if messages[i].get("role") != "system"]
|
||||
for idx in non_sys[-remaining:]:
|
||||
_apply_cache_marker(messages[idx], marker)
|
||||
|
||||
return messages
|
||||
161
agent/redact.py
161
agent/redact.py
@@ -1,161 +0,0 @@
|
||||
"""Regex-based secret redaction for logs and tool output.
|
||||
|
||||
Applies pattern matching to mask API keys, tokens, and credentials
|
||||
before they reach log files, verbose output, or gateway logs.
|
||||
|
||||
Short tokens (< 18 chars) are fully masked. Longer tokens preserve
|
||||
the first 6 and last 4 characters for debuggability.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Known API key prefixes -- match the prefix + contiguous token chars
|
||||
_PREFIX_PATTERNS = [
|
||||
r"sk-[A-Za-z0-9_-]{10,}", # OpenAI / OpenRouter / Anthropic (sk-ant-*)
|
||||
r"ghp_[A-Za-z0-9]{10,}", # GitHub PAT (classic)
|
||||
r"github_pat_[A-Za-z0-9_]{10,}", # GitHub PAT (fine-grained)
|
||||
r"xox[baprs]-[A-Za-z0-9-]{10,}", # Slack tokens
|
||||
r"AIza[A-Za-z0-9_-]{30,}", # Google API keys
|
||||
r"pplx-[A-Za-z0-9]{10,}", # Perplexity
|
||||
r"fal_[A-Za-z0-9_-]{10,}", # Fal.ai
|
||||
r"fc-[A-Za-z0-9]{10,}", # Firecrawl
|
||||
r"bb_live_[A-Za-z0-9_-]{10,}", # BrowserBase
|
||||
r"gAAAA[A-Za-z0-9_=-]{20,}", # Codex encrypted tokens
|
||||
r"AKIA[A-Z0-9]{16}", # AWS Access Key ID
|
||||
r"sk_live_[A-Za-z0-9]{10,}", # Stripe secret key (live)
|
||||
r"sk_test_[A-Za-z0-9]{10,}", # Stripe secret key (test)
|
||||
r"rk_live_[A-Za-z0-9]{10,}", # Stripe restricted key
|
||||
r"SG\.[A-Za-z0-9_-]{10,}", # SendGrid API key
|
||||
r"hf_[A-Za-z0-9]{10,}", # HuggingFace token
|
||||
r"r8_[A-Za-z0-9]{10,}", # Replicate API token
|
||||
r"npm_[A-Za-z0-9]{10,}", # npm access token
|
||||
r"pypi-[A-Za-z0-9_-]{10,}", # PyPI API token
|
||||
r"dop_v1_[A-Za-z0-9]{10,}", # DigitalOcean PAT
|
||||
r"doo_v1_[A-Za-z0-9]{10,}", # DigitalOcean OAuth
|
||||
r"am_[A-Za-z0-9_-]{10,}", # AgentMail API key
|
||||
]
|
||||
|
||||
# ENV assignment patterns: KEY=value where KEY contains a secret-like name
|
||||
_SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)"
|
||||
_ENV_ASSIGN_RE = re.compile(
|
||||
rf"([A-Z_]*{_SECRET_ENV_NAMES}[A-Z_]*)\s*=\s*(['\"]?)(\S+)\2",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# JSON field patterns: "apiKey": "value", "token": "value", etc.
|
||||
_JSON_KEY_NAMES = r"(?:api_?[Kk]ey|token|secret|password|access_token|refresh_token|auth_token|bearer)"
|
||||
_JSON_FIELD_RE = re.compile(
|
||||
rf'("{_JSON_KEY_NAMES}")\s*:\s*"([^"]+)"',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# Authorization headers
|
||||
_AUTH_HEADER_RE = re.compile(
|
||||
r"(Authorization:\s*Bearer\s+)(\S+)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# Telegram bot tokens: bot<digits>:<token> or <digits>:<alphanum>
|
||||
_TELEGRAM_RE = re.compile(
|
||||
r"(bot)?(\d{8,}):([-A-Za-z0-9_]{30,})",
|
||||
)
|
||||
|
||||
# Private key blocks: -----BEGIN RSA PRIVATE KEY----- ... -----END RSA PRIVATE KEY-----
|
||||
_PRIVATE_KEY_RE = re.compile(
|
||||
r"-----BEGIN[A-Z ]*PRIVATE KEY-----[\s\S]*?-----END[A-Z ]*PRIVATE KEY-----"
|
||||
)
|
||||
|
||||
# Database connection strings: protocol://user:PASSWORD@host
|
||||
# Catches postgres, mysql, mongodb, redis, amqp URLs and redacts the password
|
||||
_DB_CONNSTR_RE = re.compile(
|
||||
r"((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:]+:)([^@]+)(@)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# E.164 phone numbers: +<country><number>, 7-15 digits
|
||||
# Negative lookahead prevents matching hex strings or identifiers
|
||||
_SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")
|
||||
|
||||
# Compile known prefix patterns into one alternation
|
||||
_PREFIX_RE = re.compile(
|
||||
r"(?<![A-Za-z0-9_-])(" + "|".join(_PREFIX_PATTERNS) + r")(?![A-Za-z0-9_-])"
|
||||
)
|
||||
|
||||
|
||||
def _mask_token(token: str) -> str:
|
||||
"""Mask a token, preserving prefix for long tokens."""
|
||||
if len(token) < 18:
|
||||
return "***"
|
||||
return f"{token[:6]}...{token[-4:]}"
|
||||
|
||||
|
||||
def redact_sensitive_text(text: str) -> str:
|
||||
"""Apply all redaction patterns to a block of text.
|
||||
|
||||
Safe to call on any string -- non-matching text passes through unchanged.
|
||||
Disabled when security.redact_secrets is false in config.yaml.
|
||||
"""
|
||||
if not text:
|
||||
return text
|
||||
if os.getenv("HERMES_REDACT_SECRETS", "").lower() in ("0", "false", "no", "off"):
|
||||
return text
|
||||
|
||||
# Known prefixes (sk-, ghp_, etc.)
|
||||
text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text)
|
||||
|
||||
# ENV assignments: OPENAI_API_KEY=sk-abc...
|
||||
def _redact_env(m):
|
||||
name, quote, value = m.group(1), m.group(2), m.group(3)
|
||||
return f"{name}={quote}{_mask_token(value)}{quote}"
|
||||
text = _ENV_ASSIGN_RE.sub(_redact_env, text)
|
||||
|
||||
# JSON fields: "apiKey": "value"
|
||||
def _redact_json(m):
|
||||
key, value = m.group(1), m.group(2)
|
||||
return f'{key}: "{_mask_token(value)}"'
|
||||
text = _JSON_FIELD_RE.sub(_redact_json, text)
|
||||
|
||||
# Authorization headers
|
||||
text = _AUTH_HEADER_RE.sub(
|
||||
lambda m: m.group(1) + _mask_token(m.group(2)),
|
||||
text,
|
||||
)
|
||||
|
||||
# Telegram bot tokens
|
||||
def _redact_telegram(m):
|
||||
prefix = m.group(1) or ""
|
||||
digits = m.group(2)
|
||||
return f"{prefix}{digits}:***"
|
||||
text = _TELEGRAM_RE.sub(_redact_telegram, text)
|
||||
|
||||
# Private key blocks
|
||||
text = _PRIVATE_KEY_RE.sub("[REDACTED PRIVATE KEY]", text)
|
||||
|
||||
# Database connection string passwords
|
||||
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
|
||||
|
||||
# E.164 phone numbers (Signal, WhatsApp)
|
||||
def _redact_phone(m):
|
||||
phone = m.group(1)
|
||||
if len(phone) <= 8:
|
||||
return phone[:2] + "****" + phone[-2:]
|
||||
return phone[:4] + "****" + phone[-4:]
|
||||
text = _SIGNAL_PHONE_RE.sub(_redact_phone, text)
|
||||
|
||||
return text
|
||||
|
||||
|
||||
class RedactingFormatter(logging.Formatter):
|
||||
"""Log formatter that redacts secrets from all log messages."""
|
||||
|
||||
def __init__(self, fmt=None, datefmt=None, style='%', **kwargs):
|
||||
super().__init__(fmt, datefmt, style, **kwargs)
|
||||
|
||||
def format(self, record: logging.LogRecord) -> str:
|
||||
original = super().format(record)
|
||||
return redact_sensitive_text(original)
|
||||
@@ -1,116 +0,0 @@
|
||||
"""Skill slash commands — scan installed skills and build invocation messages.
|
||||
|
||||
Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces
|
||||
can invoke skills via /skill-name commands.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_skill_commands: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
|
||||
def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
"""Scan ~/.hermes/skills/ and return a mapping of /command -> skill info.
|
||||
|
||||
Returns:
|
||||
Dict mapping "/skill-name" to {name, description, skill_md_path, skill_dir}.
|
||||
"""
|
||||
global _skill_commands
|
||||
_skill_commands = {}
|
||||
try:
|
||||
from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform
|
||||
if not SKILLS_DIR.exists():
|
||||
return _skill_commands
|
||||
for skill_md in SKILLS_DIR.rglob("SKILL.md"):
|
||||
if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
|
||||
continue
|
||||
try:
|
||||
content = skill_md.read_text(encoding='utf-8')
|
||||
frontmatter, body = _parse_frontmatter(content)
|
||||
# Skip skills incompatible with the current OS platform
|
||||
if not skill_matches_platform(frontmatter):
|
||||
continue
|
||||
name = frontmatter.get('name', skill_md.parent.name)
|
||||
description = frontmatter.get('description', '')
|
||||
if not description:
|
||||
for line in body.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line and not line.startswith('#'):
|
||||
description = line[:80]
|
||||
break
|
||||
cmd_name = name.lower().replace(' ', '-').replace('_', '-')
|
||||
_skill_commands[f"/{cmd_name}"] = {
|
||||
"name": name,
|
||||
"description": description or f"Invoke the {name} skill",
|
||||
"skill_md_path": str(skill_md),
|
||||
"skill_dir": str(skill_md.parent),
|
||||
}
|
||||
except Exception:
|
||||
continue
|
||||
except Exception:
|
||||
pass
|
||||
return _skill_commands
|
||||
|
||||
|
||||
def get_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
"""Return the current skill commands mapping (scan first if empty)."""
|
||||
if not _skill_commands:
|
||||
scan_skill_commands()
|
||||
return _skill_commands
|
||||
|
||||
|
||||
def build_skill_invocation_message(cmd_key: str, user_instruction: str = "") -> Optional[str]:
|
||||
"""Build the user message content for a skill slash command invocation.
|
||||
|
||||
Args:
|
||||
cmd_key: The command key including leading slash (e.g., "/gif-search").
|
||||
user_instruction: Optional text the user typed after the command.
|
||||
|
||||
Returns:
|
||||
The formatted message string, or None if the skill wasn't found.
|
||||
"""
|
||||
commands = get_skill_commands()
|
||||
skill_info = commands.get(cmd_key)
|
||||
if not skill_info:
|
||||
return None
|
||||
|
||||
skill_md_path = Path(skill_info["skill_md_path"])
|
||||
skill_dir = Path(skill_info["skill_dir"])
|
||||
skill_name = skill_info["name"]
|
||||
|
||||
try:
|
||||
content = skill_md_path.read_text(encoding='utf-8')
|
||||
except Exception:
|
||||
return f"[Failed to load skill: {skill_name}]"
|
||||
|
||||
parts = [
|
||||
f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
|
||||
"",
|
||||
content.strip(),
|
||||
]
|
||||
|
||||
supporting = []
|
||||
for subdir in ("references", "templates", "scripts", "assets"):
|
||||
subdir_path = skill_dir / subdir
|
||||
if subdir_path.exists():
|
||||
for f in sorted(subdir_path.rglob("*")):
|
||||
if f.is_file():
|
||||
rel = str(f.relative_to(skill_dir))
|
||||
supporting.append(rel)
|
||||
|
||||
if supporting:
|
||||
parts.append("")
|
||||
parts.append("[This skill has supporting files you can load with the skill_view tool:]")
|
||||
for sf in supporting:
|
||||
parts.append(f"- {sf}")
|
||||
parts.append(f'\nTo view any of these, use: skill_view(name="{skill_name}", file="<path>")')
|
||||
|
||||
if user_instruction:
|
||||
parts.append("")
|
||||
parts.append(f"The user has provided the following instruction alongside the skill invocation: {user_instruction}")
|
||||
|
||||
return "\n".join(parts)
|
||||
@@ -1,56 +0,0 @@
|
||||
"""Trajectory saving utilities and static helpers.
|
||||
|
||||
_convert_to_trajectory_format stays as an AIAgent method (batch_runner.py
|
||||
calls agent._convert_to_trajectory_format). Only the static helpers and
|
||||
the file-write logic live here.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def convert_scratchpad_to_think(content: str) -> str:
|
||||
"""Convert <REASONING_SCRATCHPAD> tags to <think> tags."""
|
||||
if not content or "<REASONING_SCRATCHPAD>" not in content:
|
||||
return content
|
||||
return content.replace("<REASONING_SCRATCHPAD>", "<think>").replace("</REASONING_SCRATCHPAD>", "</think>")
|
||||
|
||||
|
||||
def has_incomplete_scratchpad(content: str) -> bool:
|
||||
"""Check if content has an opening <REASONING_SCRATCHPAD> without a closing tag."""
|
||||
if not content:
|
||||
return False
|
||||
return "<REASONING_SCRATCHPAD>" in content and "</REASONING_SCRATCHPAD>" not in content
|
||||
|
||||
|
||||
def save_trajectory(trajectory: List[Dict[str, Any]], model: str,
|
||||
completed: bool, filename: str = None):
|
||||
"""Append a trajectory entry to a JSONL file.
|
||||
|
||||
Args:
|
||||
trajectory: The ShareGPT-format conversation list.
|
||||
model: Model name for metadata.
|
||||
completed: Whether the conversation completed successfully.
|
||||
filename: Override output filename. Defaults to trajectory_samples.jsonl
|
||||
or failed_trajectories.jsonl based on ``completed``.
|
||||
"""
|
||||
if filename is None:
|
||||
filename = "trajectory_samples.jsonl" if completed else "failed_trajectories.jsonl"
|
||||
|
||||
entry = {
|
||||
"conversations": trajectory,
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"model": model,
|
||||
"completed": completed,
|
||||
}
|
||||
|
||||
try:
|
||||
with open(filename, "a", encoding="utf-8") as f:
|
||||
f.write(json.dumps(entry, ensure_ascii=False) + "\n")
|
||||
logger.info("Trajectory saved to %s", filename)
|
||||
except Exception as e:
|
||||
logger.warning("Failed to save trajectory: %s", e)
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 12 KiB |
131
batch_runner.py
131
batch_runner.py
@@ -27,14 +27,16 @@ import time
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
from datetime import datetime
|
||||
from multiprocessing import Pool, Lock
|
||||
from multiprocessing import Pool, Manager, Lock
|
||||
import traceback
|
||||
|
||||
from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, TimeRemainingColumn, MofNCompleteColumn
|
||||
from rich.console import Console
|
||||
import fire
|
||||
|
||||
from run_agent import AIAgent
|
||||
from toolset_distributions import (
|
||||
get_distribution,
|
||||
list_distributions,
|
||||
sample_toolsets_from_distribution,
|
||||
validate_distribution
|
||||
@@ -171,7 +173,7 @@ def _extract_tool_stats(messages: List[Dict[str, Any]]) -> Dict[str, Dict[str, i
|
||||
if content_json.get("success") is False:
|
||||
is_success = False
|
||||
|
||||
except (json.JSONDecodeError, ValueError, TypeError):
|
||||
except:
|
||||
# If not JSON, check if content is empty or explicitly states an error
|
||||
# Note: We avoid simple substring matching to prevent false positives
|
||||
if not content:
|
||||
@@ -238,7 +240,7 @@ def _process_single_prompt(
|
||||
|
||||
Args:
|
||||
prompt_index (int): Index of prompt in dataset
|
||||
prompt_data (Dict): Prompt data containing 'prompt' field and optional 'image' field
|
||||
prompt_data (Dict): Prompt data containing 'prompt' field
|
||||
batch_num (int): Batch number
|
||||
config (Dict): Configuration dict with agent parameters
|
||||
|
||||
@@ -246,58 +248,6 @@ def _process_single_prompt(
|
||||
Dict: Result containing trajectory, stats, and metadata
|
||||
"""
|
||||
prompt = prompt_data["prompt"]
|
||||
task_id = f"task_{prompt_index}"
|
||||
|
||||
# Per-prompt container image override: if the dataset row has an 'image' field,
|
||||
# register it for this task's sandbox. Works with Docker, Modal, Singularity, and Daytona.
|
||||
container_image = prompt_data.get("image") or prompt_data.get("docker_image")
|
||||
if container_image:
|
||||
# Verify the image is accessible before spending tokens on the agent loop.
|
||||
# For Docker: check local cache, then try pulling.
|
||||
# For Modal: skip local check (Modal pulls server-side).
|
||||
env_type = os.getenv("TERMINAL_ENV", "local")
|
||||
if env_type == "docker":
|
||||
import subprocess as _sp
|
||||
try:
|
||||
probe = _sp.run(
|
||||
["docker", "image", "inspect", container_image],
|
||||
capture_output=True, timeout=10,
|
||||
)
|
||||
if probe.returncode != 0:
|
||||
if config.get("verbose"):
|
||||
print(f" Prompt {prompt_index}: Pulling docker image {container_image}...", flush=True)
|
||||
pull = _sp.run(
|
||||
["docker", "pull", container_image],
|
||||
capture_output=True, text=True, timeout=600,
|
||||
)
|
||||
if pull.returncode != 0:
|
||||
return {
|
||||
"success": False,
|
||||
"prompt_index": prompt_index,
|
||||
"error": f"Docker image not available: {container_image}\n{pull.stderr[:500]}",
|
||||
"trajectory": None,
|
||||
"tool_stats": {},
|
||||
"toolsets_used": [],
|
||||
"metadata": {"batch_num": batch_num, "timestamp": datetime.now().isoformat()},
|
||||
}
|
||||
except FileNotFoundError:
|
||||
pass # Docker CLI not installed — skip check (e.g., Modal backend)
|
||||
except Exception as img_err:
|
||||
if config.get("verbose"):
|
||||
print(f" Prompt {prompt_index}: Docker image check failed: {img_err}", flush=True)
|
||||
|
||||
from tools.terminal_tool import register_task_env_overrides
|
||||
overrides = {
|
||||
"docker_image": container_image,
|
||||
"modal_image": container_image,
|
||||
"singularity_image": f"docker://{container_image}",
|
||||
"daytona_image": container_image,
|
||||
}
|
||||
if prompt_data.get("cwd"):
|
||||
overrides["cwd"] = prompt_data["cwd"]
|
||||
register_task_env_overrides(task_id, overrides)
|
||||
if config.get("verbose"):
|
||||
print(f" Prompt {prompt_index}: Using container image {container_image}")
|
||||
|
||||
try:
|
||||
# Sample toolsets from distribution for this prompt
|
||||
@@ -326,12 +276,10 @@ def _process_single_prompt(
|
||||
max_tokens=config.get("max_tokens"),
|
||||
reasoning_config=config.get("reasoning_config"),
|
||||
prefill_messages=config.get("prefill_messages"),
|
||||
skip_context_files=True, # Don't pollute trajectories with SOUL.md/AGENTS.md
|
||||
skip_memory=True, # Don't use persistent memory in batch runs
|
||||
)
|
||||
|
||||
# Run the agent with task_id to ensure each task gets its own isolated VM
|
||||
result = agent.run_conversation(prompt, task_id=task_id)
|
||||
result = agent.run_conversation(prompt, task_id=f"task_{prompt_index}")
|
||||
|
||||
# Extract tool usage statistics
|
||||
tool_stats = _extract_tool_stats(result["messages"])
|
||||
@@ -700,13 +648,14 @@ class BatchRunner:
|
||||
lock (Lock): Optional lock for thread-safe access
|
||||
"""
|
||||
checkpoint_data["last_updated"] = datetime.now().isoformat()
|
||||
|
||||
from utils import atomic_json_write
|
||||
|
||||
if lock:
|
||||
with lock:
|
||||
atomic_json_write(self.checkpoint_file, checkpoint_data)
|
||||
with open(self.checkpoint_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(checkpoint_data, f, indent=2, ensure_ascii=False)
|
||||
else:
|
||||
atomic_json_write(self.checkpoint_file, checkpoint_data)
|
||||
with open(self.checkpoint_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(checkpoint_data, f, indent=2, ensure_ascii=False)
|
||||
|
||||
def _scan_completed_prompts_by_content(self) -> set:
|
||||
"""
|
||||
@@ -831,15 +780,13 @@ class BatchRunner:
|
||||
print(f" New batches created: {len(batches_to_process)}")
|
||||
print("=" * 70 + "\n")
|
||||
|
||||
# Load existing checkpoint (so resume doesn't clobber prior progress)
|
||||
checkpoint_data = self._load_checkpoint()
|
||||
if checkpoint_data.get("run_name") != self.run_name:
|
||||
checkpoint_data = {
|
||||
"run_name": self.run_name,
|
||||
"completed_prompts": [],
|
||||
"batch_stats": {},
|
||||
"last_updated": None
|
||||
}
|
||||
# Initialize checkpoint data (needed for saving at the end)
|
||||
checkpoint_data = {
|
||||
"run_name": self.run_name,
|
||||
"completed_prompts": [],
|
||||
"batch_stats": {},
|
||||
"last_updated": None
|
||||
}
|
||||
|
||||
# Prepare configuration for workers
|
||||
config = {
|
||||
@@ -861,7 +808,7 @@ class BatchRunner:
|
||||
}
|
||||
|
||||
# For backward compatibility, still track by index (but this is secondary to content matching)
|
||||
completed_prompts_set = set(checkpoint_data.get("completed_prompts", []))
|
||||
completed_prompts_set = set()
|
||||
|
||||
# Aggregate statistics across all batches
|
||||
total_tool_stats = {}
|
||||
@@ -870,9 +817,6 @@ class BatchRunner:
|
||||
|
||||
print(f"\n🔧 Initializing {self.num_workers} worker processes...")
|
||||
|
||||
# Checkpoint writes happen in the parent process; keep a lock for safety.
|
||||
checkpoint_lock = Lock()
|
||||
|
||||
# Process batches in parallel
|
||||
with Pool(processes=self.num_workers) as pool:
|
||||
# Create tasks for each batch
|
||||
@@ -918,28 +862,6 @@ class BatchRunner:
|
||||
for result in pool.imap_unordered(_process_batch_worker, tasks):
|
||||
results.append(result)
|
||||
progress.update(task, advance=1)
|
||||
|
||||
# Incremental checkpoint update (so resume works after crash)
|
||||
try:
|
||||
batch_num = result.get('batch_num')
|
||||
completed = result.get('completed_prompts', []) or []
|
||||
completed_prompts_set.update(completed)
|
||||
|
||||
if isinstance(batch_num, int):
|
||||
checkpoint_data.setdefault('batch_stats', {})[str(batch_num)] = {
|
||||
'processed': result.get('processed', 0),
|
||||
'skipped': result.get('skipped', 0),
|
||||
'discarded_no_reasoning': result.get('discarded_no_reasoning', 0),
|
||||
}
|
||||
|
||||
checkpoint_data['completed_prompts'] = sorted(completed_prompts_set)
|
||||
self._save_checkpoint(checkpoint_data, lock=checkpoint_lock)
|
||||
except Exception as ckpt_err:
|
||||
# Don't fail the run if checkpoint write fails
|
||||
print(f"⚠️ Warning: Failed to save incremental checkpoint: {ckpt_err}")
|
||||
except Exception as e:
|
||||
logger.error("Batch worker failed: %s", e, exc_info=True)
|
||||
raise
|
||||
finally:
|
||||
root_logger.setLevel(original_level)
|
||||
|
||||
@@ -968,12 +890,9 @@ class BatchRunner:
|
||||
for key in total_reasoning_stats:
|
||||
total_reasoning_stats[key] += batch_result.get("reasoning_stats", {}).get(key, 0)
|
||||
|
||||
# Save final checkpoint (best-effort; incremental writes already happened)
|
||||
try:
|
||||
checkpoint_data["completed_prompts"] = all_completed_prompts
|
||||
self._save_checkpoint(checkpoint_data, lock=checkpoint_lock)
|
||||
except Exception as ckpt_err:
|
||||
print(f"âš ï¸ Warning: Failed to save final checkpoint: {ckpt_err}")
|
||||
# Save final checkpoint
|
||||
checkpoint_data["completed_prompts"] = all_completed_prompts
|
||||
self._save_checkpoint(checkpoint_data)
|
||||
|
||||
# Calculate success rates
|
||||
for tool_name in total_tool_stats:
|
||||
@@ -1112,7 +1031,7 @@ def main(
|
||||
batch_size: int = None,
|
||||
run_name: str = None,
|
||||
distribution: str = "default",
|
||||
model: str = "anthropic/claude-sonnet-4.6",
|
||||
model: str = "anthropic/claude-sonnet-4-20250514",
|
||||
api_key: str = None,
|
||||
base_url: str = "https://openrouter.ai/api/v1",
|
||||
max_turns: int = 10,
|
||||
@@ -1155,7 +1074,7 @@ def main(
|
||||
providers_order (str): Comma-separated list of OpenRouter providers to try in order (e.g. "anthropic,openai,google")
|
||||
provider_sort (str): Sort providers by "price", "throughput", or "latency" (OpenRouter only)
|
||||
max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
|
||||
reasoning_effort (str): OpenRouter reasoning effort level: "xhigh", "high", "medium", "low", "minimal", "none" (default: "medium")
|
||||
reasoning_effort (str): OpenRouter reasoning effort level: "xhigh", "high", "medium", "low", "minimal", "none" (default: "xhigh")
|
||||
reasoning_disabled (bool): Completely disable reasoning/thinking tokens (default: False)
|
||||
prefill_messages_file (str): Path to JSON file containing prefill messages (list of {role, content} dicts)
|
||||
max_samples (int): Only process the first N samples from the dataset (optional, processes all if not set)
|
||||
@@ -1216,7 +1135,7 @@ def main(
|
||||
providers_order_list = [p.strip() for p in providers_order.split(",")] if providers_order else None
|
||||
|
||||
# Build reasoning_config from CLI flags
|
||||
# --reasoning_disabled takes priority, then --reasoning_effort, then default (medium)
|
||||
# --reasoning_disabled takes priority, then --reasoning_effort, then default (xhigh)
|
||||
reasoning_config = None
|
||||
if reasoning_disabled:
|
||||
# Completely disable reasoning/thinking tokens
|
||||
|
||||
@@ -9,57 +9,10 @@ model:
|
||||
# Default model to use (can be overridden with --model flag)
|
||||
default: "anthropic/claude-opus-4.6"
|
||||
|
||||
# Inference provider selection:
|
||||
# "auto" - Use Nous Portal if logged in, otherwise OpenRouter/env vars (default)
|
||||
# "openrouter" - Always use OpenRouter API key from OPENROUTER_API_KEY
|
||||
# "nous" - Always use Nous Portal (requires: hermes login)
|
||||
# "zai" - Use z.ai / ZhipuAI GLM models (requires: GLM_API_KEY)
|
||||
# "kimi-coding"- Use Kimi / Moonshot AI models (requires: KIMI_API_KEY)
|
||||
# "minimax" - Use MiniMax global endpoint (requires: MINIMAX_API_KEY)
|
||||
# "minimax-cn" - Use MiniMax China endpoint (requires: MINIMAX_CN_API_KEY)
|
||||
# Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
|
||||
provider: "auto"
|
||||
|
||||
# API configuration (falls back to OPENROUTER_API_KEY env var)
|
||||
# api_key: "your-key-here" # Uncomment to set here instead of .env
|
||||
base_url: "https://openrouter.ai/api/v1"
|
||||
|
||||
# =============================================================================
|
||||
# OpenRouter Provider Routing (only applies when using OpenRouter)
|
||||
# =============================================================================
|
||||
# Control how requests are routed across providers on OpenRouter.
|
||||
# See: https://openrouter.ai/docs/guides/routing/provider-selection
|
||||
#
|
||||
# provider_routing:
|
||||
# # Sort strategy: "price" (default), "throughput", or "latency"
|
||||
# # Append :nitro to model name for a shortcut to throughput sorting.
|
||||
# sort: "throughput"
|
||||
#
|
||||
# # Only allow these providers (provider slugs from OpenRouter)
|
||||
# # only: ["anthropic", "google"]
|
||||
#
|
||||
# # Skip these providers entirely
|
||||
# # ignore: ["deepinfra", "fireworks"]
|
||||
#
|
||||
# # Try providers in this order (overrides default load balancing)
|
||||
# # order: ["anthropic", "google", "together"]
|
||||
#
|
||||
# # Require providers to support all parameters in your request
|
||||
# # require_parameters: true
|
||||
#
|
||||
# # Data policy: "allow" (default) or "deny" to exclude providers that may store data
|
||||
# # data_collection: "deny"
|
||||
|
||||
# =============================================================================
|
||||
# Git Worktree Isolation
|
||||
# =============================================================================
|
||||
# When enabled, each CLI session creates an isolated git worktree so multiple
|
||||
# agents can work on the same repo concurrently without file collisions.
|
||||
# Equivalent to always passing --worktree / -w on the command line.
|
||||
#
|
||||
# worktree: true # Always create a worktree when in a git repo
|
||||
# worktree: false # Default — only create when -w flag is passed
|
||||
|
||||
# =============================================================================
|
||||
# Terminal Tool Configuration
|
||||
# =============================================================================
|
||||
@@ -74,8 +27,8 @@ model:
|
||||
# - CLI (`hermes` command): Uses "." (current directory where you run hermes)
|
||||
# - Messaging (Telegram/Discord): Uses MESSAGING_CWD from .env (default: home)
|
||||
terminal:
|
||||
backend: "local"
|
||||
cwd: "." # For local backend: "." = current directory. Ignored for remote backends.
|
||||
env_type: "local"
|
||||
cwd: "." # CLI working directory - "." means current directory
|
||||
timeout: 180
|
||||
lifetime_seconds: 300
|
||||
# sudo_password: "" # Enable sudo commands (pipes via sudo -S) - SECURITY WARNING: plaintext!
|
||||
@@ -86,8 +39,8 @@ terminal:
|
||||
# Great for: keeping agent isolated from its own code, using powerful remote hardware
|
||||
# -----------------------------------------------------------------------------
|
||||
# terminal:
|
||||
# backend: "ssh"
|
||||
# cwd: "/home/myuser/project" # Path on the REMOTE server
|
||||
# env_type: "ssh"
|
||||
# cwd: "/home/myuser/project"
|
||||
# timeout: 180
|
||||
# lifetime_seconds: 300
|
||||
# ssh_host: "my-server.example.com"
|
||||
@@ -101,8 +54,8 @@ terminal:
|
||||
# Great for: reproducible environments, testing, isolation
|
||||
# -----------------------------------------------------------------------------
|
||||
# terminal:
|
||||
# backend: "docker"
|
||||
# cwd: "/workspace" # Path INSIDE the container (default: /)
|
||||
# env_type: "docker"
|
||||
# cwd: "/workspace"
|
||||
# timeout: 180
|
||||
# lifetime_seconds: 300
|
||||
# docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
@@ -113,8 +66,8 @@ terminal:
|
||||
# Great for: HPC clusters, shared compute environments
|
||||
# -----------------------------------------------------------------------------
|
||||
# terminal:
|
||||
# backend: "singularity"
|
||||
# cwd: "/workspace" # Path INSIDE the container (default: /root)
|
||||
# env_type: "singularity"
|
||||
# cwd: "/workspace"
|
||||
# timeout: 180
|
||||
# lifetime_seconds: 300
|
||||
# singularity_image: "docker://nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
@@ -125,35 +78,12 @@ terminal:
|
||||
# Great for: GPU access, scalable compute, serverless execution
|
||||
# -----------------------------------------------------------------------------
|
||||
# terminal:
|
||||
# backend: "modal"
|
||||
# cwd: "/workspace" # Path INSIDE the sandbox (default: /root)
|
||||
# env_type: "modal"
|
||||
# cwd: "/workspace"
|
||||
# timeout: 180
|
||||
# lifetime_seconds: 300
|
||||
# modal_image: "nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# OPTION 6: Daytona cloud execution
|
||||
# Commands run in Daytona cloud sandboxes
|
||||
# Great for: Cloud dev environments, persistent workspaces, team collaboration
|
||||
# Requires: pip install daytona, DAYTONA_API_KEY env var
|
||||
# -----------------------------------------------------------------------------
|
||||
# terminal:
|
||||
# backend: "daytona"
|
||||
# cwd: "~"
|
||||
# timeout: 180
|
||||
# lifetime_seconds: 300
|
||||
# daytona_image: "nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
# container_disk: 10240 # Daytona max is 10GB per sandbox
|
||||
|
||||
#
|
||||
# --- Container resource limits (docker, singularity, modal, daytona -- ignored for local/ssh) ---
|
||||
# These settings apply to all container backends. They control the resources
|
||||
# allocated to the sandbox and whether its filesystem persists across sessions.
|
||||
container_cpu: 1 # CPU cores
|
||||
container_memory: 5120 # Memory in MB (5120 = 5GB)
|
||||
container_disk: 51200 # Disk in MB (51200 = 50GB)
|
||||
container_persistent: true # Persist filesystem across sessions (false = ephemeral)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# SUDO SUPPORT (works with ALL backends above)
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -209,126 +139,8 @@ compression:
|
||||
threshold: 0.85
|
||||
|
||||
# Model to use for generating summaries (fast/cheap recommended)
|
||||
# This model compresses the middle turns into a concise summary.
|
||||
# IMPORTANT: it receives the full middle section of the conversation, so it
|
||||
# MUST support a context length at least as large as your main model's.
|
||||
# This model compresses the middle turns into a concise summary
|
||||
summary_model: "google/gemini-3-flash-preview"
|
||||
|
||||
# Provider for the summary model (default: "auto")
|
||||
# Options: "auto", "openrouter", "nous", "main"
|
||||
# summary_provider: "auto"
|
||||
|
||||
# =============================================================================
|
||||
# Auxiliary Models (Advanced — Experimental)
|
||||
# =============================================================================
|
||||
# Hermes uses lightweight "auxiliary" models for side tasks: image analysis,
|
||||
# browser screenshot analysis, web page summarization, and context compression.
|
||||
#
|
||||
# By default these use Gemini Flash via OpenRouter or Nous Portal and are
|
||||
# auto-detected from your credentials. You do NOT need to change anything
|
||||
# here for normal usage.
|
||||
#
|
||||
# WARNING: Overriding these with providers other than OpenRouter or Nous Portal
|
||||
# is EXPERIMENTAL and may not work. Not all models/providers support vision,
|
||||
# produce usable summaries, or accept the same API format. Change at your own
|
||||
# risk — if things break, reset to "auto" / empty values.
|
||||
#
|
||||
# Each task has its own provider + model pair so you can mix providers.
|
||||
# For example: OpenRouter for vision (needs multimodal), but your main
|
||||
# local endpoint for compression (just needs text).
|
||||
#
|
||||
# Provider options:
|
||||
# "auto" - Best available: OpenRouter → Nous Portal → main endpoint (default)
|
||||
# "openrouter" - Force OpenRouter (requires OPENROUTER_API_KEY)
|
||||
# "nous" - Force Nous Portal (requires: hermes login)
|
||||
# "codex" - Force Codex OAuth (requires: hermes model → Codex).
|
||||
# Uses gpt-5.3-codex which supports vision.
|
||||
# "main" - Use your custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY).
|
||||
# Works with OpenAI API, local models, or any OpenAI-compatible
|
||||
# endpoint. Also falls back to Codex OAuth and API-key providers.
|
||||
#
|
||||
# Model: leave empty to use the provider's default. When empty, OpenRouter
|
||||
# uses "google/gemini-3-flash-preview" and Nous uses "gemini-3-flash".
|
||||
# Other providers pick a sensible default automatically.
|
||||
#
|
||||
# auxiliary:
|
||||
# # Image analysis: vision_analyze tool + browser screenshots
|
||||
# vision:
|
||||
# provider: "auto"
|
||||
# model: "" # e.g. "google/gemini-2.5-flash", "openai/gpt-4o"
|
||||
#
|
||||
# # Web page scraping / summarization + browser page text extraction
|
||||
# web_extract:
|
||||
# provider: "auto"
|
||||
# model: ""
|
||||
|
||||
# =============================================================================
|
||||
# Persistent Memory
|
||||
# =============================================================================
|
||||
# Bounded curated memory injected into the system prompt every session.
|
||||
# Two stores: MEMORY.md (agent's notes) and USER.md (user profile).
|
||||
# Character limits keep the memory small and focused. The agent manages
|
||||
# pruning -- when at the limit, it must consolidate or replace entries.
|
||||
# Disabled by default in batch_runner and RL environments.
|
||||
#
|
||||
memory:
|
||||
# Agent's personal notes: environment facts, conventions, things learned
|
||||
memory_enabled: true
|
||||
|
||||
# User profile: preferences, communication style, expectations
|
||||
user_profile_enabled: true
|
||||
|
||||
# Character limits (~2.75 chars per token, model-independent)
|
||||
memory_char_limit: 2200 # ~800 tokens
|
||||
user_char_limit: 1375 # ~500 tokens
|
||||
|
||||
# Periodic memory nudge: remind the agent to consider saving memories
|
||||
# every N user turns. Set to 0 to disable. Only active when memory is enabled.
|
||||
nudge_interval: 10 # Nudge every 10 user turns (0 = disabled)
|
||||
|
||||
# Memory flush: give the agent one turn to save memories before context is
|
||||
# lost (compression, /new, /reset, exit). Set to 0 to disable.
|
||||
# For exit/reset, only fires if the session had at least this many user turns.
|
||||
flush_min_turns: 6 # Min user turns to trigger flush on exit/reset (0 = disabled)
|
||||
|
||||
# =============================================================================
|
||||
# Session Reset Policy (Messaging Platforms)
|
||||
# =============================================================================
|
||||
# Controls when messaging sessions (Telegram, Discord, WhatsApp, Slack) are
|
||||
# automatically cleared. Without resets, conversation context grows indefinitely
|
||||
# which increases API costs with every message.
|
||||
#
|
||||
# When a reset triggers, the agent first saves important information to its
|
||||
# persistent memory — but the conversation context is wiped. The agent starts
|
||||
# fresh but retains learned facts via its memory system.
|
||||
#
|
||||
# Users can always manually reset with /reset or /new in chat.
|
||||
#
|
||||
# Modes:
|
||||
# "both" - Reset on EITHER inactivity timeout or daily boundary (recommended)
|
||||
# "idle" - Reset only after N minutes of inactivity
|
||||
# "daily" - Reset only at a fixed hour each day
|
||||
# "none" - Never auto-reset; context lives until /reset or compression kicks in
|
||||
#
|
||||
# When a reset triggers, the agent gets one turn to save important memories and
|
||||
# skills before the context is wiped. Persistent memory carries across sessions.
|
||||
#
|
||||
session_reset:
|
||||
mode: both # "both", "idle", "daily", or "none"
|
||||
idle_minutes: 1440 # Inactivity timeout in minutes (default: 1440 = 24 hours)
|
||||
at_hour: 4 # Daily reset hour, 0-23 local time (default: 4 AM)
|
||||
|
||||
# =============================================================================
|
||||
# Skills Configuration
|
||||
# =============================================================================
|
||||
# Skills are reusable procedures the agent can load and follow. The agent can
|
||||
# also create new skills after completing complex tasks.
|
||||
#
|
||||
skills:
|
||||
# Nudge the agent to create skills after complex tasks.
|
||||
# Every N tool-calling iterations, remind the model to consider saving a skill.
|
||||
# Set to 0 to disable.
|
||||
creation_nudge_interval: 15
|
||||
|
||||
# =============================================================================
|
||||
# Agent Behavior
|
||||
@@ -342,10 +154,9 @@ agent:
|
||||
# Enable verbose logging
|
||||
verbose: false
|
||||
|
||||
# Reasoning effort level (OpenRouter and Nous Portal)
|
||||
# Controls how much "thinking" the model does before responding.
|
||||
# Options: "xhigh" (max), "high", "medium", "low", "minimal", "none" (disable)
|
||||
reasoning_effort: "medium"
|
||||
# Custom system prompt (personality, instructions, etc.)
|
||||
# Leave empty or remove to use default agent behavior
|
||||
system_prompt: ""
|
||||
|
||||
# Predefined personalities (use with /personality command)
|
||||
personalities:
|
||||
@@ -370,107 +181,19 @@ agent:
|
||||
# Control which tools the agent has access to.
|
||||
# Use "all" to enable everything, or specify individual toolsets.
|
||||
|
||||
# =============================================================================
|
||||
# Platform Toolsets (per-platform tool configuration)
|
||||
# =============================================================================
|
||||
# Override which toolsets are available on each platform.
|
||||
# If a platform isn't listed here, its built-in default is used.
|
||||
#
|
||||
# You can use EITHER:
|
||||
# - A preset like "hermes-cli" or "hermes-telegram" (curated tool set)
|
||||
# - A list of individual toolsets to compose your own (see list below)
|
||||
#
|
||||
# Supported platform keys: cli, telegram, discord, whatsapp, slack
|
||||
#
|
||||
# Examples:
|
||||
#
|
||||
# # Use presets (same as defaults):
|
||||
# platform_toolsets:
|
||||
# cli: [hermes-cli]
|
||||
# telegram: [hermes-telegram]
|
||||
#
|
||||
# # Custom: give Telegram only web + terminal + file + planning:
|
||||
# platform_toolsets:
|
||||
# telegram: [web, terminal, file, todo]
|
||||
#
|
||||
# # Custom: CLI without browser or image gen:
|
||||
# platform_toolsets:
|
||||
# cli: [web, terminal, file, skills, todo, tts, cronjob]
|
||||
#
|
||||
# # Restrictive: Discord gets read-only tools only:
|
||||
# platform_toolsets:
|
||||
# discord: [web, vision, skills, todo]
|
||||
#
|
||||
# If not set, defaults are:
|
||||
# cli: hermes-cli (everything + cronjob management)
|
||||
# telegram: hermes-telegram (terminal, file, web, vision, image, tts, browser, skills, todo, cronjob, messaging)
|
||||
# discord: hermes-discord (same as telegram)
|
||||
# whatsapp: hermes-whatsapp (same as telegram)
|
||||
# slack: hermes-slack (same as telegram)
|
||||
#
|
||||
platform_toolsets:
|
||||
cli: [hermes-cli]
|
||||
telegram: [hermes-telegram]
|
||||
discord: [hermes-discord]
|
||||
whatsapp: [hermes-whatsapp]
|
||||
slack: [hermes-slack]
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Available toolsets (use these names in platform_toolsets or the toolsets list)
|
||||
#
|
||||
# Run `hermes chat --list-toolsets` to see all toolsets and their tools.
|
||||
# Run `hermes chat --list-tools` to see every individual tool with descriptions.
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# INDIVIDUAL TOOLSETS (compose your own):
|
||||
# web - web_search, web_extract
|
||||
# search - web_search only (no scraping)
|
||||
# terminal - terminal, process
|
||||
# file - read_file, write_file, patch, search
|
||||
# browser - browser_navigate, browser_snapshot, browser_click, browser_type,
|
||||
# browser_scroll, browser_back, browser_press, browser_close,
|
||||
# browser_get_images, browser_vision (requires BROWSERBASE_API_KEY)
|
||||
# vision - vision_analyze (requires OPENROUTER_API_KEY)
|
||||
# image_gen - image_generate (requires FAL_KEY)
|
||||
# skills - skills_list, skill_view
|
||||
# skills_hub - skill_hub (search/install/manage from online registries — user-driven only)
|
||||
# moa - mixture_of_agents (requires OPENROUTER_API_KEY)
|
||||
# todo - todo (in-memory task planning, no deps)
|
||||
# tts - text_to_speech (Edge TTS free, or ELEVENLABS/OPENAI key)
|
||||
# cronjob - schedule_cronjob, list_cronjobs, remove_cronjob
|
||||
# rl - rl_list_environments, rl_start_training, etc. (requires TINKER_API_KEY)
|
||||
#
|
||||
# PRESETS (curated bundles):
|
||||
# hermes-cli - All of the above except rl + send_message
|
||||
# hermes-telegram - terminal, file, web, vision, image_gen, tts, browser,
|
||||
# skills, todo, cronjob, send_message
|
||||
# hermes-discord - Same as hermes-telegram
|
||||
# hermes-whatsapp - Same as hermes-telegram
|
||||
# hermes-slack - Same as hermes-telegram
|
||||
#
|
||||
# COMPOSITE:
|
||||
# debugging - terminal + web + file
|
||||
# safe - web + vision + moa (no terminal access)
|
||||
# all - Everything available
|
||||
# Available toolsets:
|
||||
#
|
||||
# web - Web search and content extraction (web_search, web_extract)
|
||||
# search - Web search only, no scraping (web_search)
|
||||
# terminal - Command execution and process management (terminal, process)
|
||||
# file - File operations: read, write, patch, search
|
||||
# terminal - Command execution (terminal)
|
||||
# browser - Full browser automation (navigate, click, type, screenshot, etc.)
|
||||
# vision - Image analysis (vision_analyze)
|
||||
# image_gen - Image generation with FLUX (image_generate)
|
||||
# skills - Load skill documents (skills_list, skill_view)
|
||||
# skills - Load skill documents (skills_categories, skills_list, skill_view)
|
||||
# moa - Mixture of Agents reasoning (mixture_of_agents)
|
||||
# todo - Task planning and tracking for multi-step work
|
||||
# memory - Persistent memory across sessions (personal notes + user profile)
|
||||
# session_search - Search and recall past conversations (FTS5 + Gemini Flash summarization)
|
||||
# tts - Text-to-speech (Edge TTS free, ElevenLabs, OpenAI)
|
||||
# cronjob - Schedule and manage automated tasks (CLI-only)
|
||||
# rl - RL training tools (Tinker-Atropos)
|
||||
#
|
||||
# Composite toolsets:
|
||||
# debugging - terminal + web + file (for troubleshooting)
|
||||
# debugging - terminal + web (for troubleshooting)
|
||||
# safe - web + vision + moa (no terminal access)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
@@ -521,74 +244,6 @@ toolsets:
|
||||
# toolsets:
|
||||
# - safe
|
||||
|
||||
# =============================================================================
|
||||
# MCP (Model Context Protocol) Servers
|
||||
# =============================================================================
|
||||
# Connect to external MCP servers to add tools from the MCP ecosystem.
|
||||
# Each server's tools are automatically discovered and registered.
|
||||
# See docs/mcp.md for full documentation.
|
||||
#
|
||||
# Stdio servers (spawn a subprocess):
|
||||
# command: the executable to run
|
||||
# args: command-line arguments
|
||||
# env: environment variables (only these + safe defaults passed to subprocess)
|
||||
#
|
||||
# HTTP servers (connect to a URL):
|
||||
# url: the MCP server endpoint
|
||||
# headers: HTTP headers (e.g., for authentication)
|
||||
#
|
||||
# Optional per-server settings:
|
||||
# timeout: tool call timeout in seconds (default: 120)
|
||||
# connect_timeout: initial connection timeout (default: 60)
|
||||
#
|
||||
# mcp_servers:
|
||||
# time:
|
||||
# command: uvx
|
||||
# args: ["mcp-server-time"]
|
||||
# filesystem:
|
||||
# command: npx
|
||||
# args: ["-y", "@modelcontextprotocol/server-filesystem", "/home/user"]
|
||||
# notion:
|
||||
# url: https://mcp.notion.com/mcp
|
||||
# github:
|
||||
# command: npx
|
||||
# args: ["-y", "@modelcontextprotocol/server-github"]
|
||||
# env:
|
||||
# GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_..."
|
||||
#
|
||||
# Sampling (server-initiated LLM requests) — enabled by default.
|
||||
# Per-server config under the 'sampling' key:
|
||||
# analysis:
|
||||
# command: npx
|
||||
# args: ["-y", "analysis-server"]
|
||||
# sampling:
|
||||
# enabled: true # default: true
|
||||
# model: "gemini-3-flash" # override model (optional)
|
||||
# max_tokens_cap: 4096 # max tokens per request
|
||||
# timeout: 30 # LLM call timeout (seconds)
|
||||
# max_rpm: 10 # max requests per minute
|
||||
# allowed_models: [] # model whitelist (empty = all)
|
||||
# max_tool_rounds: 5 # tool loop limit (0 = disable)
|
||||
# log_level: "info" # audit verbosity
|
||||
|
||||
# =============================================================================
|
||||
# Voice Transcription (Speech-to-Text)
|
||||
# =============================================================================
|
||||
# Automatically transcribe voice messages on messaging platforms.
|
||||
# Requires OPENAI_API_KEY in .env (uses OpenAI Whisper API directly).
|
||||
stt:
|
||||
enabled: true
|
||||
model: "whisper-1" # whisper-1 (cheapest) | gpt-4o-mini-transcribe | gpt-4o-transcribe
|
||||
|
||||
# =============================================================================
|
||||
# Response Pacing (Messaging Platforms)
|
||||
# =============================================================================
|
||||
# Add human-like delays between message chunks.
|
||||
# human_delay:
|
||||
# mode: "off" # "off" | "natural" | "custom"
|
||||
# min_ms: 800 # Min delay (custom mode only)
|
||||
# max_ms: 2500 # Max delay (custom mode only)
|
||||
|
||||
# =============================================================================
|
||||
# Session Logging
|
||||
# =============================================================================
|
||||
@@ -604,54 +259,9 @@ stt:
|
||||
# No configuration needed - logging is always enabled.
|
||||
# To disable, you would need to modify the source code.
|
||||
|
||||
# =============================================================================
|
||||
# Code Execution Sandbox (Programmatic Tool Calling)
|
||||
# =============================================================================
|
||||
# The execute_code tool runs Python scripts that call Hermes tools via RPC.
|
||||
# Intermediate tool results stay out of the LLM's context window.
|
||||
code_execution:
|
||||
timeout: 300 # Max seconds per script before kill (default: 300 = 5 min)
|
||||
max_tool_calls: 50 # Max RPC tool calls per execution (default: 50)
|
||||
|
||||
# =============================================================================
|
||||
# Subagent Delegation
|
||||
# =============================================================================
|
||||
# The delegate_task tool spawns child agents with isolated context.
|
||||
# Supports single tasks and batch mode (up to 3 parallel).
|
||||
delegation:
|
||||
max_iterations: 50 # Max tool-calling turns per child (default: 50)
|
||||
default_toolsets: ["terminal", "file", "web"] # Default toolsets for subagents
|
||||
|
||||
# =============================================================================
|
||||
# Honcho Integration (Cross-Session User Modeling)
|
||||
# =============================================================================
|
||||
# AI-native persistent memory via Honcho (https://honcho.dev/).
|
||||
# Builds a deeper understanding of the user across sessions and tools.
|
||||
# Runs alongside USER.md — additive, not a replacement.
|
||||
#
|
||||
# Requires: pip install honcho-ai
|
||||
# Config: ~/.honcho/config.json (shared with Claude Code, Cursor, etc.)
|
||||
# API key: HONCHO_API_KEY in ~/.hermes/.env or ~/.honcho/config.json
|
||||
#
|
||||
# Hermes-specific overrides (optional — most config comes from ~/.honcho/config.json):
|
||||
# honcho: {}
|
||||
|
||||
# =============================================================================
|
||||
# Display
|
||||
# =============================================================================
|
||||
display:
|
||||
# Use compact banner mode
|
||||
compact: false
|
||||
|
||||
# Tool progress display level (CLI and gateway)
|
||||
# off: Silent — no tool activity shown, just the final response
|
||||
# new: Show a tool indicator only when the tool changes (skip repeats)
|
||||
# all: Show every tool call with a short preview (default)
|
||||
# verbose: Full args, results, and debug logs (same as /verbose)
|
||||
# Toggle at runtime with /verbose in the CLI
|
||||
tool_progress: all
|
||||
|
||||
# Play terminal bell when agent finishes a response.
|
||||
# Useful for long-running tasks — your terminal will ding when the agent is done.
|
||||
# Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
|
||||
bell_on_complete: false
|
||||
|
||||
42
configs/run_browser_tasks.sh
Executable file
42
configs/run_browser_tasks.sh
Executable file
@@ -0,0 +1,42 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Browser-focused data generation run
|
||||
# Uses browser-use-tasks.jsonl (6504 tasks)
|
||||
# Distribution: browser 97%, web 20%, vision 12%, terminal 15%
|
||||
|
||||
# Create logs directory if it doesn't exist
|
||||
mkdir -p logs
|
||||
|
||||
# Generate log filename with timestamp
|
||||
LOG_FILE="logs/browser_tasks_$(date +%Y%m%d_%H%M%S).log"
|
||||
|
||||
echo "📝 Logging output to: $LOG_FILE"
|
||||
echo "🌐 Running browser-focused tasks with browser_tasks distribution"
|
||||
|
||||
python batch_runner.py \
|
||||
--dataset_file="browser-use-tasks.jsonl" \
|
||||
--batch_size=20 \
|
||||
--run_name="browser_tasks" \
|
||||
--distribution="browser_tasks" \
|
||||
--model="moonshotai/kimi-k2.5" \
|
||||
--verbose \
|
||||
--base_url="https://openrouter.ai/api/v1" \
|
||||
--num_workers=50 \
|
||||
--max_turns=60 \
|
||||
--resume \
|
||||
--ephemeral_system_prompt="You are an AI assistant with browser automation capabilities. Your primary task is to navigate and interact with web pages to accomplish user goals.
|
||||
|
||||
IMPORTANT GUIDELINES:
|
||||
|
||||
1. SEARCHING: Do NOT try to search directly on Google or other search engines via the browser - they block automated searches. Instead, ALWAYS use the web_search tool first to find URLs for any pages you need to visit, then use browser tools to navigate to those URLs.
|
||||
|
||||
2. COOKIE/PRIVACY DIALOGS: After navigating to a page, ALWAYS check if there are cookie consent dialogs, privacy popups, or overlay modals blocking the page. These appear in snapshots as 'dialog' elements with buttons like 'Close', 'Accept', 'Accept All', 'Decline', 'I Agree', 'Got it', 'OK', or 'X'. You MUST dismiss these dialogs FIRST by clicking the appropriate button before trying to interact with other page elements. After dismissing a dialog, take a fresh browser_snapshot to get updated element references.
|
||||
|
||||
3. HANDLING TIMEOUTS: If an action times out, it often means the element is blocked by an overlay or the page state has changed. Take a new snapshot to see the current page state and look for any dialogs or popups that need to be dismissed. If there is no dialog box to bypass, then try a new method or report the error to the user and complete the task.
|
||||
|
||||
4. GENERAL: Use browser tools to click elements, fill forms, extract information, and perform web-based tasks. If terminal is available, use it for any local file operations or computations needed to support your web tasks. Be thorough in verifying your actions and handle any errors gracefully by retrying or trying alternative approaches." \
|
||||
2>&1 | tee "$LOG_FILE"
|
||||
|
||||
echo "✅ Log saved to: $LOG_FILE"
|
||||
|
||||
# --providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
|
||||
26
configs/run_datagen_glm4.7-imagen.sh
Executable file
26
configs/run_datagen_glm4.7-imagen.sh
Executable file
@@ -0,0 +1,26 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Create logs directory if it doesn't exist
|
||||
mkdir -p logs
|
||||
|
||||
# Generate a timestamp for the log file
|
||||
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
|
||||
LOG_FILE="logs/imagen_eval_gpt5_${TIMESTAMP}.log"
|
||||
|
||||
echo "📝 Logging output to: $LOG_FILE"
|
||||
|
||||
python batch_runner.py \
|
||||
--dataset_file="source-data/hermes-agent-imagen-data/hermes_agent_imagen_train_sft.jsonl" \
|
||||
--batch_size=20 \
|
||||
--run_name="imagen_train_sft_glm4.7" \
|
||||
--distribution="image_gen" \
|
||||
--model="z-ai/glm-4.7" \
|
||||
--base_url="https://openrouter.ai/api/v1" \
|
||||
--providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
|
||||
--num_workers=50 \
|
||||
--max_turns=25 \
|
||||
--ephemeral_system_prompt="When generating an image for the user view the image by using the vision_analyze tool to ensure it is what the user wanted. If it isn't feel free to retry a few times. If none are perfect, choose the best option that is the closest match, and explain its imperfections. If the image generation tool fails, try again a few times. If the vision analyze tool fails, provide the image to the user and explain it is your best effort attempt." \
|
||||
2>&1 | tee "$LOG_FILE"
|
||||
|
||||
echo "✅ Log saved to: $LOG_FILE"
|
||||
# --verbose \
|
||||
26
configs/run_datagen_glm4.7.sh
Executable file
26
configs/run_datagen_glm4.7.sh
Executable file
@@ -0,0 +1,26 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Create logs directory if it doesn't exist
|
||||
mkdir -p logs
|
||||
|
||||
# Generate log filename with timestamp
|
||||
LOG_FILE="logs/glm4.7-thinking-sft1_$(date +%Y%m%d_%H%M%S).log"
|
||||
|
||||
echo "📝 Logging output to: $LOG_FILE"
|
||||
|
||||
python batch_runner.py \
|
||||
--dataset_file="source-data/hermes-agent-agent-tasks-1/agent_tasks_sft_2.jsonl" \
|
||||
--batch_size=20 \
|
||||
--run_name="megascience_glm4.7-thinking-sft2" \
|
||||
--distribution="science" \
|
||||
--model="z-ai/glm-4.7" \
|
||||
--base_url="https://openrouter.ai/api/v1" \
|
||||
--providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
|
||||
--num_workers=15 \
|
||||
--max_turns=60 \
|
||||
--ephemeral_system_prompt="You have access to a variety of tools to help you solve scientific, math, and technology problems presented to you. You can use them in sequence and build off of the results of prior tools you've used results. Always use the terminal or search tool if it can provide additional context, verify formulas, double check concepts and recent studies and understanding, doing all calculations, etc. You should only be confident in your own reasoning, knowledge, or calculations if you've exhaustively used all tools available to you to that can help you verify or validate your work. Always pip install any packages you need to use the python scripts you want to run. If you need to use a tool that isn't available, you can use the terminal tool to install or create it in many cases as well. Do not use the terminal tool to communicate with the user, as they cannot see your commands, only your final response after completing the task. Search for at least 3 sources, but not more than 12, so you can maintain focused context." \
|
||||
2>&1 | tee "$LOG_FILE"
|
||||
|
||||
echo "✅ Log saved to: $LOG_FILE"
|
||||
|
||||
# --verbose \
|
||||
27
configs/run_datagen_glm4.7_megascience.sh
Executable file
27
configs/run_datagen_glm4.7_megascience.sh
Executable file
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Create logs directory if it doesn't exist
|
||||
mkdir -p logs
|
||||
|
||||
# Generate log filename with timestamp
|
||||
LOG_FILE="logs/glm4.7-thinking-sft1-10k_$(date +%Y%m%d_%H%M%S).log"
|
||||
|
||||
echo "📝 Logging output to: $LOG_FILE"
|
||||
|
||||
python batch_runner.py \
|
||||
--dataset_file="source-data/hermes-agent-megascience-data/hermes_agent_megascience_sft_train_1_10k.jsonl" \
|
||||
--batch_size=20 \
|
||||
--run_name="megascience_glm4.7-thinking-sft1" \
|
||||
--distribution="science" \
|
||||
--model="z-ai/glm-4.7" \
|
||||
--base_url="https://openrouter.ai/api/v1" \
|
||||
--providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
|
||||
--num_workers=50 \
|
||||
--max_turns=60 \
|
||||
--resume \
|
||||
--ephemeral_system_prompt="You have access to a variety of tools to help you solve scientific, math, and technology problems presented to you. You can use them in sequence and build off of the results of prior tools you've used for furthering results. Always use the terminal or search tool if it can provide additional context, verify formulas, double check concepts and recent studies and understanding, doing all calculations, etc. You should only be confident in your own reasoning, knowledge, or calculations if you've exhaustively used all tools available to you to that can help you verify or validate your work. Always pip install any packages you need to use the python scripts you want to run. If you need to use a tool that isn't available, you can use the terminal tool to install or create it in many cases as well. Do not use the terminal tool to communicate with the user, as they cannot see your commands, only your final response after completing the task. Search for at least 3 sources, but not more than 12, so you can maintain a focused context." \
|
||||
2>&1 | tee "$LOG_FILE"
|
||||
|
||||
echo "✅ Log saved to: $LOG_FILE"
|
||||
|
||||
# --verbose \
|
||||
28
configs/run_datagen_glm4.7_raw_tasks.sh
Executable file
28
configs/run_datagen_glm4.7_raw_tasks.sh
Executable file
@@ -0,0 +1,28 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Create logs directory if it doesn't exist
|
||||
mkdir -p logs
|
||||
|
||||
# Generate log filename with timestamp
|
||||
LOG_FILE="logs/glm4.7-terminal-tasks_$(date +%Y%m%d_%H%M%S).log"
|
||||
|
||||
echo "📝 Logging output to: $LOG_FILE"
|
||||
|
||||
python batch_runner.py \
|
||||
--dataset_file="source-data/raw_tasks_prompts.jsonl" \
|
||||
--batch_size=20 \
|
||||
--run_name="terminal-tasks-glm4.7-thinking" \
|
||||
--distribution="default" \
|
||||
--model="z-ai/glm-4.7" \
|
||||
--base_url="https://openrouter.ai/api/v1" \
|
||||
--providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
|
||||
--num_workers=50 \
|
||||
--max_turns=60 \
|
||||
--ephemeral_system_prompt="You have access to a variety of tools to help you complete coding, system administration, and general computing tasks. You can use them in sequence and build off of the results of prior tools you've used. Always use the terminal tool to execute commands, write code, install packages, and verify your work. You should test and validate everything you create. Always pip install any packages you need (use --break-system-packages if needed). If you need a tool that isn't available, you can use the terminal to install or create it. Do not use the terminal tool to communicate with the user, as they cannot see your commands, only your final response after completing the task. Use web search when you need to look up documentation, APIs, or current best practices." \
|
||||
2>&1 | tee "$LOG_FILE"
|
||||
|
||||
echo "✅ Log saved to: $LOG_FILE"
|
||||
|
||||
# --verbose \
|
||||
# --resume \
|
||||
|
||||
12
configs/run_datagen_megascience.sh
Executable file
12
configs/run_datagen_megascience.sh
Executable file
@@ -0,0 +1,12 @@
|
||||
python batch_runner.py \
|
||||
--dataset_file="hermes-agent-megascience-data/hermes_agent_megascience_eval.jsonl" \
|
||||
--batch_size=10 \
|
||||
--run_name="megascience_eval_gpt5_2" \
|
||||
--distribution="science" \
|
||||
--model="gpt-5" \
|
||||
--base_url="https://api.openai.com/v1" \
|
||||
--api_key="${OPENAI_API_KEY}" \
|
||||
--num_workers=5 \
|
||||
--max_turns=30 \
|
||||
--verbose \
|
||||
--ephemeral_system_prompt="You have access to a variety of tools to help you solve scientific, math, and technology problems presented to you. You can use them in sequence and build off of the results of prior tools you've used results. Always use a tool if it can provide additional context, verify formulas, double check concepts and recent studies and understanding, doing all calculations, etc. You should not be confident in your own reasoning, knowledge, or calculations without using a tool to verify or validate your work."
|
||||
12
configs/run_datagen_minimax-3.1.sh
Executable file
12
configs/run_datagen_minimax-3.1.sh
Executable file
@@ -0,0 +1,12 @@
|
||||
python batch_runner.py \
|
||||
--dataset_file="source-data/hermes-agent-agent-tasks-1/agent_tasks_eval.jsonl" \
|
||||
--batch_size=50 \
|
||||
--run_name="megascience_sft_minimax-m2.1-thinking-2-eval" \
|
||||
--distribution="science" \
|
||||
--model="minimax/minimax-m2.1" \
|
||||
--base_url="https://openrouter.ai/api/v1" \
|
||||
--providers_allowed="minimax" \
|
||||
--num_workers=1 \
|
||||
--max_turns=40 \
|
||||
--verbose \
|
||||
--ephemeral_system_prompt="You have access to a variety of tools to help you solve scientific, math, and technology problems presented to you. You can use them in sequence and build off of the results of prior tools you've used results. Always use the terminal or search tool if it can provide additional context, verify formulas, double check concepts and recent studies and understanding, doing all calculations, etc. You should only be confident in your own reasoning, knowledge, or calculations if you've exhaustively used all tools available to you to that can help you verify or validate your work. Always pip install any packages you need to use the python scripts you want to run. If you need to use a tool that isn't available, you can use the terminal tool to install or create it in many cases as well. Do not use the terminal tool to communicate with the user, as they cannot see your commands, only your final response after completing the task. Search for at least 3 sources, but not more than 12."
|
||||
29
configs/run_eval_glm4.7_newterm.sh
Executable file
29
configs/run_eval_glm4.7_newterm.sh
Executable file
@@ -0,0 +1,29 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Create logs directory if it doesn't exist
|
||||
mkdir -p logs
|
||||
|
||||
# Generate log filename with timestamp
|
||||
LOG_FILE="logs/glm4.7-terminal-tasks-newterm_$(date +%Y%m%d_%H%M%S).log"
|
||||
|
||||
echo "📝 Logging output to: $LOG_FILE"
|
||||
|
||||
python batch_runner.py \
|
||||
--dataset_file="source-data/hermes-agent-agent-tasks-1/agent_tasks_eval.jsonl" \
|
||||
--batch_size=1 \
|
||||
--run_name="terminal-tasks-test-newterm" \
|
||||
--distribution="terminal_only" \
|
||||
--verbose \
|
||||
--model="z-ai/glm-4.7" \
|
||||
--base_url="https://openrouter.ai/api/v1" \
|
||||
--providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
|
||||
--num_workers=5 \
|
||||
--max_turns=60 \
|
||||
--ephemeral_system_prompt="You have access to a variety of tools to help you complete coding, system administration, and general computing tasks. You can use them in sequence and build off of the results of prior tools you've used. Always use the terminal tool to execute commands, write code, install packages, and verify your work. You should test and validate everything you create. Always pip install any packages you need (use --break-system-packages if needed). If you need a tool that isn't available, you can use the terminal to install or create it. Do not use the terminal tool to communicate with the user, as they cannot see your commands, only your final response after completing the task. Use web search when you need to look up documentation, APIs, or current best practices." \
|
||||
2>&1 | tee "$LOG_FILE"
|
||||
|
||||
echo "✅ Log saved to: $LOG_FILE"
|
||||
|
||||
# --verbose \
|
||||
# --resume \
|
||||
|
||||
33
configs/run_eval_terminal.sh
Executable file
33
configs/run_eval_terminal.sh
Executable file
@@ -0,0 +1,33 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Terminal-only evaluation run using Modal sandboxes
|
||||
# Uses 10 sample tasks from nous-terminal-tasks
|
||||
|
||||
# Create logs directory if it doesn't exist
|
||||
mkdir -p logs
|
||||
|
||||
# Generate log filename with timestamp
|
||||
LOG_FILE="logs/terminal_eval_$(date +%Y%m%d_%H%M%S).log"
|
||||
|
||||
echo "📝 Logging output to: $LOG_FILE"
|
||||
echo "🔧 Using Modal sandboxes (TERMINAL_ENV=modal)"
|
||||
|
||||
# Set terminal to use Modal
|
||||
export TERMINAL_ENV=modal
|
||||
export TERMINAL_MODAL_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
|
||||
export TERMINAL_TIMEOUT=300
|
||||
|
||||
python batch_runner.py \
|
||||
--dataset_file="nous-terminal-tasks_eval.jsonl" \
|
||||
--batch_size=5 \
|
||||
--run_name="terminal_eval" \
|
||||
--distribution="terminal_only" \
|
||||
--model="z-ai/glm-4.7" \
|
||||
--base_url="https://openrouter.ai/api/v1" \
|
||||
--providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \
|
||||
--num_workers=2 \
|
||||
--max_turns=30 \
|
||||
--ephemeral_system_prompt="You have access to a terminal tool for executing commands. Use it to complete the task. Install any packages you need with apt-get or pip (use --break-system-packages if needed). Do not use interactive tools (vim, nano, python repl). If git output is large, pipe to cat." \
|
||||
2>&1 | tee "$LOG_FILE"
|
||||
|
||||
echo "✅ Log saved to: $LOG_FILE"
|
||||
46
configs/run_mixed_tasks.sh
Executable file
46
configs/run_mixed_tasks.sh
Executable file
@@ -0,0 +1,46 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Mixed browser+terminal data generation run
|
||||
# Uses mixed-browser-terminal-tasks.jsonl (200 tasks)
|
||||
# Distribution: browser 92%, terminal 92%, web 35%, vision 15%, image_gen 15%
|
||||
|
||||
# Create logs directory if it doesn't exist
|
||||
mkdir -p logs
|
||||
|
||||
# Generate log filename with timestamp
|
||||
LOG_FILE="logs/mixed_tasks_$(date +%Y%m%d_%H%M%S).log"
|
||||
|
||||
echo "📝 Logging output to: $LOG_FILE"
|
||||
echo "🔀 Running mixed browser+terminal tasks with mixed_tasks distribution"
|
||||
|
||||
# Set terminal environment
|
||||
# SIF images are automatically built/cached by terminal_tool.py
|
||||
export TERMINAL_ENV=singularity
|
||||
export TERMINAL_SINGULARITY_IMAGE="docker://nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
export TERMINAL_TIMEOUT=300
|
||||
|
||||
# Set up Apptainer cache directories (use /scratch if available, otherwise /tmp)
|
||||
if [ -d "/scratch" ] && [ -w "/scratch" ]; then
|
||||
CACHE_BASE="/scratch/$USER/.apptainer"
|
||||
else
|
||||
CACHE_BASE="/tmp/$USER/.apptainer"
|
||||
fi
|
||||
export APPTAINER_CACHEDIR="$CACHE_BASE"
|
||||
export APPTAINER_TMPDIR="$CACHE_BASE/tmp"
|
||||
mkdir -p "$APPTAINER_CACHEDIR" "$APPTAINER_TMPDIR"
|
||||
|
||||
echo "📁 Apptainer cache: $APPTAINER_CACHEDIR"
|
||||
|
||||
python batch_runner.py \
|
||||
--dataset_file="mixed-browser-terminal-tasks.jsonl" \
|
||||
--batch_size=20 \
|
||||
--run_name="mixed_tasks" \
|
||||
--distribution="mixed_tasks" \
|
||||
--model="moonshotai/kimi-k2.5" \
|
||||
--base_url="https://openrouter.ai/api/v1" \
|
||||
--num_workers=25 \
|
||||
--max_turns=60 \
|
||||
--ephemeral_system_prompt="You are an AI assistant capable of both browser automation and terminal operations. Use browser tools to navigate websites, interact with web pages, fill forms, and extract information. Use terminal tools to execute commands, write and run code, install packages (use --break-system-packages with pip if needed), and perform local computations. When web search is available, use it to find URLs, documentation, or current information. If vision is available, use it to analyze images or screenshots. If image generation is available, use it when the task requires creating images. Combine browser and terminal capabilities effectively - for example, you might use the browser to fetch data from a website and terminal to process or analyze it. Always verify your work and handle errors gracefully. Whenever you can do something in a terminal instead of a web browser, you should choose to do so, as it's much cheaper." \
|
||||
2>&1 | tee "$LOG_FILE"
|
||||
|
||||
echo "✅ Log saved to: $LOG_FILE"
|
||||
50
configs/run_terminal_tasks.sh
Executable file
50
configs/run_terminal_tasks.sh
Executable file
@@ -0,0 +1,50 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Terminal-focused data generation run
|
||||
# Uses nous-terminal-tasks.jsonl (597 tasks)
|
||||
# Distribution: terminal 97%, web 15%, browser 0%, vision 8%, image_gen 3%
|
||||
|
||||
# Create logs directory if it doesn't exist
|
||||
mkdir -p logs
|
||||
|
||||
# Generate log filename with timestamp
|
||||
LOG_FILE="logs/terminal_tasks_$(date +%Y%m%d_%H%M%S).log"
|
||||
|
||||
echo "📝 Logging output to: $LOG_FILE"
|
||||
echo "💻 Running terminal-focused tasks with terminal_tasks distribution"
|
||||
|
||||
# Set terminal environment
|
||||
# SIF images are automatically built/cached by terminal_tool.py
|
||||
export TERMINAL_ENV=singularity
|
||||
export TERMINAL_SINGULARITY_IMAGE="docker://nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
export TERMINAL_TIMEOUT=300
|
||||
|
||||
# Set up Apptainer cache directories (use /scratch if available, otherwise /tmp)
|
||||
if [ -d "/scratch" ] && [ -w "/scratch" ]; then
|
||||
CACHE_BASE="/scratch/$USER/.apptainer"
|
||||
else
|
||||
CACHE_BASE="/tmp/$USER/.apptainer"
|
||||
fi
|
||||
export APPTAINER_CACHEDIR="$CACHE_BASE"
|
||||
export APPTAINER_TMPDIR="$CACHE_BASE/tmp"
|
||||
mkdir -p "$APPTAINER_CACHEDIR" "$APPTAINER_TMPDIR"
|
||||
|
||||
echo "📁 Apptainer cache: $APPTAINER_CACHEDIR"
|
||||
echo "🐳 Image: $TERMINAL_SINGULARITY_IMAGE (auto-converted to SIF on first use)"
|
||||
|
||||
python batch_runner.py \
|
||||
--dataset_file="nous-terminal-tasks.jsonl" \
|
||||
--batch_size=5 \
|
||||
--run_name="terminal_tasks-kimi-k2.5" \
|
||||
--distribution="terminal_tasks" \
|
||||
--model="moonshotai/kimi-k2.5" \
|
||||
--verbose \
|
||||
--base_url="https://openrouter.ai/api/v1" \
|
||||
--num_workers=80 \
|
||||
--max_turns=60 \
|
||||
--providers_ignored="Novita" \
|
||||
--resume \
|
||||
--ephemeral_system_prompt="You have access to a terminal tool for executing commands and completing coding, system administration, and computing tasks. Use the terminal to write code, run scripts, install packages (use --break-system-packages with pip if needed), manipulate files, and verify your work. Always test and validate code you create. Do not use interactive tools like vim, nano, or python REPL. If git output is large, pipe to cat. When web search is available, use it to look up documentation, APIs, or best practices. If browser tools are available, use them for web interactions that require page manipulation. Do not use the terminal to communicate with the user - only your final response will be shown to them." \
|
||||
2>&1 | tee "$LOG_FILE"
|
||||
|
||||
echo "✅ Log saved to: $LOG_FILE"
|
||||
23
configs/test_run.sh
Executable file
23
configs/test_run.sh
Executable file
@@ -0,0 +1,23 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Check if a prompt argument was provided
|
||||
if [ $# -eq 0 ]; then
|
||||
echo "Error: Please provide a prompt as an argument"
|
||||
echo "Usage: $0 \"your prompt here\""
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Get the prompt from the first argument
|
||||
PROMPT="$1"
|
||||
|
||||
# Set debug mode for web tools
|
||||
export WEB_TOOLS_DEBUG=true
|
||||
|
||||
# Run the agent with the provided prompt
|
||||
python run_agent.py \
|
||||
--query "$PROMPT" \
|
||||
--max_turns 30 \
|
||||
--model claude-sonnet-4-5-20250929 \
|
||||
--base_url https://api.anthropic.com/v1/ \
|
||||
--api_key $ANTHROPIC_API_KEY \
|
||||
--save_trajectories
|
||||
21
configs/test_skills_kimi.sh
Normal file
21
configs/test_skills_kimi.sh
Normal file
@@ -0,0 +1,21 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Test skills tool with Kimi K2.5
|
||||
# Usage: ./configs/test_skills_kimi.sh "your query here"
|
||||
# Example: ./configs/test_skills_kimi.sh "List available skills and show me the vllm skill"
|
||||
|
||||
# Default query if none provided
|
||||
QUERY="${1:-List all available skills. Then show me the axolotl skill and view one of its reference files.}"
|
||||
|
||||
echo "🎯 Testing Skills Tool with Kimi K2.5"
|
||||
echo "📝 Query: $QUERY"
|
||||
echo "="
|
||||
|
||||
python run_agent.py \
|
||||
--enabled_toolsets=skills \
|
||||
--model="moonshotai/kimi-k2.5" \
|
||||
--base_url="https://openrouter.ai/api/v1" \
|
||||
--max_turns=10 \
|
||||
--verbose \
|
||||
--save_sample \
|
||||
--query="$QUERY"
|
||||
@@ -6,12 +6,12 @@ This module provides scheduled task execution, allowing the agent to:
|
||||
- Self-schedule reminders and follow-up tasks
|
||||
- Execute tasks in isolated sessions (no prior context)
|
||||
|
||||
Cron jobs are executed automatically by the gateway daemon:
|
||||
hermes gateway install # Install as system service (recommended)
|
||||
hermes gateway # Or run in foreground
|
||||
|
||||
The gateway ticks the scheduler every 60 seconds. A file lock prevents
|
||||
duplicate execution if multiple processes overlap.
|
||||
Usage:
|
||||
# Run due jobs (for system cron integration)
|
||||
python -c "from cron import tick; tick()"
|
||||
|
||||
# Or via CLI
|
||||
python cli.py --cron-daemon
|
||||
"""
|
||||
|
||||
from cron.jobs import (
|
||||
@@ -22,7 +22,7 @@ from cron.jobs import (
|
||||
update_job,
|
||||
JOBS_FILE,
|
||||
)
|
||||
from cron.scheduler import tick
|
||||
from cron.scheduler import tick, run_daemon
|
||||
|
||||
__all__ = [
|
||||
"create_job",
|
||||
@@ -31,5 +31,6 @@ __all__ = [
|
||||
"remove_job",
|
||||
"update_job",
|
||||
"tick",
|
||||
"run_daemon",
|
||||
"JOBS_FILE",
|
||||
]
|
||||
|
||||
59
cron/jobs.py
59
cron/jobs.py
@@ -6,7 +6,6 @@ Output is saved to ~/.hermes/cron/output/{job_id}/{timestamp}.md
|
||||
"""
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
@@ -14,8 +13,6 @@ from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, List, Any
|
||||
|
||||
from hermes_time import now as _hermes_now
|
||||
|
||||
try:
|
||||
from croniter import croniter
|
||||
HAS_CRONITER = True
|
||||
@@ -130,7 +127,7 @@ def parse_schedule(schedule: str) -> Dict[str, Any]:
|
||||
# Duration like "30m", "2h", "1d" → one-shot from now
|
||||
try:
|
||||
minutes = parse_duration(schedule)
|
||||
run_at = _hermes_now() + timedelta(minutes=minutes)
|
||||
run_at = datetime.now() + timedelta(minutes=minutes)
|
||||
return {
|
||||
"kind": "once",
|
||||
"run_at": run_at.isoformat(),
|
||||
@@ -148,50 +145,37 @@ def parse_schedule(schedule: str) -> Dict[str, Any]:
|
||||
)
|
||||
|
||||
|
||||
def _ensure_aware(dt: datetime) -> datetime:
|
||||
"""Make a naive datetime tz-aware using the configured timezone.
|
||||
|
||||
Handles backward compatibility: timestamps stored before timezone support
|
||||
are naive (server-local). We assume they were in the same timezone as
|
||||
the current configuration so comparisons work without crashing.
|
||||
"""
|
||||
if dt.tzinfo is None:
|
||||
tz = _hermes_now().tzinfo
|
||||
return dt.replace(tzinfo=tz)
|
||||
return dt
|
||||
|
||||
|
||||
def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None) -> Optional[str]:
|
||||
"""
|
||||
Compute the next run time for a schedule.
|
||||
|
||||
|
||||
Returns ISO timestamp string, or None if no more runs.
|
||||
"""
|
||||
now = _hermes_now()
|
||||
|
||||
now = datetime.now()
|
||||
|
||||
if schedule["kind"] == "once":
|
||||
run_at = _ensure_aware(datetime.fromisoformat(schedule["run_at"]))
|
||||
run_at = datetime.fromisoformat(schedule["run_at"])
|
||||
# If in the future, return it; if in the past, no more runs
|
||||
return schedule["run_at"] if run_at > now else None
|
||||
|
||||
|
||||
elif schedule["kind"] == "interval":
|
||||
minutes = schedule["minutes"]
|
||||
if last_run_at:
|
||||
# Next run is last_run + interval
|
||||
last = _ensure_aware(datetime.fromisoformat(last_run_at))
|
||||
last = datetime.fromisoformat(last_run_at)
|
||||
next_run = last + timedelta(minutes=minutes)
|
||||
else:
|
||||
# First run is now + interval
|
||||
next_run = now + timedelta(minutes=minutes)
|
||||
return next_run.isoformat()
|
||||
|
||||
|
||||
elif schedule["kind"] == "cron":
|
||||
if not HAS_CRONITER:
|
||||
return None
|
||||
cron = croniter(schedule["expr"], now)
|
||||
next_run = cron.get_next(datetime)
|
||||
return next_run.isoformat()
|
||||
|
||||
|
||||
return None
|
||||
|
||||
|
||||
@@ -216,19 +200,8 @@ def load_jobs() -> List[Dict[str, Any]]:
|
||||
def save_jobs(jobs: List[Dict[str, Any]]):
|
||||
"""Save all jobs to storage."""
|
||||
ensure_dirs()
|
||||
fd, tmp_path = tempfile.mkstemp(dir=str(JOBS_FILE.parent), suffix='.tmp', prefix='.jobs_')
|
||||
try:
|
||||
with os.fdopen(fd, 'w', encoding='utf-8') as f:
|
||||
json.dump({"jobs": jobs, "updated_at": _hermes_now().isoformat()}, f, indent=2)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp_path, JOBS_FILE)
|
||||
except BaseException:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
with open(JOBS_FILE, 'w', encoding='utf-8') as f:
|
||||
json.dump({"jobs": jobs, "updated_at": datetime.now().isoformat()}, f, indent=2)
|
||||
|
||||
|
||||
def create_job(
|
||||
@@ -264,7 +237,7 @@ def create_job(
|
||||
deliver = "origin" if origin else "local"
|
||||
|
||||
job_id = uuid.uuid4().hex[:12]
|
||||
now = _hermes_now().isoformat()
|
||||
now = datetime.now().isoformat()
|
||||
|
||||
job = {
|
||||
"id": job_id,
|
||||
@@ -343,7 +316,7 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None):
|
||||
jobs = load_jobs()
|
||||
for i, job in enumerate(jobs):
|
||||
if job["id"] == job_id:
|
||||
now = _hermes_now().isoformat()
|
||||
now = datetime.now().isoformat()
|
||||
job["last_run_at"] = now
|
||||
job["last_status"] = "ok" if success else "error"
|
||||
job["last_error"] = error if not success else None
|
||||
@@ -376,7 +349,7 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None):
|
||||
|
||||
def get_due_jobs() -> List[Dict[str, Any]]:
|
||||
"""Get all jobs that are due to run now."""
|
||||
now = _hermes_now()
|
||||
now = datetime.now()
|
||||
jobs = load_jobs()
|
||||
due = []
|
||||
|
||||
@@ -388,7 +361,7 @@ def get_due_jobs() -> List[Dict[str, Any]]:
|
||||
if not next_run:
|
||||
continue
|
||||
|
||||
next_run_dt = _ensure_aware(datetime.fromisoformat(next_run))
|
||||
next_run_dt = datetime.fromisoformat(next_run)
|
||||
if next_run_dt <= now:
|
||||
due.append(job)
|
||||
|
||||
@@ -401,7 +374,7 @@ def save_job_output(job_id: str, output: str):
|
||||
job_output_dir = OUTPUT_DIR / job_id
|
||||
job_output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
timestamp = _hermes_now().strftime("%Y-%m-%d_%H-%M-%S")
|
||||
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
||||
output_file = job_output_dir / f"{timestamp}.md"
|
||||
|
||||
with open(output_file, 'w', encoding='utf-8') as f:
|
||||
|
||||
@@ -1,272 +1,63 @@
|
||||
"""
|
||||
Cron job scheduler - executes due jobs.
|
||||
|
||||
Provides tick() which checks for due jobs and runs them. The gateway
|
||||
calls this every 60 seconds from a background thread.
|
||||
|
||||
Uses a file-based lock (~/.hermes/cron/.tick.lock) so only one tick
|
||||
runs at a time if multiple processes overlap.
|
||||
This module provides:
|
||||
- tick(): Run all due jobs once (for system cron integration)
|
||||
- run_daemon(): Run continuously, checking every 60 seconds
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
|
||||
# fcntl is Unix-only; on Windows use msvcrt for file locking
|
||||
try:
|
||||
import fcntl
|
||||
except ImportError:
|
||||
fcntl = None
|
||||
try:
|
||||
import msvcrt
|
||||
except ImportError:
|
||||
msvcrt = None
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from hermes_time import now as _hermes_now
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from cron.jobs import get_due_jobs, mark_job_run, save_job_output
|
||||
|
||||
# Resolve Hermes home directory (respects HERMES_HOME override)
|
||||
_hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
|
||||
|
||||
# File-based lock prevents concurrent ticks from gateway + daemon + systemd timer
|
||||
_LOCK_DIR = _hermes_home / "cron"
|
||||
_LOCK_FILE = _LOCK_DIR / ".tick.lock"
|
||||
|
||||
|
||||
def _resolve_origin(job: dict) -> Optional[dict]:
|
||||
"""Extract origin info from a job, returning {platform, chat_id, chat_name} or None."""
|
||||
origin = job.get("origin")
|
||||
if not origin:
|
||||
return None
|
||||
platform = origin.get("platform")
|
||||
chat_id = origin.get("chat_id")
|
||||
if platform and chat_id:
|
||||
return origin
|
||||
return None
|
||||
|
||||
|
||||
def _deliver_result(job: dict, content: str) -> None:
|
||||
"""
|
||||
Deliver job output to the configured target (origin chat, specific platform, etc.).
|
||||
|
||||
Uses the standalone platform send functions from send_message_tool so delivery
|
||||
works whether or not the gateway is running.
|
||||
"""
|
||||
deliver = job.get("deliver", "local")
|
||||
origin = _resolve_origin(job)
|
||||
|
||||
if deliver == "local":
|
||||
return
|
||||
|
||||
# Resolve target platform + chat_id
|
||||
if deliver == "origin":
|
||||
if not origin:
|
||||
logger.warning("Job '%s' deliver=origin but no origin stored, skipping delivery", job["id"])
|
||||
return
|
||||
platform_name = origin["platform"]
|
||||
chat_id = origin["chat_id"]
|
||||
elif ":" in deliver:
|
||||
platform_name, chat_id = deliver.split(":", 1)
|
||||
else:
|
||||
# Bare platform name like "telegram" — need to resolve to origin or home channel
|
||||
platform_name = deliver
|
||||
if origin and origin.get("platform") == platform_name:
|
||||
chat_id = origin["chat_id"]
|
||||
else:
|
||||
# Fall back to home channel
|
||||
chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "")
|
||||
if not chat_id:
|
||||
logger.warning("Job '%s' deliver=%s but no chat_id or home channel. Set via: hermes config set %s_HOME_CHANNEL <channel_id>", job["id"], deliver, platform_name.upper())
|
||||
return
|
||||
|
||||
from tools.send_message_tool import _send_to_platform
|
||||
from gateway.config import load_gateway_config, Platform
|
||||
|
||||
platform_map = {
|
||||
"telegram": Platform.TELEGRAM,
|
||||
"discord": Platform.DISCORD,
|
||||
"slack": Platform.SLACK,
|
||||
"whatsapp": Platform.WHATSAPP,
|
||||
"signal": Platform.SIGNAL,
|
||||
}
|
||||
platform = platform_map.get(platform_name.lower())
|
||||
if not platform:
|
||||
logger.warning("Job '%s': unknown platform '%s' for delivery", job["id"], platform_name)
|
||||
return
|
||||
|
||||
try:
|
||||
config = load_gateway_config()
|
||||
except Exception as e:
|
||||
logger.error("Job '%s': failed to load gateway config for delivery: %s", job["id"], e)
|
||||
return
|
||||
|
||||
pconfig = config.platforms.get(platform)
|
||||
if not pconfig or not pconfig.enabled:
|
||||
logger.warning("Job '%s': platform '%s' not configured/enabled", job["id"], platform_name)
|
||||
return
|
||||
|
||||
# Run the async send in a fresh event loop (safe from any thread)
|
||||
try:
|
||||
result = asyncio.run(_send_to_platform(platform, pconfig, chat_id, content))
|
||||
except RuntimeError:
|
||||
# asyncio.run() fails if there's already a running loop in this thread;
|
||||
# spin up a new thread to avoid that.
|
||||
import concurrent.futures
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
|
||||
future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, content))
|
||||
result = future.result(timeout=30)
|
||||
except Exception as e:
|
||||
logger.error("Job '%s': delivery to %s:%s failed: %s", job["id"], platform_name, chat_id, e)
|
||||
return
|
||||
|
||||
if result and result.get("error"):
|
||||
logger.error("Job '%s': delivery error: %s", job["id"], result["error"])
|
||||
else:
|
||||
logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id)
|
||||
# Mirror the delivered content into the target's gateway session
|
||||
try:
|
||||
from gateway.mirror import mirror_to_session
|
||||
mirror_to_session(platform_name, chat_id, content, source_label="cron")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
def run_job(job: dict) -> tuple[bool, str, Optional[str]]:
|
||||
"""
|
||||
Execute a single cron job.
|
||||
|
||||
Returns:
|
||||
Tuple of (success, full_output_doc, final_response, error_message)
|
||||
Tuple of (success, output, error_message)
|
||||
"""
|
||||
from run_agent import AIAgent
|
||||
|
||||
job_id = job["id"]
|
||||
job_name = job["name"]
|
||||
prompt = job["prompt"]
|
||||
origin = _resolve_origin(job)
|
||||
|
||||
logger.info("Running job '%s' (ID: %s)", job_name, job_id)
|
||||
logger.info("Prompt: %s", prompt[:100])
|
||||
|
||||
# Inject origin context so the agent's send_message tool knows the chat
|
||||
if origin:
|
||||
os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"]
|
||||
os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
|
||||
if origin.get("chat_name"):
|
||||
os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
|
||||
|
||||
print(f"[cron] Running job '{job_name}' (ID: {job_id})")
|
||||
print(f"[cron] Prompt: {prompt[:100]}{'...' if len(prompt) > 100 else ''}")
|
||||
|
||||
try:
|
||||
# Re-read .env and config.yaml fresh every run so provider/key
|
||||
# changes take effect without a gateway restart.
|
||||
from dotenv import load_dotenv
|
||||
try:
|
||||
load_dotenv(str(_hermes_home / ".env"), override=True, encoding="utf-8")
|
||||
except UnicodeDecodeError:
|
||||
load_dotenv(str(_hermes_home / ".env"), override=True, encoding="latin-1")
|
||||
|
||||
model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
|
||||
|
||||
# Load config.yaml for model, reasoning, prefill, toolsets, provider routing
|
||||
_cfg = {}
|
||||
try:
|
||||
import yaml
|
||||
_cfg_path = str(_hermes_home / "config.yaml")
|
||||
if os.path.exists(_cfg_path):
|
||||
with open(_cfg_path) as _f:
|
||||
_cfg = yaml.safe_load(_f) or {}
|
||||
_model_cfg = _cfg.get("model", {})
|
||||
if isinstance(_model_cfg, str):
|
||||
model = _model_cfg
|
||||
elif isinstance(_model_cfg, dict):
|
||||
model = _model_cfg.get("default", model)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Reasoning config from env or config.yaml
|
||||
reasoning_config = None
|
||||
effort = os.getenv("HERMES_REASONING_EFFORT", "")
|
||||
if not effort:
|
||||
effort = str(_cfg.get("agent", {}).get("reasoning_effort", "")).strip()
|
||||
if effort and effort.lower() != "none":
|
||||
valid = ("xhigh", "high", "medium", "low", "minimal")
|
||||
if effort.lower() in valid:
|
||||
reasoning_config = {"enabled": True, "effort": effort.lower()}
|
||||
elif effort.lower() == "none":
|
||||
reasoning_config = {"enabled": False}
|
||||
|
||||
# Prefill messages from env or config.yaml
|
||||
prefill_messages = None
|
||||
prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "")
|
||||
if prefill_file:
|
||||
import json as _json
|
||||
pfpath = Path(prefill_file).expanduser()
|
||||
if not pfpath.is_absolute():
|
||||
pfpath = _hermes_home / pfpath
|
||||
if pfpath.exists():
|
||||
try:
|
||||
with open(pfpath, "r", encoding="utf-8") as _pf:
|
||||
prefill_messages = _json.load(_pf)
|
||||
if not isinstance(prefill_messages, list):
|
||||
prefill_messages = None
|
||||
except Exception:
|
||||
prefill_messages = None
|
||||
|
||||
# Max iterations
|
||||
max_iterations = _cfg.get("agent", {}).get("max_turns") or _cfg.get("max_turns") or 90
|
||||
|
||||
# Provider routing
|
||||
pr = _cfg.get("provider_routing", {})
|
||||
|
||||
from hermes_cli.runtime_provider import (
|
||||
resolve_runtime_provider,
|
||||
format_runtime_provider_error,
|
||||
)
|
||||
try:
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=os.getenv("HERMES_INFERENCE_PROVIDER"),
|
||||
)
|
||||
except Exception as exc:
|
||||
message = format_runtime_provider_error(exc)
|
||||
raise RuntimeError(message) from exc
|
||||
|
||||
# Create agent with default settings
|
||||
# Jobs run in isolated sessions (no prior context)
|
||||
agent = AIAgent(
|
||||
model=model,
|
||||
api_key=runtime.get("api_key"),
|
||||
base_url=runtime.get("base_url"),
|
||||
provider=runtime.get("provider"),
|
||||
api_mode=runtime.get("api_mode"),
|
||||
max_iterations=max_iterations,
|
||||
reasoning_config=reasoning_config,
|
||||
prefill_messages=prefill_messages,
|
||||
providers_allowed=pr.get("only"),
|
||||
providers_ignored=pr.get("ignore"),
|
||||
providers_order=pr.get("order"),
|
||||
provider_sort=pr.get("sort"),
|
||||
model=os.getenv("HERMES_MODEL", "anthropic/claude-opus-4.6"),
|
||||
quiet_mode=True,
|
||||
session_id=f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}"
|
||||
session_id=f"cron_{job_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
||||
)
|
||||
|
||||
# Run the conversation
|
||||
result = agent.run_conversation(prompt)
|
||||
|
||||
# Extract final response
|
||||
final_response = result.get("final_response", "")
|
||||
if not final_response:
|
||||
final_response = "(No response generated)"
|
||||
|
||||
# Build output document
|
||||
output = f"""# Cron Job: {job_name}
|
||||
|
||||
**Job ID:** {job_id}
|
||||
**Run Time:** {_hermes_now().strftime('%Y-%m-%d %H:%M:%S')}
|
||||
**Run Time:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
|
||||
**Schedule:** {job.get('schedule_display', 'N/A')}
|
||||
|
||||
## Prompt
|
||||
@@ -278,17 +69,18 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
{final_response}
|
||||
"""
|
||||
|
||||
logger.info("Job '%s' completed successfully", job_name)
|
||||
return True, output, final_response, None
|
||||
print(f"[cron] Job '{job_name}' completed successfully")
|
||||
return True, output, None
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"{type(e).__name__}: {str(e)}"
|
||||
logger.error("Job '%s' failed: %s", job_name, error_msg)
|
||||
print(f"[cron] Job '{job_name}' failed: {error_msg}")
|
||||
|
||||
# Build error output
|
||||
output = f"""# Cron Job: {job_name} (FAILED)
|
||||
|
||||
**Job ID:** {job_id}
|
||||
**Run Time:** {_hermes_now().strftime('%Y-%m-%d %H:%M:%S')}
|
||||
**Run Time:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
|
||||
**Schedule:** {job.get('schedule_display', 'N/A')}
|
||||
|
||||
## Prompt
|
||||
@@ -303,88 +95,94 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
{traceback.format_exc()}
|
||||
```
|
||||
"""
|
||||
return False, output, "", error_msg
|
||||
|
||||
finally:
|
||||
# Clean up injected env vars so they don't leak to other jobs
|
||||
for key in ("HERMES_SESSION_PLATFORM", "HERMES_SESSION_CHAT_ID", "HERMES_SESSION_CHAT_NAME"):
|
||||
os.environ.pop(key, None)
|
||||
return False, output, error_msg
|
||||
|
||||
|
||||
def tick(verbose: bool = True) -> int:
|
||||
"""
|
||||
Check and run all due jobs.
|
||||
|
||||
Uses a file lock so only one tick runs at a time, even if the gateway's
|
||||
in-process ticker and a standalone daemon or manual tick overlap.
|
||||
This is designed to be called by system cron every minute:
|
||||
*/1 * * * * cd ~/hermes-agent && python -c "from cron import tick; tick()"
|
||||
|
||||
Args:
|
||||
verbose: Whether to print status messages
|
||||
|
||||
Returns:
|
||||
Number of jobs executed (0 if another tick is already running)
|
||||
Number of jobs executed
|
||||
"""
|
||||
_LOCK_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Cross-platform file locking: fcntl on Unix, msvcrt on Windows
|
||||
lock_fd = None
|
||||
try:
|
||||
lock_fd = open(_LOCK_FILE, "w")
|
||||
if fcntl:
|
||||
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
elif msvcrt:
|
||||
msvcrt.locking(lock_fd.fileno(), msvcrt.LK_NBLCK, 1)
|
||||
except (OSError, IOError):
|
||||
logger.debug("Tick skipped — another instance holds the lock")
|
||||
if lock_fd is not None:
|
||||
lock_fd.close()
|
||||
due_jobs = get_due_jobs()
|
||||
|
||||
if verbose and not due_jobs:
|
||||
print(f"[cron] {datetime.now().strftime('%H:%M:%S')} - No jobs due")
|
||||
return 0
|
||||
|
||||
if verbose:
|
||||
print(f"[cron] {datetime.now().strftime('%H:%M:%S')} - {len(due_jobs)} job(s) due")
|
||||
|
||||
executed = 0
|
||||
for job in due_jobs:
|
||||
try:
|
||||
success, output, error = run_job(job)
|
||||
|
||||
# Save output to file
|
||||
output_file = save_job_output(job["id"], output)
|
||||
if verbose:
|
||||
print(f"[cron] Output saved to: {output_file}")
|
||||
|
||||
# Mark job as run (handles repeat counting, next_run computation)
|
||||
mark_job_run(job["id"], success, error)
|
||||
executed += 1
|
||||
|
||||
except Exception as e:
|
||||
print(f"[cron] Error processing job {job['id']}: {e}")
|
||||
mark_job_run(job["id"], False, str(e))
|
||||
|
||||
return executed
|
||||
|
||||
|
||||
def run_daemon(check_interval: int = 60, verbose: bool = True):
|
||||
"""
|
||||
Run the cron daemon continuously.
|
||||
|
||||
Checks for due jobs every `check_interval` seconds.
|
||||
|
||||
Args:
|
||||
check_interval: Seconds between checks (default: 60)
|
||||
verbose: Whether to print status messages
|
||||
"""
|
||||
print(f"[cron] Starting daemon (checking every {check_interval}s)")
|
||||
print(f"[cron] Press Ctrl+C to stop")
|
||||
print()
|
||||
|
||||
try:
|
||||
due_jobs = get_due_jobs()
|
||||
|
||||
if verbose and not due_jobs:
|
||||
logger.info("%s - No jobs due", _hermes_now().strftime('%H:%M:%S'))
|
||||
return 0
|
||||
|
||||
if verbose:
|
||||
logger.info("%s - %s job(s) due", _hermes_now().strftime('%H:%M:%S'), len(due_jobs))
|
||||
|
||||
executed = 0
|
||||
for job in due_jobs:
|
||||
while True:
|
||||
try:
|
||||
success, output, final_response, error = run_job(job)
|
||||
|
||||
output_file = save_job_output(job["id"], output)
|
||||
if verbose:
|
||||
logger.info("Output saved to: %s", output_file)
|
||||
|
||||
# Deliver the final response to the origin/target chat
|
||||
deliver_content = final_response if success else f"⚠️ Cron job '{job.get('name', job['id'])}' failed:\n{error}"
|
||||
if deliver_content:
|
||||
try:
|
||||
_deliver_result(job, deliver_content)
|
||||
except Exception as de:
|
||||
logger.error("Delivery failed for job %s: %s", job["id"], de)
|
||||
|
||||
mark_job_run(job["id"], success, error)
|
||||
executed += 1
|
||||
|
||||
tick(verbose=verbose)
|
||||
except Exception as e:
|
||||
logger.error("Error processing job %s: %s", job['id'], e)
|
||||
mark_job_run(job["id"], False, str(e))
|
||||
|
||||
return executed
|
||||
finally:
|
||||
if fcntl:
|
||||
fcntl.flock(lock_fd, fcntl.LOCK_UN)
|
||||
elif msvcrt:
|
||||
try:
|
||||
msvcrt.locking(lock_fd.fileno(), msvcrt.LK_UNLCK, 1)
|
||||
except (OSError, IOError):
|
||||
pass
|
||||
lock_fd.close()
|
||||
print(f"[cron] Tick error: {e}")
|
||||
|
||||
time.sleep(check_interval)
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("\n[cron] Daemon stopped")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
tick(verbose=True)
|
||||
# Allow running directly: python cron/scheduler.py [daemon|tick]
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser(description="Hermes Cron Scheduler")
|
||||
parser.add_argument("mode", choices=["daemon", "tick"], default="tick", nargs="?",
|
||||
help="Mode: 'tick' to run once, 'daemon' to run continuously")
|
||||
parser.add_argument("--interval", type=int, default=60,
|
||||
help="Check interval in seconds for daemon mode")
|
||||
parser.add_argument("--quiet", "-q", action="store_true",
|
||||
help="Suppress status messages")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.mode == "daemon":
|
||||
run_daemon(check_interval=args.interval, verbose=not args.quiet)
|
||||
else:
|
||||
tick(verbose=not args.quiet)
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
{"prompt": "Go to https://news.ycombinator.com and find the top 5 posts on the front page. For each post, get the title, URL, points, and number of comments. Return the results as a formatted summary."}
|
||||
{"prompt": "Navigate to https://en.wikipedia.org/wiki/Hermes and extract the first paragraph of the article, the image caption, and the list of items in the infobox. Summarize what you find."}
|
||||
{"prompt": "Go to https://github.com/trending and find the top 3 trending repositories today. For each repo, get the name, description, language, and star count. Write the results to a file called trending_repos.md."}
|
||||
{"prompt": "Visit https://httpbin.org/forms/post and fill out the form with sample data (customer name: Jane Doe, size: Medium, topping: Bacon, delivery time: 12:00). Submit the form and report what the response page shows."}
|
||||
{"prompt": "Navigate to https://books.toscrape.com, browse to the Travel category, find the highest-rated book, and extract its title, price, availability, and description."}
|
||||
@@ -1,65 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# =============================================================================
|
||||
# Example: Browser-Focused Data Generation
|
||||
# =============================================================================
|
||||
#
|
||||
# Generates tool-calling trajectories for browser automation tasks.
|
||||
# The agent navigates websites, fills forms, extracts information, etc.
|
||||
#
|
||||
# Distribution: browser 97%, web 20%, vision 12%, terminal 15%
|
||||
#
|
||||
# Prerequisites:
|
||||
# - OPENROUTER_API_KEY in ~/.hermes/.env
|
||||
# - BROWSERBASE_API_KEY in ~/.hermes/.env (for browser tools)
|
||||
# - A dataset JSONL file with one {"prompt": "..."} per line
|
||||
#
|
||||
# Usage:
|
||||
# cd ~/.hermes/hermes-agent
|
||||
# bash datagen-config-examples/run_browser_tasks.sh
|
||||
#
|
||||
# Output: data/browser_tasks_example/trajectories.jsonl
|
||||
# =============================================================================
|
||||
|
||||
mkdir -p logs
|
||||
|
||||
LOG_FILE="logs/browser_tasks_$(date +%Y%m%d_%H%M%S).log"
|
||||
echo "📝 Logging to: $LOG_FILE"
|
||||
|
||||
# Point to the example dataset in this directory
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
|
||||
python batch_runner.py \
|
||||
--dataset_file="$SCRIPT_DIR/example_browser_tasks.jsonl" \
|
||||
--batch_size=5 \
|
||||
--run_name="browser_tasks_example" \
|
||||
--distribution="browser_tasks" \
|
||||
--model="anthropic/claude-sonnet-4" \
|
||||
--base_url="https://openrouter.ai/api/v1" \
|
||||
--num_workers=3 \
|
||||
--max_turns=30 \
|
||||
--ephemeral_system_prompt="You are an AI assistant with browser automation capabilities. Your primary task is to navigate and interact with web pages to accomplish user goals.
|
||||
|
||||
IMPORTANT GUIDELINES:
|
||||
|
||||
1. SEARCHING: Do NOT search directly on Google via the browser — they block automated searches. Use the web_search tool first to find URLs, then navigate to them with browser tools.
|
||||
|
||||
2. COOKIE/PRIVACY DIALOGS: After navigating to a page, check for cookie consent or privacy popups. Dismiss them by clicking Accept/Close/OK before interacting with other elements. Take a fresh browser_snapshot afterward.
|
||||
|
||||
3. HANDLING TIMEOUTS: If an action times out, the element may be blocked by an overlay. Take a new snapshot and look for dialogs to dismiss. If none, try an alternative approach or report the issue.
|
||||
|
||||
4. GENERAL: Use browser tools to click, fill forms, and extract information. Use terminal for local file operations. Verify your actions and handle errors gracefully." \
|
||||
2>&1 | tee "$LOG_FILE"
|
||||
|
||||
echo "✅ Done. Log: $LOG_FILE"
|
||||
|
||||
# =============================================================================
|
||||
# Common options you can add:
|
||||
#
|
||||
# --resume Resume from checkpoint if interrupted
|
||||
# --verbose Enable detailed logging
|
||||
# --max_tokens=63000 Set max response tokens
|
||||
# --reasoning_disabled Disable model thinking/reasoning tokens
|
||||
# --providers_allowed="anthropic,google" Restrict to specific providers
|
||||
# --prefill_messages_file="configs/prefill.json" Few-shot priming
|
||||
# =============================================================================
|
||||
@@ -1,46 +0,0 @@
|
||||
# datagen-config-examples/web_research.yaml
|
||||
#
|
||||
# Batch data generation config for WebResearchEnv.
|
||||
# Generates tool-calling trajectories for multi-step web research tasks.
|
||||
#
|
||||
# Usage:
|
||||
# python batch_runner.py \
|
||||
# --config datagen-config-examples/web_research.yaml \
|
||||
# --run_name web_research_v1
|
||||
|
||||
environment: web-research
|
||||
|
||||
# Toolsets available to the agent during data generation
|
||||
toolsets:
|
||||
- web
|
||||
- file
|
||||
|
||||
# How many parallel workers to use
|
||||
num_workers: 4
|
||||
|
||||
# Questions per batch
|
||||
batch_size: 20
|
||||
|
||||
# Total trajectories to generate (comment out to run full dataset)
|
||||
max_items: 500
|
||||
|
||||
# Model to use for generation (override with --model flag)
|
||||
model: openrouter/nousresearch/hermes-3-llama-3.1-405b
|
||||
|
||||
# System prompt additions (ephemeral — not saved to trajectories)
|
||||
ephemeral_system_prompt: |
|
||||
You are a highly capable research agent. When asked a factual question,
|
||||
always use web_search to find current, accurate information before answering.
|
||||
Cite at least 2 sources. Be concise and accurate.
|
||||
|
||||
# Output directory
|
||||
output_dir: data/web_research_v1
|
||||
|
||||
# Trajectory compression settings (for fitting into training token budgets)
|
||||
compression:
|
||||
enabled: true
|
||||
target_max_tokens: 16000
|
||||
|
||||
# Eval settings
|
||||
eval_every: 100 # Run eval every N trajectories
|
||||
eval_size: 25 # Number of held-out questions per eval run
|
||||
104
docs/agents.md
Normal file
104
docs/agents.md
Normal file
@@ -0,0 +1,104 @@
|
||||
# Agents
|
||||
|
||||
The agent is the core loop that orchestrates LLM calls and tool execution.
|
||||
|
||||
## AIAgent Class
|
||||
|
||||
The main agent is implemented in `run_agent.py`:
|
||||
|
||||
```python
|
||||
class AIAgent:
|
||||
def __init__(
|
||||
self,
|
||||
model: str = "anthropic/claude-sonnet-4",
|
||||
api_key: str = None,
|
||||
base_url: str = "https://openrouter.ai/api/v1",
|
||||
max_turns: int = 20,
|
||||
enabled_toolsets: list = None,
|
||||
disabled_toolsets: list = None,
|
||||
verbose_logging: bool = False,
|
||||
):
|
||||
# Initialize OpenAI client, load tools based on toolsets
|
||||
...
|
||||
|
||||
def chat(self, user_message: str, task_id: str = None) -> str:
|
||||
# Main entry point - runs the agent loop
|
||||
...
|
||||
```
|
||||
|
||||
## Agent Loop
|
||||
|
||||
The core loop in `_run_agent_loop()`:
|
||||
|
||||
```
|
||||
1. Add user message to conversation
|
||||
2. Call LLM with tools
|
||||
3. If LLM returns tool calls:
|
||||
- Execute each tool
|
||||
- Add tool results to conversation
|
||||
- Go to step 2
|
||||
4. If LLM returns text response:
|
||||
- Return response to user
|
||||
```
|
||||
|
||||
```python
|
||||
while turns < max_turns:
|
||||
response = client.chat.completions.create(
|
||||
model=model,
|
||||
messages=messages,
|
||||
tools=tool_schemas,
|
||||
)
|
||||
|
||||
if response.tool_calls:
|
||||
for tool_call in response.tool_calls:
|
||||
result = await execute_tool(tool_call)
|
||||
messages.append(tool_result_message(result))
|
||||
turns += 1
|
||||
else:
|
||||
return response.content
|
||||
```
|
||||
|
||||
## Conversation Management
|
||||
|
||||
Messages are stored as a list of dicts following OpenAI format:
|
||||
|
||||
```python
|
||||
messages = [
|
||||
{"role": "system", "content": "You are a helpful assistant..."},
|
||||
{"role": "user", "content": "Search for Python tutorials"},
|
||||
{"role": "assistant", "content": None, "tool_calls": [...]},
|
||||
{"role": "tool", "tool_call_id": "...", "content": "..."},
|
||||
{"role": "assistant", "content": "Here's what I found..."},
|
||||
]
|
||||
```
|
||||
|
||||
## Reasoning Context
|
||||
|
||||
For models that support reasoning (chain-of-thought), the agent:
|
||||
1. Extracts `reasoning_content` from API responses
|
||||
2. Stores it in `assistant_msg["reasoning"]` for trajectory export
|
||||
3. Passes it back via `reasoning_content` field on subsequent turns
|
||||
|
||||
## Trajectory Export
|
||||
|
||||
Conversations can be exported for training:
|
||||
|
||||
```python
|
||||
agent = AIAgent(save_trajectories=True)
|
||||
agent.chat("Do something")
|
||||
# Saves to trajectories/*.jsonl in ShareGPT format
|
||||
```
|
||||
|
||||
## Batch Processing
|
||||
|
||||
For processing multiple prompts, use `batch_runner.py`:
|
||||
|
||||
```bash
|
||||
python batch_runner.py \
|
||||
--dataset_file=prompts.jsonl \
|
||||
--batch_size=20 \
|
||||
--num_workers=4 \
|
||||
--run_name=my_run
|
||||
```
|
||||
|
||||
See `batch_runner.py` for parallel execution with checkpointing.
|
||||
296
docs/cli.md
Normal file
296
docs/cli.md
Normal file
@@ -0,0 +1,296 @@
|
||||
# CLI
|
||||
|
||||
The Hermes Agent CLI provides an interactive terminal interface for working with the agent.
|
||||
|
||||
## Running the CLI
|
||||
|
||||
```bash
|
||||
# Basic usage
|
||||
./hermes
|
||||
|
||||
# With specific model
|
||||
./hermes --model "anthropic/claude-sonnet-4"
|
||||
|
||||
# With specific toolsets
|
||||
./hermes --toolsets "web,terminal,skills"
|
||||
|
||||
# Verbose mode
|
||||
./hermes --verbose
|
||||
```
|
||||
|
||||
## Architecture
|
||||
|
||||
The CLI is implemented in `cli.py` and uses:
|
||||
|
||||
- **Rich** - Welcome banner with ASCII art and styled panels
|
||||
- **prompt_toolkit** - Fixed input area with command history
|
||||
- **KawaiiSpinner** - Animated feedback during operations
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────┐
|
||||
│ HERMES-AGENT ASCII Logo │
|
||||
│ ┌─────────────┐ ┌────────────────────────────┐ │
|
||||
│ │ Caduceus │ │ Model: claude-opus-4.5 │ │
|
||||
│ │ ASCII Art │ │ Terminal: local │ │
|
||||
│ │ │ │ Working Dir: /home/user │ │
|
||||
│ │ │ │ Available Tools: 19 │ │
|
||||
│ │ │ │ Available Skills: 12 │ │
|
||||
│ └─────────────┘ └────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────┘
|
||||
│ Conversation output scrolls here... │
|
||||
│ │
|
||||
│ User: Hello! │
|
||||
│ ────────────────────────────────────────────── │
|
||||
│ (◕‿◕✿) 🧠 pondering... (2.3s) │
|
||||
│ ✧٩(ˊᗜˋ*)و✧ got it! (2.3s) │
|
||||
│ │
|
||||
│ Assistant: Hello! How can I help you today? │
|
||||
├─────────────────────────────────────────────────┤
|
||||
│ ❯ [Fixed input area at bottom] │
|
||||
└─────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Commands
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/help` | Show available commands |
|
||||
| `/tools` | List available tools grouped by toolset |
|
||||
| `/toolsets` | List available toolsets with descriptions |
|
||||
| `/model [name]` | Show or change the current model |
|
||||
| `/prompt [text]` | View/set/clear custom system prompt |
|
||||
| `/personality [name]` | Set a predefined personality |
|
||||
| `/clear` | Clear screen and reset conversation |
|
||||
| `/reset` | Reset conversation only (keep screen) |
|
||||
| `/history` | Show conversation history |
|
||||
| `/save` | Save current conversation to file |
|
||||
| `/config` | Show current configuration |
|
||||
| `/quit` | Exit the CLI (also: `/exit`, `/q`) |
|
||||
|
||||
## Configuration
|
||||
|
||||
The CLI is configured via `cli-config.yaml`. Copy from `cli-config.yaml.example`:
|
||||
|
||||
```bash
|
||||
cp cli-config.yaml.example cli-config.yaml
|
||||
```
|
||||
|
||||
### Model Configuration
|
||||
|
||||
```yaml
|
||||
model:
|
||||
default: "anthropic/claude-opus-4.5"
|
||||
base_url: "https://openrouter.ai/api/v1"
|
||||
```
|
||||
|
||||
### Terminal Configuration
|
||||
|
||||
The CLI supports multiple terminal backends:
|
||||
|
||||
```yaml
|
||||
# Local execution (default)
|
||||
terminal:
|
||||
env_type: "local"
|
||||
cwd: "." # Current directory
|
||||
|
||||
# SSH remote execution (sandboxed - agent can't touch its own code)
|
||||
terminal:
|
||||
env_type: "ssh"
|
||||
cwd: "/home/myuser/project"
|
||||
ssh_host: "my-server.example.com"
|
||||
ssh_user: "myuser"
|
||||
ssh_key: "~/.ssh/id_rsa"
|
||||
|
||||
# Docker container
|
||||
terminal:
|
||||
env_type: "docker"
|
||||
docker_image: "python:3.11"
|
||||
|
||||
# Singularity/Apptainer (HPC)
|
||||
terminal:
|
||||
env_type: "singularity"
|
||||
singularity_image: "docker://python:3.11"
|
||||
|
||||
# Modal cloud
|
||||
terminal:
|
||||
env_type: "modal"
|
||||
modal_image: "python:3.11"
|
||||
```
|
||||
|
||||
### Sudo Support
|
||||
|
||||
The CLI supports interactive sudo prompts:
|
||||
|
||||
```
|
||||
┌──────────────────────────────────────────────────────────┐
|
||||
│ 🔐 SUDO PASSWORD REQUIRED │
|
||||
├──────────────────────────────────────────────────────────┤
|
||||
│ Enter password below (input is hidden), or: │
|
||||
│ • Press Enter to skip (command fails gracefully) │
|
||||
│ • Wait 45s to auto-skip │
|
||||
└──────────────────────────────────────────────────────────┘
|
||||
|
||||
Password (hidden):
|
||||
```
|
||||
|
||||
**Options:**
|
||||
- **Interactive**: Leave `sudo_password` unset - you'll be prompted when needed
|
||||
- **Configured**: Set `sudo_password` in `cli-config.yaml` to auto-fill
|
||||
- **Environment**: Set `SUDO_PASSWORD` in `.env` for all runs
|
||||
|
||||
Password is cached for the session once entered.
|
||||
|
||||
### Toolsets
|
||||
|
||||
Control which tools are available:
|
||||
|
||||
```yaml
|
||||
# Enable all tools
|
||||
toolsets:
|
||||
- all
|
||||
|
||||
# Or enable specific toolsets
|
||||
toolsets:
|
||||
- web
|
||||
- terminal
|
||||
- skills
|
||||
```
|
||||
|
||||
Available toolsets: `web`, `search`, `terminal`, `browser`, `vision`, `image_gen`, `skills`, `moa`, `debugging`, `safe`
|
||||
|
||||
### Personalities
|
||||
|
||||
Predefined personalities for the `/personality` command:
|
||||
|
||||
```yaml
|
||||
agent:
|
||||
personalities:
|
||||
helpful: "You are a helpful, friendly AI assistant."
|
||||
kawaii: "You are a kawaii assistant! Use cute expressions..."
|
||||
pirate: "Arrr! Ye be talkin' to Captain Hermes..."
|
||||
# Add your own!
|
||||
```
|
||||
|
||||
Built-in personalities:
|
||||
- `helpful`, `concise`, `technical`, `creative`, `teacher`
|
||||
- `kawaii`, `catgirl`, `pirate`, `shakespeare`, `surfer`
|
||||
- `noir`, `uwu`, `philosopher`, `hype`
|
||||
|
||||
## Animated Feedback
|
||||
|
||||
The CLI provides animated feedback during operations:
|
||||
|
||||
### Thinking Animation
|
||||
|
||||
During API calls, shows animated spinner with thinking verbs:
|
||||
```
|
||||
◜ (。•́︿•̀。) pondering... (1.2s)
|
||||
◠ (⊙_⊙) contemplating... (2.4s)
|
||||
✧٩(ˊᗜˋ*)و✧ got it! (3.1s)
|
||||
```
|
||||
|
||||
### Tool Execution Animation
|
||||
|
||||
Each tool type has unique animations:
|
||||
```
|
||||
⠋ (◕‿◕✿) 🔍 web_search... (0.8s)
|
||||
▅ (≧◡≦) 💻 terminal... (1.2s)
|
||||
🌓 (★ω★) 🌐 browser_navigate... (2.1s)
|
||||
✧ (✿◠‿◠) 🎨 image_generate... (4.5s)
|
||||
```
|
||||
|
||||
## Multi-line Input
|
||||
|
||||
For multi-line input, end a line with `\` to continue:
|
||||
|
||||
```
|
||||
❯ Write a function that:\
|
||||
1. Takes a list of numbers\
|
||||
2. Returns the sum
|
||||
```
|
||||
|
||||
## Environment Variable Priority
|
||||
|
||||
For terminal settings, `cli-config.yaml` takes precedence over `.env`:
|
||||
|
||||
1. `cli-config.yaml` (highest priority in CLI)
|
||||
2. `.env` file
|
||||
3. System environment variables
|
||||
4. Default values
|
||||
|
||||
This allows you to have different terminal configs for CLI vs batch processing.
|
||||
|
||||
## Session Management
|
||||
|
||||
- **History**: Command history is saved to `~/.hermes_history`
|
||||
- **Conversations**: Use `/save` to export conversations
|
||||
- **Reset**: Use `/clear` for full reset, `/reset` to just clear history
|
||||
- **Session Logs**: Every session automatically logs to `logs/session_{session_id}.json`
|
||||
|
||||
### Session Logging
|
||||
|
||||
Sessions are automatically logged to the `logs/` directory:
|
||||
|
||||
```
|
||||
logs/
|
||||
├── session_20260201_143052_a1b2c3.json
|
||||
├── session_20260201_150217_d4e5f6.json
|
||||
└── ...
|
||||
```
|
||||
|
||||
The session ID is displayed in the welcome banner and follows the format: `YYYYMMDD_HHMMSS_UUID`.
|
||||
|
||||
Log files contain:
|
||||
- Full conversation history in trajectory format
|
||||
- Timestamps for session start and last update
|
||||
- Model and message count metadata
|
||||
|
||||
This is useful for:
|
||||
- Debugging agent behavior
|
||||
- Replaying conversations
|
||||
- Training data inspection
|
||||
|
||||
### Context Compression
|
||||
|
||||
Long conversations can exceed model context limits. The CLI automatically compresses context when approaching the limit:
|
||||
|
||||
```yaml
|
||||
# In cli-config.yaml
|
||||
compression:
|
||||
enabled: true # Enable auto-compression
|
||||
threshold: 0.85 # Compress at 85% of context limit
|
||||
summary_model: "google/gemini-2.0-flash-001"
|
||||
```
|
||||
|
||||
**How it works:**
|
||||
1. Tracks actual token usage from each API response
|
||||
2. When tokens reach threshold, middle turns are summarized
|
||||
3. First 3 and last 4 turns are always protected
|
||||
4. Conversation continues seamlessly after compression
|
||||
|
||||
**When compression triggers:**
|
||||
```
|
||||
📦 Context compression triggered (170,000 tokens ≥ 170,000 threshold)
|
||||
📊 Model context limit: 200,000 tokens (85% = 170,000)
|
||||
🗜️ Summarizing turns 4-15 (12 turns)
|
||||
✅ Compressed: 20 → 9 messages (~45,000 tokens saved)
|
||||
```
|
||||
|
||||
To disable compression:
|
||||
```yaml
|
||||
compression:
|
||||
enabled: false
|
||||
```
|
||||
|
||||
## Quiet Mode
|
||||
|
||||
The CLI runs in "quiet mode" (`HERMES_QUIET=1`), which:
|
||||
- Suppresses verbose logging from tools
|
||||
- Enables kawaii-style animated feedback
|
||||
- Hides terminal environment warnings
|
||||
- Keeps output clean and user-friendly
|
||||
|
||||
For verbose output (debugging), use:
|
||||
```bash
|
||||
./hermes --verbose
|
||||
```
|
||||
124
docs/llm_client.md
Normal file
124
docs/llm_client.md
Normal file
@@ -0,0 +1,124 @@
|
||||
# LLM Client
|
||||
|
||||
Hermes Agent uses the OpenAI Python SDK with OpenRouter as the backend, providing access to many models through a single API.
|
||||
|
||||
## Configuration
|
||||
|
||||
```python
|
||||
from openai import OpenAI
|
||||
|
||||
client = OpenAI(
|
||||
api_key=os.getenv("OPENROUTER_API_KEY"),
|
||||
base_url="https://openrouter.ai/api/v1"
|
||||
)
|
||||
```
|
||||
|
||||
## Supported Models
|
||||
|
||||
Any model available on [OpenRouter](https://openrouter.ai/models):
|
||||
|
||||
```python
|
||||
# Anthropic
|
||||
model = "anthropic/claude-sonnet-4"
|
||||
model = "anthropic/claude-opus-4"
|
||||
|
||||
# OpenAI
|
||||
model = "openai/gpt-4o"
|
||||
model = "openai/o1"
|
||||
|
||||
# Google
|
||||
model = "google/gemini-2.0-flash"
|
||||
|
||||
# Open models
|
||||
model = "meta-llama/llama-3.3-70b-instruct"
|
||||
model = "deepseek/deepseek-chat-v3"
|
||||
model = "moonshotai/kimi-k2.5"
|
||||
```
|
||||
|
||||
## Tool Calling
|
||||
|
||||
Standard OpenAI function calling format:
|
||||
|
||||
```python
|
||||
response = client.chat.completions.create(
|
||||
model=model,
|
||||
messages=messages,
|
||||
tools=[
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "web_search",
|
||||
"description": "Search the web",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {"type": "string"}
|
||||
},
|
||||
"required": ["query"]
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
)
|
||||
|
||||
# Check for tool calls
|
||||
if response.choices[0].message.tool_calls:
|
||||
for tool_call in response.choices[0].message.tool_calls:
|
||||
name = tool_call.function.name
|
||||
args = json.loads(tool_call.function.arguments)
|
||||
# Execute tool...
|
||||
```
|
||||
|
||||
## Reasoning Models
|
||||
|
||||
Some models return reasoning/thinking content:
|
||||
|
||||
```python
|
||||
# Access reasoning if available
|
||||
message = response.choices[0].message
|
||||
if hasattr(message, 'reasoning_content') and message.reasoning_content:
|
||||
reasoning = message.reasoning_content
|
||||
# Store for trajectory export
|
||||
```
|
||||
|
||||
## Provider Selection
|
||||
|
||||
OpenRouter allows selecting specific providers:
|
||||
|
||||
```python
|
||||
response = client.chat.completions.create(
|
||||
model=model,
|
||||
messages=messages,
|
||||
extra_body={
|
||||
"provider": {
|
||||
"order": ["Anthropic", "Google"], # Preferred providers
|
||||
"ignore": ["Novita"], # Providers to skip
|
||||
}
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
Common errors and handling:
|
||||
|
||||
```python
|
||||
try:
|
||||
response = client.chat.completions.create(...)
|
||||
except openai.RateLimitError:
|
||||
# Back off and retry
|
||||
except openai.APIError as e:
|
||||
# Check e.code for specific errors
|
||||
# 400 = bad request (often provider-specific)
|
||||
# 502 = bad gateway (retry with different provider)
|
||||
```
|
||||
|
||||
## Cost Tracking
|
||||
|
||||
OpenRouter returns usage info:
|
||||
|
||||
```python
|
||||
usage = response.usage
|
||||
print(f"Tokens: {usage.prompt_tokens} + {usage.completion_tokens}")
|
||||
print(f"Cost: ${usage.cost:.6f}") # If available
|
||||
```
|
||||
121
docs/message_graph.md
Normal file
121
docs/message_graph.md
Normal file
@@ -0,0 +1,121 @@
|
||||
# Message Format & Trajectories
|
||||
|
||||
Hermes Agent uses two message formats: the **API format** for LLM calls and the **trajectory format** for training data export.
|
||||
|
||||
## API Message Format
|
||||
|
||||
Standard OpenAI chat format used during execution:
|
||||
|
||||
```python
|
||||
messages = [
|
||||
# System prompt
|
||||
{"role": "system", "content": "You are a helpful assistant with tools..."},
|
||||
|
||||
# User query
|
||||
{"role": "user", "content": "Search for Python tutorials"},
|
||||
|
||||
# Assistant with tool call
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": [{
|
||||
"id": "call_abc123",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "web_search",
|
||||
"arguments": "{\"query\": \"Python tutorials\"}"
|
||||
}
|
||||
}]
|
||||
},
|
||||
|
||||
# Tool result
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": "call_abc123",
|
||||
"content": "{\"results\": [...]}"
|
||||
},
|
||||
|
||||
# Final response
|
||||
{"role": "assistant", "content": "Here's what I found..."}
|
||||
]
|
||||
```
|
||||
|
||||
## Trajectory Format (ShareGPT)
|
||||
|
||||
Exported for training in ShareGPT format:
|
||||
|
||||
```json
|
||||
{
|
||||
"conversations": [
|
||||
{"from": "system", "value": "You are a helpful assistant..."},
|
||||
{"from": "human", "value": "Search for Python tutorials"},
|
||||
{"from": "gpt", "value": "<tool_call>\n{\"name\": \"web_search\", \"arguments\": {\"query\": \"Python tutorials\"}}\n</tool_call>"},
|
||||
{"from": "tool", "value": "<tool_response>\n{\"results\": [...]}\n</tool_response>"},
|
||||
{"from": "gpt", "value": "Here's what I found..."}
|
||||
],
|
||||
"tools": "[{\"type\": \"function\", \"function\": {...}}]",
|
||||
"source": "hermes-agent"
|
||||
}
|
||||
```
|
||||
|
||||
## Reasoning Content
|
||||
|
||||
For models that output reasoning/chain-of-thought:
|
||||
|
||||
**During execution** (API format):
|
||||
```python
|
||||
# Stored internally but not sent back to model in content
|
||||
assistant_msg = {
|
||||
"role": "assistant",
|
||||
"content": "Here's what I found...",
|
||||
"reasoning": "Let me think about this step by step..." # Internal only
|
||||
}
|
||||
```
|
||||
|
||||
**In trajectory export** (reasoning wrapped in tags):
|
||||
```json
|
||||
{
|
||||
"from": "gpt",
|
||||
"value": "<think>\nLet me think about this step by step...\n</think>\nHere's what I found..."
|
||||
}
|
||||
```
|
||||
|
||||
## Conversion Flow
|
||||
|
||||
```
|
||||
API Response → Internal Storage → Trajectory Export
|
||||
↓ ↓ ↓
|
||||
tool_calls reasoning field <tool_call> tags
|
||||
reasoning_content <think> tags
|
||||
```
|
||||
|
||||
The conversion happens in `_convert_to_trajectory_format()` in `run_agent.py`.
|
||||
|
||||
## Ephemeral System Prompts
|
||||
|
||||
Batch processing supports ephemeral system prompts that guide behavior during execution but are NOT saved to trajectories:
|
||||
|
||||
```python
|
||||
# During execution: full system prompt + ephemeral guidance
|
||||
messages = [
|
||||
{"role": "system", "content": SYSTEM_PROMPT + "\n\n" + ephemeral_prompt},
|
||||
...
|
||||
]
|
||||
|
||||
# In saved trajectory: only the base system prompt
|
||||
trajectory = {
|
||||
"conversations": [
|
||||
{"from": "system", "value": SYSTEM_PROMPT}, # No ephemeral
|
||||
...
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Trajectory Compression
|
||||
|
||||
Long trajectories can be compressed for training using `trajectory_compressor.py`:
|
||||
|
||||
- Protects first/last N turns
|
||||
- Summarizes middle turns with LLM
|
||||
- Targets specific token budget
|
||||
- See `configs/trajectory_compression.yaml` for settings
|
||||
515
docs/messaging.md
Normal file
515
docs/messaging.md
Normal file
@@ -0,0 +1,515 @@
|
||||
# Messaging Platform Integrations (Gateway)
|
||||
|
||||
Hermes Agent can connect to messaging platforms like Telegram, Discord, and WhatsApp to serve as a conversational AI assistant.
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# 1. Set your bot token(s) in .env file
|
||||
echo 'TELEGRAM_BOT_TOKEN="your_telegram_bot_token"' >> .env
|
||||
echo 'DISCORD_BOT_TOKEN="your_discord_bot_token"' >> .env
|
||||
|
||||
# 2. Test the gateway (foreground)
|
||||
./scripts/hermes-gateway run
|
||||
|
||||
# 3. Install as a system service (runs in background)
|
||||
./scripts/hermes-gateway install
|
||||
|
||||
# 4. Manage the service
|
||||
./scripts/hermes-gateway start
|
||||
./scripts/hermes-gateway stop
|
||||
./scripts/hermes-gateway restart
|
||||
./scripts/hermes-gateway status
|
||||
```
|
||||
|
||||
**Quick test (without service install):**
|
||||
```bash
|
||||
python cli.py --gateway # Runs in foreground, useful for debugging
|
||||
```
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Hermes Gateway │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ Telegram │ │ Discord │ │ WhatsApp │ │
|
||||
│ │ Adapter │ │ Adapter │ │ Adapter │ │
|
||||
│ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │
|
||||
│ │ │ │ │
|
||||
│ └─────────────────┼─────────────────┘ │
|
||||
│ │ │
|
||||
│ ┌────────▼────────┐ │
|
||||
│ │ Session Store │ │
|
||||
│ │ (per-chat) │ │
|
||||
│ └────────┬────────┘ │
|
||||
│ │ │
|
||||
│ ┌────────▼────────┐ │
|
||||
│ │ AIAgent │ │
|
||||
│ │ (run_agent) │ │
|
||||
│ └─────────────────┘ │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Session Management
|
||||
|
||||
### Session Persistence
|
||||
|
||||
Sessions persist across messages until they reset. The agent remembers your conversation context.
|
||||
|
||||
### Reset Policies
|
||||
|
||||
Sessions reset based on configurable policies:
|
||||
|
||||
| Policy | Default | Description |
|
||||
|--------|---------|-------------|
|
||||
| Daily | 4:00 AM | Reset at a specific hour each day |
|
||||
| Idle | 120 min | Reset after N minutes of inactivity |
|
||||
| Both | (combined) | Whichever triggers first |
|
||||
|
||||
### Manual Reset
|
||||
|
||||
Send `/new` or `/reset` as a message to start fresh.
|
||||
|
||||
### Per-Platform Overrides
|
||||
|
||||
Configure different reset policies per platform:
|
||||
|
||||
```json
|
||||
{
|
||||
"reset_by_platform": {
|
||||
"telegram": { "mode": "idle", "idle_minutes": 240 },
|
||||
"discord": { "mode": "idle", "idle_minutes": 60 }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Platform Setup
|
||||
|
||||
### Telegram
|
||||
|
||||
1. **Create a bot** via [@BotFather](https://t.me/BotFather)
|
||||
2. **Get your token** (looks like `123456789:ABCdefGHIjklMNOpqrsTUVwxyz`)
|
||||
3. **Set environment variable:**
|
||||
```bash
|
||||
export TELEGRAM_BOT_TOKEN="your_token_here"
|
||||
```
|
||||
4. **Optional: Set home channel** for cron job delivery:
|
||||
```bash
|
||||
export TELEGRAM_HOME_CHANNEL="-1001234567890"
|
||||
export TELEGRAM_HOME_CHANNEL_NAME="My Notes"
|
||||
```
|
||||
|
||||
**Requirements:**
|
||||
```bash
|
||||
pip install python-telegram-bot>=20.0
|
||||
```
|
||||
|
||||
### Discord
|
||||
|
||||
1. **Create an application** at [Discord Developer Portal](https://discord.com/developers/applications)
|
||||
2. **Create a bot** under your application
|
||||
3. **Get the bot token**
|
||||
4. **Enable required intents:**
|
||||
- Message Content Intent
|
||||
- Server Members Intent (optional)
|
||||
5. **Invite to your server** using OAuth2 URL generator (scopes: `bot`, `applications.commands`)
|
||||
6. **Set environment variable:**
|
||||
```bash
|
||||
export DISCORD_BOT_TOKEN="your_token_here"
|
||||
```
|
||||
7. **Optional: Set home channel:**
|
||||
```bash
|
||||
export DISCORD_HOME_CHANNEL="123456789012345678"
|
||||
export DISCORD_HOME_CHANNEL_NAME="#bot-updates"
|
||||
```
|
||||
|
||||
**Requirements:**
|
||||
```bash
|
||||
pip install discord.py>=2.0
|
||||
```
|
||||
|
||||
### WhatsApp
|
||||
|
||||
WhatsApp integration is more complex due to the lack of a simple bot API.
|
||||
|
||||
**Options:**
|
||||
1. **WhatsApp Business API** (requires Meta verification)
|
||||
2. **whatsapp-web.js** via Node.js bridge (for personal accounts)
|
||||
|
||||
**Bridge Setup:**
|
||||
1. Install Node.js
|
||||
2. Set up the bridge script (see `scripts/whatsapp-bridge/` for reference)
|
||||
3. Configure in gateway:
|
||||
```json
|
||||
{
|
||||
"platforms": {
|
||||
"whatsapp": {
|
||||
"enabled": true,
|
||||
"extra": {
|
||||
"bridge_script": "/path/to/bridge.js",
|
||||
"bridge_port": 3000
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
There are **three ways** to configure the gateway (in order of precedence):
|
||||
|
||||
### 1. Environment Variables (`.env` file) - Recommended for Quick Setup
|
||||
|
||||
Add to your `~/.hermes/.env` file:
|
||||
|
||||
```bash
|
||||
# =============================================================================
|
||||
# MESSAGING PLATFORM TOKENS
|
||||
# =============================================================================
|
||||
|
||||
# Telegram - get from @BotFather on Telegram
|
||||
TELEGRAM_BOT_TOKEN=your_telegram_bot_token
|
||||
TELEGRAM_ALLOWED_USERS=123456789,987654321 # Security: restrict to these user IDs
|
||||
|
||||
# Optional: Default channel for cron job delivery
|
||||
TELEGRAM_HOME_CHANNEL=-1001234567890
|
||||
TELEGRAM_HOME_CHANNEL_NAME="My Notes"
|
||||
|
||||
# Discord - get from Discord Developer Portal
|
||||
DISCORD_BOT_TOKEN=your_discord_bot_token
|
||||
DISCORD_ALLOWED_USERS=123456789012345678 # Security: restrict to these user IDs
|
||||
|
||||
# Optional: Default channel for cron job delivery
|
||||
DISCORD_HOME_CHANNEL=123456789012345678
|
||||
DISCORD_HOME_CHANNEL_NAME="#bot-updates"
|
||||
|
||||
# WhatsApp - requires Node.js bridge setup
|
||||
WHATSAPP_ENABLED=true
|
||||
|
||||
# =============================================================================
|
||||
# AGENT SETTINGS
|
||||
# =============================================================================
|
||||
|
||||
# Max tool-calling iterations per conversation (default: 60)
|
||||
HERMES_MAX_ITERATIONS=60
|
||||
|
||||
# Working directory for terminal commands (default: home ~)
|
||||
MESSAGING_CWD=/home/myuser
|
||||
|
||||
# =============================================================================
|
||||
# TOOL PROGRESS NOTIFICATIONS
|
||||
# =============================================================================
|
||||
|
||||
# Show progress messages as agent uses tools
|
||||
HERMES_TOOL_PROGRESS=true
|
||||
|
||||
# Mode: "new" (only when tool changes) or "all" (every tool call)
|
||||
HERMES_TOOL_PROGRESS_MODE=new
|
||||
|
||||
# =============================================================================
|
||||
# SESSION SETTINGS
|
||||
# =============================================================================
|
||||
|
||||
# Reset sessions after N minutes of inactivity (default: 120)
|
||||
SESSION_IDLE_MINUTES=120
|
||||
|
||||
# Daily reset hour in 24h format (default: 4 = 4am)
|
||||
SESSION_RESET_HOUR=4
|
||||
```
|
||||
|
||||
### 2. Gateway Config File (`~/.hermes/gateway.json`) - Full Control
|
||||
|
||||
For advanced configuration, create `~/.hermes/gateway.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"platforms": {
|
||||
"telegram": {
|
||||
"enabled": true,
|
||||
"token": "your_telegram_token",
|
||||
"home_channel": {
|
||||
"platform": "telegram",
|
||||
"chat_id": "-1001234567890",
|
||||
"name": "My Notes"
|
||||
}
|
||||
},
|
||||
"discord": {
|
||||
"enabled": true,
|
||||
"token": "your_discord_token",
|
||||
"home_channel": {
|
||||
"platform": "discord",
|
||||
"chat_id": "123456789012345678",
|
||||
"name": "#bot-updates"
|
||||
}
|
||||
}
|
||||
},
|
||||
"default_reset_policy": {
|
||||
"mode": "both",
|
||||
"at_hour": 4,
|
||||
"idle_minutes": 120
|
||||
},
|
||||
"reset_by_platform": {
|
||||
"discord": {
|
||||
"mode": "idle",
|
||||
"idle_minutes": 60
|
||||
}
|
||||
},
|
||||
"always_log_local": true
|
||||
}
|
||||
```
|
||||
|
||||
## Platform-Specific Toolsets
|
||||
|
||||
Each platform has its own toolset for security:
|
||||
|
||||
| Platform | Toolset | Capabilities |
|
||||
|----------|---------|--------------|
|
||||
| CLI | `hermes-cli` | Full access (terminal, browser, etc.) |
|
||||
| Telegram | `hermes-telegram` | Full tools including terminal |
|
||||
| Discord | `hermes-discord` | Full tools including terminal |
|
||||
| WhatsApp | `hermes-whatsapp` | Full tools including terminal |
|
||||
|
||||
## User Experience Features
|
||||
|
||||
### Typing Indicator
|
||||
|
||||
The gateway keeps the "typing..." indicator active throughout processing, refreshing every 4 seconds. This lets users know the bot is working even during long tool-calling sequences.
|
||||
|
||||
### Tool Progress Notifications
|
||||
|
||||
When `HERMES_TOOL_PROGRESS=true`, the bot sends status messages as it works:
|
||||
|
||||
```
|
||||
💻 `ls -la`...
|
||||
🔍 web_search...
|
||||
📄 web_extract...
|
||||
🎨 image_generate...
|
||||
```
|
||||
|
||||
Terminal commands show the actual command (truncated to 50 chars). Other tools just show the tool name.
|
||||
|
||||
**Modes:**
|
||||
- `new`: Only sends message when switching to a different tool (less spam)
|
||||
- `all`: Sends message for every single tool call
|
||||
|
||||
### Working Directory
|
||||
|
||||
- **CLI (`hermes` command)**: Uses current directory where you run the command
|
||||
- **Messaging**: Uses `MESSAGING_CWD` (default: home directory `~`)
|
||||
|
||||
This is intentional: CLI users are in a terminal and expect the agent to work in their current directory, while messaging users need a consistent starting location.
|
||||
|
||||
### Max Iterations
|
||||
|
||||
If the agent hits the max iteration limit while working, instead of a generic error, it asks the model to summarize what it found so far. This gives you a useful response even when the task couldn't be fully completed.
|
||||
|
||||
## Cron Job Delivery
|
||||
|
||||
When scheduling cron jobs, you can specify where the output should be delivered:
|
||||
|
||||
```
|
||||
User: "Remind me to check the server in 30 minutes"
|
||||
|
||||
Agent uses: schedule_cronjob(
|
||||
prompt="Check server status...",
|
||||
schedule="30m",
|
||||
deliver="origin" # Back to this chat
|
||||
)
|
||||
```
|
||||
|
||||
### Delivery Options
|
||||
|
||||
| Option | Description |
|
||||
|--------|-------------|
|
||||
| `"origin"` | Back to where the job was created |
|
||||
| `"local"` | Save to local files only |
|
||||
| `"telegram"` | Telegram home channel |
|
||||
| `"discord"` | Discord home channel |
|
||||
| `"telegram:123456"` | Specific Telegram chat |
|
||||
|
||||
## Dynamic Context Injection
|
||||
|
||||
The agent knows where it is via injected context:
|
||||
|
||||
```
|
||||
## Current Session Context
|
||||
|
||||
**Source:** Telegram (group: Dev Team, ID: -1001234567890)
|
||||
**Connected Platforms:** local, telegram, discord
|
||||
|
||||
**Home Channels:**
|
||||
- telegram: My Notes (ID: -1001234567890)
|
||||
- discord: #bot-updates (ID: 123456789012345678)
|
||||
|
||||
**Delivery options for scheduled tasks:**
|
||||
- "origin" → Back to this chat (Dev Team)
|
||||
- "local" → Save to local files only
|
||||
- "telegram" → Home channel (My Notes)
|
||||
- "discord" → Home channel (#bot-updates)
|
||||
```
|
||||
|
||||
## CLI Commands
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/platforms` | Show gateway configuration and status |
|
||||
| `--gateway` | Start the gateway (CLI flag) |
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### "python-telegram-bot not installed"
|
||||
|
||||
```bash
|
||||
pip install python-telegram-bot>=20.0
|
||||
```
|
||||
|
||||
### "discord.py not installed"
|
||||
|
||||
```bash
|
||||
pip install discord.py>=2.0
|
||||
```
|
||||
|
||||
### "No platforms connected"
|
||||
|
||||
1. Check your environment variables are set
|
||||
2. Check your tokens are valid
|
||||
3. Try `/platforms` to see configuration status
|
||||
|
||||
### Session not persisting
|
||||
|
||||
1. Check `~/.hermes/sessions/` exists
|
||||
2. Check session policies aren't too aggressive
|
||||
3. Verify no errors in gateway logs
|
||||
|
||||
## Adding a New Platform
|
||||
|
||||
To add a new messaging platform:
|
||||
|
||||
### 1. Create the adapter
|
||||
|
||||
Create `gateway/platforms/your_platform.py`:
|
||||
|
||||
```python
|
||||
from gateway.platforms.base import BasePlatformAdapter, MessageEvent, SendResult
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
|
||||
class YourPlatformAdapter(BasePlatformAdapter):
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.YOUR_PLATFORM)
|
||||
|
||||
async def connect(self) -> bool:
|
||||
# Connect to the platform
|
||||
...
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
# Disconnect
|
||||
...
|
||||
|
||||
async def send(self, chat_id: str, content: str, ...) -> SendResult:
|
||||
# Send a message
|
||||
...
|
||||
|
||||
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
|
||||
# Get chat information
|
||||
...
|
||||
```
|
||||
|
||||
### 2. Register the platform
|
||||
|
||||
Add to `gateway/config.py`:
|
||||
|
||||
```python
|
||||
class Platform(Enum):
|
||||
# ... existing ...
|
||||
YOUR_PLATFORM = "your_platform"
|
||||
```
|
||||
|
||||
### 3. Add to gateway runner
|
||||
|
||||
Update `gateway/run.py` `_create_adapter()`:
|
||||
|
||||
```python
|
||||
elif platform == Platform.YOUR_PLATFORM:
|
||||
from gateway.platforms.your_platform import YourPlatformAdapter
|
||||
return YourPlatformAdapter(config)
|
||||
```
|
||||
|
||||
### 4. Create a toolset (optional)
|
||||
|
||||
Add to `toolsets.py`:
|
||||
|
||||
```python
|
||||
"hermes-your-platform": {
|
||||
"description": "Your platform toolset",
|
||||
"tools": [...],
|
||||
"includes": []
|
||||
}
|
||||
```
|
||||
|
||||
### 5. Configure
|
||||
|
||||
Add environment variables to `.env`:
|
||||
|
||||
```bash
|
||||
YOUR_PLATFORM_TOKEN=...
|
||||
YOUR_PLATFORM_HOME_CHANNEL=...
|
||||
```
|
||||
|
||||
## Service Management
|
||||
|
||||
### Linux (systemd)
|
||||
|
||||
```bash
|
||||
# Install as user service
|
||||
./scripts/hermes-gateway install
|
||||
|
||||
# Manage
|
||||
systemctl --user start hermes-gateway
|
||||
systemctl --user stop hermes-gateway
|
||||
systemctl --user restart hermes-gateway
|
||||
systemctl --user status hermes-gateway
|
||||
|
||||
# View logs
|
||||
journalctl --user -u hermes-gateway -f
|
||||
|
||||
# Enable lingering (keeps running after logout)
|
||||
sudo loginctl enable-linger $USER
|
||||
```
|
||||
|
||||
### macOS (launchd)
|
||||
|
||||
```bash
|
||||
# Install
|
||||
./scripts/hermes-gateway install
|
||||
|
||||
# Manage
|
||||
launchctl start ai.hermes.gateway
|
||||
launchctl stop ai.hermes.gateway
|
||||
|
||||
# View logs
|
||||
tail -f ~/.hermes/logs/gateway.log
|
||||
```
|
||||
|
||||
### Manual (any platform)
|
||||
|
||||
```bash
|
||||
# Run in foreground (for testing/debugging)
|
||||
./scripts/hermes-gateway run
|
||||
|
||||
# Or via CLI (also foreground)
|
||||
python cli.py --gateway
|
||||
```
|
||||
|
||||
## Storage Locations
|
||||
|
||||
| Path | Purpose |
|
||||
|------|---------|
|
||||
| `~/.hermes/gateway.json` | Gateway configuration |
|
||||
| `~/.hermes/sessions/sessions.json` | Session index |
|
||||
| `~/.hermes/sessions/{id}.jsonl` | Conversation transcripts |
|
||||
| `~/.hermes/cron/output/` | Cron job outputs |
|
||||
| `~/.hermes/logs/gateway.log` | Gateway logs (macOS launchd) |
|
||||
159
docs/tools.md
Normal file
159
docs/tools.md
Normal file
@@ -0,0 +1,159 @@
|
||||
# Tools
|
||||
|
||||
Tools are functions that extend the agent's capabilities. Each tool is defined with an OpenAI-compatible JSON schema and an async handler function.
|
||||
|
||||
## Tool Structure
|
||||
|
||||
Each tool module in `tools/` exports:
|
||||
1. **Schema definitions** - OpenAI function-calling format
|
||||
2. **Handler functions** - Async functions that execute the tool
|
||||
|
||||
```python
|
||||
# Example: tools/web_tools.py
|
||||
|
||||
# Schema definition
|
||||
WEB_SEARCH_SCHEMA = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "web_search",
|
||||
"description": "Search the web for information",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {"type": "string", "description": "Search query"}
|
||||
},
|
||||
"required": ["query"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Handler function
|
||||
async def web_search(query: str) -> dict:
|
||||
"""Execute web search and return results."""
|
||||
# Implementation...
|
||||
return {"results": [...]}
|
||||
```
|
||||
|
||||
## Tool Categories
|
||||
|
||||
| Category | Module | Tools |
|
||||
|----------|--------|-------|
|
||||
| **Web** | `web_tools.py` | `web_search`, `web_extract`, `web_crawl` |
|
||||
| **Terminal** | `terminal_tool.py` | `terminal` (local/docker/singularity/modal/ssh backends) |
|
||||
| **Browser** | `browser_tool.py` | `browser_navigate`, `browser_click`, `browser_type`, etc. |
|
||||
| **Vision** | `vision_tools.py` | `vision_analyze` |
|
||||
| **Image Gen** | `image_generation_tool.py` | `image_generate` |
|
||||
| **Reasoning** | `mixture_of_agents_tool.py` | `mixture_of_agents` |
|
||||
| **Skills** | `skills_tool.py` | `skills_categories`, `skills_list`, `skill_view` |
|
||||
|
||||
## Tool Registration
|
||||
|
||||
Tools are registered in `model_tools.py`:
|
||||
|
||||
```python
|
||||
# model_tools.py
|
||||
TOOL_SCHEMAS = [
|
||||
*WEB_TOOL_SCHEMAS,
|
||||
*TERMINAL_TOOL_SCHEMAS,
|
||||
*BROWSER_TOOL_SCHEMAS,
|
||||
# ...
|
||||
]
|
||||
|
||||
TOOL_HANDLERS = {
|
||||
"web_search": web_search,
|
||||
"terminal": terminal_tool,
|
||||
"browser_navigate": browser_navigate,
|
||||
# ...
|
||||
}
|
||||
```
|
||||
|
||||
## Toolsets
|
||||
|
||||
Tools are grouped into **toolsets** for logical organization (see `toolsets.py`):
|
||||
|
||||
```python
|
||||
TOOLSETS = {
|
||||
"web": {
|
||||
"description": "Web search and content extraction",
|
||||
"tools": ["web_search", "web_extract", "web_crawl"]
|
||||
},
|
||||
"terminal": {
|
||||
"description": "Command execution",
|
||||
"tools": ["terminal"]
|
||||
},
|
||||
# ...
|
||||
}
|
||||
```
|
||||
|
||||
## Adding a New Tool
|
||||
|
||||
1. Create handler function in `tools/your_tool.py`
|
||||
2. Define JSON schema following OpenAI format
|
||||
3. Register in `model_tools.py` (schemas and handlers)
|
||||
4. Add to appropriate toolset in `toolsets.py`
|
||||
5. Update `tools/__init__.py` exports
|
||||
|
||||
## Stateful Tools
|
||||
|
||||
Some tools maintain state across calls within a session:
|
||||
|
||||
- **Terminal**: Keeps container/sandbox running between commands
|
||||
- **Browser**: Maintains browser session for multi-step navigation
|
||||
|
||||
State is managed per `task_id` and cleaned up automatically.
|
||||
|
||||
## Terminal Backends
|
||||
|
||||
The terminal tool supports multiple execution backends:
|
||||
|
||||
| Backend | Description | Use Case |
|
||||
|---------|-------------|----------|
|
||||
| `local` | Direct execution on host | Development, simple tasks |
|
||||
| `ssh` | Remote execution via SSH | Sandboxing (agent can't modify its own code) |
|
||||
| `docker` | Docker container | Isolation, reproducibility |
|
||||
| `singularity` | Singularity/Apptainer | HPC clusters, rootless containers |
|
||||
| `modal` | Modal cloud | Scalable cloud compute, GPUs |
|
||||
|
||||
Configure via environment variables or `cli-config.yaml`:
|
||||
|
||||
```yaml
|
||||
# SSH backend example (in cli-config.yaml)
|
||||
terminal:
|
||||
env_type: "ssh"
|
||||
ssh_host: "my-server.example.com"
|
||||
ssh_user: "myuser"
|
||||
ssh_key: "~/.ssh/id_rsa"
|
||||
cwd: "/home/myuser/project"
|
||||
```
|
||||
|
||||
The SSH backend uses ControlMaster for connection persistence, making subsequent commands fast.
|
||||
|
||||
## Skills Tools (Progressive Disclosure)
|
||||
|
||||
Skills are on-demand knowledge documents. They use **progressive disclosure** to minimize tokens:
|
||||
|
||||
```
|
||||
Level 0: skills_categories() → ["mlops", "devops"] (~50 tokens)
|
||||
Level 1: skills_list(category) → [{name, description}, ...] (~3k tokens)
|
||||
Level 2: skill_view(name) → Full content + metadata (varies)
|
||||
Level 3: skill_view(name, path) → Specific reference file (varies)
|
||||
```
|
||||
|
||||
Skill directory structure:
|
||||
```
|
||||
skills/
|
||||
└── mlops/
|
||||
└── axolotl/
|
||||
├── SKILL.md # Main instructions (required)
|
||||
├── references/ # Additional docs
|
||||
└── templates/ # Output formats, configs
|
||||
```
|
||||
|
||||
SKILL.md uses YAML frontmatter:
|
||||
```yaml
|
||||
---
|
||||
name: axolotl
|
||||
description: Fine-tuning LLMs with Axolotl
|
||||
tags: [Fine-Tuning, LoRA, DPO]
|
||||
---
|
||||
```
|
||||
@@ -40,8 +40,8 @@ This directory contains the integration layer between **hermes-agent's** tool-ca
|
||||
- `evaluate_log()` for saving eval results to JSON + samples.jsonl
|
||||
|
||||
**HermesAgentBaseEnv** (`hermes_base_env.py`) extends BaseEnv with hermes-agent specifics:
|
||||
- Sets `os.environ["TERMINAL_ENV"]` to configure the terminal backend (local, docker, modal, daytona, ssh, singularity)
|
||||
- Resolves hermes-agent toolsets via `_resolve_tools_for_group()` (calls `get_tool_definitions()` which queries `tools/registry.py`)
|
||||
- Sets `os.environ["TERMINAL_ENV"]` to configure the terminal backend (local, docker, modal, ssh, singularity)
|
||||
- Resolves hermes-agent toolsets via `_resolve_tools_for_group()` (calls `get_tool_definitions()` from `model_tools.py`)
|
||||
- Implements `collect_trajectory()` which runs the full agent loop and computes rewards
|
||||
- Supports two-phase operation (Phase 1: OpenAI server, Phase 2: VLLM ManagedServer)
|
||||
- Applies monkey patches for async-safe tool operation at import time
|
||||
@@ -60,7 +60,7 @@ Concrete environments inherit from `HermesAgentBaseEnv` and implement:
|
||||
`HermesAgentLoop` is the reusable multi-turn agent engine. It runs the same pattern as hermes-agent's `run_agent.py`:
|
||||
|
||||
1. Send messages + tools to the API via `server.chat_completion()`
|
||||
2. If the response contains `tool_calls`, execute each one via `handle_function_call()` (which delegates to `tools/registry.py`'s `dispatch()`)
|
||||
2. If the response contains `tool_calls`, execute each one via `handle_function_call()` from `model_tools.py`
|
||||
3. Append tool results to the conversation and go back to step 1
|
||||
4. If the response has no tool_calls, the agent is done
|
||||
|
||||
@@ -195,12 +195,8 @@ environments/
|
||||
│ └── hermes_swe_env.py
|
||||
│
|
||||
└── benchmarks/ # Evaluation benchmarks
|
||||
├── terminalbench_2/ # 89 terminal tasks, Modal sandboxes
|
||||
│ └── terminalbench2_env.py
|
||||
├── tblite/ # 100 calibrated tasks (fast TB2 proxy)
|
||||
│ └── tblite_env.py
|
||||
└── yc_bench/ # Long-horizon strategic benchmark
|
||||
└── yc_bench_env.py
|
||||
└── terminalbench_2/
|
||||
└── terminalbench2_env.py
|
||||
```
|
||||
|
||||
## Concrete Environments
|
||||
@@ -328,7 +324,7 @@ For eval benchmarks, follow the pattern in `terminalbench2_env.py`:
|
||||
| `distribution` | Probabilistic toolset distribution name | `None` |
|
||||
| `max_agent_turns` | Max LLM calls per rollout | `30` |
|
||||
| `agent_temperature` | Sampling temperature | `1.0` |
|
||||
| `terminal_backend` | `local`, `docker`, `modal`, `daytona`, `ssh`, `singularity` | `local` |
|
||||
| `terminal_backend` | `local`, `docker`, `modal`, `ssh`, `singularity` | `local` |
|
||||
| `system_prompt` | System message for the agent | `None` |
|
||||
| `tool_call_parser` | Parser name for Phase 2 | `hermes` |
|
||||
| `eval_handling` | `STOP_TRAIN`, `LIMIT_TRAIN`, `NONE` | `STOP_TRAIN` |
|
||||
|
||||
@@ -23,7 +23,7 @@ from typing import Any, Dict, List, Optional, Set
|
||||
from model_tools import handle_function_call
|
||||
|
||||
# Thread pool for running sync tool calls that internally use asyncio.run()
|
||||
# (e.g., mini-swe-agent's modal/docker/daytona backends). Running them in a separate
|
||||
# (e.g., mini-swe-agent's modal/docker backends). Running them in a separate
|
||||
# thread gives them a clean event loop so they don't deadlock inside Atropos's loop.
|
||||
# Size must be large enough for concurrent eval tasks (e.g., 89 TB2 tasks all
|
||||
# making tool calls). Too small = thread pool starvation, tasks queue for minutes.
|
||||
@@ -73,6 +73,12 @@ class AgentResult:
|
||||
# Tool errors encountered during the loop
|
||||
tool_errors: List[ToolError] = field(default_factory=list)
|
||||
|
||||
# Tool-call metrics (debugging / optional reward shaping)
|
||||
tool_calls_attempted: int = 0
|
||||
tool_calls_schema_valid: int = 0
|
||||
tool_calls_executed_ok: int = 0
|
||||
tool_calls_exec_error: int = 0
|
||||
|
||||
|
||||
def _extract_reasoning_from_message(message) -> Optional[str]:
|
||||
"""
|
||||
@@ -136,6 +142,8 @@ class HermesAgentLoop:
|
||||
temperature: float = 1.0,
|
||||
max_tokens: Optional[int] = None,
|
||||
extra_body: Optional[Dict[str, Any]] = None,
|
||||
tool_handler=None,
|
||||
max_context_tokens: Optional[int] = None,
|
||||
):
|
||||
"""
|
||||
Initialize the agent loop.
|
||||
@@ -152,6 +160,13 @@ class HermesAgentLoop:
|
||||
extra_body: Extra parameters passed to the OpenAI client's create() call.
|
||||
Used for OpenRouter provider preferences, transforms, etc.
|
||||
e.g. {"provider": {"ignore": ["DeepInfra"]}}
|
||||
tool_handler: Optional async callable(tool_name, args, task_id) -> str.
|
||||
When provided, used INSTEAD of handle_function_call() for
|
||||
tool dispatch. This allows sandbox backends (Modal, Nomad)
|
||||
to route tool calls through their slot-based execution.
|
||||
max_context_tokens: Maximum prompt tokens before truncation.
|
||||
If None, no truncation is applied.
|
||||
Recommended: set to max_model_len - max_tokens - 512 (safety margin).
|
||||
"""
|
||||
self.server = server
|
||||
self.tool_schemas = tool_schemas
|
||||
@@ -161,6 +176,123 @@ class HermesAgentLoop:
|
||||
self.temperature = temperature
|
||||
self.max_tokens = max_tokens
|
||||
self.extra_body = extra_body
|
||||
self.tool_handler = tool_handler
|
||||
self.max_context_tokens = max_context_tokens
|
||||
|
||||
def _truncate_context(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Truncate conversation history to fit within max_context_tokens.
|
||||
|
||||
Strategy:
|
||||
- Keep system message (index 0) and initial user message (index 1) always
|
||||
- Keep last 6 messages (recent context) always
|
||||
- For everything in between, progressively truncate tool result content
|
||||
- If still too long, drop oldest middle messages entirely
|
||||
|
||||
Uses rough char/4 token estimate (fast, no tokenizer needed).
|
||||
|
||||
NOTE: This function mutates the provided list (it may pop/replace entries).
|
||||
Call it on a copy when you want to preserve the full trajectory.
|
||||
"""
|
||||
if self.max_context_tokens is None:
|
||||
return messages
|
||||
|
||||
def estimate_tokens(msgs):
|
||||
total = 0
|
||||
for m in msgs:
|
||||
content = m.get("content", "") or ""
|
||||
total += len(content) // 4 + 10 # ~4 chars per token + overhead
|
||||
if "tool_calls" in m:
|
||||
total += 50 * len(m["tool_calls"]) # tool call overhead
|
||||
return total
|
||||
|
||||
if estimate_tokens(messages) <= self.max_context_tokens:
|
||||
return messages
|
||||
|
||||
protect_head = 2
|
||||
protect_tail = max(0, min(6, len(messages) - protect_head))
|
||||
middle_start = protect_head
|
||||
middle_end = len(messages) - protect_tail
|
||||
|
||||
# Phase 1: truncate tool outputs in the middle
|
||||
if middle_start < middle_end:
|
||||
for i in range(middle_start, middle_end):
|
||||
if messages[i].get("role") == "tool":
|
||||
content = messages[i].get("content", "") or ""
|
||||
if len(content) > 200:
|
||||
messages[i] = dict(messages[i])
|
||||
messages[i]["content"] = content[:100] + "\n...[truncated]...\n" + content[-50:]
|
||||
|
||||
if estimate_tokens(messages) <= self.max_context_tokens:
|
||||
return messages
|
||||
|
||||
# Phase 2: drop oldest middle messages (try to keep assistant+tool pairs)
|
||||
while middle_start < middle_end and estimate_tokens(messages) > self.max_context_tokens:
|
||||
msg = messages[middle_start]
|
||||
messages.pop(middle_start)
|
||||
middle_end -= 1
|
||||
|
||||
if msg.get("role") == "assistant" and msg.get("tool_calls"):
|
||||
tool_ids = {
|
||||
tc.get("id") or tc.get("tool_call_id", "")
|
||||
for tc in msg.get("tool_calls", [])
|
||||
if isinstance(tc, dict)
|
||||
}
|
||||
i = middle_start
|
||||
while i < middle_end:
|
||||
if messages[i].get("role") == "tool" and messages[i].get("tool_call_id", "") in tool_ids:
|
||||
messages.pop(i)
|
||||
middle_end -= 1
|
||||
else:
|
||||
i += 1
|
||||
|
||||
return messages
|
||||
|
||||
def _normalize_tool_args(self, tool_name: str, tool_args_raw: str) -> (Dict[str, Any], bool):
|
||||
"""Normalize tool arguments into a dict.
|
||||
|
||||
Returns: (args_dict, schema_valid)
|
||||
|
||||
schema_valid is True only when arguments decode directly into a dict
|
||||
(no double-decoding and no coercion/wrapping required).
|
||||
|
||||
Goal: keep environments robust (never crash on args format drift) while
|
||||
still allowing reward functions to penalize malformed formats if desired.
|
||||
"""
|
||||
try:
|
||||
decoded = json.loads(tool_args_raw)
|
||||
except json.JSONDecodeError:
|
||||
# Not JSON at all — treat as a plain string
|
||||
if tool_name == "terminal":
|
||||
return {"command": tool_args_raw}, False
|
||||
return {"input": tool_args_raw}, False
|
||||
|
||||
if isinstance(decoded, dict):
|
||||
if tool_name == "terminal":
|
||||
cmd = decoded.get("command")
|
||||
if isinstance(cmd, str) and cmd.strip():
|
||||
return decoded, True
|
||||
if isinstance(decoded.get("input"), str):
|
||||
return {"command": decoded.get("input")}, False
|
||||
return decoded, False
|
||||
return decoded, True
|
||||
|
||||
if isinstance(decoded, str):
|
||||
s = decoded.strip()
|
||||
if (s.startswith("{") and s.endswith("}")) or (s.startswith("[") and s.endswith("]")):
|
||||
try:
|
||||
decoded2 = json.loads(s)
|
||||
except json.JSONDecodeError:
|
||||
decoded2 = None
|
||||
if isinstance(decoded2, dict):
|
||||
return decoded2, False
|
||||
|
||||
if tool_name == "terminal":
|
||||
return {"command": decoded}, False
|
||||
return {"input": decoded}, False
|
||||
|
||||
if tool_name == "terminal":
|
||||
return {"command": str(decoded)}, False
|
||||
return {"input": decoded}, False
|
||||
|
||||
async def run(self, messages: List[Dict[str, Any]]) -> AgentResult:
|
||||
"""
|
||||
@@ -176,27 +308,22 @@ class HermesAgentLoop:
|
||||
reasoning_per_turn = []
|
||||
tool_errors: List[ToolError] = []
|
||||
|
||||
# Per-loop TodoStore for the todo tool (ephemeral, dies with the loop)
|
||||
from tools.todo_tool import TodoStore, todo_tool as _todo_tool
|
||||
_todo_store = TodoStore()
|
||||
|
||||
# Extract user task from first user message for browser_snapshot context
|
||||
_user_task = None
|
||||
for msg in messages:
|
||||
if msg.get("role") == "user":
|
||||
content = msg.get("content", "")
|
||||
if isinstance(content, str) and content.strip():
|
||||
_user_task = content.strip()[:500] # Cap to avoid huge strings
|
||||
break
|
||||
tool_calls_attempted = 0
|
||||
tool_calls_schema_valid = 0
|
||||
tool_calls_executed_ok = 0
|
||||
tool_calls_exec_error = 0
|
||||
|
||||
import time as _time
|
||||
|
||||
for turn in range(self.max_turns):
|
||||
turn_start = _time.monotonic()
|
||||
|
||||
# Truncate prompt view on a copy (preserve full trajectory in `messages`)
|
||||
prompt_messages = self._truncate_context(list(messages))
|
||||
|
||||
# Build the chat_completion kwargs
|
||||
chat_kwargs = {
|
||||
"messages": messages,
|
||||
"messages": prompt_messages,
|
||||
"n": 1,
|
||||
"temperature": self.temperature,
|
||||
}
|
||||
@@ -228,6 +355,10 @@ class HermesAgentLoop:
|
||||
finished_naturally=False,
|
||||
reasoning_per_turn=reasoning_per_turn,
|
||||
tool_errors=tool_errors,
|
||||
tool_calls_attempted=tool_calls_attempted,
|
||||
tool_calls_schema_valid=tool_calls_schema_valid,
|
||||
tool_calls_executed_ok=tool_calls_executed_ok,
|
||||
tool_calls_exec_error=tool_calls_exec_error,
|
||||
)
|
||||
|
||||
api_elapsed = _time.monotonic() - api_start
|
||||
@@ -241,6 +372,10 @@ class HermesAgentLoop:
|
||||
finished_naturally=False,
|
||||
reasoning_per_turn=reasoning_per_turn,
|
||||
tool_errors=tool_errors,
|
||||
tool_calls_attempted=tool_calls_attempted,
|
||||
tool_calls_schema_valid=tool_calls_schema_valid,
|
||||
tool_calls_executed_ok=tool_calls_executed_ok,
|
||||
tool_calls_exec_error=tool_calls_exec_error,
|
||||
)
|
||||
|
||||
assistant_msg = response.choices[0].message
|
||||
@@ -283,6 +418,7 @@ class HermesAgentLoop:
|
||||
|
||||
# Validate tool name
|
||||
if tool_name not in self.valid_tool_names:
|
||||
tool_calls_exec_error += 1
|
||||
tool_result = json.dumps(
|
||||
{
|
||||
"error": f"Unknown tool '{tool_name}'. "
|
||||
@@ -300,55 +436,36 @@ class HermesAgentLoop:
|
||||
tool_name, turn + 1,
|
||||
)
|
||||
else:
|
||||
# Parse arguments and dispatch
|
||||
try:
|
||||
args = json.loads(tool_args_raw)
|
||||
except json.JSONDecodeError:
|
||||
args = {}
|
||||
logger.warning(
|
||||
"Invalid JSON in tool call arguments for '%s': %s",
|
||||
tool_name, tool_args_raw[:200],
|
||||
)
|
||||
tool_calls_attempted += 1
|
||||
args, schema_valid = self._normalize_tool_args(tool_name, tool_args_raw)
|
||||
if schema_valid:
|
||||
tool_calls_schema_valid += 1
|
||||
|
||||
try:
|
||||
if tool_name == "terminal":
|
||||
backend = os.getenv("TERMINAL_ENV", "local")
|
||||
cmd_preview = args.get("command", "")[:80]
|
||||
cmd_preview = str(args.get("command", ""))[:80]
|
||||
logger.info(
|
||||
"[%s] $ %s", self.task_id[:8], cmd_preview,
|
||||
)
|
||||
|
||||
tool_submit_time = _time.monotonic()
|
||||
|
||||
# Todo tool -- handle locally (needs per-loop TodoStore)
|
||||
if tool_name == "todo":
|
||||
tool_result = _todo_tool(
|
||||
todos=args.get("todos"),
|
||||
merge=args.get("merge", False),
|
||||
store=_todo_store,
|
||||
)
|
||||
tool_elapsed = _time.monotonic() - tool_submit_time
|
||||
elif tool_name == "memory":
|
||||
tool_result = json.dumps({"error": "Memory is not available in RL environments."})
|
||||
tool_elapsed = _time.monotonic() - tool_submit_time
|
||||
elif tool_name == "session_search":
|
||||
tool_result = json.dumps({"error": "Session search is not available in RL environments."})
|
||||
tool_elapsed = _time.monotonic() - tool_submit_time
|
||||
if self.tool_handler:
|
||||
tool_result = await self.tool_handler(tool_name, args, self.task_id)
|
||||
else:
|
||||
# Run tool calls in a thread pool so backends that
|
||||
# use asyncio.run() internally (modal, docker, daytona) get
|
||||
# a clean event loop instead of deadlocking.
|
||||
# Run tool calls in a thread pool so backends that use
|
||||
# asyncio.run() internally (modal, docker) get a clean
|
||||
# event loop instead of deadlocking inside Atropos's loop.
|
||||
loop = asyncio.get_event_loop()
|
||||
# Capture current tool_name/args for the lambda
|
||||
_tn, _ta, _tid = tool_name, args, self.task_id
|
||||
tool_result = await loop.run_in_executor(
|
||||
_tool_executor,
|
||||
lambda: handle_function_call(
|
||||
_tn, _ta, task_id=_tid,
|
||||
user_task=_user_task,
|
||||
tool_name, args, task_id=self.task_id
|
||||
),
|
||||
)
|
||||
tool_elapsed = _time.monotonic() - tool_submit_time
|
||||
|
||||
tool_elapsed = _time.monotonic() - tool_submit_time
|
||||
|
||||
# Log slow tools and thread pool stats for debugging
|
||||
pool_active = _tool_executor._work_queue.qsize()
|
||||
@@ -359,6 +476,7 @@ class HermesAgentLoop:
|
||||
tool_elapsed, pool_active,
|
||||
)
|
||||
except Exception as e:
|
||||
tool_calls_exec_error += 1
|
||||
tool_result = json.dumps(
|
||||
{"error": f"Tool execution failed: {type(e).__name__}: {str(e)}"}
|
||||
)
|
||||
@@ -372,22 +490,31 @@ class HermesAgentLoop:
|
||||
"Tool '%s' execution failed on turn %d: %s",
|
||||
tool_name, turn + 1, e,
|
||||
)
|
||||
else:
|
||||
tool_err = False
|
||||
try:
|
||||
result_data = json.loads(tool_result)
|
||||
if isinstance(result_data, dict):
|
||||
err = result_data.get("error")
|
||||
if err:
|
||||
tool_err = True
|
||||
|
||||
# Also check if the tool returned an error in its JSON result
|
||||
try:
|
||||
result_data = json.loads(tool_result)
|
||||
if isinstance(result_data, dict):
|
||||
err = result_data.get("error")
|
||||
exit_code = result_data.get("exit_code")
|
||||
if err and exit_code and exit_code < 0:
|
||||
tool_errors.append(ToolError(
|
||||
turn=turn + 1, tool_name=tool_name,
|
||||
arguments=tool_args_raw[:200],
|
||||
error=str(err),
|
||||
tool_result=tool_result[:500],
|
||||
))
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
exit_code = result_data.get("exit_code")
|
||||
if exit_code is not None and isinstance(exit_code, int) and exit_code < 0:
|
||||
tool_err = True
|
||||
tool_errors.append(ToolError(
|
||||
turn=turn + 1, tool_name=tool_name,
|
||||
arguments=tool_args_raw[:200],
|
||||
error=str(err) if err else "nonzero exit_code",
|
||||
tool_result=tool_result[:500],
|
||||
))
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
|
||||
if tool_err:
|
||||
tool_calls_exec_error += 1
|
||||
else:
|
||||
tool_calls_executed_ok += 1
|
||||
|
||||
# Add tool response to conversation
|
||||
messages.append(
|
||||
@@ -428,6 +555,10 @@ class HermesAgentLoop:
|
||||
finished_naturally=True,
|
||||
reasoning_per_turn=reasoning_per_turn,
|
||||
tool_errors=tool_errors,
|
||||
tool_calls_attempted=tool_calls_attempted,
|
||||
tool_calls_schema_valid=tool_calls_schema_valid,
|
||||
tool_calls_executed_ok=tool_calls_executed_ok,
|
||||
tool_calls_exec_error=tool_calls_exec_error,
|
||||
)
|
||||
|
||||
# Hit max turns without the model stopping
|
||||
@@ -439,6 +570,10 @@ class HermesAgentLoop:
|
||||
finished_naturally=False,
|
||||
reasoning_per_turn=reasoning_per_turn,
|
||||
tool_errors=tool_errors,
|
||||
tool_calls_attempted=tool_calls_attempted,
|
||||
tool_calls_schema_valid=tool_calls_schema_valid,
|
||||
tool_calls_executed_ok=tool_calls_executed_ok,
|
||||
tool_calls_exec_error=tool_calls_exec_error,
|
||||
)
|
||||
|
||||
def _get_managed_state(self) -> Optional[Dict[str, Any]]:
|
||||
|
||||
@@ -1,73 +0,0 @@
|
||||
# OpenThoughts-TBLite Evaluation Environment
|
||||
|
||||
This environment evaluates terminal agents on the [OpenThoughts-TBLite](https://huggingface.co/datasets/open-thoughts/OpenThoughts-TBLite) benchmark, a difficulty-calibrated subset of [Terminal-Bench 2.0](https://www.tbench.ai/leaderboard/terminal-bench/2.0).
|
||||
|
||||
## Source
|
||||
|
||||
OpenThoughts-TBLite was created by the [OpenThoughts](https://www.openthoughts.ai/) Agent team in collaboration with [Snorkel AI](https://snorkel.ai/) and [Bespoke Labs](https://bespokelabs.ai/). The original dataset and documentation live at:
|
||||
|
||||
- **Dataset (source):** [open-thoughts/OpenThoughts-TBLite](https://huggingface.co/datasets/open-thoughts/OpenThoughts-TBLite)
|
||||
- **GitHub:** [open-thoughts/OpenThoughts-TBLite](https://github.com/open-thoughts/OpenThoughts-TBLite)
|
||||
- **Blog post:** [openthoughts.ai/blog/openthoughts-tblite](https://www.openthoughts.ai/blog/openthoughts-tblite)
|
||||
|
||||
## Our Dataset
|
||||
|
||||
We converted the source into the same schema used by our Terminal-Bench 2.0 environment (pre-built Docker Hub images, base64-encoded test tarballs, etc.) and published it as:
|
||||
|
||||
- **Dataset (ours):** [NousResearch/openthoughts-tblite](https://huggingface.co/datasets/NousResearch/openthoughts-tblite)
|
||||
- **Docker images:** `nousresearch/tblite-<task-name>:latest` on Docker Hub (100 images)
|
||||
|
||||
The conversion script is at `scripts/prepare_tblite_dataset.py`.
|
||||
|
||||
## Why TBLite?
|
||||
|
||||
Terminal-Bench 2.0 is one of the strongest frontier evaluations for terminal agents, but when a model scores near the floor (e.g., Qwen 3 8B at <1%), many changes look identical in aggregate score. TBLite addresses this by calibrating task difficulty using Claude Haiku 4.5 as a reference:
|
||||
|
||||
| Difficulty | Pass Rate Range | Tasks |
|
||||
|------------|----------------|-------|
|
||||
| Easy | >= 70% | 40 |
|
||||
| Medium | 40-69% | 26 |
|
||||
| Hard | 10-39% | 26 |
|
||||
| Extreme | < 10% | 8 |
|
||||
|
||||
This gives enough solvable tasks to detect small improvements quickly, while preserving enough hard tasks to avoid saturation. The correlation between TBLite and TB2 scores is **r = 0.911**.
|
||||
|
||||
TBLite also runs 2.6-8x faster than the full TB2, making it practical for iteration loops.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
# Run the full benchmark
|
||||
python environments/benchmarks/tblite/tblite_env.py evaluate
|
||||
|
||||
# Filter to specific tasks
|
||||
python environments/benchmarks/tblite/tblite_env.py evaluate \
|
||||
--env.task_filter "broken-python,pandas-etl"
|
||||
|
||||
# Use a different model
|
||||
python environments/benchmarks/tblite/tblite_env.py evaluate \
|
||||
--server.model_name "qwen/qwen3-30b"
|
||||
```
|
||||
|
||||
## Architecture
|
||||
|
||||
`TBLiteEvalEnv` is a thin subclass of `TerminalBench2EvalEnv`. All evaluation logic (agent loop, Docker sandbox management, test verification, metrics) is inherited. Only the defaults differ:
|
||||
|
||||
| Setting | TB2 | TBLite |
|
||||
|----------------|----------------------------------|-----------------------------------------|
|
||||
| Dataset | `NousResearch/terminal-bench-2` | `NousResearch/openthoughts-tblite` |
|
||||
| Tasks | 89 | 100 |
|
||||
| Task timeout | 1800s (30 min) | 1200s (20 min) |
|
||||
| Wandb name | `terminal-bench-2` | `openthoughts-tblite` |
|
||||
|
||||
## Citation
|
||||
|
||||
```bibtex
|
||||
@software{OpenThoughts-TBLite,
|
||||
author = {OpenThoughts-Agent team, Snorkel AI, Bespoke Labs},
|
||||
month = Feb,
|
||||
title = {{OpenThoughts-TBLite: A High-Signal Benchmark for Iterating on Terminal Agents}},
|
||||
howpublished = {https://www.openthoughts.ai/blog/openthoughts-tblite},
|
||||
year = {2026}
|
||||
}
|
||||
```
|
||||
@@ -1,39 +0,0 @@
|
||||
# OpenThoughts-TBLite Evaluation -- Default Configuration
|
||||
#
|
||||
# Eval-only environment for the TBLite benchmark (100 difficulty-calibrated
|
||||
# terminal tasks, a faster proxy for Terminal-Bench 2.0).
|
||||
# Uses Modal terminal backend for per-task cloud-isolated sandboxes
|
||||
# and OpenRouter for inference.
|
||||
#
|
||||
# Usage:
|
||||
# python environments/benchmarks/tblite/tblite_env.py evaluate \
|
||||
# --config environments/benchmarks/tblite/default.yaml
|
||||
#
|
||||
# # Override model:
|
||||
# python environments/benchmarks/tblite/tblite_env.py evaluate \
|
||||
# --config environments/benchmarks/tblite/default.yaml \
|
||||
# --openai.model_name anthropic/claude-sonnet-4
|
||||
|
||||
env:
|
||||
enabled_toolsets: ["terminal", "file"]
|
||||
max_agent_turns: 60
|
||||
max_token_length: 32000
|
||||
agent_temperature: 0.8
|
||||
terminal_backend: "modal"
|
||||
terminal_timeout: 300 # 5 min per command (builds, pip install)
|
||||
tool_pool_size: 128 # thread pool for 100 parallel tasks
|
||||
dataset_name: "NousResearch/openthoughts-tblite"
|
||||
test_timeout: 600
|
||||
task_timeout: 1200 # 20 min wall-clock per task (TBLite tasks are faster)
|
||||
tokenizer_name: "NousResearch/Hermes-3-Llama-3.1-8B"
|
||||
use_wandb: true
|
||||
wandb_name: "openthoughts-tblite"
|
||||
ensure_scores_are_not_same: false
|
||||
data_dir_to_save_evals: "environments/benchmarks/evals/openthoughts-tblite"
|
||||
|
||||
openai:
|
||||
base_url: "https://openrouter.ai/api/v1"
|
||||
model_name: "anthropic/claude-opus-4.6"
|
||||
server_type: "openai"
|
||||
health_check: false
|
||||
# api_key loaded from OPENROUTER_API_KEY in .env
|
||||
@@ -1,42 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# OpenThoughts-TBLite Evaluation
|
||||
#
|
||||
# Run from repo root:
|
||||
# bash environments/benchmarks/tblite/run_eval.sh
|
||||
#
|
||||
# Override model:
|
||||
# bash environments/benchmarks/tblite/run_eval.sh \
|
||||
# --openai.model_name anthropic/claude-sonnet-4
|
||||
#
|
||||
# Run a subset:
|
||||
# bash environments/benchmarks/tblite/run_eval.sh \
|
||||
# --env.task_filter broken-python,pandas-etl
|
||||
#
|
||||
# All terminal settings (backend, timeout, lifetime, pool size) are
|
||||
# configured via env config fields -- no env vars needed.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
mkdir -p logs evals/openthoughts-tblite
|
||||
LOG_FILE="logs/tblite_$(date +%Y%m%d_%H%M%S).log"
|
||||
|
||||
echo "OpenThoughts-TBLite Evaluation"
|
||||
echo "Log file: $LOG_FILE"
|
||||
echo ""
|
||||
|
||||
# Unbuffered python output so logs are written in real-time
|
||||
export PYTHONUNBUFFERED=1
|
||||
|
||||
# Show INFO-level agent loop timing (api/tool durations per turn)
|
||||
# These go to the log file; tqdm + [START]/[PASS]/[FAIL] go to terminal
|
||||
export LOGLEVEL=INFO
|
||||
|
||||
python tblite_env.py evaluate \
|
||||
--config default.yaml \
|
||||
"$@" \
|
||||
2>&1 | tee "$LOG_FILE"
|
||||
|
||||
echo ""
|
||||
echo "Log saved to: $LOG_FILE"
|
||||
echo "Eval results: evals/openthoughts-tblite/"
|
||||
@@ -1,119 +0,0 @@
|
||||
"""
|
||||
OpenThoughts-TBLite Evaluation Environment
|
||||
|
||||
A lighter, faster alternative to Terminal-Bench 2.0 for iterating on terminal
|
||||
agents. Uses the same evaluation logic as TerminalBench2EvalEnv but defaults
|
||||
to the NousResearch/openthoughts-tblite dataset (100 difficulty-calibrated
|
||||
tasks vs TB2's 89 harder tasks).
|
||||
|
||||
TBLite tasks are a curated subset of TB2 with a difficulty distribution
|
||||
designed to give meaningful signal even for smaller models:
|
||||
- Easy (40 tasks): >= 70% pass rate with Claude Haiku 4.5
|
||||
- Medium (26 tasks): 40-69% pass rate
|
||||
- Hard (26 tasks): 10-39% pass rate
|
||||
- Extreme (8 tasks): < 10% pass rate
|
||||
|
||||
Usage:
|
||||
python environments/benchmarks/tblite/tblite_env.py evaluate
|
||||
|
||||
# Filter to specific tasks:
|
||||
python environments/benchmarks/tblite/tblite_env.py evaluate \\
|
||||
--env.task_filter "broken-python,pandas-etl"
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
|
||||
_repo_root = Path(__file__).resolve().parent.parent.parent.parent
|
||||
if str(_repo_root) not in sys.path:
|
||||
sys.path.insert(0, str(_repo_root))
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from atroposlib.envs.base import EvalHandlingEnum
|
||||
from atroposlib.envs.server_handling.server_manager import APIServerConfig
|
||||
|
||||
from environments.benchmarks.terminalbench_2.terminalbench2_env import (
|
||||
TerminalBench2EvalConfig,
|
||||
TerminalBench2EvalEnv,
|
||||
)
|
||||
|
||||
|
||||
class TBLiteEvalConfig(TerminalBench2EvalConfig):
|
||||
"""Configuration for the OpenThoughts-TBLite evaluation environment.
|
||||
|
||||
Inherits all TB2 config fields. Only the dataset default and task timeout
|
||||
differ -- TBLite tasks are calibrated to be faster.
|
||||
"""
|
||||
|
||||
dataset_name: str = Field(
|
||||
default="NousResearch/openthoughts-tblite",
|
||||
description="HuggingFace dataset containing TBLite tasks.",
|
||||
)
|
||||
|
||||
task_timeout: int = Field(
|
||||
default=1200,
|
||||
description="Maximum wall-clock seconds per task. TBLite tasks are "
|
||||
"generally faster than TB2, so 20 minutes is usually sufficient.",
|
||||
)
|
||||
|
||||
|
||||
class TBLiteEvalEnv(TerminalBench2EvalEnv):
|
||||
"""OpenThoughts-TBLite evaluation environment.
|
||||
|
||||
Inherits all evaluation logic from TerminalBench2EvalEnv (agent loop,
|
||||
test verification, Docker image resolution, metrics, wandb logging).
|
||||
Only the default configuration differs.
|
||||
"""
|
||||
|
||||
name = "openthoughts-tblite"
|
||||
env_config_cls = TBLiteEvalConfig
|
||||
|
||||
@classmethod
|
||||
def config_init(cls) -> Tuple[TBLiteEvalConfig, List[APIServerConfig]]:
|
||||
env_config = TBLiteEvalConfig(
|
||||
enabled_toolsets=["terminal", "file"],
|
||||
disabled_toolsets=None,
|
||||
distribution=None,
|
||||
|
||||
max_agent_turns=60,
|
||||
max_token_length=16000,
|
||||
agent_temperature=0.6,
|
||||
system_prompt=None,
|
||||
|
||||
terminal_backend="modal",
|
||||
terminal_timeout=300,
|
||||
|
||||
test_timeout=180,
|
||||
|
||||
# 100 tasks in parallel
|
||||
tool_pool_size=128,
|
||||
|
||||
eval_handling=EvalHandlingEnum.STOP_TRAIN,
|
||||
group_size=1,
|
||||
steps_per_eval=1,
|
||||
total_steps=1,
|
||||
|
||||
tokenizer_name="NousResearch/Hermes-3-Llama-3.1-8B",
|
||||
use_wandb=True,
|
||||
wandb_name="openthoughts-tblite",
|
||||
ensure_scores_are_not_same=False,
|
||||
)
|
||||
|
||||
server_configs = [
|
||||
APIServerConfig(
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
model_name="anthropic/claude-sonnet-4",
|
||||
server_type="openai",
|
||||
api_key=os.getenv("OPENROUTER_API_KEY", ""),
|
||||
health_check=False,
|
||||
)
|
||||
]
|
||||
|
||||
return env_config, server_configs
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
TBLiteEvalEnv.cli()
|
||||
@@ -12,31 +12,21 @@
|
||||
# Run a subset:
|
||||
# bash environments/benchmarks/terminalbench_2/run_eval.sh \
|
||||
# --env.task_filter fix-git,git-multibranch
|
||||
#
|
||||
# All terminal settings (backend, timeout, lifetime, pool size) are
|
||||
# configured via env config fields -- no env vars needed.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
mkdir -p logs evals/terminal-bench-2
|
||||
LOG_FILE="logs/terminalbench2_$(date +%Y%m%d_%H%M%S).log"
|
||||
|
||||
echo "Terminal-Bench 2.0 Evaluation"
|
||||
echo "Log file: $LOG_FILE"
|
||||
echo "Log: $LOG_FILE"
|
||||
echo ""
|
||||
|
||||
# Unbuffered python output so logs are written in real-time
|
||||
export PYTHONUNBUFFERED=1
|
||||
export TERMINAL_ENV=modal
|
||||
export TERMINAL_TIMEOUT=300
|
||||
|
||||
# Show INFO-level agent loop timing (api/tool durations per turn)
|
||||
# These go to the log file; tqdm + [START]/[PASS]/[FAIL] go to terminal
|
||||
export LOGLEVEL=INFO
|
||||
|
||||
python terminalbench2_env.py evaluate \
|
||||
--config default.yaml \
|
||||
python environments/benchmarks/terminalbench_2/terminalbench2_env.py evaluate \
|
||||
--config environments/benchmarks/terminalbench_2/default.yaml \
|
||||
"$@" \
|
||||
2>&1 | tee "$LOG_FILE"
|
||||
|
||||
echo ""
|
||||
echo "Log saved to: $LOG_FILE"
|
||||
echo "Eval results: evals/terminal-bench-2/"
|
||||
|
||||
@@ -1,115 +0,0 @@
|
||||
# YC-Bench: Long-Horizon Agent Benchmark
|
||||
|
||||
[YC-Bench](https://github.com/collinear-ai/yc-bench) by [Collinear AI](https://collinear.ai/) is a deterministic, long-horizon benchmark that tests LLM agents' ability to act as a tech startup CEO. The agent manages a simulated company over 1-3 years, making compounding decisions about resource allocation, cash flow, task management, and prestige specialisation across 4 skill domains.
|
||||
|
||||
Unlike TerminalBench2 (which evaluates per-task coding ability with binary pass/fail), YC-Bench measures **long-term strategic coherence** — whether an agent can maintain consistent strategy, manage compounding consequences, and adapt plans over hundreds of turns.
|
||||
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
# Install yc-bench (optional dependency)
|
||||
pip install "hermes-agent[yc-bench]"
|
||||
|
||||
# Or install from source
|
||||
git clone https://github.com/collinear-ai/yc-bench
|
||||
cd yc-bench && pip install -e .
|
||||
|
||||
# Verify
|
||||
yc-bench --help
|
||||
```
|
||||
|
||||
## Running
|
||||
|
||||
```bash
|
||||
# From the repo root:
|
||||
bash environments/benchmarks/yc_bench/run_eval.sh
|
||||
|
||||
# Or directly:
|
||||
python environments/benchmarks/yc_bench/yc_bench_env.py evaluate \
|
||||
--config environments/benchmarks/yc_bench/default.yaml
|
||||
|
||||
# Override model:
|
||||
bash environments/benchmarks/yc_bench/run_eval.sh \
|
||||
--openai.model_name anthropic/claude-opus-4-20250514
|
||||
|
||||
# Quick single-preset test:
|
||||
bash environments/benchmarks/yc_bench/run_eval.sh \
|
||||
--env.presets '["fast_test"]' --env.seeds '[1]'
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
### Architecture
|
||||
|
||||
```
|
||||
HermesAgentLoop (our agent)
|
||||
-> terminal tool -> subprocess("yc-bench company status") -> JSON output
|
||||
-> terminal tool -> subprocess("yc-bench task accept --task-id X") -> JSON
|
||||
-> terminal tool -> subprocess("yc-bench sim resume") -> JSON (advance time)
|
||||
-> ... (100-500 turns per run)
|
||||
```
|
||||
|
||||
The environment initialises the simulation via `yc-bench sim init` (NOT `yc-bench run`, which would start yc-bench's own built-in agent loop). Our `HermesAgentLoop` then drives all interaction through CLI commands.
|
||||
|
||||
### Simulation Mechanics
|
||||
|
||||
- **4 skill domains**: research, inference, data_environment, training
|
||||
- **Prestige system** (1.0-10.0): Gates access to higher-paying tasks
|
||||
- **Employee management**: Junior/Mid/Senior with domain-specific skill rates
|
||||
- **Throughput splitting**: `effective_rate = base_rate / N` active tasks per employee
|
||||
- **Financial pressure**: Monthly payroll, bankruptcy = game over
|
||||
- **Deterministic**: SHA256-based RNG — same seed + preset = same world
|
||||
|
||||
### Difficulty Presets
|
||||
|
||||
| Preset | Employees | Tasks | Focus |
|
||||
|-----------|-----------|-------|-------|
|
||||
| tutorial | 3 | 50 | Basic loop mechanics |
|
||||
| easy | 5 | 100 | Throughput awareness |
|
||||
| **medium**| 5 | 150 | Prestige climbing + domain specialisation |
|
||||
| **hard** | 7 | 200 | Precise ETA reasoning |
|
||||
| nightmare | 8 | 300 | Sustained perfection under payroll pressure |
|
||||
| fast_test | (varies) | (varies) | Quick validation (~50 turns) |
|
||||
|
||||
Default eval runs **fast_test + medium + hard** × 3 seeds = 9 runs.
|
||||
|
||||
### Scoring
|
||||
|
||||
```
|
||||
composite = 0.5 × survival + 0.5 × normalised_funds
|
||||
```
|
||||
|
||||
- **Survival** (binary): Did the company avoid bankruptcy?
|
||||
- **Normalised funds** (0.0-1.0): Log-scale relative to initial $250K capital
|
||||
|
||||
## Configuration
|
||||
|
||||
Key fields in `default.yaml`:
|
||||
|
||||
| Field | Default | Description |
|
||||
|-------|---------|-------------|
|
||||
| `presets` | `["fast_test", "medium", "hard"]` | Which presets to evaluate |
|
||||
| `seeds` | `[1, 2, 3]` | RNG seeds per preset |
|
||||
| `max_agent_turns` | 200 | Max LLM calls per run |
|
||||
| `run_timeout` | 3600 | Wall-clock timeout per run (seconds) |
|
||||
| `survival_weight` | 0.5 | Weight of survival in composite score |
|
||||
| `funds_weight` | 0.5 | Weight of normalised funds in composite |
|
||||
| `horizon_years` | null | Override horizon (null = auto from preset) |
|
||||
|
||||
## Cost & Time Estimates
|
||||
|
||||
Each run is 100-500 LLM turns. Approximate costs per run at typical API rates:
|
||||
|
||||
| Preset | Turns | Time | Est. Cost |
|
||||
|--------|-------|------|-----------|
|
||||
| fast_test | ~50 | 5-10 min | $1-5 |
|
||||
| medium | ~200 | 20-40 min | $5-15 |
|
||||
| hard | ~300 | 30-60 min | $10-25 |
|
||||
|
||||
Full default eval (9 runs): ~3-6 hours, $50-200 depending on model.
|
||||
|
||||
## References
|
||||
|
||||
- [collinear-ai/yc-bench](https://github.com/collinear-ai/yc-bench) — Official repository
|
||||
- [Collinear AI](https://collinear.ai/) — Company behind yc-bench
|
||||
- [TerminalBench2](../terminalbench_2/) — Per-task coding benchmark (complementary)
|
||||
@@ -1,43 +0,0 @@
|
||||
# YC-Bench Evaluation -- Default Configuration
|
||||
#
|
||||
# Long-horizon agent benchmark: agent plays CEO of an AI startup over
|
||||
# a simulated 1-3 year run, interacting via yc-bench CLI subcommands.
|
||||
#
|
||||
# Requires: pip install "hermes-agent[yc-bench]"
|
||||
#
|
||||
# Usage:
|
||||
# python environments/benchmarks/yc_bench/yc_bench_env.py evaluate \
|
||||
# --config environments/benchmarks/yc_bench/default.yaml
|
||||
#
|
||||
# # Override model:
|
||||
# python environments/benchmarks/yc_bench/yc_bench_env.py evaluate \
|
||||
# --config environments/benchmarks/yc_bench/default.yaml \
|
||||
# --openai.model_name anthropic/claude-opus-4-20250514
|
||||
|
||||
env:
|
||||
enabled_toolsets: ["terminal"]
|
||||
max_agent_turns: 200
|
||||
max_token_length: 32000
|
||||
agent_temperature: 0.0
|
||||
terminal_backend: "local"
|
||||
terminal_timeout: 60
|
||||
presets: ["fast_test", "medium", "hard"]
|
||||
seeds: [1, 2, 3]
|
||||
run_timeout: 3600 # 60 min wall-clock per run, auto-FAIL if exceeded
|
||||
survival_weight: 0.5 # weight of binary survival in composite score
|
||||
funds_weight: 0.5 # weight of normalised final funds in composite score
|
||||
db_dir: "/tmp/yc_bench_dbs"
|
||||
company_name: "BenchCo"
|
||||
start_date: "01/01/2025" # MM/DD/YYYY (yc-bench convention)
|
||||
tokenizer_name: "NousResearch/Hermes-3-Llama-3.1-8B"
|
||||
use_wandb: true
|
||||
wandb_name: "yc-bench"
|
||||
ensure_scores_are_not_same: false
|
||||
data_dir_to_save_evals: "environments/benchmarks/evals/yc-bench"
|
||||
|
||||
openai:
|
||||
base_url: "https://openrouter.ai/api/v1"
|
||||
model_name: "anthropic/claude-sonnet-4.6"
|
||||
server_type: "openai"
|
||||
health_check: false
|
||||
# api_key loaded from OPENROUTER_API_KEY in .env
|
||||
@@ -1,34 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# YC-Bench Evaluation
|
||||
#
|
||||
# Requires: pip install "hermes-agent[yc-bench]"
|
||||
#
|
||||
# Run from repo root:
|
||||
# bash environments/benchmarks/yc_bench/run_eval.sh
|
||||
#
|
||||
# Override model:
|
||||
# bash environments/benchmarks/yc_bench/run_eval.sh \
|
||||
# --openai.model_name anthropic/claude-opus-4-20250514
|
||||
#
|
||||
# Run a single preset:
|
||||
# bash environments/benchmarks/yc_bench/run_eval.sh \
|
||||
# --env.presets '["fast_test"]' --env.seeds '[1]'
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
mkdir -p logs evals/yc-bench
|
||||
LOG_FILE="logs/yc_bench_$(date +%Y%m%d_%H%M%S).log"
|
||||
|
||||
echo "YC-Bench Evaluation"
|
||||
echo "Log: $LOG_FILE"
|
||||
echo ""
|
||||
|
||||
PYTHONUNBUFFERED=1 LOGLEVEL="${LOGLEVEL:-INFO}" \
|
||||
python environments/benchmarks/yc_bench/yc_bench_env.py evaluate \
|
||||
--config environments/benchmarks/yc_bench/default.yaml \
|
||||
"$@" \
|
||||
2>&1 | tee "$LOG_FILE"
|
||||
|
||||
echo ""
|
||||
echo "Log saved to: $LOG_FILE"
|
||||
@@ -1,847 +0,0 @@
|
||||
"""
|
||||
YCBenchEvalEnv -- YC-Bench Long-Horizon Agent Benchmark Environment
|
||||
|
||||
Evaluates agentic LLMs on YC-Bench: a deterministic, long-horizon benchmark
|
||||
where the agent acts as CEO of an AI startup over a simulated 1-3 year run.
|
||||
The agent manages cash flow, employees, tasks, and prestige across 4 domains,
|
||||
interacting exclusively via CLI subprocess calls against a SQLite-backed
|
||||
discrete-event simulation.
|
||||
|
||||
Unlike TerminalBench2 (per-task binary pass/fail), YC-Bench measures sustained
|
||||
multi-turn strategic coherence -- whether an agent can manage compounding
|
||||
decisions over hundreds of turns without going bankrupt.
|
||||
|
||||
This is an eval-only environment. Run via:
|
||||
|
||||
python environments/benchmarks/yc_bench/yc_bench_env.py evaluate \
|
||||
--config environments/benchmarks/yc_bench/default.yaml
|
||||
|
||||
The evaluate flow:
|
||||
1. setup() -- Verifies yc-bench installed, builds eval matrix (preset x seed)
|
||||
2. evaluate() -- Iterates over all runs sequentially through:
|
||||
a. rollout_and_score_eval() -- Per-run agent loop
|
||||
- Initialises a fresh yc-bench simulation via `sim init` (NOT `run`)
|
||||
- Runs HermesAgentLoop with terminal tool only
|
||||
- Reads final SQLite DB to extract score
|
||||
- Returns survival (0/1) + normalised funds score
|
||||
b. Aggregates per-preset and overall metrics
|
||||
c. Logs results via evaluate_log() and wandb
|
||||
|
||||
Key features:
|
||||
- CLI-only interface: agent calls yc-bench subcommands via terminal tool
|
||||
- Deterministic: same seed + preset = same world (SHA256-based RNG)
|
||||
- Multi-dimensional scoring: survival + normalised final funds
|
||||
- Per-preset difficulty breakdown in results
|
||||
- Isolated SQLite DB per run (no cross-run state leakage)
|
||||
|
||||
Requires: pip install hermes-agent[yc-bench]
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
import sqlite3
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
_repo_root = Path(__file__).resolve().parent.parent.parent.parent
|
||||
if str(_repo_root) not in sys.path:
|
||||
sys.path.insert(0, str(_repo_root))
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from atroposlib.envs.base import EvalHandlingEnum
|
||||
from atroposlib.envs.server_handling.server_manager import APIServerConfig
|
||||
|
||||
from environments.agent_loop import HermesAgentLoop
|
||||
from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# =============================================================================
|
||||
# System prompt
|
||||
# =============================================================================
|
||||
|
||||
YC_BENCH_SYSTEM_PROMPT = """\
|
||||
You are the autonomous CEO of an early-stage AI startup in a deterministic
|
||||
business simulation. You manage the company exclusively through the `yc-bench`
|
||||
CLI tool. Your primary goal is to **survive** until the simulation horizon ends
|
||||
without going bankrupt, while **maximising final funds**.
|
||||
|
||||
## Simulation Mechanics
|
||||
|
||||
- **Funds**: You start with $250,000 seed capital. Revenue comes from completing
|
||||
tasks. Rewards scale with your prestige: `base × (1 + scale × (prestige − 1))`.
|
||||
- **Domains**: There are 4 skill domains: **research**, **inference**,
|
||||
**data_environment**, and **training**. Each has its own prestige level
|
||||
(1.0-10.0). Higher prestige unlocks better-paying tasks.
|
||||
- **Employees**: You have employees (Junior/Mid/Senior) with domain-specific
|
||||
skill rates. **Throughput splits**: `effective_rate = base_rate / N` where N
|
||||
is the number of active tasks assigned to that employee. Focus beats breadth.
|
||||
- **Payroll**: Deducted automatically on the first business day of each month.
|
||||
Running out of funds = bankruptcy = game over.
|
||||
- **Time**: The simulation runs on business days (Mon-Fri), 09:00-18:00.
|
||||
Time only advances when you call `yc-bench sim resume`.
|
||||
|
||||
## Task Lifecycle
|
||||
|
||||
1. Browse market tasks with `market browse`
|
||||
2. Accept a task with `task accept` (this sets its deadline)
|
||||
3. Assign employees with `task assign`
|
||||
4. Dispatch with `task dispatch` to start work
|
||||
5. Call `sim resume` to advance time and let employees make progress
|
||||
6. Tasks complete when all domain requirements are fulfilled
|
||||
|
||||
**Penalties for failure vary by difficulty preset.** Completing a task on time
|
||||
earns full reward + prestige gain. Missing a deadline or cancelling a task
|
||||
incurs prestige penalties -- cancelling is always more costly than letting a
|
||||
task fail, so cancel only as a last resort.
|
||||
|
||||
## CLI Commands
|
||||
|
||||
### Observe
|
||||
- `yc-bench company status` -- funds, prestige, runway
|
||||
- `yc-bench employee list` -- skills, salary, active tasks
|
||||
- `yc-bench market browse [--domain D] [--required-prestige-lte N]` -- available tasks
|
||||
- `yc-bench task list [--status active|planned]` -- your tasks
|
||||
- `yc-bench task inspect --task-id UUID` -- progress, deadline, assignments
|
||||
- `yc-bench finance ledger [--category monthly_payroll|task_reward]` -- transaction history
|
||||
- `yc-bench report monthly` -- monthly P&L
|
||||
|
||||
### Act
|
||||
- `yc-bench task accept --task-id UUID` -- accept from market
|
||||
- `yc-bench task assign --task-id UUID --employee-id UUID` -- assign employee
|
||||
- `yc-bench task dispatch --task-id UUID` -- start work (needs >=1 assignment)
|
||||
- `yc-bench task cancel --task-id UUID --reason "text"` -- cancel (prestige penalty)
|
||||
- `yc-bench sim resume` -- advance simulation clock
|
||||
|
||||
### Memory (persists across context truncation)
|
||||
- `yc-bench scratchpad read` -- read your persistent notes
|
||||
- `yc-bench scratchpad write --content "text"` -- overwrite notes
|
||||
- `yc-bench scratchpad append --content "text"` -- append to notes
|
||||
- `yc-bench scratchpad clear` -- clear notes
|
||||
|
||||
## Strategy Guidelines
|
||||
|
||||
1. **Specialise in 2-3 domains** to climb the prestige ladder faster and unlock
|
||||
high-reward tasks. Don't spread thin across all 4 domains early on.
|
||||
2. **Focus employees** -- assigning one employee to many tasks halves their
|
||||
throughput per additional task. Keep assignments concentrated.
|
||||
3. **Use the scratchpad** to track your strategy, upcoming deadlines, and
|
||||
employee assignments. This persists even if conversation context is truncated.
|
||||
4. **Monitor runway** -- always know how many months of payroll you can cover.
|
||||
Accept high-reward tasks before payroll dates.
|
||||
5. **Don't over-accept** -- taking too many tasks and missing deadlines cascades
|
||||
into prestige loss, locking you out of profitable contracts.
|
||||
6. Use `finance ledger` and `report monthly` to track revenue trends.
|
||||
|
||||
## Your Turn
|
||||
|
||||
Each turn:
|
||||
1. Call `yc-bench company status` and `yc-bench task list` to orient yourself.
|
||||
2. Check for completed tasks and pending deadlines.
|
||||
3. Browse market for profitable tasks within your prestige level.
|
||||
4. Accept, assign, and dispatch tasks strategically.
|
||||
5. Call `yc-bench sim resume` to advance time.
|
||||
6. Repeat until the simulation ends.
|
||||
|
||||
Think step by step before acting."""
|
||||
|
||||
# Starting funds in cents ($250,000)
|
||||
INITIAL_FUNDS_CENTS = 25_000_000
|
||||
|
||||
# Default horizon per preset (years)
|
||||
_PRESET_HORIZONS = {
|
||||
"tutorial": 1,
|
||||
"easy": 1,
|
||||
"medium": 1,
|
||||
"hard": 1,
|
||||
"nightmare": 1,
|
||||
"fast_test": 1,
|
||||
"default": 3,
|
||||
"high_reward": 1,
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Configuration
|
||||
# =============================================================================
|
||||
|
||||
class YCBenchEvalConfig(HermesAgentEnvConfig):
|
||||
"""
|
||||
Configuration for the YC-Bench evaluation environment.
|
||||
|
||||
Extends HermesAgentEnvConfig with YC-Bench-specific settings for
|
||||
preset selection, seed control, scoring, and simulation parameters.
|
||||
"""
|
||||
|
||||
presets: List[str] = Field(
|
||||
default=["fast_test", "medium", "hard"],
|
||||
description="YC-Bench preset names to evaluate.",
|
||||
)
|
||||
seeds: List[int] = Field(
|
||||
default=[1, 2, 3],
|
||||
description="Random seeds -- each preset x seed = one run.",
|
||||
)
|
||||
run_timeout: int = Field(
|
||||
default=3600,
|
||||
description="Maximum wall-clock seconds per run. Default 60 minutes.",
|
||||
)
|
||||
survival_weight: float = Field(
|
||||
default=0.5,
|
||||
description="Weight of survival (0/1) in composite score.",
|
||||
)
|
||||
funds_weight: float = Field(
|
||||
default=0.5,
|
||||
description="Weight of normalised final funds in composite score.",
|
||||
)
|
||||
db_dir: str = Field(
|
||||
default="/tmp/yc_bench_dbs",
|
||||
description="Directory for per-run SQLite databases.",
|
||||
)
|
||||
horizon_years: Optional[int] = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"Simulation horizon in years. If None (default), inferred from "
|
||||
"preset name (1 year for most, 3 for 'default')."
|
||||
),
|
||||
)
|
||||
company_name: str = Field(
|
||||
default="BenchCo",
|
||||
description="Name of the simulated company.",
|
||||
)
|
||||
start_date: str = Field(
|
||||
default="01/01/2025",
|
||||
description="Simulation start date in MM/DD/YYYY format (yc-bench convention).",
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Scoring helpers
|
||||
# =============================================================================
|
||||
|
||||
def _read_final_score(db_path: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Read final game state from a YC-Bench SQLite database.
|
||||
|
||||
Returns dict with final_funds_cents (int), survived (bool),
|
||||
terminal_reason (str).
|
||||
|
||||
Note: yc-bench table names are plural -- 'companies' not 'company',
|
||||
'sim_events' not 'simulation_log'.
|
||||
"""
|
||||
if not os.path.exists(db_path):
|
||||
logger.warning("DB not found at %s", db_path)
|
||||
return {
|
||||
"final_funds_cents": 0,
|
||||
"survived": False,
|
||||
"terminal_reason": "db_missing",
|
||||
}
|
||||
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cur = conn.cursor()
|
||||
|
||||
# Read final funds from the 'companies' table
|
||||
cur.execute("SELECT funds_cents FROM companies LIMIT 1")
|
||||
row = cur.fetchone()
|
||||
funds = row[0] if row else 0
|
||||
|
||||
# Determine terminal reason from 'sim_events' table
|
||||
terminal_reason = "unknown"
|
||||
try:
|
||||
cur.execute(
|
||||
"SELECT event_type FROM sim_events "
|
||||
"WHERE event_type IN ('bankruptcy', 'horizon_end') "
|
||||
"ORDER BY scheduled_at DESC LIMIT 1"
|
||||
)
|
||||
event_row = cur.fetchone()
|
||||
if event_row:
|
||||
terminal_reason = event_row[0]
|
||||
except sqlite3.OperationalError:
|
||||
# Table may not exist if simulation didn't progress
|
||||
pass
|
||||
|
||||
survived = funds >= 0 and terminal_reason != "bankruptcy"
|
||||
return {
|
||||
"final_funds_cents": funds,
|
||||
"survived": survived,
|
||||
"terminal_reason": terminal_reason,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to read DB %s: %s", db_path, e)
|
||||
return {
|
||||
"final_funds_cents": 0,
|
||||
"survived": False,
|
||||
"terminal_reason": f"db_error: {e}",
|
||||
}
|
||||
finally:
|
||||
if conn:
|
||||
conn.close()
|
||||
|
||||
|
||||
def _compute_composite_score(
|
||||
final_funds_cents: int,
|
||||
survived: bool,
|
||||
survival_weight: float = 0.5,
|
||||
funds_weight: float = 0.5,
|
||||
initial_funds_cents: int = INITIAL_FUNDS_CENTS,
|
||||
) -> float:
|
||||
"""
|
||||
Compute composite score from survival and final funds.
|
||||
|
||||
Score = survival_weight * survival_score
|
||||
+ funds_weight * normalised_funds_score
|
||||
|
||||
Normalised funds uses log-scale relative to initial capital:
|
||||
- funds <= 0: 0.0
|
||||
- funds == initial: ~0.15
|
||||
- funds == 10x: ~0.52
|
||||
- funds == 100x: 1.0
|
||||
"""
|
||||
survival_score = 1.0 if survived else 0.0
|
||||
|
||||
if final_funds_cents <= 0:
|
||||
funds_score = 0.0
|
||||
else:
|
||||
max_ratio = 100.0
|
||||
ratio = final_funds_cents / max(initial_funds_cents, 1)
|
||||
funds_score = min(math.log1p(ratio) / math.log1p(max_ratio), 1.0)
|
||||
|
||||
return survival_weight * survival_score + funds_weight * funds_score
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Main Environment
|
||||
# =============================================================================
|
||||
|
||||
class YCBenchEvalEnv(HermesAgentBaseEnv):
|
||||
"""
|
||||
YC-Bench long-horizon agent benchmark environment (eval-only).
|
||||
|
||||
Each eval item is a (preset, seed) pair. The environment initialises the
|
||||
simulation via ``yc-bench sim init`` (NOT ``yc-bench run`` which would start
|
||||
a competing built-in agent loop). The HermesAgentLoop then drives the
|
||||
interaction by calling individual yc-bench CLI commands via the terminal tool.
|
||||
|
||||
After the agent loop ends, the SQLite DB is read to extract the final score.
|
||||
|
||||
Scoring:
|
||||
composite = 0.5 * survival + 0.5 * normalised_funds
|
||||
"""
|
||||
|
||||
name = "yc-bench"
|
||||
env_config_cls = YCBenchEvalConfig
|
||||
|
||||
@classmethod
|
||||
def config_init(cls) -> Tuple[YCBenchEvalConfig, List[APIServerConfig]]:
|
||||
env_config = YCBenchEvalConfig(
|
||||
enabled_toolsets=["terminal"],
|
||||
disabled_toolsets=None,
|
||||
distribution=None,
|
||||
max_agent_turns=200,
|
||||
max_token_length=32000,
|
||||
agent_temperature=0.0,
|
||||
system_prompt=YC_BENCH_SYSTEM_PROMPT,
|
||||
terminal_backend="local",
|
||||
terminal_timeout=60,
|
||||
presets=["fast_test", "medium", "hard"],
|
||||
seeds=[1, 2, 3],
|
||||
run_timeout=3600,
|
||||
survival_weight=0.5,
|
||||
funds_weight=0.5,
|
||||
db_dir="/tmp/yc_bench_dbs",
|
||||
eval_handling=EvalHandlingEnum.STOP_TRAIN,
|
||||
group_size=1,
|
||||
steps_per_eval=1,
|
||||
total_steps=1,
|
||||
tokenizer_name="NousResearch/Hermes-3-Llama-3.1-8B",
|
||||
use_wandb=True,
|
||||
wandb_name="yc-bench",
|
||||
ensure_scores_are_not_same=False,
|
||||
)
|
||||
|
||||
server_configs = [
|
||||
APIServerConfig(
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
model_name="anthropic/claude-sonnet-4.6",
|
||||
server_type="openai",
|
||||
api_key=os.getenv("OPENROUTER_API_KEY", ""),
|
||||
health_check=False,
|
||||
)
|
||||
]
|
||||
|
||||
return env_config, server_configs
|
||||
|
||||
# =========================================================================
|
||||
# Setup
|
||||
# =========================================================================
|
||||
|
||||
async def setup(self):
|
||||
"""Verify yc-bench is installed and build the eval matrix."""
|
||||
# Verify yc-bench CLI is available
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["yc-bench", "--help"], capture_output=True, text=True, timeout=10
|
||||
)
|
||||
if result.returncode != 0:
|
||||
raise FileNotFoundError
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired):
|
||||
raise RuntimeError(
|
||||
"yc-bench CLI not found. Install with:\n"
|
||||
' pip install "hermes-agent[yc-bench]"\n'
|
||||
"Or: git clone https://github.com/collinear-ai/yc-bench "
|
||||
"&& cd yc-bench && pip install -e ."
|
||||
)
|
||||
print("yc-bench CLI verified.")
|
||||
|
||||
# Build eval matrix: preset x seed
|
||||
self.all_eval_items = [
|
||||
{"preset": preset, "seed": seed}
|
||||
for preset in self.config.presets
|
||||
for seed in self.config.seeds
|
||||
]
|
||||
self.iter = 0
|
||||
|
||||
os.makedirs(self.config.db_dir, exist_ok=True)
|
||||
self.eval_metrics: List[Tuple[str, float]] = []
|
||||
|
||||
# Streaming JSONL log for crash-safe result persistence
|
||||
log_dir = os.path.join(os.path.dirname(__file__), "logs")
|
||||
os.makedirs(log_dir, exist_ok=True)
|
||||
run_ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
self._streaming_path = os.path.join(log_dir, f"samples_{run_ts}.jsonl")
|
||||
self._streaming_file = open(self._streaming_path, "w")
|
||||
self._streaming_lock = threading.Lock()
|
||||
|
||||
print(f"\nYC-Bench eval matrix: {len(self.all_eval_items)} runs")
|
||||
for item in self.all_eval_items:
|
||||
print(f" preset={item['preset']!r} seed={item['seed']}")
|
||||
print(f"Streaming results to: {self._streaming_path}\n")
|
||||
|
||||
def _save_result(self, result: Dict[str, Any]):
|
||||
"""Write a single run result to the streaming JSONL file immediately."""
|
||||
if not hasattr(self, "_streaming_file") or self._streaming_file.closed:
|
||||
return
|
||||
with self._streaming_lock:
|
||||
self._streaming_file.write(
|
||||
json.dumps(result, ensure_ascii=False, default=str) + "\n"
|
||||
)
|
||||
self._streaming_file.flush()
|
||||
|
||||
# =========================================================================
|
||||
# Training pipeline stubs (eval-only -- not used)
|
||||
# =========================================================================
|
||||
|
||||
async def get_next_item(self):
|
||||
item = self.all_eval_items[self.iter % len(self.all_eval_items)]
|
||||
self.iter += 1
|
||||
return item
|
||||
|
||||
def format_prompt(self, item: Dict[str, Any]) -> str:
|
||||
preset = item["preset"]
|
||||
seed = item["seed"]
|
||||
return (
|
||||
f"A new YC-Bench simulation has been initialized "
|
||||
f"(preset='{preset}', seed={seed}).\n"
|
||||
f"Your company '{self.config.company_name}' is ready.\n\n"
|
||||
"Begin by calling:\n"
|
||||
"1. `yc-bench company status` -- see your starting funds and prestige\n"
|
||||
"2. `yc-bench employee list` -- see your team and their skills\n"
|
||||
"3. `yc-bench market browse --required-prestige-lte 1` -- find tasks "
|
||||
"you can take\n\n"
|
||||
"Then accept 2-3 tasks, assign employees, dispatch them, and call "
|
||||
"`yc-bench sim resume` to advance time. Repeat this loop until the "
|
||||
"simulation ends (horizon reached or bankruptcy)."
|
||||
)
|
||||
|
||||
async def compute_reward(self, item, result, ctx) -> float:
|
||||
return 0.0
|
||||
|
||||
async def collect_trajectories(self, item):
|
||||
return None, []
|
||||
|
||||
async def score(self, rollout_group_data):
|
||||
return None
|
||||
|
||||
# =========================================================================
|
||||
# Per-run evaluation
|
||||
# =========================================================================
|
||||
|
||||
async def rollout_and_score_eval(self, eval_item: Dict[str, Any]) -> Dict:
|
||||
"""
|
||||
Evaluate a single (preset, seed) run.
|
||||
|
||||
1. Sets DATABASE_URL and YC_BENCH_EXPERIMENT env vars
|
||||
2. Initialises the simulation via ``yc-bench sim init`` (NOT ``run``)
|
||||
3. Runs HermesAgentLoop with terminal tool
|
||||
4. Reads SQLite DB to compute final score
|
||||
5. Returns result dict with survival, funds, and composite score
|
||||
"""
|
||||
preset = eval_item["preset"]
|
||||
seed = eval_item["seed"]
|
||||
run_id = str(uuid.uuid4())[:8]
|
||||
run_key = f"{preset}_seed{seed}_{run_id}"
|
||||
|
||||
from tqdm import tqdm
|
||||
tqdm.write(f" [START] preset={preset!r} seed={seed} (run_id={run_id})")
|
||||
run_start = time.time()
|
||||
|
||||
# Isolated DB per run -- prevents cross-run state leakage
|
||||
db_path = os.path.join(self.config.db_dir, f"yc_bench_{run_key}.db")
|
||||
os.environ["DATABASE_URL"] = f"sqlite:///{db_path}"
|
||||
os.environ["YC_BENCH_EXPERIMENT"] = preset
|
||||
|
||||
# Determine horizon: explicit config override > preset lookup > default 1
|
||||
horizon = self.config.horizon_years or _PRESET_HORIZONS.get(preset, 1)
|
||||
|
||||
try:
|
||||
# ----------------------------------------------------------
|
||||
# Step 1: Initialise the simulation via CLI
|
||||
# IMPORTANT: We use `sim init`, NOT `yc-bench run`.
|
||||
# `yc-bench run` starts yc-bench's own LLM agent loop (via
|
||||
# LiteLLM), which would compete with our HermesAgentLoop.
|
||||
# `sim init` just sets up the world and returns.
|
||||
# ----------------------------------------------------------
|
||||
init_cmd = [
|
||||
"yc-bench", "sim", "init",
|
||||
"--seed", str(seed),
|
||||
"--start-date", self.config.start_date,
|
||||
"--company-name", self.config.company_name,
|
||||
"--horizon-years", str(horizon),
|
||||
]
|
||||
init_result = subprocess.run(
|
||||
init_cmd, capture_output=True, text=True, timeout=30,
|
||||
)
|
||||
if init_result.returncode != 0:
|
||||
error_msg = (init_result.stderr or init_result.stdout).strip()
|
||||
raise RuntimeError(f"yc-bench sim init failed: {error_msg}")
|
||||
|
||||
tqdm.write(f" Simulation initialized (horizon={horizon}yr)")
|
||||
|
||||
# ----------------------------------------------------------
|
||||
# Step 2: Run the HermesAgentLoop
|
||||
# ----------------------------------------------------------
|
||||
tools, valid_names = self._resolve_tools_for_group()
|
||||
|
||||
messages: List[Dict[str, Any]] = [
|
||||
{"role": "system", "content": YC_BENCH_SYSTEM_PROMPT},
|
||||
{"role": "user", "content": self.format_prompt(eval_item)},
|
||||
]
|
||||
|
||||
agent = HermesAgentLoop(
|
||||
server=self.server,
|
||||
tool_schemas=tools,
|
||||
valid_tool_names=valid_names,
|
||||
max_turns=self.config.max_agent_turns,
|
||||
task_id=run_id,
|
||||
temperature=self.config.agent_temperature,
|
||||
max_tokens=self.config.max_token_length,
|
||||
extra_body=self.config.extra_body,
|
||||
)
|
||||
result = await agent.run(messages)
|
||||
|
||||
# ----------------------------------------------------------
|
||||
# Step 3: Read final score from the simulation DB
|
||||
# ----------------------------------------------------------
|
||||
score_data = _read_final_score(db_path)
|
||||
final_funds = score_data["final_funds_cents"]
|
||||
survived = score_data["survived"]
|
||||
terminal_reason = score_data["terminal_reason"]
|
||||
|
||||
composite = _compute_composite_score(
|
||||
final_funds_cents=final_funds,
|
||||
survived=survived,
|
||||
survival_weight=self.config.survival_weight,
|
||||
funds_weight=self.config.funds_weight,
|
||||
)
|
||||
|
||||
elapsed = time.time() - run_start
|
||||
status = "SURVIVED" if survived else "BANKRUPT"
|
||||
if final_funds >= 0:
|
||||
funds_str = f"${final_funds / 100:,.0f}"
|
||||
else:
|
||||
funds_str = f"-${abs(final_funds) / 100:,.0f}"
|
||||
|
||||
tqdm.write(
|
||||
f" [{status}] preset={preset!r} seed={seed} "
|
||||
f"funds={funds_str} score={composite:.3f} "
|
||||
f"turns={result.turns_used} ({elapsed:.0f}s)"
|
||||
)
|
||||
|
||||
out = {
|
||||
"preset": preset,
|
||||
"seed": seed,
|
||||
"survived": survived,
|
||||
"final_funds_cents": final_funds,
|
||||
"final_funds_usd": final_funds / 100,
|
||||
"terminal_reason": terminal_reason,
|
||||
"composite_score": composite,
|
||||
"turns_used": result.turns_used,
|
||||
"finished_naturally": result.finished_naturally,
|
||||
"elapsed_seconds": elapsed,
|
||||
"db_path": db_path,
|
||||
"messages": result.messages,
|
||||
}
|
||||
self._save_result(out)
|
||||
return out
|
||||
|
||||
except Exception as e:
|
||||
elapsed = time.time() - run_start
|
||||
logger.error("Run %s failed: %s", run_key, e, exc_info=True)
|
||||
tqdm.write(
|
||||
f" [ERROR] preset={preset!r} seed={seed}: {e} ({elapsed:.0f}s)"
|
||||
)
|
||||
out = {
|
||||
"preset": preset,
|
||||
"seed": seed,
|
||||
"survived": False,
|
||||
"final_funds_cents": 0,
|
||||
"final_funds_usd": 0.0,
|
||||
"terminal_reason": f"error: {e}",
|
||||
"composite_score": 0.0,
|
||||
"turns_used": 0,
|
||||
"error": str(e),
|
||||
"elapsed_seconds": elapsed,
|
||||
}
|
||||
self._save_result(out)
|
||||
return out
|
||||
|
||||
# =========================================================================
|
||||
# Evaluate
|
||||
# =========================================================================
|
||||
|
||||
async def _run_with_timeout(self, item: Dict[str, Any]) -> Dict:
|
||||
"""Wrap a single rollout with a wall-clock timeout."""
|
||||
preset = item["preset"]
|
||||
seed = item["seed"]
|
||||
try:
|
||||
return await asyncio.wait_for(
|
||||
self.rollout_and_score_eval(item),
|
||||
timeout=self.config.run_timeout,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
from tqdm import tqdm
|
||||
tqdm.write(
|
||||
f" [TIMEOUT] preset={preset!r} seed={seed} "
|
||||
f"(exceeded {self.config.run_timeout}s)"
|
||||
)
|
||||
out = {
|
||||
"preset": preset,
|
||||
"seed": seed,
|
||||
"survived": False,
|
||||
"final_funds_cents": 0,
|
||||
"final_funds_usd": 0.0,
|
||||
"terminal_reason": f"timeout ({self.config.run_timeout}s)",
|
||||
"composite_score": 0.0,
|
||||
"turns_used": 0,
|
||||
"error": "timeout",
|
||||
}
|
||||
self._save_result(out)
|
||||
return out
|
||||
|
||||
async def evaluate(self, *args, **kwargs) -> None:
|
||||
"""
|
||||
Run YC-Bench evaluation over all (preset, seed) combinations.
|
||||
|
||||
Runs sequentially -- each run is 100-500 turns, parallelising would
|
||||
be prohibitively expensive and cause env var conflicts.
|
||||
"""
|
||||
start_time = time.time()
|
||||
from tqdm import tqdm
|
||||
|
||||
# --- tqdm-compatible logging handler (TB2 pattern) ---
|
||||
class _TqdmHandler(logging.Handler):
|
||||
def emit(self, record):
|
||||
try:
|
||||
tqdm.write(self.format(record))
|
||||
except Exception:
|
||||
self.handleError(record)
|
||||
|
||||
root = logging.getLogger()
|
||||
handler = _TqdmHandler()
|
||||
handler.setFormatter(
|
||||
logging.Formatter("%(levelname)s %(name)s: %(message)s")
|
||||
)
|
||||
root.handlers = [handler]
|
||||
for noisy in ("httpx", "openai"):
|
||||
logging.getLogger(noisy).setLevel(logging.WARNING)
|
||||
|
||||
# --- Print config summary ---
|
||||
print(f"\n{'='*60}")
|
||||
print("Starting YC-Bench Evaluation")
|
||||
print(f"{'='*60}")
|
||||
print(f" Presets: {self.config.presets}")
|
||||
print(f" Seeds: {self.config.seeds}")
|
||||
print(f" Total runs: {len(self.all_eval_items)}")
|
||||
print(f" Max turns/run: {self.config.max_agent_turns}")
|
||||
print(f" Run timeout: {self.config.run_timeout}s")
|
||||
print(f"{'='*60}\n")
|
||||
|
||||
results = []
|
||||
pbar = tqdm(
|
||||
total=len(self.all_eval_items), desc="YC-Bench", dynamic_ncols=True
|
||||
)
|
||||
|
||||
try:
|
||||
for item in self.all_eval_items:
|
||||
result = await self._run_with_timeout(item)
|
||||
results.append(result)
|
||||
survived_count = sum(1 for r in results if r.get("survived"))
|
||||
pbar.set_postfix_str(
|
||||
f"survived={survived_count}/{len(results)}"
|
||||
)
|
||||
pbar.update(1)
|
||||
|
||||
except (KeyboardInterrupt, asyncio.CancelledError):
|
||||
tqdm.write("\n[INTERRUPTED] Stopping evaluation...")
|
||||
pbar.close()
|
||||
try:
|
||||
from tools.terminal_tool import cleanup_all_environments
|
||||
cleanup_all_environments()
|
||||
except Exception:
|
||||
pass
|
||||
if hasattr(self, "_streaming_file") and not self._streaming_file.closed:
|
||||
self._streaming_file.close()
|
||||
return
|
||||
|
||||
pbar.close()
|
||||
end_time = time.time()
|
||||
|
||||
# --- Compute metrics ---
|
||||
valid = [r for r in results if r is not None]
|
||||
if not valid:
|
||||
print("Warning: No valid results.")
|
||||
return
|
||||
|
||||
total = len(valid)
|
||||
survived_total = sum(1 for r in valid if r.get("survived"))
|
||||
survival_rate = survived_total / total if total else 0.0
|
||||
avg_score = (
|
||||
sum(r.get("composite_score", 0) for r in valid) / total
|
||||
if total
|
||||
else 0.0
|
||||
)
|
||||
|
||||
preset_results: Dict[str, List[Dict]] = defaultdict(list)
|
||||
for r in valid:
|
||||
preset_results[r["preset"]].append(r)
|
||||
|
||||
eval_metrics = {
|
||||
"eval/survival_rate": survival_rate,
|
||||
"eval/avg_composite_score": avg_score,
|
||||
"eval/total_runs": total,
|
||||
"eval/survived_runs": survived_total,
|
||||
"eval/evaluation_time_seconds": end_time - start_time,
|
||||
}
|
||||
|
||||
for preset, items in sorted(preset_results.items()):
|
||||
ps = sum(1 for r in items if r.get("survived"))
|
||||
pt = len(items)
|
||||
pa = (
|
||||
sum(r.get("composite_score", 0) for r in items) / pt
|
||||
if pt
|
||||
else 0
|
||||
)
|
||||
key = preset.replace("-", "_")
|
||||
eval_metrics[f"eval/survival_rate_{key}"] = ps / pt if pt else 0
|
||||
eval_metrics[f"eval/avg_score_{key}"] = pa
|
||||
|
||||
self.eval_metrics = [(k, v) for k, v in eval_metrics.items()]
|
||||
|
||||
# --- Print summary ---
|
||||
print(f"\n{'='*60}")
|
||||
print("YC-Bench Evaluation Results")
|
||||
print(f"{'='*60}")
|
||||
print(
|
||||
f"Overall survival rate: {survival_rate:.1%} "
|
||||
f"({survived_total}/{total})"
|
||||
)
|
||||
print(f"Average composite score: {avg_score:.4f}")
|
||||
print(f"Evaluation time: {end_time - start_time:.1f}s")
|
||||
|
||||
print("\nPer-preset breakdown:")
|
||||
for preset, items in sorted(preset_results.items()):
|
||||
ps = sum(1 for r in items if r.get("survived"))
|
||||
pt = len(items)
|
||||
pa = (
|
||||
sum(r.get("composite_score", 0) for r in items) / pt
|
||||
if pt
|
||||
else 0
|
||||
)
|
||||
print(f" {preset}: {ps}/{pt} survived avg_score={pa:.4f}")
|
||||
for r in items:
|
||||
status = "SURVIVED" if r.get("survived") else "BANKRUPT"
|
||||
funds = r.get("final_funds_usd", 0)
|
||||
print(
|
||||
f" seed={r['seed']} [{status}] "
|
||||
f"${funds:,.0f} "
|
||||
f"score={r.get('composite_score', 0):.3f}"
|
||||
)
|
||||
|
||||
print(f"{'='*60}\n")
|
||||
|
||||
# --- Log results ---
|
||||
samples = [
|
||||
{k: v for k, v in r.items() if k != "messages"} for r in valid
|
||||
]
|
||||
|
||||
try:
|
||||
await self.evaluate_log(
|
||||
metrics=eval_metrics,
|
||||
samples=samples,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
generation_parameters={
|
||||
"temperature": self.config.agent_temperature,
|
||||
"max_tokens": self.config.max_token_length,
|
||||
"max_agent_turns": self.config.max_agent_turns,
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Error logging results: {e}")
|
||||
|
||||
# --- Cleanup (TB2 pattern) ---
|
||||
if hasattr(self, "_streaming_file") and not self._streaming_file.closed:
|
||||
self._streaming_file.close()
|
||||
print(f"Results saved to: {self._streaming_path}")
|
||||
|
||||
try:
|
||||
from tools.terminal_tool import cleanup_all_environments
|
||||
cleanup_all_environments()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
from environments.agent_loop import _tool_executor
|
||||
_tool_executor.shutdown(wait=False, cancel_futures=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# =========================================================================
|
||||
# Wandb logging
|
||||
# =========================================================================
|
||||
|
||||
async def wandb_log(self, wandb_metrics: Optional[Dict] = None):
|
||||
"""Log YC-Bench-specific metrics to wandb."""
|
||||
if wandb_metrics is None:
|
||||
wandb_metrics = {}
|
||||
for k, v in self.eval_metrics:
|
||||
wandb_metrics[k] = v
|
||||
self.eval_metrics = []
|
||||
await super().wandb_log(wandb_metrics)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
YCBenchEvalEnv.cli()
|
||||
@@ -114,8 +114,8 @@ class HermesAgentEnvConfig(BaseEnvConfig):
|
||||
# --- Terminal backend ---
|
||||
terminal_backend: str = Field(
|
||||
default="local",
|
||||
description="Terminal backend: 'local', 'docker', 'modal', 'daytona', 'ssh', 'singularity'. "
|
||||
"Modal or Daytona recommended for production RL (cloud isolation per rollout).",
|
||||
description="Terminal backend: 'local', 'docker', 'modal', 'ssh', 'singularity'. "
|
||||
"Modal recommended for production RL (cloud isolation per rollout).",
|
||||
)
|
||||
terminal_timeout: int = Field(
|
||||
default=120,
|
||||
@@ -258,11 +258,6 @@ class HermesAgentBaseEnv(BaseEnv):
|
||||
logger.info("Sampled toolsets from '%s': %s", config.distribution, group_toolsets)
|
||||
else:
|
||||
group_toolsets = config.enabled_toolsets # None means "all available"
|
||||
if group_toolsets is None:
|
||||
logger.warning(
|
||||
"enabled_toolsets is None -- loading ALL tools including messaging. "
|
||||
"Set explicit enabled_toolsets for RL training."
|
||||
)
|
||||
|
||||
tools = get_tool_definitions(
|
||||
enabled_toolsets=group_toolsets,
|
||||
@@ -483,6 +478,7 @@ class HermesAgentBaseEnv(BaseEnv):
|
||||
tokenizer=self.tokenizer,
|
||||
tool_call_parser=tc_parser,
|
||||
) as managed:
|
||||
_max_ctx = self.config.max_token_length if (self.config.max_token_length and self.config.max_token_length > 0) else None
|
||||
agent = HermesAgentLoop(
|
||||
server=managed,
|
||||
tool_schemas=tools,
|
||||
@@ -492,6 +488,7 @@ class HermesAgentBaseEnv(BaseEnv):
|
||||
temperature=self.config.agent_temperature,
|
||||
max_tokens=self.config.max_token_length,
|
||||
extra_body=self.config.extra_body,
|
||||
max_context_tokens=_max_ctx,
|
||||
)
|
||||
result = await agent.run(messages)
|
||||
except NotImplementedError:
|
||||
@@ -500,6 +497,7 @@ class HermesAgentBaseEnv(BaseEnv):
|
||||
"ManagedServer not available (OpenAI server?). "
|
||||
"Falling back to direct server mode."
|
||||
)
|
||||
_max_ctx = self.config.max_token_length if (self.config.max_token_length and self.config.max_token_length > 0) else None
|
||||
agent = HermesAgentLoop(
|
||||
server=self.server,
|
||||
tool_schemas=tools,
|
||||
@@ -509,10 +507,12 @@ class HermesAgentBaseEnv(BaseEnv):
|
||||
temperature=self.config.agent_temperature,
|
||||
max_tokens=self.config.max_token_length,
|
||||
extra_body=self.config.extra_body,
|
||||
max_context_tokens=_max_ctx,
|
||||
)
|
||||
result = await agent.run(messages)
|
||||
else:
|
||||
# Phase 1: OpenAI server -- native tool_calls, placeholder tokens
|
||||
_max_ctx = self.config.max_token_length if (self.config.max_token_length and self.config.max_token_length > 0) else None
|
||||
agent = HermesAgentLoop(
|
||||
server=self.server,
|
||||
tool_schemas=tools,
|
||||
@@ -522,6 +522,7 @@ class HermesAgentBaseEnv(BaseEnv):
|
||||
temperature=self.config.agent_temperature,
|
||||
max_tokens=self.config.max_token_length,
|
||||
extra_body=self.config.extra_body,
|
||||
max_context_tokens=_max_ctx,
|
||||
)
|
||||
result = await agent.run(messages)
|
||||
|
||||
|
||||
@@ -35,8 +35,7 @@ class DeepSeekV31ToolCallParser(ToolCallParser):
|
||||
|
||||
# Regex captures: function_name, function_arguments
|
||||
PATTERN = re.compile(
|
||||
r"<|tool▁call▁begin|>(?P<function_name>.*?)<|tool▁sep|>(?P<function_arguments>.*?)<|tool▁call▁end|>",
|
||||
re.DOTALL,
|
||||
r"<|tool▁call▁begin|>(?P<function_name>.*?)<|tool▁sep|>(?P<function_arguments>.*?)<|tool▁call▁end|>"
|
||||
)
|
||||
|
||||
def parse(self, text: str) -> ParseResult:
|
||||
|
||||
@@ -38,8 +38,7 @@ class DeepSeekV3ToolCallParser(ToolCallParser):
|
||||
|
||||
# Regex captures: type, function_name, function_arguments
|
||||
PATTERN = re.compile(
|
||||
r"<|tool▁call▁begin|>(?P<type>.*)<|tool▁sep|>(?P<function_name>.*)\n```json\n(?P<function_arguments>.*)\n```<|tool▁call▁end|>",
|
||||
re.DOTALL,
|
||||
r"<|tool▁call▁begin|>(?P<type>.*)<|tool▁sep|>(?P<function_name>.*)\n```json\n(?P<function_arguments>.*)\n```<|tool▁call▁end|>"
|
||||
)
|
||||
|
||||
def parse(self, text: str) -> ParseResult:
|
||||
|
||||
@@ -49,15 +49,22 @@ class HermesToolCallParser(ToolCallParser):
|
||||
continue
|
||||
|
||||
tc_data = json.loads(raw_json)
|
||||
# Handle arguments: could be dict or already a JSON string
|
||||
raw_args = tc_data.get("arguments", {})
|
||||
if isinstance(raw_args, str):
|
||||
# Already a string — pass through as-is.
|
||||
# It may be a JSON string ("{...}") or a plain string ("ls").
|
||||
args_str = raw_args
|
||||
else:
|
||||
# Dict — serialize to JSON
|
||||
args_str = json.dumps(raw_args, ensure_ascii=False)
|
||||
tool_calls.append(
|
||||
ChatCompletionMessageToolCall(
|
||||
id=f"call_{uuid.uuid4().hex[:8]}",
|
||||
type="function",
|
||||
function=Function(
|
||||
name=tc_data["name"],
|
||||
arguments=json.dumps(
|
||||
tc_data.get("arguments", {}), ensure_ascii=False
|
||||
),
|
||||
arguments=args_str,
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -44,10 +44,9 @@ _tool_executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
|
||||
def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str) -> str:
|
||||
"""
|
||||
Run a tool call in a thread pool executor so backends that use asyncio.run()
|
||||
internally (modal, docker, daytona) get a clean event loop.
|
||||
internally (modal, docker) get a clean event loop.
|
||||
|
||||
If we're already in an async context, executes handle_function_call() in a
|
||||
disposable worker thread and blocks for the result.
|
||||
If we're already in an async context, uses run_in_executor.
|
||||
If not (e.g., called from sync code), runs directly.
|
||||
"""
|
||||
try:
|
||||
@@ -95,7 +94,7 @@ class ToolContext:
|
||||
backend = os.getenv("TERMINAL_ENV", "local")
|
||||
logger.debug("ToolContext.terminal [%s backend] task=%s: %s", backend, self.task_id[:8], command[:100])
|
||||
|
||||
# Run via thread helper so modal/docker/daytona backends' asyncio.run() doesn't deadlock
|
||||
# Run in thread pool so modal/docker backends' asyncio.run() doesn't deadlock
|
||||
result = _run_tool_in_thread(
|
||||
"terminal",
|
||||
{"command": command, "timeout": timeout},
|
||||
@@ -333,7 +332,7 @@ class ToolContext:
|
||||
Dict with search results
|
||||
"""
|
||||
result = handle_function_call(
|
||||
"search_files", {"pattern": query, "path": path}, task_id=self.task_id
|
||||
"search", {"query": query, "path": path}, task_id=self.task_id
|
||||
)
|
||||
try:
|
||||
return json.loads(result)
|
||||
@@ -439,21 +438,11 @@ class ToolContext:
|
||||
|
||||
def cleanup(self):
|
||||
"""
|
||||
Release all resources (terminal VMs, browser sessions, background processes)
|
||||
for this rollout.
|
||||
Release all resources (terminal VMs, browser sessions) for this rollout.
|
||||
|
||||
Called automatically by the base environment via try/finally after
|
||||
compute_reward() completes. You generally don't need to call this yourself.
|
||||
"""
|
||||
# Kill any background processes from this rollout (safety net)
|
||||
try:
|
||||
from tools.process_registry import process_registry
|
||||
killed = process_registry.kill_all(task_id=self.task_id)
|
||||
if killed:
|
||||
logger.debug("Process cleanup for task %s: killed %d process(es)", self.task_id, killed)
|
||||
except Exception as e:
|
||||
logger.debug("Process cleanup for task %s: %s", self.task_id, e)
|
||||
|
||||
try:
|
||||
cleanup_vm(self.task_id)
|
||||
except Exception as e:
|
||||
|
||||
@@ -1,643 +0,0 @@
|
||||
"""
|
||||
WebResearchEnv — RL Environment for Multi-Step Web Research
|
||||
============================================================
|
||||
|
||||
Trains models to do accurate, efficient, multi-source web research.
|
||||
|
||||
Reward signals:
|
||||
- Answer correctness (LLM judge, 0.0–1.0)
|
||||
- Source diversity (used ≥2 distinct domains)
|
||||
- Efficiency (penalizes excessive tool calls)
|
||||
- Tool usage (bonus for actually using web tools)
|
||||
|
||||
Dataset: FRAMES benchmark (Google, 2024) — multi-hop factual questions
|
||||
HuggingFace: google/frames-benchmark
|
||||
Fallback: built-in sample questions (no HF token needed)
|
||||
|
||||
Usage:
|
||||
# Phase 1 (OpenAI-compatible server)
|
||||
python environments/web_research_env.py serve \\
|
||||
--openai.base_url http://localhost:8000/v1 \\
|
||||
--openai.model_name YourModel \\
|
||||
--openai.server_type openai
|
||||
|
||||
# Process mode (offline data generation)
|
||||
python environments/web_research_env.py process \\
|
||||
--env.data_path_to_save_groups data/web_research.jsonl
|
||||
|
||||
# Standalone eval
|
||||
python environments/web_research_env.py evaluate \\
|
||||
--openai.base_url http://localhost:8000/v1 \\
|
||||
--openai.model_name YourModel
|
||||
|
||||
Built by: github.com/jackx707
|
||||
Inspired by: GroceryMind — production Hermes agent doing live web research
|
||||
across German grocery stores (firecrawl + hermes-agent)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
# Ensure hermes-agent root is on path
|
||||
_repo_root = Path(__file__).resolve().parent.parent
|
||||
if str(_repo_root) not in sys.path:
|
||||
sys.path.insert(0, str(_repo_root))
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Optional HuggingFace datasets import
|
||||
# ---------------------------------------------------------------------------
|
||||
try:
|
||||
from datasets import load_dataset
|
||||
HF_AVAILABLE = True
|
||||
except ImportError:
|
||||
HF_AVAILABLE = False
|
||||
|
||||
from atroposlib.envs.base import ScoredDataGroup
|
||||
from atroposlib.envs.server_handling.server_manager import APIServerConfig
|
||||
from atroposlib.type_definitions import Item
|
||||
|
||||
from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig
|
||||
from environments.agent_loop import AgentResult
|
||||
from environments.tool_context import ToolContext
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fallback sample dataset (used when HuggingFace is unavailable)
|
||||
# Multi-hop questions requiring real web search to answer.
|
||||
# ---------------------------------------------------------------------------
|
||||
SAMPLE_QUESTIONS = [
|
||||
{
|
||||
"question": "What is the current population of the capital city of the country that won the 2022 FIFA World Cup?",
|
||||
"answer": "Buenos Aires has approximately 3 million people in the city proper, or around 15 million in the greater metro area.",
|
||||
"difficulty": "medium",
|
||||
"hops": 2,
|
||||
},
|
||||
{
|
||||
"question": "Who is the CEO of the company that makes the most widely used open-source container orchestration platform?",
|
||||
"answer": "The Linux Foundation oversees Kubernetes. CNCF (Cloud Native Computing Foundation) is the specific body — it does not have a traditional CEO but has an executive director.",
|
||||
"difficulty": "medium",
|
||||
"hops": 2,
|
||||
},
|
||||
{
|
||||
"question": "What programming language was used to write the original version of the web framework used by Instagram?",
|
||||
"answer": "Django, which Instagram was built on, is written in Python.",
|
||||
"difficulty": "easy",
|
||||
"hops": 2,
|
||||
},
|
||||
{
|
||||
"question": "In what year was the university founded where the inventor of the World Wide Web currently holds a professorship?",
|
||||
"answer": "Tim Berners-Lee holds a professorship at MIT (founded 1861) and the University of Southampton (founded 1952).",
|
||||
"difficulty": "hard",
|
||||
"hops": 3,
|
||||
},
|
||||
{
|
||||
"question": "What is the latest stable version of the programming language that ranks #1 on the TIOBE index as of this year?",
|
||||
"answer": "Python is currently #1 on TIOBE. The latest stable version should be verified via the official python.org site.",
|
||||
"difficulty": "medium",
|
||||
"hops": 2,
|
||||
},
|
||||
{
|
||||
"question": "How many employees does the parent company of Instagram have?",
|
||||
"answer": "Meta Platforms (parent of Instagram) employs approximately 70,000+ people as of recent reports.",
|
||||
"difficulty": "medium",
|
||||
"hops": 2,
|
||||
},
|
||||
{
|
||||
"question": "What is the current interest rate set by the central bank of the country where the Eiffel Tower is located?",
|
||||
"answer": "The European Central Bank sets rates for France/eurozone. The current rate should be verified — it has changed frequently in 2023-2025.",
|
||||
"difficulty": "hard",
|
||||
"hops": 2,
|
||||
},
|
||||
{
|
||||
"question": "Which company acquired the startup founded by the creator of Oculus VR?",
|
||||
"answer": "Palmer Luckey founded Oculus VR, which was acquired by Facebook (now Meta). He later founded Anduril Industries.",
|
||||
"difficulty": "medium",
|
||||
"hops": 2,
|
||||
},
|
||||
{
|
||||
"question": "What is the market cap of the company that owns the most popular search engine in Russia?",
|
||||
"answer": "Yandex (now split into separate entities after 2024 restructuring). Current market cap should be verified via financial sources.",
|
||||
"difficulty": "hard",
|
||||
"hops": 2,
|
||||
},
|
||||
{
|
||||
"question": "What was the GDP growth rate of the country that hosted the most recent Summer Olympics?",
|
||||
"answer": "Paris, France hosted the 2024 Summer Olympics. France's recent GDP growth should be verified via World Bank or IMF data.",
|
||||
"difficulty": "hard",
|
||||
"hops": 2,
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Configuration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class WebResearchEnvConfig(HermesAgentEnvConfig):
|
||||
"""Configuration for the web research RL environment."""
|
||||
|
||||
# Reward weights
|
||||
correctness_weight: float = Field(
|
||||
default=0.6,
|
||||
description="Weight for answer correctness in reward (LLM judge score).",
|
||||
)
|
||||
tool_usage_weight: float = Field(
|
||||
default=0.2,
|
||||
description="Weight for tool usage signal (did the model actually use web tools?).",
|
||||
)
|
||||
efficiency_weight: float = Field(
|
||||
default=0.2,
|
||||
description="Weight for efficiency signal (penalizes excessive tool calls).",
|
||||
)
|
||||
diversity_bonus: float = Field(
|
||||
default=0.1,
|
||||
description="Bonus reward for citing ≥2 distinct domains.",
|
||||
)
|
||||
|
||||
# Efficiency thresholds
|
||||
efficient_max_calls: int = Field(
|
||||
default=5,
|
||||
description="Maximum tool calls before efficiency penalty begins.",
|
||||
)
|
||||
heavy_penalty_calls: int = Field(
|
||||
default=10,
|
||||
description="Tool call count where efficiency penalty steepens.",
|
||||
)
|
||||
|
||||
# Eval
|
||||
eval_size: int = Field(
|
||||
default=20,
|
||||
description="Number of held-out items for evaluation.",
|
||||
)
|
||||
eval_split_ratio: float = Field(
|
||||
default=0.1,
|
||||
description="Fraction of dataset to hold out for evaluation (0.0–1.0).",
|
||||
)
|
||||
|
||||
# Dataset
|
||||
dataset_name: str = Field(
|
||||
default="google/frames-benchmark",
|
||||
description="HuggingFace dataset name for research questions.",
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Environment
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class WebResearchEnv(HermesAgentBaseEnv):
|
||||
"""
|
||||
RL environment for training multi-step web research skills.
|
||||
|
||||
The model is given a factual question requiring 2-3 hops of web research
|
||||
and must use web_search / web_extract tools to find and synthesize the answer.
|
||||
|
||||
Reward is multi-signal:
|
||||
60% — answer correctness (LLM judge)
|
||||
20% — tool usage (did the model actually search the web?)
|
||||
20% — efficiency (penalizes >5 tool calls)
|
||||
|
||||
Bonus +0.1 for source diversity (≥2 distinct domains cited).
|
||||
"""
|
||||
|
||||
name = "web-research"
|
||||
env_config_cls = WebResearchEnvConfig
|
||||
|
||||
# Default toolsets for this environment — web + file for saving notes
|
||||
default_toolsets = ["web", "file"]
|
||||
|
||||
@classmethod
|
||||
def config_init(cls) -> Tuple[WebResearchEnvConfig, List[APIServerConfig]]:
|
||||
"""Default configuration for the web research environment."""
|
||||
env_config = WebResearchEnvConfig(
|
||||
enabled_toolsets=["web", "file"],
|
||||
max_agent_turns=15,
|
||||
agent_temperature=1.0,
|
||||
system_prompt=(
|
||||
"You are a highly capable research agent. When asked a factual question, "
|
||||
"always use web_search to find current, accurate information before answering. "
|
||||
"Cite at least 2 sources. Be concise and accurate."
|
||||
),
|
||||
group_size=4,
|
||||
total_steps=1000,
|
||||
steps_per_eval=100,
|
||||
use_wandb=True,
|
||||
wandb_name="web-research",
|
||||
)
|
||||
|
||||
server_configs = [
|
||||
APIServerConfig(
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
model_name="anthropic/claude-sonnet-4.5",
|
||||
server_type="openai",
|
||||
api_key=os.getenv("OPENROUTER_API_KEY", ""),
|
||||
health_check=False,
|
||||
)
|
||||
]
|
||||
|
||||
return env_config, server_configs
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self._items: list[dict] = []
|
||||
self._eval_items: list[dict] = []
|
||||
self._index: int = 0
|
||||
|
||||
# Metrics tracking for wandb
|
||||
self._reward_buffer: list[float] = []
|
||||
self._correctness_buffer: list[float] = []
|
||||
self._tool_usage_buffer: list[float] = []
|
||||
self._efficiency_buffer: list[float] = []
|
||||
self._diversity_buffer: list[float] = []
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 1. Setup — load dataset
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def setup(self) -> None:
|
||||
"""Load the FRAMES benchmark or fall back to built-in samples."""
|
||||
if HF_AVAILABLE:
|
||||
try:
|
||||
logger.info("Loading FRAMES benchmark from HuggingFace...")
|
||||
ds = load_dataset(self.config.dataset_name, split="test")
|
||||
self._items = [
|
||||
{
|
||||
"question": row["Prompt"],
|
||||
"answer": row["Answer"],
|
||||
"difficulty": row.get("reasoning_types", "unknown"),
|
||||
"hops": 2,
|
||||
}
|
||||
for row in ds
|
||||
]
|
||||
# Hold out for eval
|
||||
eval_size = max(
|
||||
self.config.eval_size,
|
||||
int(len(self._items) * self.config.eval_split_ratio),
|
||||
)
|
||||
random.shuffle(self._items)
|
||||
self._eval_items = self._items[:eval_size]
|
||||
self._items = self._items[eval_size:]
|
||||
logger.info(
|
||||
f"Loaded {len(self._items)} train / {len(self._eval_items)} eval items "
|
||||
f"from FRAMES benchmark."
|
||||
)
|
||||
return
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not load FRAMES from HuggingFace: {e}. Using built-in samples.")
|
||||
|
||||
# Fallback
|
||||
random.shuffle(SAMPLE_QUESTIONS)
|
||||
split = max(1, len(SAMPLE_QUESTIONS) * 8 // 10)
|
||||
self._items = SAMPLE_QUESTIONS[:split]
|
||||
self._eval_items = SAMPLE_QUESTIONS[split:]
|
||||
logger.info(
|
||||
f"Using built-in sample dataset: {len(self._items)} train / "
|
||||
f"{len(self._eval_items)} eval items."
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 2. get_next_item — return the next question
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def get_next_item(self) -> dict:
|
||||
"""Return the next item, cycling through the dataset."""
|
||||
if not self._items:
|
||||
raise RuntimeError("Dataset is empty. Did you call setup()?")
|
||||
item = self._items[self._index % len(self._items)]
|
||||
self._index += 1
|
||||
return item
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 3. format_prompt — build the user-facing prompt
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def format_prompt(self, item: dict) -> str:
|
||||
"""Format the research question as a task prompt."""
|
||||
return (
|
||||
f"Research the following question thoroughly using web search. "
|
||||
f"You MUST search the web to find current, accurate information — "
|
||||
f"do not rely solely on your training data.\n\n"
|
||||
f"Question: {item['question']}\n\n"
|
||||
f"Requirements:\n"
|
||||
f"- Use web_search and/or web_extract tools to find information\n"
|
||||
f"- Search at least 2 different sources\n"
|
||||
f"- Provide a concise, accurate answer (2-4 sentences)\n"
|
||||
f"- Cite the sources you used"
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 4. compute_reward — multi-signal scoring
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def compute_reward(
|
||||
self,
|
||||
item: dict,
|
||||
result: AgentResult,
|
||||
ctx: ToolContext,
|
||||
) -> float:
|
||||
"""
|
||||
Multi-signal reward function:
|
||||
|
||||
correctness_weight * correctness — LLM judge comparing answer to ground truth
|
||||
tool_usage_weight * tool_used — binary: did the model use web tools?
|
||||
efficiency_weight * efficiency — penalizes wasteful tool usage
|
||||
+ diversity_bonus — source diversity (≥2 distinct domains)
|
||||
"""
|
||||
final_response: str = result.final_response or ""
|
||||
tools_used: list[str] = [
|
||||
tc.tool_name for tc in (result.tool_calls or [])
|
||||
] if hasattr(result, "tool_calls") and result.tool_calls else []
|
||||
tool_call_count: int = result.turns_used or len(tools_used)
|
||||
|
||||
cfg = self.config
|
||||
|
||||
# ---- Signal 1: Answer correctness (LLM judge) ----------------
|
||||
correctness = await self._llm_judge(
|
||||
question=item["question"],
|
||||
expected=item["answer"],
|
||||
model_answer=final_response,
|
||||
)
|
||||
|
||||
# ---- Signal 2: Web tool usage --------------------------------
|
||||
web_tools = {"web_search", "web_extract", "search", "firecrawl"}
|
||||
tool_used = 1.0 if any(t in web_tools for t in tools_used) else 0.0
|
||||
|
||||
# ---- Signal 3: Efficiency ------------------------------------
|
||||
if tool_call_count <= cfg.efficient_max_calls:
|
||||
efficiency = 1.0
|
||||
elif tool_call_count <= cfg.heavy_penalty_calls:
|
||||
efficiency = 1.0 - (tool_call_count - cfg.efficient_max_calls) * 0.08
|
||||
else:
|
||||
efficiency = max(0.0, 1.0 - (tool_call_count - cfg.efficient_max_calls) * 0.12)
|
||||
|
||||
# ---- Bonus: Source diversity ---------------------------------
|
||||
domains = self._extract_domains(final_response)
|
||||
diversity = cfg.diversity_bonus if len(domains) >= 2 else 0.0
|
||||
|
||||
# ---- Combine ------------------------------------------------
|
||||
reward = (
|
||||
cfg.correctness_weight * correctness
|
||||
+ cfg.tool_usage_weight * tool_used
|
||||
+ cfg.efficiency_weight * efficiency
|
||||
+ diversity
|
||||
)
|
||||
reward = min(1.0, max(0.0, reward)) # clamp to [0, 1]
|
||||
|
||||
# Track for wandb
|
||||
self._reward_buffer.append(reward)
|
||||
self._correctness_buffer.append(correctness)
|
||||
self._tool_usage_buffer.append(tool_used)
|
||||
self._efficiency_buffer.append(efficiency)
|
||||
self._diversity_buffer.append(diversity)
|
||||
|
||||
logger.debug(
|
||||
f"Reward breakdown — correctness={correctness:.2f}, "
|
||||
f"tool_used={tool_used:.1f}, efficiency={efficiency:.2f}, "
|
||||
f"diversity={diversity:.1f} → total={reward:.3f}"
|
||||
)
|
||||
|
||||
return reward
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 5. evaluate — run on held-out eval split
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def evaluate(self, *args, **kwargs) -> None:
|
||||
"""Run evaluation on the held-out split using the agent loop."""
|
||||
import time
|
||||
|
||||
items = self._eval_items
|
||||
if not items:
|
||||
logger.warning("No eval items available.")
|
||||
return
|
||||
|
||||
eval_size = min(self.config.eval_size, len(items))
|
||||
eval_items = items[:eval_size]
|
||||
|
||||
logger.info(f"Running eval on {len(eval_items)} questions...")
|
||||
start_time = time.time()
|
||||
samples = []
|
||||
|
||||
for item in eval_items:
|
||||
try:
|
||||
# Use the base env's agent loop for eval (same as training)
|
||||
prompt = self.format_prompt(item)
|
||||
completion = await self.server.chat_completion(
|
||||
messages=[
|
||||
{"role": "system", "content": self.config.system_prompt or ""},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
n=1,
|
||||
max_tokens=self.config.max_token_length,
|
||||
temperature=0.0,
|
||||
split="eval",
|
||||
)
|
||||
|
||||
response_content = (
|
||||
completion.choices[0].message.content if completion.choices else ""
|
||||
)
|
||||
|
||||
# Score the response
|
||||
correctness = await self._llm_judge(
|
||||
question=item["question"],
|
||||
expected=item["answer"],
|
||||
model_answer=response_content,
|
||||
)
|
||||
|
||||
samples.append({
|
||||
"prompt": item["question"],
|
||||
"response": response_content,
|
||||
"expected": item["answer"],
|
||||
"correctness": correctness,
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Eval error on item: {e}")
|
||||
samples.append({
|
||||
"prompt": item["question"],
|
||||
"response": f"ERROR: {e}",
|
||||
"expected": item["answer"],
|
||||
"correctness": 0.0,
|
||||
})
|
||||
|
||||
end_time = time.time()
|
||||
|
||||
# Compute metrics
|
||||
correctness_scores = [s["correctness"] for s in samples]
|
||||
eval_metrics = {
|
||||
"eval/mean_correctness": (
|
||||
sum(correctness_scores) / len(correctness_scores)
|
||||
if correctness_scores else 0.0
|
||||
),
|
||||
"eval/n_items": len(samples),
|
||||
}
|
||||
|
||||
await self.evaluate_log(
|
||||
metrics=eval_metrics,
|
||||
samples=samples,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 6. wandb_log — custom metrics
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def wandb_log(self, wandb_metrics: Optional[Dict] = None) -> None:
|
||||
"""Log reward breakdown metrics to wandb."""
|
||||
if wandb_metrics is None:
|
||||
wandb_metrics = {}
|
||||
|
||||
if self._reward_buffer:
|
||||
n = len(self._reward_buffer)
|
||||
wandb_metrics["train/mean_reward"] = sum(self._reward_buffer) / n
|
||||
wandb_metrics["train/mean_correctness"] = sum(self._correctness_buffer) / n
|
||||
wandb_metrics["train/mean_tool_usage"] = sum(self._tool_usage_buffer) / n
|
||||
wandb_metrics["train/mean_efficiency"] = sum(self._efficiency_buffer) / n
|
||||
wandb_metrics["train/mean_diversity"] = sum(self._diversity_buffer) / n
|
||||
wandb_metrics["train/total_rollouts"] = n
|
||||
|
||||
# Accuracy buckets
|
||||
wandb_metrics["train/correct_rate"] = (
|
||||
sum(1 for c in self._correctness_buffer if c >= 0.7) / n
|
||||
)
|
||||
wandb_metrics["train/tool_usage_rate"] = (
|
||||
sum(1 for t in self._tool_usage_buffer if t > 0) / n
|
||||
)
|
||||
|
||||
# Clear buffers
|
||||
self._reward_buffer.clear()
|
||||
self._correctness_buffer.clear()
|
||||
self._tool_usage_buffer.clear()
|
||||
self._efficiency_buffer.clear()
|
||||
self._diversity_buffer.clear()
|
||||
|
||||
await super().wandb_log(wandb_metrics)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Private helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _llm_judge(
|
||||
self,
|
||||
question: str,
|
||||
expected: str,
|
||||
model_answer: str,
|
||||
) -> float:
|
||||
"""
|
||||
Use the server's LLM to judge answer correctness.
|
||||
Falls back to keyword heuristic if LLM call fails.
|
||||
"""
|
||||
if not model_answer or not model_answer.strip():
|
||||
return 0.0
|
||||
|
||||
judge_prompt = (
|
||||
"You are an impartial judge evaluating the quality of an AI research answer.\n\n"
|
||||
f"Question: {question}\n\n"
|
||||
f"Reference answer: {expected}\n\n"
|
||||
f"Model answer: {model_answer}\n\n"
|
||||
"Score the model answer on a scale from 0.0 to 1.0 where:\n"
|
||||
" 1.0 = fully correct and complete\n"
|
||||
" 0.7 = mostly correct with minor gaps\n"
|
||||
" 0.4 = partially correct\n"
|
||||
" 0.1 = mentions relevant topic but wrong or very incomplete\n"
|
||||
" 0.0 = completely wrong or no answer\n\n"
|
||||
"Consider: factual accuracy, completeness, and relevance.\n"
|
||||
'Respond with ONLY a JSON object: {"score": <float>, "reason": "<one sentence>"}'
|
||||
)
|
||||
|
||||
try:
|
||||
response = await self.server.chat_completion(
|
||||
messages=[{"role": "user", "content": judge_prompt}],
|
||||
n=1,
|
||||
max_tokens=150,
|
||||
temperature=0.0,
|
||||
split="eval",
|
||||
)
|
||||
text = response.choices[0].message.content if response.choices else ""
|
||||
parsed = self._parse_judge_json(text)
|
||||
if parsed is not None:
|
||||
return float(parsed)
|
||||
except Exception as e:
|
||||
logger.debug(f"LLM judge failed: {e}. Using heuristic.")
|
||||
|
||||
return self._heuristic_score(expected, model_answer)
|
||||
|
||||
@staticmethod
|
||||
def _parse_judge_json(text: str) -> Optional[float]:
|
||||
"""Extract the score float from LLM judge JSON response."""
|
||||
try:
|
||||
clean = re.sub(r"```(?:json)?|```", "", text).strip()
|
||||
data = json.loads(clean)
|
||||
score = float(data.get("score", -1))
|
||||
if 0.0 <= score <= 1.0:
|
||||
return score
|
||||
except Exception:
|
||||
match = re.search(r'"score"\s*:\s*([0-9.]+)', text)
|
||||
if match:
|
||||
score = float(match.group(1))
|
||||
if 0.0 <= score <= 1.0:
|
||||
return score
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _heuristic_score(expected: str, model_answer: str) -> float:
|
||||
"""Lightweight keyword overlap score as fallback."""
|
||||
stopwords = {
|
||||
"the", "a", "an", "is", "are", "was", "were", "of", "in", "on",
|
||||
"at", "to", "for", "with", "and", "or", "but", "it", "its",
|
||||
"this", "that", "as", "by", "from", "be", "has", "have", "had",
|
||||
}
|
||||
|
||||
def tokenize(text: str) -> set:
|
||||
tokens = re.findall(r'\b\w+\b', text.lower())
|
||||
return {t for t in tokens if t not in stopwords and len(t) > 2}
|
||||
|
||||
expected_tokens = tokenize(expected)
|
||||
answer_tokens = tokenize(model_answer)
|
||||
|
||||
if not expected_tokens:
|
||||
return 0.5
|
||||
|
||||
overlap = len(expected_tokens & answer_tokens)
|
||||
union = len(expected_tokens | answer_tokens)
|
||||
|
||||
jaccard = overlap / union if union > 0 else 0.0
|
||||
recall = overlap / len(expected_tokens)
|
||||
return min(1.0, 0.4 * jaccard + 0.6 * recall)
|
||||
|
||||
@staticmethod
|
||||
def _extract_domains(text: str) -> set:
|
||||
"""Extract unique domains from URLs cited in the response."""
|
||||
urls = re.findall(r'https?://[^\s\)>\]"\']+', text)
|
||||
domains = set()
|
||||
for url in urls:
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
domain = parsed.netloc.lower().lstrip("www.")
|
||||
if domain:
|
||||
domains.add(domain)
|
||||
except Exception:
|
||||
pass
|
||||
return domains
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
if __name__ == "__main__":
|
||||
WebResearchEnv.cli()
|
||||
@@ -1,237 +0,0 @@
|
||||
"""
|
||||
Channel directory -- cached map of reachable channels/contacts per platform.
|
||||
|
||||
Built on gateway startup, refreshed periodically (every 5 min), and saved to
|
||||
~/.hermes/channel_directory.json. The send_message tool reads this file for
|
||||
action="list" and for resolving human-friendly channel names to numeric IDs.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DIRECTORY_PATH = Path.home() / ".hermes" / "channel_directory.json"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build / refresh
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Build a channel directory from connected platform adapters and session data.
|
||||
|
||||
Returns the directory dict and writes it to DIRECTORY_PATH.
|
||||
"""
|
||||
from gateway.config import Platform
|
||||
|
||||
platforms: Dict[str, List[Dict[str, str]]] = {}
|
||||
|
||||
for platform, adapter in adapters.items():
|
||||
try:
|
||||
if platform == Platform.DISCORD:
|
||||
platforms["discord"] = _build_discord(adapter)
|
||||
elif platform == Platform.SLACK:
|
||||
platforms["slack"] = _build_slack(adapter)
|
||||
except Exception as e:
|
||||
logger.warning("Channel directory: failed to build %s: %s", platform.value, e)
|
||||
|
||||
# Telegram, WhatsApp & Signal can't enumerate chats -- pull from session history
|
||||
for plat_name in ("telegram", "whatsapp", "signal"):
|
||||
if plat_name not in platforms:
|
||||
platforms[plat_name] = _build_from_sessions(plat_name)
|
||||
|
||||
directory = {
|
||||
"updated_at": datetime.now().isoformat(),
|
||||
"platforms": platforms,
|
||||
}
|
||||
|
||||
try:
|
||||
DIRECTORY_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(DIRECTORY_PATH, "w", encoding="utf-8") as f:
|
||||
json.dump(directory, f, indent=2, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
logger.warning("Channel directory: failed to write: %s", e)
|
||||
|
||||
return directory
|
||||
|
||||
|
||||
def _build_discord(adapter) -> List[Dict[str, str]]:
|
||||
"""Enumerate all text channels the Discord bot can see."""
|
||||
channels = []
|
||||
client = getattr(adapter, "_client", None)
|
||||
if not client:
|
||||
return channels
|
||||
|
||||
try:
|
||||
import discord as _discord
|
||||
except ImportError:
|
||||
return channels
|
||||
|
||||
for guild in client.guilds:
|
||||
for ch in guild.text_channels:
|
||||
channels.append({
|
||||
"id": str(ch.id),
|
||||
"name": ch.name,
|
||||
"guild": guild.name,
|
||||
"type": "channel",
|
||||
})
|
||||
# Also include DM-capable users we've interacted with is not
|
||||
# feasible via guild enumeration; those come from sessions.
|
||||
|
||||
# Merge any DMs from session history
|
||||
channels.extend(_build_from_sessions("discord"))
|
||||
return channels
|
||||
|
||||
|
||||
def _build_slack(adapter) -> List[Dict[str, str]]:
|
||||
"""List Slack channels the bot has joined."""
|
||||
channels = []
|
||||
# Slack adapter may expose a web client
|
||||
client = getattr(adapter, "_app", None) or getattr(adapter, "_client", None)
|
||||
if not client:
|
||||
return _build_from_sessions("slack")
|
||||
|
||||
try:
|
||||
import asyncio
|
||||
from tools.send_message_tool import _send_slack # noqa: F401
|
||||
# Use the Slack Web API directly if available
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Fallback to session data
|
||||
return _build_from_sessions("slack")
|
||||
|
||||
|
||||
def _build_from_sessions(platform_name: str) -> List[Dict[str, str]]:
|
||||
"""Pull known channels/contacts from sessions.json origin data."""
|
||||
sessions_path = Path.home() / ".hermes" / "sessions" / "sessions.json"
|
||||
if not sessions_path.exists():
|
||||
return []
|
||||
|
||||
entries = []
|
||||
try:
|
||||
with open(sessions_path, encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
|
||||
seen_ids = set()
|
||||
for _key, session in data.items():
|
||||
origin = session.get("origin") or {}
|
||||
if origin.get("platform") != platform_name:
|
||||
continue
|
||||
chat_id = origin.get("chat_id")
|
||||
if not chat_id or chat_id in seen_ids:
|
||||
continue
|
||||
seen_ids.add(chat_id)
|
||||
entries.append({
|
||||
"id": str(chat_id),
|
||||
"name": origin.get("chat_name") or origin.get("user_name") or str(chat_id),
|
||||
"type": session.get("chat_type", "dm"),
|
||||
})
|
||||
except Exception as e:
|
||||
logger.debug("Channel directory: failed to read sessions for %s: %s", platform_name, e)
|
||||
|
||||
return entries
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Read / resolve
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def load_directory() -> Dict[str, Any]:
|
||||
"""Load the cached channel directory from disk."""
|
||||
if not DIRECTORY_PATH.exists():
|
||||
return {"updated_at": None, "platforms": {}}
|
||||
try:
|
||||
with open(DIRECTORY_PATH, encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
except Exception:
|
||||
return {"updated_at": None, "platforms": {}}
|
||||
|
||||
|
||||
def resolve_channel_name(platform_name: str, name: str) -> Optional[str]:
|
||||
"""
|
||||
Resolve a human-friendly channel name to a numeric ID.
|
||||
|
||||
Matching strategy (case-insensitive, first match wins):
|
||||
- Discord: "bot-home", "#bot-home", "GuildName/bot-home"
|
||||
- Telegram: display name or group name
|
||||
- Slack: "engineering", "#engineering"
|
||||
"""
|
||||
directory = load_directory()
|
||||
channels = directory.get("platforms", {}).get(platform_name, [])
|
||||
if not channels:
|
||||
return None
|
||||
|
||||
query = name.lstrip("#").lower()
|
||||
|
||||
# 1. Exact name match
|
||||
for ch in channels:
|
||||
if ch["name"].lower() == query:
|
||||
return ch["id"]
|
||||
|
||||
# 2. Guild-qualified match for Discord ("GuildName/channel")
|
||||
if "/" in query:
|
||||
guild_part, ch_part = query.rsplit("/", 1)
|
||||
for ch in channels:
|
||||
guild = ch.get("guild", "").lower()
|
||||
if guild == guild_part and ch["name"].lower() == ch_part:
|
||||
return ch["id"]
|
||||
|
||||
# 3. Partial prefix match (only if unambiguous)
|
||||
matches = [ch for ch in channels if ch["name"].lower().startswith(query)]
|
||||
if len(matches) == 1:
|
||||
return matches[0]["id"]
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def format_directory_for_display() -> str:
|
||||
"""Format the channel directory as a human-readable list for the model."""
|
||||
directory = load_directory()
|
||||
platforms = directory.get("platforms", {})
|
||||
|
||||
if not any(platforms.values()):
|
||||
return "No messaging platforms connected or no channels discovered yet."
|
||||
|
||||
lines = ["Available messaging targets:\n"]
|
||||
|
||||
for plat_name, channels in sorted(platforms.items()):
|
||||
if not channels:
|
||||
continue
|
||||
|
||||
# Group Discord channels by guild
|
||||
if plat_name == "discord":
|
||||
guilds: Dict[str, List] = {}
|
||||
dms: List = []
|
||||
for ch in channels:
|
||||
guild = ch.get("guild")
|
||||
if guild:
|
||||
guilds.setdefault(guild, []).append(ch)
|
||||
else:
|
||||
dms.append(ch)
|
||||
|
||||
for guild_name, guild_channels in sorted(guilds.items()):
|
||||
lines.append(f"Discord ({guild_name}):")
|
||||
for ch in sorted(guild_channels, key=lambda c: c["name"]):
|
||||
lines.append(f" discord:#{ch['name']}")
|
||||
if dms:
|
||||
lines.append("Discord (DMs):")
|
||||
for ch in dms:
|
||||
lines.append(f" discord:{ch['name']}")
|
||||
lines.append("")
|
||||
else:
|
||||
lines.append(f"{plat_name.title()}:")
|
||||
for ch in channels:
|
||||
type_label = f" ({ch['type']})" if ch.get("type") else ""
|
||||
lines.append(f" {plat_name}:{ch['name']}{type_label}")
|
||||
lines.append("")
|
||||
|
||||
lines.append('Use these as the "target" parameter when sending.')
|
||||
lines.append('Bare platform name (e.g. "telegram") sends to home channel.')
|
||||
|
||||
return "\n".join(lines)
|
||||
@@ -8,7 +8,6 @@ Handles loading and validating configuration for:
|
||||
- Delivery preferences
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import json
|
||||
from pathlib import Path
|
||||
@@ -16,8 +15,6 @@ from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Optional, Any
|
||||
from enum import Enum
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Platform(Enum):
|
||||
"""Supported messaging platforms."""
|
||||
@@ -25,9 +22,6 @@ class Platform(Enum):
|
||||
TELEGRAM = "telegram"
|
||||
DISCORD = "discord"
|
||||
WHATSAPP = "whatsapp"
|
||||
SLACK = "slack"
|
||||
SIGNAL = "signal"
|
||||
HOMEASSISTANT = "homeassistant"
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -67,11 +61,10 @@ class SessionResetPolicy:
|
||||
- "daily": Reset at a specific hour each day
|
||||
- "idle": Reset after N minutes of inactivity
|
||||
- "both": Whichever triggers first (daily boundary OR idle timeout)
|
||||
- "none": Never auto-reset (context managed only by compression)
|
||||
"""
|
||||
mode: str = "both" # "daily", "idle", "both", or "none"
|
||||
mode: str = "both" # "daily", "idle", or "both"
|
||||
at_hour: int = 4 # Hour for daily reset (0-23, local time)
|
||||
idle_minutes: int = 1440 # Minutes of inactivity before reset (24 hours)
|
||||
idle_minutes: int = 120 # Minutes of inactivity before reset
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
@@ -85,7 +78,7 @@ class SessionResetPolicy:
|
||||
return cls(
|
||||
mode=data.get("mode", "both"),
|
||||
at_hour=data.get("at_hour", 4),
|
||||
idle_minutes=data.get("idle_minutes", 1440),
|
||||
idle_minutes=data.get("idle_minutes", 120),
|
||||
)
|
||||
|
||||
|
||||
@@ -156,16 +149,7 @@ class GatewayConfig:
|
||||
"""Return list of platforms that are enabled and configured."""
|
||||
connected = []
|
||||
for platform, config in self.platforms.items():
|
||||
if not config.enabled:
|
||||
continue
|
||||
# Platforms that use token/api_key auth
|
||||
if config.token or config.api_key:
|
||||
connected.append(platform)
|
||||
# WhatsApp uses enabled flag only (bridge handles auth)
|
||||
elif platform == Platform.WHATSAPP:
|
||||
connected.append(platform)
|
||||
# Signal uses extra dict for config (http_url + account)
|
||||
elif platform == Platform.SIGNAL and config.extra.get("http_url"):
|
||||
if config.enabled and (config.token or config.api_key):
|
||||
connected.append(platform)
|
||||
return connected
|
||||
|
||||
@@ -276,58 +260,9 @@ def load_gateway_config() -> GatewayConfig:
|
||||
except Exception as e:
|
||||
print(f"[gateway] Warning: Failed to load {gateway_config_path}: {e}")
|
||||
|
||||
# Bridge session_reset from config.yaml (the user-facing config file)
|
||||
# into the gateway config. config.yaml takes precedence over gateway.json
|
||||
# for session reset policy since that's where hermes setup writes it.
|
||||
try:
|
||||
import yaml
|
||||
config_yaml_path = Path.home() / ".hermes" / "config.yaml"
|
||||
if config_yaml_path.exists():
|
||||
with open(config_yaml_path) as f:
|
||||
yaml_cfg = yaml.safe_load(f) or {}
|
||||
sr = yaml_cfg.get("session_reset")
|
||||
if sr and isinstance(sr, dict):
|
||||
config.default_reset_policy = SessionResetPolicy.from_dict(sr)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Override with environment variables
|
||||
_apply_env_overrides(config)
|
||||
|
||||
# --- Validate loaded values ---
|
||||
policy = config.default_reset_policy
|
||||
|
||||
if not (0 <= policy.at_hour <= 23):
|
||||
logger.warning(
|
||||
"Invalid at_hour=%s (must be 0-23). Using default 4.", policy.at_hour
|
||||
)
|
||||
policy.at_hour = 4
|
||||
|
||||
if policy.idle_minutes is None or policy.idle_minutes <= 0:
|
||||
logger.warning(
|
||||
"Invalid idle_minutes=%s (must be positive). Using default 1440.",
|
||||
policy.idle_minutes,
|
||||
)
|
||||
policy.idle_minutes = 1440
|
||||
|
||||
# Warn about empty bot tokens — platforms that loaded an empty string
|
||||
# won't connect and the cause can be confusing without a log line.
|
||||
_token_env_names = {
|
||||
Platform.TELEGRAM: "TELEGRAM_BOT_TOKEN",
|
||||
Platform.DISCORD: "DISCORD_BOT_TOKEN",
|
||||
Platform.SLACK: "SLACK_BOT_TOKEN",
|
||||
}
|
||||
for platform, pconfig in config.platforms.items():
|
||||
if not pconfig.enabled:
|
||||
continue
|
||||
env_name = _token_env_names.get(platform)
|
||||
if env_name and pconfig.token is not None and not pconfig.token.strip():
|
||||
logger.warning(
|
||||
"%s is enabled but %s is empty. "
|
||||
"The adapter will likely fail to connect.",
|
||||
platform.value, env_name,
|
||||
)
|
||||
|
||||
return config
|
||||
|
||||
|
||||
@@ -373,53 +308,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
config.platforms[Platform.WHATSAPP] = PlatformConfig()
|
||||
config.platforms[Platform.WHATSAPP].enabled = True
|
||||
|
||||
# Slack
|
||||
slack_token = os.getenv("SLACK_BOT_TOKEN")
|
||||
if slack_token:
|
||||
if Platform.SLACK not in config.platforms:
|
||||
config.platforms[Platform.SLACK] = PlatformConfig()
|
||||
config.platforms[Platform.SLACK].enabled = True
|
||||
config.platforms[Platform.SLACK].token = slack_token
|
||||
# Home channel
|
||||
slack_home = os.getenv("SLACK_HOME_CHANNEL")
|
||||
if slack_home:
|
||||
config.platforms[Platform.SLACK].home_channel = HomeChannel(
|
||||
platform=Platform.SLACK,
|
||||
chat_id=slack_home,
|
||||
name=os.getenv("SLACK_HOME_CHANNEL_NAME", ""),
|
||||
)
|
||||
|
||||
# Signal
|
||||
signal_url = os.getenv("SIGNAL_HTTP_URL")
|
||||
signal_account = os.getenv("SIGNAL_ACCOUNT")
|
||||
if signal_url and signal_account:
|
||||
if Platform.SIGNAL not in config.platforms:
|
||||
config.platforms[Platform.SIGNAL] = PlatformConfig()
|
||||
config.platforms[Platform.SIGNAL].enabled = True
|
||||
config.platforms[Platform.SIGNAL].extra.update({
|
||||
"http_url": signal_url,
|
||||
"account": signal_account,
|
||||
"ignore_stories": os.getenv("SIGNAL_IGNORE_STORIES", "true").lower() in ("true", "1", "yes"),
|
||||
})
|
||||
signal_home = os.getenv("SIGNAL_HOME_CHANNEL")
|
||||
if signal_home:
|
||||
config.platforms[Platform.SIGNAL].home_channel = HomeChannel(
|
||||
platform=Platform.SIGNAL,
|
||||
chat_id=signal_home,
|
||||
name=os.getenv("SIGNAL_HOME_CHANNEL_NAME", "Home"),
|
||||
)
|
||||
|
||||
# Home Assistant
|
||||
hass_token = os.getenv("HASS_TOKEN")
|
||||
if hass_token:
|
||||
if Platform.HOMEASSISTANT not in config.platforms:
|
||||
config.platforms[Platform.HOMEASSISTANT] = PlatformConfig()
|
||||
config.platforms[Platform.HOMEASSISTANT].enabled = True
|
||||
config.platforms[Platform.HOMEASSISTANT].token = hass_token
|
||||
hass_url = os.getenv("HASS_URL")
|
||||
if hass_url:
|
||||
config.platforms[Platform.HOMEASSISTANT].extra["url"] = hass_url
|
||||
|
||||
# Session settings
|
||||
idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
|
||||
if idle_minutes:
|
||||
|
||||
@@ -8,19 +8,14 @@ Routes messages to the appropriate destination based on:
|
||||
- Local (always saved to files)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, List, Optional, Any, Union
|
||||
from enum import Enum
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MAX_PLATFORM_OUTPUT = 4000
|
||||
TRUNCATED_VISIBLE = 3800
|
||||
|
||||
from .config import Platform, GatewayConfig
|
||||
from .config import Platform, GatewayConfig, HomeChannel
|
||||
from .session import SessionSource
|
||||
|
||||
|
||||
@@ -251,15 +246,6 @@ class DeliveryRouter:
|
||||
"timestamp": timestamp
|
||||
}
|
||||
|
||||
def _save_full_output(self, content: str, job_id: str) -> Path:
|
||||
"""Save full cron output to disk and return the file path."""
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
out_dir = Path.home() / ".hermes" / "cron" / "output"
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
path = out_dir / f"{job_id}_{timestamp}.txt"
|
||||
path.write_text(content)
|
||||
return path
|
||||
|
||||
async def _deliver_to_platform(
|
||||
self,
|
||||
target: DeliveryTarget,
|
||||
@@ -275,16 +261,8 @@ class DeliveryRouter:
|
||||
if not target.chat_id:
|
||||
raise ValueError(f"No chat ID for {target.platform.value} delivery")
|
||||
|
||||
# Guard: truncate oversized cron output to stay within platform limits
|
||||
if len(content) > MAX_PLATFORM_OUTPUT:
|
||||
job_id = (metadata or {}).get("job_id", "unknown")
|
||||
saved_path = self._save_full_output(content, job_id)
|
||||
logger.info("Cron output truncated (%d chars) — full output: %s", len(content), saved_path)
|
||||
content = (
|
||||
content[:TRUNCATED_VISIBLE]
|
||||
+ f"\n\n... [truncated, full output saved to {saved_path}]"
|
||||
)
|
||||
|
||||
# Call the adapter's send method
|
||||
# Adapters should implement: async def send(chat_id: str, content: str) -> Dict
|
||||
return await adapter.send(target.chat_id, content, metadata=metadata)
|
||||
|
||||
|
||||
|
||||
150
gateway/hooks.py
150
gateway/hooks.py
@@ -1,150 +0,0 @@
|
||||
"""
|
||||
Event Hook System
|
||||
|
||||
A lightweight event-driven system that fires handlers at key lifecycle points.
|
||||
Hooks are discovered from ~/.hermes/hooks/ directories, each containing:
|
||||
- HOOK.yaml (metadata: name, description, events list)
|
||||
- handler.py (Python handler with async def handle(event_type, context))
|
||||
|
||||
Events:
|
||||
- gateway:startup -- Gateway process starts
|
||||
- session:start -- New session created
|
||||
- session:reset -- User ran /new or /reset
|
||||
- agent:start -- Agent begins processing a message
|
||||
- agent:step -- Each turn in the tool-calling loop
|
||||
- agent:end -- Agent finishes processing
|
||||
- command:* -- Any slash command executed (wildcard match)
|
||||
|
||||
Errors in hooks are caught and logged but never block the main pipeline.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import importlib.util
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
HOOKS_DIR = Path(os.path.expanduser("~/.hermes/hooks"))
|
||||
|
||||
|
||||
class HookRegistry:
|
||||
"""
|
||||
Discovers, loads, and fires event hooks.
|
||||
|
||||
Usage:
|
||||
registry = HookRegistry()
|
||||
registry.discover_and_load()
|
||||
await registry.emit("agent:start", {"platform": "telegram", ...})
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
# event_type -> [handler_fn, ...]
|
||||
self._handlers: Dict[str, List[Callable]] = {}
|
||||
self._loaded_hooks: List[dict] = [] # metadata for listing
|
||||
|
||||
@property
|
||||
def loaded_hooks(self) -> List[dict]:
|
||||
"""Return metadata about all loaded hooks."""
|
||||
return list(self._loaded_hooks)
|
||||
|
||||
def discover_and_load(self) -> None:
|
||||
"""
|
||||
Scan the hooks directory for hook directories and load their handlers.
|
||||
|
||||
Each hook directory must contain:
|
||||
- HOOK.yaml with at least 'name' and 'events' keys
|
||||
- handler.py with a top-level 'handle' function (sync or async)
|
||||
"""
|
||||
if not HOOKS_DIR.exists():
|
||||
return
|
||||
|
||||
for hook_dir in sorted(HOOKS_DIR.iterdir()):
|
||||
if not hook_dir.is_dir():
|
||||
continue
|
||||
|
||||
manifest_path = hook_dir / "HOOK.yaml"
|
||||
handler_path = hook_dir / "handler.py"
|
||||
|
||||
if not manifest_path.exists() or not handler_path.exists():
|
||||
continue
|
||||
|
||||
try:
|
||||
manifest = yaml.safe_load(manifest_path.read_text(encoding="utf-8"))
|
||||
if not manifest or not isinstance(manifest, dict):
|
||||
print(f"[hooks] Skipping {hook_dir.name}: invalid HOOK.yaml", flush=True)
|
||||
continue
|
||||
|
||||
hook_name = manifest.get("name", hook_dir.name)
|
||||
events = manifest.get("events", [])
|
||||
if not events:
|
||||
print(f"[hooks] Skipping {hook_name}: no events declared", flush=True)
|
||||
continue
|
||||
|
||||
# Dynamically load the handler module
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
f"hermes_hook_{hook_name}", handler_path
|
||||
)
|
||||
if spec is None or spec.loader is None:
|
||||
print(f"[hooks] Skipping {hook_name}: could not load handler.py", flush=True)
|
||||
continue
|
||||
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(module)
|
||||
|
||||
handle_fn = getattr(module, "handle", None)
|
||||
if handle_fn is None:
|
||||
print(f"[hooks] Skipping {hook_name}: no 'handle' function found", flush=True)
|
||||
continue
|
||||
|
||||
# Register the handler for each declared event
|
||||
for event in events:
|
||||
self._handlers.setdefault(event, []).append(handle_fn)
|
||||
|
||||
self._loaded_hooks.append({
|
||||
"name": hook_name,
|
||||
"description": manifest.get("description", ""),
|
||||
"events": events,
|
||||
"path": str(hook_dir),
|
||||
})
|
||||
|
||||
print(f"[hooks] Loaded hook '{hook_name}' for events: {events}", flush=True)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[hooks] Error loading hook {hook_dir.name}: {e}", flush=True)
|
||||
|
||||
async def emit(self, event_type: str, context: Optional[Dict[str, Any]] = None) -> None:
|
||||
"""
|
||||
Fire all handlers registered for an event.
|
||||
|
||||
Supports wildcard matching: handlers registered for "command:*" will
|
||||
fire for any "command:..." event. Handlers registered for a base type
|
||||
like "agent" won't fire for "agent:start" -- only exact matches and
|
||||
explicit wildcards.
|
||||
|
||||
Args:
|
||||
event_type: The event identifier (e.g. "agent:start").
|
||||
context: Optional dict with event-specific data.
|
||||
"""
|
||||
if context is None:
|
||||
context = {}
|
||||
|
||||
# Collect handlers: exact match + wildcard match
|
||||
handlers = list(self._handlers.get(event_type, []))
|
||||
|
||||
# Check for wildcard patterns (e.g., "command:*" matches "command:reset")
|
||||
if ":" in event_type:
|
||||
base = event_type.split(":")[0]
|
||||
wildcard_key = f"{base}:*"
|
||||
handlers.extend(self._handlers.get(wildcard_key, []))
|
||||
|
||||
for fn in handlers:
|
||||
try:
|
||||
result = fn(event_type, context)
|
||||
# Support both sync and async handlers
|
||||
if asyncio.iscoroutine(result):
|
||||
await result
|
||||
except Exception as e:
|
||||
print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
|
||||
@@ -1,123 +0,0 @@
|
||||
"""
|
||||
Session mirroring for cross-platform message delivery.
|
||||
|
||||
When a message is sent to a platform (via send_message or cron delivery),
|
||||
this module appends a "delivery-mirror" record to the target session's
|
||||
transcript so the receiving-side agent has context about what was sent.
|
||||
|
||||
Standalone -- works from CLI, cron, and gateway contexts without needing
|
||||
the full SessionStore machinery.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_SESSIONS_DIR = Path.home() / ".hermes" / "sessions"
|
||||
_SESSIONS_INDEX = _SESSIONS_DIR / "sessions.json"
|
||||
|
||||
|
||||
def mirror_to_session(
|
||||
platform: str,
|
||||
chat_id: str,
|
||||
message_text: str,
|
||||
source_label: str = "cli",
|
||||
) -> bool:
|
||||
"""
|
||||
Append a delivery-mirror message to the target session's transcript.
|
||||
|
||||
Finds the gateway session that matches the given platform + chat_id,
|
||||
then writes a mirror entry to both the JSONL transcript and SQLite DB.
|
||||
|
||||
Returns True if mirrored successfully, False if no matching session or error.
|
||||
All errors are caught -- this is never fatal.
|
||||
"""
|
||||
try:
|
||||
session_id = _find_session_id(platform, str(chat_id))
|
||||
if not session_id:
|
||||
logger.debug("Mirror: no session found for %s:%s", platform, chat_id)
|
||||
return False
|
||||
|
||||
mirror_msg = {
|
||||
"role": "assistant",
|
||||
"content": message_text,
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"mirror": True,
|
||||
"mirror_source": source_label,
|
||||
}
|
||||
|
||||
_append_to_jsonl(session_id, mirror_msg)
|
||||
_append_to_sqlite(session_id, mirror_msg)
|
||||
|
||||
logger.debug("Mirror: wrote to session %s (from %s)", session_id, source_label)
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.debug("Mirror failed for %s:%s: %s", platform, chat_id, e)
|
||||
return False
|
||||
|
||||
|
||||
def _find_session_id(platform: str, chat_id: str) -> Optional[str]:
|
||||
"""
|
||||
Find the active session_id for a platform + chat_id pair.
|
||||
|
||||
Scans sessions.json entries and matches where origin.chat_id == chat_id
|
||||
on the right platform. DM session keys don't embed the chat_id
|
||||
(e.g. "agent:main:telegram:dm"), so we check the origin dict.
|
||||
"""
|
||||
if not _SESSIONS_INDEX.exists():
|
||||
return None
|
||||
|
||||
try:
|
||||
with open(_SESSIONS_INDEX, encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
platform_lower = platform.lower()
|
||||
best_match = None
|
||||
best_updated = ""
|
||||
|
||||
for _key, entry in data.items():
|
||||
origin = entry.get("origin") or {}
|
||||
entry_platform = (origin.get("platform") or entry.get("platform", "")).lower()
|
||||
|
||||
if entry_platform != platform_lower:
|
||||
continue
|
||||
|
||||
origin_chat_id = str(origin.get("chat_id", ""))
|
||||
if origin_chat_id == str(chat_id):
|
||||
updated = entry.get("updated_at", "")
|
||||
if updated > best_updated:
|
||||
best_updated = updated
|
||||
best_match = entry.get("session_id")
|
||||
|
||||
return best_match
|
||||
|
||||
|
||||
def _append_to_jsonl(session_id: str, message: dict) -> None:
|
||||
"""Append a message to the JSONL transcript file."""
|
||||
transcript_path = _SESSIONS_DIR / f"{session_id}.jsonl"
|
||||
try:
|
||||
with open(transcript_path, "a", encoding="utf-8") as f:
|
||||
f.write(json.dumps(message, ensure_ascii=False) + "\n")
|
||||
except Exception as e:
|
||||
logger.debug("Mirror JSONL write failed: %s", e)
|
||||
|
||||
|
||||
def _append_to_sqlite(session_id: str, message: dict) -> None:
|
||||
"""Append a message to the SQLite session database."""
|
||||
try:
|
||||
from hermes_state import SessionDB
|
||||
db = SessionDB()
|
||||
db.append_message(
|
||||
session_id=session_id,
|
||||
role=message.get("role", "assistant"),
|
||||
content=message.get("content"),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Mirror SQLite write failed: %s", e)
|
||||
@@ -1,282 +0,0 @@
|
||||
"""
|
||||
DM Pairing System
|
||||
|
||||
Code-based approval flow for authorizing new users on messaging platforms.
|
||||
Instead of static allowlists with user IDs, unknown users receive a one-time
|
||||
pairing code that the bot owner approves via the CLI.
|
||||
|
||||
Security features (based on OWASP + NIST SP 800-63-4 guidance):
|
||||
- 8-char codes from 32-char unambiguous alphabet (no 0/O/1/I)
|
||||
- Cryptographic randomness via secrets.choice()
|
||||
- 1-hour code expiry
|
||||
- Max 3 pending codes per platform
|
||||
- Rate limiting: 1 request per user per 10 minutes
|
||||
- Lockout after 5 failed approval attempts (1 hour)
|
||||
- File permissions: chmod 0600 on all data files
|
||||
- Codes are never logged to stdout
|
||||
|
||||
Storage: ~/.hermes/pairing/
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import secrets
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# Unambiguous alphabet -- excludes 0/O, 1/I to prevent confusion
|
||||
ALPHABET = "ABCDEFGHJKLMNPQRSTUVWXYZ23456789"
|
||||
CODE_LENGTH = 8
|
||||
|
||||
# Timing constants
|
||||
CODE_TTL_SECONDS = 3600 # Codes expire after 1 hour
|
||||
RATE_LIMIT_SECONDS = 600 # 1 request per user per 10 minutes
|
||||
LOCKOUT_SECONDS = 3600 # Lockout duration after too many failures
|
||||
|
||||
# Limits
|
||||
MAX_PENDING_PER_PLATFORM = 3 # Max pending codes per platform
|
||||
MAX_FAILED_ATTEMPTS = 5 # Failed approvals before lockout
|
||||
|
||||
PAIRING_DIR = Path(os.path.expanduser("~/.hermes/pairing"))
|
||||
|
||||
|
||||
def _secure_write(path: Path, data: str) -> None:
|
||||
"""Write data to file with restrictive permissions (owner read/write only)."""
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(data, encoding="utf-8")
|
||||
try:
|
||||
os.chmod(path, 0o600)
|
||||
except OSError:
|
||||
pass # Windows doesn't support chmod the same way
|
||||
|
||||
|
||||
class PairingStore:
|
||||
"""
|
||||
Manages pairing codes and approved user lists.
|
||||
|
||||
Data files per platform:
|
||||
- {platform}-pending.json : pending pairing requests
|
||||
- {platform}-approved.json : approved (paired) users
|
||||
- _rate_limits.json : rate limit tracking
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
PAIRING_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def _pending_path(self, platform: str) -> Path:
|
||||
return PAIRING_DIR / f"{platform}-pending.json"
|
||||
|
||||
def _approved_path(self, platform: str) -> Path:
|
||||
return PAIRING_DIR / f"{platform}-approved.json"
|
||||
|
||||
def _rate_limit_path(self) -> Path:
|
||||
return PAIRING_DIR / "_rate_limits.json"
|
||||
|
||||
def _load_json(self, path: Path) -> dict:
|
||||
if path.exists():
|
||||
try:
|
||||
return json.loads(path.read_text(encoding="utf-8"))
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return {}
|
||||
return {}
|
||||
|
||||
def _save_json(self, path: Path, data: dict) -> None:
|
||||
_secure_write(path, json.dumps(data, indent=2, ensure_ascii=False))
|
||||
|
||||
# ----- Approved users -----
|
||||
|
||||
def is_approved(self, platform: str, user_id: str) -> bool:
|
||||
"""Check if a user is approved (paired) on a platform."""
|
||||
approved = self._load_json(self._approved_path(platform))
|
||||
return user_id in approved
|
||||
|
||||
def list_approved(self, platform: str = None) -> list:
|
||||
"""List approved users, optionally filtered by platform."""
|
||||
results = []
|
||||
platforms = [platform] if platform else self._all_platforms("approved")
|
||||
for p in platforms:
|
||||
approved = self._load_json(self._approved_path(p))
|
||||
for uid, info in approved.items():
|
||||
results.append({"platform": p, "user_id": uid, **info})
|
||||
return results
|
||||
|
||||
def _approve_user(self, platform: str, user_id: str, user_name: str = "") -> None:
|
||||
"""Add a user to the approved list."""
|
||||
approved = self._load_json(self._approved_path(platform))
|
||||
approved[user_id] = {
|
||||
"user_name": user_name,
|
||||
"approved_at": time.time(),
|
||||
}
|
||||
self._save_json(self._approved_path(platform), approved)
|
||||
|
||||
def revoke(self, platform: str, user_id: str) -> bool:
|
||||
"""Remove a user from the approved list. Returns True if found."""
|
||||
path = self._approved_path(platform)
|
||||
approved = self._load_json(path)
|
||||
if user_id in approved:
|
||||
del approved[user_id]
|
||||
self._save_json(path, approved)
|
||||
return True
|
||||
return False
|
||||
|
||||
# ----- Pending codes -----
|
||||
|
||||
def generate_code(
|
||||
self, platform: str, user_id: str, user_name: str = ""
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
Generate a pairing code for a new user.
|
||||
|
||||
Returns the code string, or None if:
|
||||
- User is rate-limited (too recent request)
|
||||
- Max pending codes reached for this platform
|
||||
- User/platform is in lockout due to failed attempts
|
||||
"""
|
||||
self._cleanup_expired(platform)
|
||||
|
||||
# Check lockout
|
||||
if self._is_locked_out(platform):
|
||||
return None
|
||||
|
||||
# Check rate limit for this specific user
|
||||
if self._is_rate_limited(platform, user_id):
|
||||
return None
|
||||
|
||||
# Check max pending
|
||||
pending = self._load_json(self._pending_path(platform))
|
||||
if len(pending) >= MAX_PENDING_PER_PLATFORM:
|
||||
return None
|
||||
|
||||
# Generate cryptographically random code
|
||||
code = "".join(secrets.choice(ALPHABET) for _ in range(CODE_LENGTH))
|
||||
|
||||
# Store pending request
|
||||
pending[code] = {
|
||||
"user_id": user_id,
|
||||
"user_name": user_name,
|
||||
"created_at": time.time(),
|
||||
}
|
||||
self._save_json(self._pending_path(platform), pending)
|
||||
|
||||
# Record rate limit
|
||||
self._record_rate_limit(platform, user_id)
|
||||
|
||||
return code
|
||||
|
||||
def approve_code(self, platform: str, code: str) -> Optional[dict]:
|
||||
"""
|
||||
Approve a pairing code. Adds the user to the approved list.
|
||||
|
||||
Returns {user_id, user_name} on success, None if code is invalid/expired.
|
||||
"""
|
||||
self._cleanup_expired(platform)
|
||||
code = code.upper().strip()
|
||||
|
||||
pending = self._load_json(self._pending_path(platform))
|
||||
if code not in pending:
|
||||
self._record_failed_attempt(platform)
|
||||
return None
|
||||
|
||||
entry = pending.pop(code)
|
||||
self._save_json(self._pending_path(platform), pending)
|
||||
|
||||
# Add to approved list
|
||||
self._approve_user(platform, entry["user_id"], entry.get("user_name", ""))
|
||||
|
||||
return {
|
||||
"user_id": entry["user_id"],
|
||||
"user_name": entry.get("user_name", ""),
|
||||
}
|
||||
|
||||
def list_pending(self, platform: str = None) -> list:
|
||||
"""List pending pairing requests, optionally filtered by platform."""
|
||||
results = []
|
||||
platforms = [platform] if platform else self._all_platforms("pending")
|
||||
for p in platforms:
|
||||
self._cleanup_expired(p)
|
||||
pending = self._load_json(self._pending_path(p))
|
||||
for code, info in pending.items():
|
||||
age_min = int((time.time() - info["created_at"]) / 60)
|
||||
results.append({
|
||||
"platform": p,
|
||||
"code": code,
|
||||
"user_id": info["user_id"],
|
||||
"user_name": info.get("user_name", ""),
|
||||
"age_minutes": age_min,
|
||||
})
|
||||
return results
|
||||
|
||||
def clear_pending(self, platform: str = None) -> int:
|
||||
"""Clear all pending requests. Returns count removed."""
|
||||
count = 0
|
||||
platforms = [platform] if platform else self._all_platforms("pending")
|
||||
for p in platforms:
|
||||
pending = self._load_json(self._pending_path(p))
|
||||
count += len(pending)
|
||||
self._save_json(self._pending_path(p), {})
|
||||
return count
|
||||
|
||||
# ----- Rate limiting and lockout -----
|
||||
|
||||
def _is_rate_limited(self, platform: str, user_id: str) -> bool:
|
||||
"""Check if a user has requested a code too recently."""
|
||||
limits = self._load_json(self._rate_limit_path())
|
||||
key = f"{platform}:{user_id}"
|
||||
last_request = limits.get(key, 0)
|
||||
return (time.time() - last_request) < RATE_LIMIT_SECONDS
|
||||
|
||||
def _record_rate_limit(self, platform: str, user_id: str) -> None:
|
||||
"""Record the time of a pairing request for rate limiting."""
|
||||
limits = self._load_json(self._rate_limit_path())
|
||||
key = f"{platform}:{user_id}"
|
||||
limits[key] = time.time()
|
||||
self._save_json(self._rate_limit_path(), limits)
|
||||
|
||||
def _is_locked_out(self, platform: str) -> bool:
|
||||
"""Check if a platform is in lockout due to failed approval attempts."""
|
||||
limits = self._load_json(self._rate_limit_path())
|
||||
lockout_key = f"_lockout:{platform}"
|
||||
lockout_until = limits.get(lockout_key, 0)
|
||||
return time.time() < lockout_until
|
||||
|
||||
def _record_failed_attempt(self, platform: str) -> None:
|
||||
"""Record a failed approval attempt. Triggers lockout after MAX_FAILED_ATTEMPTS."""
|
||||
limits = self._load_json(self._rate_limit_path())
|
||||
fail_key = f"_failures:{platform}"
|
||||
fails = limits.get(fail_key, 0) + 1
|
||||
limits[fail_key] = fails
|
||||
if fails >= MAX_FAILED_ATTEMPTS:
|
||||
lockout_key = f"_lockout:{platform}"
|
||||
limits[lockout_key] = time.time() + LOCKOUT_SECONDS
|
||||
limits[fail_key] = 0 # Reset counter
|
||||
print(f"[pairing] Platform {platform} locked out for {LOCKOUT_SECONDS}s "
|
||||
f"after {MAX_FAILED_ATTEMPTS} failed attempts", flush=True)
|
||||
self._save_json(self._rate_limit_path(), limits)
|
||||
|
||||
# ----- Cleanup -----
|
||||
|
||||
def _cleanup_expired(self, platform: str) -> None:
|
||||
"""Remove expired pending codes."""
|
||||
path = self._pending_path(platform)
|
||||
pending = self._load_json(path)
|
||||
now = time.time()
|
||||
expired = [
|
||||
code for code, info in pending.items()
|
||||
if (now - info["created_at"]) > CODE_TTL_SECONDS
|
||||
]
|
||||
if expired:
|
||||
for code in expired:
|
||||
del pending[code]
|
||||
self._save_json(path, pending)
|
||||
|
||||
def _all_platforms(self, suffix: str) -> list:
|
||||
"""List all platforms that have data files of a given suffix."""
|
||||
platforms = []
|
||||
for f in PAIRING_DIR.iterdir():
|
||||
if f.name.endswith(f"-{suffix}.json"):
|
||||
platform = f.name.replace(f"-{suffix}.json", "")
|
||||
if not platform.startswith("_"):
|
||||
platforms.append(platform)
|
||||
return platforms
|
||||
@@ -1,313 +0,0 @@
|
||||
# Adding a New Messaging Platform
|
||||
|
||||
Checklist for integrating a new messaging platform into the Hermes gateway.
|
||||
Use this as a reference when building a new adapter — every item here is a
|
||||
real integration point that exists in the codebase. Missing any of them will
|
||||
cause broken functionality, missing features, or inconsistent behavior.
|
||||
|
||||
---
|
||||
|
||||
## 1. Core Adapter (`gateway/platforms/<platform>.py`)
|
||||
|
||||
The adapter is a subclass of `BasePlatformAdapter` from `gateway/platforms/base.py`.
|
||||
|
||||
### Required methods
|
||||
|
||||
| Method | Purpose |
|
||||
|--------|---------|
|
||||
| `__init__(self, config)` | Parse config, init state. Call `super().__init__(config, Platform.YOUR_PLATFORM)` |
|
||||
| `connect() -> bool` | Connect to the platform, start listeners. Return True on success |
|
||||
| `disconnect()` | Stop listeners, close connections, cancel tasks |
|
||||
| `send(chat_id, text, ...) -> SendResult` | Send a text message |
|
||||
| `send_typing(chat_id)` | Send typing indicator |
|
||||
| `send_image(chat_id, image_url, caption) -> SendResult` | Send an image |
|
||||
| `get_chat_info(chat_id) -> dict` | Return `{name, type, chat_id}` for a chat |
|
||||
|
||||
### Optional methods (have default stubs in base)
|
||||
|
||||
| Method | Purpose |
|
||||
|--------|---------|
|
||||
| `send_document(chat_id, path, caption)` | Send a file attachment |
|
||||
| `send_voice(chat_id, path)` | Send a voice message |
|
||||
| `send_video(chat_id, path, caption)` | Send a video |
|
||||
| `send_animation(chat_id, path, caption)` | Send a GIF/animation |
|
||||
| `send_image_file(chat_id, path, caption)` | Send image from local file |
|
||||
|
||||
### Required function
|
||||
|
||||
```python
|
||||
def check_<platform>_requirements() -> bool:
|
||||
"""Check if this platform's dependencies are available."""
|
||||
```
|
||||
|
||||
### Key patterns to follow
|
||||
|
||||
- Use `self.build_source(...)` to construct `SessionSource` objects
|
||||
- Call `self.handle_message(event)` to dispatch inbound messages to the gateway
|
||||
- Use `MessageEvent`, `MessageType`, `SendResult` from base
|
||||
- Use `cache_image_from_bytes`, `cache_audio_from_bytes`, `cache_document_from_bytes` for attachments
|
||||
- Filter self-messages (prevent reply loops)
|
||||
- Filter sync/echo messages if the platform has them
|
||||
- Redact sensitive identifiers (phone numbers, tokens) in all log output
|
||||
- Implement reconnection with exponential backoff + jitter for streaming connections
|
||||
- Set `MAX_MESSAGE_LENGTH` if the platform has message size limits
|
||||
|
||||
---
|
||||
|
||||
## 2. Platform Enum (`gateway/config.py`)
|
||||
|
||||
Add the platform to the `Platform` enum:
|
||||
|
||||
```python
|
||||
class Platform(Enum):
|
||||
...
|
||||
YOUR_PLATFORM = "your_platform"
|
||||
```
|
||||
|
||||
Add env var loading in `_apply_env_overrides()`:
|
||||
|
||||
```python
|
||||
# Your Platform
|
||||
your_token = os.getenv("YOUR_PLATFORM_TOKEN")
|
||||
if your_token:
|
||||
if Platform.YOUR_PLATFORM not in config.platforms:
|
||||
config.platforms[Platform.YOUR_PLATFORM] = PlatformConfig()
|
||||
config.platforms[Platform.YOUR_PLATFORM].enabled = True
|
||||
config.platforms[Platform.YOUR_PLATFORM].token = your_token
|
||||
```
|
||||
|
||||
Update `get_connected_platforms()` if your platform doesn't use token/api_key
|
||||
(e.g., WhatsApp uses `enabled` flag, Signal uses `extra` dict).
|
||||
|
||||
---
|
||||
|
||||
## 3. Adapter Factory (`gateway/run.py`)
|
||||
|
||||
Add to `_create_adapter()`:
|
||||
|
||||
```python
|
||||
elif platform == Platform.YOUR_PLATFORM:
|
||||
from gateway.platforms.your_platform import YourAdapter, check_your_requirements
|
||||
if not check_your_requirements():
|
||||
logger.warning("Your Platform: dependencies not met")
|
||||
return None
|
||||
return YourAdapter(config)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Authorization Maps (`gateway/run.py`)
|
||||
|
||||
Add to BOTH dicts in `_is_user_authorized()`:
|
||||
|
||||
```python
|
||||
platform_env_map = {
|
||||
...
|
||||
Platform.YOUR_PLATFORM: "YOUR_PLATFORM_ALLOWED_USERS",
|
||||
}
|
||||
platform_allow_all_map = {
|
||||
...
|
||||
Platform.YOUR_PLATFORM: "YOUR_PLATFORM_ALLOW_ALL_USERS",
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. Session Source (`gateway/session.py`)
|
||||
|
||||
If your platform needs extra identity fields (e.g., Signal's UUID alongside
|
||||
phone number), add them to the `SessionSource` dataclass with `Optional` defaults,
|
||||
and update `to_dict()`, `from_dict()`, and `build_source()` in base.py.
|
||||
|
||||
---
|
||||
|
||||
## 6. System Prompt Hints (`agent/prompt_builder.py`)
|
||||
|
||||
Add a `PLATFORM_HINTS` entry so the agent knows what platform it's on:
|
||||
|
||||
```python
|
||||
PLATFORM_HINTS = {
|
||||
...
|
||||
"your_platform": (
|
||||
"You are on Your Platform. "
|
||||
"Describe formatting capabilities, media support, etc."
|
||||
),
|
||||
}
|
||||
```
|
||||
|
||||
Without this, the agent won't know it's on your platform and may use
|
||||
inappropriate formatting (e.g., markdown on platforms that don't render it).
|
||||
|
||||
---
|
||||
|
||||
## 7. Toolset (`toolsets.py`)
|
||||
|
||||
Add a named toolset for your platform:
|
||||
|
||||
```python
|
||||
"hermes-your-platform": {
|
||||
"description": "Your Platform bot toolset",
|
||||
"tools": _HERMES_CORE_TOOLS,
|
||||
"includes": []
|
||||
},
|
||||
```
|
||||
|
||||
And add it to the `hermes-gateway` composite:
|
||||
|
||||
```python
|
||||
"hermes-gateway": {
|
||||
"includes": [..., "hermes-your-platform"]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 8. Cron Delivery (`cron/scheduler.py`)
|
||||
|
||||
Add to `platform_map` in `_deliver_result()`:
|
||||
|
||||
```python
|
||||
platform_map = {
|
||||
...
|
||||
"your_platform": Platform.YOUR_PLATFORM,
|
||||
}
|
||||
```
|
||||
|
||||
Without this, `schedule_cronjob(deliver="your_platform")` silently fails.
|
||||
|
||||
---
|
||||
|
||||
## 9. Send Message Tool (`tools/send_message_tool.py`)
|
||||
|
||||
Add to `platform_map` in `send_message_tool()`:
|
||||
|
||||
```python
|
||||
platform_map = {
|
||||
...
|
||||
"your_platform": Platform.YOUR_PLATFORM,
|
||||
}
|
||||
```
|
||||
|
||||
Add routing in `_send_to_platform()`:
|
||||
|
||||
```python
|
||||
elif platform == Platform.YOUR_PLATFORM:
|
||||
return await _send_your_platform(pconfig, chat_id, message)
|
||||
```
|
||||
|
||||
Implement `_send_your_platform()` — a standalone async function that sends
|
||||
a single message without requiring the full adapter (for use by cron jobs
|
||||
and the send_message tool outside the gateway process).
|
||||
|
||||
Update the tool schema `target` description to include your platform example.
|
||||
|
||||
---
|
||||
|
||||
## 10. Cronjob Tool Schema (`tools/cronjob_tools.py`)
|
||||
|
||||
Update the `deliver` parameter description and docstring to mention your
|
||||
platform as a delivery option.
|
||||
|
||||
---
|
||||
|
||||
## 11. Channel Directory (`gateway/channel_directory.py`)
|
||||
|
||||
If your platform can't enumerate chats (most can't), add it to the
|
||||
session-based discovery list:
|
||||
|
||||
```python
|
||||
for plat_name in ("telegram", "whatsapp", "signal", "your_platform"):
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 12. Status Display (`hermes_cli/status.py`)
|
||||
|
||||
Add to the `platforms` dict in the Messaging Platforms section:
|
||||
|
||||
```python
|
||||
platforms = {
|
||||
...
|
||||
"Your Platform": ("YOUR_PLATFORM_TOKEN", "YOUR_PLATFORM_HOME_CHANNEL"),
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 13. Gateway Setup Wizard (`hermes_cli/gateway.py`)
|
||||
|
||||
Add to the `_PLATFORMS` list:
|
||||
|
||||
```python
|
||||
{
|
||||
"key": "your_platform",
|
||||
"label": "Your Platform",
|
||||
"emoji": "📱",
|
||||
"token_var": "YOUR_PLATFORM_TOKEN",
|
||||
"setup_instructions": [...],
|
||||
"vars": [...],
|
||||
}
|
||||
```
|
||||
|
||||
If your platform needs custom setup logic (connectivity testing, QR codes,
|
||||
policy choices), add a `_setup_your_platform()` function and route to it
|
||||
in the platform selection switch.
|
||||
|
||||
Update `_platform_status()` if your platform's "configured" check differs
|
||||
from the standard `bool(get_env_value(token_var))`.
|
||||
|
||||
---
|
||||
|
||||
## 14. Phone/ID Redaction (`agent/redact.py`)
|
||||
|
||||
If your platform uses sensitive identifiers (phone numbers, etc.), add a
|
||||
regex pattern and redaction function to `agent/redact.py`. This ensures
|
||||
identifiers are masked in ALL log output, not just your adapter's logs.
|
||||
|
||||
---
|
||||
|
||||
## 15. Documentation
|
||||
|
||||
| File | What to update |
|
||||
|------|---------------|
|
||||
| `README.md` | Platform list in feature table + documentation table |
|
||||
| `AGENTS.md` | Gateway description + env var config section |
|
||||
| `website/docs/user-guide/messaging/<platform>.md` | **NEW** — Full setup guide (see existing platform docs for template) |
|
||||
| `website/docs/user-guide/messaging/index.md` | Architecture diagram, toolset table, security examples, Next Steps links |
|
||||
| `website/docs/reference/environment-variables.md` | All env vars for the platform |
|
||||
|
||||
---
|
||||
|
||||
## 16. Tests (`tests/gateway/test_<platform>.py`)
|
||||
|
||||
Recommended test coverage:
|
||||
|
||||
- Platform enum exists with correct value
|
||||
- Config loading from env vars via `_apply_env_overrides`
|
||||
- Adapter init (config parsing, allowlist handling, default values)
|
||||
- Helper functions (redaction, parsing, file type detection)
|
||||
- Session source round-trip (to_dict → from_dict)
|
||||
- Authorization integration (platform in allowlist maps)
|
||||
- Send message tool routing (platform in platform_map)
|
||||
|
||||
Optional but valuable:
|
||||
- Async tests for message handling flow (mock the platform API)
|
||||
- SSE/WebSocket reconnection logic
|
||||
- Attachment processing
|
||||
- Group message filtering
|
||||
|
||||
---
|
||||
|
||||
## Quick Verification
|
||||
|
||||
After implementing everything, verify with:
|
||||
|
||||
```bash
|
||||
# All tests pass
|
||||
python -m pytest tests/ -q
|
||||
|
||||
# Grep for your platform name to find any missed integration points
|
||||
grep -r "telegram\|discord\|whatsapp\|slack" gateway/ tools/ agent/ cron/ hermes_cli/ toolsets.py \
|
||||
--include="*.py" -l | sort -u
|
||||
# Check each file in the output — if it mentions other platforms but not yours, you missed it
|
||||
```
|
||||
@@ -6,253 +6,23 @@ and implement the required methods.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any, Callable, Awaitable, Tuple
|
||||
from enum import Enum
|
||||
|
||||
import sys
|
||||
from pathlib import Path as _Path
|
||||
sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
|
||||
sys.path.insert(0, str(__file__).rsplit("/", 3)[0])
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.session import SessionSource
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Image cache utilities
|
||||
#
|
||||
# When users send images on messaging platforms, we download them to a local
|
||||
# cache directory so they can be analyzed by the vision tool (which accepts
|
||||
# local file paths). This avoids issues with ephemeral platform URLs
|
||||
# (e.g. Telegram file URLs expire after ~1 hour).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Default location: ~/.hermes/image_cache/
|
||||
IMAGE_CACHE_DIR = Path(os.path.expanduser("~/.hermes/image_cache"))
|
||||
|
||||
|
||||
def get_image_cache_dir() -> Path:
|
||||
"""Return the image cache directory, creating it if it doesn't exist."""
|
||||
IMAGE_CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
return IMAGE_CACHE_DIR
|
||||
|
||||
|
||||
def cache_image_from_bytes(data: bytes, ext: str = ".jpg") -> str:
|
||||
"""
|
||||
Save raw image bytes to the cache and return the absolute file path.
|
||||
|
||||
Args:
|
||||
data: Raw image bytes.
|
||||
ext: File extension including the dot (e.g. ".jpg", ".png").
|
||||
|
||||
Returns:
|
||||
Absolute path to the cached image file as a string.
|
||||
"""
|
||||
cache_dir = get_image_cache_dir()
|
||||
filename = f"img_{uuid.uuid4().hex[:12]}{ext}"
|
||||
filepath = cache_dir / filename
|
||||
filepath.write_bytes(data)
|
||||
return str(filepath)
|
||||
|
||||
|
||||
async def cache_image_from_url(url: str, ext: str = ".jpg") -> str:
|
||||
"""
|
||||
Download an image from a URL and save it to the local cache.
|
||||
|
||||
Uses httpx for async download with a reasonable timeout.
|
||||
|
||||
Args:
|
||||
url: The HTTP/HTTPS URL to download from.
|
||||
ext: File extension including the dot (e.g. ".jpg", ".png").
|
||||
|
||||
Returns:
|
||||
Absolute path to the cached image file as a string.
|
||||
"""
|
||||
import httpx
|
||||
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
response = await client.get(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": "Mozilla/5.0 (compatible; HermesAgent/1.0)",
|
||||
"Accept": "image/*,*/*;q=0.8",
|
||||
},
|
||||
)
|
||||
response.raise_for_status()
|
||||
return cache_image_from_bytes(response.content, ext)
|
||||
|
||||
|
||||
def cleanup_image_cache(max_age_hours: int = 24) -> int:
|
||||
"""
|
||||
Delete cached images older than *max_age_hours*.
|
||||
|
||||
Returns the number of files removed.
|
||||
"""
|
||||
import time
|
||||
|
||||
cache_dir = get_image_cache_dir()
|
||||
cutoff = time.time() - (max_age_hours * 3600)
|
||||
removed = 0
|
||||
for f in cache_dir.iterdir():
|
||||
if f.is_file() and f.stat().st_mtime < cutoff:
|
||||
try:
|
||||
f.unlink()
|
||||
removed += 1
|
||||
except OSError:
|
||||
pass
|
||||
return removed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Audio cache utilities
|
||||
#
|
||||
# Same pattern as image cache -- voice messages from platforms are downloaded
|
||||
# here so the STT tool (OpenAI Whisper) can transcribe them from local files.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
AUDIO_CACHE_DIR = Path(os.path.expanduser("~/.hermes/audio_cache"))
|
||||
|
||||
|
||||
def get_audio_cache_dir() -> Path:
|
||||
"""Return the audio cache directory, creating it if it doesn't exist."""
|
||||
AUDIO_CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
return AUDIO_CACHE_DIR
|
||||
|
||||
|
||||
def cache_audio_from_bytes(data: bytes, ext: str = ".ogg") -> str:
|
||||
"""
|
||||
Save raw audio bytes to the cache and return the absolute file path.
|
||||
|
||||
Args:
|
||||
data: Raw audio bytes.
|
||||
ext: File extension including the dot (e.g. ".ogg", ".mp3").
|
||||
|
||||
Returns:
|
||||
Absolute path to the cached audio file as a string.
|
||||
"""
|
||||
cache_dir = get_audio_cache_dir()
|
||||
filename = f"audio_{uuid.uuid4().hex[:12]}{ext}"
|
||||
filepath = cache_dir / filename
|
||||
filepath.write_bytes(data)
|
||||
return str(filepath)
|
||||
|
||||
|
||||
async def cache_audio_from_url(url: str, ext: str = ".ogg") -> str:
|
||||
"""
|
||||
Download an audio file from a URL and save it to the local cache.
|
||||
|
||||
Args:
|
||||
url: The HTTP/HTTPS URL to download from.
|
||||
ext: File extension including the dot (e.g. ".ogg", ".mp3").
|
||||
|
||||
Returns:
|
||||
Absolute path to the cached audio file as a string.
|
||||
"""
|
||||
import httpx
|
||||
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
response = await client.get(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": "Mozilla/5.0 (compatible; HermesAgent/1.0)",
|
||||
"Accept": "audio/*,*/*;q=0.8",
|
||||
},
|
||||
)
|
||||
response.raise_for_status()
|
||||
return cache_audio_from_bytes(response.content, ext)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Document cache utilities
|
||||
#
|
||||
# Same pattern as image/audio cache -- documents from platforms are downloaded
|
||||
# here so the agent can reference them by local file path.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
DOCUMENT_CACHE_DIR = Path(os.path.expanduser("~/.hermes/document_cache"))
|
||||
|
||||
SUPPORTED_DOCUMENT_TYPES = {
|
||||
".pdf": "application/pdf",
|
||||
".md": "text/markdown",
|
||||
".txt": "text/plain",
|
||||
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
}
|
||||
|
||||
|
||||
def get_document_cache_dir() -> Path:
|
||||
"""Return the document cache directory, creating it if it doesn't exist."""
|
||||
DOCUMENT_CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
return DOCUMENT_CACHE_DIR
|
||||
|
||||
|
||||
def cache_document_from_bytes(data: bytes, filename: str) -> str:
|
||||
"""
|
||||
Save raw document bytes to the cache and return the absolute file path.
|
||||
|
||||
The cached filename preserves the original human-readable name with a
|
||||
unique prefix: ``doc_{uuid12}_{original_filename}``.
|
||||
|
||||
Args:
|
||||
data: Raw document bytes.
|
||||
filename: Original filename (e.g. "report.pdf").
|
||||
|
||||
Returns:
|
||||
Absolute path to the cached document file as a string.
|
||||
|
||||
Raises:
|
||||
ValueError: If the sanitized path escapes the cache directory.
|
||||
"""
|
||||
cache_dir = get_document_cache_dir()
|
||||
# Sanitize: strip directory components, null bytes, and control characters
|
||||
safe_name = Path(filename).name if filename else "document"
|
||||
safe_name = safe_name.replace("\x00", "").strip()
|
||||
if not safe_name or safe_name in (".", ".."):
|
||||
safe_name = "document"
|
||||
cached_name = f"doc_{uuid.uuid4().hex[:12]}_{safe_name}"
|
||||
filepath = cache_dir / cached_name
|
||||
# Final safety check: ensure path stays inside cache dir
|
||||
if not filepath.resolve().is_relative_to(cache_dir.resolve()):
|
||||
raise ValueError(f"Path traversal rejected: {filename!r}")
|
||||
filepath.write_bytes(data)
|
||||
return str(filepath)
|
||||
|
||||
|
||||
def cleanup_document_cache(max_age_hours: int = 24) -> int:
|
||||
"""
|
||||
Delete cached documents older than *max_age_hours*.
|
||||
|
||||
Returns the number of files removed.
|
||||
"""
|
||||
import time
|
||||
|
||||
cache_dir = get_document_cache_dir()
|
||||
cutoff = time.time() - (max_age_hours * 3600)
|
||||
removed = 0
|
||||
for f in cache_dir.iterdir():
|
||||
if f.is_file() and f.stat().st_mtime < cutoff:
|
||||
try:
|
||||
f.unlink()
|
||||
removed += 1
|
||||
except OSError:
|
||||
pass
|
||||
return removed
|
||||
|
||||
|
||||
class MessageType(Enum):
|
||||
"""Types of incoming messages."""
|
||||
TEXT = "text"
|
||||
LOCATION = "location"
|
||||
PHOTO = "photo"
|
||||
VIDEO = "video"
|
||||
AUDIO = "audio"
|
||||
@@ -319,7 +89,7 @@ class SendResult:
|
||||
raw_response: Any = None
|
||||
|
||||
|
||||
# Handler may return str (sent by base) or dict(content=..., already_sent=True).
|
||||
# Type for message handlers
|
||||
MessageHandler = Callable[[MessageEvent], Awaitable[Optional[str]]]
|
||||
|
||||
|
||||
@@ -399,20 +169,7 @@ class BasePlatformAdapter(ABC):
|
||||
SendResult with success status and message ID
|
||||
"""
|
||||
pass
|
||||
|
||||
async def edit_message(
|
||||
self,
|
||||
chat_id: str,
|
||||
message_id: str,
|
||||
content: str,
|
||||
) -> SendResult:
|
||||
"""
|
||||
Edit a previously sent message. Optional — platforms that don't
|
||||
support editing return success=False and callers fall back to
|
||||
sending a new message.
|
||||
"""
|
||||
return SendResult(success=False, error="Not supported")
|
||||
|
||||
|
||||
async def send_typing(self, chat_id: str) -> None:
|
||||
"""
|
||||
Send a typing indicator.
|
||||
@@ -439,28 +196,6 @@ class BasePlatformAdapter(ABC):
|
||||
text = f"{caption}\n{image_url}" if caption else image_url
|
||||
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
||||
|
||||
async def send_animation(
|
||||
self,
|
||||
chat_id: str,
|
||||
animation_url: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""
|
||||
Send an animated GIF natively via the platform API.
|
||||
|
||||
Override in subclasses to send GIFs as proper animations
|
||||
(e.g., Telegram send_animation) so they auto-play inline.
|
||||
Default falls back to send_image.
|
||||
"""
|
||||
return await self.send_image(chat_id=chat_id, image_url=animation_url, caption=caption, reply_to=reply_to)
|
||||
|
||||
@staticmethod
|
||||
def _is_animation_url(url: str) -> bool:
|
||||
"""Check if a URL points to an animated GIF (vs a static image)."""
|
||||
lower = url.lower().split('?')[0] # Strip query params
|
||||
return lower.endswith('.gif')
|
||||
|
||||
@staticmethod
|
||||
def extract_images(content: str) -> Tuple[List[Tuple[str, str]], str]:
|
||||
"""
|
||||
@@ -496,14 +231,10 @@ class BasePlatformAdapter(ABC):
|
||||
url = match.group(1)
|
||||
images.append((url, ""))
|
||||
|
||||
# Remove only the matched image tags from content (not all markdown images)
|
||||
# Remove matched image tags from content if we found images
|
||||
if images:
|
||||
extracted_urls = {url for url, _ in images}
|
||||
def _remove_if_extracted(match):
|
||||
url = match.group(2) if match.lastindex >= 2 else match.group(1)
|
||||
return '' if url in extracted_urls else match.group(0)
|
||||
cleaned = re.sub(md_pattern, _remove_if_extracted, cleaned)
|
||||
cleaned = re.sub(html_pattern, _remove_if_extracted, cleaned)
|
||||
cleaned = re.sub(md_pattern, '', cleaned)
|
||||
cleaned = re.sub(html_pattern, '', cleaned)
|
||||
# Clean up leftover blank lines
|
||||
cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip()
|
||||
|
||||
@@ -527,63 +258,7 @@ class BasePlatformAdapter(ABC):
|
||||
if caption:
|
||||
text = f"{caption}\n{text}"
|
||||
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
||||
|
||||
async def send_video(
|
||||
self,
|
||||
chat_id: str,
|
||||
video_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""
|
||||
Send a video natively via the platform API.
|
||||
|
||||
Override in subclasses to send videos as inline playable media.
|
||||
Default falls back to sending the file path as text.
|
||||
"""
|
||||
text = f"🎬 Video: {video_path}"
|
||||
if caption:
|
||||
text = f"{caption}\n{text}"
|
||||
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
||||
|
||||
async def send_document(
|
||||
self,
|
||||
chat_id: str,
|
||||
file_path: str,
|
||||
caption: Optional[str] = None,
|
||||
file_name: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""
|
||||
Send a document/file natively via the platform API.
|
||||
|
||||
Override in subclasses to send files as downloadable attachments.
|
||||
Default falls back to sending the file path as text.
|
||||
"""
|
||||
text = f"📎 File: {file_path}"
|
||||
if caption:
|
||||
text = f"{caption}\n{text}"
|
||||
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
||||
|
||||
async def send_image_file(
|
||||
self,
|
||||
chat_id: str,
|
||||
image_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""
|
||||
Send a local image file natively via the platform API.
|
||||
|
||||
Unlike send_image() which takes a URL, this takes a local file path.
|
||||
Override in subclasses for native photo attachments.
|
||||
Default falls back to sending the file path as text.
|
||||
"""
|
||||
text = f"🖼️ Image: {image_path}"
|
||||
if caption:
|
||||
text = f"{caption}\n{text}"
|
||||
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
||||
|
||||
|
||||
@staticmethod
|
||||
def extract_media(content: str) -> Tuple[List[Tuple[str, bool]], str]:
|
||||
"""
|
||||
@@ -659,27 +334,6 @@ class BasePlatformAdapter(ABC):
|
||||
# Spawn background task to process this message
|
||||
asyncio.create_task(self._process_message_background(event, session_key))
|
||||
|
||||
@staticmethod
|
||||
def _get_human_delay() -> float:
|
||||
"""
|
||||
Return a random delay in seconds for human-like response pacing.
|
||||
|
||||
Reads from env vars:
|
||||
HERMES_HUMAN_DELAY_MODE: "off" (default) | "natural" | "custom"
|
||||
HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode)
|
||||
HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode)
|
||||
"""
|
||||
import random
|
||||
|
||||
mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower()
|
||||
if mode == "off":
|
||||
return 0.0
|
||||
min_ms = int(os.getenv("HERMES_HUMAN_DELAY_MIN_MS", "800"))
|
||||
max_ms = int(os.getenv("HERMES_HUMAN_DELAY_MAX_MS", "2500"))
|
||||
if mode == "natural":
|
||||
min_ms, max_ms = 800, 2500
|
||||
return random.uniform(min_ms / 1000.0, max_ms / 1000.0)
|
||||
|
||||
async def _process_message_background(self, event: MessageEvent, session_key: str) -> None:
|
||||
"""Background task that actually processes the message."""
|
||||
# Create interrupt event for this session
|
||||
@@ -691,32 +345,18 @@ class BasePlatformAdapter(ABC):
|
||||
|
||||
try:
|
||||
# Call the handler (this can take a while with tool calls)
|
||||
handler_result = await self._message_handler(event)
|
||||
|
||||
# Normalise: handler may return str or dict(content, already_sent)
|
||||
already_sent = False
|
||||
if isinstance(handler_result, dict):
|
||||
response = handler_result.get("content") or ""
|
||||
already_sent = handler_result.get("already_sent", False)
|
||||
else:
|
||||
response = handler_result
|
||||
response = await self._message_handler(event)
|
||||
|
||||
# Send response if any
|
||||
if not response:
|
||||
if not already_sent:
|
||||
logger.warning("[%s] Handler returned empty/None response for %s", self.name, event.source.chat_id)
|
||||
if response:
|
||||
# Extract MEDIA:<path> tags (from TTS tool) before other processing
|
||||
media_files, response = self.extract_media(response)
|
||||
|
||||
# Extract image URLs and send them as native platform attachments
|
||||
images, text_content = self.extract_images(response)
|
||||
if images:
|
||||
logger.info("[%s] extract_images found %d image(s) in response (%d chars)", self.name, len(images), len(response))
|
||||
|
||||
# Send the text portion first (if any remains after extractions)
|
||||
if text_content and not already_sent:
|
||||
logger.info("[%s] Sending response (%d chars) to %s", self.name, len(text_content), event.source.chat_id)
|
||||
if text_content:
|
||||
result = await self.send(
|
||||
chat_id=event.source.chat_id,
|
||||
content=text_content,
|
||||
@@ -735,70 +375,30 @@ class BasePlatformAdapter(ABC):
|
||||
if not fallback_result.success:
|
||||
print(f"[{self.name}] Fallback send also failed: {fallback_result.error}")
|
||||
|
||||
# Human-like pacing delay between text and media
|
||||
human_delay = self._get_human_delay()
|
||||
|
||||
# Send extracted images as native attachments
|
||||
if images:
|
||||
logger.info("[%s] Extracted %d image(s) to send as attachments", self.name, len(images))
|
||||
for image_url, alt_text in images:
|
||||
if human_delay > 0:
|
||||
await asyncio.sleep(human_delay)
|
||||
try:
|
||||
logger.info("[%s] Sending image: %s (alt=%s)", self.name, image_url[:80], alt_text[:30] if alt_text else "")
|
||||
# Route animated GIFs through send_animation for proper playback
|
||||
if self._is_animation_url(image_url):
|
||||
img_result = await self.send_animation(
|
||||
chat_id=event.source.chat_id,
|
||||
animation_url=image_url,
|
||||
caption=alt_text if alt_text else None,
|
||||
)
|
||||
else:
|
||||
img_result = await self.send_image(
|
||||
chat_id=event.source.chat_id,
|
||||
image_url=image_url,
|
||||
caption=alt_text if alt_text else None,
|
||||
)
|
||||
img_result = await self.send_image(
|
||||
chat_id=event.source.chat_id,
|
||||
image_url=image_url,
|
||||
caption=alt_text if alt_text else None,
|
||||
)
|
||||
if not img_result.success:
|
||||
logger.error("[%s] Failed to send image: %s", self.name, img_result.error)
|
||||
print(f"[{self.name}] Failed to send image: {img_result.error}")
|
||||
except Exception as img_err:
|
||||
logger.error("[%s] Error sending image: %s", self.name, img_err, exc_info=True)
|
||||
print(f"[{self.name}] Error sending image: {img_err}")
|
||||
|
||||
# Send extracted media files — route by file type
|
||||
_AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'}
|
||||
_VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.3gp'}
|
||||
_IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}
|
||||
|
||||
for media_path, is_voice in media_files:
|
||||
if human_delay > 0:
|
||||
await asyncio.sleep(human_delay)
|
||||
# Send extracted audio/voice files as native attachments
|
||||
for audio_path, is_voice in media_files:
|
||||
try:
|
||||
ext = Path(media_path).suffix.lower()
|
||||
if ext in _AUDIO_EXTS:
|
||||
media_result = await self.send_voice(
|
||||
chat_id=event.source.chat_id,
|
||||
audio_path=media_path,
|
||||
)
|
||||
elif ext in _VIDEO_EXTS:
|
||||
media_result = await self.send_video(
|
||||
chat_id=event.source.chat_id,
|
||||
video_path=media_path,
|
||||
)
|
||||
elif ext in _IMAGE_EXTS:
|
||||
media_result = await self.send_image_file(
|
||||
chat_id=event.source.chat_id,
|
||||
image_path=media_path,
|
||||
)
|
||||
else:
|
||||
media_result = await self.send_document(
|
||||
chat_id=event.source.chat_id,
|
||||
file_path=media_path,
|
||||
)
|
||||
|
||||
if not media_result.success:
|
||||
print(f"[{self.name}] Failed to send media ({ext}): {media_result.error}")
|
||||
except Exception as media_err:
|
||||
print(f"[{self.name}] Error sending media: {media_err}")
|
||||
voice_result = await self.send_voice(
|
||||
chat_id=event.source.chat_id,
|
||||
audio_path=audio_path,
|
||||
)
|
||||
if not voice_result.success:
|
||||
print(f"[{self.name}] Failed to send voice: {voice_result.error}")
|
||||
except Exception as voice_err:
|
||||
print(f"[{self.name}] Error sending voice: {voice_err}")
|
||||
|
||||
# Check if there's a pending message that was queued during our processing
|
||||
if session_key in self._pending_messages:
|
||||
@@ -846,15 +446,9 @@ class BasePlatformAdapter(ABC):
|
||||
chat_type: str = "dm",
|
||||
user_id: Optional[str] = None,
|
||||
user_name: Optional[str] = None,
|
||||
thread_id: Optional[str] = None,
|
||||
chat_topic: Optional[str] = None,
|
||||
user_id_alt: Optional[str] = None,
|
||||
chat_id_alt: Optional[str] = None,
|
||||
thread_id: Optional[str] = None
|
||||
) -> SessionSource:
|
||||
"""Helper to build a SessionSource for this platform."""
|
||||
# Normalize empty topic to None
|
||||
if chat_topic is not None and not chat_topic.strip():
|
||||
chat_topic = None
|
||||
return SessionSource(
|
||||
platform=self.platform,
|
||||
chat_id=str(chat_id),
|
||||
@@ -863,9 +457,6 @@ class BasePlatformAdapter(ABC):
|
||||
user_id=str(user_id) if user_id else None,
|
||||
user_name=user_name,
|
||||
thread_id=str(thread_id) if thread_id else None,
|
||||
chat_topic=chat_topic.strip() if chat_topic else None,
|
||||
user_id_alt=user_id_alt,
|
||||
chat_id_alt=chat_id_alt,
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
@@ -892,90 +483,34 @@ class BasePlatformAdapter(ABC):
|
||||
|
||||
def truncate_message(self, content: str, max_length: int = 4096) -> List[str]:
|
||||
"""
|
||||
Split a long message into chunks, preserving code block boundaries.
|
||||
|
||||
When a split falls inside a triple-backtick code block, the fence is
|
||||
closed at the end of the current chunk and reopened (with the original
|
||||
language tag) at the start of the next chunk. Multi-chunk responses
|
||||
receive indicators like ``(1/3)``.
|
||||
|
||||
Split a long message into chunks.
|
||||
|
||||
Args:
|
||||
content: The full message content
|
||||
max_length: Maximum length per chunk (platform-specific)
|
||||
|
||||
|
||||
Returns:
|
||||
List of message chunks
|
||||
"""
|
||||
if len(content) <= max_length:
|
||||
return [content]
|
||||
|
||||
INDICATOR_RESERVE = 10 # room for " (XX/XX)"
|
||||
FENCE_CLOSE = "\n```"
|
||||
|
||||
chunks: List[str] = []
|
||||
remaining = content
|
||||
# When the previous chunk ended mid-code-block, this holds the
|
||||
# language tag (possibly "") so we can reopen the fence.
|
||||
carry_lang: Optional[str] = None
|
||||
|
||||
while remaining:
|
||||
# If we're continuing a code block from the previous chunk,
|
||||
# prepend a new opening fence with the same language tag.
|
||||
prefix = f"```{carry_lang}\n" if carry_lang is not None else ""
|
||||
|
||||
# How much body text we can fit after accounting for the prefix,
|
||||
# a potential closing fence, and the chunk indicator.
|
||||
headroom = max_length - INDICATOR_RESERVE - len(prefix) - len(FENCE_CLOSE)
|
||||
if headroom < 1:
|
||||
headroom = max_length // 2
|
||||
|
||||
# Everything remaining fits in one final chunk
|
||||
if len(prefix) + len(remaining) <= max_length - INDICATOR_RESERVE:
|
||||
chunks.append(prefix + remaining)
|
||||
|
||||
chunks = []
|
||||
while content:
|
||||
if len(content) <= max_length:
|
||||
chunks.append(content)
|
||||
break
|
||||
|
||||
# Find a natural split point (prefer newlines, then spaces)
|
||||
region = remaining[:headroom]
|
||||
split_at = region.rfind("\n")
|
||||
if split_at < headroom // 2:
|
||||
split_at = region.rfind(" ")
|
||||
if split_at < 1:
|
||||
split_at = headroom
|
||||
|
||||
chunk_body = remaining[:split_at]
|
||||
remaining = remaining[split_at:].lstrip()
|
||||
|
||||
full_chunk = prefix + chunk_body
|
||||
|
||||
# Walk only the chunk_body (not the prefix we prepended) to
|
||||
# determine whether we end inside an open code block.
|
||||
in_code = carry_lang is not None
|
||||
lang = carry_lang or ""
|
||||
for line in chunk_body.split("\n"):
|
||||
stripped = line.strip()
|
||||
if stripped.startswith("```"):
|
||||
if in_code:
|
||||
in_code = False
|
||||
lang = ""
|
||||
else:
|
||||
in_code = True
|
||||
tag = stripped[3:].strip()
|
||||
lang = tag.split()[0] if tag else ""
|
||||
|
||||
if in_code:
|
||||
# Close the orphaned fence so the chunk is valid on its own
|
||||
full_chunk += FENCE_CLOSE
|
||||
carry_lang = lang
|
||||
else:
|
||||
carry_lang = None
|
||||
|
||||
chunks.append(full_chunk)
|
||||
|
||||
# Append chunk indicators when the response spans multiple messages
|
||||
if len(chunks) > 1:
|
||||
total = len(chunks)
|
||||
chunks = [
|
||||
f"{chunk} ({i + 1}/{total})" for i, chunk in enumerate(chunks)
|
||||
]
|
||||
|
||||
|
||||
# Try to split at a newline
|
||||
split_idx = content.rfind("\n", 0, max_length)
|
||||
if split_idx == -1:
|
||||
# No newline, split at space
|
||||
split_idx = content.rfind(" ", 0, max_length)
|
||||
if split_idx == -1:
|
||||
# No space either, hard split
|
||||
split_idx = max_length
|
||||
|
||||
chunks.append(content[:split_idx])
|
||||
content = content[split_idx:].lstrip()
|
||||
|
||||
return chunks
|
||||
|
||||
@@ -8,12 +8,9 @@ Uses discord.py library for:
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
import discord
|
||||
from discord import Message as DiscordMessage, Intents
|
||||
@@ -27,8 +24,7 @@ except ImportError:
|
||||
commands = None
|
||||
|
||||
import sys
|
||||
from pathlib import Path as _Path
|
||||
sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
|
||||
sys.path.insert(0, str(__file__).rsplit("/", 3)[0])
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import (
|
||||
@@ -36,8 +32,6 @@ from gateway.platforms.base import (
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
SendResult,
|
||||
cache_image_from_url,
|
||||
cache_audio_from_url,
|
||||
)
|
||||
|
||||
|
||||
@@ -54,10 +48,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
- Receiving messages from servers and DMs
|
||||
- Sending responses with Discord markdown
|
||||
- Thread support
|
||||
- Native slash commands (/ask, /reset, /status, /stop)
|
||||
- Button-based exec approvals
|
||||
- Auto-threading for long conversations
|
||||
- Reaction-based feedback
|
||||
- Slash commands (future)
|
||||
"""
|
||||
|
||||
# Discord message limits
|
||||
@@ -67,7 +58,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
super().__init__(config, Platform.DISCORD)
|
||||
self._client: Optional[commands.Bot] = None
|
||||
self._ready_event = asyncio.Event()
|
||||
self._allowed_user_ids: set = set() # For button approval authorization
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to Discord and start receiving events."""
|
||||
@@ -80,12 +70,11 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
return False
|
||||
|
||||
try:
|
||||
# Set up intents -- members intent needed for username-to-ID resolution
|
||||
# Set up intents
|
||||
intents = Intents.default()
|
||||
intents.message_content = True
|
||||
intents.dm_messages = True
|
||||
intents.guild_messages = True
|
||||
intents.members = True
|
||||
|
||||
# Create bot
|
||||
self._client = commands.Bot(
|
||||
@@ -93,30 +82,11 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
intents=intents,
|
||||
)
|
||||
|
||||
# Parse allowed user entries (may contain usernames or IDs)
|
||||
allowed_env = os.getenv("DISCORD_ALLOWED_USERS", "")
|
||||
if allowed_env:
|
||||
self._allowed_user_ids = {
|
||||
uid.strip() for uid in allowed_env.split(",") if uid.strip()
|
||||
}
|
||||
|
||||
adapter_self = self # capture for closure
|
||||
|
||||
# Register event handlers
|
||||
@self._client.event
|
||||
async def on_ready():
|
||||
print(f"[{adapter_self.name}] Connected as {adapter_self._client.user}")
|
||||
|
||||
# Resolve any usernames in the allowed list to numeric IDs
|
||||
await adapter_self._resolve_allowed_usernames()
|
||||
|
||||
# Sync slash commands with Discord
|
||||
try:
|
||||
synced = await adapter_self._client.tree.sync()
|
||||
print(f"[{adapter_self.name}] Synced {len(synced)} slash command(s)")
|
||||
except Exception as e:
|
||||
print(f"[{adapter_self.name}] Slash command sync failed: {e}")
|
||||
adapter_self._ready_event.set()
|
||||
print(f"[{self.name}] Connected as {self._client.user}")
|
||||
self._ready_event.set()
|
||||
|
||||
@self._client.event
|
||||
async def on_message(message: DiscordMessage):
|
||||
@@ -125,9 +95,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
return
|
||||
await self._handle_message(message)
|
||||
|
||||
# Register slash commands
|
||||
self._register_slash_commands()
|
||||
|
||||
# Start the bot in background
|
||||
asyncio.create_task(self._client.start(self.config.token))
|
||||
|
||||
@@ -188,8 +155,8 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
try:
|
||||
ref_msg = await channel.fetch_message(int(reply_to))
|
||||
reference = ref_msg
|
||||
except Exception as e:
|
||||
logger.debug("Could not fetch reply-to message: %s", e)
|
||||
except Exception:
|
||||
pass # Ignore if we can't find the referenced message
|
||||
|
||||
for i, chunk in enumerate(chunks):
|
||||
msg = await channel.send(
|
||||
@@ -206,29 +173,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def edit_message(
|
||||
self,
|
||||
chat_id: str,
|
||||
message_id: str,
|
||||
content: str,
|
||||
) -> SendResult:
|
||||
"""Edit a previously sent Discord message."""
|
||||
if not self._client:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
try:
|
||||
channel = self._client.get_channel(int(chat_id))
|
||||
if not channel:
|
||||
channel = await self._client.fetch_channel(int(chat_id))
|
||||
msg = await channel.fetch_message(int(message_id))
|
||||
formatted = self.format_message(content)
|
||||
if len(formatted) > self.MAX_MESSAGE_LENGTH:
|
||||
formatted = formatted[:self.MAX_MESSAGE_LENGTH - 3] + "..."
|
||||
await msg.edit(content=formatted)
|
||||
return SendResult(success=True, message_id=message_id)
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
|
||||
async def send_voice(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -267,43 +212,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
print(f"[{self.name}] Failed to send audio: {e}")
|
||||
return await super().send_voice(chat_id, audio_path, caption, reply_to)
|
||||
|
||||
async def send_image_file(
|
||||
self,
|
||||
chat_id: str,
|
||||
image_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send a local image file natively as a Discord file attachment."""
|
||||
if not self._client:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
import io
|
||||
|
||||
channel = self._client.get_channel(int(chat_id))
|
||||
if not channel:
|
||||
channel = await self._client.fetch_channel(int(chat_id))
|
||||
if not channel:
|
||||
return SendResult(success=False, error=f"Channel {chat_id} not found")
|
||||
|
||||
if not os.path.exists(image_path):
|
||||
return SendResult(success=False, error=f"Image file not found: {image_path}")
|
||||
|
||||
filename = os.path.basename(image_path)
|
||||
|
||||
with open(image_path, "rb") as f:
|
||||
file = discord.File(io.BytesIO(f.read()), filename=filename)
|
||||
msg = await channel.send(
|
||||
content=caption if caption else None,
|
||||
file=file,
|
||||
)
|
||||
return SendResult(success=True, message_id=str(msg.id))
|
||||
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Failed to send local image: {e}")
|
||||
return await super().send_image_file(chat_id, image_path, caption, reply_to)
|
||||
|
||||
async def send_image(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -407,70 +315,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
except Exception as e:
|
||||
return {"name": str(chat_id), "type": "dm", "error": str(e)}
|
||||
|
||||
async def _resolve_allowed_usernames(self) -> None:
|
||||
"""
|
||||
Resolve non-numeric entries in DISCORD_ALLOWED_USERS to Discord user IDs.
|
||||
|
||||
Users can specify usernames (e.g. "teknium") or display names instead of
|
||||
raw numeric IDs. After resolution, the env var and internal set are updated
|
||||
so authorization checks work with IDs only.
|
||||
"""
|
||||
if not self._allowed_user_ids or not self._client:
|
||||
return
|
||||
|
||||
numeric_ids = set()
|
||||
to_resolve = set()
|
||||
|
||||
for entry in self._allowed_user_ids:
|
||||
if entry.isdigit():
|
||||
numeric_ids.add(entry)
|
||||
else:
|
||||
to_resolve.add(entry.lower())
|
||||
|
||||
if not to_resolve:
|
||||
return
|
||||
|
||||
print(f"[{self.name}] Resolving {len(to_resolve)} username(s): {', '.join(to_resolve)}")
|
||||
resolved_count = 0
|
||||
|
||||
for guild in self._client.guilds:
|
||||
# Fetch full member list (requires members intent)
|
||||
try:
|
||||
members = guild.members
|
||||
if len(members) < guild.member_count:
|
||||
members = [m async for m in guild.fetch_members(limit=None)]
|
||||
except Exception as e:
|
||||
logger.warning("Failed to fetch members for guild %s: %s", guild.name, e)
|
||||
continue
|
||||
|
||||
for member in members:
|
||||
name_lower = member.name.lower()
|
||||
display_lower = member.display_name.lower()
|
||||
global_lower = (member.global_name or "").lower()
|
||||
|
||||
matched = name_lower in to_resolve or display_lower in to_resolve or global_lower in to_resolve
|
||||
if matched:
|
||||
uid = str(member.id)
|
||||
numeric_ids.add(uid)
|
||||
resolved_count += 1
|
||||
matched_name = name_lower if name_lower in to_resolve else (
|
||||
display_lower if display_lower in to_resolve else global_lower
|
||||
)
|
||||
to_resolve.discard(matched_name)
|
||||
print(f"[{self.name}] Resolved '{matched_name}' -> {uid} ({member.name}#{member.discriminator})")
|
||||
|
||||
if not to_resolve:
|
||||
break
|
||||
|
||||
if to_resolve:
|
||||
print(f"[{self.name}] Could not resolve usernames: {', '.join(to_resolve)}")
|
||||
|
||||
# Update internal set and env var so gateway auth checks use IDs
|
||||
self._allowed_user_ids = numeric_ids
|
||||
os.environ["DISCORD_ALLOWED_USERS"] = ",".join(sorted(numeric_ids))
|
||||
if resolved_count:
|
||||
print(f"[{self.name}] Updated DISCORD_ALLOWED_USERS with {resolved_count} resolved ID(s)")
|
||||
|
||||
def format_message(self, content: str) -> str:
|
||||
"""
|
||||
Format message for Discord.
|
||||
@@ -480,275 +324,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
# Discord markdown is fairly standard, no special escaping needed
|
||||
return content
|
||||
|
||||
def _register_slash_commands(self) -> None:
|
||||
"""Register Discord slash commands on the command tree."""
|
||||
if not self._client:
|
||||
return
|
||||
|
||||
tree = self._client.tree
|
||||
|
||||
@tree.command(name="ask", description="Ask Hermes a question")
|
||||
@discord.app_commands.describe(question="Your question for Hermes")
|
||||
async def slash_ask(interaction: discord.Interaction, question: str):
|
||||
await interaction.response.defer()
|
||||
event = self._build_slash_event(interaction, question)
|
||||
await self.handle_message(event)
|
||||
# The response is sent via the normal send() flow
|
||||
# Send a followup to close the interaction if needed
|
||||
try:
|
||||
await interaction.followup.send("Processing complete~", ephemeral=True)
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="new", description="Start a new conversation")
|
||||
async def slash_new(interaction: discord.Interaction):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, "/reset")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("New conversation started~", ephemeral=True)
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="reset", description="Reset your Hermes session")
|
||||
async def slash_reset(interaction: discord.Interaction):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, "/reset")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Session reset~", ephemeral=True)
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="model", description="Show or change the model")
|
||||
@discord.app_commands.describe(name="Model name (e.g. anthropic/claude-sonnet-4). Leave empty to see current.")
|
||||
async def slash_model(interaction: discord.Interaction, name: str = ""):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, f"/model {name}".strip())
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Done~", ephemeral=True)
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="personality", description="Set a personality")
|
||||
@discord.app_commands.describe(name="Personality name. Leave empty to list available.")
|
||||
async def slash_personality(interaction: discord.Interaction, name: str = ""):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, f"/personality {name}".strip())
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Done~", ephemeral=True)
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="retry", description="Retry your last message")
|
||||
async def slash_retry(interaction: discord.Interaction):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, "/retry")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Retrying~", ephemeral=True)
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="undo", description="Remove the last exchange")
|
||||
async def slash_undo(interaction: discord.Interaction):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, "/undo")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Done~", ephemeral=True)
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="status", description="Show Hermes session status")
|
||||
async def slash_status(interaction: discord.Interaction):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, "/status")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Status sent~", ephemeral=True)
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="sethome", description="Set this chat as the home channel")
|
||||
async def slash_sethome(interaction: discord.Interaction):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, "/sethome")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Done~", ephemeral=True)
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="stop", description="Stop the running Hermes agent")
|
||||
async def slash_stop(interaction: discord.Interaction):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, "/stop")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Stop requested~", ephemeral=True)
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="compress", description="Compress conversation context")
|
||||
async def slash_compress(interaction: discord.Interaction):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, "/compress")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Done~", ephemeral=True)
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="title", description="Set or show the session title")
|
||||
@discord.app_commands.describe(name="Session title. Leave empty to show current.")
|
||||
async def slash_title(interaction: discord.Interaction, name: str = ""):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, f"/title {name}".strip())
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Done~", ephemeral=True)
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="resume", description="Resume a previously-named session")
|
||||
@discord.app_commands.describe(name="Session name to resume. Leave empty to list sessions.")
|
||||
async def slash_resume(interaction: discord.Interaction, name: str = ""):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, f"/resume {name}".strip())
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Done~", ephemeral=True)
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="usage", description="Show token usage for this session")
|
||||
async def slash_usage(interaction: discord.Interaction):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, "/usage")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Done~", ephemeral=True)
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="provider", description="Show available providers")
|
||||
async def slash_provider(interaction: discord.Interaction):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, "/provider")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Done~", ephemeral=True)
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="help", description="Show available commands")
|
||||
async def slash_help(interaction: discord.Interaction):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, "/help")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Done~", ephemeral=True)
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="insights", description="Show usage insights and analytics")
|
||||
@discord.app_commands.describe(days="Number of days to analyze (default: 7)")
|
||||
async def slash_insights(interaction: discord.Interaction, days: int = 7):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, f"/insights {days}")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Done~", ephemeral=True)
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="reload-mcp", description="Reload MCP servers from config")
|
||||
async def slash_reload_mcp(interaction: discord.Interaction):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, "/reload-mcp")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Done~", ephemeral=True)
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="update", description="Update Hermes Agent to the latest version")
|
||||
async def slash_update(interaction: discord.Interaction):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, "/update")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Update initiated~", ephemeral=True)
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
def _build_slash_event(self, interaction: discord.Interaction, text: str) -> MessageEvent:
|
||||
"""Build a MessageEvent from a Discord slash command interaction."""
|
||||
is_dm = isinstance(interaction.channel, discord.DMChannel)
|
||||
chat_type = "dm" if is_dm else "group"
|
||||
chat_name = ""
|
||||
if not is_dm and hasattr(interaction.channel, "name"):
|
||||
chat_name = interaction.channel.name
|
||||
if hasattr(interaction.channel, "guild") and interaction.channel.guild:
|
||||
chat_name = f"{interaction.channel.guild.name} / #{chat_name}"
|
||||
|
||||
# Get channel topic (if available)
|
||||
chat_topic = getattr(interaction.channel, "topic", None)
|
||||
|
||||
source = self.build_source(
|
||||
chat_id=str(interaction.channel_id),
|
||||
chat_name=chat_name,
|
||||
chat_type=chat_type,
|
||||
user_id=str(interaction.user.id),
|
||||
user_name=interaction.user.display_name,
|
||||
chat_topic=chat_topic,
|
||||
)
|
||||
|
||||
msg_type = MessageType.COMMAND if text.startswith("/") else MessageType.TEXT
|
||||
return MessageEvent(
|
||||
text=text,
|
||||
message_type=msg_type,
|
||||
source=source,
|
||||
raw_message=interaction,
|
||||
)
|
||||
|
||||
async def send_exec_approval(
|
||||
self, chat_id: str, command: str, approval_id: str
|
||||
) -> SendResult:
|
||||
"""
|
||||
Send a button-based exec approval prompt for a dangerous command.
|
||||
|
||||
Returns SendResult. The approval is resolved when a user clicks a button.
|
||||
"""
|
||||
if not self._client or not DISCORD_AVAILABLE:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
channel = self._client.get_channel(int(chat_id))
|
||||
if not channel:
|
||||
channel = await self._client.fetch_channel(int(chat_id))
|
||||
|
||||
embed = discord.Embed(
|
||||
title="Command Approval Required",
|
||||
description=f"```\n{command[:500]}\n```",
|
||||
color=discord.Color.orange(),
|
||||
)
|
||||
embed.set_footer(text=f"Approval ID: {approval_id}")
|
||||
|
||||
view = ExecApprovalView(
|
||||
approval_id=approval_id,
|
||||
allowed_user_ids=self._allowed_user_ids,
|
||||
)
|
||||
|
||||
msg = await channel.send(embed=embed, view=view)
|
||||
return SendResult(success=True, message_id=str(msg.id))
|
||||
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def _handle_message(self, message: DiscordMessage) -> None:
|
||||
"""Handle incoming Discord messages."""
|
||||
# In server channels (not DMs), require the bot to be @mentioned
|
||||
@@ -817,9 +392,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if isinstance(message.channel, discord.Thread):
|
||||
thread_id = str(message.channel.id)
|
||||
|
||||
# Get channel topic (if available - TextChannels have topics, DMs/threads don't)
|
||||
chat_topic = getattr(message.channel, "topic", None)
|
||||
|
||||
# Build source
|
||||
source = self.build_source(
|
||||
chat_id=str(message.channel.id),
|
||||
@@ -828,47 +400,11 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
user_id=str(message.author.id),
|
||||
user_name=message.author.display_name,
|
||||
thread_id=thread_id,
|
||||
chat_topic=chat_topic,
|
||||
)
|
||||
|
||||
# Build media URLs -- download image attachments to local cache so the
|
||||
# vision tool can access them reliably (Discord CDN URLs can expire).
|
||||
media_urls = []
|
||||
media_types = []
|
||||
for att in message.attachments:
|
||||
content_type = att.content_type or "unknown"
|
||||
if content_type.startswith("image/"):
|
||||
try:
|
||||
# Determine extension from content type (image/png -> .png)
|
||||
ext = "." + content_type.split("/")[-1].split(";")[0]
|
||||
if ext not in (".jpg", ".jpeg", ".png", ".gif", ".webp"):
|
||||
ext = ".jpg"
|
||||
cached_path = await cache_image_from_url(att.url, ext=ext)
|
||||
media_urls.append(cached_path)
|
||||
media_types.append(content_type)
|
||||
print(f"[Discord] Cached user image: {cached_path}", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[Discord] Failed to cache image attachment: {e}", flush=True)
|
||||
# Fall back to the CDN URL if caching fails
|
||||
media_urls.append(att.url)
|
||||
media_types.append(content_type)
|
||||
elif content_type.startswith("audio/"):
|
||||
try:
|
||||
ext = "." + content_type.split("/")[-1].split(";")[0]
|
||||
if ext not in (".ogg", ".mp3", ".wav", ".webm", ".m4a"):
|
||||
ext = ".ogg"
|
||||
cached_path = await cache_audio_from_url(att.url, ext=ext)
|
||||
media_urls.append(cached_path)
|
||||
media_types.append(content_type)
|
||||
print(f"[Discord] Cached user audio: {cached_path}", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[Discord] Failed to cache audio attachment: {e}", flush=True)
|
||||
media_urls.append(att.url)
|
||||
media_types.append(content_type)
|
||||
else:
|
||||
# Other attachments: keep the original URL
|
||||
media_urls.append(att.url)
|
||||
media_types.append(content_type)
|
||||
# Build media URLs
|
||||
media_urls = [att.url for att in message.attachments]
|
||||
media_types = [att.content_type or "unknown" for att in message.attachments]
|
||||
|
||||
event = MessageEvent(
|
||||
text=message.content,
|
||||
@@ -883,94 +419,3 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
)
|
||||
|
||||
await self.handle_message(event)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Discord UI Components (outside the adapter class)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
if DISCORD_AVAILABLE:
|
||||
|
||||
class ExecApprovalView(discord.ui.View):
|
||||
"""
|
||||
Interactive button view for exec approval of dangerous commands.
|
||||
|
||||
Shows three buttons: Allow Once (green), Always Allow (blue), Deny (red).
|
||||
Only users in the allowed list can click. The view times out after 5 minutes.
|
||||
"""
|
||||
|
||||
def __init__(self, approval_id: str, allowed_user_ids: set):
|
||||
super().__init__(timeout=300) # 5-minute timeout
|
||||
self.approval_id = approval_id
|
||||
self.allowed_user_ids = allowed_user_ids
|
||||
self.resolved = False
|
||||
|
||||
def _check_auth(self, interaction: discord.Interaction) -> bool:
|
||||
"""Verify the user clicking is authorized."""
|
||||
if not self.allowed_user_ids:
|
||||
return True # No allowlist = anyone can approve
|
||||
return str(interaction.user.id) in self.allowed_user_ids
|
||||
|
||||
async def _resolve(
|
||||
self, interaction: discord.Interaction, action: str, color: discord.Color
|
||||
):
|
||||
"""Resolve the approval and update the message."""
|
||||
if self.resolved:
|
||||
await interaction.response.send_message(
|
||||
"This approval has already been resolved~", ephemeral=True
|
||||
)
|
||||
return
|
||||
|
||||
if not self._check_auth(interaction):
|
||||
await interaction.response.send_message(
|
||||
"You're not authorized to approve commands~", ephemeral=True
|
||||
)
|
||||
return
|
||||
|
||||
self.resolved = True
|
||||
|
||||
# Update the embed with the decision
|
||||
embed = interaction.message.embeds[0] if interaction.message.embeds else None
|
||||
if embed:
|
||||
embed.color = color
|
||||
embed.set_footer(text=f"{action} by {interaction.user.display_name}")
|
||||
|
||||
# Disable all buttons
|
||||
for child in self.children:
|
||||
child.disabled = True
|
||||
|
||||
await interaction.response.edit_message(embed=embed, view=self)
|
||||
|
||||
# Store the approval decision
|
||||
try:
|
||||
from tools.approval import approve_permanent
|
||||
if action == "allow_once":
|
||||
pass # One-time approval handled by gateway
|
||||
elif action == "allow_always":
|
||||
approve_permanent(self.approval_id)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
@discord.ui.button(label="Allow Once", style=discord.ButtonStyle.green)
|
||||
async def allow_once(
|
||||
self, interaction: discord.Interaction, button: discord.ui.Button
|
||||
):
|
||||
await self._resolve(interaction, "allow_once", discord.Color.green())
|
||||
|
||||
@discord.ui.button(label="Always Allow", style=discord.ButtonStyle.blurple)
|
||||
async def allow_always(
|
||||
self, interaction: discord.Interaction, button: discord.ui.Button
|
||||
):
|
||||
await self._resolve(interaction, "allow_always", discord.Color.blue())
|
||||
|
||||
@discord.ui.button(label="Deny", style=discord.ButtonStyle.red)
|
||||
async def deny(
|
||||
self, interaction: discord.Interaction, button: discord.ui.Button
|
||||
):
|
||||
await self._resolve(interaction, "deny", discord.Color.red())
|
||||
|
||||
async def on_timeout(self):
|
||||
"""Handle view timeout -- disable buttons and mark as expired."""
|
||||
self.resolved = True
|
||||
for child in self.children:
|
||||
child.disabled = True
|
||||
|
||||
@@ -1,432 +0,0 @@
|
||||
"""
|
||||
Home Assistant platform adapter.
|
||||
|
||||
Connects to the HA WebSocket API for real-time event monitoring.
|
||||
State-change events are converted to MessageEvent objects and forwarded
|
||||
to the agent for processing. Outbound messages are delivered as HA
|
||||
persistent notifications.
|
||||
|
||||
Requires:
|
||||
- aiohttp (already in messaging extras)
|
||||
- HASS_TOKEN env var (Long-Lived Access Token)
|
||||
- HASS_URL env var (default: http://homeassistant.local:8123)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional, Set
|
||||
|
||||
try:
|
||||
import aiohttp
|
||||
AIOHTTP_AVAILABLE = True
|
||||
except ImportError:
|
||||
AIOHTTP_AVAILABLE = False
|
||||
aiohttp = None # type: ignore[assignment]
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
SendResult,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def check_ha_requirements() -> bool:
|
||||
"""Check if Home Assistant dependencies are available and configured."""
|
||||
if not AIOHTTP_AVAILABLE:
|
||||
return False
|
||||
if not os.getenv("HASS_TOKEN"):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
class HomeAssistantAdapter(BasePlatformAdapter):
|
||||
"""
|
||||
Home Assistant WebSocket adapter.
|
||||
|
||||
Subscribes to ``state_changed`` events and forwards them as
|
||||
MessageEvent objects. Supports domain/entity filtering and
|
||||
per-entity cooldowns to avoid event floods.
|
||||
"""
|
||||
|
||||
MAX_MESSAGE_LENGTH = 4096
|
||||
|
||||
# Reconnection backoff schedule (seconds)
|
||||
_BACKOFF_STEPS = [5, 10, 30, 60]
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.HOMEASSISTANT)
|
||||
|
||||
# Connection state
|
||||
self._session: Optional["aiohttp.ClientSession"] = None
|
||||
self._ws: Optional["aiohttp.ClientWebSocketResponse"] = None
|
||||
self._rest_session: Optional["aiohttp.ClientSession"] = None
|
||||
self._listen_task: Optional[asyncio.Task] = None
|
||||
self._msg_id: int = 0
|
||||
|
||||
# Configuration from extra
|
||||
extra = config.extra or {}
|
||||
token = config.token or os.getenv("HASS_TOKEN", "")
|
||||
url = extra.get("url") or os.getenv("HASS_URL", "http://homeassistant.local:8123")
|
||||
self._hass_url: str = url.rstrip("/")
|
||||
self._hass_token: str = token
|
||||
|
||||
# Event filtering
|
||||
self._watch_domains: Set[str] = set(extra.get("watch_domains", []))
|
||||
self._watch_entities: Set[str] = set(extra.get("watch_entities", []))
|
||||
self._ignore_entities: Set[str] = set(extra.get("ignore_entities", []))
|
||||
self._cooldown_seconds: int = int(extra.get("cooldown_seconds", 30))
|
||||
|
||||
# Cooldown tracking: entity_id -> last_event_timestamp
|
||||
self._last_event_time: Dict[str, float] = {}
|
||||
|
||||
def _next_id(self) -> int:
|
||||
"""Return the next WebSocket message ID."""
|
||||
self._msg_id += 1
|
||||
return self._msg_id
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Connection lifecycle
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to HA WebSocket API and subscribe to events."""
|
||||
if not AIOHTTP_AVAILABLE:
|
||||
logger.warning("[%s] aiohttp not installed. Run: pip install aiohttp", self.name)
|
||||
return False
|
||||
|
||||
if not self._hass_token:
|
||||
logger.warning("[%s] No HASS_TOKEN configured", self.name)
|
||||
return False
|
||||
|
||||
try:
|
||||
success = await self._ws_connect()
|
||||
if not success:
|
||||
return False
|
||||
|
||||
# Dedicated REST session for send() calls
|
||||
self._rest_session = aiohttp.ClientSession()
|
||||
|
||||
# Start background listener
|
||||
self._listen_task = asyncio.create_task(self._listen_loop())
|
||||
self._running = True
|
||||
logger.info("[%s] Connected to %s", self.name, self._hass_url)
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error("[%s] Failed to connect: %s", self.name, e)
|
||||
return False
|
||||
|
||||
async def _ws_connect(self) -> bool:
|
||||
"""Establish WebSocket connection and authenticate."""
|
||||
ws_url = self._hass_url.replace("http://", "ws://").replace("https://", "wss://")
|
||||
ws_url = f"{ws_url}/api/websocket"
|
||||
|
||||
self._session = aiohttp.ClientSession()
|
||||
self._ws = await self._session.ws_connect(ws_url, heartbeat=30)
|
||||
|
||||
# Step 1: Receive auth_required
|
||||
msg = await self._ws.receive_json()
|
||||
if msg.get("type") != "auth_required":
|
||||
logger.error("Expected auth_required, got: %s", msg.get("type"))
|
||||
await self._cleanup_ws()
|
||||
return False
|
||||
|
||||
# Step 2: Send auth
|
||||
await self._ws.send_json({
|
||||
"type": "auth",
|
||||
"access_token": self._hass_token,
|
||||
})
|
||||
|
||||
# Step 3: Wait for auth_ok
|
||||
msg = await self._ws.receive_json()
|
||||
if msg.get("type") != "auth_ok":
|
||||
logger.error("Auth failed: %s", msg)
|
||||
await self._cleanup_ws()
|
||||
return False
|
||||
|
||||
# Step 4: Subscribe to state_changed events
|
||||
sub_id = self._next_id()
|
||||
await self._ws.send_json({
|
||||
"id": sub_id,
|
||||
"type": "subscribe_events",
|
||||
"event_type": "state_changed",
|
||||
})
|
||||
|
||||
# Verify subscription acknowledgement
|
||||
msg = await self._ws.receive_json()
|
||||
if not msg.get("success"):
|
||||
logger.error("Failed to subscribe to events: %s", msg)
|
||||
await self._cleanup_ws()
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
async def _cleanup_ws(self) -> None:
|
||||
"""Close WebSocket and session."""
|
||||
if self._ws and not self._ws.closed:
|
||||
await self._ws.close()
|
||||
self._ws = None
|
||||
if self._session and not self._session.closed:
|
||||
await self._session.close()
|
||||
self._session = None
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
"""Disconnect from Home Assistant."""
|
||||
self._running = False
|
||||
if self._listen_task:
|
||||
self._listen_task.cancel()
|
||||
try:
|
||||
await self._listen_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
self._listen_task = None
|
||||
|
||||
await self._cleanup_ws()
|
||||
if self._rest_session and not self._rest_session.closed:
|
||||
await self._rest_session.close()
|
||||
self._rest_session = None
|
||||
logger.info("[%s] Disconnected", self.name)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Event listener
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _listen_loop(self) -> None:
|
||||
"""Main event loop with automatic reconnection."""
|
||||
backoff_idx = 0
|
||||
|
||||
while self._running:
|
||||
try:
|
||||
await self._read_events()
|
||||
except asyncio.CancelledError:
|
||||
return
|
||||
except Exception as e:
|
||||
logger.warning("[%s] WebSocket error: %s", self.name, e)
|
||||
|
||||
if not self._running:
|
||||
return
|
||||
|
||||
# Reconnect with backoff
|
||||
delay = self._BACKOFF_STEPS[min(backoff_idx, len(self._BACKOFF_STEPS) - 1)]
|
||||
logger.info("[%s] Reconnecting in %ds...", self.name, delay)
|
||||
await asyncio.sleep(delay)
|
||||
backoff_idx += 1
|
||||
|
||||
try:
|
||||
await self._cleanup_ws()
|
||||
success = await self._ws_connect()
|
||||
if success:
|
||||
backoff_idx = 0 # Reset on successful reconnect
|
||||
logger.info("[%s] Reconnected", self.name)
|
||||
except Exception as e:
|
||||
logger.warning("[%s] Reconnection failed: %s", self.name, e)
|
||||
|
||||
async def _read_events(self) -> None:
|
||||
"""Read events from WebSocket until disconnected."""
|
||||
if self._ws is None or self._ws.closed:
|
||||
return
|
||||
async for ws_msg in self._ws:
|
||||
if ws_msg.type == aiohttp.WSMsgType.TEXT:
|
||||
try:
|
||||
data = json.loads(ws_msg.data)
|
||||
if data.get("type") == "event":
|
||||
await self._handle_ha_event(data.get("event", {}))
|
||||
except json.JSONDecodeError:
|
||||
logger.debug("Invalid JSON from HA WS: %s", ws_msg.data[:200])
|
||||
elif ws_msg.type in (aiohttp.WSMsgType.CLOSED, aiohttp.WSMsgType.ERROR):
|
||||
break
|
||||
|
||||
async def _handle_ha_event(self, event: Dict[str, Any]) -> None:
|
||||
"""Process a state_changed event from Home Assistant."""
|
||||
event_data = event.get("data", {})
|
||||
entity_id: str = event_data.get("entity_id", "")
|
||||
|
||||
if not entity_id:
|
||||
return
|
||||
|
||||
# Apply ignore filter
|
||||
if entity_id in self._ignore_entities:
|
||||
return
|
||||
|
||||
# Apply domain/entity watch filters
|
||||
domain = entity_id.split(".")[0] if "." in entity_id else ""
|
||||
if self._watch_domains or self._watch_entities:
|
||||
domain_match = domain in self._watch_domains if self._watch_domains else False
|
||||
entity_match = entity_id in self._watch_entities if self._watch_entities else False
|
||||
if not domain_match and not entity_match:
|
||||
return
|
||||
|
||||
# Apply cooldown
|
||||
now = time.time()
|
||||
last = self._last_event_time.get(entity_id, 0)
|
||||
if (now - last) < self._cooldown_seconds:
|
||||
return
|
||||
self._last_event_time[entity_id] = now
|
||||
|
||||
# Build human-readable message
|
||||
old_state = event_data.get("old_state", {})
|
||||
new_state = event_data.get("new_state", {})
|
||||
message = self._format_state_change(entity_id, old_state, new_state)
|
||||
|
||||
if not message:
|
||||
return
|
||||
|
||||
# Build MessageEvent and forward to handler
|
||||
source = self.build_source(
|
||||
chat_id="ha_events",
|
||||
chat_name="Home Assistant Events",
|
||||
chat_type="channel",
|
||||
user_id="homeassistant",
|
||||
user_name="Home Assistant",
|
||||
)
|
||||
|
||||
msg_event = MessageEvent(
|
||||
text=message,
|
||||
message_type=MessageType.TEXT,
|
||||
source=source,
|
||||
message_id=f"ha_{entity_id}_{int(now)}",
|
||||
timestamp=datetime.now(),
|
||||
)
|
||||
|
||||
await self.handle_message(msg_event)
|
||||
|
||||
@staticmethod
|
||||
def _format_state_change(
|
||||
entity_id: str,
|
||||
old_state: Dict[str, Any],
|
||||
new_state: Dict[str, Any],
|
||||
) -> Optional[str]:
|
||||
"""Convert a state_changed event into a human-readable description."""
|
||||
if not new_state:
|
||||
return None
|
||||
|
||||
old_val = old_state.get("state", "unknown") if old_state else "unknown"
|
||||
new_val = new_state.get("state", "unknown")
|
||||
|
||||
# Skip if state didn't actually change
|
||||
if old_val == new_val:
|
||||
return None
|
||||
|
||||
friendly_name = new_state.get("attributes", {}).get("friendly_name", entity_id)
|
||||
domain = entity_id.split(".")[0] if "." in entity_id else ""
|
||||
|
||||
# Domain-specific formatting
|
||||
if domain == "climate":
|
||||
attrs = new_state.get("attributes", {})
|
||||
temp = attrs.get("current_temperature", "?")
|
||||
target = attrs.get("temperature", "?")
|
||||
return (
|
||||
f"[Home Assistant] {friendly_name}: HVAC mode changed from "
|
||||
f"'{old_val}' to '{new_val}' (current: {temp}, target: {target})"
|
||||
)
|
||||
|
||||
if domain == "sensor":
|
||||
unit = new_state.get("attributes", {}).get("unit_of_measurement", "")
|
||||
return (
|
||||
f"[Home Assistant] {friendly_name}: changed from "
|
||||
f"{old_val}{unit} to {new_val}{unit}"
|
||||
)
|
||||
|
||||
if domain == "binary_sensor":
|
||||
return (
|
||||
f"[Home Assistant] {friendly_name}: "
|
||||
f"{'triggered' if new_val == 'on' else 'cleared'} "
|
||||
f"(was {'triggered' if old_val == 'on' else 'cleared'})"
|
||||
)
|
||||
|
||||
if domain in ("light", "switch", "fan"):
|
||||
return (
|
||||
f"[Home Assistant] {friendly_name}: turned "
|
||||
f"{'on' if new_val == 'on' else 'off'}"
|
||||
)
|
||||
|
||||
if domain == "alarm_control_panel":
|
||||
return (
|
||||
f"[Home Assistant] {friendly_name}: alarm state changed from "
|
||||
f"'{old_val}' to '{new_val}'"
|
||||
)
|
||||
|
||||
# Generic fallback
|
||||
return (
|
||||
f"[Home Assistant] {friendly_name} ({entity_id}): "
|
||||
f"changed from '{old_val}' to '{new_val}'"
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Outbound messaging
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def send(
|
||||
self,
|
||||
chat_id: str,
|
||||
content: str,
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send a notification via HA REST API (persistent_notification.create).
|
||||
|
||||
Uses the REST API instead of WebSocket to avoid a race condition
|
||||
with the event listener loop that reads from the same WS connection.
|
||||
"""
|
||||
url = f"{self._hass_url}/api/services/persistent_notification/create"
|
||||
headers = {
|
||||
"Authorization": f"Bearer {self._hass_token}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
payload = {
|
||||
"title": "Hermes Agent",
|
||||
"message": content[:self.MAX_MESSAGE_LENGTH],
|
||||
}
|
||||
|
||||
try:
|
||||
if self._rest_session:
|
||||
async with self._rest_session.post(
|
||||
url,
|
||||
headers=headers,
|
||||
json=payload,
|
||||
timeout=aiohttp.ClientTimeout(total=10),
|
||||
) as resp:
|
||||
if resp.status < 300:
|
||||
return SendResult(success=True, message_id=uuid.uuid4().hex[:12])
|
||||
else:
|
||||
body = await resp.text()
|
||||
return SendResult(success=False, error=f"HTTP {resp.status}: {body}")
|
||||
else:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(
|
||||
url,
|
||||
headers=headers,
|
||||
json=payload,
|
||||
timeout=aiohttp.ClientTimeout(total=10),
|
||||
) as resp:
|
||||
if resp.status < 300:
|
||||
return SendResult(success=True, message_id=uuid.uuid4().hex[:12])
|
||||
else:
|
||||
body = await resp.text()
|
||||
return SendResult(success=False, error=f"HTTP {resp.status}: {body}")
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
return SendResult(success=False, error="Timeout sending notification to HA")
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_typing(self, chat_id: str) -> None:
|
||||
"""No typing indicator for Home Assistant."""
|
||||
pass
|
||||
|
||||
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
|
||||
"""Return basic info about the HA event channel."""
|
||||
return {
|
||||
"name": "Home Assistant Events",
|
||||
"type": "channel",
|
||||
"url": self._hass_url,
|
||||
}
|
||||
@@ -1,716 +0,0 @@
|
||||
"""Signal messenger platform adapter.
|
||||
|
||||
Connects to a signal-cli daemon running in HTTP mode.
|
||||
Inbound messages arrive via SSE (Server-Sent Events) streaming.
|
||||
Outbound messages and actions use JSON-RPC 2.0 over HTTP.
|
||||
|
||||
Based on PR #268 by ibhagwan, rebuilt with bug fixes.
|
||||
|
||||
Requires:
|
||||
- signal-cli installed and running: signal-cli daemon --http 127.0.0.1:8080
|
||||
- SIGNAL_HTTP_URL and SIGNAL_ACCOUNT environment variables set
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any
|
||||
from urllib.parse import unquote
|
||||
|
||||
import httpx
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
SendResult,
|
||||
cache_image_from_bytes,
|
||||
cache_audio_from_bytes,
|
||||
cache_document_from_bytes,
|
||||
cache_image_from_url,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
SIGNAL_MAX_ATTACHMENT_SIZE = 100 * 1024 * 1024 # 100 MB
|
||||
MAX_MESSAGE_LENGTH = 8000 # Signal message size limit
|
||||
TYPING_INTERVAL = 8.0 # seconds between typing indicator refreshes
|
||||
SSE_RETRY_DELAY_INITIAL = 2.0
|
||||
SSE_RETRY_DELAY_MAX = 60.0
|
||||
HEALTH_CHECK_INTERVAL = 30.0 # seconds between health checks
|
||||
HEALTH_CHECK_STALE_THRESHOLD = 120.0 # seconds without SSE activity before concern
|
||||
|
||||
# E.164 phone number pattern for redaction
|
||||
_PHONE_RE = re.compile(r"\+[1-9]\d{6,14}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _redact_phone(phone: str) -> str:
|
||||
"""Redact a phone number for logging: +15551234567 -> +155****4567."""
|
||||
if not phone:
|
||||
return "<none>"
|
||||
if len(phone) <= 8:
|
||||
return phone[:2] + "****" + phone[-2:] if len(phone) > 4 else "****"
|
||||
return phone[:4] + "****" + phone[-4:]
|
||||
|
||||
|
||||
def _parse_comma_list(value: str) -> List[str]:
|
||||
"""Split a comma-separated string into a list, stripping whitespace."""
|
||||
return [v.strip() for v in value.split(",") if v.strip()]
|
||||
|
||||
|
||||
def _guess_extension(data: bytes) -> str:
|
||||
"""Guess file extension from magic bytes."""
|
||||
if data[:4] == b"\x89PNG":
|
||||
return ".png"
|
||||
if data[:2] == b"\xff\xd8":
|
||||
return ".jpg"
|
||||
if data[:4] == b"GIF8":
|
||||
return ".gif"
|
||||
if len(data) >= 12 and data[:4] == b"RIFF" and data[8:12] == b"WEBP":
|
||||
return ".webp"
|
||||
if data[:4] == b"%PDF":
|
||||
return ".pdf"
|
||||
if len(data) >= 8 and data[4:8] == b"ftyp":
|
||||
return ".mp4"
|
||||
if data[:4] == b"OggS":
|
||||
return ".ogg"
|
||||
if len(data) >= 2 and data[0] == 0xFF and (data[1] & 0xE0) == 0xE0:
|
||||
return ".mp3"
|
||||
if data[:2] == b"PK":
|
||||
return ".zip"
|
||||
return ".bin"
|
||||
|
||||
|
||||
def _is_image_ext(ext: str) -> bool:
|
||||
return ext.lower() in (".jpg", ".jpeg", ".png", ".gif", ".webp")
|
||||
|
||||
|
||||
def _is_audio_ext(ext: str) -> bool:
|
||||
return ext.lower() in (".mp3", ".wav", ".ogg", ".m4a", ".aac")
|
||||
|
||||
|
||||
def _render_mentions(text: str, mentions: list) -> str:
|
||||
"""Replace Signal mention placeholders (\\uFFFC) with readable @identifiers.
|
||||
|
||||
Signal encodes @mentions as the Unicode object replacement character
|
||||
with out-of-band metadata containing the mentioned user's UUID/number.
|
||||
"""
|
||||
if not mentions or "\uFFFC" not in text:
|
||||
return text
|
||||
# Sort mentions by start position (reverse) to replace from end to start
|
||||
# so indices don't shift as we replace
|
||||
sorted_mentions = sorted(mentions, key=lambda m: m.get("start", 0), reverse=True)
|
||||
for mention in sorted_mentions:
|
||||
start = mention.get("start", 0)
|
||||
length = mention.get("length", 1)
|
||||
# Use the mention's number or UUID as the replacement
|
||||
identifier = mention.get("number") or mention.get("uuid") or "user"
|
||||
replacement = f"@{identifier}"
|
||||
text = text[:start] + replacement + text[start + length:]
|
||||
return text
|
||||
|
||||
|
||||
def check_signal_requirements() -> bool:
|
||||
"""Check if Signal is configured (has URL and account)."""
|
||||
return bool(os.getenv("SIGNAL_HTTP_URL") and os.getenv("SIGNAL_ACCOUNT"))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Signal Adapter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class SignalAdapter(BasePlatformAdapter):
|
||||
"""Signal messenger adapter using signal-cli HTTP daemon."""
|
||||
|
||||
platform = Platform.SIGNAL
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.SIGNAL)
|
||||
|
||||
extra = config.extra or {}
|
||||
self.http_url = extra.get("http_url", "http://127.0.0.1:8080").rstrip("/")
|
||||
self.account = extra.get("account", "")
|
||||
self.ignore_stories = extra.get("ignore_stories", True)
|
||||
|
||||
# Parse allowlists — group policy is derived from presence of group allowlist
|
||||
group_allowed_str = os.getenv("SIGNAL_GROUP_ALLOWED_USERS", "")
|
||||
self.group_allow_from = set(_parse_comma_list(group_allowed_str))
|
||||
|
||||
# HTTP client
|
||||
self.client: Optional[httpx.AsyncClient] = None
|
||||
|
||||
# Background tasks
|
||||
self._sse_task: Optional[asyncio.Task] = None
|
||||
self._health_monitor_task: Optional[asyncio.Task] = None
|
||||
self._typing_tasks: Dict[str, asyncio.Task] = {}
|
||||
self._running = False
|
||||
self._last_sse_activity = 0.0
|
||||
self._sse_response: Optional[httpx.Response] = None
|
||||
|
||||
# Normalize account for self-message filtering
|
||||
self._account_normalized = self.account.strip()
|
||||
|
||||
logger.info("Signal adapter initialized: url=%s account=%s groups=%s",
|
||||
self.http_url, _redact_phone(self.account),
|
||||
"enabled" if self.group_allow_from else "disabled")
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Lifecycle
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to signal-cli daemon and start SSE listener."""
|
||||
if not self.http_url or not self.account:
|
||||
logger.error("Signal: SIGNAL_HTTP_URL and SIGNAL_ACCOUNT are required")
|
||||
return False
|
||||
|
||||
self.client = httpx.AsyncClient(timeout=30.0)
|
||||
|
||||
# Health check — verify signal-cli daemon is reachable
|
||||
try:
|
||||
resp = await self.client.get(f"{self.http_url}/api/v1/check", timeout=10.0)
|
||||
if resp.status_code != 200:
|
||||
logger.error("Signal: health check failed (status %d)", resp.status_code)
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error("Signal: cannot reach signal-cli at %s: %s", self.http_url, e)
|
||||
return False
|
||||
|
||||
self._running = True
|
||||
self._last_sse_activity = time.time()
|
||||
self._sse_task = asyncio.create_task(self._sse_listener())
|
||||
self._health_monitor_task = asyncio.create_task(self._health_monitor())
|
||||
|
||||
logger.info("Signal: connected to %s", self.http_url)
|
||||
return True
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
"""Stop SSE listener and clean up."""
|
||||
self._running = False
|
||||
|
||||
if self._sse_task:
|
||||
self._sse_task.cancel()
|
||||
try:
|
||||
await self._sse_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
if self._health_monitor_task:
|
||||
self._health_monitor_task.cancel()
|
||||
try:
|
||||
await self._health_monitor_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
# Cancel all typing tasks
|
||||
for task in self._typing_tasks.values():
|
||||
task.cancel()
|
||||
self._typing_tasks.clear()
|
||||
|
||||
if self.client:
|
||||
await self.client.aclose()
|
||||
self.client = None
|
||||
|
||||
logger.info("Signal: disconnected")
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# SSE Streaming (inbound messages)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _sse_listener(self) -> None:
|
||||
"""Listen for SSE events from signal-cli daemon."""
|
||||
url = f"{self.http_url}/api/v1/events?account={self.account}"
|
||||
backoff = SSE_RETRY_DELAY_INITIAL
|
||||
|
||||
while self._running:
|
||||
try:
|
||||
logger.debug("Signal SSE: connecting to %s", url)
|
||||
async with self.client.stream(
|
||||
"GET", url,
|
||||
headers={"Accept": "text/event-stream"},
|
||||
timeout=None,
|
||||
) as response:
|
||||
self._sse_response = response
|
||||
backoff = SSE_RETRY_DELAY_INITIAL # Reset on successful connection
|
||||
self._last_sse_activity = time.time()
|
||||
logger.info("Signal SSE: connected")
|
||||
|
||||
buffer = ""
|
||||
async for chunk in response.aiter_text():
|
||||
if not self._running:
|
||||
break
|
||||
buffer += chunk
|
||||
while "\n" in buffer:
|
||||
line, buffer = buffer.split("\n", 1)
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
# Parse SSE data lines
|
||||
if line.startswith("data:"):
|
||||
data_str = line[5:].strip()
|
||||
if not data_str:
|
||||
continue
|
||||
self._last_sse_activity = time.time()
|
||||
try:
|
||||
data = json.loads(data_str)
|
||||
await self._handle_envelope(data)
|
||||
except json.JSONDecodeError:
|
||||
logger.debug("Signal SSE: invalid JSON: %s", data_str[:100])
|
||||
except Exception:
|
||||
logger.exception("Signal SSE: error handling event")
|
||||
|
||||
except asyncio.CancelledError:
|
||||
break
|
||||
except httpx.HTTPError as e:
|
||||
if self._running:
|
||||
logger.warning("Signal SSE: HTTP error: %s (reconnecting in %.0fs)", e, backoff)
|
||||
except Exception as e:
|
||||
if self._running:
|
||||
logger.warning("Signal SSE: error: %s (reconnecting in %.0fs)", e, backoff)
|
||||
|
||||
if self._running:
|
||||
# Add 20% jitter to prevent thundering herd on reconnection
|
||||
jitter = backoff * 0.2 * random.random()
|
||||
await asyncio.sleep(backoff + jitter)
|
||||
backoff = min(backoff * 2, SSE_RETRY_DELAY_MAX)
|
||||
|
||||
self._sse_response = None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Health Monitor
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _health_monitor(self) -> None:
|
||||
"""Monitor SSE connection health and force reconnect if stale."""
|
||||
while self._running:
|
||||
await asyncio.sleep(HEALTH_CHECK_INTERVAL)
|
||||
if not self._running:
|
||||
break
|
||||
|
||||
elapsed = time.time() - self._last_sse_activity
|
||||
if elapsed > HEALTH_CHECK_STALE_THRESHOLD:
|
||||
logger.warning("Signal: SSE idle for %.0fs, checking daemon health", elapsed)
|
||||
try:
|
||||
resp = await self.client.get(
|
||||
f"{self.http_url}/api/v1/check", timeout=10.0
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
# Daemon is alive but SSE is idle — update activity to
|
||||
# avoid repeated warnings (connection may just be quiet)
|
||||
self._last_sse_activity = time.time()
|
||||
logger.debug("Signal: daemon healthy, SSE idle")
|
||||
else:
|
||||
logger.warning("Signal: health check failed (%d), forcing reconnect", resp.status_code)
|
||||
self._force_reconnect()
|
||||
except Exception as e:
|
||||
logger.warning("Signal: health check error: %s, forcing reconnect", e)
|
||||
self._force_reconnect()
|
||||
|
||||
def _force_reconnect(self) -> None:
|
||||
"""Force SSE reconnection by closing the current response."""
|
||||
if self._sse_response and not self._sse_response.is_stream_consumed:
|
||||
try:
|
||||
asyncio.create_task(self._sse_response.aclose())
|
||||
except Exception:
|
||||
pass
|
||||
self._sse_response = None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Message Handling
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _handle_envelope(self, envelope: dict) -> None:
|
||||
"""Process an incoming signal-cli envelope."""
|
||||
# Unwrap nested envelope if present
|
||||
envelope_data = envelope.get("envelope", envelope)
|
||||
|
||||
# Filter syncMessage envelopes (sent transcripts, read receipts, etc.)
|
||||
# signal-cli may set syncMessage to null vs omitting it, so check key existence
|
||||
if "syncMessage" in envelope_data:
|
||||
return
|
||||
|
||||
# Extract sender info
|
||||
sender = (
|
||||
envelope_data.get("sourceNumber")
|
||||
or envelope_data.get("sourceUuid")
|
||||
or envelope_data.get("source")
|
||||
)
|
||||
sender_name = envelope_data.get("sourceName", "")
|
||||
sender_uuid = envelope_data.get("sourceUuid", "")
|
||||
|
||||
if not sender:
|
||||
logger.debug("Signal: ignoring envelope with no sender")
|
||||
return
|
||||
|
||||
# Self-message filtering — prevent reply loops
|
||||
if self._account_normalized and sender == self._account_normalized:
|
||||
return
|
||||
|
||||
# Filter stories
|
||||
if self.ignore_stories and envelope_data.get("storyMessage"):
|
||||
return
|
||||
|
||||
# Get data message — also check editMessage (edited messages contain
|
||||
# their updated dataMessage inside editMessage.dataMessage)
|
||||
data_message = (
|
||||
envelope_data.get("dataMessage")
|
||||
or (envelope_data.get("editMessage") or {}).get("dataMessage")
|
||||
)
|
||||
if not data_message:
|
||||
return
|
||||
|
||||
# Check for group message
|
||||
group_info = data_message.get("groupInfo")
|
||||
group_id = group_info.get("groupId") if group_info else None
|
||||
is_group = bool(group_id)
|
||||
|
||||
# Group message filtering — derived from SIGNAL_GROUP_ALLOWED_USERS:
|
||||
# - No env var set → groups disabled (default safe behavior)
|
||||
# - Env var set with group IDs → only those groups allowed
|
||||
# - Env var set with "*" → all groups allowed
|
||||
# DM auth is fully handled by run.py (_is_user_authorized)
|
||||
if is_group:
|
||||
if not self.group_allow_from:
|
||||
logger.debug("Signal: ignoring group message (no SIGNAL_GROUP_ALLOWED_USERS)")
|
||||
return
|
||||
if "*" not in self.group_allow_from and group_id not in self.group_allow_from:
|
||||
logger.debug("Signal: group %s not in allowlist", group_id[:8] if group_id else "?")
|
||||
return
|
||||
|
||||
# Build chat info
|
||||
chat_id = sender if not is_group else f"group:{group_id}"
|
||||
chat_type = "group" if is_group else "dm"
|
||||
|
||||
# Extract text and render mentions
|
||||
text = data_message.get("message", "")
|
||||
mentions = data_message.get("mentions", [])
|
||||
if text and mentions:
|
||||
text = _render_mentions(text, mentions)
|
||||
|
||||
# Process attachments
|
||||
attachments_data = data_message.get("attachments", [])
|
||||
image_paths = []
|
||||
audio_path = None
|
||||
document_paths = []
|
||||
|
||||
if attachments_data and not getattr(self, "ignore_attachments", False):
|
||||
for att in attachments_data:
|
||||
att_id = att.get("id")
|
||||
att_size = att.get("size", 0)
|
||||
if not att_id:
|
||||
continue
|
||||
if att_size > SIGNAL_MAX_ATTACHMENT_SIZE:
|
||||
logger.warning("Signal: attachment too large (%d bytes), skipping", att_size)
|
||||
continue
|
||||
try:
|
||||
cached_path, ext = await self._fetch_attachment(att_id)
|
||||
if cached_path:
|
||||
if _is_image_ext(ext):
|
||||
image_paths.append(cached_path)
|
||||
elif _is_audio_ext(ext):
|
||||
audio_path = cached_path
|
||||
else:
|
||||
document_paths.append(cached_path)
|
||||
except Exception:
|
||||
logger.exception("Signal: failed to fetch attachment %s", att_id)
|
||||
|
||||
# Build session source
|
||||
source = self.build_source(
|
||||
chat_id=chat_id,
|
||||
chat_name=group_info.get("groupName") if group_info else sender_name,
|
||||
chat_type=chat_type,
|
||||
user_id=sender,
|
||||
user_name=sender_name or sender,
|
||||
user_id_alt=sender_uuid if sender_uuid else None,
|
||||
chat_id_alt=group_id if is_group else None,
|
||||
)
|
||||
|
||||
# Determine message type
|
||||
msg_type = MessageType.TEXT
|
||||
if audio_path:
|
||||
msg_type = MessageType.VOICE
|
||||
elif image_paths:
|
||||
msg_type = MessageType.IMAGE
|
||||
|
||||
# Parse timestamp from envelope data (milliseconds since epoch)
|
||||
ts_ms = envelope_data.get("timestamp", 0)
|
||||
if ts_ms:
|
||||
try:
|
||||
timestamp = datetime.fromtimestamp(ts_ms / 1000, tz=timezone.utc)
|
||||
except (ValueError, OSError):
|
||||
timestamp = datetime.now(tz=timezone.utc)
|
||||
else:
|
||||
timestamp = datetime.now(tz=timezone.utc)
|
||||
|
||||
# Build and dispatch event
|
||||
event = MessageEvent(
|
||||
source=source,
|
||||
text=text or "",
|
||||
message_type=msg_type,
|
||||
image_paths=image_paths,
|
||||
audio_path=audio_path,
|
||||
document_paths=document_paths,
|
||||
timestamp=timestamp,
|
||||
)
|
||||
|
||||
logger.debug("Signal: message from %s in %s: %s",
|
||||
_redact_phone(sender), chat_id[:20], (text or "")[:50])
|
||||
|
||||
await self.handle_message(event)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Attachment Handling
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _fetch_attachment(self, attachment_id: str) -> tuple:
|
||||
"""Fetch an attachment via JSON-RPC and cache it. Returns (path, ext)."""
|
||||
result = await self._rpc("getAttachment", {
|
||||
"account": self.account,
|
||||
"attachmentId": attachment_id,
|
||||
})
|
||||
|
||||
if not result:
|
||||
return None, ""
|
||||
|
||||
# Result is base64-encoded file content
|
||||
raw_data = base64.b64decode(result)
|
||||
ext = _guess_extension(raw_data)
|
||||
|
||||
if _is_image_ext(ext):
|
||||
path = cache_image_from_bytes(raw_data, ext)
|
||||
elif _is_audio_ext(ext):
|
||||
path = cache_audio_from_bytes(raw_data, ext)
|
||||
else:
|
||||
path = cache_document_from_bytes(raw_data, ext)
|
||||
|
||||
return path, ext
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# JSON-RPC Communication
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _rpc(self, method: str, params: dict, rpc_id: str = None) -> Any:
|
||||
"""Send a JSON-RPC 2.0 request to signal-cli daemon."""
|
||||
if not self.client:
|
||||
logger.warning("Signal: RPC called but client not connected")
|
||||
return None
|
||||
|
||||
if rpc_id is None:
|
||||
rpc_id = f"{method}_{int(time.time() * 1000)}"
|
||||
|
||||
payload = {
|
||||
"jsonrpc": "2.0",
|
||||
"method": method,
|
||||
"params": params,
|
||||
"id": rpc_id,
|
||||
}
|
||||
|
||||
try:
|
||||
resp = await self.client.post(
|
||||
f"{self.http_url}/api/v1/rpc",
|
||||
json=payload,
|
||||
timeout=30.0,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
if "error" in data:
|
||||
logger.warning("Signal RPC error (%s): %s", method, data["error"])
|
||||
return None
|
||||
|
||||
return data.get("result")
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("Signal RPC %s failed: %s", method, e)
|
||||
return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Sending
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def send(
|
||||
self,
|
||||
chat_id: str,
|
||||
text: str,
|
||||
reply_to_message_id: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> SendResult:
|
||||
"""Send a text message."""
|
||||
await self._stop_typing_indicator(chat_id)
|
||||
|
||||
params: Dict[str, Any] = {
|
||||
"account": self.account,
|
||||
"message": text,
|
||||
}
|
||||
|
||||
if chat_id.startswith("group:"):
|
||||
params["groupId"] = chat_id[6:]
|
||||
else:
|
||||
params["recipient"] = [chat_id]
|
||||
|
||||
result = await self._rpc("send", params)
|
||||
|
||||
if result is not None:
|
||||
return SendResult(success=True)
|
||||
return SendResult(success=False, error="RPC send failed")
|
||||
|
||||
async def send_typing(self, chat_id: str) -> None:
|
||||
"""Send a typing indicator."""
|
||||
params: Dict[str, Any] = {
|
||||
"account": self.account,
|
||||
}
|
||||
|
||||
if chat_id.startswith("group:"):
|
||||
params["groupId"] = chat_id[6:]
|
||||
else:
|
||||
params["recipient"] = [chat_id]
|
||||
|
||||
await self._rpc("sendTyping", params, rpc_id="typing")
|
||||
|
||||
async def send_image(
|
||||
self,
|
||||
chat_id: str,
|
||||
image_url: str,
|
||||
caption: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> SendResult:
|
||||
"""Send an image. Supports http(s):// and file:// URLs."""
|
||||
await self._stop_typing_indicator(chat_id)
|
||||
|
||||
# Resolve image to local path
|
||||
if image_url.startswith("file://"):
|
||||
file_path = unquote(image_url[7:])
|
||||
else:
|
||||
# Download remote image to cache
|
||||
try:
|
||||
file_path = await cache_image_from_url(image_url)
|
||||
except Exception as e:
|
||||
logger.warning("Signal: failed to download image: %s", e)
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
if not file_path or not Path(file_path).exists():
|
||||
return SendResult(success=False, error="Image file not found")
|
||||
|
||||
# Validate size
|
||||
file_size = Path(file_path).stat().st_size
|
||||
if file_size > SIGNAL_MAX_ATTACHMENT_SIZE:
|
||||
return SendResult(success=False, error=f"Image too large ({file_size} bytes)")
|
||||
|
||||
params: Dict[str, Any] = {
|
||||
"account": self.account,
|
||||
"message": caption or "",
|
||||
"attachments": [file_path],
|
||||
}
|
||||
|
||||
if chat_id.startswith("group:"):
|
||||
params["groupId"] = chat_id[6:]
|
||||
else:
|
||||
params["recipient"] = [chat_id]
|
||||
|
||||
result = await self._rpc("send", params)
|
||||
if result is not None:
|
||||
return SendResult(success=True)
|
||||
return SendResult(success=False, error="RPC send with attachment failed")
|
||||
|
||||
async def send_document(
|
||||
self,
|
||||
chat_id: str,
|
||||
file_path: str,
|
||||
caption: Optional[str] = None,
|
||||
filename: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> SendResult:
|
||||
"""Send a document/file attachment."""
|
||||
await self._stop_typing_indicator(chat_id)
|
||||
|
||||
if not Path(file_path).exists():
|
||||
return SendResult(success=False, error="File not found")
|
||||
|
||||
params: Dict[str, Any] = {
|
||||
"account": self.account,
|
||||
"message": caption or "",
|
||||
"attachments": [file_path],
|
||||
}
|
||||
|
||||
if chat_id.startswith("group:"):
|
||||
params["groupId"] = chat_id[6:]
|
||||
else:
|
||||
params["recipient"] = [chat_id]
|
||||
|
||||
result = await self._rpc("send", params)
|
||||
if result is not None:
|
||||
return SendResult(success=True)
|
||||
return SendResult(success=False, error="RPC send document failed")
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Typing Indicators
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _start_typing_indicator(self, chat_id: str) -> None:
|
||||
"""Start a typing indicator loop for a chat."""
|
||||
if chat_id in self._typing_tasks:
|
||||
return # Already running
|
||||
|
||||
async def _typing_loop():
|
||||
try:
|
||||
while True:
|
||||
await self.send_typing(chat_id)
|
||||
await asyncio.sleep(TYPING_INTERVAL)
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
self._typing_tasks[chat_id] = asyncio.create_task(_typing_loop())
|
||||
|
||||
async def _stop_typing_indicator(self, chat_id: str) -> None:
|
||||
"""Stop a typing indicator loop for a chat."""
|
||||
task = self._typing_tasks.pop(chat_id, None)
|
||||
if task:
|
||||
task.cancel()
|
||||
try:
|
||||
await task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Chat Info
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
|
||||
"""Get information about a chat/contact."""
|
||||
if chat_id.startswith("group:"):
|
||||
return {
|
||||
"name": chat_id,
|
||||
"type": "group",
|
||||
"chat_id": chat_id,
|
||||
}
|
||||
|
||||
# Try to resolve contact name
|
||||
result = await self._rpc("getContact", {
|
||||
"account": self.account,
|
||||
"contactAddress": chat_id,
|
||||
})
|
||||
|
||||
name = chat_id
|
||||
if result and isinstance(result, dict):
|
||||
name = result.get("name") or result.get("profileName") or chat_id
|
||||
|
||||
return {
|
||||
"name": name,
|
||||
"type": "dm",
|
||||
"chat_id": chat_id,
|
||||
}
|
||||
@@ -1,563 +0,0 @@
|
||||
"""
|
||||
Slack platform adapter.
|
||||
|
||||
Uses slack-bolt (Python) with Socket Mode for:
|
||||
- Receiving messages from channels and DMs
|
||||
- Sending responses back
|
||||
- Handling slash commands
|
||||
- Thread support
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import re
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
try:
|
||||
from slack_bolt.async_app import AsyncApp
|
||||
from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
|
||||
from slack_sdk.web.async_client import AsyncWebClient
|
||||
SLACK_AVAILABLE = True
|
||||
except ImportError:
|
||||
SLACK_AVAILABLE = False
|
||||
AsyncApp = Any
|
||||
AsyncSocketModeHandler = Any
|
||||
AsyncWebClient = Any
|
||||
|
||||
import sys
|
||||
from pathlib import Path as _Path
|
||||
sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
SendResult,
|
||||
SUPPORTED_DOCUMENT_TYPES,
|
||||
cache_document_from_bytes,
|
||||
cache_image_from_url,
|
||||
cache_audio_from_url,
|
||||
)
|
||||
|
||||
|
||||
def check_slack_requirements() -> bool:
|
||||
"""Check if Slack dependencies are available."""
|
||||
return SLACK_AVAILABLE
|
||||
|
||||
|
||||
class SlackAdapter(BasePlatformAdapter):
|
||||
"""
|
||||
Slack bot adapter using Socket Mode.
|
||||
|
||||
Requires two tokens:
|
||||
- SLACK_BOT_TOKEN (xoxb-...) for API calls
|
||||
- SLACK_APP_TOKEN (xapp-...) for Socket Mode connection
|
||||
|
||||
Features:
|
||||
- DMs and channel messages (mention-gated in channels)
|
||||
- Thread support
|
||||
- File/image/audio attachments
|
||||
- Slash commands (/hermes)
|
||||
- Typing indicators (not natively supported by Slack bots)
|
||||
"""
|
||||
|
||||
MAX_MESSAGE_LENGTH = 4000 # Slack's limit is higher but mrkdwn can inflate
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.SLACK)
|
||||
self._app: Optional[AsyncApp] = None
|
||||
self._handler: Optional[AsyncSocketModeHandler] = None
|
||||
self._bot_user_id: Optional[str] = None
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to Slack via Socket Mode."""
|
||||
if not SLACK_AVAILABLE:
|
||||
print("[Slack] slack-bolt not installed. Run: pip install slack-bolt")
|
||||
return False
|
||||
|
||||
bot_token = self.config.token
|
||||
app_token = os.getenv("SLACK_APP_TOKEN")
|
||||
|
||||
if not bot_token:
|
||||
print("[Slack] SLACK_BOT_TOKEN not set")
|
||||
return False
|
||||
if not app_token:
|
||||
print("[Slack] SLACK_APP_TOKEN not set")
|
||||
return False
|
||||
|
||||
try:
|
||||
self._app = AsyncApp(token=bot_token)
|
||||
|
||||
# Get our own bot user ID for mention detection
|
||||
auth_response = await self._app.client.auth_test()
|
||||
self._bot_user_id = auth_response.get("user_id")
|
||||
bot_name = auth_response.get("user", "unknown")
|
||||
|
||||
# Register message event handler
|
||||
@self._app.event("message")
|
||||
async def handle_message_event(event, say):
|
||||
await self._handle_slack_message(event)
|
||||
|
||||
# Acknowledge app_mention events to prevent Bolt 404 errors.
|
||||
# The "message" handler above already processes @mentions in
|
||||
# channels, so this is intentionally a no-op to avoid duplicates.
|
||||
@self._app.event("app_mention")
|
||||
async def handle_app_mention(event, say):
|
||||
pass
|
||||
|
||||
# Register slash command handler
|
||||
@self._app.command("/hermes")
|
||||
async def handle_hermes_command(ack, command):
|
||||
await ack()
|
||||
await self._handle_slash_command(command)
|
||||
|
||||
# Start Socket Mode handler in background
|
||||
self._handler = AsyncSocketModeHandler(self._app, app_token)
|
||||
asyncio.create_task(self._handler.start_async())
|
||||
|
||||
self._running = True
|
||||
print(f"[Slack] Connected as @{bot_name} (Socket Mode)")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
print(f"[Slack] Connection failed: {e}")
|
||||
return False
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
"""Disconnect from Slack."""
|
||||
if self._handler:
|
||||
await self._handler.close_async()
|
||||
self._running = False
|
||||
print("[Slack] Disconnected")
|
||||
|
||||
async def send(
|
||||
self,
|
||||
chat_id: str,
|
||||
content: str,
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send a message to a Slack channel or DM."""
|
||||
if not self._app:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
kwargs = {
|
||||
"channel": chat_id,
|
||||
"text": content,
|
||||
}
|
||||
|
||||
# Reply in thread if thread_ts is available
|
||||
if reply_to:
|
||||
kwargs["thread_ts"] = reply_to
|
||||
elif metadata and metadata.get("thread_ts"):
|
||||
kwargs["thread_ts"] = metadata["thread_ts"]
|
||||
|
||||
result = await self._app.client.chat_postMessage(**kwargs)
|
||||
|
||||
return SendResult(
|
||||
success=True,
|
||||
message_id=result.get("ts"),
|
||||
raw_response=result,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[Slack] Send error: {e}")
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def edit_message(
|
||||
self,
|
||||
chat_id: str,
|
||||
message_id: str,
|
||||
content: str,
|
||||
) -> SendResult:
|
||||
"""Edit a previously sent Slack message."""
|
||||
if not self._app:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
try:
|
||||
await self._app.client.chat_update(
|
||||
channel=chat_id,
|
||||
ts=message_id,
|
||||
text=content,
|
||||
)
|
||||
return SendResult(success=True, message_id=message_id)
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_typing(self, chat_id: str) -> None:
|
||||
"""Slack doesn't have a direct typing indicator API for bots."""
|
||||
pass
|
||||
|
||||
async def send_image_file(
|
||||
self,
|
||||
chat_id: str,
|
||||
image_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send a local image file to Slack by uploading it."""
|
||||
if not self._app:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
import os
|
||||
if not os.path.exists(image_path):
|
||||
return SendResult(success=False, error=f"Image file not found: {image_path}")
|
||||
|
||||
result = await self._app.client.files_upload_v2(
|
||||
channel=chat_id,
|
||||
file=image_path,
|
||||
filename=os.path.basename(image_path),
|
||||
initial_comment=caption or "",
|
||||
thread_ts=reply_to,
|
||||
)
|
||||
return SendResult(success=True, raw_response=result)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Failed to send local image: {e}")
|
||||
return await super().send_image_file(chat_id, image_path, caption, reply_to)
|
||||
|
||||
async def send_image(
|
||||
self,
|
||||
chat_id: str,
|
||||
image_url: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send an image to Slack by uploading the URL as a file."""
|
||||
if not self._app:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
import httpx
|
||||
|
||||
# Download the image first
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
response = await client.get(image_url)
|
||||
response.raise_for_status()
|
||||
|
||||
result = await self._app.client.files_upload_v2(
|
||||
channel=chat_id,
|
||||
content=response.content,
|
||||
filename="image.png",
|
||||
initial_comment=caption or "",
|
||||
thread_ts=reply_to,
|
||||
)
|
||||
|
||||
return SendResult(success=True, raw_response=result)
|
||||
|
||||
except Exception as e:
|
||||
# Fall back to sending the URL as text
|
||||
text = f"{caption}\n{image_url}" if caption else image_url
|
||||
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
||||
|
||||
async def send_voice(
|
||||
self,
|
||||
chat_id: str,
|
||||
audio_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send an audio file to Slack."""
|
||||
if not self._app:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
result = await self._app.client.files_upload_v2(
|
||||
channel=chat_id,
|
||||
file=audio_path,
|
||||
filename=os.path.basename(audio_path),
|
||||
initial_comment=caption or "",
|
||||
thread_ts=reply_to,
|
||||
)
|
||||
return SendResult(success=True, raw_response=result)
|
||||
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_video(
|
||||
self,
|
||||
chat_id: str,
|
||||
video_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send a video file to Slack."""
|
||||
if not self._app:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
if not os.path.exists(video_path):
|
||||
return SendResult(success=False, error=f"Video file not found: {video_path}")
|
||||
|
||||
try:
|
||||
result = await self._app.client.files_upload_v2(
|
||||
channel=chat_id,
|
||||
file=video_path,
|
||||
filename=os.path.basename(video_path),
|
||||
initial_comment=caption or "",
|
||||
thread_ts=reply_to,
|
||||
)
|
||||
return SendResult(success=True, raw_response=result)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Failed to send video: {e}")
|
||||
return await super().send_video(chat_id, video_path, caption, reply_to)
|
||||
|
||||
async def send_document(
|
||||
self,
|
||||
chat_id: str,
|
||||
file_path: str,
|
||||
caption: Optional[str] = None,
|
||||
file_name: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send a document/file attachment to Slack."""
|
||||
if not self._app:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
if not os.path.exists(file_path):
|
||||
return SendResult(success=False, error=f"File not found: {file_path}")
|
||||
|
||||
display_name = file_name or os.path.basename(file_path)
|
||||
|
||||
try:
|
||||
result = await self._app.client.files_upload_v2(
|
||||
channel=chat_id,
|
||||
file=file_path,
|
||||
filename=display_name,
|
||||
initial_comment=caption or "",
|
||||
thread_ts=reply_to,
|
||||
)
|
||||
return SendResult(success=True, raw_response=result)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Failed to send document: {e}")
|
||||
return await super().send_document(chat_id, file_path, caption, file_name, reply_to)
|
||||
|
||||
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
|
||||
"""Get information about a Slack channel."""
|
||||
if not self._app:
|
||||
return {"name": chat_id, "type": "unknown"}
|
||||
|
||||
try:
|
||||
result = await self._app.client.conversations_info(channel=chat_id)
|
||||
channel = result.get("channel", {})
|
||||
is_dm = channel.get("is_im", False)
|
||||
return {
|
||||
"name": channel.get("name", chat_id),
|
||||
"type": "dm" if is_dm else "group",
|
||||
}
|
||||
except Exception:
|
||||
return {"name": chat_id, "type": "unknown"}
|
||||
|
||||
# ----- Internal handlers -----
|
||||
|
||||
async def _handle_slack_message(self, event: dict) -> None:
|
||||
"""Handle an incoming Slack message event."""
|
||||
# Ignore bot messages (including our own)
|
||||
if event.get("bot_id") or event.get("subtype") == "bot_message":
|
||||
return
|
||||
|
||||
# Ignore message edits and deletions
|
||||
subtype = event.get("subtype")
|
||||
if subtype in ("message_changed", "message_deleted"):
|
||||
return
|
||||
|
||||
text = event.get("text", "")
|
||||
user_id = event.get("user", "")
|
||||
channel_id = event.get("channel", "")
|
||||
thread_ts = event.get("thread_ts") or event.get("ts")
|
||||
ts = event.get("ts", "")
|
||||
|
||||
# Determine if this is a DM or channel message
|
||||
channel_type = event.get("channel_type", "")
|
||||
is_dm = channel_type == "im"
|
||||
|
||||
# In channels, only respond if bot is mentioned
|
||||
if not is_dm and self._bot_user_id:
|
||||
if f"<@{self._bot_user_id}>" not in text:
|
||||
return
|
||||
# Strip the bot mention from the text
|
||||
text = text.replace(f"<@{self._bot_user_id}>", "").strip()
|
||||
|
||||
# Determine message type
|
||||
msg_type = MessageType.TEXT
|
||||
if text.startswith("/"):
|
||||
msg_type = MessageType.COMMAND
|
||||
|
||||
# Handle file attachments
|
||||
media_urls = []
|
||||
media_types = []
|
||||
files = event.get("files", [])
|
||||
for f in files:
|
||||
mimetype = f.get("mimetype", "unknown")
|
||||
url = f.get("url_private_download") or f.get("url_private", "")
|
||||
if mimetype.startswith("image/") and url:
|
||||
try:
|
||||
ext = "." + mimetype.split("/")[-1].split(";")[0]
|
||||
if ext not in (".jpg", ".jpeg", ".png", ".gif", ".webp"):
|
||||
ext = ".jpg"
|
||||
# Slack private URLs require the bot token as auth header
|
||||
cached = await self._download_slack_file(url, ext)
|
||||
media_urls.append(cached)
|
||||
media_types.append(mimetype)
|
||||
msg_type = MessageType.PHOTO
|
||||
except Exception as e:
|
||||
print(f"[Slack] Failed to cache image: {e}", flush=True)
|
||||
elif mimetype.startswith("audio/") and url:
|
||||
try:
|
||||
ext = "." + mimetype.split("/")[-1].split(";")[0]
|
||||
if ext not in (".ogg", ".mp3", ".wav", ".webm", ".m4a"):
|
||||
ext = ".ogg"
|
||||
cached = await self._download_slack_file(url, ext, audio=True)
|
||||
media_urls.append(cached)
|
||||
media_types.append(mimetype)
|
||||
msg_type = MessageType.VOICE
|
||||
except Exception as e:
|
||||
print(f"[Slack] Failed to cache audio: {e}", flush=True)
|
||||
elif url:
|
||||
# Try to handle as a document attachment
|
||||
try:
|
||||
original_filename = f.get("name", "")
|
||||
ext = ""
|
||||
if original_filename:
|
||||
_, ext = os.path.splitext(original_filename)
|
||||
ext = ext.lower()
|
||||
|
||||
# Fallback: reverse-lookup from MIME type
|
||||
if not ext and mimetype:
|
||||
mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()}
|
||||
ext = mime_to_ext.get(mimetype, "")
|
||||
|
||||
if ext not in SUPPORTED_DOCUMENT_TYPES:
|
||||
continue # Skip unsupported file types silently
|
||||
|
||||
# Check file size (Slack limit: 20 MB for bots)
|
||||
file_size = f.get("size", 0)
|
||||
MAX_DOC_BYTES = 20 * 1024 * 1024
|
||||
if not file_size or file_size > MAX_DOC_BYTES:
|
||||
print(f"[Slack] Document too large or unknown size: {file_size}", flush=True)
|
||||
continue
|
||||
|
||||
# Download and cache
|
||||
raw_bytes = await self._download_slack_file_bytes(url)
|
||||
cached_path = cache_document_from_bytes(
|
||||
raw_bytes, original_filename or f"document{ext}"
|
||||
)
|
||||
doc_mime = SUPPORTED_DOCUMENT_TYPES[ext]
|
||||
media_urls.append(cached_path)
|
||||
media_types.append(doc_mime)
|
||||
msg_type = MessageType.DOCUMENT
|
||||
print(f"[Slack] Cached user document: {cached_path}", flush=True)
|
||||
|
||||
# Inject text content for .txt/.md files (capped at 100 KB)
|
||||
MAX_TEXT_INJECT_BYTES = 100 * 1024
|
||||
if ext in (".md", ".txt") and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
|
||||
try:
|
||||
text_content = raw_bytes.decode("utf-8")
|
||||
display_name = original_filename or f"document{ext}"
|
||||
display_name = re.sub(r'[^\w.\- ]', '_', display_name)
|
||||
injection = f"[Content of {display_name}]:\n{text_content}"
|
||||
if text:
|
||||
text = f"{injection}\n\n{text}"
|
||||
else:
|
||||
text = injection
|
||||
except UnicodeDecodeError:
|
||||
pass # Binary content, skip injection
|
||||
|
||||
except Exception as e:
|
||||
print(f"[Slack] Failed to cache document: {e}", flush=True)
|
||||
|
||||
# Build source
|
||||
source = self.build_source(
|
||||
chat_id=channel_id,
|
||||
chat_name=channel_id, # Will be resolved later if needed
|
||||
chat_type="dm" if is_dm else "group",
|
||||
user_id=user_id,
|
||||
thread_id=thread_ts,
|
||||
)
|
||||
|
||||
msg_event = MessageEvent(
|
||||
text=text,
|
||||
message_type=msg_type,
|
||||
source=source,
|
||||
raw_message=event,
|
||||
message_id=ts,
|
||||
media_urls=media_urls,
|
||||
media_types=media_types,
|
||||
reply_to_message_id=thread_ts if thread_ts != ts else None,
|
||||
)
|
||||
|
||||
await self.handle_message(msg_event)
|
||||
|
||||
async def _handle_slash_command(self, command: dict) -> None:
|
||||
"""Handle /hermes slash command."""
|
||||
text = command.get("text", "").strip()
|
||||
user_id = command.get("user_id", "")
|
||||
channel_id = command.get("channel_id", "")
|
||||
|
||||
# Map subcommands to gateway commands
|
||||
subcommand_map = {
|
||||
"new": "/reset", "reset": "/reset",
|
||||
"status": "/status", "stop": "/stop",
|
||||
"help": "/help",
|
||||
"model": "/model", "personality": "/personality",
|
||||
"retry": "/retry", "undo": "/undo",
|
||||
}
|
||||
first_word = text.split()[0] if text else ""
|
||||
if first_word in subcommand_map:
|
||||
# Preserve arguments after the subcommand
|
||||
rest = text[len(first_word):].strip()
|
||||
text = f"{subcommand_map[first_word]} {rest}".strip() if rest else subcommand_map[first_word]
|
||||
elif text:
|
||||
pass # Treat as a regular question
|
||||
else:
|
||||
text = "/help"
|
||||
|
||||
source = self.build_source(
|
||||
chat_id=channel_id,
|
||||
chat_type="dm", # Slash commands are always in DM-like context
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
event = MessageEvent(
|
||||
text=text,
|
||||
message_type=MessageType.COMMAND if text.startswith("/") else MessageType.TEXT,
|
||||
source=source,
|
||||
raw_message=command,
|
||||
)
|
||||
|
||||
await self.handle_message(event)
|
||||
|
||||
async def _download_slack_file(self, url: str, ext: str, audio: bool = False) -> str:
|
||||
"""Download a Slack file using the bot token for auth."""
|
||||
import httpx
|
||||
|
||||
bot_token = self.config.token
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
response = await client.get(
|
||||
url,
|
||||
headers={"Authorization": f"Bearer {bot_token}"},
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
if audio:
|
||||
from gateway.platforms.base import cache_audio_from_bytes
|
||||
return cache_audio_from_bytes(response.content, ext)
|
||||
else:
|
||||
from gateway.platforms.base import cache_image_from_bytes
|
||||
return cache_image_from_bytes(response.content, ext)
|
||||
|
||||
async def _download_slack_file_bytes(self, url: str) -> bytes:
|
||||
"""Download a Slack file and return raw bytes."""
|
||||
import httpx
|
||||
|
||||
bot_token = self.config.token
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
response = await client.get(
|
||||
url,
|
||||
headers={"Authorization": f"Bearer {bot_token}"},
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.content
|
||||
@@ -8,13 +8,8 @@ Uses python-telegram-bot library for:
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
from telegram import Update, Bot, Message
|
||||
from telegram.ext import (
|
||||
@@ -32,21 +27,10 @@ except ImportError:
|
||||
Bot = Any
|
||||
Message = Any
|
||||
Application = Any
|
||||
CommandHandler = Any
|
||||
TelegramMessageHandler = Any
|
||||
filters = None
|
||||
ParseMode = None
|
||||
ChatType = None
|
||||
|
||||
# Mock ContextTypes so type annotations using ContextTypes.DEFAULT_TYPE
|
||||
# don't crash during class definition when the library isn't installed.
|
||||
class _MockContextTypes:
|
||||
DEFAULT_TYPE = Any
|
||||
ContextTypes = _MockContextTypes
|
||||
ContextTypes = Any
|
||||
|
||||
import sys
|
||||
from pathlib import Path as _Path
|
||||
sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
|
||||
sys.path.insert(0, str(__file__).rsplit("/", 3)[0])
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import (
|
||||
@@ -54,10 +38,6 @@ from gateway.platforms.base import (
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
SendResult,
|
||||
cache_image_from_bytes,
|
||||
cache_audio_from_bytes,
|
||||
cache_document_from_bytes,
|
||||
SUPPORTED_DOCUMENT_TYPES,
|
||||
)
|
||||
|
||||
|
||||
@@ -66,29 +46,6 @@ def check_telegram_requirements() -> bool:
|
||||
return TELEGRAM_AVAILABLE
|
||||
|
||||
|
||||
# Matches every character that MarkdownV2 requires to be backslash-escaped
|
||||
# when it appears outside a code span or fenced code block.
|
||||
_MDV2_ESCAPE_RE = re.compile(r'([_*\[\]()~`>#\+\-=|{}.!\\])')
|
||||
|
||||
|
||||
def _escape_mdv2(text: str) -> str:
|
||||
"""Escape Telegram MarkdownV2 special characters with a preceding backslash."""
|
||||
return _MDV2_ESCAPE_RE.sub(r'\\\1', text)
|
||||
|
||||
|
||||
def _strip_mdv2(text: str) -> str:
|
||||
"""Strip MarkdownV2 escape backslashes to produce clean plain text.
|
||||
|
||||
Also removes MarkdownV2 bold markers (*text* -> text) so the fallback
|
||||
doesn't show stray asterisks from header/bold conversion.
|
||||
"""
|
||||
# Remove escape backslashes before special characters
|
||||
cleaned = re.sub(r'\\([_*\[\]()~`>#\+\-=|{}.!\\])', r'\1', text)
|
||||
# Remove MarkdownV2 bold markers that format_message converted from **bold**
|
||||
cleaned = re.sub(r'\*([^*]+)\*', r'\1', cleaned)
|
||||
return cleaned
|
||||
|
||||
|
||||
class TelegramAdapter(BasePlatformAdapter):
|
||||
"""
|
||||
Telegram bot adapter.
|
||||
@@ -133,11 +90,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
self._handle_command
|
||||
))
|
||||
self._app.add_handler(TelegramMessageHandler(
|
||||
filters.LOCATION | getattr(filters, "VENUE", filters.LOCATION),
|
||||
self._handle_location_message
|
||||
))
|
||||
self._app.add_handler(TelegramMessageHandler(
|
||||
filters.PHOTO | filters.VIDEO | filters.AUDIO | filters.VOICE | filters.Document.ALL | filters.Sticker.ALL,
|
||||
filters.PHOTO | filters.VIDEO | filters.AUDIO | filters.VOICE | filters.Document.ALL,
|
||||
self._handle_media_message
|
||||
))
|
||||
|
||||
@@ -146,32 +99,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
await self._app.start()
|
||||
await self._app.updater.start_polling(allowed_updates=Update.ALL_TYPES)
|
||||
|
||||
# Register bot commands so Telegram shows a hint menu when users type /
|
||||
try:
|
||||
from telegram import BotCommand
|
||||
await self._bot.set_my_commands([
|
||||
BotCommand("new", "Start a new conversation"),
|
||||
BotCommand("reset", "Reset conversation history"),
|
||||
BotCommand("model", "Show or change the model"),
|
||||
BotCommand("personality", "Set a personality"),
|
||||
BotCommand("retry", "Retry your last message"),
|
||||
BotCommand("undo", "Remove the last exchange"),
|
||||
BotCommand("status", "Show session info"),
|
||||
BotCommand("stop", "Stop the running agent"),
|
||||
BotCommand("sethome", "Set this chat as the home channel"),
|
||||
BotCommand("compress", "Compress conversation context"),
|
||||
BotCommand("title", "Set or show the session title"),
|
||||
BotCommand("resume", "Resume a previously-named session"),
|
||||
BotCommand("usage", "Show token usage for this session"),
|
||||
BotCommand("provider", "Show available providers"),
|
||||
BotCommand("insights", "Show usage insights and analytics"),
|
||||
BotCommand("update", "Update Hermes to the latest version"),
|
||||
BotCommand("reload_mcp", "Reload MCP servers from config"),
|
||||
BotCommand("help", "Show available commands"),
|
||||
])
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Could not register command menu: {e}")
|
||||
|
||||
self._running = True
|
||||
print(f"[{self.name}] Connected and polling for updates")
|
||||
return True
|
||||
@@ -220,20 +147,16 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
msg = await self._bot.send_message(
|
||||
chat_id=int(chat_id),
|
||||
text=chunk,
|
||||
parse_mode=ParseMode.MARKDOWN_V2,
|
||||
parse_mode=ParseMode.MARKDOWN,
|
||||
reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
|
||||
message_thread_id=int(thread_id) if thread_id else None,
|
||||
)
|
||||
except Exception as md_error:
|
||||
# Markdown parsing failed, try plain text
|
||||
if "parse" in str(md_error).lower() or "markdown" in str(md_error).lower():
|
||||
logger.warning("[%s] MarkdownV2 parse failed, falling back to plain text: %s", self.name, md_error)
|
||||
# Strip MDV2 escape backslashes so the user doesn't
|
||||
# see raw backslashes littered through the message.
|
||||
plain_chunk = _strip_mdv2(chunk)
|
||||
msg = await self._bot.send_message(
|
||||
chat_id=int(chat_id),
|
||||
text=plain_chunk,
|
||||
text=chunk,
|
||||
parse_mode=None, # Plain text
|
||||
reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
|
||||
message_thread_id=int(thread_id) if thread_id else None,
|
||||
@@ -250,36 +173,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def edit_message(
|
||||
self,
|
||||
chat_id: str,
|
||||
message_id: str,
|
||||
content: str,
|
||||
) -> SendResult:
|
||||
"""Edit a previously sent Telegram message."""
|
||||
if not self._bot:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
try:
|
||||
formatted = self.format_message(content)
|
||||
try:
|
||||
await self._bot.edit_message_text(
|
||||
chat_id=int(chat_id),
|
||||
message_id=int(message_id),
|
||||
text=formatted,
|
||||
parse_mode=ParseMode.MARKDOWN_V2,
|
||||
)
|
||||
except Exception:
|
||||
# Fallback: retry without markdown formatting
|
||||
await self._bot.edit_message_text(
|
||||
chat_id=int(chat_id),
|
||||
message_id=int(message_id),
|
||||
text=content,
|
||||
)
|
||||
return SendResult(success=True, message_id=message_id)
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
|
||||
async def send_voice(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -318,34 +212,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
print(f"[{self.name}] Failed to send voice/audio: {e}")
|
||||
return await super().send_voice(chat_id, audio_path, caption, reply_to)
|
||||
|
||||
async def send_image_file(
|
||||
self,
|
||||
chat_id: str,
|
||||
image_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send a local image file natively as a Telegram photo."""
|
||||
if not self._bot:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
import os
|
||||
if not os.path.exists(image_path):
|
||||
return SendResult(success=False, error=f"Image file not found: {image_path}")
|
||||
|
||||
with open(image_path, "rb") as image_file:
|
||||
msg = await self._bot.send_photo(
|
||||
chat_id=int(chat_id),
|
||||
photo=image_file,
|
||||
caption=caption[:1024] if caption else None,
|
||||
reply_to_message_id=int(reply_to) if reply_to else None,
|
||||
)
|
||||
return SendResult(success=True, message_id=str(msg.message_id))
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Failed to send local image: {e}")
|
||||
return await super().send_image_file(chat_id, image_path, caption, reply_to)
|
||||
|
||||
async def send_image(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -353,16 +219,12 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send an image natively as a Telegram photo.
|
||||
|
||||
Tries URL-based send first (fast, works for <5MB images).
|
||||
Falls back to downloading and uploading as file (supports up to 10MB).
|
||||
"""
|
||||
"""Send an image natively as a Telegram photo."""
|
||||
if not self._bot:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
# Telegram can send photos directly from URLs (up to ~5MB)
|
||||
# Telegram can send photos directly from URLs
|
||||
msg = await self._bot.send_photo(
|
||||
chat_id=int(chat_id),
|
||||
photo=image_url,
|
||||
@@ -371,51 +233,10 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
)
|
||||
return SendResult(success=True, message_id=str(msg.message_id))
|
||||
except Exception as e:
|
||||
logger.warning("[%s] URL-based send_photo failed (%s), trying file upload", self.name, e)
|
||||
# Fallback: download and upload as file (supports up to 10MB)
|
||||
try:
|
||||
import httpx
|
||||
async with httpx.AsyncClient(timeout=30.0) as client:
|
||||
resp = await client.get(image_url)
|
||||
resp.raise_for_status()
|
||||
image_data = resp.content
|
||||
|
||||
msg = await self._bot.send_photo(
|
||||
chat_id=int(chat_id),
|
||||
photo=image_data,
|
||||
caption=caption[:1024] if caption else None,
|
||||
reply_to_message_id=int(reply_to) if reply_to else None,
|
||||
)
|
||||
return SendResult(success=True, message_id=str(msg.message_id))
|
||||
except Exception as e2:
|
||||
logger.error("[%s] File upload send_photo also failed: %s", self.name, e2)
|
||||
# Final fallback: send URL as text
|
||||
return await super().send_image(chat_id, image_url, caption, reply_to)
|
||||
print(f"[{self.name}] Failed to send photo, falling back to URL: {e}")
|
||||
# Fallback: send as text link
|
||||
return await super().send_image(chat_id, image_url, caption, reply_to)
|
||||
|
||||
async def send_animation(
|
||||
self,
|
||||
chat_id: str,
|
||||
animation_url: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send an animated GIF natively as a Telegram animation (auto-plays inline)."""
|
||||
if not self._bot:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
msg = await self._bot.send_animation(
|
||||
chat_id=int(chat_id),
|
||||
animation=animation_url,
|
||||
caption=caption[:1024] if caption else None,
|
||||
reply_to_message_id=int(reply_to) if reply_to else None,
|
||||
)
|
||||
return SendResult(success=True, message_id=str(msg.message_id))
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Failed to send animation, falling back to photo: {e}")
|
||||
# Fallback: try as a regular photo
|
||||
return await self.send_image(chat_id, animation_url, caption, reply_to)
|
||||
|
||||
async def send_typing(self, chat_id: str) -> None:
|
||||
"""Send typing indicator."""
|
||||
if self._bot:
|
||||
@@ -456,83 +277,14 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
|
||||
def format_message(self, content: str) -> str:
|
||||
"""
|
||||
Convert standard markdown to Telegram MarkdownV2 format.
|
||||
|
||||
Protected regions (code blocks, inline code) are extracted first so
|
||||
their contents are never modified. Standard markdown constructs
|
||||
(headers, bold, italic, links) are translated to MarkdownV2 syntax,
|
||||
and all remaining special characters are escaped.
|
||||
Format message for Telegram.
|
||||
|
||||
Telegram uses a subset of markdown. We'll use the simpler
|
||||
Markdown mode (not MarkdownV2) for compatibility.
|
||||
"""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
placeholders: dict = {}
|
||||
counter = [0]
|
||||
|
||||
def _ph(value: str) -> str:
|
||||
"""Stash *value* behind a placeholder token that survives escaping."""
|
||||
key = f"\x00PH{counter[0]}\x00"
|
||||
counter[0] += 1
|
||||
placeholders[key] = value
|
||||
return key
|
||||
|
||||
text = content
|
||||
|
||||
# 1) Protect fenced code blocks (``` ... ```)
|
||||
text = re.sub(
|
||||
r'(```(?:[^\n]*\n)?[\s\S]*?```)',
|
||||
lambda m: _ph(m.group(0)),
|
||||
text,
|
||||
)
|
||||
|
||||
# 2) Protect inline code (`...`)
|
||||
text = re.sub(r'(`[^`]+`)', lambda m: _ph(m.group(0)), text)
|
||||
|
||||
# 3) Convert markdown links – escape the display text; inside the URL
|
||||
# only ')' and '\' need escaping per the MarkdownV2 spec.
|
||||
def _convert_link(m):
|
||||
display = _escape_mdv2(m.group(1))
|
||||
url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
|
||||
return _ph(f'[{display}]({url})')
|
||||
|
||||
text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)
|
||||
|
||||
# 4) Convert markdown headers (## Title) → bold *Title*
|
||||
def _convert_header(m):
|
||||
inner = m.group(1).strip()
|
||||
# Strip redundant bold markers that may appear inside a header
|
||||
inner = re.sub(r'\*\*(.+?)\*\*', r'\1', inner)
|
||||
return _ph(f'*{_escape_mdv2(inner)}*')
|
||||
|
||||
text = re.sub(
|
||||
r'^#{1,6}\s+(.+)$', _convert_header, text, flags=re.MULTILINE
|
||||
)
|
||||
|
||||
# 5) Convert bold: **text** → *text* (MarkdownV2 bold)
|
||||
text = re.sub(
|
||||
r'\*\*(.+?)\*\*',
|
||||
lambda m: _ph(f'*{_escape_mdv2(m.group(1))}*'),
|
||||
text,
|
||||
)
|
||||
|
||||
# 6) Convert italic: *text* (single asterisk) → _text_ (MarkdownV2 italic)
|
||||
# [^*\n]+ prevents matching across newlines (which would corrupt
|
||||
# bullet lists using * markers and multi-line content).
|
||||
text = re.sub(
|
||||
r'\*([^*\n]+)\*',
|
||||
lambda m: _ph(f'_{_escape_mdv2(m.group(1))}_'),
|
||||
text,
|
||||
)
|
||||
|
||||
# 7) Escape remaining special characters in plain text
|
||||
text = _escape_mdv2(text)
|
||||
|
||||
# 8) Restore placeholders in reverse insertion order so that
|
||||
# nested references (a placeholder inside another) resolve correctly.
|
||||
for key in reversed(list(placeholders.keys())):
|
||||
text = text.replace(key, placeholders[key])
|
||||
|
||||
return text
|
||||
# Basic escaping for Telegram Markdown
|
||||
# In Markdown mode (not V2), only certain characters need escaping
|
||||
return content
|
||||
|
||||
async def _handle_text_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||
"""Handle incoming text messages."""
|
||||
@@ -550,52 +302,15 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
event = self._build_message_event(update.message, MessageType.COMMAND)
|
||||
await self.handle_message(event)
|
||||
|
||||
async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||
"""Handle incoming location/venue pin messages."""
|
||||
if not update.message:
|
||||
return
|
||||
|
||||
msg = update.message
|
||||
venue = getattr(msg, "venue", None)
|
||||
location = getattr(venue, "location", None) if venue else getattr(msg, "location", None)
|
||||
|
||||
if not location:
|
||||
return
|
||||
|
||||
lat = getattr(location, "latitude", None)
|
||||
lon = getattr(location, "longitude", None)
|
||||
if lat is None or lon is None:
|
||||
return
|
||||
|
||||
# Build a text message with coordinates and context
|
||||
parts = ["[The user shared a location pin.]"]
|
||||
if venue:
|
||||
title = getattr(venue, "title", None)
|
||||
address = getattr(venue, "address", None)
|
||||
if title:
|
||||
parts.append(f"Venue: {title}")
|
||||
if address:
|
||||
parts.append(f"Address: {address}")
|
||||
parts.append(f"latitude: {lat}")
|
||||
parts.append(f"longitude: {lon}")
|
||||
parts.append(f"Map: https://www.google.com/maps/search/?api=1&query={lat},{lon}")
|
||||
parts.append("Ask what they'd like to find nearby (restaurants, cafes, etc.) and any preferences.")
|
||||
|
||||
event = self._build_message_event(msg, MessageType.LOCATION)
|
||||
event.text = "\n".join(parts)
|
||||
await self.handle_message(event)
|
||||
|
||||
async def _handle_media_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||
"""Handle incoming media messages, downloading images to local cache."""
|
||||
"""Handle incoming media messages."""
|
||||
if not update.message:
|
||||
return
|
||||
|
||||
msg = update.message
|
||||
|
||||
# Determine media type
|
||||
if msg.sticker:
|
||||
msg_type = MessageType.STICKER
|
||||
elif msg.photo:
|
||||
if msg.photo:
|
||||
msg_type = MessageType.PHOTO
|
||||
elif msg.video:
|
||||
msg_type = MessageType.VIDEO
|
||||
@@ -603,8 +318,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
msg_type = MessageType.AUDIO
|
||||
elif msg.voice:
|
||||
msg_type = MessageType.VOICE
|
||||
elif msg.document:
|
||||
msg_type = MessageType.DOCUMENT
|
||||
else:
|
||||
msg_type = MessageType.DOCUMENT
|
||||
|
||||
@@ -614,193 +327,8 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
if msg.caption:
|
||||
event.text = msg.caption
|
||||
|
||||
# Handle stickers: describe via vision tool with caching
|
||||
if msg.sticker:
|
||||
await self._handle_sticker(msg, event)
|
||||
await self.handle_message(event)
|
||||
return
|
||||
|
||||
# Download photo to local image cache so the vision tool can access it
|
||||
# even after Telegram's ephemeral file URLs expire (~1 hour).
|
||||
if msg.photo:
|
||||
try:
|
||||
# msg.photo is a list of PhotoSize sorted by size; take the largest
|
||||
photo = msg.photo[-1]
|
||||
file_obj = await photo.get_file()
|
||||
# Download the image bytes directly into memory
|
||||
image_bytes = await file_obj.download_as_bytearray()
|
||||
# Determine extension from the file path if available
|
||||
ext = ".jpg"
|
||||
if file_obj.file_path:
|
||||
for candidate in [".png", ".webp", ".gif", ".jpeg", ".jpg"]:
|
||||
if file_obj.file_path.lower().endswith(candidate):
|
||||
ext = candidate
|
||||
break
|
||||
# Save to cache and populate media_urls with the local path
|
||||
cached_path = cache_image_from_bytes(bytes(image_bytes), ext=ext)
|
||||
event.media_urls = [cached_path]
|
||||
event.media_types = [f"image/{ext.lstrip('.')}"]
|
||||
print(f"[Telegram] Cached user photo: {cached_path}", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[Telegram] Failed to cache photo: {e}", flush=True)
|
||||
|
||||
# Download voice/audio messages to cache for STT transcription
|
||||
if msg.voice:
|
||||
try:
|
||||
file_obj = await msg.voice.get_file()
|
||||
audio_bytes = await file_obj.download_as_bytearray()
|
||||
cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".ogg")
|
||||
event.media_urls = [cached_path]
|
||||
event.media_types = ["audio/ogg"]
|
||||
print(f"[Telegram] Cached user voice: {cached_path}", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[Telegram] Failed to cache voice: {e}", flush=True)
|
||||
elif msg.audio:
|
||||
try:
|
||||
file_obj = await msg.audio.get_file()
|
||||
audio_bytes = await file_obj.download_as_bytearray()
|
||||
cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".mp3")
|
||||
event.media_urls = [cached_path]
|
||||
event.media_types = ["audio/mp3"]
|
||||
print(f"[Telegram] Cached user audio: {cached_path}", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[Telegram] Failed to cache audio: {e}", flush=True)
|
||||
|
||||
# Download document files to cache for agent processing
|
||||
elif msg.document:
|
||||
doc = msg.document
|
||||
try:
|
||||
# Determine file extension
|
||||
ext = ""
|
||||
original_filename = doc.file_name or ""
|
||||
if original_filename:
|
||||
_, ext = os.path.splitext(original_filename)
|
||||
ext = ext.lower()
|
||||
|
||||
# If no extension from filename, reverse-lookup from MIME type
|
||||
if not ext and doc.mime_type:
|
||||
mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()}
|
||||
ext = mime_to_ext.get(doc.mime_type, "")
|
||||
|
||||
# Check if supported
|
||||
if ext not in SUPPORTED_DOCUMENT_TYPES:
|
||||
supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys()))
|
||||
event.text = (
|
||||
f"Unsupported document type '{ext or 'unknown'}'. "
|
||||
f"Supported types: {supported_list}"
|
||||
)
|
||||
print(f"[Telegram] Unsupported document type: {ext or 'unknown'}", flush=True)
|
||||
await self.handle_message(event)
|
||||
return
|
||||
|
||||
# Check file size (Telegram Bot API limit: 20 MB)
|
||||
MAX_DOC_BYTES = 20 * 1024 * 1024
|
||||
if not doc.file_size or doc.file_size > MAX_DOC_BYTES:
|
||||
event.text = (
|
||||
"The document is too large or its size could not be verified. "
|
||||
"Maximum: 20 MB."
|
||||
)
|
||||
print(f"[Telegram] Document too large: {doc.file_size} bytes", flush=True)
|
||||
await self.handle_message(event)
|
||||
return
|
||||
|
||||
# Download and cache
|
||||
file_obj = await doc.get_file()
|
||||
doc_bytes = await file_obj.download_as_bytearray()
|
||||
raw_bytes = bytes(doc_bytes)
|
||||
cached_path = cache_document_from_bytes(raw_bytes, original_filename or f"document{ext}")
|
||||
mime_type = SUPPORTED_DOCUMENT_TYPES[ext]
|
||||
event.media_urls = [cached_path]
|
||||
event.media_types = [mime_type]
|
||||
print(f"[Telegram] Cached user document: {cached_path}", flush=True)
|
||||
|
||||
# For text files, inject content into event.text (capped at 100 KB)
|
||||
MAX_TEXT_INJECT_BYTES = 100 * 1024
|
||||
if ext in (".md", ".txt") and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
|
||||
try:
|
||||
text_content = raw_bytes.decode("utf-8")
|
||||
display_name = original_filename or f"document{ext}"
|
||||
display_name = re.sub(r'[^\w.\- ]', '_', display_name)
|
||||
injection = f"[Content of {display_name}]:\n{text_content}"
|
||||
if event.text:
|
||||
event.text = f"{injection}\n\n{event.text}"
|
||||
else:
|
||||
event.text = injection
|
||||
except UnicodeDecodeError:
|
||||
print(f"[Telegram] Could not decode text file as UTF-8, skipping content injection", flush=True)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[Telegram] Failed to cache document: {e}", flush=True)
|
||||
|
||||
await self.handle_message(event)
|
||||
|
||||
async def _handle_sticker(self, msg: Message, event: "MessageEvent") -> None:
|
||||
"""
|
||||
Describe a Telegram sticker via vision analysis, with caching.
|
||||
|
||||
For static stickers (WEBP), we download, analyze with vision, and cache
|
||||
the description by file_unique_id. For animated/video stickers, we inject
|
||||
a placeholder noting the emoji.
|
||||
"""
|
||||
from gateway.sticker_cache import (
|
||||
get_cached_description,
|
||||
cache_sticker_description,
|
||||
build_sticker_injection,
|
||||
build_animated_sticker_injection,
|
||||
STICKER_VISION_PROMPT,
|
||||
)
|
||||
|
||||
sticker = msg.sticker
|
||||
emoji = sticker.emoji or ""
|
||||
set_name = sticker.set_name or ""
|
||||
|
||||
# Animated and video stickers can't be analyzed as static images
|
||||
if sticker.is_animated or sticker.is_video:
|
||||
event.text = build_animated_sticker_injection(emoji)
|
||||
return
|
||||
|
||||
# Check the cache first
|
||||
cached = get_cached_description(sticker.file_unique_id)
|
||||
if cached:
|
||||
event.text = build_sticker_injection(
|
||||
cached["description"], cached.get("emoji", emoji), cached.get("set_name", set_name)
|
||||
)
|
||||
print(f"[Telegram] Sticker cache hit: {sticker.file_unique_id}", flush=True)
|
||||
return
|
||||
|
||||
# Cache miss -- download and analyze
|
||||
try:
|
||||
file_obj = await sticker.get_file()
|
||||
image_bytes = await file_obj.download_as_bytearray()
|
||||
cached_path = cache_image_from_bytes(bytes(image_bytes), ext=".webp")
|
||||
print(f"[Telegram] Analyzing sticker: {cached_path}", flush=True)
|
||||
|
||||
from tools.vision_tools import vision_analyze_tool
|
||||
import json as _json
|
||||
|
||||
result_json = await vision_analyze_tool(
|
||||
image_url=cached_path,
|
||||
user_prompt=STICKER_VISION_PROMPT,
|
||||
)
|
||||
result = _json.loads(result_json)
|
||||
|
||||
if result.get("success"):
|
||||
description = result.get("analysis", "a sticker")
|
||||
cache_sticker_description(sticker.file_unique_id, description, emoji, set_name)
|
||||
event.text = build_sticker_injection(description, emoji, set_name)
|
||||
else:
|
||||
# Vision failed -- use emoji as fallback
|
||||
event.text = build_sticker_injection(
|
||||
f"a sticker with emoji {emoji}" if emoji else "a sticker",
|
||||
emoji, set_name,
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"[Telegram] Sticker analysis error: {e}", flush=True)
|
||||
event.text = build_sticker_injection(
|
||||
f"a sticker with emoji {emoji}" if emoji else "a sticker",
|
||||
emoji, set_name,
|
||||
)
|
||||
|
||||
def _build_message_event(self, message: Message, msg_type: MessageType) -> MessageEvent:
|
||||
"""Build a MessageEvent from a Telegram message."""
|
||||
chat = message.chat
|
||||
|
||||
@@ -17,54 +17,12 @@ with different backends via a bridge pattern.
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
|
||||
_IS_WINDOWS = platform.system() == "Windows"
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _kill_port_process(port: int) -> None:
|
||||
"""Kill any process listening on the given TCP port."""
|
||||
try:
|
||||
if _IS_WINDOWS:
|
||||
# Use netstat to find the PID bound to this port, then taskkill
|
||||
result = subprocess.run(
|
||||
["netstat", "-ano", "-p", "TCP"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
for line in result.stdout.splitlines():
|
||||
parts = line.split()
|
||||
if len(parts) >= 5 and parts[3] == "LISTENING":
|
||||
local_addr = parts[1]
|
||||
if local_addr.endswith(f":{port}"):
|
||||
try:
|
||||
subprocess.run(
|
||||
["taskkill", "/PID", parts[4], "/F"],
|
||||
capture_output=True, timeout=5,
|
||||
)
|
||||
except subprocess.SubprocessError:
|
||||
pass
|
||||
else:
|
||||
result = subprocess.run(
|
||||
["fuser", f"{port}/tcp"],
|
||||
capture_output=True, timeout=5,
|
||||
)
|
||||
if result.returncode == 0:
|
||||
subprocess.run(
|
||||
["fuser", "-k", f"{port}/tcp"],
|
||||
capture_output=True, timeout=5,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
import sys
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
|
||||
sys.path.insert(0, str(__file__).rsplit("/", 3)[0])
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import (
|
||||
@@ -72,8 +30,6 @@ from gateway.platforms.base import (
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
SendResult,
|
||||
cache_image_from_url,
|
||||
cache_audio_from_url,
|
||||
)
|
||||
|
||||
|
||||
@@ -119,24 +75,16 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
# WhatsApp message limits
|
||||
MAX_MESSAGE_LENGTH = 65536 # WhatsApp allows longer messages
|
||||
|
||||
# Default bridge location relative to the hermes-agent install
|
||||
_DEFAULT_BRIDGE_DIR = Path(__file__).resolve().parents[2] / "scripts" / "whatsapp-bridge"
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.WHATSAPP)
|
||||
self._bridge_process: Optional[subprocess.Popen] = None
|
||||
self._bridge_port: int = config.extra.get("bridge_port", 3000)
|
||||
self._bridge_script: Optional[str] = config.extra.get(
|
||||
"bridge_script",
|
||||
str(self._DEFAULT_BRIDGE_DIR / "bridge.js"),
|
||||
)
|
||||
self._bridge_script: Optional[str] = config.extra.get("bridge_script")
|
||||
self._session_path: Path = Path(config.extra.get(
|
||||
"session_path",
|
||||
Path.home() / ".hermes" / "whatsapp" / "session"
|
||||
))
|
||||
self._message_queue: asyncio.Queue = asyncio.Queue()
|
||||
self._bridge_log_fh = None
|
||||
self._bridge_log: Optional[Path] = None
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""
|
||||
@@ -145,182 +93,72 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
This launches the Node.js bridge process and waits for it to be ready.
|
||||
"""
|
||||
if not check_whatsapp_requirements():
|
||||
logger.warning("[%s] Node.js not found. WhatsApp requires Node.js.", self.name)
|
||||
print(f"[{self.name}] Node.js not found. WhatsApp requires Node.js.")
|
||||
return False
|
||||
|
||||
if not self._bridge_script:
|
||||
print(f"[{self.name}] No bridge script configured.")
|
||||
print(f"[{self.name}] Set 'bridge_script' in whatsapp.extra config.")
|
||||
print(f"[{self.name}] See docs/messaging.md for WhatsApp setup instructions.")
|
||||
return False
|
||||
|
||||
bridge_path = Path(self._bridge_script)
|
||||
if not bridge_path.exists():
|
||||
logger.warning("[%s] Bridge script not found: %s", self.name, bridge_path)
|
||||
print(f"[{self.name}] Bridge script not found: {bridge_path}")
|
||||
return False
|
||||
|
||||
logger.info("[%s] Bridge found at %s", self.name, bridge_path)
|
||||
|
||||
# Auto-install npm dependencies if node_modules doesn't exist
|
||||
bridge_dir = bridge_path.parent
|
||||
if not (bridge_dir / "node_modules").exists():
|
||||
print(f"[{self.name}] Installing WhatsApp bridge dependencies...")
|
||||
try:
|
||||
install_result = subprocess.run(
|
||||
["npm", "install", "--silent"],
|
||||
cwd=str(bridge_dir),
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=60,
|
||||
)
|
||||
if install_result.returncode != 0:
|
||||
print(f"[{self.name}] npm install failed: {install_result.stderr}")
|
||||
return False
|
||||
print(f"[{self.name}] Dependencies installed")
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Failed to install dependencies: {e}")
|
||||
return False
|
||||
|
||||
try:
|
||||
# Ensure session directory exists
|
||||
self._session_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Kill any orphaned bridge from a previous gateway run
|
||||
_kill_port_process(self._bridge_port)
|
||||
import time
|
||||
time.sleep(1)
|
||||
|
||||
# Start the bridge process in its own process group.
|
||||
# Route output to a log file so QR codes, errors, and reconnection
|
||||
# messages are preserved for troubleshooting.
|
||||
whatsapp_mode = os.getenv("WHATSAPP_MODE", "self-chat")
|
||||
self._bridge_log = self._session_path.parent / "bridge.log"
|
||||
bridge_log_fh = open(self._bridge_log, "a")
|
||||
self._bridge_log_fh = bridge_log_fh
|
||||
# Start the bridge process
|
||||
self._bridge_process = subprocess.Popen(
|
||||
[
|
||||
"node",
|
||||
str(bridge_path),
|
||||
"--port", str(self._bridge_port),
|
||||
"--session", str(self._session_path),
|
||||
"--mode", whatsapp_mode,
|
||||
],
|
||||
stdout=bridge_log_fh,
|
||||
stderr=bridge_log_fh,
|
||||
preexec_fn=None if _IS_WINDOWS else os.setsid,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Wait for the bridge to connect to WhatsApp.
|
||||
# Phase 1: wait for the HTTP server to come up (up to 15s).
|
||||
# Phase 2: wait for WhatsApp status: connected (up to 15s more).
|
||||
import aiohttp
|
||||
http_ready = False
|
||||
data = {}
|
||||
for attempt in range(15):
|
||||
await asyncio.sleep(1)
|
||||
if self._bridge_process.poll() is not None:
|
||||
print(f"[{self.name}] Bridge process died (exit code {self._bridge_process.returncode})")
|
||||
print(f"[{self.name}] Check log: {self._bridge_log}")
|
||||
self._close_bridge_log()
|
||||
return False
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(
|
||||
f"http://localhost:{self._bridge_port}/health",
|
||||
timeout=aiohttp.ClientTimeout(total=2)
|
||||
) as resp:
|
||||
if resp.status == 200:
|
||||
http_ready = True
|
||||
data = await resp.json()
|
||||
if data.get("status") == "connected":
|
||||
print(f"[{self.name}] Bridge ready (status: connected)")
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if not http_ready:
|
||||
print(f"[{self.name}] Bridge HTTP server did not start in 15s")
|
||||
print(f"[{self.name}] Check log: {self._bridge_log}")
|
||||
self._close_bridge_log()
|
||||
return False
|
||||
# Wait for bridge to be ready (look for ready signal)
|
||||
# This is a simplified version - real implementation would
|
||||
# wait for an HTTP health check or specific stdout message
|
||||
await asyncio.sleep(5)
|
||||
|
||||
# Phase 2: HTTP is up but WhatsApp may still be connecting.
|
||||
# Give it more time to authenticate with saved credentials.
|
||||
if data.get("status") != "connected":
|
||||
print(f"[{self.name}] Bridge HTTP ready, waiting for WhatsApp connection...")
|
||||
for attempt in range(15):
|
||||
await asyncio.sleep(1)
|
||||
if self._bridge_process.poll() is not None:
|
||||
print(f"[{self.name}] Bridge process died during connection")
|
||||
print(f"[{self.name}] Check log: {self._bridge_log}")
|
||||
self._close_bridge_log()
|
||||
return False
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(
|
||||
f"http://localhost:{self._bridge_port}/health",
|
||||
timeout=aiohttp.ClientTimeout(total=2)
|
||||
) as resp:
|
||||
if resp.status == 200:
|
||||
data = await resp.json()
|
||||
if data.get("status") == "connected":
|
||||
print(f"[{self.name}] Bridge ready (status: connected)")
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
else:
|
||||
# Still not connected — warn but proceed (bridge may
|
||||
# auto-reconnect later, e.g. after a code 515 restart).
|
||||
print(f"[{self.name}] ⚠ WhatsApp not connected after 30s")
|
||||
print(f"[{self.name}] Bridge log: {self._bridge_log}")
|
||||
print(f"[{self.name}] If session expired, re-pair: hermes whatsapp")
|
||||
if self._bridge_process.poll() is not None:
|
||||
stderr = self._bridge_process.stderr.read() if self._bridge_process.stderr else ""
|
||||
print(f"[{self.name}] Bridge process died: {stderr}")
|
||||
return False
|
||||
|
||||
# Start message polling task
|
||||
asyncio.create_task(self._poll_messages())
|
||||
|
||||
self._running = True
|
||||
print(f"[{self.name}] Bridge started on port {self._bridge_port}")
|
||||
print(f"[{self.name}] Scan QR code if prompted (check bridge output)")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error("[%s] Failed to start bridge: %s", self.name, e, exc_info=True)
|
||||
self._close_bridge_log()
|
||||
print(f"[{self.name}] Failed to start bridge: {e}")
|
||||
return False
|
||||
|
||||
def _close_bridge_log(self) -> None:
|
||||
"""Close the bridge log file handle if open."""
|
||||
if self._bridge_log_fh:
|
||||
try:
|
||||
self._bridge_log_fh.close()
|
||||
except Exception:
|
||||
pass
|
||||
self._bridge_log_fh = None
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
"""Stop the WhatsApp bridge and clean up any orphaned processes."""
|
||||
"""Stop the WhatsApp bridge."""
|
||||
if self._bridge_process:
|
||||
try:
|
||||
# Kill the entire process group so child node processes die too
|
||||
import signal
|
||||
try:
|
||||
if _IS_WINDOWS:
|
||||
self._bridge_process.terminate()
|
||||
else:
|
||||
os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGTERM)
|
||||
except (ProcessLookupError, PermissionError):
|
||||
self._bridge_process.terminate()
|
||||
self._bridge_process.terminate()
|
||||
await asyncio.sleep(1)
|
||||
if self._bridge_process.poll() is None:
|
||||
try:
|
||||
if _IS_WINDOWS:
|
||||
self._bridge_process.kill()
|
||||
else:
|
||||
os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGKILL)
|
||||
except (ProcessLookupError, PermissionError):
|
||||
self._bridge_process.kill()
|
||||
self._bridge_process.kill()
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Error stopping bridge: {e}")
|
||||
|
||||
# Also kill any orphaned bridge processes on our port
|
||||
_kill_port_process(self._bridge_port)
|
||||
|
||||
self._running = False
|
||||
self._bridge_process = None
|
||||
self._close_bridge_log()
|
||||
print(f"[{self.name}] Disconnected")
|
||||
|
||||
async def send(
|
||||
@@ -368,131 +206,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
)
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def edit_message(
|
||||
self,
|
||||
chat_id: str,
|
||||
message_id: str,
|
||||
content: str,
|
||||
) -> SendResult:
|
||||
"""Edit a previously sent message via the WhatsApp bridge."""
|
||||
if not self._running:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
try:
|
||||
import aiohttp
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(
|
||||
f"http://localhost:{self._bridge_port}/edit",
|
||||
json={
|
||||
"chatId": chat_id,
|
||||
"messageId": message_id,
|
||||
"message": content,
|
||||
},
|
||||
timeout=aiohttp.ClientTimeout(total=15)
|
||||
) as resp:
|
||||
if resp.status == 200:
|
||||
return SendResult(success=True, message_id=message_id)
|
||||
else:
|
||||
error = await resp.text()
|
||||
return SendResult(success=False, error=error)
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def _send_media_to_bridge(
|
||||
self,
|
||||
chat_id: str,
|
||||
file_path: str,
|
||||
media_type: str,
|
||||
caption: Optional[str] = None,
|
||||
file_name: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send any media file via bridge /send-media endpoint."""
|
||||
if not self._running:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
try:
|
||||
import aiohttp
|
||||
|
||||
if not os.path.exists(file_path):
|
||||
return SendResult(success=False, error=f"File not found: {file_path}")
|
||||
|
||||
payload: Dict[str, Any] = {
|
||||
"chatId": chat_id,
|
||||
"filePath": file_path,
|
||||
"mediaType": media_type,
|
||||
}
|
||||
if caption:
|
||||
payload["caption"] = caption
|
||||
if file_name:
|
||||
payload["fileName"] = file_name
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(
|
||||
f"http://localhost:{self._bridge_port}/send-media",
|
||||
json=payload,
|
||||
timeout=aiohttp.ClientTimeout(total=120),
|
||||
) as resp:
|
||||
if resp.status == 200:
|
||||
data = await resp.json()
|
||||
return SendResult(
|
||||
success=True,
|
||||
message_id=data.get("messageId"),
|
||||
raw_response=data,
|
||||
)
|
||||
else:
|
||||
error = await resp.text()
|
||||
return SendResult(success=False, error=error)
|
||||
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_image(
|
||||
self,
|
||||
chat_id: str,
|
||||
image_url: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Download image URL to cache, send natively via bridge."""
|
||||
try:
|
||||
local_path = await cache_image_from_url(image_url)
|
||||
return await self._send_media_to_bridge(chat_id, local_path, "image", caption)
|
||||
except Exception:
|
||||
return await super().send_image(chat_id, image_url, caption, reply_to)
|
||||
|
||||
async def send_image_file(
|
||||
self,
|
||||
chat_id: str,
|
||||
image_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send a local image file natively via bridge."""
|
||||
return await self._send_media_to_bridge(chat_id, image_path, "image", caption)
|
||||
|
||||
async def send_video(
|
||||
self,
|
||||
chat_id: str,
|
||||
video_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send a video natively via bridge — plays inline in WhatsApp."""
|
||||
return await self._send_media_to_bridge(chat_id, video_path, "video", caption)
|
||||
|
||||
async def send_document(
|
||||
self,
|
||||
chat_id: str,
|
||||
file_path: str,
|
||||
caption: Optional[str] = None,
|
||||
file_name: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send a document/file as a downloadable attachment via bridge."""
|
||||
return await self._send_media_to_bridge(
|
||||
chat_id, file_path, "document", caption,
|
||||
file_name or os.path.basename(file_path),
|
||||
)
|
||||
|
||||
|
||||
async def send_typing(self, chat_id: str) -> None:
|
||||
"""Send typing indicator via bridge."""
|
||||
if not self._running:
|
||||
@@ -530,8 +244,8 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
"type": "group" if data.get("isGroup") else "dm",
|
||||
"participants": data.get("participants", []),
|
||||
}
|
||||
except Exception as e:
|
||||
logger.debug("Could not get WhatsApp chat info for %s: %s", chat_id, e)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return {"name": chat_id, "type": "dm"}
|
||||
|
||||
@@ -553,7 +267,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
if resp.status == 200:
|
||||
messages = await resp.json()
|
||||
for msg_data in messages:
|
||||
event = await self._build_message_event(msg_data)
|
||||
event = self._build_message_event(msg_data)
|
||||
if event:
|
||||
await self.handle_message(event)
|
||||
except asyncio.CancelledError:
|
||||
@@ -564,8 +278,8 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
|
||||
await asyncio.sleep(1) # Poll interval
|
||||
|
||||
async def _build_message_event(self, data: Dict[str, Any]) -> Optional[MessageEvent]:
|
||||
"""Build a MessageEvent from bridge message data, downloading images to cache."""
|
||||
def _build_message_event(self, data: Dict[str, Any]) -> Optional[MessageEvent]:
|
||||
"""Build a MessageEvent from bridge message data."""
|
||||
try:
|
||||
# Determine message type
|
||||
msg_type = MessageType.TEXT
|
||||
@@ -593,46 +307,21 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
user_name=data.get("senderName"),
|
||||
)
|
||||
|
||||
# Download image media URLs to the local cache so the vision tool
|
||||
# can access them reliably regardless of URL expiration.
|
||||
raw_urls = data.get("mediaUrls", [])
|
||||
cached_urls = []
|
||||
media_types = []
|
||||
for url in raw_urls:
|
||||
if msg_type == MessageType.PHOTO and url.startswith(("http://", "https://")):
|
||||
try:
|
||||
cached_path = await cache_image_from_url(url, ext=".jpg")
|
||||
cached_urls.append(cached_path)
|
||||
media_types.append("image/jpeg")
|
||||
print(f"[{self.name}] Cached user image: {cached_path}", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Failed to cache image: {e}", flush=True)
|
||||
cached_urls.append(url)
|
||||
media_types.append("image/jpeg")
|
||||
elif msg_type == MessageType.VOICE and url.startswith(("http://", "https://")):
|
||||
try:
|
||||
cached_path = await cache_audio_from_url(url, ext=".ogg")
|
||||
cached_urls.append(cached_path)
|
||||
media_types.append("audio/ogg")
|
||||
print(f"[{self.name}] Cached user voice: {cached_path}", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Failed to cache voice: {e}", flush=True)
|
||||
cached_urls.append(url)
|
||||
media_types.append("audio/ogg")
|
||||
else:
|
||||
cached_urls.append(url)
|
||||
media_types.append("unknown")
|
||||
|
||||
return MessageEvent(
|
||||
text=data.get("body", ""),
|
||||
message_type=msg_type,
|
||||
source=source,
|
||||
raw_message=data,
|
||||
message_id=data.get("messageId"),
|
||||
media_urls=cached_urls,
|
||||
media_types=media_types,
|
||||
media_urls=data.get("mediaUrls", []),
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Error building event: {e}")
|
||||
return None
|
||||
|
||||
|
||||
# Note: A reference Node.js bridge script would be provided in scripts/whatsapp-bridge/
|
||||
# It would use whatsapp-web.js or Baileys to:
|
||||
# 1. Handle WhatsApp Web authentication (QR code)
|
||||
# 2. Listen for incoming messages
|
||||
# 3. Expose HTTP endpoints for send/receive/status
|
||||
|
||||
2749
gateway/run.py
2749
gateway/run.py
File diff suppressed because it is too large
Load Diff
@@ -8,7 +8,6 @@ Handles:
|
||||
- Dynamic system prompt injection (agent knows its context)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import json
|
||||
import uuid
|
||||
@@ -17,8 +16,6 @@ from datetime import datetime, timedelta
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .config import (
|
||||
Platform,
|
||||
GatewayConfig,
|
||||
@@ -44,9 +41,6 @@ class SessionSource:
|
||||
user_id: Optional[str] = None
|
||||
user_name: Optional[str] = None
|
||||
thread_id: Optional[str] = None # For forum topics, Discord threads, etc.
|
||||
chat_topic: Optional[str] = None # Channel topic/description (Discord, Slack)
|
||||
user_id_alt: Optional[str] = None # Signal UUID (alternative to phone number)
|
||||
chat_id_alt: Optional[str] = None # Signal group internal ID
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
@@ -70,7 +64,7 @@ class SessionSource:
|
||||
return ", ".join(parts)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
d = {
|
||||
return {
|
||||
"platform": self.platform.value,
|
||||
"chat_id": self.chat_id,
|
||||
"chat_name": self.chat_name,
|
||||
@@ -78,13 +72,7 @@ class SessionSource:
|
||||
"user_id": self.user_id,
|
||||
"user_name": self.user_name,
|
||||
"thread_id": self.thread_id,
|
||||
"chat_topic": self.chat_topic,
|
||||
}
|
||||
if self.user_id_alt:
|
||||
d["user_id_alt"] = self.user_id_alt
|
||||
if self.chat_id_alt:
|
||||
d["chat_id_alt"] = self.chat_id_alt
|
||||
return d
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
|
||||
@@ -96,9 +84,6 @@ class SessionSource:
|
||||
user_id=data.get("user_id"),
|
||||
user_name=data.get("user_name"),
|
||||
thread_id=data.get("thread_id"),
|
||||
chat_topic=data.get("chat_topic"),
|
||||
user_id_alt=data.get("user_id_alt"),
|
||||
chat_id_alt=data.get("chat_id_alt"),
|
||||
)
|
||||
|
||||
@classmethod
|
||||
@@ -167,16 +152,6 @@ def build_session_context_prompt(context: SessionContext) -> str:
|
||||
else:
|
||||
lines.append(f"**Source:** {platform_name} ({context.source.description})")
|
||||
|
||||
# Channel topic (if available - provides context about the channel's purpose)
|
||||
if context.source.chat_topic:
|
||||
lines.append(f"**Channel Topic:** {context.source.chat_topic}")
|
||||
|
||||
# User identity (especially useful for WhatsApp where multiple people DM)
|
||||
if context.source.user_name:
|
||||
lines.append(f"**User:** {context.source.user_name}")
|
||||
elif context.source.user_id:
|
||||
lines.append(f"**User ID:** {context.source.user_id}")
|
||||
|
||||
# Connected platforms
|
||||
platforms_list = ["local (files on this machine)"]
|
||||
for p in context.connected_platforms:
|
||||
@@ -241,10 +216,6 @@ class SessionEntry:
|
||||
output_tokens: int = 0
|
||||
total_tokens: int = 0
|
||||
|
||||
# Set when a session was created because the previous one expired;
|
||||
# consumed once by the message handler to inject a notice into context
|
||||
was_auto_reset: bool = False
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
result = {
|
||||
"session_key": self.session_key,
|
||||
@@ -290,50 +261,23 @@ class SessionEntry:
|
||||
)
|
||||
|
||||
|
||||
def build_session_key(source: SessionSource) -> str:
|
||||
"""Build a deterministic session key from a message source.
|
||||
|
||||
This is the single source of truth for session key construction.
|
||||
WhatsApp DMs include chat_id (multi-user), other DMs do not (single owner).
|
||||
"""
|
||||
platform = source.platform.value
|
||||
if source.chat_type == "dm":
|
||||
if platform == "whatsapp" and source.chat_id:
|
||||
return f"agent:main:{platform}:dm:{source.chat_id}"
|
||||
return f"agent:main:{platform}:dm"
|
||||
return f"agent:main:{platform}:{source.chat_type}:{source.chat_id}"
|
||||
|
||||
|
||||
class SessionStore:
|
||||
"""
|
||||
Manages session storage and retrieval.
|
||||
|
||||
Uses SQLite (via SessionDB) for session metadata and message transcripts.
|
||||
Falls back to legacy JSONL files if SQLite is unavailable.
|
||||
Sessions are stored in:
|
||||
- sessions.json: Index mapping session keys to session IDs
|
||||
- {session_id}.jsonl: Conversation transcripts
|
||||
"""
|
||||
|
||||
def __init__(self, sessions_dir: Path, config: GatewayConfig,
|
||||
has_active_processes_fn=None,
|
||||
on_auto_reset=None):
|
||||
def __init__(self, sessions_dir: Path, config: GatewayConfig):
|
||||
self.sessions_dir = sessions_dir
|
||||
self.config = config
|
||||
self._entries: Dict[str, SessionEntry] = {}
|
||||
self._loaded = False
|
||||
self._has_active_processes_fn = has_active_processes_fn
|
||||
# on_auto_reset is deprecated — memory flush now runs proactively
|
||||
# via the background session expiry watcher in GatewayRunner.
|
||||
self._pre_flushed_sessions: set = set() # session_ids already flushed by watcher
|
||||
|
||||
# Initialize SQLite session database
|
||||
self._db = None
|
||||
try:
|
||||
from hermes_state import SessionDB
|
||||
self._db = SessionDB()
|
||||
except Exception as e:
|
||||
print(f"[gateway] Warning: SQLite session store unavailable, falling back to JSONL: {e}")
|
||||
|
||||
def _ensure_loaded(self) -> None:
|
||||
"""Load sessions index from disk if not already loaded."""
|
||||
"""Load sessions from disk if not already loaded."""
|
||||
if self._loaded:
|
||||
return
|
||||
|
||||
@@ -342,7 +286,7 @@ class SessionStore:
|
||||
|
||||
if sessions_file.exists():
|
||||
try:
|
||||
with open(sessions_file, "r", encoding="utf-8") as f:
|
||||
with open(sessions_file, "r") as f:
|
||||
data = json.load(f)
|
||||
for key, entry_data in data.items():
|
||||
self._entries[key] = SessionEntry.from_dict(entry_data)
|
||||
@@ -352,83 +296,47 @@ class SessionStore:
|
||||
self._loaded = True
|
||||
|
||||
def _save(self) -> None:
|
||||
"""Save sessions index to disk (kept for session key -> ID mapping)."""
|
||||
"""Save sessions index to disk."""
|
||||
self.sessions_dir.mkdir(parents=True, exist_ok=True)
|
||||
sessions_file = self.sessions_dir / "sessions.json"
|
||||
|
||||
data = {key: entry.to_dict() for key, entry in self._entries.items()}
|
||||
with open(sessions_file, "w", encoding="utf-8") as f:
|
||||
with open(sessions_file, "w") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
|
||||
def _generate_session_key(self, source: SessionSource) -> str:
|
||||
"""Generate a session key from a source."""
|
||||
return build_session_key(source)
|
||||
|
||||
def _is_session_expired(self, entry: SessionEntry) -> bool:
|
||||
"""Check if a session has expired based on its reset policy.
|
||||
platform = source.platform.value
|
||||
|
||||
Works from the entry alone — no SessionSource needed.
|
||||
Used by the background expiry watcher to proactively flush memories.
|
||||
Sessions with active background processes are never considered expired.
|
||||
"""
|
||||
if self._has_active_processes_fn:
|
||||
if self._has_active_processes_fn(entry.session_key):
|
||||
return False
|
||||
|
||||
policy = self.config.get_reset_policy(
|
||||
platform=entry.platform,
|
||||
session_type=entry.chat_type,
|
||||
)
|
||||
|
||||
if policy.mode == "none":
|
||||
return False
|
||||
|
||||
now = datetime.now()
|
||||
|
||||
if policy.mode in ("idle", "both"):
|
||||
idle_deadline = entry.updated_at + timedelta(minutes=policy.idle_minutes)
|
||||
if now > idle_deadline:
|
||||
return True
|
||||
|
||||
if policy.mode in ("daily", "both"):
|
||||
today_reset = now.replace(
|
||||
hour=policy.at_hour,
|
||||
minute=0, second=0, microsecond=0,
|
||||
)
|
||||
if now.hour < policy.at_hour:
|
||||
today_reset -= timedelta(days=1)
|
||||
if entry.updated_at < today_reset:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
if source.chat_type == "dm":
|
||||
# DMs share the main session per platform
|
||||
return f"agent:main:{platform}:dm"
|
||||
else:
|
||||
# Groups/channels get their own keys
|
||||
return f"agent:main:{platform}:{source.chat_type}:{source.chat_id}"
|
||||
|
||||
def _should_reset(self, entry: SessionEntry, source: SessionSource) -> bool:
|
||||
"""
|
||||
Check if a session should be reset based on policy.
|
||||
|
||||
Sessions with active background processes are never reset.
|
||||
Returns True if the session is stale and should start fresh.
|
||||
"""
|
||||
if self._has_active_processes_fn:
|
||||
session_key = self._generate_session_key(source)
|
||||
if self._has_active_processes_fn(session_key):
|
||||
return False
|
||||
|
||||
policy = self.config.get_reset_policy(
|
||||
platform=source.platform,
|
||||
session_type=source.chat_type
|
||||
)
|
||||
|
||||
if policy.mode == "none":
|
||||
return False
|
||||
|
||||
now = datetime.now()
|
||||
|
||||
# Check idle timeout
|
||||
if policy.mode in ("idle", "both"):
|
||||
idle_deadline = entry.updated_at + timedelta(minutes=policy.idle_minutes)
|
||||
if now > idle_deadline:
|
||||
return True
|
||||
|
||||
# Check daily reset
|
||||
if policy.mode in ("daily", "both"):
|
||||
# Find the most recent reset boundary
|
||||
today_reset = now.replace(
|
||||
hour=policy.at_hour,
|
||||
minute=0,
|
||||
@@ -436,6 +344,7 @@ class SessionStore:
|
||||
microsecond=0
|
||||
)
|
||||
if now.hour < policy.at_hour:
|
||||
# Reset boundary was yesterday
|
||||
today_reset -= timedelta(days=1)
|
||||
|
||||
if entry.updated_at < today_reset:
|
||||
@@ -443,27 +352,6 @@ class SessionStore:
|
||||
|
||||
return False
|
||||
|
||||
def has_any_sessions(self) -> bool:
|
||||
"""Check if any sessions have ever been created (across all platforms).
|
||||
|
||||
Uses the SQLite database as the source of truth because it preserves
|
||||
historical session records (ended sessions still count). The in-memory
|
||||
``_entries`` dict replaces entries on reset, so ``len(_entries)`` would
|
||||
stay at 1 for single-platform users — which is the bug this fixes.
|
||||
|
||||
The current session is already in the DB by the time this is called
|
||||
(get_or_create_session runs first), so we check ``> 1``.
|
||||
"""
|
||||
if self._db:
|
||||
try:
|
||||
return self._db.session_count() > 1
|
||||
except Exception:
|
||||
pass # fall through to heuristic
|
||||
# Fallback: check if sessions.json was loaded with existing data.
|
||||
# This covers the rare case where the DB is unavailable.
|
||||
self._ensure_loaded()
|
||||
return len(self._entries) > 1
|
||||
|
||||
def get_or_create_session(
|
||||
self,
|
||||
source: SessionSource,
|
||||
@@ -473,33 +361,22 @@ class SessionStore:
|
||||
Get an existing session or create a new one.
|
||||
|
||||
Evaluates reset policy to determine if the existing session is stale.
|
||||
Creates a session record in SQLite when a new session starts.
|
||||
"""
|
||||
self._ensure_loaded()
|
||||
|
||||
session_key = self._generate_session_key(source)
|
||||
now = datetime.now()
|
||||
|
||||
# Check for existing session
|
||||
if session_key in self._entries and not force_new:
|
||||
entry = self._entries[session_key]
|
||||
|
||||
# Check if session should be reset
|
||||
if not self._should_reset(entry, source):
|
||||
# Update timestamp and return existing
|
||||
entry.updated_at = now
|
||||
self._save()
|
||||
return entry
|
||||
else:
|
||||
# Session is being auto-reset. The background expiry watcher
|
||||
# should have already flushed memories proactively; discard
|
||||
# the marker so it doesn't accumulate.
|
||||
was_auto_reset = True
|
||||
self._pre_flushed_sessions.discard(entry.session_id)
|
||||
if self._db:
|
||||
try:
|
||||
self._db.end_session(entry.session_id, "session_reset")
|
||||
except Exception as e:
|
||||
logger.debug("Session DB operation failed: %s", e)
|
||||
else:
|
||||
was_auto_reset = False
|
||||
|
||||
# Create new session
|
||||
session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
|
||||
@@ -513,23 +390,11 @@ class SessionStore:
|
||||
display_name=source.chat_name,
|
||||
platform=source.platform,
|
||||
chat_type=source.chat_type,
|
||||
was_auto_reset=was_auto_reset,
|
||||
)
|
||||
|
||||
self._entries[session_key] = entry
|
||||
self._save()
|
||||
|
||||
# Create session in SQLite
|
||||
if self._db:
|
||||
try:
|
||||
self._db.create_session(
|
||||
session_id=session_id,
|
||||
source=source.platform.value,
|
||||
user_id=source.user_id,
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"[gateway] Warning: Failed to create SQLite session: {e}")
|
||||
|
||||
return entry
|
||||
|
||||
def update_session(
|
||||
@@ -548,14 +413,6 @@ class SessionStore:
|
||||
entry.output_tokens += output_tokens
|
||||
entry.total_tokens = entry.input_tokens + entry.output_tokens
|
||||
self._save()
|
||||
|
||||
if self._db:
|
||||
try:
|
||||
self._db.update_token_counts(
|
||||
entry.session_id, input_tokens, output_tokens
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Session DB operation failed: %s", e)
|
||||
|
||||
def reset_session(self, session_key: str) -> Optional[SessionEntry]:
|
||||
"""Force reset a session, creating a new session ID."""
|
||||
@@ -565,14 +422,6 @@ class SessionStore:
|
||||
return None
|
||||
|
||||
old_entry = self._entries[session_key]
|
||||
|
||||
# End old session in SQLite
|
||||
if self._db:
|
||||
try:
|
||||
self._db.end_session(old_entry.session_id, "session_reset")
|
||||
except Exception as e:
|
||||
logger.debug("Session DB operation failed: %s", e)
|
||||
|
||||
now = datetime.now()
|
||||
session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
|
||||
|
||||
@@ -590,63 +439,15 @@ class SessionStore:
|
||||
self._entries[session_key] = new_entry
|
||||
self._save()
|
||||
|
||||
# Create new session in SQLite
|
||||
if self._db:
|
||||
try:
|
||||
self._db.create_session(
|
||||
session_id=session_id,
|
||||
source=old_entry.platform.value if old_entry.platform else "unknown",
|
||||
user_id=old_entry.origin.user_id if old_entry.origin else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Session DB operation failed: %s", e)
|
||||
|
||||
return new_entry
|
||||
|
||||
def switch_session(self, session_key: str, target_session_id: str) -> Optional[SessionEntry]:
|
||||
"""Switch a session key to point at an existing session ID.
|
||||
|
||||
Used by ``/resume`` to restore a previously-named session.
|
||||
Ends the current session in SQLite (like reset), but instead of
|
||||
generating a fresh session ID, re-uses ``target_session_id`` so the
|
||||
old transcript is loaded on the next message.
|
||||
"""
|
||||
self._ensure_loaded()
|
||||
|
||||
if session_key not in self._entries:
|
||||
return None
|
||||
|
||||
old_entry = self._entries[session_key]
|
||||
|
||||
# Don't switch if already on that session
|
||||
if old_entry.session_id == target_session_id:
|
||||
return old_entry
|
||||
|
||||
# End the current session in SQLite
|
||||
if self._db:
|
||||
try:
|
||||
self._db.end_session(old_entry.session_id, "session_switch")
|
||||
except Exception as e:
|
||||
logger.debug("Session DB end_session failed: %s", e)
|
||||
|
||||
now = datetime.now()
|
||||
new_entry = SessionEntry(
|
||||
session_key=session_key,
|
||||
session_id=target_session_id,
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
origin=old_entry.origin,
|
||||
display_name=old_entry.display_name,
|
||||
platform=old_entry.platform,
|
||||
chat_type=old_entry.chat_type,
|
||||
)
|
||||
|
||||
self._entries[session_key] = new_entry
|
||||
self._save()
|
||||
return new_entry
|
||||
|
||||
|
||||
def list_sessions(self, active_minutes: Optional[int] = None) -> List[SessionEntry]:
|
||||
"""List all sessions, optionally filtered by activity."""
|
||||
"""
|
||||
List all sessions, optionally filtered by activity.
|
||||
|
||||
Args:
|
||||
active_minutes: If provided, only return sessions updated within this many minutes
|
||||
"""
|
||||
self._ensure_loaded()
|
||||
|
||||
entries = list(self._entries.values())
|
||||
@@ -655,82 +456,31 @@ class SessionStore:
|
||||
cutoff = datetime.now() - timedelta(minutes=active_minutes)
|
||||
entries = [e for e in entries if e.updated_at >= cutoff]
|
||||
|
||||
# Sort by most recently updated
|
||||
entries.sort(key=lambda e: e.updated_at, reverse=True)
|
||||
|
||||
return entries
|
||||
|
||||
def get_transcript_path(self, session_id: str) -> Path:
|
||||
"""Get the path to a session's legacy transcript file."""
|
||||
"""Get the path to a session's transcript file."""
|
||||
return self.sessions_dir / f"{session_id}.jsonl"
|
||||
|
||||
def append_to_transcript(self, session_id: str, message: Dict[str, Any]) -> None:
|
||||
"""Append a message to a session's transcript (SQLite + legacy JSONL)."""
|
||||
# Write to SQLite
|
||||
if self._db:
|
||||
try:
|
||||
self._db.append_message(
|
||||
session_id=session_id,
|
||||
role=message.get("role", "unknown"),
|
||||
content=message.get("content"),
|
||||
tool_name=message.get("tool_name"),
|
||||
tool_calls=message.get("tool_calls"),
|
||||
tool_call_id=message.get("tool_call_id"),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Session DB operation failed: %s", e)
|
||||
|
||||
# Also write legacy JSONL (keeps existing tooling working during transition)
|
||||
"""Append a message to a session's transcript."""
|
||||
transcript_path = self.get_transcript_path(session_id)
|
||||
with open(transcript_path, "a", encoding="utf-8") as f:
|
||||
|
||||
with open(transcript_path, "a") as f:
|
||||
f.write(json.dumps(message, ensure_ascii=False) + "\n")
|
||||
|
||||
def rewrite_transcript(self, session_id: str, messages: List[Dict[str, Any]]) -> None:
|
||||
"""Replace the entire transcript for a session with new messages.
|
||||
|
||||
Used by /retry, /undo, and /compress to persist modified conversation history.
|
||||
Rewrites both SQLite and legacy JSONL storage.
|
||||
"""
|
||||
# SQLite: clear old messages and re-insert
|
||||
if self._db:
|
||||
try:
|
||||
self._db.clear_messages(session_id)
|
||||
for msg in messages:
|
||||
self._db.append_message(
|
||||
session_id=session_id,
|
||||
role=msg.get("role", "unknown"),
|
||||
content=msg.get("content"),
|
||||
tool_name=msg.get("tool_name"),
|
||||
tool_calls=msg.get("tool_calls"),
|
||||
tool_call_id=msg.get("tool_call_id"),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to rewrite transcript in DB: %s", e)
|
||||
|
||||
# JSONL: overwrite the file
|
||||
transcript_path = self.get_transcript_path(session_id)
|
||||
with open(transcript_path, "w", encoding="utf-8") as f:
|
||||
for msg in messages:
|
||||
f.write(json.dumps(msg, ensure_ascii=False) + "\n")
|
||||
|
||||
def load_transcript(self, session_id: str) -> List[Dict[str, Any]]:
|
||||
"""Load all messages from a session's transcript."""
|
||||
# Try SQLite first
|
||||
if self._db:
|
||||
try:
|
||||
messages = self._db.get_messages_as_conversation(session_id)
|
||||
if messages:
|
||||
return messages
|
||||
except Exception as e:
|
||||
logger.debug("Could not load messages from DB: %s", e)
|
||||
|
||||
# Fall back to legacy JSONL
|
||||
transcript_path = self.get_transcript_path(session_id)
|
||||
|
||||
if not transcript_path.exists():
|
||||
return []
|
||||
|
||||
messages = []
|
||||
with open(transcript_path, "r", encoding="utf-8") as f:
|
||||
with open(transcript_path, "r") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line:
|
||||
|
||||
@@ -1,61 +0,0 @@
|
||||
"""
|
||||
Gateway runtime status helpers.
|
||||
|
||||
Provides PID-file based detection of whether the gateway daemon is running,
|
||||
used by send_message's check_fn to gate availability in the CLI.
|
||||
|
||||
The PID file lives at ``{HERMES_HOME}/gateway.pid``. HERMES_HOME defaults to
|
||||
``~/.hermes`` but can be overridden via the environment variable. This means
|
||||
separate HERMES_HOME directories naturally get separate PID files — a property
|
||||
that will be useful when we add named profiles (multiple agents running
|
||||
concurrently under distinct configurations).
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def _get_pid_path() -> Path:
|
||||
"""Return the path to the gateway PID file, respecting HERMES_HOME."""
|
||||
home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
|
||||
return home / "gateway.pid"
|
||||
|
||||
|
||||
def write_pid_file() -> None:
|
||||
"""Write the current process PID to the gateway PID file."""
|
||||
pid_path = _get_pid_path()
|
||||
pid_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
pid_path.write_text(str(os.getpid()))
|
||||
|
||||
|
||||
def remove_pid_file() -> None:
|
||||
"""Remove the gateway PID file if it exists."""
|
||||
try:
|
||||
_get_pid_path().unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def get_running_pid() -> Optional[int]:
|
||||
"""Return the PID of a running gateway instance, or ``None``.
|
||||
|
||||
Checks the PID file and verifies the process is actually alive.
|
||||
Cleans up stale PID files automatically.
|
||||
"""
|
||||
pid_path = _get_pid_path()
|
||||
if not pid_path.exists():
|
||||
return None
|
||||
try:
|
||||
pid = int(pid_path.read_text().strip())
|
||||
os.kill(pid, 0) # signal 0 = existence check, no actual signal sent
|
||||
return pid
|
||||
except (ValueError, ProcessLookupError, PermissionError):
|
||||
# Stale PID file — process is gone
|
||||
remove_pid_file()
|
||||
return None
|
||||
|
||||
|
||||
def is_gateway_running() -> bool:
|
||||
"""Check if the gateway daemon is currently running."""
|
||||
return get_running_pid() is not None
|
||||
@@ -1,111 +0,0 @@
|
||||
"""
|
||||
Sticker description cache for Telegram.
|
||||
|
||||
When users send stickers, we describe them via the vision tool and cache
|
||||
the descriptions keyed by file_unique_id so we don't re-analyze the same
|
||||
sticker image on every send. Descriptions are concise (1-2 sentences).
|
||||
|
||||
Cache location: ~/.hermes/sticker_cache.json
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
CACHE_PATH = Path(os.path.expanduser("~/.hermes/sticker_cache.json"))
|
||||
|
||||
# Vision prompt for describing stickers -- kept concise to save tokens
|
||||
STICKER_VISION_PROMPT = (
|
||||
"Describe this sticker in 1-2 sentences. Focus on what it depicts -- "
|
||||
"character, action, emotion. Be concise and objective."
|
||||
)
|
||||
|
||||
|
||||
def _load_cache() -> dict:
|
||||
"""Load the sticker cache from disk."""
|
||||
if CACHE_PATH.exists():
|
||||
try:
|
||||
return json.loads(CACHE_PATH.read_text(encoding="utf-8"))
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return {}
|
||||
return {}
|
||||
|
||||
|
||||
def _save_cache(cache: dict) -> None:
|
||||
"""Save the sticker cache to disk."""
|
||||
CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
CACHE_PATH.write_text(
|
||||
json.dumps(cache, indent=2, ensure_ascii=False),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
|
||||
def get_cached_description(file_unique_id: str) -> Optional[dict]:
|
||||
"""
|
||||
Look up a cached sticker description.
|
||||
|
||||
Returns:
|
||||
dict with keys {description, emoji, set_name, cached_at} or None.
|
||||
"""
|
||||
cache = _load_cache()
|
||||
return cache.get(file_unique_id)
|
||||
|
||||
|
||||
def cache_sticker_description(
|
||||
file_unique_id: str,
|
||||
description: str,
|
||||
emoji: str = "",
|
||||
set_name: str = "",
|
||||
) -> None:
|
||||
"""
|
||||
Store a sticker description in the cache.
|
||||
|
||||
Args:
|
||||
file_unique_id: Telegram's stable sticker identifier.
|
||||
description: Vision-generated description text.
|
||||
emoji: Associated emoji (e.g. "😀").
|
||||
set_name: Sticker set name if available.
|
||||
"""
|
||||
cache = _load_cache()
|
||||
cache[file_unique_id] = {
|
||||
"description": description,
|
||||
"emoji": emoji,
|
||||
"set_name": set_name,
|
||||
"cached_at": time.time(),
|
||||
}
|
||||
_save_cache(cache)
|
||||
|
||||
|
||||
def build_sticker_injection(
|
||||
description: str,
|
||||
emoji: str = "",
|
||||
set_name: str = "",
|
||||
) -> str:
|
||||
"""
|
||||
Build the warm-style injection text for a sticker description.
|
||||
|
||||
Returns a string like:
|
||||
[The user sent a sticker 😀 from "MyPack"~ It shows: "A cat waving" (=^.w.^=)]
|
||||
"""
|
||||
context = ""
|
||||
if set_name and emoji:
|
||||
context = f" {emoji} from \"{set_name}\""
|
||||
elif emoji:
|
||||
context = f" {emoji}"
|
||||
|
||||
return f"[The user sent a sticker{context}~ It shows: \"{description}\" (=^.w.^=)]"
|
||||
|
||||
|
||||
def build_animated_sticker_injection(emoji: str = "") -> str:
|
||||
"""
|
||||
Build injection text for animated/video stickers we can't analyze.
|
||||
"""
|
||||
if emoji:
|
||||
return (
|
||||
f"[The user sent an animated sticker {emoji}~ "
|
||||
f"I can't see animated ones yet, but the emoji suggests: {emoji}]"
|
||||
)
|
||||
return "[The user sent an animated sticker~ I can't see animated ones yet]"
|
||||
@@ -11,4 +11,4 @@ Provides subcommands for:
|
||||
- hermes cron - Manage cron jobs
|
||||
"""
|
||||
|
||||
__version__ = "v1.0.0"
|
||||
__version__ = "0.1.0"
|
||||
|
||||
2028
hermes_cli/auth.py
2028
hermes_cli/auth.py
File diff suppressed because it is too large
Load Diff
@@ -1,360 +0,0 @@
|
||||
"""Welcome banner, ASCII art, skills summary, and update check for the CLI.
|
||||
|
||||
Pure display functions with no HermesCLI state dependency.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Any, Optional
|
||||
|
||||
from rich.console import Console
|
||||
from rich.panel import Panel
|
||||
from rich.table import Table
|
||||
|
||||
from prompt_toolkit import print_formatted_text as _pt_print
|
||||
from prompt_toolkit.formatted_text import ANSI as _PT_ANSI
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# ANSI building blocks for conversation display
|
||||
# =========================================================================
|
||||
|
||||
_GOLD = "\033[1;33m"
|
||||
_BOLD = "\033[1m"
|
||||
_DIM = "\033[2m"
|
||||
_RST = "\033[0m"
|
||||
|
||||
|
||||
def cprint(text: str):
|
||||
"""Print ANSI-colored text through prompt_toolkit's renderer."""
|
||||
_pt_print(_PT_ANSI(text))
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# ASCII Art & Branding
|
||||
# =========================================================================
|
||||
|
||||
from hermes_cli import __version__ as VERSION
|
||||
|
||||
HERMES_AGENT_LOGO = """[bold #FFD700]██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ █████╗ ██████╗ ███████╗███╗ ██╗████████╗[/]
|
||||
[bold #FFD700]██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝[/]
|
||||
[#FFBF00]███████║█████╗ ██████╔╝██╔████╔██║█████╗ ███████╗█████╗███████║██║ ███╗█████╗ ██╔██╗ ██║ ██║[/]
|
||||
[#FFBF00]██╔══██║██╔══╝ ██╔══██╗██║╚██╔╝██║██╔══╝ ╚════██║╚════╝██╔══██║██║ ██║██╔══╝ ██║╚██╗██║ ██║[/]
|
||||
[#CD7F32]██║ ██║███████╗██║ ██║██║ ╚═╝ ██║███████╗███████║ ██║ ██║╚██████╔╝███████╗██║ ╚████║ ██║[/]
|
||||
[#CD7F32]╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚══════╝ ╚═╝ ╚═╝ ╚═════╝ ╚══════╝╚═╝ ╚═══╝ ╚═╝[/]"""
|
||||
|
||||
HERMES_CADUCEUS = """[#CD7F32]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣀⡀⠀⣀⣀⠀⢀⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
|
||||
[#CD7F32]⠀⠀⠀⠀⠀⠀⢀⣠⣴⣾⣿⣿⣇⠸⣿⣿⠇⣸⣿⣿⣷⣦⣄⡀⠀⠀⠀⠀⠀⠀[/]
|
||||
[#FFBF00]⠀⢀⣠⣴⣶⠿⠋⣩⡿⣿⡿⠻⣿⡇⢠⡄⢸⣿⠟⢿⣿⢿⣍⠙⠿⣶⣦⣄⡀⠀[/]
|
||||
[#FFBF00]⠀⠀⠉⠉⠁⠶⠟⠋⠀⠉⠀⢀⣈⣁⡈⢁⣈⣁⡀⠀⠉⠀⠙⠻⠶⠈⠉⠉⠀⠀[/]
|
||||
[#FFD700]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣴⣿⡿⠛⢁⡈⠛⢿⣿⣦⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
|
||||
[#FFD700]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠿⣿⣦⣤⣈⠁⢠⣴⣿⠿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
|
||||
[#FFBF00]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⠉⠻⢿⣿⣦⡉⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
|
||||
[#FFBF00]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠘⢷⣦⣈⠛⠃⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
|
||||
[#CD7F32]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢠⣴⠦⠈⠙⠿⣦⡄⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
|
||||
[#CD7F32]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠸⣿⣤⡈⠁⢤⣿⠇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
|
||||
[#B8860B]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠉⠛⠷⠄⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
|
||||
[#B8860B]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣀⠑⢶⣄⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
|
||||
[#B8860B]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⠁⢰⡆⠈⡿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
|
||||
[#B8860B]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⠳⠈⣡⠞⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
|
||||
[#B8860B]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]"""
|
||||
|
||||
COMPACT_BANNER = """
|
||||
[bold #FFD700]╔══════════════════════════════════════════════════════════════╗[/]
|
||||
[bold #FFD700]║[/] [#FFBF00]⚕ NOUS HERMES[/] [dim #B8860B]- AI Agent Framework[/] [bold #FFD700]║[/]
|
||||
[bold #FFD700]║[/] [#CD7F32]Messenger of the Digital Gods[/] [dim #B8860B]Nous Research[/] [bold #FFD700]║[/]
|
||||
[bold #FFD700]╚══════════════════════════════════════════════════════════════╝[/]
|
||||
"""
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Skills scanning
|
||||
# =========================================================================
|
||||
|
||||
def get_available_skills() -> Dict[str, List[str]]:
|
||||
"""Scan ~/.hermes/skills/ and return skills grouped by category."""
|
||||
import os
|
||||
|
||||
hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
|
||||
skills_dir = hermes_home / "skills"
|
||||
skills_by_category = {}
|
||||
|
||||
if not skills_dir.exists():
|
||||
return skills_by_category
|
||||
|
||||
for skill_file in skills_dir.rglob("SKILL.md"):
|
||||
rel_path = skill_file.relative_to(skills_dir)
|
||||
parts = rel_path.parts
|
||||
if len(parts) >= 2:
|
||||
category = parts[0]
|
||||
skill_name = parts[-2]
|
||||
else:
|
||||
category = "general"
|
||||
skill_name = skill_file.parent.name
|
||||
skills_by_category.setdefault(category, []).append(skill_name)
|
||||
|
||||
return skills_by_category
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Update check
|
||||
# =========================================================================
|
||||
|
||||
# Cache update check results for 6 hours to avoid repeated git fetches
|
||||
_UPDATE_CHECK_CACHE_SECONDS = 6 * 3600
|
||||
|
||||
|
||||
def check_for_updates() -> Optional[int]:
|
||||
"""Check how many commits behind origin/main the local repo is.
|
||||
|
||||
Does a ``git fetch`` at most once every 6 hours (cached to
|
||||
``~/.hermes/.update_check``). Returns the number of commits behind,
|
||||
or ``None`` if the check fails or isn't applicable.
|
||||
"""
|
||||
hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
|
||||
repo_dir = hermes_home / "hermes-agent"
|
||||
cache_file = hermes_home / ".update_check"
|
||||
|
||||
# Must be a git repo
|
||||
if not (repo_dir / ".git").exists():
|
||||
return None
|
||||
|
||||
# Read cache
|
||||
now = time.time()
|
||||
try:
|
||||
if cache_file.exists():
|
||||
cached = json.loads(cache_file.read_text())
|
||||
if now - cached.get("ts", 0) < _UPDATE_CHECK_CACHE_SECONDS:
|
||||
return cached.get("behind")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Fetch latest refs (fast — only downloads ref metadata, no files)
|
||||
try:
|
||||
subprocess.run(
|
||||
["git", "fetch", "origin", "--quiet"],
|
||||
capture_output=True, timeout=10,
|
||||
cwd=str(repo_dir),
|
||||
)
|
||||
except Exception:
|
||||
pass # Offline or timeout — use stale refs, that's fine
|
||||
|
||||
# Count commits behind
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "rev-list", "--count", "HEAD..origin/main"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
cwd=str(repo_dir),
|
||||
)
|
||||
if result.returncode == 0:
|
||||
behind = int(result.stdout.strip())
|
||||
else:
|
||||
behind = None
|
||||
except Exception:
|
||||
behind = None
|
||||
|
||||
# Write cache
|
||||
try:
|
||||
cache_file.write_text(json.dumps({"ts": now, "behind": behind}))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return behind
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Welcome banner
|
||||
# =========================================================================
|
||||
|
||||
def _format_context_length(tokens: int) -> str:
|
||||
"""Format a token count for display (e.g. 128000 → '128K', 1048576 → '1M')."""
|
||||
if tokens >= 1_000_000:
|
||||
val = tokens / 1_000_000
|
||||
return f"{val:g}M"
|
||||
elif tokens >= 1_000:
|
||||
val = tokens / 1_000
|
||||
return f"{val:g}K"
|
||||
return str(tokens)
|
||||
|
||||
|
||||
def build_welcome_banner(console: Console, model: str, cwd: str,
|
||||
tools: List[dict] = None,
|
||||
enabled_toolsets: List[str] = None,
|
||||
session_id: str = None,
|
||||
get_toolset_for_tool=None,
|
||||
context_length: int = None):
|
||||
"""Build and print a welcome banner with caduceus on left and info on right.
|
||||
|
||||
Args:
|
||||
console: Rich Console instance.
|
||||
model: Current model name.
|
||||
cwd: Current working directory.
|
||||
tools: List of tool definitions.
|
||||
enabled_toolsets: List of enabled toolset names.
|
||||
session_id: Session identifier.
|
||||
get_toolset_for_tool: Callable to map tool name -> toolset name.
|
||||
context_length: Model's context window size in tokens.
|
||||
"""
|
||||
from model_tools import check_tool_availability, TOOLSET_REQUIREMENTS
|
||||
if get_toolset_for_tool is None:
|
||||
from model_tools import get_toolset_for_tool
|
||||
|
||||
tools = tools or []
|
||||
enabled_toolsets = enabled_toolsets or []
|
||||
|
||||
_, unavailable_toolsets = check_tool_availability(quiet=True)
|
||||
disabled_tools = set()
|
||||
for item in unavailable_toolsets:
|
||||
disabled_tools.update(item.get("tools", []))
|
||||
|
||||
layout_table = Table.grid(padding=(0, 2))
|
||||
layout_table.add_column("left", justify="center")
|
||||
layout_table.add_column("right", justify="left")
|
||||
|
||||
left_lines = ["", HERMES_CADUCEUS, ""]
|
||||
model_short = model.split("/")[-1] if "/" in model else model
|
||||
if len(model_short) > 28:
|
||||
model_short = model_short[:25] + "..."
|
||||
ctx_str = f" [dim #B8860B]·[/] [dim #B8860B]{_format_context_length(context_length)} context[/]" if context_length else ""
|
||||
left_lines.append(f"[#FFBF00]{model_short}[/]{ctx_str} [dim #B8860B]·[/] [dim #B8860B]Nous Research[/]")
|
||||
left_lines.append(f"[dim #B8860B]{cwd}[/]")
|
||||
if session_id:
|
||||
left_lines.append(f"[dim #8B8682]Session: {session_id}[/]")
|
||||
left_content = "\n".join(left_lines)
|
||||
|
||||
right_lines = ["[bold #FFBF00]Available Tools[/]"]
|
||||
toolsets_dict: Dict[str, list] = {}
|
||||
|
||||
for tool in tools:
|
||||
tool_name = tool["function"]["name"]
|
||||
toolset = get_toolset_for_tool(tool_name) or "other"
|
||||
toolsets_dict.setdefault(toolset, []).append(tool_name)
|
||||
|
||||
for item in unavailable_toolsets:
|
||||
toolset_id = item.get("id", item.get("name", "unknown"))
|
||||
display_name = f"{toolset_id}_tools" if not toolset_id.endswith("_tools") else toolset_id
|
||||
if display_name not in toolsets_dict:
|
||||
toolsets_dict[display_name] = []
|
||||
for tool_name in item.get("tools", []):
|
||||
if tool_name not in toolsets_dict[display_name]:
|
||||
toolsets_dict[display_name].append(tool_name)
|
||||
|
||||
sorted_toolsets = sorted(toolsets_dict.keys())
|
||||
display_toolsets = sorted_toolsets[:8]
|
||||
remaining_toolsets = len(sorted_toolsets) - 8
|
||||
|
||||
for toolset in display_toolsets:
|
||||
tool_names = toolsets_dict[toolset]
|
||||
colored_names = []
|
||||
for name in sorted(tool_names):
|
||||
if name in disabled_tools:
|
||||
colored_names.append(f"[red]{name}[/]")
|
||||
else:
|
||||
colored_names.append(f"[#FFF8DC]{name}[/]")
|
||||
|
||||
tools_str = ", ".join(colored_names)
|
||||
if len(", ".join(sorted(tool_names))) > 45:
|
||||
short_names = []
|
||||
length = 0
|
||||
for name in sorted(tool_names):
|
||||
if length + len(name) + 2 > 42:
|
||||
short_names.append("...")
|
||||
break
|
||||
short_names.append(name)
|
||||
length += len(name) + 2
|
||||
colored_names = []
|
||||
for name in short_names:
|
||||
if name == "...":
|
||||
colored_names.append("[dim]...[/]")
|
||||
elif name in disabled_tools:
|
||||
colored_names.append(f"[red]{name}[/]")
|
||||
else:
|
||||
colored_names.append(f"[#FFF8DC]{name}[/]")
|
||||
tools_str = ", ".join(colored_names)
|
||||
|
||||
right_lines.append(f"[dim #B8860B]{toolset}:[/] {tools_str}")
|
||||
|
||||
if remaining_toolsets > 0:
|
||||
right_lines.append(f"[dim #B8860B](and {remaining_toolsets} more toolsets...)[/]")
|
||||
|
||||
# MCP Servers section (only if configured)
|
||||
try:
|
||||
from tools.mcp_tool import get_mcp_status
|
||||
mcp_status = get_mcp_status()
|
||||
except Exception:
|
||||
mcp_status = []
|
||||
|
||||
if mcp_status:
|
||||
right_lines.append("")
|
||||
right_lines.append("[bold #FFBF00]MCP Servers[/]")
|
||||
for srv in mcp_status:
|
||||
if srv["connected"]:
|
||||
right_lines.append(
|
||||
f"[dim #B8860B]{srv['name']}[/] [#FFF8DC]({srv['transport']})[/] "
|
||||
f"[dim #B8860B]—[/] [#FFF8DC]{srv['tools']} tool(s)[/]"
|
||||
)
|
||||
else:
|
||||
right_lines.append(
|
||||
f"[red]{srv['name']}[/] [dim]({srv['transport']})[/] "
|
||||
f"[red]— failed[/]"
|
||||
)
|
||||
|
||||
right_lines.append("")
|
||||
right_lines.append("[bold #FFBF00]Available Skills[/]")
|
||||
skills_by_category = get_available_skills()
|
||||
total_skills = sum(len(s) for s in skills_by_category.values())
|
||||
|
||||
if skills_by_category:
|
||||
for category in sorted(skills_by_category.keys()):
|
||||
skill_names = sorted(skills_by_category[category])
|
||||
if len(skill_names) > 8:
|
||||
display_names = skill_names[:8]
|
||||
skills_str = ", ".join(display_names) + f" +{len(skill_names) - 8} more"
|
||||
else:
|
||||
skills_str = ", ".join(skill_names)
|
||||
if len(skills_str) > 50:
|
||||
skills_str = skills_str[:47] + "..."
|
||||
right_lines.append(f"[dim #B8860B]{category}:[/] [#FFF8DC]{skills_str}[/]")
|
||||
else:
|
||||
right_lines.append("[dim #B8860B]No skills installed[/]")
|
||||
|
||||
right_lines.append("")
|
||||
mcp_connected = sum(1 for s in mcp_status if s["connected"]) if mcp_status else 0
|
||||
summary_parts = [f"{len(tools)} tools", f"{total_skills} skills"]
|
||||
if mcp_connected:
|
||||
summary_parts.append(f"{mcp_connected} MCP servers")
|
||||
summary_parts.append("/help for commands")
|
||||
right_lines.append(f"[dim #B8860B]{' · '.join(summary_parts)}[/]")
|
||||
|
||||
# Update check — show if behind origin/main
|
||||
try:
|
||||
behind = check_for_updates()
|
||||
if behind and behind > 0:
|
||||
commits_word = "commit" if behind == 1 else "commits"
|
||||
right_lines.append(
|
||||
f"[bold yellow]⚠ {behind} {commits_word} behind[/]"
|
||||
f"[dim yellow] — run [bold]hermes update[/bold] to update[/]"
|
||||
)
|
||||
except Exception:
|
||||
pass # Never break the banner over an update check
|
||||
|
||||
right_content = "\n".join(right_lines)
|
||||
layout_table.add_row(left_content, right_content)
|
||||
|
||||
outer_panel = Panel(
|
||||
layout_table,
|
||||
title=f"[bold #FFD700]Hermes Agent {VERSION}[/]",
|
||||
border_style="#CD7F32",
|
||||
padding=(0, 2),
|
||||
)
|
||||
|
||||
console.print()
|
||||
console.print(HERMES_AGENT_LOGO)
|
||||
console.print()
|
||||
console.print(outer_panel)
|
||||
@@ -1,145 +0,0 @@
|
||||
"""Interactive prompt callbacks for terminal_tool integration.
|
||||
|
||||
These bridge terminal_tool's interactive prompts (clarify, sudo, approval)
|
||||
into prompt_toolkit's event loop. Each function takes the HermesCLI instance
|
||||
as its first argument and uses its state (queues, app reference) to coordinate
|
||||
with the TUI.
|
||||
"""
|
||||
|
||||
import queue
|
||||
import time as _time
|
||||
|
||||
from hermes_cli.banner import cprint, _DIM, _RST
|
||||
|
||||
|
||||
def clarify_callback(cli, question, choices):
|
||||
"""Prompt for clarifying question through the TUI.
|
||||
|
||||
Sets up the interactive selection UI, then blocks until the user
|
||||
responds. Returns the user's choice or a timeout message.
|
||||
"""
|
||||
from cli import CLI_CONFIG
|
||||
|
||||
timeout = CLI_CONFIG.get("clarify", {}).get("timeout", 120)
|
||||
response_queue = queue.Queue()
|
||||
is_open_ended = not choices or len(choices) == 0
|
||||
|
||||
cli._clarify_state = {
|
||||
"question": question,
|
||||
"choices": choices if not is_open_ended else [],
|
||||
"selected": 0,
|
||||
"response_queue": response_queue,
|
||||
}
|
||||
cli._clarify_deadline = _time.monotonic() + timeout
|
||||
cli._clarify_freetext = is_open_ended
|
||||
|
||||
if hasattr(cli, '_app') and cli._app:
|
||||
cli._app.invalidate()
|
||||
|
||||
while True:
|
||||
try:
|
||||
result = response_queue.get(timeout=1)
|
||||
cli._clarify_deadline = 0
|
||||
return result
|
||||
except queue.Empty:
|
||||
remaining = cli._clarify_deadline - _time.monotonic()
|
||||
if remaining <= 0:
|
||||
break
|
||||
if hasattr(cli, '_app') and cli._app:
|
||||
cli._app.invalidate()
|
||||
|
||||
cli._clarify_state = None
|
||||
cli._clarify_freetext = False
|
||||
cli._clarify_deadline = 0
|
||||
if hasattr(cli, '_app') and cli._app:
|
||||
cli._app.invalidate()
|
||||
cprint(f"\n{_DIM}(clarify timed out after {timeout}s — agent will decide){_RST}")
|
||||
return (
|
||||
"The user did not provide a response within the time limit. "
|
||||
"Use your best judgement to make the choice and proceed."
|
||||
)
|
||||
|
||||
|
||||
def sudo_password_callback(cli) -> str:
|
||||
"""Prompt for sudo password through the TUI.
|
||||
|
||||
Sets up a password input area and blocks until the user responds.
|
||||
"""
|
||||
timeout = 45
|
||||
response_queue = queue.Queue()
|
||||
|
||||
cli._sudo_state = {"response_queue": response_queue}
|
||||
cli._sudo_deadline = _time.monotonic() + timeout
|
||||
|
||||
if hasattr(cli, '_app') and cli._app:
|
||||
cli._app.invalidate()
|
||||
|
||||
while True:
|
||||
try:
|
||||
result = response_queue.get(timeout=1)
|
||||
cli._sudo_state = None
|
||||
cli._sudo_deadline = 0
|
||||
if hasattr(cli, '_app') and cli._app:
|
||||
cli._app.invalidate()
|
||||
if result:
|
||||
cprint(f"\n{_DIM} ✓ Password received (cached for session){_RST}")
|
||||
else:
|
||||
cprint(f"\n{_DIM} ⏭ Skipped{_RST}")
|
||||
return result
|
||||
except queue.Empty:
|
||||
remaining = cli._sudo_deadline - _time.monotonic()
|
||||
if remaining <= 0:
|
||||
break
|
||||
if hasattr(cli, '_app') and cli._app:
|
||||
cli._app.invalidate()
|
||||
|
||||
cli._sudo_state = None
|
||||
cli._sudo_deadline = 0
|
||||
if hasattr(cli, '_app') and cli._app:
|
||||
cli._app.invalidate()
|
||||
cprint(f"\n{_DIM} ⏱ Timeout — continuing without sudo{_RST}")
|
||||
return ""
|
||||
|
||||
|
||||
def approval_callback(cli, command: str, description: str) -> str:
|
||||
"""Prompt for dangerous command approval through the TUI.
|
||||
|
||||
Shows a selection UI with choices: once / session / always / deny.
|
||||
"""
|
||||
timeout = 60
|
||||
response_queue = queue.Queue()
|
||||
choices = ["once", "session", "always", "deny"]
|
||||
|
||||
cli._approval_state = {
|
||||
"command": command,
|
||||
"description": description,
|
||||
"choices": choices,
|
||||
"selected": 0,
|
||||
"response_queue": response_queue,
|
||||
}
|
||||
cli._approval_deadline = _time.monotonic() + timeout
|
||||
|
||||
if hasattr(cli, '_app') and cli._app:
|
||||
cli._app.invalidate()
|
||||
|
||||
while True:
|
||||
try:
|
||||
result = response_queue.get(timeout=1)
|
||||
cli._approval_state = None
|
||||
cli._approval_deadline = 0
|
||||
if hasattr(cli, '_app') and cli._app:
|
||||
cli._app.invalidate()
|
||||
return result
|
||||
except queue.Empty:
|
||||
remaining = cli._approval_deadline - _time.monotonic()
|
||||
if remaining <= 0:
|
||||
break
|
||||
if hasattr(cli, '_app') and cli._app:
|
||||
cli._app.invalidate()
|
||||
|
||||
cli._approval_state = None
|
||||
cli._approval_deadline = 0
|
||||
if hasattr(cli, '_app') and cli._app:
|
||||
cli._app.invalidate()
|
||||
cprint(f"\n{_DIM} ⏱ Timeout — denying command{_RST}")
|
||||
return "deny"
|
||||
@@ -1,356 +0,0 @@
|
||||
"""Clipboard image extraction for macOS, Linux, and WSL2.
|
||||
|
||||
Provides a single function `save_clipboard_image(dest)` that checks the
|
||||
system clipboard for image data, saves it to *dest* as PNG, and returns
|
||||
True on success. No external Python dependencies — uses only OS-level
|
||||
CLI tools that ship with the platform (or are commonly installed).
|
||||
|
||||
Platform support:
|
||||
macOS — osascript (always available), pngpaste (if installed)
|
||||
WSL2 — powershell.exe via .NET System.Windows.Forms.Clipboard
|
||||
Linux — wl-paste (Wayland), xclip (X11)
|
||||
"""
|
||||
|
||||
import base64
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Cache WSL detection (checked once per process)
|
||||
_wsl_detected: bool | None = None
|
||||
|
||||
|
||||
def save_clipboard_image(dest: Path) -> bool:
|
||||
"""Extract an image from the system clipboard and save it as PNG.
|
||||
|
||||
Returns True if an image was found and saved, False otherwise.
|
||||
"""
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
if sys.platform == "darwin":
|
||||
return _macos_save(dest)
|
||||
return _linux_save(dest)
|
||||
|
||||
|
||||
def has_clipboard_image() -> bool:
|
||||
"""Quick check: does the clipboard currently contain an image?
|
||||
|
||||
Lighter than save_clipboard_image — doesn't extract or write anything.
|
||||
"""
|
||||
if sys.platform == "darwin":
|
||||
return _macos_has_image()
|
||||
if _is_wsl():
|
||||
return _wsl_has_image()
|
||||
if os.environ.get("WAYLAND_DISPLAY"):
|
||||
return _wayland_has_image()
|
||||
return _xclip_has_image()
|
||||
|
||||
|
||||
# ── macOS ────────────────────────────────────────────────────────────────
|
||||
|
||||
def _macos_save(dest: Path) -> bool:
|
||||
"""Try pngpaste first (fast, handles more formats), fall back to osascript."""
|
||||
return _macos_pngpaste(dest) or _macos_osascript(dest)
|
||||
|
||||
|
||||
def _macos_has_image() -> bool:
|
||||
"""Check if macOS clipboard contains image data."""
|
||||
try:
|
||||
info = subprocess.run(
|
||||
["osascript", "-e", "clipboard info"],
|
||||
capture_output=True, text=True, timeout=3,
|
||||
)
|
||||
return "«class PNGf»" in info.stdout or "«class TIFF»" in info.stdout
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _macos_pngpaste(dest: Path) -> bool:
|
||||
"""Use pngpaste (brew install pngpaste) — fastest, cleanest."""
|
||||
try:
|
||||
r = subprocess.run(
|
||||
["pngpaste", str(dest)],
|
||||
capture_output=True, timeout=3,
|
||||
)
|
||||
if r.returncode == 0 and dest.exists() and dest.stat().st_size > 0:
|
||||
return True
|
||||
except FileNotFoundError:
|
||||
pass # pngpaste not installed
|
||||
except Exception as e:
|
||||
logger.debug("pngpaste failed: %s", e)
|
||||
return False
|
||||
|
||||
|
||||
def _macos_osascript(dest: Path) -> bool:
|
||||
"""Use osascript to extract PNG data from clipboard (always available)."""
|
||||
if not _macos_has_image():
|
||||
return False
|
||||
|
||||
# Extract as PNG
|
||||
script = (
|
||||
'try\n'
|
||||
' set imgData to the clipboard as «class PNGf»\n'
|
||||
f' set f to open for access POSIX file "{dest}" with write permission\n'
|
||||
' write imgData to f\n'
|
||||
' close access f\n'
|
||||
'on error\n'
|
||||
' return "fail"\n'
|
||||
'end try\n'
|
||||
)
|
||||
try:
|
||||
r = subprocess.run(
|
||||
["osascript", "-e", script],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
if r.returncode == 0 and "fail" not in r.stdout and dest.exists() and dest.stat().st_size > 0:
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.debug("osascript clipboard extract failed: %s", e)
|
||||
return False
|
||||
|
||||
|
||||
# ── Linux ────────────────────────────────────────────────────────────────
|
||||
|
||||
def _is_wsl() -> bool:
|
||||
"""Detect if running inside WSL (1 or 2)."""
|
||||
global _wsl_detected
|
||||
if _wsl_detected is not None:
|
||||
return _wsl_detected
|
||||
try:
|
||||
with open("/proc/version", "r") as f:
|
||||
_wsl_detected = "microsoft" in f.read().lower()
|
||||
except Exception:
|
||||
_wsl_detected = False
|
||||
return _wsl_detected
|
||||
|
||||
|
||||
def _linux_save(dest: Path) -> bool:
|
||||
"""Try clipboard backends in priority order: WSL → Wayland → X11."""
|
||||
if _is_wsl():
|
||||
if _wsl_save(dest):
|
||||
return True
|
||||
# Fall through — WSLg might have wl-paste or xclip working
|
||||
|
||||
if os.environ.get("WAYLAND_DISPLAY"):
|
||||
if _wayland_save(dest):
|
||||
return True
|
||||
|
||||
return _xclip_save(dest)
|
||||
|
||||
|
||||
# ── WSL2 (powershell.exe) ────────────────────────────────────────────────
|
||||
|
||||
# PowerShell script: get clipboard image as base64-encoded PNG on stdout.
|
||||
# Using .NET System.Windows.Forms.Clipboard — always available on Windows.
|
||||
_PS_CHECK_IMAGE = (
|
||||
"Add-Type -AssemblyName System.Windows.Forms;"
|
||||
"[System.Windows.Forms.Clipboard]::ContainsImage()"
|
||||
)
|
||||
|
||||
_PS_EXTRACT_IMAGE = (
|
||||
"Add-Type -AssemblyName System.Windows.Forms;"
|
||||
"Add-Type -AssemblyName System.Drawing;"
|
||||
"$img = [System.Windows.Forms.Clipboard]::GetImage();"
|
||||
"if ($null -eq $img) { exit 1 }"
|
||||
"$ms = New-Object System.IO.MemoryStream;"
|
||||
"$img.Save($ms, [System.Drawing.Imaging.ImageFormat]::Png);"
|
||||
"[System.Convert]::ToBase64String($ms.ToArray())"
|
||||
)
|
||||
|
||||
|
||||
def _wsl_has_image() -> bool:
|
||||
"""Check if Windows clipboard has an image (via powershell.exe)."""
|
||||
try:
|
||||
r = subprocess.run(
|
||||
["powershell.exe", "-NoProfile", "-NonInteractive", "-Command",
|
||||
_PS_CHECK_IMAGE],
|
||||
capture_output=True, text=True, timeout=8,
|
||||
)
|
||||
return r.returncode == 0 and "True" in r.stdout
|
||||
except FileNotFoundError:
|
||||
logger.debug("powershell.exe not found — WSL clipboard unavailable")
|
||||
except Exception as e:
|
||||
logger.debug("WSL clipboard check failed: %s", e)
|
||||
return False
|
||||
|
||||
|
||||
def _wsl_save(dest: Path) -> bool:
|
||||
"""Extract clipboard image via powershell.exe → base64 → decode to PNG."""
|
||||
try:
|
||||
r = subprocess.run(
|
||||
["powershell.exe", "-NoProfile", "-NonInteractive", "-Command",
|
||||
_PS_EXTRACT_IMAGE],
|
||||
capture_output=True, text=True, timeout=15,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
return False
|
||||
|
||||
b64_data = r.stdout.strip()
|
||||
if not b64_data:
|
||||
return False
|
||||
|
||||
png_bytes = base64.b64decode(b64_data)
|
||||
dest.write_bytes(png_bytes)
|
||||
return dest.exists() and dest.stat().st_size > 0
|
||||
|
||||
except FileNotFoundError:
|
||||
logger.debug("powershell.exe not found — WSL clipboard unavailable")
|
||||
except Exception as e:
|
||||
logger.debug("WSL clipboard extraction failed: %s", e)
|
||||
dest.unlink(missing_ok=True)
|
||||
return False
|
||||
|
||||
|
||||
# ── Wayland (wl-paste) ──────────────────────────────────────────────────
|
||||
|
||||
def _wayland_has_image() -> bool:
|
||||
"""Check if Wayland clipboard has image content."""
|
||||
try:
|
||||
r = subprocess.run(
|
||||
["wl-paste", "--list-types"],
|
||||
capture_output=True, text=True, timeout=3,
|
||||
)
|
||||
return r.returncode == 0 and any(
|
||||
t.startswith("image/") for t in r.stdout.splitlines()
|
||||
)
|
||||
except FileNotFoundError:
|
||||
logger.debug("wl-paste not installed — Wayland clipboard unavailable")
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
|
||||
|
||||
def _wayland_save(dest: Path) -> bool:
|
||||
"""Use wl-paste to extract clipboard image (Wayland sessions)."""
|
||||
try:
|
||||
# Check available MIME types
|
||||
types_r = subprocess.run(
|
||||
["wl-paste", "--list-types"],
|
||||
capture_output=True, text=True, timeout=3,
|
||||
)
|
||||
if types_r.returncode != 0:
|
||||
return False
|
||||
types = types_r.stdout.splitlines()
|
||||
|
||||
# Prefer PNG, fall back to other image formats
|
||||
mime = None
|
||||
for preferred in ("image/png", "image/jpeg", "image/bmp",
|
||||
"image/gif", "image/webp"):
|
||||
if preferred in types:
|
||||
mime = preferred
|
||||
break
|
||||
|
||||
if not mime:
|
||||
return False
|
||||
|
||||
# Extract the image data
|
||||
with open(dest, "wb") as f:
|
||||
subprocess.run(
|
||||
["wl-paste", "--type", mime],
|
||||
stdout=f, stderr=subprocess.DEVNULL, timeout=5, check=True,
|
||||
)
|
||||
|
||||
if not dest.exists() or dest.stat().st_size == 0:
|
||||
return False
|
||||
|
||||
# BMP needs conversion to PNG (common in WSLg where only BMP
|
||||
# is bridged from Windows clipboard via RDP).
|
||||
if mime == "image/bmp":
|
||||
return _convert_to_png(dest)
|
||||
|
||||
return True
|
||||
|
||||
except FileNotFoundError:
|
||||
logger.debug("wl-paste not installed — Wayland clipboard unavailable")
|
||||
except Exception as e:
|
||||
logger.debug("wl-paste clipboard extraction failed: %s", e)
|
||||
dest.unlink(missing_ok=True)
|
||||
return False
|
||||
|
||||
|
||||
def _convert_to_png(path: Path) -> bool:
|
||||
"""Convert an image file to PNG in-place (requires Pillow or ImageMagick)."""
|
||||
# Try Pillow first (likely installed in the venv)
|
||||
try:
|
||||
from PIL import Image
|
||||
img = Image.open(path)
|
||||
img.save(path, "PNG")
|
||||
return True
|
||||
except ImportError:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.debug("Pillow BMP→PNG conversion failed: %s", e)
|
||||
|
||||
# Fall back to ImageMagick convert
|
||||
tmp = path.with_suffix(".bmp")
|
||||
try:
|
||||
path.rename(tmp)
|
||||
r = subprocess.run(
|
||||
["convert", str(tmp), "png:" + str(path)],
|
||||
capture_output=True, timeout=5,
|
||||
)
|
||||
tmp.unlink(missing_ok=True)
|
||||
if r.returncode == 0 and path.exists() and path.stat().st_size > 0:
|
||||
return True
|
||||
except FileNotFoundError:
|
||||
logger.debug("ImageMagick not installed — cannot convert BMP to PNG")
|
||||
if tmp.exists() and not path.exists():
|
||||
tmp.rename(path)
|
||||
except Exception as e:
|
||||
logger.debug("ImageMagick BMP→PNG conversion failed: %s", e)
|
||||
if tmp.exists() and not path.exists():
|
||||
tmp.rename(path)
|
||||
|
||||
# Can't convert — BMP is still usable as-is for most APIs
|
||||
return path.exists() and path.stat().st_size > 0
|
||||
|
||||
|
||||
# ── X11 (xclip) ─────────────────────────────────────────────────────────
|
||||
|
||||
def _xclip_has_image() -> bool:
|
||||
"""Check if X11 clipboard has image content."""
|
||||
try:
|
||||
r = subprocess.run(
|
||||
["xclip", "-selection", "clipboard", "-t", "TARGETS", "-o"],
|
||||
capture_output=True, text=True, timeout=3,
|
||||
)
|
||||
return r.returncode == 0 and "image/png" in r.stdout
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
|
||||
|
||||
def _xclip_save(dest: Path) -> bool:
|
||||
"""Use xclip to extract clipboard image (X11 sessions)."""
|
||||
# Check if clipboard has image content
|
||||
try:
|
||||
targets = subprocess.run(
|
||||
["xclip", "-selection", "clipboard", "-t", "TARGETS", "-o"],
|
||||
capture_output=True, text=True, timeout=3,
|
||||
)
|
||||
if "image/png" not in targets.stdout:
|
||||
return False
|
||||
except FileNotFoundError:
|
||||
logger.debug("xclip not installed — X11 clipboard image paste unavailable")
|
||||
return False
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
# Extract PNG data
|
||||
try:
|
||||
with open(dest, "wb") as f:
|
||||
subprocess.run(
|
||||
["xclip", "-selection", "clipboard", "-t", "image/png", "-o"],
|
||||
stdout=f, stderr=subprocess.DEVNULL, timeout=5, check=True,
|
||||
)
|
||||
if dest.exists() and dest.stat().st_size > 0:
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.debug("xclip image extraction failed: %s", e)
|
||||
dest.unlink(missing_ok=True)
|
||||
return False
|
||||
@@ -1,143 +0,0 @@
|
||||
"""Codex model discovery from API, local cache, and config."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
import os
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_CODEX_MODELS: List[str] = [
|
||||
"gpt-5.3-codex",
|
||||
"gpt-5.2-codex",
|
||||
"gpt-5.1-codex-max",
|
||||
"gpt-5.1-codex-mini",
|
||||
]
|
||||
|
||||
|
||||
def _fetch_models_from_api(access_token: str) -> List[str]:
|
||||
"""Fetch available models from the Codex API. Returns visible models sorted by priority."""
|
||||
try:
|
||||
import httpx
|
||||
resp = httpx.get(
|
||||
"https://chatgpt.com/backend-api/codex/models?client_version=1.0.0",
|
||||
headers={"Authorization": f"Bearer {access_token}"},
|
||||
timeout=10,
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
return []
|
||||
data = resp.json()
|
||||
entries = data.get("models", []) if isinstance(data, dict) else []
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to fetch Codex models from API: %s", exc)
|
||||
return []
|
||||
|
||||
sortable = []
|
||||
for item in entries:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
slug = item.get("slug")
|
||||
if not isinstance(slug, str) or not slug.strip():
|
||||
continue
|
||||
slug = slug.strip()
|
||||
if item.get("supported_in_api") is False:
|
||||
continue
|
||||
visibility = item.get("visibility", "")
|
||||
if isinstance(visibility, str) and visibility.strip().lower() == "hidden":
|
||||
continue
|
||||
priority = item.get("priority")
|
||||
rank = int(priority) if isinstance(priority, (int, float)) else 10_000
|
||||
sortable.append((rank, slug))
|
||||
|
||||
sortable.sort(key=lambda x: (x[0], x[1]))
|
||||
return [slug for _, slug in sortable]
|
||||
|
||||
|
||||
def _read_default_model(codex_home: Path) -> Optional[str]:
|
||||
config_path = codex_home / "config.toml"
|
||||
if not config_path.exists():
|
||||
return None
|
||||
try:
|
||||
import tomllib
|
||||
except Exception:
|
||||
return None
|
||||
try:
|
||||
payload = tomllib.loads(config_path.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
return None
|
||||
model = payload.get("model") if isinstance(payload, dict) else None
|
||||
if isinstance(model, str) and model.strip():
|
||||
return model.strip()
|
||||
return None
|
||||
|
||||
|
||||
def _read_cache_models(codex_home: Path) -> List[str]:
|
||||
cache_path = codex_home / "models_cache.json"
|
||||
if not cache_path.exists():
|
||||
return []
|
||||
try:
|
||||
raw = json.loads(cache_path.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
entries = raw.get("models") if isinstance(raw, dict) else None
|
||||
sortable = []
|
||||
if isinstance(entries, list):
|
||||
for item in entries:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
slug = item.get("slug")
|
||||
if not isinstance(slug, str) or not slug.strip():
|
||||
continue
|
||||
slug = slug.strip()
|
||||
if item.get("supported_in_api") is False:
|
||||
continue
|
||||
visibility = item.get("visibility")
|
||||
if isinstance(visibility, str) and visibility.strip().lower() == "hidden":
|
||||
continue
|
||||
priority = item.get("priority")
|
||||
rank = int(priority) if isinstance(priority, (int, float)) else 10_000
|
||||
sortable.append((rank, slug))
|
||||
|
||||
sortable.sort(key=lambda item: (item[0], item[1]))
|
||||
deduped: List[str] = []
|
||||
for _, slug in sortable:
|
||||
if slug not in deduped:
|
||||
deduped.append(slug)
|
||||
return deduped
|
||||
|
||||
|
||||
def get_codex_model_ids(access_token: Optional[str] = None) -> List[str]:
|
||||
"""Return available Codex model IDs, trying API first, then local sources.
|
||||
|
||||
Resolution order: API (live, if token provided) > config.toml default >
|
||||
local cache > hardcoded defaults.
|
||||
"""
|
||||
codex_home_str = os.getenv("CODEX_HOME", "").strip() or str(Path.home() / ".codex")
|
||||
codex_home = Path(codex_home_str).expanduser()
|
||||
ordered: List[str] = []
|
||||
|
||||
# Try live API if we have a token
|
||||
if access_token:
|
||||
api_models = _fetch_models_from_api(access_token)
|
||||
if api_models:
|
||||
return api_models
|
||||
|
||||
# Fall back to local sources
|
||||
default_model = _read_default_model(codex_home)
|
||||
if default_model:
|
||||
ordered.append(default_model)
|
||||
|
||||
for model_id in _read_cache_models(codex_home):
|
||||
if model_id not in ordered:
|
||||
ordered.append(model_id)
|
||||
|
||||
for model_id in DEFAULT_CODEX_MODELS:
|
||||
if model_id not in ordered:
|
||||
ordered.append(model_id)
|
||||
|
||||
return ordered
|
||||
@@ -1,22 +0,0 @@
|
||||
"""Shared ANSI color utilities for Hermes CLI modules."""
|
||||
|
||||
import sys
|
||||
|
||||
|
||||
class Colors:
|
||||
RESET = "\033[0m"
|
||||
BOLD = "\033[1m"
|
||||
DIM = "\033[2m"
|
||||
RED = "\033[31m"
|
||||
GREEN = "\033[32m"
|
||||
YELLOW = "\033[33m"
|
||||
BLUE = "\033[34m"
|
||||
MAGENTA = "\033[35m"
|
||||
CYAN = "\033[36m"
|
||||
|
||||
|
||||
def color(text: str, *codes) -> str:
|
||||
"""Apply color codes to text (only when output is a TTY)."""
|
||||
if not sys.stdout.isatty():
|
||||
return text
|
||||
return "".join(codes) + text + Colors.RESET
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user